ViewVC Help
View File | Revision Log | Show Annotations | Download File | Root Listing
root/radiance/ray/src/util/rcollate.c
Revision: 2.28
Committed: Fri Oct 19 16:53:37 2018 UTC (5 years, 6 months ago) by greg
Content type: text/plain
Branch: MAIN
Changes since 2.27: +9 -9 lines
Log Message:
Minor tweaks on last change

File Contents

# Content
1 #ifndef lint
2 static const char RCSid[] = "$Id: rcollate.c,v 2.27 2018/10/18 22:59:48 greg Exp $";
3 #endif
4 /*
5 * Utility to re-order records in a binary or ASCII data file (matrix)
6 */
7
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include "platform.h"
12 #include "rtio.h"
13 #include "resolu.h"
14 #if defined(_WIN32) || defined(_WIN64)
15 #undef ftello
16 #define ftello ftell
17 #undef ssize_t
18 #define ssize_t size_t
19 #else
20 #include <sys/mman.h>
21 #endif
22
/* Image of a file (or stream) held in memory */
typedef struct {
	void	*base;		/* first byte of the loaded data */
	size_t	len;		/* number of data bytes starting at base */
	int	mapped;		/* non-zero when base comes from mmap() */
} MEMLOAD;
28
/*
 * Index of whitespace-separated records within a loaded ASCII file.
 * The structure is over-allocated by its creator: rec[] holds nrecs
 * record start pointers followed by one more entry that marks the
 * end of the data, which is why the array is declared with one
 * element rather than none.
 */
typedef struct {
	int	nw_rec;		/* words that make up one record */
	int	nrecs;		/* how many records were indexed */
	char	*rec[1];	/* record pointers (array runs past struct end) */
} RECINDEX;
34
int	warnings = 1;		/* non-zero to print warnings (toggled by -w) */
36
37 /* free loaded file */
38 static void
39 free_load(MEMLOAD *mp)
40 {
41 if (mp == NULL || (mp->base == NULL) | (mp->len <= 0))
42 return;
43 #ifdef MAP_FILE
44 if (mp->mapped)
45 munmap(mp->base, mp->len);
46 else
47 #endif
48 free(mp->base);
49 mp->base = NULL;
50 mp->len = 0;
51 }
52
53 /* load memory from an input stream, starting from current position */
54 static int
55 load_stream(MEMLOAD *mp, FILE *fp)
56 {
57 size_t alloced = 0;
58 char buf[8192];
59 size_t nr;
60
61 if (mp == NULL)
62 return(-1);
63 mp->base = NULL;
64 mp->len = 0;
65 mp->mapped = 0;
66 if (fp == NULL)
67 return(-1);
68 while ((nr = fread(buf, 1, sizeof(buf), fp)) > 0) {
69 if (!alloced)
70 mp->base = malloc(alloced = nr);
71 else if (mp->len+nr > alloced)
72 mp->base = realloc(mp->base,
73 alloced = alloced*(2+(nr==sizeof(buf)))/2+nr);
74 if (mp->base == NULL)
75 return(-1);
76 memcpy((char *)mp->base + mp->len, buf, nr);
77 mp->len += nr;
78 }
79 if (ferror(fp)) {
80 free_load(mp);
81 return(-1);
82 }
83 if (alloced > mp->len*5/4) /* don't waste too much space */
84 mp->base = realloc(mp->base, mp->len);
85 return(mp->len > 0);
86 }
87
88 /* load a file into memory */
89 static int
90 load_file(MEMLOAD *mp, FILE *fp)
91 {
92 int fd;
93 off_t skip, flen, fpos;
94
95 #if defined(_WIN32) || defined(_WIN64)
96 /* too difficult to fix this */
97 return load_stream(mp, fp);
98 #endif
99 if (mp == NULL)
100 return(-1);
101 mp->base = NULL;
102 mp->len = 0;
103 mp->mapped = 0;
104 if (fp == NULL)
105 return(-1);
106 fd = fileno(fp);
107 skip = ftello(fp);
108 flen = lseek(fd, 0, SEEK_END);
109 if (flen <= skip)
110 return((int)(flen - skip));
111 mp->len = (size_t)(flen - skip);
112 #ifdef MAP_FILE
113 if (mp->len > 1L<<20) { /* map file if > 1 MByte */
114 mp->base = mmap(NULL, flen, PROT_READ, MAP_PRIVATE, fd, 0);
115 if (mp->base != MAP_FAILED) {
116 mp->base = (char *)mp->base + skip;
117 mp->mapped = 1;
118 return(1); /* mmap() success */
119 }
120 mp->base = NULL; /* else fall back to reading it in... */
121 }
122 #endif
123 if (lseek(fd, skip, SEEK_SET) != skip ||
124 (mp->base = malloc(mp->len)) == NULL) {
125 mp->len = 0;
126 return(-1);
127 }
128 fpos = skip;
129 while (fpos < flen) { /* read() fails if n > 2 GBytes */
130 ssize_t nread = read(fd, (char *)mp->base+(fpos-skip),
131 (flen-fpos < 1L<<24) ? flen-fpos : 1L<<24);
132 if (nread <= 0) {
133 free_load(mp);
134 return(-1);
135 }
136 fpos += nread;
137 }
138 return(1);
139 }
140
/* release a record index (a single heap block, so free() suffices) */
#define free_records(rp)	free(rp)
143
144 /* compute record index */
145 static RECINDEX *
146 index_records(const MEMLOAD *mp, int nw_rec)
147 {
148 int nall = 0;
149 RECINDEX *rp;
150 char *cp, *mend;
151 int n;
152
153 if (mp == NULL || (mp->base == NULL) | (mp->len <= 0))
154 return(NULL);
155 if (nw_rec <= 0)
156 return(NULL);
157 nall = 1000;
158 rp = (RECINDEX *)malloc(sizeof(RECINDEX) + nall*sizeof(char *));
159 if (rp == NULL)
160 return(NULL);
161 rp->nw_rec = nw_rec;
162 rp->nrecs = 0;
163 cp = (char *)mp->base;
164 mend = cp + mp->len;
165 for ( ; ; ) { /* whitespace-separated words */
166 while (cp < mend && !*cp | isspace(*cp))
167 ++cp;
168 if (cp >= mend)
169 break;
170 if (rp->nrecs >= nall) {
171 nall += nall>>1; /* get more record space */
172 rp = (RECINDEX *)realloc(rp,
173 sizeof(RECINDEX) + nall*sizeof(char *));
174 if (rp == NULL)
175 return(NULL);
176 }
177 rp->rec[rp->nrecs++] = cp; /* point to first non-white */
178 n = rp->nw_rec;
179 while (++cp < mend) /* find end of record */
180 if (!*cp | isspace(*cp)) {
181 if (--n <= 0)
182 break; /* got requisite # words */
183 do { /* else find next word */
184 if (*cp == '\n') {
185 fprintf(stderr,
186 "Unexpected EOL in record!\n");
187 free_records(rp);
188 return(NULL);
189 }
190 if (++cp >= mend)
191 break;
192 } while (!*cp | isspace(*cp));
193 }
194 }
195 rp->rec[rp->nrecs] = mend; /* reallocate to save space */
196 rp = (RECINDEX *)realloc(rp,
197 sizeof(RECINDEX) + rp->nrecs*sizeof(char *));
198 return(rp);
199 }
200
201 /* count number of columns based on first EOL */
202 static int
203 count_columns(const RECINDEX *rp)
204 {
205 char *cp = rp->rec[0];
206 char *mend = rp->rec[rp->nrecs];
207 int i;
208
209 while (*cp != '\n')
210 if (++cp >= mend)
211 return(0);
212 for (i = 0; i < rp->nrecs; i++)
213 if (rp->rec[i] >= cp)
214 break;
215 return(i);
216 }
217
218 /* copy nth record from index to stdout */
219 static int
220 print_record(const RECINDEX *rp, int n)
221 {
222 int words2go = rp->nw_rec;
223 char *scp;
224
225 if ((n < 0) | (n >= rp->nrecs))
226 return(0);
227 scp = rp->rec[n];
228 do {
229 putc(*scp++, stdout);
230 if (!*scp | isspace(*scp)) {
231 if (--words2go <= 0)
232 break;
233 putc(' ', stdout); /* single space btwn. words */
234 do
235 if (++scp >= rp->rec[n+1])
236 break;
237 while (!*scp | isspace(*scp));
238 }
239 } while (scp < rp->rec[n+1]);
240 /* caller adds record sep. */
241 return(1);
242 }
243
/*
 * Copy the rest of a stream verbatim to standard output.
 *
 * Pending stdio output is flushed first, then data is written directly
 * to stdout's file descriptor.  A write() that accepts only part of a
 * chunk is continued rather than treated as an error.
 *
 * Returns 1 on success, 0 if fp is NULL or on any read/write error.
 */
static int
output_stream(FILE *fp)
{
	char	buf[8192];
	size_t	n;
	int	fd;

	if (fp == NULL)
		return(0);
	if (fflush(stdout) == EOF)
		return(0);
	fd = fileno(stdout);
	while ((n = fread(buf, 1, sizeof(buf), fp)) > 0) {
		const char	*bp = buf;

		while (n > 0) {		/* write() may come up short */
					/* signed type so -1 is caught even */
					/* where ssize_t is unsigned */
			long	nw = (long)write(fd, bp, n);
			if (nw <= 0)
				return(0);
			bp += nw;
			n -= (size_t)nw;
		}
	}
	return(!ferror(fp));
}
259
/*
 * Get the next word from a stream, leaving the stream positioned on
 * the end-of-line that follows it or on the start of the next word.
 *
 * buf	receives the NUL-terminated word (at most 255 characters; a
 *	longer word is returned in pieces by successive calls)
 * fp	input stream
 *
 * Leading NUL bytes and white space (including newlines) are skipped.
 * Returns buf, or NULL if end of input is reached before any word.
 */
static char *
fget_word(char buf[256], FILE *fp)
{
	int	c;
	char	*cp;
					/* skip nul's and white space */
	while (!(c = getc(fp)) || isspace(c))
		;
	if (c == EOF)
		return(NULL);
	cp = buf;
	do
		*cp++ = c;
	while ((c = getc(fp)) != EOF && !isspace(c) && cp < buf+255);
	*cp = '\0';
					/* skip trailing blanks up to EOL; must be */
					/* a logical AND since isspace() may return */
					/* any non-zero value, not just 1 */
	while (c != EOF && c != '\n' && isspace(c))
		c = getc(fp);
	if (c != EOF)
		ungetc(c, fp);
	return(buf);
}
282
/* Settings taken from the command line and/or the input header */
char	*fmtid = NULL;		/* data format name (NULL until known) */
int	comp_size = 0;		/* bytes per binary component (0 for ASCII) */
int	n_comp = 0;		/* components making up one record */
int	ni_columns = 0;		/* input column count */
int	ni_rows = 0;		/* input row count */
int	no_columns = 0;		/* output column count */
int	no_rows = 0;		/* output row count */
int	transpose = 0;		/* swap rows and columns? */
int	i_header = 1;		/* does input carry a header? */
int	o_header = 1;		/* write a header on output? */
293
294 /* check settings and assign defaults */
295 static int
296 check_sizes()
297 {
298 if (fmtid == NULL) {
299 fmtid = "ascii";
300 } else if (!comp_size) {
301 if (!strcmp(fmtid, "float"))
302 comp_size = sizeof(float);
303 else if (!strcmp(fmtid, "double"))
304 comp_size = sizeof(double);
305 else if (!strcmp(fmtid, "byte"))
306 comp_size = 1;
307 else if (strcmp(fmtid, "ascii")) {
308 fprintf(stderr, "Unsupported format: %s\n", fmtid);
309 return(0);
310 }
311 }
312 if (transpose && (no_rows <= 0) & (no_columns <= 0)) {
313 if (ni_rows > 0) no_columns = ni_rows;
314 if (ni_columns > 0) no_rows = ni_columns;
315 } else if ((no_rows <= 0) & (no_columns > 0) &&
316 !((ni_rows*ni_columns) % no_columns))
317 no_rows = ni_rows*ni_columns/no_columns;
318 if (n_comp <= 0)
319 n_comp = 3;
320 return(1);
321 }
322
323 /* output transposed ASCII or binary data from memory */
324 static int
325 do_transpose(const MEMLOAD *mp)
326 {
327 static const char tabEOL[2] = {'\t','\n'};
328 RECINDEX *rp = NULL;
329 long nrecords;
330 int i, j;
331 /* propogate sizes */
332 if (ni_rows <= 0)
333 ni_rows = no_columns;
334 if (ni_columns <= 0)
335 ni_columns = no_rows;
336 /* get # records (& index) */
337 if (!comp_size) {
338 if ((rp = index_records(mp, n_comp)) == NULL)
339 return(0);
340 if (ni_columns <= 0)
341 ni_columns = count_columns(rp);
342 nrecords = rp->nrecs;
343 } else if ((ni_rows > 0) & (ni_columns > 0)) {
344 nrecords = ni_rows*ni_columns;
345 if (nrecords > mp->len/(n_comp*comp_size)) {
346 fprintf(stderr,
347 "Input too small for specified size and type\n");
348 return(0);
349 }
350 } else
351 nrecords = mp->len/(n_comp*comp_size);
352 /* check sizes */
353 if ((ni_rows <= 0) & (ni_columns > 0))
354 ni_rows = nrecords/ni_columns;
355 if ((ni_columns <= 0) & (ni_rows > 0))
356 ni_columns = nrecords/ni_rows;
357 if (nrecords != ni_rows*ni_columns)
358 goto badspec;
359 if (no_columns <= 0)
360 no_columns = ni_rows;
361 if (no_rows <= 0)
362 no_rows = ni_columns;
363 if ((no_rows != ni_columns) | (no_columns != ni_rows))
364 goto badspec;
365 /* transpose records */
366 for (i = 0; i < no_rows; i++) {
367 for (j = 0; j < no_columns; j++)
368 if (rp != NULL) { /* ASCII output */
369 print_record(rp, j*ni_columns + i);
370 putc(tabEOL[j >= no_columns-1], stdout);
371 } else { /* binary output */
372 putbinary((char *)mp->base +
373 (size_t)(n_comp*comp_size)*(j*ni_columns + i),
374 comp_size, n_comp, stdout);
375 }
376 if (ferror(stdout)) {
377 fprintf(stderr, "Error writing to stdout\n");
378 return(0);
379 }
380 }
381 if (rp != NULL)
382 free_records(rp);
383 return(1);
384 badspec:
385 fprintf(stderr, "Bad transpose specification -- check dimension(s)\n");
386 return(0);
387 }
388
389 /* resize ASCII stream input by ignoring EOLs between records */
390 static int
391 do_resize(FILE *fp)
392 {
393 long records2go = ni_rows*ni_columns;
394 int columns2go = no_columns;
395 char word[256];
396 /* sanity checks */
397 if (comp_size || (no_columns == ni_columns) & (no_rows == ni_rows))
398 return(output_stream(fp)); /* no-op -- just copy */
399 if (no_columns <= 0) {
400 fprintf(stderr, "Missing -oc specification\n");
401 return(0);
402 }
403 if ((records2go <= 0) & (no_rows > 0))
404 records2go = no_rows*no_columns;
405 else if (no_rows*no_columns != records2go) {
406 fprintf(stderr,
407 "Input and output data sizes disagree (%dx%d != %dx%d)\n",
408 ni_rows, ni_columns, no_rows, no_columns);
409 return(0);
410 }
411 do { /* reshape records */
412 int n;
413
414 for (n = n_comp; n--; ) {
415 if (fget_word(word, fp) == NULL) {
416 if (records2go > 0 || n < n_comp-1)
417 break;
418 goto done; /* normal EOD */
419 }
420 fputs(word, stdout);
421 if (n) { /* mid-record? */
422 int c = getc(fp);
423 if ((c == '\n') | (c == EOF))
424 break;
425 ungetc(c, fp);
426 putc(' ', stdout);
427 }
428 }
429 if (n >= 0) {
430 fprintf(stderr, "Incomplete record / unexpected EOF\n");
431 return(0);
432 }
433 if (--columns2go <= 0) { /* time to end output row? */
434 putc('\n', stdout);
435 columns2go = no_columns;
436 } else /* else separate records */
437 putc('\t', stdout);
438 } while (--records2go); /* expected EOD? */
439 done:
440 if (warnings && columns2go != no_columns)
441 fprintf(stderr, "Warning -- incomplete final row\n");
442 if (warnings && fget_word(word, fp) != NULL)
443 fprintf(stderr, "Warning -- characters beyond expected EOD\n");
444 return(1);
445 }
446
447 /* process a header line and copy to stdout */
448 static int
449 headline(char *s, void *p)
450 {
451 static char fmt[MAXFMTLEN];
452 int n;
453
454 if (formatval(fmt, s)) {
455 if (fmtid == NULL) {
456 fmtid = fmt;
457 return(0);
458 }
459 if (!strcmp(fmt, fmtid))
460 return(0);
461 fprintf(stderr, "Input format '%s' != '%s'\n", fmt, fmtid);
462 return(-1);
463 }
464 if (!strncmp(s, "NROWS=", 6)) {
465 n = atoi(s+6);
466 if ((ni_rows > 0) & (n != ni_rows)) {
467 fputs("Incorrect input row count\n", stderr);
468 return(-1);
469 }
470 ni_rows = n;
471 return(0);
472 }
473 if (!strncmp(s, "NCOLS=", 6)) {
474 n = atoi(s+6);
475 if ((ni_columns > 0) & (n != ni_columns)) {
476 fputs("Incorrect input column count\n", stderr);
477 return(-1);
478 }
479 ni_columns = n;
480 return(0);
481 }
482 if (!strncmp(s, "NCOMP=", 6)) {
483 n = atoi(s+6);
484 if ((n_comp > 0) & (n != n_comp)) {
485 fputs("Incorrect number of components\n", stderr);
486 return(-1);
487 }
488 n_comp = n;
489 return(0);
490 }
491 if (o_header)
492 fputs(s, stdout); /* copy header info. */
493 return(0);
494 }
495
496 /* main routine for converting rows/columns in data file */
497 int
498 main(int argc, char *argv[])
499 {
500 int a;
501
502 for (a = 1; a < argc && argv[a][0] == '-'; a++)
503 switch (argv[a][1]) {
504 case 'i': /* input */
505 if (argv[a][2] == 'c') /* columns */
506 ni_columns = atoi(argv[++a]);
507 else if (argv[a][2] == 'r')
508 ni_rows = atoi(argv[++a]);
509 else
510 goto userr;
511 break;
512 case 'o': /* output */
513 if (argv[a][2] == 'c') /* columns */
514 no_columns = atoi(argv[++a]);
515 else if (argv[a][2] == 'r')
516 no_rows = atoi(argv[++a]);
517 else
518 goto userr;
519 break;
520 case 'h': /* turn off header */
521 switch (argv[a][2]) {
522 case 'i':
523 i_header = 0;
524 break;
525 case 'o':
526 o_header = 0;
527 break;
528 case '\0':
529 i_header = o_header = 0;
530 break;
531 default:
532 goto userr;
533 }
534 break;
535 case 't': /* transpose on/off */
536 transpose = !transpose;
537 break;
538 case 'f': /* format */
539 switch (argv[a][2]) {
540 case 'a': /* ASCII */
541 case 'A':
542 fmtid = "ascii";
543 comp_size = 0;
544 break;
545 case 'f': /* float */
546 case 'F':
547 fmtid = "float";
548 comp_size = sizeof(float);
549 break;
550 case 'd': /* double */
551 case 'D':
552 fmtid = "double";
553 comp_size = sizeof(double);
554 break;
555 case 'b': /* binary (bytes) */
556 case 'B':
557 fmtid = "byte";
558 comp_size = 1;
559 break;
560 default:
561 goto userr;
562 }
563 if (argv[a][3]) {
564 if (!isdigit(argv[a][3]))
565 goto userr;
566 n_comp = atoi(argv[a]+3);
567 } else
568 n_comp = 1;
569 break;
570 case 'w': /* warnings on/off */
571 warnings = !warnings;
572 break;
573 default:
574 goto userr;
575 }
576 if (a < argc-1) /* arg count OK? */
577 goto userr;
578 /* open input file? */
579 if (a == argc-1 && freopen(argv[a], "r", stdin) == NULL) {
580 fprintf(stderr, "%s: cannot open for reading\n", argv[a]);
581 return(1);
582 }
583 if (comp_size) {
584 SET_FILE_BINARY(stdin);
585 SET_FILE_BINARY(stdout);
586 }
587 /* check for no-op */
588 if (!transpose & (i_header == o_header) &&
589 (no_columns == ni_columns) & (no_rows == ni_rows)) {
590 if (warnings)
591 fprintf(stderr, "%s: no-op -- copying input verbatim\n",
592 argv[0]);
593 if (!output_stream(stdin))
594 return(1);
595 return(0);
596 }
597 if (i_header) { /* read header */
598 if (getheader(stdin, headline, NULL) < 0)
599 return(1);
600 if (!check_sizes())
601 return(1);
602 if (comp_size) { /* a little late... */
603 SET_FILE_BINARY(stdin);
604 SET_FILE_BINARY(stdout);
605 }
606 } else if (!check_sizes())
607 return(1);
608 if (o_header) { /* write header */
609 printargs(a, argv, stdout);
610 if (no_rows > 0)
611 printf("NROWS=%d\n", no_rows);
612 if (no_columns > 0)
613 printf("NCOLS=%d\n", no_columns);
614 printf("NCOMP=%d\n", n_comp);
615 fputformat(fmtid, stdout);
616 fputc('\n', stdout); /* finish new header */
617 }
618 if (transpose) { /* transposing rows & columns? */
619 MEMLOAD myMem; /* need to map into memory */
620 if (a == argc-1) {
621 if (load_file(&myMem, stdin) <= 0) {
622 fprintf(stderr, "%s: error loading file into memory\n",
623 argv[a]);
624 return(1);
625 }
626 } else if (load_stream(&myMem, stdin) <= 0) {
627 fprintf(stderr, "%s: error loading stdin into memory\n",
628 argv[0]);
629 return(1);
630 }
631 if (!do_transpose(&myMem))
632 return(1);
633 /* free_load(&myMem); about to exit, so don't bother */
634 } else if (!do_resize(stdin)) /* reshaping input */
635 return(1);
636 return(0);
637 userr:
638 fprintf(stderr,
639 "Usage: %s [-h[io]][-w][-f[afdb][N]][-t][-ic in_col][-ir in_row][-oc out_col][-or out_row] [input.dat]\n",
640 argv[0]);
641 return(1);
642 }