ViewVC Help
View File | Revision Log | Show Annotations | Download File | Root Listing
root/radiance/ray/src/util/rcollate.c
Revision: 2.27
Committed: Thu Oct 18 22:59:48 2018 UTC (5 years, 5 months ago) by greg
Content type: text/plain
Branch: MAIN
Changes since 2.26: +14 -7 lines
Log Message:
Fixed issues with very large file i/o

File Contents

# User Rev Content
1 greg 2.1 #ifndef lint
2 greg 2.27 static const char RCSid[] = "$Id: rcollate.c,v 2.26 2018/08/02 18:33:50 greg Exp $";
3 greg 2.1 #endif
4     /*
5     * Utility to re-order records in a binary or ASCII data file (matrix)
6     */
7    
8     #include <stdlib.h>
9     #include <string.h>
10     #include <ctype.h>
11     #include "platform.h"
12     #include "rtio.h"
13     #include "resolu.h"
14 schorsch 2.23 #if defined(_WIN32) || defined(_WIN64)
15     #undef ftello
16     #define ftello ftell
17     #undef ssize_t
18     #define ssize_t size_t
19 greg 2.7 #else
20 schorsch 2.23 #include <sys/mman.h>
21 greg 2.1 #endif
22    
/* An input file held in memory, either read into a heap buffer or mapped. */
typedef struct {
	void	*base;		/* first byte of the usable data */
	size_t	len;		/* number of data bytes starting at base */
	int	mapped;		/* non-zero: obtained via mmap(), else malloc() */
} MEMLOAD;
28    
/*
 * Index of ASCII records found in a loaded file.
 * The struct is over-allocated so that rec[] can hold nrecs+1 pointers;
 * rec[nrecs] is set to the end of the data by index_records().
 */
typedef struct {
	int	nw_rec;		/* words that make up one record */
	int	nrecs;		/* how many records were indexed */
	char	*rec[1];	/* start of each record (array extends struct) */
} RECINDEX;
34    
int	warnings = 1;		/* non-zero: print warning messages (toggled by -w) */
36    
37 greg 2.1 /* free loaded file */
38     static void
39     free_load(MEMLOAD *mp)
40     {
41     if (mp == NULL || (mp->base == NULL) | (mp->len <= 0))
42     return;
43     #ifdef MAP_FILE
44     if (mp->mapped)
45     munmap(mp->base, mp->len);
46     else
47     #endif
48     free(mp->base);
49     mp->base = NULL;
50     mp->len = 0;
51     }
52    
53 greg 2.20 /* load memory from an input stream, starting from current position */
54     static int
55     load_stream(MEMLOAD *mp, FILE *fp)
56     {
57     size_t alloced = 0;
58     char buf[8192];
59     size_t nr;
60    
61     if (mp == NULL)
62     return(-1);
63     mp->base = NULL;
64     mp->len = 0;
65     mp->mapped = 0;
66     if (fp == NULL)
67     return(-1);
68     while ((nr = fread(buf, 1, sizeof(buf), fp)) > 0) {
69     if (!alloced)
70 greg 2.21 mp->base = malloc(alloced = nr);
71 greg 2.20 else if (mp->len+nr > alloced)
72     mp->base = realloc(mp->base,
73     alloced = alloced*(2+(nr==sizeof(buf)))/2+nr);
74     if (mp->base == NULL)
75     return(-1);
76     memcpy((char *)mp->base + mp->len, buf, nr);
77     mp->len += nr;
78     }
79     if (ferror(fp)) {
80     free_load(mp);
81     return(-1);
82     }
83     if (alloced > mp->len*5/4) /* don't waste too much space */
84     mp->base = realloc(mp->base, mp->len);
85     return(mp->len > 0);
86     }
87    
88 greg 2.1 /* load a file into memory */
89     static int
90     load_file(MEMLOAD *mp, FILE *fp)
91     {
92     int fd;
93 greg 2.27 off_t skip, flen, skipped;
94 greg 2.1
95 schorsch 2.23 #if defined(_WIN32) || defined(_WIN64)
96     /* too difficult to fix this */
97 greg 2.20 return load_stream(mp, fp);
98     #endif
99 greg 2.1 if (mp == NULL)
100     return(-1);
101     mp->base = NULL;
102     mp->len = 0;
103     mp->mapped = 0;
104     if (fp == NULL)
105     return(-1);
106     fd = fileno(fp);
107     skip = ftello(fp);
108     flen = lseek(fd, 0, SEEK_END);
109     if (flen <= skip)
110     return((int)(flen - skip));
111     mp->len = (size_t)(flen - skip);
112     #ifdef MAP_FILE
113     if (mp->len > 1L<<20) { /* map file if > 1 MByte */
114 greg 2.27 mp->base = mmap(NULL, flen, PROT_READ, MAP_PRIVATE, fd, 0);
115 greg 2.1 if (mp->base != MAP_FAILED) {
116 greg 2.27 mp->base = (char *)mp->base + skip;
117 greg 2.1 mp->mapped = 1;
118     return(1); /* mmap() success */
119     }
120     mp->base = NULL; /* fall back to reading it in... */
121     }
122     #endif
123     if (lseek(fd, skip, SEEK_SET) != skip ||
124     (mp->base = malloc(mp->len)) == NULL) {
125     mp->len = 0;
126     return(-1);
127     }
128 greg 2.27 skipped = skip; /* read() fails on really big buffers */
129     while (skipped < flen) {
130     ssize_t nread = read(fd, (char *)mp->base+(skipped-skip),
131     (flen-skipped <= 1L<<30) ? flen-skipped : 1L<<30);
132     if (nread <= 0) {
133     free_load(mp);
134     return(-1);
135     }
136     skipped += nread;
137 greg 2.1 }
138     return(1);
139     }
140    
/* release a record index returned by index_records() (one heap block) */
#define free_records(rp)	free(rp)
143    
144     /* compute record index */
145     static RECINDEX *
146     index_records(const MEMLOAD *mp, int nw_rec)
147     {
148 greg 2.24 int nall = 0;
149 greg 2.1 RECINDEX *rp;
150     char *cp, *mend;
151     int n;
152    
153     if (mp == NULL || (mp->base == NULL) | (mp->len <= 0))
154     return(NULL);
155     if (nw_rec <= 0)
156     return(NULL);
157 greg 2.24 nall = 1000;
158     rp = (RECINDEX *)malloc(sizeof(RECINDEX) + nall*sizeof(char *));
159 greg 2.1 if (rp == NULL)
160     return(NULL);
161     rp->nw_rec = nw_rec;
162     rp->nrecs = 0;
163     cp = (char *)mp->base;
164     mend = cp + mp->len;
165     for ( ; ; ) { /* whitespace-separated words */
166     while (cp < mend && !*cp | isspace(*cp))
167     ++cp;
168     if (cp >= mend)
169     break;
170 greg 2.24 if (rp->nrecs >= nall) {
171     nall += nall>>1; /* get more record space */
172     rp = (RECINDEX *)realloc(rp,
173     sizeof(RECINDEX) + nall*sizeof(char *));
174     if (rp == NULL)
175     return(NULL);
176     }
177 greg 2.1 rp->rec[rp->nrecs++] = cp; /* point to first non-white */
178     n = rp->nw_rec;
179     while (++cp < mend) /* find end of record */
180     if (!*cp | isspace(*cp)) {
181     if (--n <= 0)
182     break; /* got requisite # words */
183     do { /* else find next word */
184     if (*cp == '\n') {
185     fprintf(stderr,
186     "Unexpected EOL in record!\n");
187     free_records(rp);
188     return(NULL);
189     }
190     if (++cp >= mend)
191     break;
192     } while (!*cp | isspace(*cp));
193     }
194     }
195     rp->rec[rp->nrecs] = mend; /* reallocate to save space */
196     rp = (RECINDEX *)realloc(rp,
197     sizeof(RECINDEX) + rp->nrecs*sizeof(char *));
198     return(rp);
199     }
200    
201     /* count number of columns based on first EOL */
202     static int
203     count_columns(const RECINDEX *rp)
204     {
205     char *cp = rp->rec[0];
206     char *mend = rp->rec[rp->nrecs];
207     int i;
208    
209     while (*cp != '\n')
210     if (++cp >= mend)
211     return(0);
212     for (i = 0; i < rp->nrecs; i++)
213     if (rp->rec[i] >= cp)
214     break;
215     return(i);
216     }
217    
218     /* copy nth record from index to stdout */
219     static int
220     print_record(const RECINDEX *rp, int n)
221     {
222     int words2go = rp->nw_rec;
223     char *scp;
224    
225     if ((n < 0) | (n >= rp->nrecs))
226     return(0);
227     scp = rp->rec[n];
228     do {
229     putc(*scp++, stdout);
230     if (!*scp | isspace(*scp)) {
231     if (--words2go <= 0)
232     break;
233     putc(' ', stdout); /* single space btwn. words */
234     do
235     if (++scp >= rp->rec[n+1])
236     break;
237     while (!*scp | isspace(*scp));
238     }
239     } while (scp < rp->rec[n+1]);
240     /* caller adds record sep. */
241     return(1);
242     }
243    
/*
 * Copy everything remaining on a stream to standard output.
 * Pending stdio output is flushed first because the copy itself goes
 * straight to the stdout file descriptor with write().
 * Returns 1 on success, 0 for a NULL stream, a short write or a read error.
 */
static int
output_stream(FILE *fp)
{
	char	buf[8192];
	ssize_t	n;

	if (fp == NULL)
		return(0);
	fflush(stdout);
	for ( ; ; ) {
		n = fread(buf, 1, sizeof(buf), fp);
		if (n <= 0)
			break;
		if (write(fileno(stdout), buf, n) != n)
			return(0);
	}
	return(!ferror(fp));
}
259    
/*
 * Get the next word from a stream, leaving the stream on EOL or at the
 * start of the following word.
 *
 * Leading NUL bytes and white space (newlines included) are skipped.  The
 * word is copied into buf and NUL-terminated; a word longer than 255
 * characters is split, with the remainder left on the stream.
 *
 * Returns buf, or NULL if end-of-file is reached before any word.
 */
static char *
fget_word(char buf[256], FILE *fp)
{
	int	c;
	char	*cp;
					/* skip nul's and white space */
	while (!(c = getc(fp)) || isspace(c))
		;
	if (c == EOF)
		return(NULL);
	cp = buf;
	do
		*cp++ = c;
	while ((c = getc(fp)) != EOF && !isspace(c) && cp < buf+255);
	*cp = '\0';
	/*
	 * Skip trailing blanks but stop on a newline.  isspace() only
	 * promises a non-zero result, so combine it with a logical AND;
	 * a bitwise AND with the 0/1 comparison can yield zero.
	 */
	while (isspace(c) && c != '\n')
		c = getc(fp);
	if (c != EOF)
		ungetc(c, fp);
	return(buf);
}
282    
/* conversion settings, filled in from the command line and input header */
char	*fmtid = NULL;		/* data format name; NULL until known */
int	comp_size = 0;		/* bytes per binary component (0 for ASCII) */
int	n_comp = 0;		/* components per record */
int	ni_columns = 0;		/* input column count */
int	ni_rows = 0;		/* input row count */
int	no_columns = 0;		/* output column count */
int	no_rows = 0;		/* output row count */
int	transpose = 0;		/* swap rows and columns? */
int	i_header = 1;		/* expect a header on input? */
int	o_header = 1;		/* write a header on output? */
293 greg 2.1
294 greg 2.9 /* check settings and assign defaults */
295 greg 2.11 static int
296 greg 2.9 check_sizes()
297     {
298     if (fmtid == NULL) {
299     fmtid = "ascii";
300     } else if (!comp_size) {
301     if (!strcmp(fmtid, "float"))
302     comp_size = sizeof(float);
303     else if (!strcmp(fmtid, "double"))
304     comp_size = sizeof(double);
305     else if (!strcmp(fmtid, "byte"))
306     comp_size = 1;
307 greg 2.14 else if (strcmp(fmtid, "ascii")) {
308 greg 2.11 fprintf(stderr, "Unsupported format: %s\n", fmtid);
309     return(0);
310     }
311 greg 2.9 }
312 greg 2.16 if (transpose && (no_rows <= 0) & (no_columns <= 0)) {
313     if (ni_rows > 0) no_columns = ni_rows;
314     if (ni_columns > 0) no_rows = ni_columns;
315     } else if ((no_rows <= 0) & (no_columns > 0) &&
316     !((ni_rows*ni_columns) % no_columns))
317     no_rows = ni_rows*ni_columns/no_columns;
318 greg 2.9 if (n_comp <= 0)
319     n_comp = 3;
320 greg 2.11 return(1);
321 greg 2.9 }
322    
323 greg 2.1 /* output transposed ASCII or binary data from memory */
324     static int
325     do_transpose(const MEMLOAD *mp)
326     {
327     static const char tabEOL[2] = {'\t','\n'};
328     RECINDEX *rp = NULL;
329     long nrecords;
330     int i, j;
331 greg 2.5 /* propogate sizes */
332     if (ni_rows <= 0)
333     ni_rows = no_columns;
334     if (ni_columns <= 0)
335     ni_columns = no_rows;
336 greg 2.1 /* get # records (& index) */
337 greg 2.9 if (!comp_size) {
338     if ((rp = index_records(mp, n_comp)) == NULL)
339 greg 2.1 return(0);
340     if (ni_columns <= 0)
341     ni_columns = count_columns(rp);
342     nrecords = rp->nrecs;
343 greg 2.3 } else if ((ni_rows > 0) & (ni_columns > 0)) {
344 greg 2.1 nrecords = ni_rows*ni_columns;
345 greg 2.9 if (nrecords > mp->len/(n_comp*comp_size)) {
346 greg 2.3 fprintf(stderr,
347     "Input too small for specified size and type\n");
348     return(0);
349     }
350     } else
351 greg 2.9 nrecords = mp->len/(n_comp*comp_size);
352 greg 2.1 /* check sizes */
353     if ((ni_rows <= 0) & (ni_columns > 0))
354     ni_rows = nrecords/ni_columns;
355     if ((ni_columns <= 0) & (ni_rows > 0))
356     ni_columns = nrecords/ni_rows;
357     if (nrecords != ni_rows*ni_columns)
358     goto badspec;
359     if (no_columns <= 0)
360     no_columns = ni_rows;
361     if (no_rows <= 0)
362     no_rows = ni_columns;
363     if ((no_rows != ni_columns) | (no_columns != ni_rows))
364     goto badspec;
365     /* transpose records */
366     for (i = 0; i < no_rows; i++) {
367     for (j = 0; j < no_columns; j++)
368     if (rp != NULL) { /* ASCII output */
369     print_record(rp, j*ni_columns + i);
370     putc(tabEOL[j >= no_columns-1], stdout);
371     } else { /* binary output */
372 greg 2.25 putbinary((char *)mp->base +
373 greg 2.27 (unsigned long)(n_comp*comp_size)*(j*ni_columns + i),
374 greg 2.25 comp_size, n_comp, stdout);
375 greg 2.1 }
376     if (ferror(stdout)) {
377     fprintf(stderr, "Error writing to stdout\n");
378     return(0);
379     }
380     }
381     if (rp != NULL)
382     free_records(rp);
383     return(1);
384     badspec:
385     fprintf(stderr, "Bad transpose specification -- check dimension(s)\n");
386     return(0);
387     }
388    
389     /* resize ASCII stream input by ignoring EOLs between records */
390     static int
391     do_resize(FILE *fp)
392     {
393     long records2go = ni_rows*ni_columns;
394     int columns2go = no_columns;
395     char word[256];
396     /* sanity checks */
397 greg 2.19 if (comp_size || (no_columns == ni_columns) & (no_rows == ni_rows))
398     return(output_stream(fp)); /* no-op -- just copy */
399 greg 2.1 if (no_columns <= 0) {
400     fprintf(stderr, "Missing -oc specification\n");
401     return(0);
402     }
403     if ((records2go <= 0) & (no_rows > 0))
404     records2go = no_rows*no_columns;
405     else if (no_rows*no_columns != records2go) {
406     fprintf(stderr,
407     "Input and output data sizes disagree (%dx%d != %dx%d)\n",
408     ni_rows, ni_columns, no_rows, no_columns);
409     return(0);
410     }
411     do { /* reshape records */
412     int n;
413    
414 greg 2.9 for (n = n_comp; n--; ) {
415 greg 2.1 if (fget_word(word, fp) == NULL) {
416 greg 2.9 if (records2go > 0 || n < n_comp-1)
417 greg 2.1 break;
418     goto done; /* normal EOD */
419     }
420     fputs(word, stdout);
421     if (n) { /* mid-record? */
422     int c = getc(fp);
423     if ((c == '\n') | (c == EOF))
424     break;
425     ungetc(c, fp);
426     putc(' ', stdout);
427     }
428     }
429     if (n >= 0) {
430     fprintf(stderr, "Incomplete record / unexpected EOF\n");
431     return(0);
432     }
433     if (--columns2go <= 0) { /* time to end output row? */
434     putc('\n', stdout);
435     columns2go = no_columns;
436     } else /* else separate records */
437     putc('\t', stdout);
438     } while (--records2go); /* expected EOD? */
439     done:
440 greg 2.4 if (warnings && columns2go != no_columns)
441 greg 2.1 fprintf(stderr, "Warning -- incomplete final row\n");
442 greg 2.4 if (warnings && fget_word(word, fp) != NULL)
443     fprintf(stderr, "Warning -- characters beyond expected EOD\n");
444 greg 2.1 return(1);
445     }
446    
447     /* process a header line and copy to stdout */
448     static int
449     headline(char *s, void *p)
450     {
451 greg 2.26 static char fmt[MAXFMTLEN];
452 greg 2.9 int n;
453 greg 2.1
454     if (formatval(fmt, s)) {
455 greg 2.9 if (fmtid == NULL) {
456     fmtid = fmt;
457     return(0);
458     }
459 greg 2.1 if (!strcmp(fmt, fmtid))
460     return(0);
461     fprintf(stderr, "Input format '%s' != '%s'\n", fmt, fmtid);
462     return(-1);
463     }
464 greg 2.9 if (!strncmp(s, "NROWS=", 6)) {
465     n = atoi(s+6);
466     if ((ni_rows > 0) & (n != ni_rows)) {
467     fputs("Incorrect input row count\n", stderr);
468     return(-1);
469     }
470     ni_rows = n;
471     return(0);
472     }
473     if (!strncmp(s, "NCOLS=", 6)) {
474     n = atoi(s+6);
475     if ((ni_columns > 0) & (n != ni_columns)) {
476     fputs("Incorrect input column count\n", stderr);
477     return(-1);
478     }
479     ni_columns = n;
480     return(0);
481     }
482     if (!strncmp(s, "NCOMP=", 6)) {
483     n = atoi(s+6);
484     if ((n_comp > 0) & (n != n_comp)) {
485 greg 2.14 fputs("Incorrect number of components\n", stderr);
486 greg 2.9 return(-1);
487     }
488     n_comp = n;
489     return(0);
490     }
491 greg 2.16 if (o_header)
492     fputs(s, stdout); /* copy header info. */
493 greg 2.1 return(0);
494     }
495    
496     /* main routine for converting rows/columns in data file */
497     int
498     main(int argc, char *argv[])
499     {
500 greg 2.14 int a;
501 greg 2.1
502 greg 2.14 for (a = 1; a < argc && argv[a][0] == '-'; a++)
503     switch (argv[a][1]) {
504 greg 2.1 case 'i': /* input */
505 greg 2.14 if (argv[a][2] == 'c') /* columns */
506     ni_columns = atoi(argv[++a]);
507     else if (argv[a][2] == 'r')
508     ni_rows = atoi(argv[++a]);
509 greg 2.1 else
510     goto userr;
511     break;
512     case 'o': /* output */
513 greg 2.14 if (argv[a][2] == 'c') /* columns */
514     no_columns = atoi(argv[++a]);
515     else if (argv[a][2] == 'r')
516     no_rows = atoi(argv[++a]);
517 greg 2.1 else
518     goto userr;
519     break;
520 greg 2.10 case 'h': /* turn off header */
521 greg 2.14 switch (argv[a][2]) {
522 greg 2.10 case 'i':
523     i_header = 0;
524     break;
525     case 'o':
526     o_header = 0;
527     break;
528     case '\0':
529     i_header = o_header = 0;
530     break;
531     default:
532     goto userr;
533     }
534 greg 2.1 break;
535     case 't': /* transpose on/off */
536     transpose = !transpose;
537     break;
538     case 'f': /* format */
539 greg 2.14 switch (argv[a][2]) {
540 greg 2.1 case 'a': /* ASCII */
541     case 'A':
542     fmtid = "ascii";
543 greg 2.9 comp_size = 0;
544 greg 2.1 break;
545     case 'f': /* float */
546     case 'F':
547     fmtid = "float";
548 greg 2.9 comp_size = sizeof(float);
549 greg 2.1 break;
550     case 'd': /* double */
551     case 'D':
552     fmtid = "double";
553 greg 2.9 comp_size = sizeof(double);
554 greg 2.1 break;
555     case 'b': /* binary (bytes) */
556     case 'B':
557     fmtid = "byte";
558 greg 2.9 comp_size = 1;
559 greg 2.1 break;
560     default:
561     goto userr;
562     }
563 greg 2.14 if (argv[a][3]) {
564     if (!isdigit(argv[a][3]))
565 greg 2.1 goto userr;
566 greg 2.14 n_comp = atoi(argv[a]+3);
567     } else
568     n_comp = 1;
569 greg 2.1 break;
570 greg 2.4 case 'w': /* warnings on/off */
571     warnings = !warnings;
572     break;
573 greg 2.1 default:
574     goto userr;
575     }
576 greg 2.14 if (a < argc-1) /* arg count OK? */
577 greg 2.1 goto userr;
578     /* open input file? */
579 greg 2.14 if (a == argc-1 && freopen(argv[a], "r", stdin) == NULL) {
580     fprintf(stderr, "%s: cannot open for reading\n", argv[a]);
581 greg 2.1 return(1);
582     }
583 greg 2.9 if (comp_size) {
584 greg 2.1 SET_FILE_BINARY(stdin);
585     SET_FILE_BINARY(stdout);
586     }
587     /* check for no-op */
588 greg 2.19 if (!transpose & (i_header == o_header) &&
589     (no_columns == ni_columns) & (no_rows == ni_rows)) {
590 greg 2.4 if (warnings)
591     fprintf(stderr, "%s: no-op -- copying input verbatim\n",
592 greg 2.1 argv[0]);
593     if (!output_stream(stdin))
594     return(1);
595     return(0);
596     }
597 greg 2.10 if (i_header) { /* read header */
598 greg 2.15 if (getheader(stdin, headline, NULL) < 0)
599 greg 2.1 return(1);
600 greg 2.11 if (!check_sizes())
601     return(1);
602 greg 2.9 if (comp_size) { /* a little late... */
603     SET_FILE_BINARY(stdin);
604     SET_FILE_BINARY(stdout);
605     }
606 greg 2.11 } else if (!check_sizes())
607     return(1);
608 greg 2.10 if (o_header) { /* write header */
609 greg 2.14 printargs(a, argv, stdout);
610 greg 2.9 if (no_rows > 0)
611     printf("NROWS=%d\n", no_rows);
612     if (no_columns > 0)
613     printf("NCOLS=%d\n", no_columns);
614     printf("NCOMP=%d\n", n_comp);
615 greg 2.1 fputformat(fmtid, stdout);
616     fputc('\n', stdout); /* finish new header */
617 greg 2.10 }
618 greg 2.1 if (transpose) { /* transposing rows & columns? */
619 greg 2.19 MEMLOAD myMem; /* need to map into memory */
620 greg 2.14 if (a == argc-1) {
621 greg 2.1 if (load_file(&myMem, stdin) <= 0) {
622     fprintf(stderr, "%s: error loading file into memory\n",
623 greg 2.14 argv[a]);
624 greg 2.1 return(1);
625     }
626     } else if (load_stream(&myMem, stdin) <= 0) {
627     fprintf(stderr, "%s: error loading stdin into memory\n",
628     argv[0]);
629     return(1);
630     }
631     if (!do_transpose(&myMem))
632     return(1);
633 greg 2.17 /* free_load(&myMem); about to exit, so don't bother */
634     } else if (!do_resize(stdin)) /* reshaping input */
635 greg 2.1 return(1);
636     return(0);
637     userr:
638     fprintf(stderr,
639 greg 2.10 "Usage: %s [-h[io]][-w][-f[afdb][N]][-t][-ic in_col][-ir in_row][-oc out_col][-or out_row] [input.dat]\n",
640 greg 2.1 argv[0]);
641     return(1);
642     }