ViewVC Help
View File | Revision Log | Show Annotations | Download File | Root Listing
root/radiance/ray/src/util/rcollate.c
Revision: 2.24
Committed: Sat Apr 16 00:42:16 2016 UTC (8 years ago) by greg
Content type: text/plain
Branch: MAIN
Changes since 2.23: +11 -2 lines
Log Message:
Made memory allocation in index_records less greedy

File Contents

# User Rev Content
1 greg 2.1 #ifndef lint
2 greg 2.24 static const char RCSid[] = "$Id: rcollate.c,v 2.23 2016/03/06 01:13:18 schorsch Exp $";
3 greg 2.1 #endif
4     /*
5     * Utility to re-order records in a binary or ASCII data file (matrix)
6     */
7    
8     #include <stdlib.h>
9     #include <string.h>
10     #include <ctype.h>
11     #include "platform.h"
12     #include "rtio.h"
13     #include "resolu.h"
14 schorsch 2.23 #if defined(_WIN32) || defined(_WIN64)
15     #undef ftello
16     #define ftello ftell
17     #undef ssize_t
18     #define ssize_t size_t
19 greg 2.7 #else
20 schorsch 2.23 #include <sys/mman.h>
21 greg 2.1 #endif
22    
/* contents of a file held in memory, either heap-allocated or memory-mapped */
typedef struct {
	void	*base;		/* start of the loaded data */
	size_t	len;		/* number of bytes available at base */
	int	mapped;		/* non-zero when base was obtained from mmap() */
} MEMLOAD;
28    
/* table of record start positions found in a loaded ASCII file */
typedef struct {
	int	nw_rec;		/* words that make up one record */
	int	nrecs;		/* how many records were indexed */
	char	*rec[1];	/* record start pointers (storage runs past struct) */
} RECINDEX;
34    
int	warnings = 1;		/* non-zero: print warning messages */
36    
37 greg 2.1 /* free loaded file */
38     static void
39     free_load(MEMLOAD *mp)
40     {
41     if (mp == NULL || (mp->base == NULL) | (mp->len <= 0))
42     return;
43     #ifdef MAP_FILE
44     if (mp->mapped)
45     munmap(mp->base, mp->len);
46     else
47     #endif
48     free(mp->base);
49     mp->base = NULL;
50     mp->len = 0;
51     }
52    
53 greg 2.20 /* load memory from an input stream, starting from current position */
54     static int
55     load_stream(MEMLOAD *mp, FILE *fp)
56     {
57     size_t alloced = 0;
58     char buf[8192];
59     size_t nr;
60    
61     if (mp == NULL)
62     return(-1);
63     mp->base = NULL;
64     mp->len = 0;
65     mp->mapped = 0;
66     if (fp == NULL)
67     return(-1);
68     while ((nr = fread(buf, 1, sizeof(buf), fp)) > 0) {
69     if (!alloced)
70 greg 2.21 mp->base = malloc(alloced = nr);
71 greg 2.20 else if (mp->len+nr > alloced)
72     mp->base = realloc(mp->base,
73     alloced = alloced*(2+(nr==sizeof(buf)))/2+nr);
74     if (mp->base == NULL)
75     return(-1);
76     memcpy((char *)mp->base + mp->len, buf, nr);
77     mp->len += nr;
78     }
79     if (ferror(fp)) {
80     free_load(mp);
81     return(-1);
82     }
83     if (alloced > mp->len*5/4) /* don't waste too much space */
84     mp->base = realloc(mp->base, mp->len);
85     return(mp->len > 0);
86     }
87    
88 greg 2.1 /* load a file into memory */
89     static int
90     load_file(MEMLOAD *mp, FILE *fp)
91     {
92     int fd;
93     off_t skip, flen;
94    
95 schorsch 2.23 #if defined(_WIN32) || defined(_WIN64)
96     /* too difficult to fix this */
97 greg 2.20 return load_stream(mp, fp);
98     #endif
99 greg 2.1 if (mp == NULL)
100     return(-1);
101     mp->base = NULL;
102     mp->len = 0;
103     mp->mapped = 0;
104     if (fp == NULL)
105     return(-1);
106     fd = fileno(fp);
107     skip = ftello(fp);
108     flen = lseek(fd, 0, SEEK_END);
109     if (flen <= skip)
110     return((int)(flen - skip));
111     mp->len = (size_t)(flen - skip);
112     #ifdef MAP_FILE
113     if (mp->len > 1L<<20) { /* map file if > 1 MByte */
114 greg 2.2 mp->base = mmap(NULL, mp->len, PROT_READ, MAP_PRIVATE, fd, skip);
115 greg 2.1 if (mp->base != MAP_FAILED) {
116     mp->mapped = 1;
117     return(1); /* mmap() success */
118     }
119     mp->base = NULL; /* fall back to reading it in... */
120     }
121     #endif
122     if (lseek(fd, skip, SEEK_SET) != skip ||
123     (mp->base = malloc(mp->len)) == NULL) {
124     mp->len = 0;
125     return(-1);
126     }
127     if (read(fd, (char *)mp->base, mp->len) != mp->len) {
128     free_load(mp);
129     return(-1);
130     }
131     return(1);
132     }
133    
134     /* free a record index */
135     #define free_records(rp) free(rp)
136    
137     /* compute record index */
138     static RECINDEX *
139     index_records(const MEMLOAD *mp, int nw_rec)
140     {
141 greg 2.24 int nall = 0;
142 greg 2.1 RECINDEX *rp;
143     char *cp, *mend;
144     int n;
145    
146     if (mp == NULL || (mp->base == NULL) | (mp->len <= 0))
147     return(NULL);
148     if (nw_rec <= 0)
149     return(NULL);
150 greg 2.24 nall = 1000;
151     rp = (RECINDEX *)malloc(sizeof(RECINDEX) + nall*sizeof(char *));
152 greg 2.1 if (rp == NULL)
153     return(NULL);
154     rp->nw_rec = nw_rec;
155     rp->nrecs = 0;
156     cp = (char *)mp->base;
157     mend = cp + mp->len;
158     for ( ; ; ) { /* whitespace-separated words */
159     while (cp < mend && !*cp | isspace(*cp))
160     ++cp;
161     if (cp >= mend)
162     break;
163 greg 2.24 if (rp->nrecs >= nall) {
164     nall += nall>>1; /* get more record space */
165     rp = (RECINDEX *)realloc(rp,
166     sizeof(RECINDEX) + nall*sizeof(char *));
167     if (rp == NULL)
168     return(NULL);
169     }
170 greg 2.1 rp->rec[rp->nrecs++] = cp; /* point to first non-white */
171     n = rp->nw_rec;
172     while (++cp < mend) /* find end of record */
173     if (!*cp | isspace(*cp)) {
174     if (--n <= 0)
175     break; /* got requisite # words */
176     do { /* else find next word */
177     if (*cp == '\n') {
178     fprintf(stderr,
179     "Unexpected EOL in record!\n");
180     free_records(rp);
181     return(NULL);
182     }
183     if (++cp >= mend)
184     break;
185     } while (!*cp | isspace(*cp));
186     }
187     }
188     rp->rec[rp->nrecs] = mend; /* reallocate to save space */
189     rp = (RECINDEX *)realloc(rp,
190     sizeof(RECINDEX) + rp->nrecs*sizeof(char *));
191     return(rp);
192     }
193    
194     /* count number of columns based on first EOL */
195     static int
196     count_columns(const RECINDEX *rp)
197     {
198     char *cp = rp->rec[0];
199     char *mend = rp->rec[rp->nrecs];
200     int i;
201    
202     while (*cp != '\n')
203     if (++cp >= mend)
204     return(0);
205     for (i = 0; i < rp->nrecs; i++)
206     if (rp->rec[i] >= cp)
207     break;
208     return(i);
209     }
210    
211     /* copy nth record from index to stdout */
212     static int
213     print_record(const RECINDEX *rp, int n)
214     {
215     int words2go = rp->nw_rec;
216     char *scp;
217    
218     if ((n < 0) | (n >= rp->nrecs))
219     return(0);
220     scp = rp->rec[n];
221     do {
222     putc(*scp++, stdout);
223     if (!*scp | isspace(*scp)) {
224     if (--words2go <= 0)
225     break;
226     putc(' ', stdout); /* single space btwn. words */
227     do
228     if (++scp >= rp->rec[n+1])
229     break;
230     while (!*scp | isspace(*scp));
231     }
232     } while (scp < rp->rec[n+1]);
233     /* caller adds record sep. */
234     return(1);
235     }
236    
/*
 * Copy a stream to stdout.
 *
 * Reads fp to EOF and writes the bytes directly to the standard output
 * descriptor.  Returns 1 on success, 0 if fp is NULL, a write fails,
 * or a read error occurred on fp.
 */
static int
output_stream(FILE *fp)
{
	char	buf[8192];
	size_t	n;
	int	fd;

	if (fp == NULL)
		return(0);
	fflush(stdout);			/* raw write() bypasses stdio buffer */
	fd = fileno(stdout);
	while ((n = fread(buf, 1, sizeof(buf), fp)) > 0) {
		size_t	done = 0;
					/* write() may accept only part of
					   the buffer -- continue until done */
		while (done < n) {
			long	nw = (long)write(fd, buf + done, n - done);
			if (nw <= 0)
				return(0);
			done += (size_t)nw;
		}
	}
	return(!ferror(fp));
}
252    
/*
 * Get next word from stream, leaving stream on EOL or start of next word.
 *
 * Skips leading nul bytes and white space, then copies at most 255
 * non-space characters into buf and nul-terminates it.  A longer run is
 * split: the remainder stays on the stream for the next call.
 * Returns buf, or NULL if EOF is reached before a word starts.
 */
static char *
fget_word(char buf[256], FILE *fp)
{
	int	c;
	char	*cp;
					/* skip nul's and white space */
	while (!(c = getc(fp)) || isspace(c))
		;
	if (c == EOF)
		return(NULL);
	cp = buf;
	do
		*cp++ = c;
	while ((c = getc(fp)) != EOF && !isspace(c) && cp < buf+255);
	*cp = '\0';
					/* skip trailing blanks but stop on EOL;
					   isspace() returns any non-zero value,
					   so it must not be bitwise-ANDed */
	while (c != '\n' && isspace(c))
		c = getc(fp);
	if (c != EOF)
		ungetc(c, fp);
	return(buf);
}
275    
char	*fmtid = NULL;		/* data format identifier */
int	comp_size = 0;		/* bytes per binary component (0 for ASCII) */
int	n_comp = 0;		/* number of components in a record */
int	ni_columns = 0;		/* input column count */
int	ni_rows = 0;		/* input row count */
int	no_columns = 0;		/* output column count */
int	no_rows = 0;		/* output row count */
int	transpose = 0;		/* swap rows and columns? */
int	i_header = 1;		/* expect a header on input? */
int	o_header = 1;		/* write a header on output? */
286 greg 2.1
287 greg 2.9 /* check settings and assign defaults */
288 greg 2.11 static int
289 greg 2.9 check_sizes()
290     {
291     if (fmtid == NULL) {
292     fmtid = "ascii";
293     } else if (!comp_size) {
294     if (!strcmp(fmtid, "float"))
295     comp_size = sizeof(float);
296     else if (!strcmp(fmtid, "double"))
297     comp_size = sizeof(double);
298     else if (!strcmp(fmtid, "byte"))
299     comp_size = 1;
300 greg 2.14 else if (strcmp(fmtid, "ascii")) {
301 greg 2.11 fprintf(stderr, "Unsupported format: %s\n", fmtid);
302     return(0);
303     }
304 greg 2.9 }
305 greg 2.16 if (transpose && (no_rows <= 0) & (no_columns <= 0)) {
306     if (ni_rows > 0) no_columns = ni_rows;
307     if (ni_columns > 0) no_rows = ni_columns;
308     } else if ((no_rows <= 0) & (no_columns > 0) &&
309     !((ni_rows*ni_columns) % no_columns))
310     no_rows = ni_rows*ni_columns/no_columns;
311 greg 2.9 if (n_comp <= 0)
312     n_comp = 3;
313 greg 2.11 return(1);
314 greg 2.9 }
315    
316 greg 2.1 /* output transposed ASCII or binary data from memory */
317     static int
318     do_transpose(const MEMLOAD *mp)
319     {
320     static const char tabEOL[2] = {'\t','\n'};
321     RECINDEX *rp = NULL;
322     long nrecords;
323     int i, j;
324 greg 2.5 /* propogate sizes */
325     if (ni_rows <= 0)
326     ni_rows = no_columns;
327     if (ni_columns <= 0)
328     ni_columns = no_rows;
329 greg 2.1 /* get # records (& index) */
330 greg 2.9 if (!comp_size) {
331     if ((rp = index_records(mp, n_comp)) == NULL)
332 greg 2.1 return(0);
333     if (ni_columns <= 0)
334     ni_columns = count_columns(rp);
335     nrecords = rp->nrecs;
336 greg 2.3 } else if ((ni_rows > 0) & (ni_columns > 0)) {
337 greg 2.1 nrecords = ni_rows*ni_columns;
338 greg 2.9 if (nrecords > mp->len/(n_comp*comp_size)) {
339 greg 2.3 fprintf(stderr,
340     "Input too small for specified size and type\n");
341     return(0);
342     }
343     } else
344 greg 2.9 nrecords = mp->len/(n_comp*comp_size);
345 greg 2.1 /* check sizes */
346     if ((ni_rows <= 0) & (ni_columns > 0))
347     ni_rows = nrecords/ni_columns;
348     if ((ni_columns <= 0) & (ni_rows > 0))
349     ni_columns = nrecords/ni_rows;
350     if (nrecords != ni_rows*ni_columns)
351     goto badspec;
352     if (no_columns <= 0)
353     no_columns = ni_rows;
354     if (no_rows <= 0)
355     no_rows = ni_columns;
356     if ((no_rows != ni_columns) | (no_columns != ni_rows))
357     goto badspec;
358     /* transpose records */
359     for (i = 0; i < no_rows; i++) {
360     for (j = 0; j < no_columns; j++)
361     if (rp != NULL) { /* ASCII output */
362     print_record(rp, j*ni_columns + i);
363     putc(tabEOL[j >= no_columns-1], stdout);
364     } else { /* binary output */
365     fwrite((char *)mp->base +
366 greg 2.9 (n_comp*comp_size)*(j*ni_columns + i),
367     n_comp*comp_size, 1, stdout);
368 greg 2.1 }
369     if (ferror(stdout)) {
370     fprintf(stderr, "Error writing to stdout\n");
371     return(0);
372     }
373     }
374     if (rp != NULL)
375     free_records(rp);
376     return(1);
377     badspec:
378     fprintf(stderr, "Bad transpose specification -- check dimension(s)\n");
379     return(0);
380     }
381    
382     /* resize ASCII stream input by ignoring EOLs between records */
383     static int
384     do_resize(FILE *fp)
385     {
386     long records2go = ni_rows*ni_columns;
387     int columns2go = no_columns;
388     char word[256];
389     /* sanity checks */
390 greg 2.19 if (comp_size || (no_columns == ni_columns) & (no_rows == ni_rows))
391     return(output_stream(fp)); /* no-op -- just copy */
392 greg 2.1 if (no_columns <= 0) {
393     fprintf(stderr, "Missing -oc specification\n");
394     return(0);
395     }
396     if ((records2go <= 0) & (no_rows > 0))
397     records2go = no_rows*no_columns;
398     else if (no_rows*no_columns != records2go) {
399     fprintf(stderr,
400     "Input and output data sizes disagree (%dx%d != %dx%d)\n",
401     ni_rows, ni_columns, no_rows, no_columns);
402     return(0);
403     }
404     do { /* reshape records */
405     int n;
406    
407 greg 2.9 for (n = n_comp; n--; ) {
408 greg 2.1 if (fget_word(word, fp) == NULL) {
409 greg 2.9 if (records2go > 0 || n < n_comp-1)
410 greg 2.1 break;
411     goto done; /* normal EOD */
412     }
413     fputs(word, stdout);
414     if (n) { /* mid-record? */
415     int c = getc(fp);
416     if ((c == '\n') | (c == EOF))
417     break;
418     ungetc(c, fp);
419     putc(' ', stdout);
420     }
421     }
422     if (n >= 0) {
423     fprintf(stderr, "Incomplete record / unexpected EOF\n");
424     return(0);
425     }
426     if (--columns2go <= 0) { /* time to end output row? */
427     putc('\n', stdout);
428     columns2go = no_columns;
429     } else /* else separate records */
430     putc('\t', stdout);
431     } while (--records2go); /* expected EOD? */
432     done:
433 greg 2.4 if (warnings && columns2go != no_columns)
434 greg 2.1 fprintf(stderr, "Warning -- incomplete final row\n");
435 greg 2.4 if (warnings && fget_word(word, fp) != NULL)
436     fprintf(stderr, "Warning -- characters beyond expected EOD\n");
437 greg 2.1 return(1);
438     }
439    
440     /* process a header line and copy to stdout */
441     static int
442     headline(char *s, void *p)
443     {
444 greg 2.9 static char fmt[32];
445     int n;
446 greg 2.1
447     if (formatval(fmt, s)) {
448 greg 2.9 if (fmtid == NULL) {
449     fmtid = fmt;
450     return(0);
451     }
452 greg 2.1 if (!strcmp(fmt, fmtid))
453     return(0);
454     fprintf(stderr, "Input format '%s' != '%s'\n", fmt, fmtid);
455     return(-1);
456     }
457 greg 2.9 if (!strncmp(s, "NROWS=", 6)) {
458     n = atoi(s+6);
459     if ((ni_rows > 0) & (n != ni_rows)) {
460     fputs("Incorrect input row count\n", stderr);
461     return(-1);
462     }
463     ni_rows = n;
464     return(0);
465     }
466     if (!strncmp(s, "NCOLS=", 6)) {
467     n = atoi(s+6);
468     if ((ni_columns > 0) & (n != ni_columns)) {
469     fputs("Incorrect input column count\n", stderr);
470     return(-1);
471     }
472     ni_columns = n;
473     return(0);
474     }
475     if (!strncmp(s, "NCOMP=", 6)) {
476     n = atoi(s+6);
477     if ((n_comp > 0) & (n != n_comp)) {
478 greg 2.14 fputs("Incorrect number of components\n", stderr);
479 greg 2.9 return(-1);
480     }
481     n_comp = n;
482     return(0);
483     }
484 greg 2.16 if (o_header)
485     fputs(s, stdout); /* copy header info. */
486 greg 2.1 return(0);
487     }
488    
489     /* main routine for converting rows/columns in data file */
490     int
491     main(int argc, char *argv[])
492     {
493 greg 2.14 int a;
494 greg 2.1
495 greg 2.14 for (a = 1; a < argc && argv[a][0] == '-'; a++)
496     switch (argv[a][1]) {
497 greg 2.1 case 'i': /* input */
498 greg 2.14 if (argv[a][2] == 'c') /* columns */
499     ni_columns = atoi(argv[++a]);
500     else if (argv[a][2] == 'r')
501     ni_rows = atoi(argv[++a]);
502 greg 2.1 else
503     goto userr;
504     break;
505     case 'o': /* output */
506 greg 2.14 if (argv[a][2] == 'c') /* columns */
507     no_columns = atoi(argv[++a]);
508     else if (argv[a][2] == 'r')
509     no_rows = atoi(argv[++a]);
510 greg 2.1 else
511     goto userr;
512     break;
513 greg 2.10 case 'h': /* turn off header */
514 greg 2.14 switch (argv[a][2]) {
515 greg 2.10 case 'i':
516     i_header = 0;
517     break;
518     case 'o':
519     o_header = 0;
520     break;
521     case '\0':
522     i_header = o_header = 0;
523     break;
524     default:
525     goto userr;
526     }
527 greg 2.1 break;
528     case 't': /* transpose on/off */
529     transpose = !transpose;
530     break;
531     case 'f': /* format */
532 greg 2.14 switch (argv[a][2]) {
533 greg 2.1 case 'a': /* ASCII */
534     case 'A':
535     fmtid = "ascii";
536 greg 2.9 comp_size = 0;
537 greg 2.1 break;
538     case 'f': /* float */
539     case 'F':
540     fmtid = "float";
541 greg 2.9 comp_size = sizeof(float);
542 greg 2.1 break;
543     case 'd': /* double */
544     case 'D':
545     fmtid = "double";
546 greg 2.9 comp_size = sizeof(double);
547 greg 2.1 break;
548     case 'b': /* binary (bytes) */
549     case 'B':
550     fmtid = "byte";
551 greg 2.9 comp_size = 1;
552 greg 2.1 break;
553     default:
554     goto userr;
555     }
556 greg 2.14 if (argv[a][3]) {
557     if (!isdigit(argv[a][3]))
558 greg 2.1 goto userr;
559 greg 2.14 n_comp = atoi(argv[a]+3);
560     } else
561     n_comp = 1;
562 greg 2.1 break;
563 greg 2.4 case 'w': /* warnings on/off */
564     warnings = !warnings;
565     break;
566 greg 2.1 default:
567     goto userr;
568     }
569 greg 2.14 if (a < argc-1) /* arg count OK? */
570 greg 2.1 goto userr;
571     /* open input file? */
572 greg 2.14 if (a == argc-1 && freopen(argv[a], "r", stdin) == NULL) {
573     fprintf(stderr, "%s: cannot open for reading\n", argv[a]);
574 greg 2.1 return(1);
575     }
576 greg 2.9 if (comp_size) {
577 greg 2.1 SET_FILE_BINARY(stdin);
578     SET_FILE_BINARY(stdout);
579     }
580     /* check for no-op */
581 greg 2.19 if (!transpose & (i_header == o_header) &&
582     (no_columns == ni_columns) & (no_rows == ni_rows)) {
583 greg 2.4 if (warnings)
584     fprintf(stderr, "%s: no-op -- copying input verbatim\n",
585 greg 2.1 argv[0]);
586     if (!output_stream(stdin))
587     return(1);
588     return(0);
589     }
590 greg 2.10 if (i_header) { /* read header */
591 greg 2.15 if (getheader(stdin, headline, NULL) < 0)
592 greg 2.1 return(1);
593 greg 2.11 if (!check_sizes())
594     return(1);
595 greg 2.9 if (comp_size) { /* a little late... */
596     SET_FILE_BINARY(stdin);
597     SET_FILE_BINARY(stdout);
598     }
599 greg 2.11 } else if (!check_sizes())
600     return(1);
601 greg 2.10 if (o_header) { /* write header */
602 greg 2.14 printargs(a, argv, stdout);
603 greg 2.9 if (no_rows > 0)
604     printf("NROWS=%d\n", no_rows);
605     if (no_columns > 0)
606     printf("NCOLS=%d\n", no_columns);
607     printf("NCOMP=%d\n", n_comp);
608 greg 2.1 fputformat(fmtid, stdout);
609     fputc('\n', stdout); /* finish new header */
610 greg 2.10 }
611 greg 2.1 if (transpose) { /* transposing rows & columns? */
612 greg 2.19 MEMLOAD myMem; /* need to map into memory */
613 greg 2.14 if (a == argc-1) {
614 greg 2.1 if (load_file(&myMem, stdin) <= 0) {
615     fprintf(stderr, "%s: error loading file into memory\n",
616 greg 2.14 argv[a]);
617 greg 2.1 return(1);
618     }
619     } else if (load_stream(&myMem, stdin) <= 0) {
620     fprintf(stderr, "%s: error loading stdin into memory\n",
621     argv[0]);
622     return(1);
623     }
624     if (!do_transpose(&myMem))
625     return(1);
626 greg 2.17 /* free_load(&myMem); about to exit, so don't bother */
627     } else if (!do_resize(stdin)) /* reshaping input */
628 greg 2.1 return(1);
629     return(0);
630     userr:
631     fprintf(stderr,
632 greg 2.10 "Usage: %s [-h[io]][-w][-f[afdb][N]][-t][-ic in_col][-ir in_row][-oc out_col][-or out_row] [input.dat]\n",
633 greg 2.1 argv[0]);
634     return(1);
635     }