ViewVC Help
View File | Revision Log | Show Annotations | Download File | Root Listing
root/radiance/ray/src/util/rcollate.c
Revision: 2.3
Committed: Thu Sep 5 18:47:12 2013 UTC (10 years, 7 months ago) by greg
Content type: text/plain
Branch: MAIN
Changes since 2.2: +8 -3 lines
Log Message:
Added error check for too little input data

File Contents

# User Rev Content
1 greg 2.1 #ifndef lint
2 greg 2.3 static const char RCSid[] = "$Id: rcollate.c,v 2.2 2013/09/05 18:30:58 greg Exp $";
3 greg 2.1 #endif
4     /*
5     * Utility to re-order records in a binary or ASCII data file (matrix)
6     */
7    
8     #include <stdlib.h>
9     #include <unistd.h>
10     #include <string.h>
11     #include <ctype.h>
12     #include "platform.h"
13     #include "rtio.h"
14     #include "resolu.h"
15     #ifndef _WIN32
16     #include <sys/mman.h>
17     #endif
18    
19     #ifdef getc_unlocked /* avoid horrendous overhead of flockfile */
20     #undef getc
21     #undef putc
22     #define getc getc_unlocked
23     #define putc putc_unlocked
24     #endif
25    
/* Contents of a file held in memory, either read in or memory-mapped */
typedef struct {
	void	*base;		/* start of the loaded data (NULL when empty) */
	size_t	len;		/* number of bytes at base */
	int	mapped;		/* non-zero if base was obtained from mmap() */
} MEMLOAD;
31    
/* Index of the records found in loaded ASCII data */
typedef struct {
	int	nw_rec;		/* words that make up one record */
	int	nrecs;		/* how many records were indexed */
	char	*rec[1];	/* start of each record (array continues past struct) */
} RECINDEX;
37    
38     /* free loaded file */
39     static void
40     free_load(MEMLOAD *mp)
41     {
42     if (mp == NULL || (mp->base == NULL) | (mp->len <= 0))
43     return;
44     #ifdef MAP_FILE
45     if (mp->mapped)
46     munmap(mp->base, mp->len);
47     else
48     #endif
49     free(mp->base);
50     mp->base = NULL;
51     mp->len = 0;
52     }
53    
54     /* load a file into memory */
55     static int
56     load_file(MEMLOAD *mp, FILE *fp)
57     {
58     int fd;
59     off_t skip, flen;
60    
61     if (mp == NULL)
62     return(-1);
63     mp->base = NULL;
64     mp->len = 0;
65     mp->mapped = 0;
66     if (fp == NULL)
67     return(-1);
68     fd = fileno(fp);
69     skip = ftello(fp);
70     flen = lseek(fd, 0, SEEK_END);
71     if (flen <= skip)
72     return((int)(flen - skip));
73     mp->len = (size_t)(flen - skip);
74     #ifdef MAP_FILE
75     if (mp->len > 1L<<20) { /* map file if > 1 MByte */
76 greg 2.2 mp->base = mmap(NULL, mp->len, PROT_READ, MAP_PRIVATE, fd, skip);
77 greg 2.1 if (mp->base != MAP_FAILED) {
78     mp->mapped = 1;
79     return(1); /* mmap() success */
80     }
81     mp->base = NULL; /* fall back to reading it in... */
82     }
83     #endif
84     if (lseek(fd, skip, SEEK_SET) != skip ||
85     (mp->base = malloc(mp->len)) == NULL) {
86     mp->len = 0;
87     return(-1);
88     }
89     if (read(fd, (char *)mp->base, mp->len) != mp->len) {
90     free_load(mp);
91     return(-1);
92     }
93     return(1);
94     }
95    
96     /* load memory from an input stream, starting from current position */
97     static int
98     load_stream(MEMLOAD *mp, FILE *fp)
99     {
100     char buf[8192];
101     size_t nr;
102    
103     if (mp == NULL)
104     return(-1);
105     mp->base = NULL;
106     mp->len = 0;
107     mp->mapped = 0;
108     if (fp == NULL)
109     return(-1);
110     while ((nr = fread(buf, 1, sizeof(buf), fp)) > 0) {
111     if (!mp->len)
112     mp->base = malloc(nr);
113     else
114     mp->base = realloc(mp->base, mp->len+nr);
115     if (mp->base == NULL)
116     return(-1);
117     memcpy((char *)mp->base + mp->len, buf, nr);
118     mp->len += nr;
119     }
120     if (ferror(fp)) {
121     free_load(mp);
122     return(-1);
123     }
124     return(mp->len > 0);
125     }
126    
127     /* free a record index */
128     #define free_records(rp) free(rp)
129    
130     /* compute record index */
131     static RECINDEX *
132     index_records(const MEMLOAD *mp, int nw_rec)
133     {
134     RECINDEX *rp;
135     char *cp, *mend;
136     int n;
137    
138     if (mp == NULL || (mp->base == NULL) | (mp->len <= 0))
139     return(NULL);
140     if (nw_rec <= 0)
141     return(NULL);
142     rp = (RECINDEX *)malloc(sizeof(RECINDEX) + mp->len/(2*nw_rec)*sizeof(char *));
143     if (rp == NULL)
144     return(NULL);
145     rp->nw_rec = nw_rec;
146     rp->nrecs = 0;
147     cp = (char *)mp->base;
148     mend = cp + mp->len;
149     for ( ; ; ) { /* whitespace-separated words */
150     while (cp < mend && !*cp | isspace(*cp))
151     ++cp;
152     if (cp >= mend)
153     break;
154     rp->rec[rp->nrecs++] = cp; /* point to first non-white */
155     n = rp->nw_rec;
156     while (++cp < mend) /* find end of record */
157     if (!*cp | isspace(*cp)) {
158     if (--n <= 0)
159     break; /* got requisite # words */
160     do { /* else find next word */
161     if (*cp == '\n') {
162     fprintf(stderr,
163     "Unexpected EOL in record!\n");
164     free_records(rp);
165     return(NULL);
166     }
167     if (++cp >= mend)
168     break;
169     } while (!*cp | isspace(*cp));
170     }
171     }
172     rp->rec[rp->nrecs] = mend; /* reallocate to save space */
173     rp = (RECINDEX *)realloc(rp,
174     sizeof(RECINDEX) + rp->nrecs*sizeof(char *));
175     return(rp);
176     }
177    
178     /* count number of columns based on first EOL */
179     static int
180     count_columns(const RECINDEX *rp)
181     {
182     char *cp = rp->rec[0];
183     char *mend = rp->rec[rp->nrecs];
184     int i;
185    
186     while (*cp != '\n')
187     if (++cp >= mend)
188     return(0);
189     for (i = 0; i < rp->nrecs; i++)
190     if (rp->rec[i] >= cp)
191     break;
192     return(i);
193     }
194    
195     /* copy nth record from index to stdout */
196     static int
197     print_record(const RECINDEX *rp, int n)
198     {
199     int words2go = rp->nw_rec;
200     char *scp;
201    
202     if ((n < 0) | (n >= rp->nrecs))
203     return(0);
204     scp = rp->rec[n];
205     do {
206     putc(*scp++, stdout);
207     if (!*scp | isspace(*scp)) {
208     if (--words2go <= 0)
209     break;
210     putc(' ', stdout); /* single space btwn. words */
211     do
212     if (++scp >= rp->rec[n+1])
213     break;
214     while (!*scp | isspace(*scp));
215     }
216     } while (scp < rp->rec[n+1]);
217     /* caller adds record sep. */
218     return(1);
219     }
220    
/*
 * Copy the remainder of a stream to stdout using its file descriptor
 * directly.  Anything already sitting in the stream's stdio input buffer
 * is NOT copied, so call this before reading from fp.
 * Returns 1 on success, 0 on a NULL stream or a read/write error.
 */
static int
output_stream(FILE *fp)
{
	char	buf[8192];
	ssize_t	n;

	if (fp == NULL)
		return(0);
	fflush(stdout);			/* assumes nothing in input buffer */
	while ((n = read(fileno(fp), buf, sizeof(buf))) > 0) {
		const char	*bp = buf;

		while (n > 0) {		/* write() may accept only part */
			ssize_t	nw = write(fileno(stdout), bp, (size_t)n);

			if (nw <= 0)
				return(0);
			bp += nw;
			n -= nw;
		}
	}
	return(n >= 0);			/* n < 0 means read() failed */
}
236    
/*
 * Get the next word from a stream, leaving the stream positioned on an
 * EOL or at the start of the following word.
 *
 * Leading NUL bytes and white space (including newlines) are skipped.
 * At most 255 characters are stored in buf, which is always terminated;
 * a longer word is continued by the next call.
 * Returns buf, or NULL if end of input is reached before any word.
 */
static char *
fget_word(char buf[256], FILE *fp)
{
	int	c;
	char	*cp;
					/* skip nul's and white space */
	while (!(c = getc(fp)) || isspace(c))
		;
	if (c == EOF)
		return(NULL);
	cp = buf;
	do
		*cp++ = c;
	while ((c = getc(fp)) != EOF && !isspace(c) && cp < buf+255);
	*cp = '\0';
	/*
	 * Skip blanks up to the next word or EOL.  This must be a logical
	 * AND: isspace() only promises a non-zero result for white space,
	 * so a bitwise AND with a 0/1 comparison can wrongly yield zero.
	 */
	while (isspace(c) && c != '\n')
		c = getc(fp);
	if (c != EOF)
		ungetc(c, fp);
	return(buf);
}
259    
/* Program settings; defaults below are overridden by options in main() */
char	*fmtid = "ascii";		/* data format identifier */
int	record_width = 3;		/* words per record (negative => binary) */
int	ni_columns = 0, ni_rows = 0;	/* input columns and rows (0 = unset) */
int	no_columns = 0, no_rows = 0;	/* output columns and rows (0 = unset) */
266    
267     /* output transposed ASCII or binary data from memory */
268     static int
269     do_transpose(const MEMLOAD *mp)
270     {
271     static const char tabEOL[2] = {'\t','\n'};
272     RECINDEX *rp = NULL;
273     long nrecords;
274     int i, j;
275     /* get # records (& index) */
276     if (record_width > 0) {
277     if ((rp = index_records(mp, record_width)) == NULL)
278     return(0);
279     if (ni_columns <= 0)
280     ni_columns = count_columns(rp);
281     nrecords = rp->nrecs;
282 greg 2.3 } else if ((ni_rows > 0) & (ni_columns > 0)) {
283 greg 2.1 nrecords = ni_rows*ni_columns;
284 greg 2.3 if (nrecords > mp->len / -record_width) {
285     fprintf(stderr,
286     "Input too small for specified size and type\n");
287     return(0);
288     }
289     } else
290 greg 2.1 nrecords = mp->len / -record_width;
291     /* check sizes */
292     if (ni_rows <= 0)
293     ni_rows = no_columns;
294     if (ni_columns <= 0)
295     ni_columns = no_rows;
296     if ((ni_rows <= 0) & (ni_columns > 0))
297     ni_rows = nrecords/ni_columns;
298     if ((ni_columns <= 0) & (ni_rows > 0))
299     ni_columns = nrecords/ni_rows;
300     if (nrecords != ni_rows*ni_columns)
301     goto badspec;
302     if (no_columns <= 0)
303     no_columns = ni_rows;
304     if (no_rows <= 0)
305     no_rows = ni_columns;
306     if ((no_rows != ni_columns) | (no_columns != ni_rows))
307     goto badspec;
308     /* transpose records */
309     for (i = 0; i < no_rows; i++) {
310     for (j = 0; j < no_columns; j++)
311     if (rp != NULL) { /* ASCII output */
312     print_record(rp, j*ni_columns + i);
313     putc(tabEOL[j >= no_columns-1], stdout);
314     } else { /* binary output */
315     fwrite((char *)mp->base +
316     -record_width*(j*ni_columns + i),
317     -record_width, 1, stdout);
318     }
319     if (ferror(stdout)) {
320     fprintf(stderr, "Error writing to stdout\n");
321     return(0);
322     }
323     }
324     if (rp != NULL)
325     free_records(rp);
326     return(1);
327     badspec:
328     fprintf(stderr, "Bad transpose specification -- check dimension(s)\n");
329     return(0);
330     }
331    
332     /* resize ASCII stream input by ignoring EOLs between records */
333     static int
334     do_resize(FILE *fp)
335     {
336     long records2go = ni_rows*ni_columns;
337     int columns2go = no_columns;
338     char word[256];
339     /* sanity checks */
340     if (record_width <= 0) {
341     fprintf(stderr, "Bad call to do_resize (record_width = %d)\n",
342     record_width);
343     return(0);
344     }
345     if (no_columns <= 0) {
346     fprintf(stderr, "Missing -oc specification\n");
347     return(0);
348     }
349     if ((records2go <= 0) & (no_rows > 0))
350     records2go = no_rows*no_columns;
351     else if (no_rows*no_columns != records2go) {
352     fprintf(stderr,
353     "Input and output data sizes disagree (%dx%d != %dx%d)\n",
354     ni_rows, ni_columns, no_rows, no_columns);
355     return(0);
356     }
357     do { /* reshape records */
358     int n;
359    
360     for (n = record_width; n--; ) {
361     if (fget_word(word, fp) == NULL) {
362     if (records2go > 0 || n < record_width-1)
363     break;
364     goto done; /* normal EOD */
365     }
366     fputs(word, stdout);
367     if (n) { /* mid-record? */
368     int c = getc(fp);
369     if ((c == '\n') | (c == EOF))
370     break;
371     ungetc(c, fp);
372     putc(' ', stdout);
373     }
374     }
375     if (n >= 0) {
376     fprintf(stderr, "Incomplete record / unexpected EOF\n");
377     return(0);
378     }
379     if (--columns2go <= 0) { /* time to end output row? */
380     putc('\n', stdout);
381     columns2go = no_columns;
382     } else /* else separate records */
383     putc('\t', stdout);
384     } while (--records2go); /* expected EOD? */
385     done:
386     if (columns2go != no_columns)
387     fprintf(stderr, "Warning -- incomplete final row\n");
388     if (fget_word(word, fp) != NULL)
389     fprintf(stderr, "Warning -- data beyond expected EOF\n");
390     return(1);
391     }
392    
393     /* process a header line and copy to stdout */
394     static int
395     headline(char *s, void *p)
396     {
397     char fmt[32];
398    
399     if (formatval(fmt, s)) {
400     if (!strcmp(fmt, fmtid))
401     return(0);
402     fprintf(stderr, "Input format '%s' != '%s'\n", fmt, fmtid);
403     return(-1);
404     }
405     fputs(s, stdout); /* copy header info. */
406     return(0);
407     }
408    
409     /* main routine for converting rows/columns in data file */
410     int
411     main(int argc, char *argv[])
412     {
413     int do_header = 1; /* header i/o? */
414     int transpose = 0; /* transpose rows & cols? */
415     int i;
416    
417     for (i = 1; i < argc && argv[i][0] == '-'; i++)
418     switch (argv[i][1]) {
419     case 'i': /* input */
420     if (argv[i][2] == 'c') /* columns */
421     ni_columns = atoi(argv[++i]);
422     else if (argv[i][2] == 'r')
423     ni_rows = atoi(argv[++i]);
424     else
425     goto userr;
426     break;
427     case 'o': /* output */
428     if (argv[i][2] == 'c') /* columns */
429     no_columns = atoi(argv[++i]);
430     else if (argv[i][2] == 'r')
431     no_rows = atoi(argv[++i]);
432     else
433     goto userr;
434     break;
435     case 'h': /* header on/off */
436     do_header = !do_header;
437     break;
438     case 't': /* transpose on/off */
439     transpose = !transpose;
440     break;
441     case 'f': /* format */
442     switch (argv[i][2]) {
443     case 'a': /* ASCII */
444     case 'A':
445     fmtid = "ascii";
446     record_width = 1;
447     break;
448     case 'f': /* float */
449     case 'F':
450     fmtid = "float";
451     record_width = -(int)sizeof(float);
452     break;
453     case 'd': /* double */
454     case 'D':
455     fmtid = "double";
456     record_width = -(int)sizeof(double);
457     break;
458     case 'b': /* binary (bytes) */
459     case 'B':
460     fmtid = "byte";
461     record_width = -1;
462     break;
463     default:
464     goto userr;
465     }
466     if (argv[i][3]) {
467     if (!isdigit(argv[i][3]))
468     goto userr;
469     record_width *= atoi(argv[i]+3);
470     }
471     break;
472     default:
473     goto userr;
474     }
475     if (!record_width)
476     goto userr;
477     if (i < argc-1) /* arg count OK? */
478     goto userr;
479     /* open input file? */
480     if (i == argc-1 && freopen(argv[i], "r", stdin) == NULL) {
481     fprintf(stderr, "%s: cannot open for reading\n", argv[i]);
482     return(1);
483     }
484     if (record_width < 0) {
485     SET_FILE_BINARY(stdin);
486     SET_FILE_BINARY(stdout);
487     }
488     /* check for no-op */
489     if (!transpose && (record_width < 0 ||
490     (no_columns == ni_columns) & (no_rows == ni_rows))) {
491     fprintf(stderr, "%s: no-op -- copying input verbatim\n",
492     argv[0]);
493     if (!output_stream(stdin))
494     return(1);
495     return(0);
496     }
497     if (do_header) { /* read/write header */
498     if (getheader(stdin, &headline, NULL) < 0)
499     return(1);
500     printargs(argc, argv, stdout);
501     fputformat(fmtid, stdout);
502     fputc('\n', stdout); /* finish new header */
503     }
504     if (transpose) { /* transposing rows & columns? */
505     MEMLOAD myMem; /* need to load into memory */
506     if (i == argc-1) {
507     if (load_file(&myMem, stdin) <= 0) {
508     fprintf(stderr, "%s: error loading file into memory\n",
509     argv[i]);
510     return(1);
511     }
512     } else if (load_stream(&myMem, stdin) <= 0) {
513     fprintf(stderr, "%s: error loading stdin into memory\n",
514     argv[0]);
515     return(1);
516     }
517     if (!do_transpose(&myMem))
518     return(1);
519     /* free_load(&myMem); */
520     } else if (!do_resize(stdin)) /* just reshaping input */
521     return(1);
522     return(0);
523     userr:
524     fprintf(stderr,
525     "Usage: %s [-h][-f[afdb][N]][-t][-ic in_col][-ir in_row][-oc out_col][-or out_row] [input.dat]\n",
526     argv[0]);
527     return(1);
528     }