ViewVC Help
View File | Revision Log | Show Annotations | Download File | Root Listing
root/radiance/ray/src/util/rcollate.c
Revision: 2.24
Committed: Sat Apr 16 00:42:16 2016 UTC (8 years ago) by greg
Content type: text/plain
Branch: MAIN
Changes since 2.23: +11 -2 lines
Log Message:
Made memory allocation in index_records less greedy

File Contents

# Content
#ifndef lint
/* RCS identification string for this file; compiled out when `lint' is defined */
static const char	RCSid[] =
	"$Id: rcollate.c,v 2.23 2016/03/06 01:13:18 schorsch Exp $";
#endif
4 /*
5 * Utility to re-order records in a binary or ASCII data file (matrix)
6 */
7
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include "platform.h"
12 #include "rtio.h"
13 #include "resolu.h"
14 #if defined(_WIN32) || defined(_WIN64)
15 #undef ftello
16 #define ftello ftell
17 #undef ssize_t
18 #define ssize_t size_t
19 #else
20 #include <sys/mman.h>
21 #endif
22
/* Input data held in memory: either a heap buffer or a memory-mapped file */
typedef struct {
	void	*base;		/* first byte of the loaded data */
	size_t	len;		/* number of bytes available at base */
	int	mapped;		/* non-zero when base was obtained from mmap() */
} MEMLOAD;
28
/*
 * Index of the records found in an ASCII input.
 * The structure is allocated with extra room so that rec[] can hold
 * nrecs start pointers plus one terminating entry: index_records()
 * stores the end-of-data pointer in rec[nrecs].
 */
typedef struct {
	int	nw_rec;		/* words making up one record */
	int	nrecs;		/* how many records were indexed */
	char	*rec[1];	/* record start pointers (array extends past struct) */
} RECINDEX;
34
int	warnings = 1;		/* non-zero: print warning messages (-w toggles it) */
36
37 /* free loaded file */
38 static void
39 free_load(MEMLOAD *mp)
40 {
41 if (mp == NULL || (mp->base == NULL) | (mp->len <= 0))
42 return;
43 #ifdef MAP_FILE
44 if (mp->mapped)
45 munmap(mp->base, mp->len);
46 else
47 #endif
48 free(mp->base);
49 mp->base = NULL;
50 mp->len = 0;
51 }
52
53 /* load memory from an input stream, starting from current position */
54 static int
55 load_stream(MEMLOAD *mp, FILE *fp)
56 {
57 size_t alloced = 0;
58 char buf[8192];
59 size_t nr;
60
61 if (mp == NULL)
62 return(-1);
63 mp->base = NULL;
64 mp->len = 0;
65 mp->mapped = 0;
66 if (fp == NULL)
67 return(-1);
68 while ((nr = fread(buf, 1, sizeof(buf), fp)) > 0) {
69 if (!alloced)
70 mp->base = malloc(alloced = nr);
71 else if (mp->len+nr > alloced)
72 mp->base = realloc(mp->base,
73 alloced = alloced*(2+(nr==sizeof(buf)))/2+nr);
74 if (mp->base == NULL)
75 return(-1);
76 memcpy((char *)mp->base + mp->len, buf, nr);
77 mp->len += nr;
78 }
79 if (ferror(fp)) {
80 free_load(mp);
81 return(-1);
82 }
83 if (alloced > mp->len*5/4) /* don't waste too much space */
84 mp->base = realloc(mp->base, mp->len);
85 return(mp->len > 0);
86 }
87
88 /* load a file into memory */
89 static int
90 load_file(MEMLOAD *mp, FILE *fp)
91 {
92 int fd;
93 off_t skip, flen;
94
95 #if defined(_WIN32) || defined(_WIN64)
96 /* too difficult to fix this */
97 return load_stream(mp, fp);
98 #endif
99 if (mp == NULL)
100 return(-1);
101 mp->base = NULL;
102 mp->len = 0;
103 mp->mapped = 0;
104 if (fp == NULL)
105 return(-1);
106 fd = fileno(fp);
107 skip = ftello(fp);
108 flen = lseek(fd, 0, SEEK_END);
109 if (flen <= skip)
110 return((int)(flen - skip));
111 mp->len = (size_t)(flen - skip);
112 #ifdef MAP_FILE
113 if (mp->len > 1L<<20) { /* map file if > 1 MByte */
114 mp->base = mmap(NULL, mp->len, PROT_READ, MAP_PRIVATE, fd, skip);
115 if (mp->base != MAP_FAILED) {
116 mp->mapped = 1;
117 return(1); /* mmap() success */
118 }
119 mp->base = NULL; /* fall back to reading it in... */
120 }
121 #endif
122 if (lseek(fd, skip, SEEK_SET) != skip ||
123 (mp->base = malloc(mp->len)) == NULL) {
124 mp->len = 0;
125 return(-1);
126 }
127 if (read(fd, (char *)mp->base, mp->len) != mp->len) {
128 free_load(mp);
129 return(-1);
130 }
131 return(1);
132 }
133
/* release a record index (a single heap block, so free() is enough) */
#define free_records(rp)	free(rp)
136
137 /* compute record index */
138 static RECINDEX *
139 index_records(const MEMLOAD *mp, int nw_rec)
140 {
141 int nall = 0;
142 RECINDEX *rp;
143 char *cp, *mend;
144 int n;
145
146 if (mp == NULL || (mp->base == NULL) | (mp->len <= 0))
147 return(NULL);
148 if (nw_rec <= 0)
149 return(NULL);
150 nall = 1000;
151 rp = (RECINDEX *)malloc(sizeof(RECINDEX) + nall*sizeof(char *));
152 if (rp == NULL)
153 return(NULL);
154 rp->nw_rec = nw_rec;
155 rp->nrecs = 0;
156 cp = (char *)mp->base;
157 mend = cp + mp->len;
158 for ( ; ; ) { /* whitespace-separated words */
159 while (cp < mend && !*cp | isspace(*cp))
160 ++cp;
161 if (cp >= mend)
162 break;
163 if (rp->nrecs >= nall) {
164 nall += nall>>1; /* get more record space */
165 rp = (RECINDEX *)realloc(rp,
166 sizeof(RECINDEX) + nall*sizeof(char *));
167 if (rp == NULL)
168 return(NULL);
169 }
170 rp->rec[rp->nrecs++] = cp; /* point to first non-white */
171 n = rp->nw_rec;
172 while (++cp < mend) /* find end of record */
173 if (!*cp | isspace(*cp)) {
174 if (--n <= 0)
175 break; /* got requisite # words */
176 do { /* else find next word */
177 if (*cp == '\n') {
178 fprintf(stderr,
179 "Unexpected EOL in record!\n");
180 free_records(rp);
181 return(NULL);
182 }
183 if (++cp >= mend)
184 break;
185 } while (!*cp | isspace(*cp));
186 }
187 }
188 rp->rec[rp->nrecs] = mend; /* reallocate to save space */
189 rp = (RECINDEX *)realloc(rp,
190 sizeof(RECINDEX) + rp->nrecs*sizeof(char *));
191 return(rp);
192 }
193
194 /* count number of columns based on first EOL */
195 static int
196 count_columns(const RECINDEX *rp)
197 {
198 char *cp = rp->rec[0];
199 char *mend = rp->rec[rp->nrecs];
200 int i;
201
202 while (*cp != '\n')
203 if (++cp >= mend)
204 return(0);
205 for (i = 0; i < rp->nrecs; i++)
206 if (rp->rec[i] >= cp)
207 break;
208 return(i);
209 }
210
211 /* copy nth record from index to stdout */
212 static int
213 print_record(const RECINDEX *rp, int n)
214 {
215 int words2go = rp->nw_rec;
216 char *scp;
217
218 if ((n < 0) | (n >= rp->nrecs))
219 return(0);
220 scp = rp->rec[n];
221 do {
222 putc(*scp++, stdout);
223 if (!*scp | isspace(*scp)) {
224 if (--words2go <= 0)
225 break;
226 putc(' ', stdout); /* single space btwn. words */
227 do
228 if (++scp >= rp->rec[n+1])
229 break;
230 while (!*scp | isspace(*scp));
231 }
232 } while (scp < rp->rec[n+1]);
233 /* caller adds record sep. */
234 return(1);
235 }
236
/*
 * Copy everything remaining on a stream to standard output.
 * stdout is flushed first because the data is written straight to its
 * file descriptor.  Returns 1 on success, 0 if fp is NULL, a write comes
 * up short, or the input stream reports an error.
 */
static int
output_stream(FILE *fp)
{
	char	chunk[8192];
	ssize_t	got;

	if (fp == NULL)
		return(0);
	fflush(stdout);
	for ( ; ; ) {
		got = fread(chunk, 1, sizeof(chunk), fp);
		if (got <= 0)
			break;			/* EOF or read error */
		if (write(fileno(stdout), chunk, got) != got)
			return(0);
	}
	return(!ferror(fp));
}
252
/*
 * Get the next word from a stream, leaving the stream on EOL or at the
 * start of the next word.
 *
 * buf	receives the NUL-terminated word (at most 255 characters; a longer
 *	word is cut there and its remainder is returned by the next call)
 * fp	input stream
 *
 * Leading NUL bytes and white space (newlines included) are skipped.
 * After the word, blanks up to but not including a newline are consumed.
 * Returns buf, or NULL if end of input is reached before a word.
 */
static char *
fget_word(char buf[256], FILE *fp)
{
	int	c;
	char	*cp;
					/* skip nul's and white space */
	while (!(c = getc(fp)) || isspace(c))
		;
	if (c == EOF)
		return(NULL);
	cp = buf;
	do
		*cp++ = c;
	while ((c = getc(fp)) != EOF && !isspace(c) && cp < buf+255);
	*cp = '\0';
					/* isspace() only promises non-zero, so
					 * combine logically, not bitwise */
	while (isspace(c) && c != '\n')
		c = getc(fp);
	if (c != EOF)
		ungetc(c, fp);
	return(buf);
}
275
/* Conversion settings, filled in from the command line and input header */
char	*fmtid = NULL;		/* data format name (NULL until known) */
int	comp_size = 0;		/* bytes per binary component (0 for ASCII) */
int	n_comp = 0;		/* components in each record */
int	ni_columns = 0;		/* input column count */
int	ni_rows = 0;		/* input row count */
int	no_columns = 0;		/* output column count */
int	no_rows = 0;		/* output row count */
int	transpose = 0;		/* swap rows and columns? */
int	i_header = 1;		/* expect a header on input? */
int	o_header = 1;		/* write a header on output? */
286
287 /* check settings and assign defaults */
288 static int
289 check_sizes()
290 {
291 if (fmtid == NULL) {
292 fmtid = "ascii";
293 } else if (!comp_size) {
294 if (!strcmp(fmtid, "float"))
295 comp_size = sizeof(float);
296 else if (!strcmp(fmtid, "double"))
297 comp_size = sizeof(double);
298 else if (!strcmp(fmtid, "byte"))
299 comp_size = 1;
300 else if (strcmp(fmtid, "ascii")) {
301 fprintf(stderr, "Unsupported format: %s\n", fmtid);
302 return(0);
303 }
304 }
305 if (transpose && (no_rows <= 0) & (no_columns <= 0)) {
306 if (ni_rows > 0) no_columns = ni_rows;
307 if (ni_columns > 0) no_rows = ni_columns;
308 } else if ((no_rows <= 0) & (no_columns > 0) &&
309 !((ni_rows*ni_columns) % no_columns))
310 no_rows = ni_rows*ni_columns/no_columns;
311 if (n_comp <= 0)
312 n_comp = 3;
313 return(1);
314 }
315
316 /* output transposed ASCII or binary data from memory */
317 static int
318 do_transpose(const MEMLOAD *mp)
319 {
320 static const char tabEOL[2] = {'\t','\n'};
321 RECINDEX *rp = NULL;
322 long nrecords;
323 int i, j;
324 /* propogate sizes */
325 if (ni_rows <= 0)
326 ni_rows = no_columns;
327 if (ni_columns <= 0)
328 ni_columns = no_rows;
329 /* get # records (& index) */
330 if (!comp_size) {
331 if ((rp = index_records(mp, n_comp)) == NULL)
332 return(0);
333 if (ni_columns <= 0)
334 ni_columns = count_columns(rp);
335 nrecords = rp->nrecs;
336 } else if ((ni_rows > 0) & (ni_columns > 0)) {
337 nrecords = ni_rows*ni_columns;
338 if (nrecords > mp->len/(n_comp*comp_size)) {
339 fprintf(stderr,
340 "Input too small for specified size and type\n");
341 return(0);
342 }
343 } else
344 nrecords = mp->len/(n_comp*comp_size);
345 /* check sizes */
346 if ((ni_rows <= 0) & (ni_columns > 0))
347 ni_rows = nrecords/ni_columns;
348 if ((ni_columns <= 0) & (ni_rows > 0))
349 ni_columns = nrecords/ni_rows;
350 if (nrecords != ni_rows*ni_columns)
351 goto badspec;
352 if (no_columns <= 0)
353 no_columns = ni_rows;
354 if (no_rows <= 0)
355 no_rows = ni_columns;
356 if ((no_rows != ni_columns) | (no_columns != ni_rows))
357 goto badspec;
358 /* transpose records */
359 for (i = 0; i < no_rows; i++) {
360 for (j = 0; j < no_columns; j++)
361 if (rp != NULL) { /* ASCII output */
362 print_record(rp, j*ni_columns + i);
363 putc(tabEOL[j >= no_columns-1], stdout);
364 } else { /* binary output */
365 fwrite((char *)mp->base +
366 (n_comp*comp_size)*(j*ni_columns + i),
367 n_comp*comp_size, 1, stdout);
368 }
369 if (ferror(stdout)) {
370 fprintf(stderr, "Error writing to stdout\n");
371 return(0);
372 }
373 }
374 if (rp != NULL)
375 free_records(rp);
376 return(1);
377 badspec:
378 fprintf(stderr, "Bad transpose specification -- check dimension(s)\n");
379 return(0);
380 }
381
382 /* resize ASCII stream input by ignoring EOLs between records */
383 static int
384 do_resize(FILE *fp)
385 {
386 long records2go = ni_rows*ni_columns;
387 int columns2go = no_columns;
388 char word[256];
389 /* sanity checks */
390 if (comp_size || (no_columns == ni_columns) & (no_rows == ni_rows))
391 return(output_stream(fp)); /* no-op -- just copy */
392 if (no_columns <= 0) {
393 fprintf(stderr, "Missing -oc specification\n");
394 return(0);
395 }
396 if ((records2go <= 0) & (no_rows > 0))
397 records2go = no_rows*no_columns;
398 else if (no_rows*no_columns != records2go) {
399 fprintf(stderr,
400 "Input and output data sizes disagree (%dx%d != %dx%d)\n",
401 ni_rows, ni_columns, no_rows, no_columns);
402 return(0);
403 }
404 do { /* reshape records */
405 int n;
406
407 for (n = n_comp; n--; ) {
408 if (fget_word(word, fp) == NULL) {
409 if (records2go > 0 || n < n_comp-1)
410 break;
411 goto done; /* normal EOD */
412 }
413 fputs(word, stdout);
414 if (n) { /* mid-record? */
415 int c = getc(fp);
416 if ((c == '\n') | (c == EOF))
417 break;
418 ungetc(c, fp);
419 putc(' ', stdout);
420 }
421 }
422 if (n >= 0) {
423 fprintf(stderr, "Incomplete record / unexpected EOF\n");
424 return(0);
425 }
426 if (--columns2go <= 0) { /* time to end output row? */
427 putc('\n', stdout);
428 columns2go = no_columns;
429 } else /* else separate records */
430 putc('\t', stdout);
431 } while (--records2go); /* expected EOD? */
432 done:
433 if (warnings && columns2go != no_columns)
434 fprintf(stderr, "Warning -- incomplete final row\n");
435 if (warnings && fget_word(word, fp) != NULL)
436 fprintf(stderr, "Warning -- characters beyond expected EOD\n");
437 return(1);
438 }
439
440 /* process a header line and copy to stdout */
441 static int
442 headline(char *s, void *p)
443 {
444 static char fmt[32];
445 int n;
446
447 if (formatval(fmt, s)) {
448 if (fmtid == NULL) {
449 fmtid = fmt;
450 return(0);
451 }
452 if (!strcmp(fmt, fmtid))
453 return(0);
454 fprintf(stderr, "Input format '%s' != '%s'\n", fmt, fmtid);
455 return(-1);
456 }
457 if (!strncmp(s, "NROWS=", 6)) {
458 n = atoi(s+6);
459 if ((ni_rows > 0) & (n != ni_rows)) {
460 fputs("Incorrect input row count\n", stderr);
461 return(-1);
462 }
463 ni_rows = n;
464 return(0);
465 }
466 if (!strncmp(s, "NCOLS=", 6)) {
467 n = atoi(s+6);
468 if ((ni_columns > 0) & (n != ni_columns)) {
469 fputs("Incorrect input column count\n", stderr);
470 return(-1);
471 }
472 ni_columns = n;
473 return(0);
474 }
475 if (!strncmp(s, "NCOMP=", 6)) {
476 n = atoi(s+6);
477 if ((n_comp > 0) & (n != n_comp)) {
478 fputs("Incorrect number of components\n", stderr);
479 return(-1);
480 }
481 n_comp = n;
482 return(0);
483 }
484 if (o_header)
485 fputs(s, stdout); /* copy header info. */
486 return(0);
487 }
488
489 /* main routine for converting rows/columns in data file */
490 int
491 main(int argc, char *argv[])
492 {
493 int a;
494
495 for (a = 1; a < argc && argv[a][0] == '-'; a++)
496 switch (argv[a][1]) {
497 case 'i': /* input */
498 if (argv[a][2] == 'c') /* columns */
499 ni_columns = atoi(argv[++a]);
500 else if (argv[a][2] == 'r')
501 ni_rows = atoi(argv[++a]);
502 else
503 goto userr;
504 break;
505 case 'o': /* output */
506 if (argv[a][2] == 'c') /* columns */
507 no_columns = atoi(argv[++a]);
508 else if (argv[a][2] == 'r')
509 no_rows = atoi(argv[++a]);
510 else
511 goto userr;
512 break;
513 case 'h': /* turn off header */
514 switch (argv[a][2]) {
515 case 'i':
516 i_header = 0;
517 break;
518 case 'o':
519 o_header = 0;
520 break;
521 case '\0':
522 i_header = o_header = 0;
523 break;
524 default:
525 goto userr;
526 }
527 break;
528 case 't': /* transpose on/off */
529 transpose = !transpose;
530 break;
531 case 'f': /* format */
532 switch (argv[a][2]) {
533 case 'a': /* ASCII */
534 case 'A':
535 fmtid = "ascii";
536 comp_size = 0;
537 break;
538 case 'f': /* float */
539 case 'F':
540 fmtid = "float";
541 comp_size = sizeof(float);
542 break;
543 case 'd': /* double */
544 case 'D':
545 fmtid = "double";
546 comp_size = sizeof(double);
547 break;
548 case 'b': /* binary (bytes) */
549 case 'B':
550 fmtid = "byte";
551 comp_size = 1;
552 break;
553 default:
554 goto userr;
555 }
556 if (argv[a][3]) {
557 if (!isdigit(argv[a][3]))
558 goto userr;
559 n_comp = atoi(argv[a]+3);
560 } else
561 n_comp = 1;
562 break;
563 case 'w': /* warnings on/off */
564 warnings = !warnings;
565 break;
566 default:
567 goto userr;
568 }
569 if (a < argc-1) /* arg count OK? */
570 goto userr;
571 /* open input file? */
572 if (a == argc-1 && freopen(argv[a], "r", stdin) == NULL) {
573 fprintf(stderr, "%s: cannot open for reading\n", argv[a]);
574 return(1);
575 }
576 if (comp_size) {
577 SET_FILE_BINARY(stdin);
578 SET_FILE_BINARY(stdout);
579 }
580 /* check for no-op */
581 if (!transpose & (i_header == o_header) &&
582 (no_columns == ni_columns) & (no_rows == ni_rows)) {
583 if (warnings)
584 fprintf(stderr, "%s: no-op -- copying input verbatim\n",
585 argv[0]);
586 if (!output_stream(stdin))
587 return(1);
588 return(0);
589 }
590 if (i_header) { /* read header */
591 if (getheader(stdin, headline, NULL) < 0)
592 return(1);
593 if (!check_sizes())
594 return(1);
595 if (comp_size) { /* a little late... */
596 SET_FILE_BINARY(stdin);
597 SET_FILE_BINARY(stdout);
598 }
599 } else if (!check_sizes())
600 return(1);
601 if (o_header) { /* write header */
602 printargs(a, argv, stdout);
603 if (no_rows > 0)
604 printf("NROWS=%d\n", no_rows);
605 if (no_columns > 0)
606 printf("NCOLS=%d\n", no_columns);
607 printf("NCOMP=%d\n", n_comp);
608 fputformat(fmtid, stdout);
609 fputc('\n', stdout); /* finish new header */
610 }
611 if (transpose) { /* transposing rows & columns? */
612 MEMLOAD myMem; /* need to map into memory */
613 if (a == argc-1) {
614 if (load_file(&myMem, stdin) <= 0) {
615 fprintf(stderr, "%s: error loading file into memory\n",
616 argv[a]);
617 return(1);
618 }
619 } else if (load_stream(&myMem, stdin) <= 0) {
620 fprintf(stderr, "%s: error loading stdin into memory\n",
621 argv[0]);
622 return(1);
623 }
624 if (!do_transpose(&myMem))
625 return(1);
626 /* free_load(&myMem); about to exit, so don't bother */
627 } else if (!do_resize(stdin)) /* reshaping input */
628 return(1);
629 return(0);
630 userr:
631 fprintf(stderr,
632 "Usage: %s [-h[io]][-w][-f[afdb][N]][-t][-ic in_col][-ir in_row][-oc out_col][-or out_row] [input.dat]\n",
633 argv[0]);
634 return(1);
635 }