ViewVC Help
View File | Revision Log | Show Annotations | Download File | Root Listing
Revision: 2.5
Committed: Fri Sep 6 21:43:29 2013 UTC (11 years, 5 months ago) by greg
Content type: text/plain
Branch: MAIN
Changes since 2.4: +6 -5 lines
Log Message:
Propagate output dimensions to input dimensions before checking input data

File Contents

# Content
1 #ifndef lint
2 static const char RCSid[] = "$Id: rcollate.c,v 2.4 2013/09/06 21:34:39 greg Exp $";
3 #endif
4 /*
5 * Utility to re-order records in a binary or ASCII data file (matrix)
6 */
8 #include <stdlib.h>
9 #include <unistd.h>
10 #include <string.h>
11 #include <ctype.h>
12 #include "platform.h"
13 #include "rtio.h"
14 #include "resolu.h"
15 #ifndef _WIN32
16 #include <sys/mman.h>
17 #endif
19 #ifdef getc_unlocked /* avoid horrendous overhead of flockfile */
20 #undef getc
21 #undef putc
22 #define getc getc_unlocked
23 #define putc putc_unlocked
24 #endif
/* Descriptor for file contents held in memory (filled by load_file/load_stream) */
typedef struct {
	void	*base;		/* pointer to base memory (NULL when nothing is loaded) */
	size_t	len;		/* allocated memory length in bytes */
	int	mapped;		/* memory-mapped file? (nonzero => release with munmap) */
} MEMLOAD;		/* file loaded/mapped into memory */
/*
 * Index of whitespace-separated records found in a loaded buffer.
 * The structure is allocated with extra room past rec[0]; after indexing,
 * rec[nrecs] holds the end-of-data pointer so rec[n+1] always bounds record n.
 */
typedef struct {
	int	nw_rec;		/* number of words per record */
	int	nrecs;		/* number of records we found */
	char	*rec[1];	/* record array (extends struct) */
} RECINDEX;		/* record index */
int		warnings = 1;	/* report warnings? (toggled by the -w option in main) */
40 /* free loaded file */
41 static void
42 free_load(MEMLOAD *mp)
43 {
44 if (mp == NULL || (mp->base == NULL) | (mp->len <= 0))
45 return;
46 #ifdef MAP_FILE
47 if (mp->mapped)
48 munmap(mp->base, mp->len);
49 else
50 #endif
51 free(mp->base);
52 mp->base = NULL;
53 mp->len = 0;
54 }
56 /* load a file into memory */
57 static int
58 load_file(MEMLOAD *mp, FILE *fp)
59 {
60 int fd;
61 off_t skip, flen;
63 if (mp == NULL)
64 return(-1);
65 mp->base = NULL;
66 mp->len = 0;
67 mp->mapped = 0;
68 if (fp == NULL)
69 return(-1);
70 fd = fileno(fp);
71 skip = ftello(fp);
72 flen = lseek(fd, 0, SEEK_END);
73 if (flen <= skip)
74 return((int)(flen - skip));
75 mp->len = (size_t)(flen - skip);
76 #ifdef MAP_FILE
77 if (mp->len > 1L<<20) { /* map file if > 1 MByte */
78 mp->base = mmap(NULL, mp->len, PROT_READ, MAP_PRIVATE, fd, skip);
79 if (mp->base != MAP_FAILED) {
80 mp->mapped = 1;
81 return(1); /* mmap() success */
82 }
83 mp->base = NULL; /* fall back to reading it in... */
84 }
85 #endif
86 if (lseek(fd, skip, SEEK_SET) != skip ||
87 (mp->base = malloc(mp->len)) == NULL) {
88 mp->len = 0;
89 return(-1);
90 }
91 if (read(fd, (char *)mp->base, mp->len) != mp->len) {
92 free_load(mp);
93 return(-1);
94 }
95 return(1);
96 }
98 /* load memory from an input stream, starting from current position */
99 static int
100 load_stream(MEMLOAD *mp, FILE *fp)
101 {
102 char buf[8192];
103 size_t nr;
105 if (mp == NULL)
106 return(-1);
107 mp->base = NULL;
108 mp->len = 0;
109 mp->mapped = 0;
110 if (fp == NULL)
111 return(-1);
112 while ((nr = fread(buf, 1, sizeof(buf), fp)) > 0) {
113 if (!mp->len)
114 mp->base = malloc(nr);
115 else
116 mp->base = realloc(mp->base, mp->len+nr);
117 if (mp->base == NULL)
118 return(-1);
119 memcpy((char *)mp->base + mp->len, buf, nr);
120 mp->len += nr;
121 }
122 if (ferror(fp)) {
123 free_load(mp);
124 return(-1);
125 }
126 return(mp->len > 0);
127 }
/* free a record index (index_records allocates it as one block, so free() suffices) */
#define free_records(rp)	free(rp)
132 /* compute record index */
133 static RECINDEX *
134 index_records(const MEMLOAD *mp, int nw_rec)
135 {
136 RECINDEX *rp;
137 char *cp, *mend;
138 int n;
140 if (mp == NULL || (mp->base == NULL) | (mp->len <= 0))
141 return(NULL);
142 if (nw_rec <= 0)
143 return(NULL);
144 rp = (RECINDEX *)malloc(sizeof(RECINDEX) + mp->len/(2*nw_rec)*sizeof(char *));
145 if (rp == NULL)
146 return(NULL);
147 rp->nw_rec = nw_rec;
148 rp->nrecs = 0;
149 cp = (char *)mp->base;
150 mend = cp + mp->len;
151 for ( ; ; ) { /* whitespace-separated words */
152 while (cp < mend && !*cp | isspace(*cp))
153 ++cp;
154 if (cp >= mend)
155 break;
156 rp->rec[rp->nrecs++] = cp; /* point to first non-white */
157 n = rp->nw_rec;
158 while (++cp < mend) /* find end of record */
159 if (!*cp | isspace(*cp)) {
160 if (--n <= 0)
161 break; /* got requisite # words */
162 do { /* else find next word */
163 if (*cp == '\n') {
164 fprintf(stderr,
165 "Unexpected EOL in record!\n");
166 free_records(rp);
167 return(NULL);
168 }
169 if (++cp >= mend)
170 break;
171 } while (!*cp | isspace(*cp));
172 }
173 }
174 rp->rec[rp->nrecs] = mend; /* reallocate to save space */
175 rp = (RECINDEX *)realloc(rp,
176 sizeof(RECINDEX) + rp->nrecs*sizeof(char *));
177 return(rp);
178 }
180 /* count number of columns based on first EOL */
181 static int
182 count_columns(const RECINDEX *rp)
183 {
184 char *cp = rp->rec[0];
185 char *mend = rp->rec[rp->nrecs];
186 int i;
188 while (*cp != '\n')
189 if (++cp >= mend)
190 return(0);
191 for (i = 0; i < rp->nrecs; i++)
192 if (rp->rec[i] >= cp)
193 break;
194 return(i);
195 }
197 /* copy nth record from index to stdout */
198 static int
199 print_record(const RECINDEX *rp, int n)
200 {
201 int words2go = rp->nw_rec;
202 char *scp;
204 if ((n < 0) | (n >= rp->nrecs))
205 return(0);
206 scp = rp->rec[n];
207 do {
208 putc(*scp++, stdout);
209 if (!*scp | isspace(*scp)) {
210 if (--words2go <= 0)
211 break;
212 putc(' ', stdout); /* single space btwn. words */
213 do
214 if (++scp >= rp->rec[n+1])
215 break;
216 while (!*scp | isspace(*scp));
217 }
218 } while (scp < rp->rec[n+1]);
219 /* caller adds record sep. */
220 return(1);
221 }
/*
 * Copy everything remaining on a stream to stdout using raw descriptor
 * I/O.  Pending stdout data is flushed first; any data already sitting
 * in the input stream's stdio buffer is not seen (caller must ensure
 * there is none).  Returns 1 on success, 0 on a NULL stream or I/O error.
 */
static int
output_stream(FILE *fp)
{
	char	chunk[8192];
	ssize_t	nbytes;

	if (fp == NULL)
		return(0);
	fflush(stdout);			/* assumes nothing in input buffer */
	for ( ; ; ) {
		nbytes = read(fileno(fp), chunk, sizeof(chunk));
		if (nbytes <= 0)
			break;		/* EOF or read error */
		if (write(fileno(stdout), chunk, nbytes) != nbytes)
			return(0);
	}
	return(nbytes >= 0);
}
/*
 * Get next word from stream, leaving stream on EOL or start of next word.
 *
 * Leading NUL bytes and white space (including newlines) are skipped.
 * At most 255 characters are stored in buf, which is NUL-terminated;
 * a longer word is continued by the next call.  Blanks following the
 * word are consumed up to, but not including, a newline.
 *
 * Returns buf, or NULL if end of input was reached before any word.
 */
static char *
fget_word(char buf[256], FILE *fp)
{
	int	c;
	char	*cp;
					/* skip nul's and white space */
	while (!(c = getc(fp)) || isspace(c))
		;
	if (c == EOF)
		return(NULL);
	cp = buf;
	do
		*cp++ = c;
	while ((c = getc(fp)) != EOF && !isspace(c) && cp < buf+255);
	*cp = '\0';
					/* isspace() only promises nonzero, so
					   use logical (not bitwise) AND */
	while (isspace(c) && c != '\n')
		c = getc(fp);
	if (c != EOF)
		ungetc(c, fp);
	return(buf);
}
/* Settings below are assigned from the command line in main() */
char		*fmtid = "ascii";	/* format id (-f option; checked against header format) */
int		record_width = 3;	/* words/record (<0 binary: magnitude is bytes/record) */
int		ni_columns = 0;		/* number of input columns (-ic; 0 if not given) */
int		ni_rows = 0;		/* number of input rows (-ir; 0 if not given) */
int		no_columns = 0;		/* number of output columns (-oc; 0 if not given) */
int		no_rows = 0;		/* number of output rows (-or; 0 if not given) */
269 /* output transposed ASCII or binary data from memory */
270 static int
271 do_transpose(const MEMLOAD *mp)
272 {
273 static const char tabEOL[2] = {'\t','\n'};
274 RECINDEX *rp = NULL;
275 long nrecords;
276 int i, j;
277 /* propogate sizes */
278 if (ni_rows <= 0)
279 ni_rows = no_columns;
280 if (ni_columns <= 0)
281 ni_columns = no_rows;
282 /* get # records (& index) */
283 if (record_width > 0) {
284 if ((rp = index_records(mp, record_width)) == NULL)
285 return(0);
286 if (ni_columns <= 0)
287 ni_columns = count_columns(rp);
288 nrecords = rp->nrecs;
289 } else if ((ni_rows > 0) & (ni_columns > 0)) {
290 nrecords = ni_rows*ni_columns;
291 if (nrecords > mp->len / -record_width) {
292 fprintf(stderr,
293 "Input too small for specified size and type\n");
294 return(0);
295 }
296 } else
297 nrecords = mp->len / -record_width;
298 /* check sizes */
299 if ((ni_rows <= 0) & (ni_columns > 0))
300 ni_rows = nrecords/ni_columns;
301 if ((ni_columns <= 0) & (ni_rows > 0))
302 ni_columns = nrecords/ni_rows;
303 if (nrecords != ni_rows*ni_columns)
304 goto badspec;
305 if (no_columns <= 0)
306 no_columns = ni_rows;
307 if (no_rows <= 0)
308 no_rows = ni_columns;
309 if ((no_rows != ni_columns) | (no_columns != ni_rows))
310 goto badspec;
311 /* transpose records */
312 for (i = 0; i < no_rows; i++) {
313 for (j = 0; j < no_columns; j++)
314 if (rp != NULL) { /* ASCII output */
315 print_record(rp, j*ni_columns + i);
316 putc(tabEOL[j >= no_columns-1], stdout);
317 } else { /* binary output */
318 fwrite((char *)mp->base +
319 -record_width*(j*ni_columns + i),
320 -record_width, 1, stdout);
321 }
322 if (ferror(stdout)) {
323 fprintf(stderr, "Error writing to stdout\n");
324 return(0);
325 }
326 }
327 if (rp != NULL)
328 free_records(rp);
329 return(1);
330 badspec:
331 fprintf(stderr, "Bad transpose specification -- check dimension(s)\n");
332 return(0);
333 }
335 /* resize ASCII stream input by ignoring EOLs between records */
336 static int
337 do_resize(FILE *fp)
338 {
339 long records2go = ni_rows*ni_columns;
340 int columns2go = no_columns;
341 char word[256];
342 /* sanity checks */
343 if (record_width <= 0) {
344 fprintf(stderr, "Bad call to do_resize (record_width = %d)\n",
345 record_width);
346 return(0);
347 }
348 if (no_columns <= 0) {
349 fprintf(stderr, "Missing -oc specification\n");
350 return(0);
351 }
352 if ((records2go <= 0) & (no_rows > 0))
353 records2go = no_rows*no_columns;
354 else if (no_rows*no_columns != records2go) {
355 fprintf(stderr,
356 "Input and output data sizes disagree (%dx%d != %dx%d)\n",
357 ni_rows, ni_columns, no_rows, no_columns);
358 return(0);
359 }
360 do { /* reshape records */
361 int n;
363 for (n = record_width; n--; ) {
364 if (fget_word(word, fp) == NULL) {
365 if (records2go > 0 || n < record_width-1)
366 break;
367 goto done; /* normal EOD */
368 }
369 fputs(word, stdout);
370 if (n) { /* mid-record? */
371 int c = getc(fp);
372 if ((c == '\n') | (c == EOF))
373 break;
374 ungetc(c, fp);
375 putc(' ', stdout);
376 }
377 }
378 if (n >= 0) {
379 fprintf(stderr, "Incomplete record / unexpected EOF\n");
380 return(0);
381 }
382 if (--columns2go <= 0) { /* time to end output row? */
383 putc('\n', stdout);
384 columns2go = no_columns;
385 } else /* else separate records */
386 putc('\t', stdout);
387 } while (--records2go); /* expected EOD? */
388 done:
389 if (warnings && columns2go != no_columns)
390 fprintf(stderr, "Warning -- incomplete final row\n");
391 if (warnings && fget_word(word, fp) != NULL)
392 fprintf(stderr, "Warning -- characters beyond expected EOD\n");
393 return(1);
394 }
396 /* process a header line and copy to stdout */
397 static int
398 headline(char *s, void *p)
399 {
400 char fmt[32];
402 if (formatval(fmt, s)) {
403 if (!strcmp(fmt, fmtid))
404 return(0);
405 fprintf(stderr, "Input format '%s' != '%s'\n", fmt, fmtid);
406 return(-1);
407 }
408 fputs(s, stdout); /* copy header info. */
409 return(0);
410 }
412 /* main routine for converting rows/columns in data file */
413 int
414 main(int argc, char *argv[])
415 {
416 int do_header = 1; /* header i/o? */
417 int transpose = 0; /* transpose rows & cols? */
418 int i;
420 for (i = 1; i < argc && argv[i][0] == '-'; i++)
421 switch (argv[i][1]) {
422 case 'i': /* input */
423 if (argv[i][2] == 'c') /* columns */
424 ni_columns = atoi(argv[++i]);
425 else if (argv[i][2] == 'r')
426 ni_rows = atoi(argv[++i]);
427 else
428 goto userr;
429 break;
430 case 'o': /* output */
431 if (argv[i][2] == 'c') /* columns */
432 no_columns = atoi(argv[++i]);
433 else if (argv[i][2] == 'r')
434 no_rows = atoi(argv[++i]);
435 else
436 goto userr;
437 break;
438 case 'h': /* header on/off */
439 do_header = !do_header;
440 break;
441 case 't': /* transpose on/off */
442 transpose = !transpose;
443 break;
444 case 'f': /* format */
445 switch (argv[i][2]) {
446 case 'a': /* ASCII */
447 case 'A':
448 fmtid = "ascii";
449 record_width = 1;
450 break;
451 case 'f': /* float */
452 case 'F':
453 fmtid = "float";
454 record_width = -(int)sizeof(float);
455 break;
456 case 'd': /* double */
457 case 'D':
458 fmtid = "double";
459 record_width = -(int)sizeof(double);
460 break;
461 case 'b': /* binary (bytes) */
462 case 'B':
463 fmtid = "byte";
464 record_width = -1;
465 break;
466 default:
467 goto userr;
468 }
469 if (argv[i][3]) {
470 if (!isdigit(argv[i][3]))
471 goto userr;
472 record_width *= atoi(argv[i]+3);
473 }
474 break;
475 case 'w': /* warnings on/off */
476 warnings = !warnings;
477 break;
478 default:
479 goto userr;
480 }
481 if (!record_width)
482 goto userr;
483 if (i < argc-1) /* arg count OK? */
484 goto userr;
485 /* open input file? */
486 if (i == argc-1 && freopen(argv[i], "r", stdin) == NULL) {
487 fprintf(stderr, "%s: cannot open for reading\n", argv[i]);
488 return(1);
489 }
490 if (record_width < 0) {
491 SET_FILE_BINARY(stdin);
492 SET_FILE_BINARY(stdout);
493 }
494 /* check for no-op */
495 if (!transpose && (record_width < 0 ||
496 (no_columns == ni_columns) & (no_rows == ni_rows))) {
497 if (warnings)
498 fprintf(stderr, "%s: no-op -- copying input verbatim\n",
499 argv[0]);
500 if (!output_stream(stdin))
501 return(1);
502 return(0);
503 }
504 if (do_header) { /* read/write header */
505 if (getheader(stdin, &headline, NULL) < 0)
506 return(1);
507 printargs(argc, argv, stdout);
508 fputformat(fmtid, stdout);
509 fputc('\n', stdout); /* finish new header */
510 }
511 if (transpose) { /* transposing rows & columns? */
512 MEMLOAD myMem; /* need to load into memory */
513 if (i == argc-1) {
514 if (load_file(&myMem, stdin) <= 0) {
515 fprintf(stderr, "%s: error loading file into memory\n",
516 argv[i]);
517 return(1);
518 }
519 } else if (load_stream(&myMem, stdin) <= 0) {
520 fprintf(stderr, "%s: error loading stdin into memory\n",
521 argv[0]);
522 return(1);
523 }
524 if (!do_transpose(&myMem))
525 return(1);
526 /* free_load(&myMem); */
527 } else if (!do_resize(stdin)) /* just reshaping input */
528 return(1);
529 return(0);
530 userr:
531 fprintf(stderr,
532 "Usage: %s [-h][-w][-f[afdb][N]][-t][-ic in_col][-ir in_row][-oc out_col][-or out_row] [input.dat]\n",
533 argv[0]);
534 return(1);
535 }