1 |
#if !defined(lint)
/* revision control stamp kept in the compiled object (left out for lint) */
static const char	RCSid[] =
	"$Id: rcollate.c,v 2.4 2013/09/06 21:34:39 greg Exp $";
#endif
4 |
/* |
5 |
* Utility to re-order records in a binary or ASCII data file (matrix) |
6 |
*/ |
7 |
|
8 |
#include <stdlib.h> |
9 |
#include <unistd.h> |
10 |
#include <string.h> |
11 |
#include <ctype.h> |
12 |
#include "platform.h" |
13 |
#include "rtio.h" |
14 |
#include "resolu.h" |
15 |
#ifndef _WIN32 |
16 |
#include <sys/mman.h> |
17 |
#endif |
18 |
|
19 |
/*
 * If the C library provides getc_unlocked as a macro, send getc/putc
 * through the unlocked variants to avoid the overhead of flockfile
 * on every character.
 */
#if defined(getc_unlocked)
#undef getc
#undef putc
#define getc	getc_unlocked
#define putc	putc_unlocked
#endif
25 |
|
26 |
/* contents of a file held in memory, either read in or memory-mapped */
typedef struct {
	void	*base;		/* start of the loaded data */
	size_t	len;		/* length of the loaded data in bytes */
	int	mapped;		/* non-zero when base was obtained from mmap() */
} MEMLOAD;
31 |
|
32 |
/* index of record starting positions within loaded ASCII data */
typedef struct {
	int	nw_rec;		/* words making up each record */
	int	nrecs;		/* how many records were indexed */
	char	*rec[1];	/* record start pointers; the array is allocated
				   past the end of the struct, and rec[nrecs]
				   is set to the end of the data */
} RECINDEX;
37 |
|
38 |
/* non-zero to print warning messages on stderr (toggled by -w) */
int	warnings = 1;
39 |
|
40 |
/* free loaded file */ |
41 |
static void |
42 |
free_load(MEMLOAD *mp) |
43 |
{ |
44 |
if (mp == NULL || (mp->base == NULL) | (mp->len <= 0)) |
45 |
return; |
46 |
#ifdef MAP_FILE |
47 |
if (mp->mapped) |
48 |
munmap(mp->base, mp->len); |
49 |
else |
50 |
#endif |
51 |
free(mp->base); |
52 |
mp->base = NULL; |
53 |
mp->len = 0; |
54 |
} |
55 |
|
56 |
/* load a file into memory */ |
57 |
static int |
58 |
load_file(MEMLOAD *mp, FILE *fp) |
59 |
{ |
60 |
int fd; |
61 |
off_t skip, flen; |
62 |
|
63 |
if (mp == NULL) |
64 |
return(-1); |
65 |
mp->base = NULL; |
66 |
mp->len = 0; |
67 |
mp->mapped = 0; |
68 |
if (fp == NULL) |
69 |
return(-1); |
70 |
fd = fileno(fp); |
71 |
skip = ftello(fp); |
72 |
flen = lseek(fd, 0, SEEK_END); |
73 |
if (flen <= skip) |
74 |
return((int)(flen - skip)); |
75 |
mp->len = (size_t)(flen - skip); |
76 |
#ifdef MAP_FILE |
77 |
if (mp->len > 1L<<20) { /* map file if > 1 MByte */ |
78 |
mp->base = mmap(NULL, mp->len, PROT_READ, MAP_PRIVATE, fd, skip); |
79 |
if (mp->base != MAP_FAILED) { |
80 |
mp->mapped = 1; |
81 |
return(1); /* mmap() success */ |
82 |
} |
83 |
mp->base = NULL; /* fall back to reading it in... */ |
84 |
} |
85 |
#endif |
86 |
if (lseek(fd, skip, SEEK_SET) != skip || |
87 |
(mp->base = malloc(mp->len)) == NULL) { |
88 |
mp->len = 0; |
89 |
return(-1); |
90 |
} |
91 |
if (read(fd, (char *)mp->base, mp->len) != mp->len) { |
92 |
free_load(mp); |
93 |
return(-1); |
94 |
} |
95 |
return(1); |
96 |
} |
97 |
|
98 |
/* load memory from an input stream, starting from current position */ |
99 |
static int |
100 |
load_stream(MEMLOAD *mp, FILE *fp) |
101 |
{ |
102 |
char buf[8192]; |
103 |
size_t nr; |
104 |
|
105 |
if (mp == NULL) |
106 |
return(-1); |
107 |
mp->base = NULL; |
108 |
mp->len = 0; |
109 |
mp->mapped = 0; |
110 |
if (fp == NULL) |
111 |
return(-1); |
112 |
while ((nr = fread(buf, 1, sizeof(buf), fp)) > 0) { |
113 |
if (!mp->len) |
114 |
mp->base = malloc(nr); |
115 |
else |
116 |
mp->base = realloc(mp->base, mp->len+nr); |
117 |
if (mp->base == NULL) |
118 |
return(-1); |
119 |
memcpy((char *)mp->base + mp->len, buf, nr); |
120 |
mp->len += nr; |
121 |
} |
122 |
if (ferror(fp)) { |
123 |
free_load(mp); |
124 |
return(-1); |
125 |
} |
126 |
return(mp->len > 0); |
127 |
} |
128 |
|
129 |
/* release a record index obtained from index_records() */
#define free_records(rp)	free(rp)
131 |
|
132 |
/* compute record index */ |
133 |
static RECINDEX * |
134 |
index_records(const MEMLOAD *mp, int nw_rec) |
135 |
{ |
136 |
RECINDEX *rp; |
137 |
char *cp, *mend; |
138 |
int n; |
139 |
|
140 |
if (mp == NULL || (mp->base == NULL) | (mp->len <= 0)) |
141 |
return(NULL); |
142 |
if (nw_rec <= 0) |
143 |
return(NULL); |
144 |
rp = (RECINDEX *)malloc(sizeof(RECINDEX) + mp->len/(2*nw_rec)*sizeof(char *)); |
145 |
if (rp == NULL) |
146 |
return(NULL); |
147 |
rp->nw_rec = nw_rec; |
148 |
rp->nrecs = 0; |
149 |
cp = (char *)mp->base; |
150 |
mend = cp + mp->len; |
151 |
for ( ; ; ) { /* whitespace-separated words */ |
152 |
while (cp < mend && !*cp | isspace(*cp)) |
153 |
++cp; |
154 |
if (cp >= mend) |
155 |
break; |
156 |
rp->rec[rp->nrecs++] = cp; /* point to first non-white */ |
157 |
n = rp->nw_rec; |
158 |
while (++cp < mend) /* find end of record */ |
159 |
if (!*cp | isspace(*cp)) { |
160 |
if (--n <= 0) |
161 |
break; /* got requisite # words */ |
162 |
do { /* else find next word */ |
163 |
if (*cp == '\n') { |
164 |
fprintf(stderr, |
165 |
"Unexpected EOL in record!\n"); |
166 |
free_records(rp); |
167 |
return(NULL); |
168 |
} |
169 |
if (++cp >= mend) |
170 |
break; |
171 |
} while (!*cp | isspace(*cp)); |
172 |
} |
173 |
} |
174 |
rp->rec[rp->nrecs] = mend; /* reallocate to save space */ |
175 |
rp = (RECINDEX *)realloc(rp, |
176 |
sizeof(RECINDEX) + rp->nrecs*sizeof(char *)); |
177 |
return(rp); |
178 |
} |
179 |
|
180 |
/* count number of columns based on first EOL */ |
181 |
static int |
182 |
count_columns(const RECINDEX *rp) |
183 |
{ |
184 |
char *cp = rp->rec[0]; |
185 |
char *mend = rp->rec[rp->nrecs]; |
186 |
int i; |
187 |
|
188 |
while (*cp != '\n') |
189 |
if (++cp >= mend) |
190 |
return(0); |
191 |
for (i = 0; i < rp->nrecs; i++) |
192 |
if (rp->rec[i] >= cp) |
193 |
break; |
194 |
return(i); |
195 |
} |
196 |
|
197 |
/* copy nth record from index to stdout */ |
198 |
static int |
199 |
print_record(const RECINDEX *rp, int n) |
200 |
{ |
201 |
int words2go = rp->nw_rec; |
202 |
char *scp; |
203 |
|
204 |
if ((n < 0) | (n >= rp->nrecs)) |
205 |
return(0); |
206 |
scp = rp->rec[n]; |
207 |
do { |
208 |
putc(*scp++, stdout); |
209 |
if (!*scp | isspace(*scp)) { |
210 |
if (--words2go <= 0) |
211 |
break; |
212 |
putc(' ', stdout); /* single space btwn. words */ |
213 |
do |
214 |
if (++scp >= rp->rec[n+1]) |
215 |
break; |
216 |
while (!*scp | isspace(*scp)); |
217 |
} |
218 |
} while (scp < rp->rec[n+1]); |
219 |
/* caller adds record sep. */ |
220 |
return(1); |
221 |
} |
222 |
|
223 |
/*
 * Copy the rest of an input stream to stdout using the underlying file
 * descriptors.  stdout is flushed first so that output stays in order.
 * Data already sitting in the stdio buffer of fp is not seen, so this
 * assumes nothing has been read from fp yet.
 *
 * Returns 1 on success (including empty input), or 0 if fp is NULL or
 * a read or write fails.
 */
static int
output_stream(FILE *fp)
{
	char	buf[8192];
	ssize_t	n;
	int	ifd, ofd;

	if (fp == NULL)
		return(0);
	fflush(stdout);			/* assumes nothing in input buffer */
	ifd = fileno(fp);
	ofd = fileno(stdout);
	while ((n = read(ifd, buf, sizeof(buf))) > 0) {
		const char	*bp = buf;
					/* write() may accept only part of
					   the buffer, so send the rest */
		while (n > 0) {
			ssize_t	nw = write(ofd, bp, (size_t)n);
			if (nw <= 0)
				return(0);
			bp += nw;
			n -= nw;
		}
	}
	return(n >= 0);			/* n < 0 means read error */
}
238 |
|
239 |
/*
 * Get the next word from a stream, leaving the stream positioned on an
 * EOL or at the start of the next word.
 *
 *	buf	receives the nul-terminated word (at most 255 characters;
 *		a longer word is continued by the next call)
 *	fp	input stream
 *
 * Leading nul's and white space (including newlines) are skipped.
 * After the word, blanks are skipped up to but not including a newline.
 * Returns buf, or NULL if end of file is reached before any word.
 */
static char *
fget_word(char buf[256], FILE *fp)
{
	int	c;
	char	*cp = buf;
					/* skip nul's and white space */
	do
		c = getc(fp);
	while (c != EOF && (c == '\0' || isspace(c)));
	if (c == EOF)
		return(NULL);
	do				/* gather characters of word */
		*cp++ = (char)c;
	while ((c = getc(fp)) != EOF && !isspace(c) && cp < buf+255);
	*cp = '\0';
					/* isspace() only promises non-zero,
					   so it must not be AND'ed bitwise */
	while (c != '\n' && c != EOF && isspace(c))
		c = getc(fp);
	if (c != EOF)
		ungetc(c, fp);
	return(buf);
}
261 |
|
262 |
/* data format and matrix dimensions, set from the command line */
char	*fmtid = "ascii";		/* format identifier for header */
int	record_width = 3;		/* words per record if positive,
					   else minus bytes per record */
int	ni_columns = 0;			/* input columns (0 if unset) */
int	ni_rows = 0;			/* input rows (0 if unset) */
int	no_columns = 0;			/* output columns (0 if unset) */
int	no_rows = 0;			/* output rows (0 if unset) */
268 |
|
269 |
/* output transposed ASCII or binary data from memory */ |
270 |
static int |
271 |
do_transpose(const MEMLOAD *mp) |
272 |
{ |
273 |
static const char tabEOL[2] = {'\t','\n'}; |
274 |
RECINDEX *rp = NULL; |
275 |
long nrecords; |
276 |
int i, j; |
277 |
/* propogate sizes */ |
278 |
if (ni_rows <= 0) |
279 |
ni_rows = no_columns; |
280 |
if (ni_columns <= 0) |
281 |
ni_columns = no_rows; |
282 |
/* get # records (& index) */ |
283 |
if (record_width > 0) { |
284 |
if ((rp = index_records(mp, record_width)) == NULL) |
285 |
return(0); |
286 |
if (ni_columns <= 0) |
287 |
ni_columns = count_columns(rp); |
288 |
nrecords = rp->nrecs; |
289 |
} else if ((ni_rows > 0) & (ni_columns > 0)) { |
290 |
nrecords = ni_rows*ni_columns; |
291 |
if (nrecords > mp->len / -record_width) { |
292 |
fprintf(stderr, |
293 |
"Input too small for specified size and type\n"); |
294 |
return(0); |
295 |
} |
296 |
} else |
297 |
nrecords = mp->len / -record_width; |
298 |
/* check sizes */ |
299 |
if ((ni_rows <= 0) & (ni_columns > 0)) |
300 |
ni_rows = nrecords/ni_columns; |
301 |
if ((ni_columns <= 0) & (ni_rows > 0)) |
302 |
ni_columns = nrecords/ni_rows; |
303 |
if (nrecords != ni_rows*ni_columns) |
304 |
goto badspec; |
305 |
if (no_columns <= 0) |
306 |
no_columns = ni_rows; |
307 |
if (no_rows <= 0) |
308 |
no_rows = ni_columns; |
309 |
if ((no_rows != ni_columns) | (no_columns != ni_rows)) |
310 |
goto badspec; |
311 |
/* transpose records */ |
312 |
for (i = 0; i < no_rows; i++) { |
313 |
for (j = 0; j < no_columns; j++) |
314 |
if (rp != NULL) { /* ASCII output */ |
315 |
print_record(rp, j*ni_columns + i); |
316 |
putc(tabEOL[j >= no_columns-1], stdout); |
317 |
} else { /* binary output */ |
318 |
fwrite((char *)mp->base + |
319 |
-record_width*(j*ni_columns + i), |
320 |
-record_width, 1, stdout); |
321 |
} |
322 |
if (ferror(stdout)) { |
323 |
fprintf(stderr, "Error writing to stdout\n"); |
324 |
return(0); |
325 |
} |
326 |
} |
327 |
if (rp != NULL) |
328 |
free_records(rp); |
329 |
return(1); |
330 |
badspec: |
331 |
fprintf(stderr, "Bad transpose specification -- check dimension(s)\n"); |
332 |
return(0); |
333 |
} |
334 |
|
335 |
/* resize ASCII stream input by ignoring EOLs between records */ |
336 |
static int |
337 |
do_resize(FILE *fp) |
338 |
{ |
339 |
long records2go = ni_rows*ni_columns; |
340 |
int columns2go = no_columns; |
341 |
char word[256]; |
342 |
/* sanity checks */ |
343 |
if (record_width <= 0) { |
344 |
fprintf(stderr, "Bad call to do_resize (record_width = %d)\n", |
345 |
record_width); |
346 |
return(0); |
347 |
} |
348 |
if (no_columns <= 0) { |
349 |
fprintf(stderr, "Missing -oc specification\n"); |
350 |
return(0); |
351 |
} |
352 |
if ((records2go <= 0) & (no_rows > 0)) |
353 |
records2go = no_rows*no_columns; |
354 |
else if (no_rows*no_columns != records2go) { |
355 |
fprintf(stderr, |
356 |
"Input and output data sizes disagree (%dx%d != %dx%d)\n", |
357 |
ni_rows, ni_columns, no_rows, no_columns); |
358 |
return(0); |
359 |
} |
360 |
do { /* reshape records */ |
361 |
int n; |
362 |
|
363 |
for (n = record_width; n--; ) { |
364 |
if (fget_word(word, fp) == NULL) { |
365 |
if (records2go > 0 || n < record_width-1) |
366 |
break; |
367 |
goto done; /* normal EOD */ |
368 |
} |
369 |
fputs(word, stdout); |
370 |
if (n) { /* mid-record? */ |
371 |
int c = getc(fp); |
372 |
if ((c == '\n') | (c == EOF)) |
373 |
break; |
374 |
ungetc(c, fp); |
375 |
putc(' ', stdout); |
376 |
} |
377 |
} |
378 |
if (n >= 0) { |
379 |
fprintf(stderr, "Incomplete record / unexpected EOF\n"); |
380 |
return(0); |
381 |
} |
382 |
if (--columns2go <= 0) { /* time to end output row? */ |
383 |
putc('\n', stdout); |
384 |
columns2go = no_columns; |
385 |
} else /* else separate records */ |
386 |
putc('\t', stdout); |
387 |
} while (--records2go); /* expected EOD? */ |
388 |
done: |
389 |
if (warnings && columns2go != no_columns) |
390 |
fprintf(stderr, "Warning -- incomplete final row\n"); |
391 |
if (warnings && fget_word(word, fp) != NULL) |
392 |
fprintf(stderr, "Warning -- characters beyond expected EOD\n"); |
393 |
return(1); |
394 |
} |
395 |
|
396 |
/* process a header line and copy to stdout */ |
397 |
static int |
398 |
headline(char *s, void *p) |
399 |
{ |
400 |
char fmt[32]; |
401 |
|
402 |
if (formatval(fmt, s)) { |
403 |
if (!strcmp(fmt, fmtid)) |
404 |
return(0); |
405 |
fprintf(stderr, "Input format '%s' != '%s'\n", fmt, fmtid); |
406 |
return(-1); |
407 |
} |
408 |
fputs(s, stdout); /* copy header info. */ |
409 |
return(0); |
410 |
} |
411 |
|
412 |
/* main routine for converting rows/columns in data file */ |
413 |
int |
414 |
main(int argc, char *argv[]) |
415 |
{ |
416 |
int do_header = 1; /* header i/o? */ |
417 |
int transpose = 0; /* transpose rows & cols? */ |
418 |
int i; |
419 |
|
420 |
for (i = 1; i < argc && argv[i][0] == '-'; i++) |
421 |
switch (argv[i][1]) { |
422 |
case 'i': /* input */ |
423 |
if (argv[i][2] == 'c') /* columns */ |
424 |
ni_columns = atoi(argv[++i]); |
425 |
else if (argv[i][2] == 'r') |
426 |
ni_rows = atoi(argv[++i]); |
427 |
else |
428 |
goto userr; |
429 |
break; |
430 |
case 'o': /* output */ |
431 |
if (argv[i][2] == 'c') /* columns */ |
432 |
no_columns = atoi(argv[++i]); |
433 |
else if (argv[i][2] == 'r') |
434 |
no_rows = atoi(argv[++i]); |
435 |
else |
436 |
goto userr; |
437 |
break; |
438 |
case 'h': /* header on/off */ |
439 |
do_header = !do_header; |
440 |
break; |
441 |
case 't': /* transpose on/off */ |
442 |
transpose = !transpose; |
443 |
break; |
444 |
case 'f': /* format */ |
445 |
switch (argv[i][2]) { |
446 |
case 'a': /* ASCII */ |
447 |
case 'A': |
448 |
fmtid = "ascii"; |
449 |
record_width = 1; |
450 |
break; |
451 |
case 'f': /* float */ |
452 |
case 'F': |
453 |
fmtid = "float"; |
454 |
record_width = -(int)sizeof(float); |
455 |
break; |
456 |
case 'd': /* double */ |
457 |
case 'D': |
458 |
fmtid = "double"; |
459 |
record_width = -(int)sizeof(double); |
460 |
break; |
461 |
case 'b': /* binary (bytes) */ |
462 |
case 'B': |
463 |
fmtid = "byte"; |
464 |
record_width = -1; |
465 |
break; |
466 |
default: |
467 |
goto userr; |
468 |
} |
469 |
if (argv[i][3]) { |
470 |
if (!isdigit(argv[i][3])) |
471 |
goto userr; |
472 |
record_width *= atoi(argv[i]+3); |
473 |
} |
474 |
break; |
475 |
case 'w': /* warnings on/off */ |
476 |
warnings = !warnings; |
477 |
break; |
478 |
default: |
479 |
goto userr; |
480 |
} |
481 |
if (!record_width) |
482 |
goto userr; |
483 |
if (i < argc-1) /* arg count OK? */ |
484 |
goto userr; |
485 |
/* open input file? */ |
486 |
if (i == argc-1 && freopen(argv[i], "r", stdin) == NULL) { |
487 |
fprintf(stderr, "%s: cannot open for reading\n", argv[i]); |
488 |
return(1); |
489 |
} |
490 |
if (record_width < 0) { |
491 |
SET_FILE_BINARY(stdin); |
492 |
SET_FILE_BINARY(stdout); |
493 |
} |
494 |
/* check for no-op */ |
495 |
if (!transpose && (record_width < 0 || |
496 |
(no_columns == ni_columns) & (no_rows == ni_rows))) { |
497 |
if (warnings) |
498 |
fprintf(stderr, "%s: no-op -- copying input verbatim\n", |
499 |
argv[0]); |
500 |
if (!output_stream(stdin)) |
501 |
return(1); |
502 |
return(0); |
503 |
} |
504 |
if (do_header) { /* read/write header */ |
505 |
if (getheader(stdin, &headline, NULL) < 0) |
506 |
return(1); |
507 |
printargs(argc, argv, stdout); |
508 |
fputformat(fmtid, stdout); |
509 |
fputc('\n', stdout); /* finish new header */ |
510 |
} |
511 |
if (transpose) { /* transposing rows & columns? */ |
512 |
MEMLOAD myMem; /* need to load into memory */ |
513 |
if (i == argc-1) { |
514 |
if (load_file(&myMem, stdin) <= 0) { |
515 |
fprintf(stderr, "%s: error loading file into memory\n", |
516 |
argv[i]); |
517 |
return(1); |
518 |
} |
519 |
} else if (load_stream(&myMem, stdin) <= 0) { |
520 |
fprintf(stderr, "%s: error loading stdin into memory\n", |
521 |
argv[0]); |
522 |
return(1); |
523 |
} |
524 |
if (!do_transpose(&myMem)) |
525 |
return(1); |
526 |
/* free_load(&myMem); */ |
527 |
} else if (!do_resize(stdin)) /* just reshaping input */ |
528 |
return(1); |
529 |
return(0); |
530 |
userr: |
531 |
fprintf(stderr, |
532 |
"Usage: %s [-h][-w][-f[afdb][N]][-t][-ic in_col][-ir in_row][-oc out_col][-or out_row] [input.dat]\n", |
533 |
argv[0]); |
534 |
return(1); |
535 |
} |