1 |
#ifndef lint |
2 |
static const char RCSid[] = "$Id$"; |
3 |
#endif |
4 |
/* |
5 |
* Utility to re-order records in a binary or ASCII data file (matrix) |
6 |
*/ |
7 |
|
8 |
#include <stdlib.h> |
9 |
#include <unistd.h> |
10 |
#include <string.h> |
11 |
#include <ctype.h> |
12 |
#include "platform.h" |
13 |
#include "rtio.h" |
14 |
#include "resolu.h" |
15 |
#ifndef _WIN32 |
16 |
#include <sys/mman.h> |
17 |
#endif |
18 |
|
19 |
#ifdef getc_unlocked /* avoid horrendous overhead of flockfile */ |
20 |
#undef getc |
21 |
#undef putc |
22 |
#define getc getc_unlocked |
23 |
#define putc putc_unlocked |
24 |
#endif |
25 |
|
26 |
/*
 * A block of input data held in memory.
 * The data is either read into malloc'ed storage or memory-mapped
 * from the input file; free_load() uses the "mapped" flag to decide
 * between munmap() and free().
 */
typedef struct {
	void	*base;		/* pointer to base memory (NULL if nothing loaded) */
	size_t	len;		/* allocated memory length in bytes */
	int	mapped;		/* non-zero if base is a memory-mapped file */
} MEMLOAD;		/* file loaded/mapped into memory */
31 |
|
32 |
/*
 * Index of the ASCII records found in a loaded file.
 * The structure is allocated with extra room so that rec[] really holds
 * nrecs+1 pointers: rec[i] is the first character of record i, and
 * rec[nrecs] points just past the end of the data.
 * NOTE: the one-element array is deliberate -- the allocation sizes used
 * by index_records() count on sizeof(RECINDEX) including one slot.
 */
typedef struct {
	int	nw_rec;		/* number of words per record */
	int	nrecs;		/* number of records we found */
	char	*rec[1];	/* record array (extends struct) */
} RECINDEX;
37 |
|
38 |
/* free loaded file */ |
39 |
static void |
40 |
free_load(MEMLOAD *mp) |
41 |
{ |
42 |
if (mp == NULL || (mp->base == NULL) | (mp->len <= 0)) |
43 |
return; |
44 |
#ifdef MAP_FILE |
45 |
if (mp->mapped) |
46 |
munmap(mp->base, mp->len); |
47 |
else |
48 |
#endif |
49 |
free(mp->base); |
50 |
mp->base = NULL; |
51 |
mp->len = 0; |
52 |
} |
53 |
|
54 |
/* load a file into memory */ |
55 |
static int |
56 |
load_file(MEMLOAD *mp, FILE *fp) |
57 |
{ |
58 |
int fd; |
59 |
off_t skip, flen; |
60 |
|
61 |
if (mp == NULL) |
62 |
return(-1); |
63 |
mp->base = NULL; |
64 |
mp->len = 0; |
65 |
mp->mapped = 0; |
66 |
if (fp == NULL) |
67 |
return(-1); |
68 |
fd = fileno(fp); |
69 |
skip = ftello(fp); |
70 |
flen = lseek(fd, 0, SEEK_END); |
71 |
if (flen <= skip) |
72 |
return((int)(flen - skip)); |
73 |
mp->len = (size_t)(flen - skip); |
74 |
#ifdef MAP_FILE |
75 |
if (mp->len > 1L<<20) { /* map file if > 1 MByte */ |
76 |
mp->base = mmap(NULL, mp->len, PROT_READ|PROT_WRITE, |
77 |
MAP_PRIVATE, fd, skip); |
78 |
if (mp->base != MAP_FAILED) { |
79 |
mp->mapped = 1; |
80 |
return(1); /* mmap() success */ |
81 |
} |
82 |
mp->base = NULL; /* fall back to reading it in... */ |
83 |
} |
84 |
#endif |
85 |
if (lseek(fd, skip, SEEK_SET) != skip || |
86 |
(mp->base = malloc(mp->len)) == NULL) { |
87 |
mp->len = 0; |
88 |
return(-1); |
89 |
} |
90 |
if (read(fd, (char *)mp->base, mp->len) != mp->len) { |
91 |
free_load(mp); |
92 |
return(-1); |
93 |
} |
94 |
return(1); |
95 |
} |
96 |
|
97 |
/* load memory from an input stream, starting from current position */ |
98 |
static int |
99 |
load_stream(MEMLOAD *mp, FILE *fp) |
100 |
{ |
101 |
char buf[8192]; |
102 |
size_t nr; |
103 |
|
104 |
if (mp == NULL) |
105 |
return(-1); |
106 |
mp->base = NULL; |
107 |
mp->len = 0; |
108 |
mp->mapped = 0; |
109 |
if (fp == NULL) |
110 |
return(-1); |
111 |
while ((nr = fread(buf, 1, sizeof(buf), fp)) > 0) { |
112 |
if (!mp->len) |
113 |
mp->base = malloc(nr); |
114 |
else |
115 |
mp->base = realloc(mp->base, mp->len+nr); |
116 |
if (mp->base == NULL) |
117 |
return(-1); |
118 |
memcpy((char *)mp->base + mp->len, buf, nr); |
119 |
mp->len += nr; |
120 |
} |
121 |
if (ferror(fp)) { |
122 |
free_load(mp); |
123 |
return(-1); |
124 |
} |
125 |
return(mp->len > 0); |
126 |
} |
127 |
|
128 |
/* free a record index (a single allocation, so plain free() suffices) */
#define free_records(rp)	free(rp)
130 |
|
131 |
/* compute record index */ |
132 |
static RECINDEX * |
133 |
index_records(const MEMLOAD *mp, int nw_rec) |
134 |
{ |
135 |
RECINDEX *rp; |
136 |
char *cp, *mend; |
137 |
int n; |
138 |
|
139 |
if (mp == NULL || (mp->base == NULL) | (mp->len <= 0)) |
140 |
return(NULL); |
141 |
if (nw_rec <= 0) |
142 |
return(NULL); |
143 |
rp = (RECINDEX *)malloc(sizeof(RECINDEX) + mp->len/(2*nw_rec)*sizeof(char *)); |
144 |
if (rp == NULL) |
145 |
return(NULL); |
146 |
rp->nw_rec = nw_rec; |
147 |
rp->nrecs = 0; |
148 |
cp = (char *)mp->base; |
149 |
mend = cp + mp->len; |
150 |
for ( ; ; ) { /* whitespace-separated words */ |
151 |
while (cp < mend && !*cp | isspace(*cp)) |
152 |
++cp; |
153 |
if (cp >= mend) |
154 |
break; |
155 |
rp->rec[rp->nrecs++] = cp; /* point to first non-white */ |
156 |
n = rp->nw_rec; |
157 |
while (++cp < mend) /* find end of record */ |
158 |
if (!*cp | isspace(*cp)) { |
159 |
if (--n <= 0) |
160 |
break; /* got requisite # words */ |
161 |
do { /* else find next word */ |
162 |
if (*cp == '\n') { |
163 |
fprintf(stderr, |
164 |
"Unexpected EOL in record!\n"); |
165 |
free_records(rp); |
166 |
return(NULL); |
167 |
} |
168 |
if (++cp >= mend) |
169 |
break; |
170 |
} while (!*cp | isspace(*cp)); |
171 |
} |
172 |
} |
173 |
rp->rec[rp->nrecs] = mend; /* reallocate to save space */ |
174 |
rp = (RECINDEX *)realloc(rp, |
175 |
sizeof(RECINDEX) + rp->nrecs*sizeof(char *)); |
176 |
return(rp); |
177 |
} |
178 |
|
179 |
/* count number of columns based on first EOL */ |
180 |
static int |
181 |
count_columns(const RECINDEX *rp) |
182 |
{ |
183 |
char *cp = rp->rec[0]; |
184 |
char *mend = rp->rec[rp->nrecs]; |
185 |
int i; |
186 |
|
187 |
while (*cp != '\n') |
188 |
if (++cp >= mend) |
189 |
return(0); |
190 |
for (i = 0; i < rp->nrecs; i++) |
191 |
if (rp->rec[i] >= cp) |
192 |
break; |
193 |
return(i); |
194 |
} |
195 |
|
196 |
/* copy nth record from index to stdout */ |
197 |
static int |
198 |
print_record(const RECINDEX *rp, int n) |
199 |
{ |
200 |
int words2go = rp->nw_rec; |
201 |
char *scp; |
202 |
|
203 |
if ((n < 0) | (n >= rp->nrecs)) |
204 |
return(0); |
205 |
scp = rp->rec[n]; |
206 |
do { |
207 |
putc(*scp++, stdout); |
208 |
if (!*scp | isspace(*scp)) { |
209 |
if (--words2go <= 0) |
210 |
break; |
211 |
putc(' ', stdout); /* single space btwn. words */ |
212 |
do |
213 |
if (++scp >= rp->rec[n+1]) |
214 |
break; |
215 |
while (!*scp | isspace(*scp)); |
216 |
} |
217 |
} while (scp < rp->rec[n+1]); |
218 |
/* caller adds record sep. */ |
219 |
return(1); |
220 |
} |
221 |
|
222 |
/*
 * Copy a stream to stdout using raw read()/write() on the descriptors.
 * Anything pending in stdout's buffer is flushed first; data already
 * sitting in fp's stdio buffer would be missed, so this assumes
 * nothing has been read from fp yet.
 * Returns 1 on success, 0 on a NULL stream or any I/O error.
 */
static int
output_stream(FILE *fp)
{
	char	buf[8192];
	ssize_t	nr;

	if (fp == NULL)
		return(0);
	if (fflush(stdout) == EOF)	/* assumes nothing in input buffer */
		return(0);
	while ((nr = read(fileno(fp), buf, sizeof(buf))) > 0) {
		const char	*bp = buf;

		while (nr > 0) {	/* write() may accept only part */
			ssize_t	nw = write(fileno(stdout), bp, (size_t)nr);
			if (nw <= 0)
				return(0);
			bp += nw;
			nr -= nw;
		}
	}
	return(nr >= 0);		/* read() < 0 means input error */
}
237 |
|
238 |
/*
 * Get next word from stream, leaving stream on EOL or start of next word.
 *
 * Leading NUL bytes and white space (including newlines) are skipped.
 * The word is copied to buf and NUL-terminated; words longer than 255
 * characters are split, the remainder being returned by the next call.
 * Blanks after the word are consumed up to, but not including, a
 * newline so the caller can tell whether the line ended.
 *
 * Returns buf, or NULL if EOF was reached before any word.
 */
static char *
fget_word(char buf[256], FILE *fp)
{
	int	c;
	char	*cp;
					/* skip nul's and white space */
	while (!(c = getc(fp)) || isspace(c))
		;
	if (c == EOF)
		return(NULL);
	cp = buf;
	do
		*cp++ = c;
	while ((c = getc(fp)) != EOF && c && !isspace(c) && cp < buf+255);
	*cp = '\0';
	/*
	 * isspace() only promises a non-zero result, not 1, so it must
	 * not be combined with a comparison using bitwise '&'.
	 */
	while (c != EOF && c != '\n' && (!c || isspace(c)))
		c = getc(fp);
	if (c != EOF)
		ungetc(c, fp);
	return(buf);
}
260 |
|
261 |
/* Program settings, set from the command line in main() */
char	*fmtid = "ascii";		/* format id */
int	record_width = 3;		/* words/record (<0 binary: -bytes/record) */
int	ni_columns = 0;			/* number of input columns (0 = unspecified) */
int	ni_rows = 0;			/* number of input rows (0 = unspecified) */
int	no_columns = 0;			/* number of output columns (0 = unspecified) */
int	no_rows = 0;			/* number of output rows (0 = unspecified) */
267 |
|
268 |
/* output transposed ASCII or binary data from memory */ |
269 |
static int |
270 |
do_transpose(const MEMLOAD *mp) |
271 |
{ |
272 |
static const char tabEOL[2] = {'\t','\n'}; |
273 |
RECINDEX *rp = NULL; |
274 |
long nrecords; |
275 |
int i, j; |
276 |
/* get # records (& index) */ |
277 |
if (record_width > 0) { |
278 |
if ((rp = index_records(mp, record_width)) == NULL) |
279 |
return(0); |
280 |
if (ni_columns <= 0) |
281 |
ni_columns = count_columns(rp); |
282 |
nrecords = rp->nrecs; |
283 |
} else if ((ni_rows > 0) & (ni_columns > 0)) |
284 |
nrecords = ni_rows*ni_columns; |
285 |
else |
286 |
nrecords = mp->len / -record_width; |
287 |
/* check sizes */ |
288 |
if (ni_rows <= 0) |
289 |
ni_rows = no_columns; |
290 |
if (ni_columns <= 0) |
291 |
ni_columns = no_rows; |
292 |
if ((ni_rows <= 0) & (ni_columns > 0)) |
293 |
ni_rows = nrecords/ni_columns; |
294 |
if ((ni_columns <= 0) & (ni_rows > 0)) |
295 |
ni_columns = nrecords/ni_rows; |
296 |
if (nrecords != ni_rows*ni_columns) |
297 |
goto badspec; |
298 |
if (no_columns <= 0) |
299 |
no_columns = ni_rows; |
300 |
if (no_rows <= 0) |
301 |
no_rows = ni_columns; |
302 |
if ((no_rows != ni_columns) | (no_columns != ni_rows)) |
303 |
goto badspec; |
304 |
/* transpose records */ |
305 |
for (i = 0; i < no_rows; i++) { |
306 |
for (j = 0; j < no_columns; j++) |
307 |
if (rp != NULL) { /* ASCII output */ |
308 |
print_record(rp, j*ni_columns + i); |
309 |
putc(tabEOL[j >= no_columns-1], stdout); |
310 |
} else { /* binary output */ |
311 |
fwrite((char *)mp->base + |
312 |
-record_width*(j*ni_columns + i), |
313 |
-record_width, 1, stdout); |
314 |
} |
315 |
if (ferror(stdout)) { |
316 |
fprintf(stderr, "Error writing to stdout\n"); |
317 |
return(0); |
318 |
} |
319 |
} |
320 |
if (rp != NULL) |
321 |
free_records(rp); |
322 |
return(1); |
323 |
badspec: |
324 |
fprintf(stderr, "Bad transpose specification -- check dimension(s)\n"); |
325 |
return(0); |
326 |
} |
327 |
|
328 |
/* resize ASCII stream input by ignoring EOLs between records */ |
329 |
static int |
330 |
do_resize(FILE *fp) |
331 |
{ |
332 |
long records2go = ni_rows*ni_columns; |
333 |
int columns2go = no_columns; |
334 |
char word[256]; |
335 |
/* sanity checks */ |
336 |
if (record_width <= 0) { |
337 |
fprintf(stderr, "Bad call to do_resize (record_width = %d)\n", |
338 |
record_width); |
339 |
return(0); |
340 |
} |
341 |
if (no_columns <= 0) { |
342 |
fprintf(stderr, "Missing -oc specification\n"); |
343 |
return(0); |
344 |
} |
345 |
if ((records2go <= 0) & (no_rows > 0)) |
346 |
records2go = no_rows*no_columns; |
347 |
else if (no_rows*no_columns != records2go) { |
348 |
fprintf(stderr, |
349 |
"Input and output data sizes disagree (%dx%d != %dx%d)\n", |
350 |
ni_rows, ni_columns, no_rows, no_columns); |
351 |
return(0); |
352 |
} |
353 |
do { /* reshape records */ |
354 |
int n; |
355 |
|
356 |
for (n = record_width; n--; ) { |
357 |
if (fget_word(word, fp) == NULL) { |
358 |
if (records2go > 0 || n < record_width-1) |
359 |
break; |
360 |
goto done; /* normal EOD */ |
361 |
} |
362 |
fputs(word, stdout); |
363 |
if (n) { /* mid-record? */ |
364 |
int c = getc(fp); |
365 |
if ((c == '\n') | (c == EOF)) |
366 |
break; |
367 |
ungetc(c, fp); |
368 |
putc(' ', stdout); |
369 |
} |
370 |
} |
371 |
if (n >= 0) { |
372 |
fprintf(stderr, "Incomplete record / unexpected EOF\n"); |
373 |
return(0); |
374 |
} |
375 |
if (--columns2go <= 0) { /* time to end output row? */ |
376 |
putc('\n', stdout); |
377 |
columns2go = no_columns; |
378 |
} else /* else separate records */ |
379 |
putc('\t', stdout); |
380 |
} while (--records2go); /* expected EOD? */ |
381 |
done: |
382 |
if (columns2go != no_columns) |
383 |
fprintf(stderr, "Warning -- incomplete final row\n"); |
384 |
if (fget_word(word, fp) != NULL) |
385 |
fprintf(stderr, "Warning -- data beyond expected EOF\n"); |
386 |
return(1); |
387 |
} |
388 |
|
389 |
/* process a header line and copy to stdout */ |
390 |
static int |
391 |
headline(char *s, void *p) |
392 |
{ |
393 |
char fmt[32]; |
394 |
|
395 |
if (formatval(fmt, s)) { |
396 |
if (!strcmp(fmt, fmtid)) |
397 |
return(0); |
398 |
fprintf(stderr, "Input format '%s' != '%s'\n", fmt, fmtid); |
399 |
return(-1); |
400 |
} |
401 |
fputs(s, stdout); /* copy header info. */ |
402 |
return(0); |
403 |
} |
404 |
|
405 |
/* main routine for converting rows/columns in data file */ |
406 |
int |
407 |
main(int argc, char *argv[]) |
408 |
{ |
409 |
int do_header = 1; /* header i/o? */ |
410 |
int transpose = 0; /* transpose rows & cols? */ |
411 |
int i; |
412 |
|
413 |
for (i = 1; i < argc && argv[i][0] == '-'; i++) |
414 |
switch (argv[i][1]) { |
415 |
case 'i': /* input */ |
416 |
if (argv[i][2] == 'c') /* columns */ |
417 |
ni_columns = atoi(argv[++i]); |
418 |
else if (argv[i][2] == 'r') |
419 |
ni_rows = atoi(argv[++i]); |
420 |
else |
421 |
goto userr; |
422 |
break; |
423 |
case 'o': /* output */ |
424 |
if (argv[i][2] == 'c') /* columns */ |
425 |
no_columns = atoi(argv[++i]); |
426 |
else if (argv[i][2] == 'r') |
427 |
no_rows = atoi(argv[++i]); |
428 |
else |
429 |
goto userr; |
430 |
break; |
431 |
case 'h': /* header on/off */ |
432 |
do_header = !do_header; |
433 |
break; |
434 |
case 't': /* transpose on/off */ |
435 |
transpose = !transpose; |
436 |
break; |
437 |
case 'f': /* format */ |
438 |
switch (argv[i][2]) { |
439 |
case 'a': /* ASCII */ |
440 |
case 'A': |
441 |
fmtid = "ascii"; |
442 |
record_width = 1; |
443 |
break; |
444 |
case 'f': /* float */ |
445 |
case 'F': |
446 |
fmtid = "float"; |
447 |
record_width = -(int)sizeof(float); |
448 |
break; |
449 |
case 'd': /* double */ |
450 |
case 'D': |
451 |
fmtid = "double"; |
452 |
record_width = -(int)sizeof(double); |
453 |
break; |
454 |
case 'b': /* binary (bytes) */ |
455 |
case 'B': |
456 |
fmtid = "byte"; |
457 |
record_width = -1; |
458 |
break; |
459 |
default: |
460 |
goto userr; |
461 |
} |
462 |
if (argv[i][3]) { |
463 |
if (!isdigit(argv[i][3])) |
464 |
goto userr; |
465 |
record_width *= atoi(argv[i]+3); |
466 |
} |
467 |
break; |
468 |
default: |
469 |
goto userr; |
470 |
} |
471 |
if (!record_width) |
472 |
goto userr; |
473 |
if (i < argc-1) /* arg count OK? */ |
474 |
goto userr; |
475 |
/* open input file? */ |
476 |
if (i == argc-1 && freopen(argv[i], "r", stdin) == NULL) { |
477 |
fprintf(stderr, "%s: cannot open for reading\n", argv[i]); |
478 |
return(1); |
479 |
} |
480 |
if (record_width < 0) { |
481 |
SET_FILE_BINARY(stdin); |
482 |
SET_FILE_BINARY(stdout); |
483 |
} |
484 |
/* check for no-op */ |
485 |
if (!transpose && (record_width < 0 || |
486 |
(no_columns == ni_columns) & (no_rows == ni_rows))) { |
487 |
fprintf(stderr, "%s: no-op -- copying input verbatim\n", |
488 |
argv[0]); |
489 |
if (!output_stream(stdin)) |
490 |
return(1); |
491 |
return(0); |
492 |
} |
493 |
if (do_header) { /* read/write header */ |
494 |
if (getheader(stdin, &headline, NULL) < 0) |
495 |
return(1); |
496 |
printargs(argc, argv, stdout); |
497 |
fputformat(fmtid, stdout); |
498 |
fputc('\n', stdout); /* finish new header */ |
499 |
} |
500 |
if (transpose) { /* transposing rows & columns? */ |
501 |
MEMLOAD myMem; /* need to load into memory */ |
502 |
if (i == argc-1) { |
503 |
if (load_file(&myMem, stdin) <= 0) { |
504 |
fprintf(stderr, "%s: error loading file into memory\n", |
505 |
argv[i]); |
506 |
return(1); |
507 |
} |
508 |
} else if (load_stream(&myMem, stdin) <= 0) { |
509 |
fprintf(stderr, "%s: error loading stdin into memory\n", |
510 |
argv[0]); |
511 |
return(1); |
512 |
} |
513 |
if (!do_transpose(&myMem)) |
514 |
return(1); |
515 |
/* free_load(&myMem); */ |
516 |
} else if (!do_resize(stdin)) /* just reshaping input */ |
517 |
return(1); |
518 |
return(0); |
519 |
userr: |
520 |
fprintf(stderr, |
521 |
"Usage: %s [-h][-f[afdb][N]][-t][-ic in_col][-ir in_row][-oc out_col][-or out_row] [input.dat]\n", |
522 |
argv[0]); |
523 |
return(1); |
524 |
} |