| 1 | #ifndef lint | 
| 2 | static const char RCSid[] = "$Id: rcollate.c,v 2.1 2013/09/05 17:53:23 greg Exp $"; | 
| 3 | #endif | 
| 4 | /* | 
| 5 | * Utility to re-order records in a binary or ASCII data file (matrix) | 
| 6 | */ | 
| 7 |  | 
| 8 | #include <stdlib.h> | 
| 9 | #include <unistd.h> | 
| 10 | #include <string.h> | 
| 11 | #include <ctype.h> | 
| 12 | #include "platform.h" | 
| 13 | #include "rtio.h" | 
| 14 | #include "resolu.h" | 
| 15 | #ifndef _WIN32 | 
| 16 | #include <sys/mman.h> | 
| 17 | #endif | 
| 18 |  | 
#ifdef getc_unlocked		/* per-character stream locking is costly -- bypass it */
#undef getc
#undef putc
#define getc	getc_unlocked
#define putc	putc_unlocked
#endif
| 25 |  | 
/* in-memory copy (or mapping) of input data */
typedef struct {
	void	*base;		/* start of the loaded data */
	size_t	len;		/* number of bytes available at base */
	int	mapped;		/* non-zero if base was obtained from mmap() */
} MEMLOAD;
| 31 |  | 
/* index of record start positions within loaded ASCII data */
typedef struct {
	int	nw_rec;		/* words making up each record */
	int	nrecs;		/* count of records indexed */
	char	*rec[1];	/* nrecs record starts, then end-of-data pointer
				 * (allocated past the end of the struct) */
} RECINDEX;
| 37 |  | 
| 38 | /* free loaded file */ | 
| 39 | static void | 
| 40 | free_load(MEMLOAD *mp) | 
| 41 | { | 
| 42 | if (mp == NULL || (mp->base == NULL) | (mp->len <= 0)) | 
| 43 | return; | 
| 44 | #ifdef MAP_FILE | 
| 45 | if (mp->mapped) | 
| 46 | munmap(mp->base, mp->len); | 
| 47 | else | 
| 48 | #endif | 
| 49 | free(mp->base); | 
| 50 | mp->base = NULL; | 
| 51 | mp->len = 0; | 
| 52 | } | 
| 53 |  | 
| 54 | /* load a file into memory */ | 
| 55 | static int | 
| 56 | load_file(MEMLOAD *mp, FILE *fp) | 
| 57 | { | 
| 58 | int     fd; | 
| 59 | off_t   skip, flen; | 
| 60 |  | 
| 61 | if (mp == NULL) | 
| 62 | return(-1); | 
| 63 | mp->base = NULL; | 
| 64 | mp->len = 0; | 
| 65 | mp->mapped = 0; | 
| 66 | if (fp == NULL) | 
| 67 | return(-1); | 
| 68 | fd = fileno(fp); | 
| 69 | skip = ftello(fp); | 
| 70 | flen = lseek(fd, 0, SEEK_END); | 
| 71 | if (flen <= skip) | 
| 72 | return((int)(flen - skip)); | 
| 73 | mp->len = (size_t)(flen - skip); | 
| 74 | #ifdef MAP_FILE | 
| 75 | if (mp->len > 1L<<20) {         /* map file if > 1 MByte */ | 
| 76 | mp->base = mmap(NULL, mp->len, PROT_READ, MAP_PRIVATE, fd, skip); | 
| 77 | if (mp->base != MAP_FAILED) { | 
| 78 | mp->mapped = 1; | 
| 79 | return(1);      /* mmap() success */ | 
| 80 | } | 
| 81 | mp->base = NULL;        /* fall back to reading it in... */ | 
| 82 | } | 
| 83 | #endif | 
| 84 | if (lseek(fd, skip, SEEK_SET) != skip || | 
| 85 | (mp->base = malloc(mp->len)) == NULL) { | 
| 86 | mp->len = 0; | 
| 87 | return(-1); | 
| 88 | } | 
| 89 | if (read(fd, (char *)mp->base, mp->len) != mp->len) { | 
| 90 | free_load(mp); | 
| 91 | return(-1); | 
| 92 | } | 
| 93 | return(1); | 
| 94 | } | 
| 95 |  | 
| 96 | /* load memory from an input stream, starting from current position */ | 
| 97 | static int | 
| 98 | load_stream(MEMLOAD *mp, FILE *fp) | 
| 99 | { | 
| 100 | char    buf[8192]; | 
| 101 | size_t  nr; | 
| 102 |  | 
| 103 | if (mp == NULL) | 
| 104 | return(-1); | 
| 105 | mp->base = NULL; | 
| 106 | mp->len = 0; | 
| 107 | mp->mapped = 0; | 
| 108 | if (fp == NULL) | 
| 109 | return(-1); | 
| 110 | while ((nr = fread(buf, 1, sizeof(buf), fp)) > 0) { | 
| 111 | if (!mp->len) | 
| 112 | mp->base = malloc(nr); | 
| 113 | else | 
| 114 | mp->base = realloc(mp->base, mp->len+nr); | 
| 115 | if (mp->base == NULL) | 
| 116 | return(-1); | 
| 117 | memcpy((char *)mp->base + mp->len, buf, nr); | 
| 118 | mp->len += nr; | 
| 119 | } | 
| 120 | if (ferror(fp)) { | 
| 121 | free_load(mp); | 
| 122 | return(-1); | 
| 123 | } | 
| 124 | return(mp->len > 0); | 
| 125 | } | 
| 126 |  | 
| 127 | /* free a record index */ | 
| 128 | #define free_records(rp)        free(rp) | 
| 129 |  | 
| 130 | /* compute record index */ | 
| 131 | static RECINDEX * | 
| 132 | index_records(const MEMLOAD *mp, int nw_rec) | 
| 133 | { | 
| 134 | RECINDEX        *rp; | 
| 135 | char            *cp, *mend; | 
| 136 | int             n; | 
| 137 |  | 
| 138 | if (mp == NULL || (mp->base == NULL) | (mp->len <= 0)) | 
| 139 | return(NULL); | 
| 140 | if (nw_rec <= 0) | 
| 141 | return(NULL); | 
| 142 | rp = (RECINDEX *)malloc(sizeof(RECINDEX) + mp->len/(2*nw_rec)*sizeof(char *)); | 
| 143 | if (rp == NULL) | 
| 144 | return(NULL); | 
| 145 | rp->nw_rec = nw_rec; | 
| 146 | rp->nrecs = 0; | 
| 147 | cp = (char *)mp->base; | 
| 148 | mend = cp + mp->len; | 
| 149 | for ( ; ; ) {                           /* whitespace-separated words */ | 
| 150 | while (cp < mend && !*cp | isspace(*cp)) | 
| 151 | ++cp; | 
| 152 | if (cp >= mend) | 
| 153 | break; | 
| 154 | rp->rec[rp->nrecs++] = cp;      /* point to first non-white */ | 
| 155 | n = rp->nw_rec; | 
| 156 | while (++cp < mend)             /* find end of record */ | 
| 157 | if (!*cp | isspace(*cp)) { | 
| 158 | if (--n <= 0) | 
| 159 | break;  /* got requisite # words */ | 
| 160 | do {            /* else find next word */ | 
| 161 | if (*cp == '\n') { | 
| 162 | fprintf(stderr, | 
| 163 | "Unexpected EOL in record!\n"); | 
| 164 | free_records(rp); | 
| 165 | return(NULL); | 
| 166 | } | 
| 167 | if (++cp >= mend) | 
| 168 | break; | 
| 169 | } while (!*cp | isspace(*cp)); | 
| 170 | } | 
| 171 | } | 
| 172 | rp->rec[rp->nrecs] = mend;              /* reallocate to save space */ | 
| 173 | rp = (RECINDEX *)realloc(rp, | 
| 174 | sizeof(RECINDEX) + rp->nrecs*sizeof(char *)); | 
| 175 | return(rp); | 
| 176 | } | 
| 177 |  | 
| 178 | /* count number of columns based on first EOL */ | 
| 179 | static int | 
| 180 | count_columns(const RECINDEX *rp) | 
| 181 | { | 
| 182 | char    *cp = rp->rec[0]; | 
| 183 | char    *mend = rp->rec[rp->nrecs]; | 
| 184 | int     i; | 
| 185 |  | 
| 186 | while (*cp != '\n') | 
| 187 | if (++cp >= mend) | 
| 188 | return(0); | 
| 189 | for (i = 0; i < rp->nrecs; i++) | 
| 190 | if (rp->rec[i] >= cp) | 
| 191 | break; | 
| 192 | return(i); | 
| 193 | } | 
| 194 |  | 
| 195 | /* copy nth record from index to stdout */ | 
| 196 | static int | 
| 197 | print_record(const RECINDEX *rp, int n) | 
| 198 | { | 
| 199 | int     words2go = rp->nw_rec; | 
| 200 | char    *scp; | 
| 201 |  | 
| 202 | if ((n < 0) | (n >= rp->nrecs)) | 
| 203 | return(0); | 
| 204 | scp = rp->rec[n]; | 
| 205 | do { | 
| 206 | putc(*scp++, stdout); | 
| 207 | if (!*scp | isspace(*scp)) { | 
| 208 | if (--words2go <= 0) | 
| 209 | break; | 
| 210 | putc(' ', stdout);      /* single space btwn. words */ | 
| 211 | do | 
| 212 | if (++scp >= rp->rec[n+1]) | 
| 213 | break; | 
| 214 | while (!*scp | isspace(*scp)); | 
| 215 | } | 
| 216 | } while (scp < rp->rec[n+1]); | 
| 217 | /* caller adds record sep. */ | 
| 218 | return(1); | 
| 219 | } | 
| 220 |  | 
/*
 * Copy the rest of a stream to stdout using the underlying file
 * descriptors.  Assumes nothing has been buffered on the input stream.
 *
 * Returns 1 on success, 0 on a NULL stream or any read/write failure.
 */
static int
output_stream(FILE *fp)
{
	char	buf[8192];
	ssize_t	n;
	int	ifd, ofd;

	if (fp == NULL)
		return(0);
	fflush(stdout);			/* assumes nothing in input buffer */
	ifd = fileno(fp);
	ofd = fileno(stdout);
	while ((n = read(ifd, buf, sizeof(buf))) > 0) {
		ssize_t	done = 0;
		while (done < n) {	/* write() may accept only part */
			ssize_t	nw = write(ofd, buf + done,
						(size_t)(n - done));
			if (nw <= 0)
				return(0);
			done += nw;
		}
	}
	return(n >= 0);
}
| 236 |  | 
/*
 * Get the next word from a stream, leaving the stream on EOL or at the
 * start of the next word.
 *
 * buf	receives the NUL-terminated word (at most 255 characters; a
 *	longer word is continued by the next call)
 * fp	input stream
 *
 * Returns buf, or NULL if end of input is reached before any word.
 */
static char *
fget_word(char buf[256], FILE *fp)
{
	int	c;
	char	*cp;
					/* skip nul's and white space */
	while (!(c = getc(fp)) || isspace(c))
		;
	if (c == EOF)
		return(NULL);
	cp = buf;
	do
		*cp++ = c;
	while ((c = getc(fp)) != EOF && !isspace(c) && cp < buf+255);
	*cp = '\0';
	/*
	 * Skip trailing blanks but stop at a newline.  isspace() only
	 * promises a non-zero result, so this must be a logical AND.
	 */
	while (isspace(c) && c != '\n')
		c = getc(fp);
	if (c != EOF)
		ungetc(c, fp);
	return(buf);
}
| 259 |  | 
/* run-time settings, filled in from the command line by main() */
char	*fmtid = "ascii";	/* data format identifier */
int	record_width = 3;	/* >0: words per ASCII record; <0: minus bytes per binary record */
int	ni_columns = 0;		/* input column count (<=0 if unspecified) */
int	ni_rows = 0;		/* input row count (<=0 if unspecified) */
int	no_columns = 0;		/* output column count (<=0 if unspecified) */
int	no_rows = 0;		/* output row count (<=0 if unspecified) */
| 266 |  | 
| 267 | /* output transposed ASCII or binary data from memory */ | 
| 268 | static int | 
| 269 | do_transpose(const MEMLOAD *mp) | 
| 270 | { | 
| 271 | static const char       tabEOL[2] = {'\t','\n'}; | 
| 272 | RECINDEX                *rp = NULL; | 
| 273 | long                    nrecords; | 
| 274 | int                     i, j; | 
| 275 | /* get # records (& index) */ | 
| 276 | if (record_width > 0) { | 
| 277 | if ((rp = index_records(mp, record_width)) == NULL) | 
| 278 | return(0); | 
| 279 | if (ni_columns <= 0) | 
| 280 | ni_columns = count_columns(rp); | 
| 281 | nrecords = rp->nrecs; | 
| 282 | } else if ((ni_rows > 0) & (ni_columns > 0)) | 
| 283 | nrecords = ni_rows*ni_columns; | 
| 284 | else | 
| 285 | nrecords = mp->len / -record_width; | 
| 286 | /* check sizes */ | 
| 287 | if (ni_rows <= 0) | 
| 288 | ni_rows = no_columns; | 
| 289 | if (ni_columns <= 0) | 
| 290 | ni_columns = no_rows; | 
| 291 | if ((ni_rows <= 0) & (ni_columns > 0)) | 
| 292 | ni_rows = nrecords/ni_columns; | 
| 293 | if ((ni_columns <= 0) & (ni_rows > 0)) | 
| 294 | ni_columns = nrecords/ni_rows; | 
| 295 | if (nrecords != ni_rows*ni_columns) | 
| 296 | goto badspec; | 
| 297 | if (no_columns <= 0) | 
| 298 | no_columns = ni_rows; | 
| 299 | if (no_rows <= 0) | 
| 300 | no_rows = ni_columns; | 
| 301 | if ((no_rows != ni_columns) | (no_columns != ni_rows)) | 
| 302 | goto badspec; | 
| 303 | /* transpose records */ | 
| 304 | for (i = 0; i < no_rows; i++) { | 
| 305 | for (j = 0; j < no_columns; j++) | 
| 306 | if (rp != NULL) {               /* ASCII output */ | 
| 307 | print_record(rp, j*ni_columns + i); | 
| 308 | putc(tabEOL[j >= no_columns-1], stdout); | 
| 309 | } else {                        /* binary output */ | 
| 310 | fwrite((char *)mp->base + | 
| 311 | -record_width*(j*ni_columns + i), | 
| 312 | -record_width, 1, stdout); | 
| 313 | } | 
| 314 | if (ferror(stdout)) { | 
| 315 | fprintf(stderr, "Error writing to stdout\n"); | 
| 316 | return(0); | 
| 317 | } | 
| 318 | } | 
| 319 | if (rp != NULL) | 
| 320 | free_records(rp); | 
| 321 | return(1); | 
| 322 | badspec: | 
| 323 | fprintf(stderr, "Bad transpose specification -- check dimension(s)\n"); | 
| 324 | return(0); | 
| 325 | } | 
| 326 |  | 
| 327 | /* resize ASCII stream input by ignoring EOLs between records */ | 
| 328 | static int | 
| 329 | do_resize(FILE *fp) | 
| 330 | { | 
| 331 | long    records2go = ni_rows*ni_columns; | 
| 332 | int     columns2go = no_columns; | 
| 333 | char    word[256]; | 
| 334 | /* sanity checks */ | 
| 335 | if (record_width <= 0) { | 
| 336 | fprintf(stderr, "Bad call to do_resize (record_width = %d)\n", | 
| 337 | record_width); | 
| 338 | return(0); | 
| 339 | } | 
| 340 | if (no_columns <= 0) { | 
| 341 | fprintf(stderr, "Missing -oc specification\n"); | 
| 342 | return(0); | 
| 343 | } | 
| 344 | if ((records2go <= 0) & (no_rows > 0)) | 
| 345 | records2go = no_rows*no_columns; | 
| 346 | else if (no_rows*no_columns != records2go) { | 
| 347 | fprintf(stderr, | 
| 348 | "Input and output data sizes disagree (%dx%d != %dx%d)\n", | 
| 349 | ni_rows, ni_columns, no_rows, no_columns); | 
| 350 | return(0); | 
| 351 | } | 
| 352 | do {                                    /* reshape records */ | 
| 353 | int     n; | 
| 354 |  | 
| 355 | for (n = record_width; n--; ) { | 
| 356 | if (fget_word(word, fp) == NULL) { | 
| 357 | if (records2go > 0 || n < record_width-1) | 
| 358 | break; | 
| 359 | goto done;      /* normal EOD */ | 
| 360 | } | 
| 361 | fputs(word, stdout); | 
| 362 | if (n) {                /* mid-record? */ | 
| 363 | int     c = getc(fp); | 
| 364 | if ((c == '\n') | (c == EOF)) | 
| 365 | break; | 
| 366 | ungetc(c, fp); | 
| 367 | putc(' ', stdout); | 
| 368 | } | 
| 369 | } | 
| 370 | if (n >= 0) { | 
| 371 | fprintf(stderr, "Incomplete record / unexpected EOF\n"); | 
| 372 | return(0); | 
| 373 | } | 
| 374 | if (--columns2go <= 0) {        /* time to end output row? */ | 
| 375 | putc('\n', stdout); | 
| 376 | columns2go = no_columns; | 
| 377 | } else                          /* else separate records */ | 
| 378 | putc('\t', stdout); | 
| 379 | } while (--records2go);                 /* expected EOD? */ | 
| 380 | done: | 
| 381 | if (columns2go != no_columns) | 
| 382 | fprintf(stderr, "Warning -- incomplete final row\n"); | 
| 383 | if (fget_word(word, fp) != NULL) | 
| 384 | fprintf(stderr, "Warning -- data beyond expected EOF\n"); | 
| 385 | return(1); | 
| 386 | } | 
| 387 |  | 
| 388 | /* process a header line and copy to stdout */ | 
| 389 | static int | 
| 390 | headline(char *s, void *p) | 
| 391 | { | 
| 392 | char    fmt[32]; | 
| 393 |  | 
| 394 | if (formatval(fmt, s)) { | 
| 395 | if (!strcmp(fmt, fmtid)) | 
| 396 | return(0); | 
| 397 | fprintf(stderr, "Input format '%s' != '%s'\n", fmt, fmtid); | 
| 398 | return(-1); | 
| 399 | } | 
| 400 | fputs(s, stdout);                       /* copy header info. */ | 
| 401 | return(0); | 
| 402 | } | 
| 403 |  | 
| 404 | /* main routine for converting rows/columns in data file */ | 
| 405 | int | 
| 406 | main(int argc, char *argv[]) | 
| 407 | { | 
| 408 | int     do_header = 1;                  /* header i/o? */ | 
| 409 | int     transpose = 0;                  /* transpose rows & cols? */ | 
| 410 | int     i; | 
| 411 |  | 
| 412 | for (i = 1; i < argc && argv[i][0] == '-'; i++) | 
| 413 | switch (argv[i][1]) { | 
| 414 | case 'i':                       /* input */ | 
| 415 | if (argv[i][2] == 'c')  /* columns */ | 
| 416 | ni_columns = atoi(argv[++i]); | 
| 417 | else if (argv[i][2] == 'r') | 
| 418 | ni_rows = atoi(argv[++i]); | 
| 419 | else | 
| 420 | goto userr; | 
| 421 | break; | 
| 422 | case 'o':                       /* output */ | 
| 423 | if (argv[i][2] == 'c')  /* columns */ | 
| 424 | no_columns = atoi(argv[++i]); | 
| 425 | else if (argv[i][2] == 'r') | 
| 426 | no_rows = atoi(argv[++i]); | 
| 427 | else | 
| 428 | goto userr; | 
| 429 | break; | 
| 430 | case 'h':                       /* header on/off */ | 
| 431 | do_header = !do_header; | 
| 432 | break; | 
| 433 | case 't':                       /* transpose on/off */ | 
| 434 | transpose = !transpose; | 
| 435 | break; | 
| 436 | case 'f':                       /* format */ | 
| 437 | switch (argv[i][2]) { | 
| 438 | case 'a':               /* ASCII */ | 
| 439 | case 'A': | 
| 440 | fmtid = "ascii"; | 
| 441 | record_width = 1; | 
| 442 | break; | 
| 443 | case 'f':               /* float */ | 
| 444 | case 'F': | 
| 445 | fmtid = "float"; | 
| 446 | record_width = -(int)sizeof(float); | 
| 447 | break; | 
| 448 | case 'd':               /* double */ | 
| 449 | case 'D': | 
| 450 | fmtid = "double"; | 
| 451 | record_width = -(int)sizeof(double); | 
| 452 | break; | 
| 453 | case 'b':               /* binary (bytes) */ | 
| 454 | case 'B': | 
| 455 | fmtid = "byte"; | 
| 456 | record_width = -1; | 
| 457 | break; | 
| 458 | default: | 
| 459 | goto userr; | 
| 460 | } | 
| 461 | if (argv[i][3]) { | 
| 462 | if (!isdigit(argv[i][3])) | 
| 463 | goto userr; | 
| 464 | record_width *= atoi(argv[i]+3); | 
| 465 | } | 
| 466 | break; | 
| 467 | default: | 
| 468 | goto userr; | 
| 469 | } | 
| 470 | if (!record_width) | 
| 471 | goto userr; | 
| 472 | if (i < argc-1)                         /* arg count OK? */ | 
| 473 | goto userr; | 
| 474 | /* open input file? */ | 
| 475 | if (i == argc-1 && freopen(argv[i], "r", stdin) == NULL) { | 
| 476 | fprintf(stderr, "%s: cannot open for reading\n", argv[i]); | 
| 477 | return(1); | 
| 478 | } | 
| 479 | if (record_width < 0) { | 
| 480 | SET_FILE_BINARY(stdin); | 
| 481 | SET_FILE_BINARY(stdout); | 
| 482 | } | 
| 483 | /* check for no-op */ | 
| 484 | if (!transpose && (record_width < 0 || | 
| 485 | (no_columns == ni_columns) & (no_rows == ni_rows))) { | 
| 486 | fprintf(stderr, "%s: no-op -- copying input verbatim\n", | 
| 487 | argv[0]); | 
| 488 | if (!output_stream(stdin)) | 
| 489 | return(1); | 
| 490 | return(0); | 
| 491 | } | 
| 492 | if (do_header) {                        /* read/write header */ | 
| 493 | if (getheader(stdin, &headline, NULL) < 0) | 
| 494 | return(1); | 
| 495 | printargs(argc, argv, stdout); | 
| 496 | fputformat(fmtid, stdout); | 
| 497 | fputc('\n', stdout);            /* finish new header */ | 
| 498 | } | 
| 499 | if (transpose) {                        /* transposing rows & columns? */ | 
| 500 | MEMLOAD myMem;                  /* need to load into memory */ | 
| 501 | if (i == argc-1) { | 
| 502 | if (load_file(&myMem, stdin) <= 0) { | 
| 503 | fprintf(stderr, "%s: error loading file into memory\n", | 
| 504 | argv[i]); | 
| 505 | return(1); | 
| 506 | } | 
| 507 | } else if (load_stream(&myMem, stdin) <= 0) { | 
| 508 | fprintf(stderr, "%s: error loading stdin into memory\n", | 
| 509 | argv[0]); | 
| 510 | return(1); | 
| 511 | } | 
| 512 | if (!do_transpose(&myMem)) | 
| 513 | return(1); | 
| 514 | /* free_load(&myMem); */ | 
| 515 | } else if (!do_resize(stdin))           /* just reshaping input */ | 
| 516 | return(1); | 
| 517 | return(0); | 
| 518 | userr: | 
| 519 | fprintf(stderr, | 
| 520 | "Usage: %s [-h][-f[afdb][N]][-t][-ic in_col][-ir in_row][-oc out_col][-or out_row] [input.dat]\n", | 
| 521 | argv[0]); | 
| 522 | return(1); | 
| 523 | } |