--- ray/src/util/rcollate.c 2014/07/09 21:45:48 2.16 +++ ray/src/util/rcollate.c 2022/03/16 19:25:25 2.42 @@ -1,46 +1,54 @@ #ifndef lint -static const char RCSid[] = "$Id: rcollate.c,v 2.16 2014/07/09 21:45:48 greg Exp $"; +static const char RCSid[] = "$Id: rcollate.c,v 2.42 2022/03/16 19:25:25 greg Exp $"; #endif /* * Utility to re-order records in a binary or ASCII data file (matrix) */ #include -#include #include #include "platform.h" #include "rtio.h" #include "resolu.h" -#ifdef _WIN32 -#undef ftello -#define ftello ftell -#undef ssize_t -#define ssize_t size_t +#if defined(_WIN32) || defined(_WIN64) + #undef ftello + #define ftello ftell + #undef ssize_t + #define ssize_t size_t #else -#include + #include #endif -#ifdef getc_unlocked /* avoid horrendous overhead of flockfile */ -#undef getc -#undef putc -#define getc getc_unlocked -#define putc putc_unlocked -#endif +#define MAXLEVELS 16 /* max RxC.. block pairs */ typedef struct { + void *mapped; /* memory-mapped pointer */ void *base; /* pointer to base memory */ size_t len; /* allocated memory length */ - int mapped; /* memory-mapped file? */ } MEMLOAD; /* file loaded/mapped into memory */ typedef struct { int nw_rec; /* number of words per record */ - int nrecs; /* number of records we found */ + size_t nrecs; /* number of records we found */ char *rec[1]; /* record array (extends struct) */ } RECINDEX; -int warnings = 1; /* report warnings? */ +int warnings = 1; /* report warnings? */ +char *fmtid = NULL; /* format id */ +int comp_size = 0; /* binary bytes/channel */ +int n_comp = 0; /* components/record */ +int ni_columns = 0; /* number of input columns */ +int ni_rows = 0; /* number of input rows */ +int no_columns = 0; /* number of output columns */ +int no_rows = 0; /* number of output rows */ +int transpose = 0; /* transpose rows & cols? */ +int i_header = 1; /* input header? */ +int o_header = 1; /* output header? */ +int outArray[MAXLEVELS][2]; /* output block nesting */ +int outLevels = 0; /* number of blocking levels */ +int check = 0; /* force data check? */ + /* free loaded file */ static void free_load(MEMLOAD *mp) @@ -49,56 +57,15 @@ free_load(MEMLOAD *mp) return; #ifdef MAP_FILE if (mp->mapped) - munmap(mp->base, mp->len); + munmap(mp->mapped, mp->len); else #endif free(mp->base); + mp->mapped = NULL; mp->base = NULL; mp->len = 0; } -/* load a file into memory */ -static int -load_file(MEMLOAD *mp, FILE *fp) -{ - int fd; - off_t skip, flen; - - if (mp == NULL) - return(-1); - mp->base = NULL; - mp->len = 0; - mp->mapped = 0; - if (fp == NULL) - return(-1); - fd = fileno(fp); - skip = ftello(fp); - flen = lseek(fd, 0, SEEK_END); - if (flen <= skip) - return((int)(flen - skip)); - mp->len = (size_t)(flen - skip); -#ifdef MAP_FILE - if (mp->len > 1L<<20) { /* map file if > 1 MByte */ - mp->base = mmap(NULL, mp->len, PROT_READ, MAP_PRIVATE, fd, skip); - if (mp->base != MAP_FAILED) { - mp->mapped = 1; - return(1); /* mmap() success */ - } - mp->base = NULL; /* fall back to reading it in... */ - } -#endif - if (lseek(fd, skip, SEEK_SET) != skip || - (mp->base = malloc(mp->len)) == NULL) { - mp->len = 0; - return(-1); - } - if (read(fd, (char *)mp->base, mp->len) != mp->len) { - free_load(mp); - return(-1); - } - return(1); -} - /* load memory from an input stream, starting from current position */ static int load_stream(MEMLOAD *mp, FILE *fp) @@ -109,14 +76,14 @@ load_stream(MEMLOAD *mp, FILE *fp) if (mp == NULL) return(-1); + mp->mapped = NULL; mp->base = NULL; mp->len = 0; - mp->mapped = 0; if (fp == NULL) return(-1); while ((nr = fread(buf, 1, sizeof(buf), fp)) > 0) { if (!alloced) - mp->base = malloc(nr); + mp->base = malloc(alloced = nr); else if (mp->len+nr > alloced) mp->base = realloc(mp->base, alloced = alloced*(2+(nr==sizeof(buf)))/2+nr); @@ -134,6 +101,58 @@ load_stream(MEMLOAD *mp, FILE *fp) return(mp->len > 0); } +#if defined(_WIN32) || defined(_WIN64) + /* too difficult to fix this */ +#define load_file load_stream +#else +/* load a file into memory */ +static int +load_file(MEMLOAD *mp, FILE *fp) +{ + int fd; + off_t skip, flen, fpos; + if (mp == NULL) + return(-1); + mp->mapped = NULL; + mp->base = NULL; + mp->len = 0; + if (fp == NULL) + return(-1); + fd = fileno(fp); + skip = ftello(fp); + flen = lseek(fd, 0, SEEK_END); + if (flen <= skip) + return((int)(flen - skip)); + mp->len = (size_t)(flen - skip); +#ifdef MAP_FILE + if (mp->len >= 1L<<20) { /* map file if >= 1 MByte */ + mp->mapped = mmap(NULL, flen, PROT_READ, MAP_PRIVATE, fd, 0); + if (mp->mapped != MAP_FAILED) { + mp->base = (char *)mp->mapped + skip; + return(1); /* mmap() success */ + } + mp->mapped = NULL; /* else fall back to reading it in... */ + } +#endif + if (lseek(fd, skip, SEEK_SET) != skip || + (mp->base = malloc(mp->len)) == NULL) { + mp->len = 0; + return(-1); + } + fpos = skip; + while (fpos < flen) { /* read() fails if n > 2 GBytes */ + ssize_t nread = read(fd, (char *)mp->base+(fpos-skip), + (flen-fpos < 1L<<24) ? flen-fpos : 1L<<24); + if (nread <= 0) { + free_load(mp); + return(-1); + } + fpos += nread; + } + return(1); +} +#endif + /* free a record index */ #define free_records(rp) free(rp) @@ -141,6 +160,7 @@ load_stream(MEMLOAD *mp, FILE *fp) static RECINDEX * index_records(const MEMLOAD *mp, int nw_rec) { + int nall = 0; RECINDEX *rp; char *cp, *mend; int n; @@ -149,7 +169,8 @@ index_records(const MEMLOAD *mp, int nw_rec) return(NULL); if (nw_rec <= 0) return(NULL); - rp = (RECINDEX *)malloc(sizeof(RECINDEX) + mp->len/(2*nw_rec)*sizeof(char *)); + nall = 1000; + rp = (RECINDEX *)malloc(sizeof(RECINDEX) + nall*sizeof(char *)); if (rp == NULL) return(NULL); rp->nw_rec = nw_rec; @@ -161,6 +182,13 @@ index_records(const MEMLOAD *mp, int nw_rec) ++cp; if (cp >= mend) break; + if (rp->nrecs >= nall) { + nall += nall>>1; /* get more record space */ + rp = (RECINDEX *)realloc(rp, + sizeof(RECINDEX) + nall*sizeof(char *)); + if (rp == NULL) + return(NULL); + } rp->rec[rp->nrecs++] = cp; /* point to first non-white */ n = rp->nw_rec; while (++cp < mend) /* find end of record */ @@ -169,8 +197,7 @@ index_records(const MEMLOAD *mp, int nw_rec) break; /* got requisite # words */ do { /* else find next word */ if (*cp == '\n') { - fprintf(stderr, - "Unexpected EOL in record!\n"); + fputs("Unexpected EOL in record!\n", stderr); free_records(rp); return(NULL); } @@ -204,14 +231,18 @@ count_columns(const RECINDEX *rp) /* copy nth record from index to stdout */ static int -print_record(const RECINDEX *rp, int n) +print_record(const RECINDEX *rp, size_t n) { - int words2go = rp->nw_rec; - char *scp; + static char delims[] = " \t\n\r\f"; + int words2go = rp->nw_rec; + char *scp; - if ((n < 0) | (n >= rp->nrecs)) + if (n >= rp->nrecs) return(0); scp = rp->rec[n]; + + if (check && !isfltd(scp, delims)) + goto formerr; do { putc(*scp++, stdout); if (!*scp | isspace(*scp)) { @@ -222,26 +253,38 @@ print_record(const RECINDEX *rp, int n) if (++scp >= rp->rec[n+1]) break; while (!*scp | isspace(*scp)); + + if (check && !isfltd(scp, delims)) + goto formerr; } } while (scp < rp->rec[n+1]); /* caller adds record sep. */ return(1); +formerr: + fputs("Badly formed number: ", stderr); + while (*scp && !isspace(*scp)) + fputc(*scp++, stderr); + fputc('\n', stderr); + return(0); } -/* copy a stream to stdout */ -static int +/* copy a stream to stdout, return bytes written or 0 on error */ +static size_t output_stream(FILE *fp) { + size_t ntot = 0; char buf[8192]; - ssize_t n; + size_t n; if (fp == NULL) return(0); fflush(stdout); - while ((n = fread(buf, 1, sizeof(buf), fp)) > 0) + while ((n = fread(buf, 1, sizeof(buf), fp)) > 0) { if (write(fileno(stdout), buf, n) != n) return(0); - return(!ferror(fp)); + ntot += n; + } + return(!ferror(fp) * ntot); } /* get next word from stream, leaving stream on EOL or start of next word */ @@ -267,17 +310,41 @@ fget_word(char buf[256], FILE *fp) return(buf); } -char *fmtid = NULL; /* format id */ -int comp_size = 0; /* binary bytes/channel */ -int n_comp = 0; /* components/record */ -int ni_columns = 0; /* number of input columns */ -int ni_rows = 0; /* number of input rows */ -int no_columns = 0; /* number of output columns */ -int no_rows = 0; /* number of output rows */ -int transpose = 0; /* transpose rows & cols? */ -int i_header = 1; /* input header? */ -int o_header = 1; /* output header? */ +/* parse RxCx... string */ +static int +get_array(const char *spec, int blklvl[][2], int nlvls) +{ + int n; + if (nlvls <= 0) { + fputs("Too many block levels!\n", stderr); + exit(1); + } + if (sscanf(spec, "%dx%d", &blklvl[0][0], &blklvl[0][1]) != 2) { + fputs("Bad block specification!\n", stderr); + exit(1); + } + while (isdigit(*spec)) + spec++; + spec++; /* 'x' */ + while (isdigit(*spec)) + spec++; + if ((*spec != 'x') & (*spec != 'X')) { + if (*spec) { + fputs("Blocks must be separated by 'x' or 'X'\n", stderr); + exit(1); + } + return(1); + } + spec++; + n = get_array(spec, blklvl+1, nlvls-1); + if (!n) + return(0); + blklvl[0][0] *= blklvl[1][0]; + blklvl[0][1] *= blklvl[1][1]; + return(n+1); +} + /* check settings and assign defaults */ static int check_sizes() @@ -299,27 +366,70 @@ check_sizes() if (transpose && (no_rows <= 0) & (no_columns <= 0)) { if (ni_rows > 0) no_columns = ni_rows; if (ni_columns > 0) no_rows = ni_columns; - } else if ((no_rows <= 0) & (no_columns > 0) && - !((ni_rows*ni_columns) % no_columns)) - no_rows = ni_rows*ni_columns/no_columns; + } else { + if (no_columns <= 0) + no_columns = ni_columns; + if ((no_rows <= 0) & (no_columns > 0) && + !((ni_rows*ni_columns) % no_columns)) + no_rows = ni_rows*ni_columns/no_columns; + } if (n_comp <= 0) n_comp = 3; return(1); } -/* output transposed ASCII or binary data from memory */ +/* call to compute block input position */ +static size_t +get_block_pos(int r, int c, int blklvl[][2], int nlvls) +{ + size_t n = 0; + + while (nlvls > 1) { + int sr = r/blklvl[1][0]; + int sc = c/blklvl[1][1]; + r -= sr*blklvl[1][0]; + c -= sc*blklvl[1][1]; + n += sr*blklvl[1][0]*blklvl[0][1] + sc*blklvl[1][0]*blklvl[1][1]; + blklvl++; + nlvls--; + } + n += r*blklvl[0][1] + c; + return(n); +} + +/* return input offset based on array ordering and transpose option */ +static size_t +get_input_pos(int r, int c) +{ + size_t n; + + if (outLevels > 1) { /* block reordering */ + n = get_block_pos(r, c, outArray, outLevels); + if (transpose) { + r = n/ni_rows; + c = n - r*ni_rows; + n = (size_t)c*ni_columns + r; + } + } else if (transpose) /* transpose only */ + n = (size_t)c*ni_columns + r; + else /* XXX should never happen! */ + n = (size_t)r*no_columns + c; + return(n); +} + +/* output reordered ASCII or binary data from memory */ static int -do_transpose(const MEMLOAD *mp) +do_reorder(const MEMLOAD *mp) { static const char tabEOL[2] = {'\t','\n'}; RECINDEX *rp = NULL; - long nrecords; + size_t nrecords; int i, j; /* propogate sizes */ if (ni_rows <= 0) - ni_rows = no_columns; + ni_rows = transpose ? no_columns : no_rows; if (ni_columns <= 0) - ni_columns = no_rows; + ni_columns = transpose ? no_rows : no_columns; /* get # records (& index) */ if (!comp_size) { if ((rp = index_records(mp, n_comp)) == NULL) @@ -328,10 +438,10 @@ do_transpose(const MEMLOAD *mp) ni_columns = count_columns(rp); nrecords = rp->nrecs; } else if ((ni_rows > 0) & (ni_columns > 0)) { - nrecords = ni_rows*ni_columns; + nrecords = (size_t)ni_rows*ni_columns; if (nrecords > mp->len/(n_comp*comp_size)) { - fprintf(stderr, - "Input too small for specified size and type\n"); + fputs("Input too small for specified size and type\n", + stderr); return(0); } } else @@ -339,29 +449,54 @@ do_transpose(const MEMLOAD *mp) /* check sizes */ if ((ni_rows <= 0) & (ni_columns > 0)) ni_rows = nrecords/ni_columns; - if ((ni_columns <= 0) & (ni_rows > 0)) + else if ((ni_columns <= 0) & (ni_rows > 0)) ni_columns = nrecords/ni_rows; - if (nrecords != ni_rows*ni_columns) + if (nrecords != (size_t)ni_rows*ni_columns) goto badspec; - if (no_columns <= 0) - no_columns = ni_rows; - if (no_rows <= 0) - no_rows = ni_columns; - if ((no_rows != ni_columns) | (no_columns != ni_rows)) - goto badspec; - /* transpose records */ + if (transpose) { + if (no_columns <= 0) + no_columns = ni_rows; + if (no_rows <= 0) + no_rows = ni_columns; + if (outLevels <= 1 && + (no_rows != ni_columns) | (no_columns != ni_rows)) + goto badspec; + } else { + if (no_columns <= 0) + no_columns = ni_columns; + if (no_rows <= 0) + no_rows = ni_rows; + } + if (ni_rows*ni_columns != no_rows*no_columns) { + fputs("Number of input and output records do not match\n", + stderr); + return(0); + } + if (o_header) { /* finish header? */ + printf("NROWS=%d\n", no_rows); + printf("NCOLS=%d\n", no_columns); + fputformat(fmtid, stdout); + fputc('\n', stdout); + } + /* reorder records */ for (i = 0; i < no_rows; i++) { - for (j = 0; j < no_columns; j++) + for (j = 0; j < no_columns; j++) { + size_t n = get_input_pos(i, j); + if (n >= nrecords) { + fputs("Index past end-of-file\n", stderr); + return(0); + } if (rp != NULL) { /* ASCII output */ - print_record(rp, j*ni_columns + i); + if (!print_record(rp, n)) + return(0); putc(tabEOL[j >= no_columns-1], stdout); } else { /* binary output */ - fwrite((char *)mp->base + - (n_comp*comp_size)*(j*ni_columns + i), - n_comp*comp_size, 1, stdout); + putbinary((char *)mp->base + (n_comp*comp_size)*n, + comp_size, n_comp, stdout); } + } if (ferror(stdout)) { - fprintf(stderr, "Error writing to stdout\n"); + fputs("Error writing to stdout\n", stderr); return(0); } } @@ -369,41 +504,68 @@ do_transpose(const MEMLOAD *mp) free_records(rp); return(1); badspec: - fprintf(stderr, "Bad transpose specification -- check dimension(s)\n"); + fputs("Bad dimension(s)\n", stderr); return(0); } -/* resize ASCII stream input by ignoring EOLs between records */ +/* resize stream input by ignoring EOLs between ASCII records */ static int do_resize(FILE *fp) { - long records2go = ni_rows*ni_columns; + size_t records2go = ni_rows*ni_columns; int columns2go = no_columns; char word[256]; - /* sanity checks */ - if (comp_size) - return(output_stream(fp)); /* binary data -- just copy */ + + if (o_header) { /* finish header? */ + if (no_rows > 0) + printf("NROWS=%d\n", no_rows); + if (no_columns > 0) + printf("NCOLS=%d\n", no_columns); + fputformat(fmtid, stdout); + fputc('\n', stdout); + } + if (comp_size) { /* just copy binary data? */ + size_t nwritten = output_stream(fp); + if ((no_rows > 0) & (no_columns > 0) && + nwritten != (size_t)no_rows*no_columns*n_comp*comp_size) { + if (check) { + fputs("Incorrect binary file size\n", stderr); + return(0); /* fatal if check is true */ + } + if (warnings) + fputs("Warning -- unexpected binary file size\n", stderr); + } + return(nwritten > 0); + } /* else straight ASCII data copy? */ + if (!check & (no_columns == ni_columns) & (no_rows == ni_rows)) + return(output_stream(fp) > 0); + /* need to reshape (& check?) input */ if (no_columns <= 0) { - fprintf(stderr, "Missing -oc specification\n"); + fputs("Missing -oc specification\n", stderr); return(0); } if ((records2go <= 0) & (no_rows > 0)) records2go = no_rows*no_columns; else if (no_rows*no_columns != records2go) { fprintf(stderr, - "Input and output data sizes disagree (%dx%d != %dx%d)\n", + "Number of input and output records disagree (%dx%d != %dx%d)\n", ni_rows, ni_columns, no_rows, no_columns); return(0); } do { /* reshape records */ - int n; - - for (n = n_comp; n--; ) { + int n = n_comp; + while (n--) { if (fget_word(word, fp) == NULL) { - if (records2go > 0 || n < n_comp-1) + if ((records2go > 0) | (n < n_comp-1)) break; goto done; /* normal EOD */ } + if (check && !isflt(word)) { + fputs("Badly formed number: ", stderr); + fputs(word, stderr); + fputc('\n', stderr); + return(0); + } fputs(word, stdout); if (n) { /* mid-record? */ int c = getc(fp); @@ -414,7 +576,7 @@ do_resize(FILE *fp) } } if (n >= 0) { - fprintf(stderr, "Incomplete record / unexpected EOF\n"); + fputs("Incomplete record / unexpected EOF\n", stderr); return(0); } if (--columns2go <= 0) { /* time to end output row? */ @@ -424,10 +586,22 @@ do_resize(FILE *fp) putc('\t', stdout); } while (--records2go); /* expected EOD? */ done: - if (warnings && columns2go != no_columns) - fprintf(stderr, "Warning -- incomplete final row\n"); - if (warnings && fget_word(word, fp) != NULL) - fprintf(stderr, "Warning -- characters beyond expected EOD\n"); + if (columns2go != no_columns) { + if (check) { + fputs("Incomplete final row\n", stderr); + return(0); + } + if (warnings) + fputs("Warning -- incomplete final row\n", stderr); + } + if (fget_word(word, fp) != NULL) { + if (check) { + fputs("Characters beyond expected EOD\n", stderr); + return(0); + } + if (warnings) + fputs("Warning -- characters beyond expected EOD\n", stderr); + } return(1); } @@ -435,7 +609,7 @@ done: static int headline(char *s, void *p) { - static char fmt[32]; + static char fmt[MAXFMTLEN]; int n; if (formatval(fmt, s)) { @@ -501,7 +675,8 @@ main(int argc, char *argv[]) no_columns = atoi(argv[++a]); else if (argv[a][2] == 'r') no_rows = atoi(argv[++a]); - else + else if (argv[a][2] || + !(outLevels=get_array(argv[++a], outArray, MAXLEVELS))) goto userr; break; case 'h': /* turn off header */ @@ -557,53 +732,56 @@ main(int argc, char *argv[]) case 'w': /* warnings on/off */ warnings = !warnings; break; + case 'c': /* force check operation */ + check = 1; + break; default: goto userr; } if (a < argc-1) /* arg count OK? */ goto userr; + if (outLevels) { /* should check consistency? */ + no_rows = outArray[0][0]; + no_columns = outArray[0][1]; + } /* open input file? */ - if (a == argc-1 && freopen(argv[a], "r", stdin) == NULL) { + if (a == argc-1 && freopen(argv[a], "rb", stdin) == NULL) { fprintf(stderr, "%s: cannot open for reading\n", argv[a]); return(1); - } - if (comp_size) { + } else SET_FILE_BINARY(stdin); - SET_FILE_BINARY(stdout); - } + SET_FILE_BINARY(stdout); +#ifdef getc_unlocked /* avoid stupid semaphores */ + flockfile(stdin); + flockfile(stdout); +#endif /* check for no-op */ - if (!transpose & (i_header == o_header) && (comp_size || - (no_columns == ni_columns) & (no_rows == ni_rows))) { + if (!transpose & !check & (outLevels <= 1) & (i_header == o_header) && + (no_columns == ni_columns) & (no_rows == ni_rows)) { if (warnings) fprintf(stderr, "%s: no-op -- copying input verbatim\n", argv[0]); - if (!output_stream(stdin)) - return(1); - return(0); + return(!output_stream(stdin)); } if (i_header) { /* read header */ if (getheader(stdin, headline, NULL) < 0) return(1); if (!check_sizes()) return(1); - if (comp_size) { /* a little late... */ - SET_FILE_BINARY(stdin); - SET_FILE_BINARY(stdout); - } } else if (!check_sizes()) return(1); - if (o_header) { /* write header */ + if (o_header) { /* write/add to header */ + if (!i_header) + newheader("RADIANCE", stdout); printargs(a, argv, stdout); - if (no_rows > 0) - printf("NROWS=%d\n", no_rows); - if (no_columns > 0) - printf("NCOLS=%d\n", no_columns); printf("NCOMP=%d\n", n_comp); - fputformat(fmtid, stdout); - fputc('\n', stdout); /* finish new header */ } - if (transpose) { /* transposing rows & columns? */ - MEMLOAD myMem; /* need to load into memory */ + if (!comp_size) { /* a little late... */ + SET_FILE_TEXT(stdin); + SET_FILE_TEXT(stdout); + } + if (transpose | (outLevels > 1) || (o_header && no_rows <= 0)) { + MEMLOAD myMem; /* need to map into memory */ if (a == argc-1) { if (load_file(&myMem, stdin) <= 0) { fprintf(stderr, "%s: error loading file into memory\n", @@ -615,15 +793,15 @@ main(int argc, char *argv[]) argv[0]); return(1); } - if (!do_transpose(&myMem)) + if (!do_reorder(&myMem)) return(1); - /* free_load(&myMem); */ + /* free_load(&myMem); about to exit, so don't bother */ } else if (!do_resize(stdin)) /* just reshaping input */ return(1); return(0); userr: fprintf(stderr, -"Usage: %s [-h[io]][-w][-f[afdb][N]][-t][-ic in_col][-ir in_row][-oc out_col][-or out_row] [input.dat]\n", +"Usage: %s [-h[io]][-w][-c][-f[afdb][N]][-t][-ic in_col][-ir in_row][-oc out_col][-or out_row][-o RxC[xR1xC1..]] [input.dat]\n", argv[0]); return(1); }