ViewVC Help
View File | Revision Log | Show Annotations | Download File | Root Listing
root/Development/ray/src/util/pvsum.c
Revision: 2.9
Committed: Wed Oct 29 02:48:50 2025 UTC (7 weeks ago) by greg
Content type: text/plain
Branch: MAIN
Changes since 2.8: +217 -92 lines
Log Message:
feat(pvsum): Added -m option to optimize speed and memory usage

File Contents

# User Rev Content
1 greg 2.1 #ifndef lint
2 greg 2.9 static const char RCSid[] = "$Id: pvsum.c,v 2.8 2025/10/24 22:41:10 greg Exp $";
3 greg 2.1 #endif
4     /*
5     * pvsum.c - add together spectral and/or float pictures
6     * based on vector or matrix, similar to dctimestep
7     */
8    
9     #include <math.h>
10     #include "rtio.h"
11     #include "resolu.h"
12     #include "platform.h"
13 greg 2.2 #include "random.h"
14 greg 2.1 #include "rmatrix.h"
15     #if !defined(_WIN32) && !defined(_WIN64)
16     #include <sys/mman.h>
17 greg 2.5 #include <sys/wait.h>
18 greg 2.1 #endif
19    
20 greg 2.7 #define VIEWSTR "VIEW=" /* borrowed from common/view.h */
21     #define VIEWSTRL 5
22    
23 greg 2.1 int nprocs = 1; /* # of calculation processes (Unix) */
24     int in_type = DTfromHeader; /* input data type */
25     int out_type = DTfromHeader; /* output data type */
26     char *in_spec = NULL; /* input specification */
27     char *out_spec = NULL; /* output file specification */
28    
29     int iswapped = 0; /* input data is byte-swapped? */
30     int ncomp = 3; /* # input components */
31     int xres=0, yres=0; /* input image dimensions */
32 greg 2.7 char viewspec[128] = ""; /* VIEW= line from first header */
33     char pixasp[48] = ""; /* PIXASPECT= line from header */
34 greg 2.1
35 greg 2.8 int gargc; /* global argc */
36     char **gargv; /* global argv */
37    
38 greg 2.9 RMATRIX *cmtx; /* coefficient matrix */
39     int row0, rowN; /* rows for current pass */
40 greg 2.1
/*
 * Does the given spec contain an integer conversion?
 * Only the first '%' that is not part of a literal "%%" is examined:
 * returns 1 if it ends in one of d,i,o,u,x,X and 0 otherwise
 * (including a NULL spec, no '%' at all, or a non-integer conversion).
 */
int
hasFormat(const char *s)
{
	const char	*cp;

	if (s == NULL)
		return(0);
					/* skip over any "%%" pairs */
	for (cp = strchr(s, '%'); cp != NULL && cp[1] == '%';
			cp = strchr(cp+2, '%'))
		;
	if (cp == NULL)
		return(0);
					/* scan flags/width up to conversion */
	for (++cp; *cp != '\0'; ++cp) {
		if (strchr("diouxX", *cp) != NULL)
			return(1);	/* integer format */
		if (strchr("%fFeEgGaAcsb", *cp) != NULL)
			return(0);	/* non-integer format */
	}
	return(0);			/* ran off end of string */
}
60    
61     /* get first input header data we'll need */
62     int
63     iheadline(char *s, void *p)
64     {
65     char fmt[MAXFMTLEN];
66     int i;
67    
68     if (!strncmp(s, "NCOLS=", 6)) {
69     xres = atoi(s+6);
70     return(1);
71     }
72     if (!strncmp(s, "NROWS=", 6)) {
73     yres = atoi(s+6);
74     return(1);
75     }
76     if (isncomp(s)) {
77     ncomp = ncompval(s);
78     return(1);
79     }
80     if (formatval(fmt, s)) {
81     for (in_type = DTend; --in_type > DTfromHeader; )
82     if (!strcmp(fmt, cm_fmt_id[in_type]))
83     return(1);
84     return(-1);
85     }
86     i = isbigendian(s);
87     if (i >= 0) {
88     iswapped = (i != nativebigendian());
89     return(1);
90     }
91 greg 2.7 if (!strncmp(s, VIEWSTR, VIEWSTRL)) {
92     strcpy(viewspec, s);
93     return(1);
94     }
95     if (isaspect(s)) {
96     strcpy(pixasp, s);
97     return(1);
98     }
99 greg 2.1 return(0);
100     }
101    
102     /* open initial file and get relevant dimensions and type */
103     int
104     get_iotypes(void)
105     {
106     char fbuf[256];
107     FILE *fp;
108    
109     sprintf(fbuf, in_spec, 0);
110     fp = fopen(fbuf, "rb");
111     if (!fp) {
112     fprintf(stderr, "%s: cannot open for reading\n", fbuf);
113     return(0);
114     }
115     if (getheader(fp, iheadline, NULL) < 0) {
116     fprintf(stderr, "%s: bad header - wrong format?\n", fbuf);
117     fclose(fp);
118     return(0);
119     }
120     if ((xres <= 0) | (yres <= 0) && !fscnresolu(&xres, &yres, fp)) {
121     fprintf(stderr, "%s: missing input resolution\n", fbuf);
122     fclose(fp);
123     return(0);
124     }
125     if (nprocs > 1 && (in_type==DTrgbe) | (in_type==DTxyze)) {
126     long data_start = ftell(fp); /* make sure input flat */
127     off_t dend = lseek(fileno(fp), 0, SEEK_END);
128     if (dend < data_start + 4L*xres*yres) {
129 greg 2.9 fprintf(stderr, "%s: warning - multi-processing requires flat input files\n",
130     gargv[0]);
131 greg 2.1 nprocs = 1;
132     }
133     }
134     fclose(fp);
135     if ((cmtx->ncomp == 1) & (ncomp != 1)) {
136     double xfm[MAXCSAMP];
137     RMATRIX *nmtx;
138     int i;
139     for (i = ncomp; i--; )
140     xfm[i] = 1.;
141     nmtx = rmx_transform(cmtx, ncomp, xfm);
142     if (!nmtx)
143     return(0);
144     rmx_free(cmtx);
145     cmtx = nmtx;
146     } else if (cmtx->ncomp != ncomp) {
147 greg 2.9 fprintf(stderr, "%s: operation %s needs %d components, has %d\n",
148     gargv[0], cmtx->ncols == 1 ? "vector" : "matrix",
149 greg 2.1 ncomp, cmtx->ncomp);
150     return(0);
151     }
152     if ((in_type != DTrgbe) & (in_type != DTxyze) & (in_type != DTspec) &
153     (in_type != DTfloat)) {
154 greg 2.9 fprintf(stderr, "%s: unsupported input data type '%s'\n",
155 greg 2.1 fbuf, cm_fmt_id[in_type]);
156     return(0);
157     }
158     if ((out_type == DTrgbe) & (ncomp > 3))
159     out_type = DTspec;
160     else if (out_type == DTfromHeader ||
161     (out_type == DTrgbe) & (in_type != DTfloat))
162     out_type = in_type;
163     return(1);
164     }
165    
166 greg 2.9 struct hdata {
167     int xr, yr; /* resolution */
168     int fno; /* frame # */
169     char fmt[MAXFMTLEN]; /* format */
170     };
171    
172 greg 2.1 /* check subsequent headers match initial file */
173     int
174     checkline(char *s, void *p)
175     {
176 greg 2.3 static int exposWarned = 0;
177 greg 2.9 struct hdata *hp = (struct hdata *)p;
178 greg 2.1
179     if (!strncmp(s, "NCOLS=", 6)) {
180 greg 2.9 hp->xr = atoi(s+6);
181     if (hp->xr <= 0)
182 greg 2.1 return(-1);
183     return(1);
184     }
185     if (!strncmp(s, "NROWS=", 6)) {
186 greg 2.9 hp->yr = atoi(s+6);
187     if (hp->yr <= 0)
188 greg 2.1 return(-1);
189     return(1);
190     }
191 greg 2.9 if (!strncmp(s, "FRAME=", 6)) {
192     hp->fno = atoi(s+6);
193     return(1);
194     }
195 greg 2.1 if (isncomp(s)) {
196     if (ncompval(s) != ncomp)
197     return(-1);
198     return(1);
199     }
200     if (isexpos(s)) {
201 greg 2.3 if (!exposWarned && fabs(1. - exposval(s)) > 0.04) {
202 greg 2.9 fprintf(stderr, "%s: warning - ignoring EXPOSURE setting(s)\n",
203     gargv[0]);
204 greg 2.3 exposWarned++;
205     }
206 greg 2.1 return(1);
207     }
208 greg 2.9 if (formatval(hp->fmt, s))
209 greg 2.1 return(1);
210 greg 2.9
211 greg 2.1 return(0);
212     }
213    
214 greg 2.9 /* open and check input/output file, read/write mode if fno >= 0 */
215 greg 2.1 FILE *
216 greg 2.9 open_iofile(char *fname, int fno)
217 greg 2.1 {
218 greg 2.9 struct hdata hd;
219     FILE *fp = fopen(fname, fno>=0 ? "r+b" : "rb");
220 greg 2.1
221     if (!fp) {
222 greg 2.9 fprintf(stderr, "%s: cannot open for reading%s\n",
223     fname, fno>=0 ? "/writing" : "");
224 greg 2.1 return(NULL);
225     }
226 greg 2.9 hd.xr = hd.yr = 0;
227     hd.fno = -1;
228     hd.fmt[0] = '\0';
229     if (getheader(fp, checkline, &hd) < 0) {
230 greg 2.1 fprintf(stderr, "%s: bad/inconsistent header\n", fname);
231     fclose(fp);
232     return(NULL);
233     }
234 greg 2.9 if ((hd.fno >= 0) & (fno >= 0) & (hd.fno != fno)) {
235     fprintf(stderr, "%s: unexpected frame number (%d != %d)\n",
236     fname, hd.fno, fno);
237     fclose(fp);
238     return(NULL);
239     }
240     if (strcmp(hd.fmt, cm_fmt_id[fno>=0 ? out_type : in_type])) {
241     fprintf(stderr, "%s: wrong format\n", fname);
242     fclose(fp);
243     return(NULL);
244     }
245     if ((hd.xr <= 0) | (hd.yr <= 0) &&
246     !fscnresolu(&hd.xr, &hd.yr, fp)) {
247 greg 2.1 fprintf(stderr, "%s: missing resolution\n", fname);
248     fclose(fp);
249     return(NULL);
250     }
251 greg 2.9 if ((hd.xr != xres) | (hd.yr != yres)) {
252 greg 2.1 fprintf(stderr, "%s: mismatched resolution\n", fname);
253     fclose(fp);
254     return(NULL);
255     }
256     return(fp);
257     }
258    
259 greg 2.9 /* read in previous pixel data from output and rewind to data start */
260     int
261     reload_data(float *osum, FILE *fp)
262     {
263     long dstart;
264    
265     if (!osum | !fp)
266     return(0);
267     if ((dstart = ftell(fp)) < 0) {
268     fprintf(stderr, "%s: ftell() error in reload_data()\n",
269     gargv[0]);
270     return(0);
271     }
272     if (in_type == DTfloat) {
273     if (fread(osum, sizeof(float)*ncomp, (size_t)xres*yres, fp) !=
274     (size_t)xres*yres) {
275     fprintf(stderr, "%s: fread() error\n", gargv[0]);
276     return(0);
277     }
278     } else {
279     int y;
280     for (y = 0; y < yres; y++, osum += ncomp*xres)
281     if (freadsscan(osum, ncomp, xres, fp) < 0) {
282     fprintf(stderr, "%s: freadsscan() error\n", gargv[0]);
283     return(0);
284     }
285     }
286     if (fseek(fp, dstart, SEEK_SET) < 0) {
287     fprintf(stderr, "%s: fseek() error in reload_data()\n",
288     gargv[0]);
289     return(0);
290     }
291     return(1);
292     }
293    
294 greg 2.1 /* open output file or command (if !NULL) and write info header */
295     FILE *
296     open_output(char *ospec, int fno)
297     {
298     FILE *fp;
299    
300     if (!ospec) {
301     ospec = "<stdout>";
302     fp = stdout;
303     } else if (ospec[0] == '!') {
304     if (!(fp = popen(ospec+1, "w"))) {
305 greg 2.9 fprintf(stderr, "%s: cannot start: %s\n", gargv[0], ospec);
306 greg 2.1 return(NULL);
307     }
308 greg 2.3 } else if (!(fp = fopen(ospec, "w"))) {
309 greg 2.1 fprintf(stderr, "%s: cannot open for writing\n", ospec);
310     return(NULL);
311     }
312 greg 2.3 SET_FILE_BINARY(fp);
313 greg 2.1 newheader("RADIANCE", fp);
314 greg 2.2 if (cmtx->info) /* prepend matrix metadata */
315     fputs(cmtx->info, fp);
316     else
317     fputnow(fp);
318 greg 2.8 printargs(gargc, gargv, fp); /* this command */
319 greg 2.1 if (fno >= 0)
320     fprintf(fp, "FRAME=%d\n", fno);
321 greg 2.7 if (viewspec[0])
322     fputs(viewspec, fp);
323     if (pixasp[0])
324     fputs(pixasp, fp);
325 greg 2.1 switch (out_type) {
326     case DTfloat:
327     case DTdouble:
328     fprintf(fp, "NCOLS=%d\nNROWS=%d\n", xres, yres);
329     fputncomp(ncomp, fp);
330     fputendian(fp);
331     fputformat(cm_fmt_id[out_type], fp);
332     fputc('\n', fp);
333     break;
334     case DTrgbe:
335     fputformat(COLRFMT, fp);
336     fputc('\n', fp);
337     fprtresolu(xres, yres, fp);
338     break;
339     case DTxyze:
340     fputformat(CIEFMT, fp);
341     fputc('\n', fp);
342     fprtresolu(xres, yres, fp);
343     break;
344     case DTspec:
345     fputncomp(ncomp, fp);
346     fputwlsplit(cmtx->wlpart, fp);
347     fputformat(SPECFMT, fp);
348     fputc('\n', fp);
349     fprtresolu(xres, yres, fp);
350     break;
351     default:
352 greg 2.9 fprintf(stderr, "%s: unsupported output type!\n", gargv[0]);
353 greg 2.1 return(NULL);
354     }
355     if (fflush(fp) < 0) {
356     fprintf(stderr, "%s: write error\n", ospec);
357     fclose(fp);
358     return(NULL);
359     }
360     return(fp);
361     }
362    
363     /* run calculation from a single process */
364     int
365     solo_process(void)
366     {
367     float *osum = (float *)calloc((size_t)xres*yres, sizeof(float)*ncomp);
368     COLORV *iscan = (COLORV *)malloc(sizeof(COLORV)*ncomp*xres);
369     char fbuf[512];
370     int c;
371    
372     if (!osum | !iscan) {
373 greg 2.9 fprintf(stderr, "%s: annot allocate %dx%d %d-component accumulator\n",
374     gargv[0], xres, yres, ncomp);
375 greg 2.1 return(0);
376     }
377     if (sizeof(float) != sizeof(COLORV)) {
378 greg 2.9 fprintf(stderr, "%s: Code Error 1 in solo_process()\n", gargv[0]);
379 greg 2.1 return(0);
380     }
381 greg 2.9 /* run through each column/output */
382     for (c = 0; c < cmtx->ncols; c++) {
383     int rc = rowN - row0;
384 greg 2.1 FILE *fout;
385     int y;
386 greg 2.9 /* open output (load if multipass) */
387     if (out_spec) { /* file or command */
388     if (cmtx->ncols > 1 && !hasFormat(out_spec)) {
389     fprintf(stderr, "%s: sequential result must go to stdout\n",
390     gargv[0]);
391     return(0);
392     }
393     sprintf(fbuf, out_spec, c);
394     if (row0) { /* another pass -- get prev. data */
395     fout = open_iofile(fbuf, c);
396     if (!reload_data(osum, fout))
397     return(0);
398     } else /* else new output (clobber prev. files) */
399     fout = open_output(fbuf, c-(cmtx->ncols==1));
400     } else { /* else stdout */
401     if ((out_type == DTfloat) & (cmtx->ncols > 1)) {
402     fprintf(stderr, "%s: float outputs must have separate destinations\n",
403     gargv[0]);
404     return(0);
405     }
406     strcpy(fbuf, "<stdout>");
407     fout = open_output(NULL, c-(cmtx->ncols==1));
408     }
409     if (!fout)
410     return(0); /* assume error was reported */
411     if (!row0 & (c > 0)) /* clear accumulator? */
412 greg 2.1 memset(osum, 0, sizeof(float)*ncomp*xres*yres);
413     while (rc-- > 0) { /* run through each input file */
414 greg 2.9 const int r = c&1 ? row0 + rc : rowN-1 - rc;
415 greg 2.1 const rmx_dtype *cval = rmx_val(cmtx, r, c);
416     FILE *finp;
417     int i, x;
418     for (i = ncomp; i--; )
419     if (cval[i] != 0) break;
420     if (i < 0) /* this coefficient is zero, skip */
421     continue;
422     sprintf(fbuf, in_spec, r);
423 greg 2.9 finp = open_iofile(fbuf, -1);
424 greg 2.1 if (!finp)
425     return(0);
426     for (y = 0; y < yres; y++) {
427     float *dst = osum + y*xres*ncomp;
428     if (in_type == DTfloat ? getbinary(iscan, sizeof(float)*ncomp,
429     xres, finp) != xres
430     : freadsscan(iscan, ncomp, xres, finp) < 0)
431     goto readerr;
432     if ((in_type == DTfloat) & iswapped)
433     swap32((char *)iscan, ncomp*xres);
434     for (x = 0; x < xres; x++, dst += ncomp)
435     for (i = ncomp; i--; )
436     dst[i] += cval[i]*iscan[x*ncomp + i];
437     }
438     fclose(finp);
439 greg 2.9 } /* write accumulated picture */
440 greg 2.1 if (out_type != DTfloat) {
441     for (y = 0; y < yres; y++)
442     if (fwritesscan(osum + (size_t)y*xres*ncomp,
443     ncomp, xres, fout) < 0)
444     goto writerr;
445     } else if (fwrite(osum, sizeof(float)*ncomp, (size_t)xres*yres, fout) !=
446     (size_t)xres*yres)
447     goto writerr;
448    
449     if (fbuf[0] == '!') {
450     if (pclose(fout) != 0) {
451 greg 2.9 fprintf(stderr, "%s: bad status from: %s\n", gargv[0], fbuf);
452 greg 2.1 return(0);
453     }
454     } else if (fout != stdout && fclose(fout) == EOF)
455     goto writerr;
456     }
457     free(osum); /* clean up on success */
458     free(iscan);
459     return(1);
460     readerr:
461     fprintf(stderr, "%s: read error\n", fbuf);
462     return(0);
463     writerr:
464     fprintf(stderr, "%s: write error\n", fbuf);
465     return(0);
466     }
467    
468 greg 2.7 #if defined(_WIN32) || defined(_WIN64)
469     #define multi_process solo_process
470     #else
471    
472 greg 2.2 /* allocate a scrambled index array of the specified length */
473     int *
474     scramble(int n)
475     {
476     int *scarr = (int *)malloc(sizeof(int)*n);
477     int i;
478    
479     if (!scarr) {
480 greg 2.9 fprintf(stderr, "%s: out of memory in scramble(%d)\n", gargv[0], n);
481 greg 2.2 exit(1);
482     }
483     for (i = n; i--; )
484     scarr[i] = i;
485     /* perform Fisher-Yates shuffle */
486     for (i = 0; i < n-1; i++) {
487     int ix = irandom(n-i) + i;
488     int ndx = scarr[i];
489     scarr[i] = scarr[ix];
490     scarr[ix] = ndx;
491     }
492     return(scarr);
493     }
494    
495 greg 2.1 /* run calculation on multiple processes, using memory maps and fork() */
496     int
497     multi_process(void)
498     {
499     int coff = nprocs;
500     int odd = 0;
501 greg 2.9 float *osum = NULL;
502     int *syarr = NULL;
503 greg 2.1 char fbuf[512];
504     int c;
505     /* sanity check */
506     if (sizeof(float) != sizeof(COLORV)) {
507 greg 2.9 fprintf(stderr, "%s: code Error 1 in multi_process()\n", gargv[0]);
508 greg 2.1 return(0);
509     }
510     while (--coff > 0) { /* parent births children */
511     int pid = fork();
512     if (pid < 0) {
513 greg 2.9 fprintf(stderr, "%s: fork() call failed!\n", gargv[0]);
514 greg 2.1 return(0);
515     }
516     if (pid == 0) break; /* child gets to work */
517     }
518 greg 2.9 if (!row0 | (out_type != DTfloat)) {
519     osum = (float *)calloc((size_t)xres*yres, sizeof(float)*ncomp);
520     if (!osum) {
521     fprintf(stderr, "%s: cannot allocate %dx%d %d-component accumulator\n",
522     gargv[0], xres, yres, ncomp);
523     return(0);
524     }
525 greg 2.1 }
526 greg 2.2 srandom(113*coff + 5669); /* randomize row access for this process */
527     syarr = scramble(yres);
528 greg 2.1 /* run through our unique set of columns */
529     for (c = coff; c < cmtx->ncols; c += nprocs) {
530 greg 2.9 int rc = rowN - row0;
531     void *omap = NULL;
532     size_t omaplen = 0;
533     long dstart;
534 greg 2.1 FILE *fout;
535     int y;
536 greg 2.9 /* create/load output */
537     sprintf(fbuf, out_spec, c);
538     if (row0) { /* making another pass? */
539     fout = open_iofile(fbuf, c);
540     if (!fout) return(0);
541     if (out_type == DTfloat) {
542     dstart = ftell(fout);
543     if ((dstart < 0) | (dstart % sizeof(float))) {
544     fprintf(stderr, "%s: bad seek/alignment\n", fbuf);
545     return(0);
546     }
547     omaplen = dstart + sizeof(float)*ncomp*xres*yres;
548     omap = mmap(NULL, omaplen, PROT_READ|PROT_WRITE,
549     MAP_FILE|MAP_SHARED, fileno(fout), 0);
550     if (omap == MAP_FAILED) {
551     fprintf(stderr, "%s: cannot map file '%s'\n",
552     gargv[0], fbuf);
553     return(0);
554     }
555     osum = (float *)((char *)omap + dstart);
556     } else if (!reload_data(osum, fout))
557     return(0);
558     } else { /* else new output (clobber prev. files) */
559     fout = open_output(fbuf, c);
560     if (!fout) return(0);
561     if (c > coff) /* clear accumulator? */
562     memset(osum, 0, sizeof(float)*ncomp*xres*yres);
563     }
564 greg 2.1 while (rc-- > 0) { /* map & sum each input file */
565 greg 2.9 const int r = odd ? row0 + rc : rowN-1 - rc;
566 greg 2.1 const rmx_dtype *cval = rmx_val(cmtx, r, c);
567 greg 2.9 size_t imaplen;
568 greg 2.1 void *imap;
569     FILE *finp;
570     float *dst;
571 greg 2.2 int i, x;
572 greg 2.1 for (i = ncomp; i--; )
573     if (cval[i] != 0) break;
574     if (i < 0) /* this coefficient is zero, skip */
575     continue;
576     sprintf(fbuf, in_spec, r);
577 greg 2.9 finp = open_iofile(fbuf, -1);
578 greg 2.1 if (!finp)
579     return(0);
580     dstart = ftell(finp);
581     if (dstart < 0) {
582     fprintf(stderr, "%s: ftell() failed!\n", fbuf);
583     return(0);
584     }
585     if (in_type == DTfloat && dstart%sizeof(float)) {
586     fprintf(stderr, "%s: float header misalignment\n", fbuf);
587     return(0);
588     }
589     i = in_type==DTfloat ? ncomp*(int)sizeof(float) : ncomp+1;
590 greg 2.9 imaplen = dstart + (size_t)yres*xres*i;
591     imap = mmap(NULL, imaplen, PROT_READ,
592 greg 2.1 MAP_FILE|MAP_SHARED, fileno(finp), 0);
593 greg 2.3 fclose(finp); /* will read from map (randomly) */
594 greg 2.1 if (imap == MAP_FAILED) {
595     fprintf(stderr, "%s: unable to map input file\n", fbuf);
596     return(0);
597     }
598 greg 2.2 if (in_type == DTfloat)
599     for (y = yres; y-- > 0; ) {
600     const float *fvp = (float *)((char *)imap + dstart) +
601     (size_t)ncomp*xres*syarr[y];
602     dst = osum + (size_t)ncomp*xres*syarr[y];
603     for (x = xres; x-- > 0; dst += ncomp, fvp += ncomp)
604     for (i = ncomp; i--; )
605     dst[i] += cval[i]*fvp[i];
606     }
607     else
608     for (y = yres; y-- > 0; ) {
609     const COLRV *cvp = (COLRV *)((char *)imap + dstart) +
610     (ncomp+1L)*xres*syarr[y];
611     dst = osum + (size_t)ncomp*xres*syarr[y];
612     for (x = xres; x-- > 0; dst += ncomp, cvp += ncomp+1) {
613     const rmx_dtype fe = cxponent[cvp[ncomp]];
614     for (i = ncomp; i--; )
615     dst[i] += cval[i]*(cvp[i]+(rmx_dtype).5)*fe;
616     }
617 greg 2.1 }
618 greg 2.9 munmap(imap, imaplen);
619     } /* write accumulated column picture */
620 greg 2.1 if (out_type != DTfloat) {
621     for (y = 0; y < yres; y++)
622     if (fwritesscan(osum + (size_t)y*xres*ncomp,
623     ncomp, xres, fout) < 0)
624     goto writerr;
625 greg 2.9 } else if (omap) {
626     if (munmap(omap, omaplen) < 0)
627     goto writerr;
628     osum = NULL;
629 greg 2.1 } else if (fwrite(osum, sizeof(float)*ncomp, (size_t)xres*yres, fout) !=
630     (size_t)xres*yres)
631     goto writerr;
632    
633     if (fbuf[0] == '!') {
634     if (pclose(fout) != 0) {
635 greg 2.9 fprintf(stderr, "%s: bad status from: %s\n", gargv[0], fbuf);
636 greg 2.1 return(0);
637     }
638     } else if (fclose(fout) == EOF)
639     goto writerr;
640     odd = !odd; /* go back & forth to milk page cache */
641     }
642 greg 2.9 if (osum) free(osum);
643 greg 2.2 free(syarr);
644 greg 2.9 if (coff) /* child exits here... */
645     _exit(0);
646 greg 2.1 c = 0; /* ...but parent waits for children */
647     while (++coff < nprocs) {
648     int st;
649 greg 2.9 if (wait(&st) < 0) {
650     fprintf(stderr, "%s: warning - wait() call failed unexpectedly\n", gargv[0]);
651 greg 2.1 break;
652 greg 2.9 }
653 greg 2.1 if (st) c = st;
654     }
655     return(c == 0);
656     writerr:
657     fprintf(stderr, "%s: write error\n", fbuf);
658     return(0);
659     }
660    
661 greg 2.7 #endif /* ! Windows */
662    
663 greg 2.1 int
664     main(int argc, char *argv[])
665     {
666 greg 2.9 double cacheGB = 0;
667     int rintvl;
668 greg 2.1 int a;
669    
670 greg 2.8 gargc = argc; /* for header output */
671     gargv = argv;
672    
673 greg 2.1 for (a = 1; a < argc-1 && argv[a][0] == '-'; a++)
674     switch (argv[a][1]) {
675     case 'o': /* output spec/format */
676     switch (argv[a][2]) {
677     case '\0':
678     out_spec = argv[++a];
679     break;
680     case 'f':
681     out_type = DTfloat;
682     break;
683     case 'c':
684     out_type = DTrgbe;
685     break;
686     default:
687     goto badopt;
688     }
689     break;
690 greg 2.9 case 'm': /* cache size in GigaBytes */
691     cacheGB = atof(argv[++a]);
692     break;
693 greg 2.1 case 'N': /* number of desired processes */
694     case 'n': /* quietly supported alternate */
695     nprocs = atoi(argv[++a]);
696     if (nprocs <= 0)
697     goto userr;
698     break;
699     default:;
700     badopt: fprintf(stderr, "%s: bad option: %s\n", argv[0], argv[a]);
701     goto userr;
702     return(1);
703     }
704     if ((argc-a < 1) | (argc-a > 2) || argv[a][0] == '-')
705     goto userr;
706     in_spec = argv[a];
707 greg 2.4 cmtx = rmx_load(argv[a+1]); /* loads from stdin if a+1==argc */
708 greg 2.1 if (cmtx == NULL)
709     return(1); /* error reported */
710 greg 2.9 cacheGB *= (cmtx->ncols > 1);
711     if (cacheGB > 0 && (!out_spec || *out_spec == '!')) {
712     fprintf(stderr, "%s: -m option incompatible with output to %s\n",
713     argv[0], out_spec ? "command" : "stdout");
714     return(1);
715     }
716 greg 2.3 if (nprocs > cmtx->ncols)
717     nprocs = cmtx->ncols;
718 greg 2.1 #if defined(_WIN32) || defined(_WIN64)
719     if (nprocs > 1) {
720     fprintf(stderr, "%s: warning - Windows only allows -N 1\n", argv[0]);
721     nprocs = 1;
722     }
723     #else
724     if ((nprocs > 1) & !out_spec) {
725     fprintf(stderr, "%s: multi-processing result cannot go to stdout\n",
726     argv[0]);
727     nprocs = 1;
728     }
729     if ((nprocs > 1) & iswapped && (in_type==DTfloat) | (in_type==DTdouble)) {
730     fprintf(stderr, "%s: multi-processing unsupported on swapped input\n",
731     argv[0]);
732     nprocs = 1;
733     }
734     #endif
735     if (cmtx->nrows > 1 && !hasFormat(in_spec)) {
736     fprintf(stderr, "%s: input specification '%s' needs %%d format\n",
737     argv[0], in_spec);
738     goto userr;
739     }
740     if (!get_iotypes())
741     return(1);
742 greg 2.9 if (cacheGB > 1e-4) { /* figure out # of passes => rintvl */
743     size_t inp_bytes = (in_type==DTfloat ? sizeof(float)*ncomp
744     : (size_t)(ncomp+1)) * xres*yres;
745     size_t mem_bytes = sizeof(float)*ncomp*xres*yres;
746     int npasses = (double)inp_bytes*cmtx->nrows /
747     (cacheGB*(1L<<30) - (double)mem_bytes*nprocs) + 1;
748     if ((npasses <= 0) | (npasses*8 >= cmtx->nrows))
749     npasses = 1; /* let's not go there... */
750     rintvl = cmtx->nrows / npasses;
751     rintvl += (rintvl*npasses < cmtx->nrows);
752     } else
753     rintvl = cmtx->nrows;
754     /* make our passes */
755     for (row0 = 0; row0 < cmtx->nrows; row0 += rintvl) {
756     if ((rowN = row0 + rintvl) > cmtx->nrows)
757     rowN = cmtx->nrows;
758     if (nprocs==1 ? !solo_process() : !multi_process())
759     return(1);
760     }
761 greg 2.1 return(0);
762     userr:
763 greg 2.9 fprintf(stderr, "Usage: %s [-oc | -of][-o ospec][-N nproc][-m cacheGB] inpspec [mtx]\n",
764 greg 2.1 argv[0]);
765     return(1);
766     }