ViewVC Help
View File | Revision Log | Show Annotations | Download File | Root Listing
root/Development/ray/src/util/pvsum.c
Revision: 2.13
Committed: Thu Oct 30 20:54:27 2025 UTC (2 weeks, 1 day ago) by greg
Content type: text/plain
Branch: MAIN
CVS Tags: HEAD
Changes since 2.12: +3 -2 lines
Log Message:
perf(pvsum): Forgot to include size of in-core matrix in overhead

File Contents

# User Rev Content
1 greg 2.1 #ifndef lint
2 greg 2.13 static const char RCSid[] = "$Id: pvsum.c,v 2.12 2025/10/30 16:47:13 greg Exp $";
3 greg 2.1 #endif
4     /*
5     * pvsum.c - add together spectral and/or float pictures
6     * based on vector or matrix, similar to dctimestep
7     */
8    
9     #include <math.h>
10     #include "rtio.h"
11     #include "resolu.h"
12     #include "platform.h"
13 greg 2.2 #include "random.h"
14 greg 2.1 #include "rmatrix.h"
15     #if !defined(_WIN32) && !defined(_WIN64)
16     #include <sys/mman.h>
17 greg 2.5 #include <sys/wait.h>
18 greg 2.1 #endif
19    
20 greg 2.7 #define VIEWSTR "VIEW=" /* borrowed from common/view.h */
21     #define VIEWSTRL 5
22    
23 greg 2.1 int nprocs = 1; /* # of calculation processes (Unix) */
24     int in_type = DTfromHeader; /* input data type */
25     int out_type = DTfromHeader; /* output data type */
26     char *in_spec = NULL; /* input specification */
27     char *out_spec = NULL; /* output file specification */
28    
29     int iswapped = 0; /* input data is byte-swapped? */
30     int ncomp = 3; /* # input components */
31     int xres=0, yres=0; /* input image dimensions */
32 greg 2.7 char viewspec[128] = ""; /* VIEW= line from first header */
33     char pixasp[48] = ""; /* PIXASPECT= line from header */
34 greg 2.1
35 greg 2.8 int gargc; /* global argc */
36     char **gargv; /* global argv */
37    
38 greg 2.9 RMATRIX *cmtx; /* coefficient matrix */
39     int row0, rowN; /* rows for current pass */
40 greg 2.1
/*
 * Does the given spec contain an integer format?
 *
 * Locates the first '%' in s that does not begin a "%%" pair, then scans
 * the characters after it: returns 1 if one of "diouxX" is met first,
 * 0 if another '%' or one of "fFeEgGaAcsb" is met first, and 0 if the
 * string ends (or s is NULL, or s holds no such '%').
 */
int
hasFormat(const char *s)
{
	const char	*cp;

	if (s == NULL)
		return(0);
					/* skip over any "%%" pairs */
	for (cp = strchr(s, '%'); cp != NULL; cp = strchr(cp+2, '%'))
		if (cp[1] != '%')
			break;
	if (cp == NULL)
		return(0);		/* no conversion present */
					/* scan flags/width for conversion char */
	for (cp++; *cp; cp++) {
		if (strchr("diouxX", *cp) != NULL)
			return(1);	/* integer format */
		if (strchr("%fFeEgGaAcsb", *cp) != NULL)
			return(0);	/* non-integer format */
	}
	return(0);			/* ran off end of string */
}
60    
61     /* get first input header data we'll need */
62     int
63     iheadline(char *s, void *p)
64     {
65     char fmt[MAXFMTLEN];
66     int i;
67    
68     if (!strncmp(s, "NCOLS=", 6)) {
69     xres = atoi(s+6);
70     return(1);
71     }
72     if (!strncmp(s, "NROWS=", 6)) {
73     yres = atoi(s+6);
74     return(1);
75     }
76     if (isncomp(s)) {
77     ncomp = ncompval(s);
78     return(1);
79     }
80     if (formatval(fmt, s)) {
81     for (in_type = DTend; --in_type > DTfromHeader; )
82     if (!strcmp(fmt, cm_fmt_id[in_type]))
83     return(1);
84     return(-1);
85     }
86     i = isbigendian(s);
87     if (i >= 0) {
88     iswapped = (i != nativebigendian());
89     return(1);
90     }
91 greg 2.7 if (!strncmp(s, VIEWSTR, VIEWSTRL)) {
92     strcpy(viewspec, s);
93     return(1);
94     }
95     if (isaspect(s)) {
96     strcpy(pixasp, s);
97     return(1);
98     }
99 greg 2.1 return(0);
100     }
101    
102     /* open initial file and get relevant dimensions and type */
103     int
104     get_iotypes(void)
105     {
106     char fbuf[256];
107     FILE *fp;
108    
109     sprintf(fbuf, in_spec, 0);
110     fp = fopen(fbuf, "rb");
111     if (!fp) {
112     fprintf(stderr, "%s: cannot open for reading\n", fbuf);
113     return(0);
114     }
115     if (getheader(fp, iheadline, NULL) < 0) {
116     fprintf(stderr, "%s: bad header - wrong format?\n", fbuf);
117     fclose(fp);
118     return(0);
119     }
120     if ((xres <= 0) | (yres <= 0) && !fscnresolu(&xres, &yres, fp)) {
121     fprintf(stderr, "%s: missing input resolution\n", fbuf);
122     fclose(fp);
123     return(0);
124     }
125     if (nprocs > 1 && (in_type==DTrgbe) | (in_type==DTxyze)) {
126     long data_start = ftell(fp); /* make sure input flat */
127     off_t dend = lseek(fileno(fp), 0, SEEK_END);
128     if (dend < data_start + 4L*xres*yres) {
129 greg 2.9 fprintf(stderr, "%s: warning - multi-processing requires flat input files\n",
130     gargv[0]);
131 greg 2.1 nprocs = 1;
132     }
133     }
134     fclose(fp);
135     if ((cmtx->ncomp == 1) & (ncomp != 1)) {
136     double xfm[MAXCSAMP];
137     RMATRIX *nmtx;
138     int i;
139     for (i = ncomp; i--; )
140     xfm[i] = 1.;
141     nmtx = rmx_transform(cmtx, ncomp, xfm);
142     if (!nmtx)
143     return(0);
144     rmx_free(cmtx);
145     cmtx = nmtx;
146     } else if (cmtx->ncomp != ncomp) {
147 greg 2.9 fprintf(stderr, "%s: operation %s needs %d components, has %d\n",
148     gargv[0], cmtx->ncols == 1 ? "vector" : "matrix",
149 greg 2.1 ncomp, cmtx->ncomp);
150     return(0);
151     }
152     if ((in_type != DTrgbe) & (in_type != DTxyze) & (in_type != DTspec) &
153     (in_type != DTfloat)) {
154 greg 2.9 fprintf(stderr, "%s: unsupported input data type '%s'\n",
155 greg 2.1 fbuf, cm_fmt_id[in_type]);
156     return(0);
157     }
158     if ((out_type == DTrgbe) & (ncomp > 3))
159     out_type = DTspec;
160     else if (out_type == DTfromHeader ||
161     (out_type == DTrgbe) & (in_type != DTfloat))
162     out_type = in_type;
163     return(1);
164     }
165    
166 greg 2.9 struct hdata {
167     int xr, yr; /* resolution */
168     int fno; /* frame # */
169     char fmt[MAXFMTLEN]; /* format */
170     };
171    
172 greg 2.1 /* check subsequent headers match initial file */
173     int
174     checkline(char *s, void *p)
175     {
176 greg 2.3 static int exposWarned = 0;
177 greg 2.9 struct hdata *hp = (struct hdata *)p;
178 greg 2.1
179     if (!strncmp(s, "NCOLS=", 6)) {
180 greg 2.9 hp->xr = atoi(s+6);
181     if (hp->xr <= 0)
182 greg 2.1 return(-1);
183     return(1);
184     }
185     if (!strncmp(s, "NROWS=", 6)) {
186 greg 2.9 hp->yr = atoi(s+6);
187     if (hp->yr <= 0)
188 greg 2.1 return(-1);
189     return(1);
190     }
191 greg 2.9 if (!strncmp(s, "FRAME=", 6)) {
192     hp->fno = atoi(s+6);
193     return(1);
194     }
195 greg 2.1 if (isncomp(s)) {
196     if (ncompval(s) != ncomp)
197     return(-1);
198     return(1);
199     }
200     if (isexpos(s)) {
201 greg 2.3 if (!exposWarned && fabs(1. - exposval(s)) > 0.04) {
202 greg 2.9 fprintf(stderr, "%s: warning - ignoring EXPOSURE setting(s)\n",
203     gargv[0]);
204 greg 2.3 exposWarned++;
205     }
206 greg 2.1 return(1);
207     }
208 greg 2.9 if (formatval(hp->fmt, s))
209 greg 2.1 return(1);
210 greg 2.9
211 greg 2.1 return(0);
212     }
213    
214 greg 2.9 /* open and check input/output file, read/write mode if fno >= 0 */
215 greg 2.1 FILE *
216 greg 2.9 open_iofile(char *fname, int fno)
217 greg 2.1 {
218 greg 2.9 struct hdata hd;
219     FILE *fp = fopen(fname, fno>=0 ? "r+b" : "rb");
220 greg 2.1
221     if (!fp) {
222 greg 2.9 fprintf(stderr, "%s: cannot open for reading%s\n",
223     fname, fno>=0 ? "/writing" : "");
224 greg 2.1 return(NULL);
225     }
226 greg 2.9 hd.xr = hd.yr = 0;
227     hd.fno = -1;
228     hd.fmt[0] = '\0';
229     if (getheader(fp, checkline, &hd) < 0) {
230 greg 2.1 fprintf(stderr, "%s: bad/inconsistent header\n", fname);
231     fclose(fp);
232     return(NULL);
233     }
234 greg 2.9 if ((hd.fno >= 0) & (fno >= 0) & (hd.fno != fno)) {
235     fprintf(stderr, "%s: unexpected frame number (%d != %d)\n",
236     fname, hd.fno, fno);
237     fclose(fp);
238     return(NULL);
239     }
240     if (strcmp(hd.fmt, cm_fmt_id[fno>=0 ? out_type : in_type])) {
241     fprintf(stderr, "%s: wrong format\n", fname);
242     fclose(fp);
243     return(NULL);
244     }
245     if ((hd.xr <= 0) | (hd.yr <= 0) &&
246     !fscnresolu(&hd.xr, &hd.yr, fp)) {
247 greg 2.1 fprintf(stderr, "%s: missing resolution\n", fname);
248     fclose(fp);
249     return(NULL);
250     }
251 greg 2.9 if ((hd.xr != xres) | (hd.yr != yres)) {
252 greg 2.1 fprintf(stderr, "%s: mismatched resolution\n", fname);
253     fclose(fp);
254     return(NULL);
255     }
256     return(fp);
257     }
258    
259 greg 2.9 /* read in previous pixel data from output and rewind to data start */
260     int
261     reload_data(float *osum, FILE *fp)
262     {
263     long dstart;
264    
265     if (!osum | !fp)
266     return(0);
267     if ((dstart = ftell(fp)) < 0) {
268     fprintf(stderr, "%s: ftell() error in reload_data()\n",
269     gargv[0]);
270     return(0);
271     }
272 greg 2.11 if (out_type == DTfloat) {
273 greg 2.9 if (fread(osum, sizeof(float)*ncomp, (size_t)xres*yres, fp) !=
274     (size_t)xres*yres) {
275     fprintf(stderr, "%s: fread() error\n", gargv[0]);
276     return(0);
277     }
278     } else {
279     int y;
280     for (y = 0; y < yres; y++, osum += ncomp*xres)
281     if (freadsscan(osum, ncomp, xres, fp) < 0) {
282     fprintf(stderr, "%s: freadsscan() error\n", gargv[0]);
283     return(0);
284     }
285     }
286     if (fseek(fp, dstart, SEEK_SET) < 0) {
287     fprintf(stderr, "%s: fseek() error in reload_data()\n",
288     gargv[0]);
289     return(0);
290     }
291     return(1);
292     }
293    
294 greg 2.1 /* open output file or command (if !NULL) and write info header */
295     FILE *
296     open_output(char *ospec, int fno)
297     {
298     FILE *fp;
299    
300     if (!ospec) {
301     ospec = "<stdout>";
302     fp = stdout;
303     } else if (ospec[0] == '!') {
304     if (!(fp = popen(ospec+1, "w"))) {
305 greg 2.9 fprintf(stderr, "%s: cannot start: %s\n", gargv[0], ospec);
306 greg 2.1 return(NULL);
307     }
308 greg 2.3 } else if (!(fp = fopen(ospec, "w"))) {
309 greg 2.1 fprintf(stderr, "%s: cannot open for writing\n", ospec);
310     return(NULL);
311     }
312 greg 2.3 SET_FILE_BINARY(fp);
313 greg 2.1 newheader("RADIANCE", fp);
314 greg 2.2 if (cmtx->info) /* prepend matrix metadata */
315     fputs(cmtx->info, fp);
316     else
317     fputnow(fp);
318 greg 2.8 printargs(gargc, gargv, fp); /* this command */
319 greg 2.1 if (fno >= 0)
320     fprintf(fp, "FRAME=%d\n", fno);
321 greg 2.7 if (viewspec[0])
322     fputs(viewspec, fp);
323     if (pixasp[0])
324     fputs(pixasp, fp);
325 greg 2.1 switch (out_type) {
326     case DTfloat:
327     case DTdouble:
328     fprintf(fp, "NCOLS=%d\nNROWS=%d\n", xres, yres);
329     fputncomp(ncomp, fp);
330     fputendian(fp);
331     fputformat(cm_fmt_id[out_type], fp);
332     fputc('\n', fp);
333     break;
334     case DTrgbe:
335     fputformat(COLRFMT, fp);
336     fputc('\n', fp);
337     fprtresolu(xres, yres, fp);
338     break;
339     case DTxyze:
340     fputformat(CIEFMT, fp);
341     fputc('\n', fp);
342     fprtresolu(xres, yres, fp);
343     break;
344     case DTspec:
345     fputncomp(ncomp, fp);
346     fputwlsplit(cmtx->wlpart, fp);
347     fputformat(SPECFMT, fp);
348     fputc('\n', fp);
349     fprtresolu(xres, yres, fp);
350     break;
351     default:
352 greg 2.9 fprintf(stderr, "%s: unsupported output type!\n", gargv[0]);
353 greg 2.1 return(NULL);
354     }
355     if (fflush(fp) < 0) {
356     fprintf(stderr, "%s: write error\n", ospec);
357     fclose(fp);
358     return(NULL);
359     }
360     return(fp);
361     }
362    
363     /* run calculation from a single process */
364     int
365     solo_process(void)
366     {
367     float *osum = (float *)calloc((size_t)xres*yres, sizeof(float)*ncomp);
368     COLORV *iscan = (COLORV *)malloc(sizeof(COLORV)*ncomp*xres);
369     char fbuf[512];
370     int c;
371    
372     if (!osum | !iscan) {
373 greg 2.9 fprintf(stderr, "%s: annot allocate %dx%d %d-component accumulator\n",
374     gargv[0], xres, yres, ncomp);
375 greg 2.1 return(0);
376     }
377     if (sizeof(float) != sizeof(COLORV)) {
378 greg 2.9 fprintf(stderr, "%s: Code Error 1 in solo_process()\n", gargv[0]);
379 greg 2.1 return(0);
380     }
381 greg 2.9 /* run through each column/output */
382     for (c = 0; c < cmtx->ncols; c++) {
383     int rc = rowN - row0;
384 greg 2.1 FILE *fout;
385     int y;
386 greg 2.9 /* open output (load if multipass) */
387     if (out_spec) { /* file or command */
388     if (cmtx->ncols > 1 && !hasFormat(out_spec)) {
389     fprintf(stderr, "%s: sequential result must go to stdout\n",
390     gargv[0]);
391     return(0);
392     }
393     sprintf(fbuf, out_spec, c);
394     if (row0) { /* another pass -- get prev. data */
395     fout = open_iofile(fbuf, c);
396     if (!reload_data(osum, fout))
397     return(0);
398 greg 2.11 } else /* else new output (clobbers prev. file) */
399 greg 2.9 fout = open_output(fbuf, c-(cmtx->ncols==1));
400     } else { /* else stdout */
401     if ((out_type == DTfloat) & (cmtx->ncols > 1)) {
402     fprintf(stderr, "%s: float outputs must have separate destinations\n",
403     gargv[0]);
404     return(0);
405     }
406     strcpy(fbuf, "<stdout>");
407     fout = open_output(NULL, c-(cmtx->ncols==1));
408     }
409     if (!fout)
410     return(0); /* assume error was reported */
411     if (!row0 & (c > 0)) /* clear accumulator? */
412 greg 2.1 memset(osum, 0, sizeof(float)*ncomp*xres*yres);
413     while (rc-- > 0) { /* run through each input file */
414 greg 2.9 const int r = c&1 ? row0 + rc : rowN-1 - rc;
415 greg 2.1 const rmx_dtype *cval = rmx_val(cmtx, r, c);
416     FILE *finp;
417     int i, x;
418     for (i = ncomp; i--; )
419     if (cval[i] != 0) break;
420     if (i < 0) /* this coefficient is zero, skip */
421     continue;
422     sprintf(fbuf, in_spec, r);
423 greg 2.9 finp = open_iofile(fbuf, -1);
424 greg 2.1 if (!finp)
425     return(0);
426     for (y = 0; y < yres; y++) {
427     float *dst = osum + y*xres*ncomp;
428     if (in_type == DTfloat ? getbinary(iscan, sizeof(float)*ncomp,
429     xres, finp) != xres
430     : freadsscan(iscan, ncomp, xres, finp) < 0)
431     goto readerr;
432     if ((in_type == DTfloat) & iswapped)
433     swap32((char *)iscan, ncomp*xres);
434     for (x = 0; x < xres; x++, dst += ncomp)
435     for (i = ncomp; i--; )
436     dst[i] += cval[i]*iscan[x*ncomp + i];
437     }
438     fclose(finp);
439 greg 2.9 } /* write accumulated picture */
440 greg 2.1 if (out_type != DTfloat) {
441     for (y = 0; y < yres; y++)
442     if (fwritesscan(osum + (size_t)y*xres*ncomp,
443     ncomp, xres, fout) < 0)
444     goto writerr;
445     } else if (fwrite(osum, sizeof(float)*ncomp, (size_t)xres*yres, fout) !=
446     (size_t)xres*yres)
447     goto writerr;
448    
449     if (fbuf[0] == '!') {
450     if (pclose(fout) != 0) {
451 greg 2.9 fprintf(stderr, "%s: bad status from: %s\n", gargv[0], fbuf);
452 greg 2.1 return(0);
453     }
454     } else if (fout != stdout && fclose(fout) == EOF)
455     goto writerr;
456     }
457     free(osum); /* clean up on success */
458     free(iscan);
459     return(1);
460     readerr:
461     fprintf(stderr, "%s: read error\n", fbuf);
462     return(0);
463     writerr:
464     fprintf(stderr, "%s: write error\n", fbuf);
465     return(0);
466     }
467    
468 greg 2.7 #if defined(_WIN32) || defined(_WIN64)
469     #define multi_process solo_process
470     #else
471    
472 greg 2.2 /* allocate a scrambled index array of the specified length */
473     int *
474     scramble(int n)
475     {
476     int *scarr = (int *)malloc(sizeof(int)*n);
477     int i;
478    
479     if (!scarr) {
480 greg 2.9 fprintf(stderr, "%s: out of memory in scramble(%d)\n", gargv[0], n);
481 greg 2.2 exit(1);
482     }
483     for (i = n; i--; )
484     scarr[i] = i;
485     /* perform Fisher-Yates shuffle */
486     for (i = 0; i < n-1; i++) {
487     int ix = irandom(n-i) + i;
488     int ndx = scarr[i];
489     scarr[i] = scarr[ix];
490     scarr[ix] = ndx;
491     }
492     return(scarr);
493     }
494    
495 greg 2.1 /* run calculation on multiple processes, using memory maps and fork() */
496     int
497     multi_process(void)
498     {
499     int coff = nprocs;
500     int odd = 0;
501 greg 2.9 float *osum = NULL;
502     int *syarr = NULL;
503 greg 2.1 char fbuf[512];
504     int c;
505     /* sanity check */
506     if (sizeof(float) != sizeof(COLORV)) {
507 greg 2.9 fprintf(stderr, "%s: code Error 1 in multi_process()\n", gargv[0]);
508 greg 2.1 return(0);
509     }
510 greg 2.10 fflush(NULL); /* parent births helper subprocs */
511     while (--coff > 0) {
512 greg 2.1 int pid = fork();
513     if (pid < 0) {
514 greg 2.9 fprintf(stderr, "%s: fork() call failed!\n", gargv[0]);
515 greg 2.1 return(0);
516     }
517 greg 2.10 if (!pid) break; /* new child gets to work */
518 greg 2.1 }
519 greg 2.11 osum = (float *)calloc((size_t)xres*yres, sizeof(float)*ncomp);
520     if (!osum) {
521     fprintf(stderr, "%s: cannot allocate %dx%d %d-component accumulator\n",
522     gargv[0], xres, yres, ncomp);
523     return(0);
524 greg 2.1 }
525 greg 2.2 srandom(113*coff + 5669); /* randomize row access for this process */
526     syarr = scramble(yres);
527 greg 2.1 /* run through our unique set of columns */
528     for (c = coff; c < cmtx->ncols; c += nprocs) {
529 greg 2.9 int rc = rowN - row0;
530 greg 2.1 FILE *fout;
531     int y;
532 greg 2.9 /* create/load output */
533     sprintf(fbuf, out_spec, c);
534     if (row0) { /* making another pass? */
535     fout = open_iofile(fbuf, c);
536 greg 2.11 if (!reload_data(osum, fout))
537 greg 2.9 return(0);
538 greg 2.11 } else { /* else new output (clobbers prev. file) */
539 greg 2.9 fout = open_output(fbuf, c);
540     if (!fout) return(0);
541     if (c > coff) /* clear accumulator? */
542     memset(osum, 0, sizeof(float)*ncomp*xres*yres);
543     }
544 greg 2.1 while (rc-- > 0) { /* map & sum each input file */
545 greg 2.9 const int r = odd ? row0 + rc : rowN-1 - rc;
546 greg 2.1 const rmx_dtype *cval = rmx_val(cmtx, r, c);
547 greg 2.11 long dstart;
548 greg 2.9 size_t imaplen;
549 greg 2.1 void *imap;
550     FILE *finp;
551     float *dst;
552 greg 2.2 int i, x;
553 greg 2.1 for (i = ncomp; i--; )
554     if (cval[i] != 0) break;
555     if (i < 0) /* this coefficient is zero, skip */
556     continue;
557     sprintf(fbuf, in_spec, r);
558 greg 2.9 finp = open_iofile(fbuf, -1);
559 greg 2.1 if (!finp)
560     return(0);
561     dstart = ftell(finp);
562     if (dstart < 0) {
563     fprintf(stderr, "%s: ftell() failed!\n", fbuf);
564     return(0);
565     }
566     if (in_type == DTfloat && dstart%sizeof(float)) {
567     fprintf(stderr, "%s: float header misalignment\n", fbuf);
568     return(0);
569     }
570     i = in_type==DTfloat ? ncomp*(int)sizeof(float) : ncomp+1;
571 greg 2.9 imaplen = dstart + (size_t)yres*xres*i;
572     imap = mmap(NULL, imaplen, PROT_READ,
573 greg 2.1 MAP_FILE|MAP_SHARED, fileno(finp), 0);
574 greg 2.3 fclose(finp); /* will read from map (randomly) */
575 greg 2.1 if (imap == MAP_FAILED) {
576     fprintf(stderr, "%s: unable to map input file\n", fbuf);
577     return(0);
578     }
579 greg 2.2 if (in_type == DTfloat)
580     for (y = yres; y-- > 0; ) {
581     const float *fvp = (float *)((char *)imap + dstart) +
582     (size_t)ncomp*xres*syarr[y];
583     dst = osum + (size_t)ncomp*xres*syarr[y];
584     for (x = xres; x-- > 0; dst += ncomp, fvp += ncomp)
585     for (i = ncomp; i--; )
586     dst[i] += cval[i]*fvp[i];
587     }
588     else
589     for (y = yres; y-- > 0; ) {
590     const COLRV *cvp = (COLRV *)((char *)imap + dstart) +
591     (ncomp+1L)*xres*syarr[y];
592     dst = osum + (size_t)ncomp*xres*syarr[y];
593     for (x = xres; x-- > 0; dst += ncomp, cvp += ncomp+1) {
594     const rmx_dtype fe = cxponent[cvp[ncomp]];
595     for (i = ncomp; i--; )
596     dst[i] += cval[i]*(cvp[i]+(rmx_dtype).5)*fe;
597     }
598 greg 2.1 }
599 greg 2.9 munmap(imap, imaplen);
600     } /* write accumulated column picture */
601 greg 2.1 if (out_type != DTfloat) {
602     for (y = 0; y < yres; y++)
603     if (fwritesscan(osum + (size_t)y*xres*ncomp,
604     ncomp, xres, fout) < 0)
605     goto writerr;
606     } else if (fwrite(osum, sizeof(float)*ncomp, (size_t)xres*yres, fout) !=
607     (size_t)xres*yres)
608     goto writerr;
609    
610     if (fbuf[0] == '!') {
611     if (pclose(fout) != 0) {
612 greg 2.9 fprintf(stderr, "%s: bad status from: %s\n", gargv[0], fbuf);
613 greg 2.1 return(0);
614     }
615     } else if (fclose(fout) == EOF)
616     goto writerr;
617     odd = !odd; /* go back & forth to milk page cache */
618     }
619 greg 2.10 if (coff) _exit(0); /* child exits here */
620     /* but parent waits for children */
621 greg 2.11 free(osum);
622 greg 2.2 free(syarr);
623 greg 2.10 c = 0;
624 greg 2.1 while (++coff < nprocs) {
625     int st;
626 greg 2.9 if (wait(&st) < 0) {
627 greg 2.12 fprintf(stderr, "%s: warning - child disappeared\n", gargv[0]);
628 greg 2.1 break;
629 greg 2.9 }
630 greg 2.12 if (st) {
631     fprintf(stderr, "%s: bad exit status from child\n", gargv[0]);
632     c = st;
633     }
634 greg 2.1 }
635     return(c == 0);
636     writerr:
637     fprintf(stderr, "%s: write error\n", fbuf);
638     return(0);
639     }
640    
641 greg 2.7 #endif /* ! Windows */
642    
643 greg 2.1 int
644     main(int argc, char *argv[])
645     {
646 greg 2.9 double cacheGB = 0;
647     int rintvl;
648 greg 2.1 int a;
649    
650 greg 2.8 gargc = argc; /* for header output */
651     gargv = argv;
652    
653 greg 2.1 for (a = 1; a < argc-1 && argv[a][0] == '-'; a++)
654     switch (argv[a][1]) {
655     case 'o': /* output spec/format */
656     switch (argv[a][2]) {
657     case '\0':
658     out_spec = argv[++a];
659     break;
660     case 'f':
661     out_type = DTfloat;
662     break;
663     case 'c':
664     out_type = DTrgbe;
665     break;
666     default:
667     goto badopt;
668     }
669     break;
670 greg 2.9 case 'm': /* cache size in GigaBytes */
671     cacheGB = atof(argv[++a]);
672     break;
673 greg 2.1 case 'N': /* number of desired processes */
674     case 'n': /* quietly supported alternate */
675     nprocs = atoi(argv[++a]);
676     if (nprocs <= 0)
677     goto userr;
678     break;
679     default:;
680     badopt: fprintf(stderr, "%s: bad option: %s\n", argv[0], argv[a]);
681     goto userr;
682     return(1);
683     }
684     if ((argc-a < 1) | (argc-a > 2) || argv[a][0] == '-')
685     goto userr;
686     in_spec = argv[a];
687 greg 2.4 cmtx = rmx_load(argv[a+1]); /* loads from stdin if a+1==argc */
688 greg 2.1 if (cmtx == NULL)
689     return(1); /* error reported */
690 greg 2.9 cacheGB *= (cmtx->ncols > 1);
691     if (cacheGB > 0 && (!out_spec || *out_spec == '!')) {
692     fprintf(stderr, "%s: -m option incompatible with output to %s\n",
693     argv[0], out_spec ? "command" : "stdout");
694     return(1);
695     }
696 greg 2.3 if (nprocs > cmtx->ncols)
697     nprocs = cmtx->ncols;
698 greg 2.1 #if defined(_WIN32) || defined(_WIN64)
699     if (nprocs > 1) {
700     fprintf(stderr, "%s: warning - Windows only allows -N 1\n", argv[0]);
701     nprocs = 1;
702     }
703     #else
704     if ((nprocs > 1) & !out_spec) {
705     fprintf(stderr, "%s: multi-processing result cannot go to stdout\n",
706     argv[0]);
707     nprocs = 1;
708     }
709     if ((nprocs > 1) & iswapped && (in_type==DTfloat) | (in_type==DTdouble)) {
710     fprintf(stderr, "%s: multi-processing unsupported on swapped input\n",
711     argv[0]);
712     nprocs = 1;
713     }
714     #endif
715     if (cmtx->nrows > 1 && !hasFormat(in_spec)) {
716     fprintf(stderr, "%s: input specification '%s' needs %%d format\n",
717     argv[0], in_spec);
718     goto userr;
719     }
720     if (!get_iotypes())
721     return(1);
722 greg 2.9 if (cacheGB > 1e-4) { /* figure out # of passes => rintvl */
723     size_t inp_bytes = (in_type==DTfloat ? sizeof(float)*ncomp
724     : (size_t)(ncomp+1)) * xres*yres;
725 greg 2.13 size_t over_bytes = rmx_array_size(cmtx) +
726     sizeof(float)*ncomp*xres*yres +
727 greg 2.12 2*(out_type==DTfloat ? sizeof(float)*ncomp
728     : (size_t)(ncomp+1)) * xres*yres;
729 greg 2.9 int npasses = (double)inp_bytes*cmtx->nrows /
730 greg 2.12 (cacheGB*(1L<<30) - (double)over_bytes*nprocs) + 1;
731     if ((npasses <= 0) | (npasses*6 >= cmtx->nrows)) {
732     fprintf(stderr,
733     "%s: warning - insufficient cache space for multi-pass\n",
734     argv[0]);
735     npasses = 1;
736     }
737 greg 2.9 rintvl = cmtx->nrows / npasses;
738     rintvl += (rintvl*npasses < cmtx->nrows);
739     } else
740     rintvl = cmtx->nrows;
741 greg 2.12 /* make our output accumulation passes */
742 greg 2.9 for (row0 = 0; row0 < cmtx->nrows; row0 += rintvl) {
743     if ((rowN = row0 + rintvl) > cmtx->nrows)
744     rowN = cmtx->nrows;
745     if (nprocs==1 ? !solo_process() : !multi_process())
746     return(1);
747     }
748 greg 2.1 return(0);
749     userr:
750 greg 2.9 fprintf(stderr, "Usage: %s [-oc | -of][-o ospec][-N nproc][-m cacheGB] inpspec [mtx]\n",
751 greg 2.1 argv[0]);
752     return(1);
753     }