ViewVC Help
View File | Revision Log | Show Annotations | Download File | Root Listing
root/Development/ray/src/util/pvsum.c
Revision: 2.13
Committed: Thu Oct 30 20:54:27 2025 UTC (8 days, 21 hours ago) by greg
Content type: text/plain
Branch: MAIN
CVS Tags: HEAD
Changes since 2.12: +3 -2 lines
Log Message:
perf(pvsum): Forgot to include size of in-core matrix in overhead

File Contents

# Content
1 #ifndef lint
2 static const char RCSid[] = "$Id: pvsum.c,v 2.12 2025/10/30 16:47:13 greg Exp $";
3 #endif
4 /*
5 * pvsum.c - add together spectral and/or float pictures
6 * based on vector or matrix, similar to dctimestep
7 */
8
9 #include <math.h>
10 #include "rtio.h"
11 #include "resolu.h"
12 #include "platform.h"
13 #include "random.h"
14 #include "rmatrix.h"
15 #if !defined(_WIN32) && !defined(_WIN64)
16 #include <sys/mman.h>
17 #include <sys/wait.h>
18 #endif
19
20 #define VIEWSTR "VIEW=" /* borrowed from common/view.h */
21 #define VIEWSTRL 5
22
23 int nprocs = 1; /* # of calculation processes (Unix) */
24 int in_type = DTfromHeader; /* input data type */
25 int out_type = DTfromHeader; /* output data type */
26 char *in_spec = NULL; /* input specification */
27 char *out_spec = NULL; /* output file specification */
28
29 int iswapped = 0; /* input data is byte-swapped? */
30 int ncomp = 3; /* # input components */
31 int xres=0, yres=0; /* input image dimensions */
32 char viewspec[128] = ""; /* VIEW= line from first header */
33 char pixasp[48] = ""; /* PIXASPECT= line from header */
34
35 int gargc; /* global argc */
36 char **gargv; /* global argv */
37
38 RMATRIX *cmtx; /* coefficient matrix */
39 int row0, rowN; /* rows for current pass */
40
/*
 * Does the given spec contain an integer format?
 *
 * Finds the first '%' that is not part of a "%%" pair, then scans the
 * conversion that follows.  Returns 1 if it ends in one of "diouxX",
 * and 0 if there is no conversion, if s is NULL, or if the conversion
 * is a non-integer one.
 */
int
hasFormat(const char *s)
{
	const char	*cp;

	if (s == NULL)
		return(0);
	for ( ; ; ) {			/* locate first real conversion */
		s = strchr(s, '%');
		if (s == NULL)
			return(0);
		if (s[1] != '%')
			break;
		s += 2;			/* step over literal "%%" */
	}
	for (cp = s+1; *cp != '\0'; cp++) {
		if (strchr("diouxX", *cp) != NULL)
			return(1);	/* integer format */
		if (strchr("%fFeEgGaAcsb", *cp) != NULL)
			return(0);	/* non-integer format */
	}
	return(0);			/* conversion never completed */
}
60
61 /* get first input header data we'll need */
62 int
63 iheadline(char *s, void *p)
64 {
65 char fmt[MAXFMTLEN];
66 int i;
67
68 if (!strncmp(s, "NCOLS=", 6)) {
69 xres = atoi(s+6);
70 return(1);
71 }
72 if (!strncmp(s, "NROWS=", 6)) {
73 yres = atoi(s+6);
74 return(1);
75 }
76 if (isncomp(s)) {
77 ncomp = ncompval(s);
78 return(1);
79 }
80 if (formatval(fmt, s)) {
81 for (in_type = DTend; --in_type > DTfromHeader; )
82 if (!strcmp(fmt, cm_fmt_id[in_type]))
83 return(1);
84 return(-1);
85 }
86 i = isbigendian(s);
87 if (i >= 0) {
88 iswapped = (i != nativebigendian());
89 return(1);
90 }
91 if (!strncmp(s, VIEWSTR, VIEWSTRL)) {
92 strcpy(viewspec, s);
93 return(1);
94 }
95 if (isaspect(s)) {
96 strcpy(pixasp, s);
97 return(1);
98 }
99 return(0);
100 }
101
102 /* open initial file and get relevant dimensions and type */
103 int
104 get_iotypes(void)
105 {
106 char fbuf[256];
107 FILE *fp;
108
109 sprintf(fbuf, in_spec, 0);
110 fp = fopen(fbuf, "rb");
111 if (!fp) {
112 fprintf(stderr, "%s: cannot open for reading\n", fbuf);
113 return(0);
114 }
115 if (getheader(fp, iheadline, NULL) < 0) {
116 fprintf(stderr, "%s: bad header - wrong format?\n", fbuf);
117 fclose(fp);
118 return(0);
119 }
120 if ((xres <= 0) | (yres <= 0) && !fscnresolu(&xres, &yres, fp)) {
121 fprintf(stderr, "%s: missing input resolution\n", fbuf);
122 fclose(fp);
123 return(0);
124 }
125 if (nprocs > 1 && (in_type==DTrgbe) | (in_type==DTxyze)) {
126 long data_start = ftell(fp); /* make sure input flat */
127 off_t dend = lseek(fileno(fp), 0, SEEK_END);
128 if (dend < data_start + 4L*xres*yres) {
129 fprintf(stderr, "%s: warning - multi-processing requires flat input files\n",
130 gargv[0]);
131 nprocs = 1;
132 }
133 }
134 fclose(fp);
135 if ((cmtx->ncomp == 1) & (ncomp != 1)) {
136 double xfm[MAXCSAMP];
137 RMATRIX *nmtx;
138 int i;
139 for (i = ncomp; i--; )
140 xfm[i] = 1.;
141 nmtx = rmx_transform(cmtx, ncomp, xfm);
142 if (!nmtx)
143 return(0);
144 rmx_free(cmtx);
145 cmtx = nmtx;
146 } else if (cmtx->ncomp != ncomp) {
147 fprintf(stderr, "%s: operation %s needs %d components, has %d\n",
148 gargv[0], cmtx->ncols == 1 ? "vector" : "matrix",
149 ncomp, cmtx->ncomp);
150 return(0);
151 }
152 if ((in_type != DTrgbe) & (in_type != DTxyze) & (in_type != DTspec) &
153 (in_type != DTfloat)) {
154 fprintf(stderr, "%s: unsupported input data type '%s'\n",
155 fbuf, cm_fmt_id[in_type]);
156 return(0);
157 }
158 if ((out_type == DTrgbe) & (ncomp > 3))
159 out_type = DTspec;
160 else if (out_type == DTfromHeader ||
161 (out_type == DTrgbe) & (in_type != DTfloat))
162 out_type = in_type;
163 return(1);
164 }
165
166 struct hdata {
167 int xr, yr; /* resolution */
168 int fno; /* frame # */
169 char fmt[MAXFMTLEN]; /* format */
170 };
171
172 /* check subsequent headers match initial file */
173 int
174 checkline(char *s, void *p)
175 {
176 static int exposWarned = 0;
177 struct hdata *hp = (struct hdata *)p;
178
179 if (!strncmp(s, "NCOLS=", 6)) {
180 hp->xr = atoi(s+6);
181 if (hp->xr <= 0)
182 return(-1);
183 return(1);
184 }
185 if (!strncmp(s, "NROWS=", 6)) {
186 hp->yr = atoi(s+6);
187 if (hp->yr <= 0)
188 return(-1);
189 return(1);
190 }
191 if (!strncmp(s, "FRAME=", 6)) {
192 hp->fno = atoi(s+6);
193 return(1);
194 }
195 if (isncomp(s)) {
196 if (ncompval(s) != ncomp)
197 return(-1);
198 return(1);
199 }
200 if (isexpos(s)) {
201 if (!exposWarned && fabs(1. - exposval(s)) > 0.04) {
202 fprintf(stderr, "%s: warning - ignoring EXPOSURE setting(s)\n",
203 gargv[0]);
204 exposWarned++;
205 }
206 return(1);
207 }
208 if (formatval(hp->fmt, s))
209 return(1);
210
211 return(0);
212 }
213
214 /* open and check input/output file, read/write mode if fno >= 0 */
215 FILE *
216 open_iofile(char *fname, int fno)
217 {
218 struct hdata hd;
219 FILE *fp = fopen(fname, fno>=0 ? "r+b" : "rb");
220
221 if (!fp) {
222 fprintf(stderr, "%s: cannot open for reading%s\n",
223 fname, fno>=0 ? "/writing" : "");
224 return(NULL);
225 }
226 hd.xr = hd.yr = 0;
227 hd.fno = -1;
228 hd.fmt[0] = '\0';
229 if (getheader(fp, checkline, &hd) < 0) {
230 fprintf(stderr, "%s: bad/inconsistent header\n", fname);
231 fclose(fp);
232 return(NULL);
233 }
234 if ((hd.fno >= 0) & (fno >= 0) & (hd.fno != fno)) {
235 fprintf(stderr, "%s: unexpected frame number (%d != %d)\n",
236 fname, hd.fno, fno);
237 fclose(fp);
238 return(NULL);
239 }
240 if (strcmp(hd.fmt, cm_fmt_id[fno>=0 ? out_type : in_type])) {
241 fprintf(stderr, "%s: wrong format\n", fname);
242 fclose(fp);
243 return(NULL);
244 }
245 if ((hd.xr <= 0) | (hd.yr <= 0) &&
246 !fscnresolu(&hd.xr, &hd.yr, fp)) {
247 fprintf(stderr, "%s: missing resolution\n", fname);
248 fclose(fp);
249 return(NULL);
250 }
251 if ((hd.xr != xres) | (hd.yr != yres)) {
252 fprintf(stderr, "%s: mismatched resolution\n", fname);
253 fclose(fp);
254 return(NULL);
255 }
256 return(fp);
257 }
258
259 /* read in previous pixel data from output and rewind to data start */
260 int
261 reload_data(float *osum, FILE *fp)
262 {
263 long dstart;
264
265 if (!osum | !fp)
266 return(0);
267 if ((dstart = ftell(fp)) < 0) {
268 fprintf(stderr, "%s: ftell() error in reload_data()\n",
269 gargv[0]);
270 return(0);
271 }
272 if (out_type == DTfloat) {
273 if (fread(osum, sizeof(float)*ncomp, (size_t)xres*yres, fp) !=
274 (size_t)xres*yres) {
275 fprintf(stderr, "%s: fread() error\n", gargv[0]);
276 return(0);
277 }
278 } else {
279 int y;
280 for (y = 0; y < yres; y++, osum += ncomp*xres)
281 if (freadsscan(osum, ncomp, xres, fp) < 0) {
282 fprintf(stderr, "%s: freadsscan() error\n", gargv[0]);
283 return(0);
284 }
285 }
286 if (fseek(fp, dstart, SEEK_SET) < 0) {
287 fprintf(stderr, "%s: fseek() error in reload_data()\n",
288 gargv[0]);
289 return(0);
290 }
291 return(1);
292 }
293
294 /* open output file or command (if !NULL) and write info header */
295 FILE *
296 open_output(char *ospec, int fno)
297 {
298 FILE *fp;
299
300 if (!ospec) {
301 ospec = "<stdout>";
302 fp = stdout;
303 } else if (ospec[0] == '!') {
304 if (!(fp = popen(ospec+1, "w"))) {
305 fprintf(stderr, "%s: cannot start: %s\n", gargv[0], ospec);
306 return(NULL);
307 }
308 } else if (!(fp = fopen(ospec, "w"))) {
309 fprintf(stderr, "%s: cannot open for writing\n", ospec);
310 return(NULL);
311 }
312 SET_FILE_BINARY(fp);
313 newheader("RADIANCE", fp);
314 if (cmtx->info) /* prepend matrix metadata */
315 fputs(cmtx->info, fp);
316 else
317 fputnow(fp);
318 printargs(gargc, gargv, fp); /* this command */
319 if (fno >= 0)
320 fprintf(fp, "FRAME=%d\n", fno);
321 if (viewspec[0])
322 fputs(viewspec, fp);
323 if (pixasp[0])
324 fputs(pixasp, fp);
325 switch (out_type) {
326 case DTfloat:
327 case DTdouble:
328 fprintf(fp, "NCOLS=%d\nNROWS=%d\n", xres, yres);
329 fputncomp(ncomp, fp);
330 fputendian(fp);
331 fputformat(cm_fmt_id[out_type], fp);
332 fputc('\n', fp);
333 break;
334 case DTrgbe:
335 fputformat(COLRFMT, fp);
336 fputc('\n', fp);
337 fprtresolu(xres, yres, fp);
338 break;
339 case DTxyze:
340 fputformat(CIEFMT, fp);
341 fputc('\n', fp);
342 fprtresolu(xres, yres, fp);
343 break;
344 case DTspec:
345 fputncomp(ncomp, fp);
346 fputwlsplit(cmtx->wlpart, fp);
347 fputformat(SPECFMT, fp);
348 fputc('\n', fp);
349 fprtresolu(xres, yres, fp);
350 break;
351 default:
352 fprintf(stderr, "%s: unsupported output type!\n", gargv[0]);
353 return(NULL);
354 }
355 if (fflush(fp) < 0) {
356 fprintf(stderr, "%s: write error\n", ospec);
357 fclose(fp);
358 return(NULL);
359 }
360 return(fp);
361 }
362
363 /* run calculation from a single process */
364 int
365 solo_process(void)
366 {
367 float *osum = (float *)calloc((size_t)xres*yres, sizeof(float)*ncomp);
368 COLORV *iscan = (COLORV *)malloc(sizeof(COLORV)*ncomp*xres);
369 char fbuf[512];
370 int c;
371
372 if (!osum | !iscan) {
373 fprintf(stderr, "%s: annot allocate %dx%d %d-component accumulator\n",
374 gargv[0], xres, yres, ncomp);
375 return(0);
376 }
377 if (sizeof(float) != sizeof(COLORV)) {
378 fprintf(stderr, "%s: Code Error 1 in solo_process()\n", gargv[0]);
379 return(0);
380 }
381 /* run through each column/output */
382 for (c = 0; c < cmtx->ncols; c++) {
383 int rc = rowN - row0;
384 FILE *fout;
385 int y;
386 /* open output (load if multipass) */
387 if (out_spec) { /* file or command */
388 if (cmtx->ncols > 1 && !hasFormat(out_spec)) {
389 fprintf(stderr, "%s: sequential result must go to stdout\n",
390 gargv[0]);
391 return(0);
392 }
393 sprintf(fbuf, out_spec, c);
394 if (row0) { /* another pass -- get prev. data */
395 fout = open_iofile(fbuf, c);
396 if (!reload_data(osum, fout))
397 return(0);
398 } else /* else new output (clobbers prev. file) */
399 fout = open_output(fbuf, c-(cmtx->ncols==1));
400 } else { /* else stdout */
401 if ((out_type == DTfloat) & (cmtx->ncols > 1)) {
402 fprintf(stderr, "%s: float outputs must have separate destinations\n",
403 gargv[0]);
404 return(0);
405 }
406 strcpy(fbuf, "<stdout>");
407 fout = open_output(NULL, c-(cmtx->ncols==1));
408 }
409 if (!fout)
410 return(0); /* assume error was reported */
411 if (!row0 & (c > 0)) /* clear accumulator? */
412 memset(osum, 0, sizeof(float)*ncomp*xres*yres);
413 while (rc-- > 0) { /* run through each input file */
414 const int r = c&1 ? row0 + rc : rowN-1 - rc;
415 const rmx_dtype *cval = rmx_val(cmtx, r, c);
416 FILE *finp;
417 int i, x;
418 for (i = ncomp; i--; )
419 if (cval[i] != 0) break;
420 if (i < 0) /* this coefficient is zero, skip */
421 continue;
422 sprintf(fbuf, in_spec, r);
423 finp = open_iofile(fbuf, -1);
424 if (!finp)
425 return(0);
426 for (y = 0; y < yres; y++) {
427 float *dst = osum + y*xres*ncomp;
428 if (in_type == DTfloat ? getbinary(iscan, sizeof(float)*ncomp,
429 xres, finp) != xres
430 : freadsscan(iscan, ncomp, xres, finp) < 0)
431 goto readerr;
432 if ((in_type == DTfloat) & iswapped)
433 swap32((char *)iscan, ncomp*xres);
434 for (x = 0; x < xres; x++, dst += ncomp)
435 for (i = ncomp; i--; )
436 dst[i] += cval[i]*iscan[x*ncomp + i];
437 }
438 fclose(finp);
439 } /* write accumulated picture */
440 if (out_type != DTfloat) {
441 for (y = 0; y < yres; y++)
442 if (fwritesscan(osum + (size_t)y*xres*ncomp,
443 ncomp, xres, fout) < 0)
444 goto writerr;
445 } else if (fwrite(osum, sizeof(float)*ncomp, (size_t)xres*yres, fout) !=
446 (size_t)xres*yres)
447 goto writerr;
448
449 if (fbuf[0] == '!') {
450 if (pclose(fout) != 0) {
451 fprintf(stderr, "%s: bad status from: %s\n", gargv[0], fbuf);
452 return(0);
453 }
454 } else if (fout != stdout && fclose(fout) == EOF)
455 goto writerr;
456 }
457 free(osum); /* clean up on success */
458 free(iscan);
459 return(1);
460 readerr:
461 fprintf(stderr, "%s: read error\n", fbuf);
462 return(0);
463 writerr:
464 fprintf(stderr, "%s: write error\n", fbuf);
465 return(0);
466 }
467
468 #if defined(_WIN32) || defined(_WIN64)
469 #define multi_process solo_process
470 #else
471
472 /* allocate a scrambled index array of the specified length */
473 int *
474 scramble(int n)
475 {
476 int *scarr = (int *)malloc(sizeof(int)*n);
477 int i;
478
479 if (!scarr) {
480 fprintf(stderr, "%s: out of memory in scramble(%d)\n", gargv[0], n);
481 exit(1);
482 }
483 for (i = n; i--; )
484 scarr[i] = i;
485 /* perform Fisher-Yates shuffle */
486 for (i = 0; i < n-1; i++) {
487 int ix = irandom(n-i) + i;
488 int ndx = scarr[i];
489 scarr[i] = scarr[ix];
490 scarr[ix] = ndx;
491 }
492 return(scarr);
493 }
494
495 /* run calculation on multiple processes, using memory maps and fork() */
496 int
497 multi_process(void)
498 {
499 int coff = nprocs;
500 int odd = 0;
501 float *osum = NULL;
502 int *syarr = NULL;
503 char fbuf[512];
504 int c;
505 /* sanity check */
506 if (sizeof(float) != sizeof(COLORV)) {
507 fprintf(stderr, "%s: code Error 1 in multi_process()\n", gargv[0]);
508 return(0);
509 }
510 fflush(NULL); /* parent births helper subprocs */
511 while (--coff > 0) {
512 int pid = fork();
513 if (pid < 0) {
514 fprintf(stderr, "%s: fork() call failed!\n", gargv[0]);
515 return(0);
516 }
517 if (!pid) break; /* new child gets to work */
518 }
519 osum = (float *)calloc((size_t)xres*yres, sizeof(float)*ncomp);
520 if (!osum) {
521 fprintf(stderr, "%s: cannot allocate %dx%d %d-component accumulator\n",
522 gargv[0], xres, yres, ncomp);
523 return(0);
524 }
525 srandom(113*coff + 5669); /* randomize row access for this process */
526 syarr = scramble(yres);
527 /* run through our unique set of columns */
528 for (c = coff; c < cmtx->ncols; c += nprocs) {
529 int rc = rowN - row0;
530 FILE *fout;
531 int y;
532 /* create/load output */
533 sprintf(fbuf, out_spec, c);
534 if (row0) { /* making another pass? */
535 fout = open_iofile(fbuf, c);
536 if (!reload_data(osum, fout))
537 return(0);
538 } else { /* else new output (clobbers prev. file) */
539 fout = open_output(fbuf, c);
540 if (!fout) return(0);
541 if (c > coff) /* clear accumulator? */
542 memset(osum, 0, sizeof(float)*ncomp*xres*yres);
543 }
544 while (rc-- > 0) { /* map & sum each input file */
545 const int r = odd ? row0 + rc : rowN-1 - rc;
546 const rmx_dtype *cval = rmx_val(cmtx, r, c);
547 long dstart;
548 size_t imaplen;
549 void *imap;
550 FILE *finp;
551 float *dst;
552 int i, x;
553 for (i = ncomp; i--; )
554 if (cval[i] != 0) break;
555 if (i < 0) /* this coefficient is zero, skip */
556 continue;
557 sprintf(fbuf, in_spec, r);
558 finp = open_iofile(fbuf, -1);
559 if (!finp)
560 return(0);
561 dstart = ftell(finp);
562 if (dstart < 0) {
563 fprintf(stderr, "%s: ftell() failed!\n", fbuf);
564 return(0);
565 }
566 if (in_type == DTfloat && dstart%sizeof(float)) {
567 fprintf(stderr, "%s: float header misalignment\n", fbuf);
568 return(0);
569 }
570 i = in_type==DTfloat ? ncomp*(int)sizeof(float) : ncomp+1;
571 imaplen = dstart + (size_t)yres*xres*i;
572 imap = mmap(NULL, imaplen, PROT_READ,
573 MAP_FILE|MAP_SHARED, fileno(finp), 0);
574 fclose(finp); /* will read from map (randomly) */
575 if (imap == MAP_FAILED) {
576 fprintf(stderr, "%s: unable to map input file\n", fbuf);
577 return(0);
578 }
579 if (in_type == DTfloat)
580 for (y = yres; y-- > 0; ) {
581 const float *fvp = (float *)((char *)imap + dstart) +
582 (size_t)ncomp*xres*syarr[y];
583 dst = osum + (size_t)ncomp*xres*syarr[y];
584 for (x = xres; x-- > 0; dst += ncomp, fvp += ncomp)
585 for (i = ncomp; i--; )
586 dst[i] += cval[i]*fvp[i];
587 }
588 else
589 for (y = yres; y-- > 0; ) {
590 const COLRV *cvp = (COLRV *)((char *)imap + dstart) +
591 (ncomp+1L)*xres*syarr[y];
592 dst = osum + (size_t)ncomp*xres*syarr[y];
593 for (x = xres; x-- > 0; dst += ncomp, cvp += ncomp+1) {
594 const rmx_dtype fe = cxponent[cvp[ncomp]];
595 for (i = ncomp; i--; )
596 dst[i] += cval[i]*(cvp[i]+(rmx_dtype).5)*fe;
597 }
598 }
599 munmap(imap, imaplen);
600 } /* write accumulated column picture */
601 if (out_type != DTfloat) {
602 for (y = 0; y < yres; y++)
603 if (fwritesscan(osum + (size_t)y*xres*ncomp,
604 ncomp, xres, fout) < 0)
605 goto writerr;
606 } else if (fwrite(osum, sizeof(float)*ncomp, (size_t)xres*yres, fout) !=
607 (size_t)xres*yres)
608 goto writerr;
609
610 if (fbuf[0] == '!') {
611 if (pclose(fout) != 0) {
612 fprintf(stderr, "%s: bad status from: %s\n", gargv[0], fbuf);
613 return(0);
614 }
615 } else if (fclose(fout) == EOF)
616 goto writerr;
617 odd = !odd; /* go back & forth to milk page cache */
618 }
619 if (coff) _exit(0); /* child exits here */
620 /* but parent waits for children */
621 free(osum);
622 free(syarr);
623 c = 0;
624 while (++coff < nprocs) {
625 int st;
626 if (wait(&st) < 0) {
627 fprintf(stderr, "%s: warning - child disappeared\n", gargv[0]);
628 break;
629 }
630 if (st) {
631 fprintf(stderr, "%s: bad exit status from child\n", gargv[0]);
632 c = st;
633 }
634 }
635 return(c == 0);
636 writerr:
637 fprintf(stderr, "%s: write error\n", fbuf);
638 return(0);
639 }
640
641 #endif /* ! Windows */
642
643 int
644 main(int argc, char *argv[])
645 {
646 double cacheGB = 0;
647 int rintvl;
648 int a;
649
650 gargc = argc; /* for header output */
651 gargv = argv;
652
653 for (a = 1; a < argc-1 && argv[a][0] == '-'; a++)
654 switch (argv[a][1]) {
655 case 'o': /* output spec/format */
656 switch (argv[a][2]) {
657 case '\0':
658 out_spec = argv[++a];
659 break;
660 case 'f':
661 out_type = DTfloat;
662 break;
663 case 'c':
664 out_type = DTrgbe;
665 break;
666 default:
667 goto badopt;
668 }
669 break;
670 case 'm': /* cache size in GigaBytes */
671 cacheGB = atof(argv[++a]);
672 break;
673 case 'N': /* number of desired processes */
674 case 'n': /* quietly supported alternate */
675 nprocs = atoi(argv[++a]);
676 if (nprocs <= 0)
677 goto userr;
678 break;
679 default:;
680 badopt: fprintf(stderr, "%s: bad option: %s\n", argv[0], argv[a]);
681 goto userr;
682 return(1);
683 }
684 if ((argc-a < 1) | (argc-a > 2) || argv[a][0] == '-')
685 goto userr;
686 in_spec = argv[a];
687 cmtx = rmx_load(argv[a+1]); /* loads from stdin if a+1==argc */
688 if (cmtx == NULL)
689 return(1); /* error reported */
690 cacheGB *= (cmtx->ncols > 1);
691 if (cacheGB > 0 && (!out_spec || *out_spec == '!')) {
692 fprintf(stderr, "%s: -m option incompatible with output to %s\n",
693 argv[0], out_spec ? "command" : "stdout");
694 return(1);
695 }
696 if (nprocs > cmtx->ncols)
697 nprocs = cmtx->ncols;
698 #if defined(_WIN32) || defined(_WIN64)
699 if (nprocs > 1) {
700 fprintf(stderr, "%s: warning - Windows only allows -N 1\n", argv[0]);
701 nprocs = 1;
702 }
703 #else
704 if ((nprocs > 1) & !out_spec) {
705 fprintf(stderr, "%s: multi-processing result cannot go to stdout\n",
706 argv[0]);
707 nprocs = 1;
708 }
709 if ((nprocs > 1) & iswapped && (in_type==DTfloat) | (in_type==DTdouble)) {
710 fprintf(stderr, "%s: multi-processing unsupported on swapped input\n",
711 argv[0]);
712 nprocs = 1;
713 }
714 #endif
715 if (cmtx->nrows > 1 && !hasFormat(in_spec)) {
716 fprintf(stderr, "%s: input specification '%s' needs %%d format\n",
717 argv[0], in_spec);
718 goto userr;
719 }
720 if (!get_iotypes())
721 return(1);
722 if (cacheGB > 1e-4) { /* figure out # of passes => rintvl */
723 size_t inp_bytes = (in_type==DTfloat ? sizeof(float)*ncomp
724 : (size_t)(ncomp+1)) * xres*yres;
725 size_t over_bytes = rmx_array_size(cmtx) +
726 sizeof(float)*ncomp*xres*yres +
727 2*(out_type==DTfloat ? sizeof(float)*ncomp
728 : (size_t)(ncomp+1)) * xres*yres;
729 int npasses = (double)inp_bytes*cmtx->nrows /
730 (cacheGB*(1L<<30) - (double)over_bytes*nprocs) + 1;
731 if ((npasses <= 0) | (npasses*6 >= cmtx->nrows)) {
732 fprintf(stderr,
733 "%s: warning - insufficient cache space for multi-pass\n",
734 argv[0]);
735 npasses = 1;
736 }
737 rintvl = cmtx->nrows / npasses;
738 rintvl += (rintvl*npasses < cmtx->nrows);
739 } else
740 rintvl = cmtx->nrows;
741 /* make our output accumulation passes */
742 for (row0 = 0; row0 < cmtx->nrows; row0 += rintvl) {
743 if ((rowN = row0 + rintvl) > cmtx->nrows)
744 rowN = cmtx->nrows;
745 if (nprocs==1 ? !solo_process() : !multi_process())
746 return(1);
747 }
748 return(0);
749 userr:
750 fprintf(stderr, "Usage: %s [-oc | -of][-o ospec][-N nproc][-m cacheGB] inpspec [mtx]\n",
751 argv[0]);
752 return(1);
753 }