ViewVC Help
View File | Revision Log | Show Annotations | Download File | Root Listing
root/radiance/ray/src/util/pvsum.c
Revision: 2.2
Committed: Thu Mar 27 16:35:00 2025 UTC (5 weeks, 1 day ago) by greg
Content type: text/plain
Branch: MAIN
Changes since 2.1: +55 -20 lines
Log Message:
perf(pvsum): Randomized row access in child processes to avoid simultaneous page faults

File Contents

# Content
1 #ifndef lint
2 static const char RCSid[] = "$Id: pvsum.c,v 2.1 2025/03/27 01:26:55 greg Exp $";
3 #endif
4 /*
5 * pvsum.c - add together spectral and/or float pictures
6 * based on vector or matrix, similar to dctimestep
7 */
8
9 #include <math.h>
10 #include "rtio.h"
11 #include "resolu.h"
12 #include "platform.h"
13 #include "paths.h"
14 #include "random.h"
15 #include "rmatrix.h"
16 #if !defined(_WIN32) && !defined(_WIN64)
17 #include <sys/mman.h>
18 #endif
19
20 int nprocs = 1; /* # of calculation processes (Unix) */
21 int in_type = DTfromHeader; /* input data type */
22 int out_type = DTfromHeader; /* output data type */
23 char *in_spec = NULL; /* input specification */
24 char *out_spec = NULL; /* output file specification */
25
26 int iswapped = 0; /* input data is byte-swapped? */
27 int ncomp = 3; /* # input components */
28 int xres=0, yres=0; /* input image dimensions */
29
30 RMATRIX *cmtx = NULL; /* coefficient matrix */
31
32 /* does the given spec contain integer format? */
33 int
34 hasFormat(const char *s)
35 {
36 restart:
37 if (s) s = strchr(s, '%');
38 if (!s) return(0);
39 if (s[1] == '%') { /* "%%" ? */
40 s += 2;
41 goto restart;
42 }
43 while (*++s) {
44 if (strchr("diouxX", *s))
45 return(1); /* integer format */
46 if (strchr("%fFeEgGaAcsb", *s))
47 break; /* non-integer format */
48 }
49 return(0);
50 }
51
52 /* get first input header data we'll need */
53 int
54 iheadline(char *s, void *p)
55 {
56 char fmt[MAXFMTLEN];
57 int i;
58
59 if (!strncmp(s, "NCOLS=", 6)) {
60 xres = atoi(s+6);
61 return(1);
62 }
63 if (!strncmp(s, "NROWS=", 6)) {
64 yres = atoi(s+6);
65 return(1);
66 }
67 if (isncomp(s)) {
68 ncomp = ncompval(s);
69 return(1);
70 }
71 if (isexpos(s)) {
72 if (fabs(1. - exposval(s)) > 0.04)
73 fputs("Warning - ignoring EXPOSURE setting\n", stderr);
74 return(1);
75 }
76 if (formatval(fmt, s)) {
77 for (in_type = DTend; --in_type > DTfromHeader; )
78 if (!strcmp(fmt, cm_fmt_id[in_type]))
79 return(1);
80 return(-1);
81 }
82 i = isbigendian(s);
83 if (i >= 0) {
84 iswapped = (i != nativebigendian());
85 return(1);
86 }
87 return(0);
88 }
89
90 /* open initial file and get relevant dimensions and type */
91 int
92 get_iotypes(void)
93 {
94 char fbuf[256];
95 FILE *fp;
96
97 sprintf(fbuf, in_spec, 0);
98 fp = fopen(fbuf, "rb");
99 if (!fp) {
100 fprintf(stderr, "%s: cannot open for reading\n", fbuf);
101 return(0);
102 }
103 if (getheader(fp, iheadline, NULL) < 0) {
104 fprintf(stderr, "%s: bad header - wrong format?\n", fbuf);
105 fclose(fp);
106 return(0);
107 }
108 if ((xres <= 0) | (yres <= 0) && !fscnresolu(&xres, &yres, fp)) {
109 fprintf(stderr, "%s: missing input resolution\n", fbuf);
110 fclose(fp);
111 return(0);
112 }
113 if (nprocs > 1 && (in_type==DTrgbe) | (in_type==DTxyze)) {
114 long data_start = ftell(fp); /* make sure input flat */
115 off_t dend = lseek(fileno(fp), 0, SEEK_END);
116 if (dend < data_start + 4L*xres*yres) {
117 fputs("Warning - multi-processing requires flat input files\n",
118 stderr);
119 nprocs = 1;
120 }
121 }
122 fclose(fp);
123 if ((cmtx->ncomp == 1) & (ncomp != 1)) {
124 double xfm[MAXCSAMP];
125 RMATRIX *nmtx;
126 int i;
127 for (i = ncomp; i--; )
128 xfm[i] = 1.;
129 nmtx = rmx_transform(cmtx, ncomp, xfm);
130 if (!nmtx)
131 return(0);
132 rmx_free(cmtx);
133 cmtx = nmtx;
134 } else if (cmtx->ncomp != ncomp) {
135 fprintf(stderr, "operation %s needs %d components, has %d\n",
136 cmtx->ncols == 1 ? "vector" : "matrix",
137 ncomp, cmtx->ncomp);
138 return(0);
139 }
140 if ((in_type != DTrgbe) & (in_type != DTxyze) & (in_type != DTspec) &
141 (in_type != DTfloat)) {
142 fprintf(stderr, "%s unsupported input data type: %s\n",
143 fbuf, cm_fmt_id[in_type]);
144 return(0);
145 }
146 if ((out_type == DTrgbe) & (ncomp > 3))
147 out_type = DTspec;
148 else if (out_type == DTfromHeader ||
149 (out_type == DTrgbe) & (in_type != DTfloat))
150 out_type = in_type;
151 return(1);
152 }
153
154 /* check subsequent headers match initial file */
155 int
156 checkline(char *s, void *p)
157 {
158 int *xyres = (int *)p;
159 char fmt[MAXFMTLEN];
160
161 if (!strncmp(s, "NCOLS=", 6)) {
162 xyres[0] = atoi(s+6);
163 if (xyres[0] <= 0)
164 return(-1);
165 return(1);
166 }
167 if (!strncmp(s, "NROWS=", 6)) {
168 xyres[1] = atoi(s+6);
169 if (xyres[1] <= 0)
170 return(-1);
171 return(1);
172 }
173 if (isncomp(s)) {
174 if (ncompval(s) != ncomp)
175 return(-1);
176 return(1);
177 }
178 if (isexpos(s)) {
179 if (fabs(1. - exposval(s)) > 0.04)
180 fputs("Warning - ignoring EXPOSURE setting\n", stderr);
181 return(1);
182 }
183 if (formatval(fmt, s)) {
184 if (strcmp(fmt, cm_fmt_id[in_type]))
185 return(-1);
186 return(1);
187 }
188 return(0);
189 }
190
191 /* open and check input file */
192 FILE *
193 open_input(char *fname)
194 {
195 int xyres[2];
196 FILE *fp = fopen(fname, "rb");
197
198 if (!fp) {
199 fprintf(stderr, "%s: cannot open for reading\n", fname);
200 return(NULL);
201 }
202 xyres[0] = xyres[1] = 0;
203 if (getheader(fp, checkline, xyres) < 0) {
204 fprintf(stderr, "%s: bad/inconsistent header\n", fname);
205 fclose(fp);
206 return(NULL);
207 }
208 if ((xyres[0] <= 0) | (xyres[1] <= 0) &&
209 !fscnresolu(&xyres[0], &xyres[1], fp)) {
210 fprintf(stderr, "%s: missing resolution\n", fname);
211 fclose(fp);
212 return(NULL);
213 }
214 if ((xyres[0] != xres) | (xyres[1] != yres)) {
215 fprintf(stderr, "%s: mismatched resolution\n", fname);
216 fclose(fp);
217 return(NULL);
218 }
219 return(fp);
220 }
221
222 /* open output file or command (if !NULL) and write info header */
223 FILE *
224 open_output(char *ospec, int fno)
225 {
226 FILE *fp;
227
228 if (!ospec) {
229 ospec = "<stdout>";
230 fp = stdout;
231 SET_FILE_BINARY(fp);
232 } else if (ospec[0] == '!') {
233 if (!(fp = popen(ospec+1, "w"))) {
234 fprintf(stderr, "Cannot start: %s\n", ospec);
235 return(NULL);
236 }
237 SET_FILE_BINARY(fp);
238 } else if (!(fp = fopen(ospec, "wb"))) {
239 fprintf(stderr, "%s: cannot open for writing\n", ospec);
240 return(NULL);
241 }
242 newheader("RADIANCE", fp);
243 if (cmtx->info) /* prepend matrix metadata */
244 fputs(cmtx->info, fp);
245 else
246 fputnow(fp);
247 if (fno >= 0)
248 fprintf(fp, "FRAME=%d\n", fno);
249 switch (out_type) {
250 case DTfloat:
251 case DTdouble:
252 fprintf(fp, "NCOLS=%d\nNROWS=%d\n", xres, yres);
253 fputncomp(ncomp, fp);
254 fputendian(fp);
255 fputformat(cm_fmt_id[out_type], fp);
256 fputc('\n', fp);
257 break;
258 case DTrgbe:
259 fputformat(COLRFMT, fp);
260 fputc('\n', fp);
261 fprtresolu(xres, yres, fp);
262 break;
263 case DTxyze:
264 fputformat(CIEFMT, fp);
265 fputc('\n', fp);
266 fprtresolu(xres, yres, fp);
267 break;
268 case DTspec:
269 fputncomp(ncomp, fp);
270 fputwlsplit(cmtx->wlpart, fp);
271 fputformat(SPECFMT, fp);
272 fputc('\n', fp);
273 fprtresolu(xres, yres, fp);
274 break;
275 default:
276 fputs("Unsupported output type!\n", stderr);
277 return(NULL);
278 }
279 if (fflush(fp) < 0) {
280 fprintf(stderr, "%s: write error\n", ospec);
281 fclose(fp);
282 return(NULL);
283 }
284 return(fp);
285 }
286
287 /* run calculation from a single process */
288 int
289 solo_process(void)
290 {
291 float *osum = (float *)calloc((size_t)xres*yres, sizeof(float)*ncomp);
292 COLORV *iscan = (COLORV *)malloc(sizeof(COLORV)*ncomp*xres);
293 char fbuf[512];
294 int c;
295
296 if (!osum | !iscan) {
297 fprintf(stderr, "Cannot allocate %dx%d %d-component accumulator\n",
298 xres, yres, ncomp);
299 return(0);
300 }
301 if (sizeof(float) != sizeof(COLORV)) {
302 fputs("Code Error 1 in solo_process()\n", stderr);
303 return(0);
304 }
305 for (c = 0; c < cmtx->ncols; c++) { /* run through each column/output */
306 FILE *fout;
307 int y;
308 int rc = cmtx->nrows;
309 if (c > 0) /* clear accumulator? */
310 memset(osum, 0, sizeof(float)*ncomp*xres*yres);
311 while (rc-- > 0) { /* run through each input file */
312 const int r = c&1 ? rc : cmtx->nrows-1 - rc;
313 const rmx_dtype *cval = rmx_val(cmtx, r, c);
314 FILE *finp;
315 int i, x;
316 for (i = ncomp; i--; )
317 if (cval[i] != 0) break;
318 if (i < 0) /* this coefficient is zero, skip */
319 continue;
320 sprintf(fbuf, in_spec, r);
321 finp = open_input(fbuf);
322 if (!finp)
323 return(0);
324 for (y = 0; y < yres; y++) {
325 float *dst = osum + y*xres*ncomp;
326 if (in_type == DTfloat ? getbinary(iscan, sizeof(float)*ncomp,
327 xres, finp) != xres
328 : freadsscan(iscan, ncomp, xres, finp) < 0)
329 goto readerr;
330 if ((in_type == DTfloat) & iswapped)
331 swap32((char *)iscan, ncomp*xres);
332 for (x = 0; x < xres; x++, dst += ncomp)
333 for (i = ncomp; i--; )
334 dst[i] += cval[i]*iscan[x*ncomp + i];
335 }
336 fclose(finp);
337 } /* write out accumulated column result... */
338 if (out_spec) { /* ...to file or command */
339 if (cmtx->ncols > 1 && !hasFormat(out_spec)) {
340 fputs("Sequential result must go to stdout\n", stderr);
341 return(0);
342 }
343 sprintf(fbuf, out_spec, c);
344 fout = open_output(fbuf, c-(cmtx->ncols==1));
345 } else { /* ...to stdout */
346 if ((out_type == DTfloat) & (cmtx->ncols > 1)) {
347 fputs("Float outputs must have separate destinations\n",
348 stderr);
349 return(0);
350 }
351 strcpy(fbuf, "<stdout>");
352 fout = open_output(NULL, c-(cmtx->ncols==1));
353 }
354 if (!fout)
355 return(0); /* assume error was reported */
356 if (out_type != DTfloat) {
357 for (y = 0; y < yres; y++)
358 if (fwritesscan(osum + (size_t)y*xres*ncomp,
359 ncomp, xres, fout) < 0)
360 goto writerr;
361 } else if (fwrite(osum, sizeof(float)*ncomp, (size_t)xres*yres, fout) !=
362 (size_t)xres*yres)
363 goto writerr;
364
365 if (fbuf[0] == '!') {
366 if (pclose(fout) != 0) {
367 fprintf(stderr, "Bad status from: %s\n", fbuf);
368 return(0);
369 }
370 } else if (fout != stdout && fclose(fout) == EOF)
371 goto writerr;
372 }
373 free(osum); /* clean up on success */
374 free(iscan);
375 return(1);
376 readerr:
377 fprintf(stderr, "%s: read error\n", fbuf);
378 return(0);
379 writerr:
380 fprintf(stderr, "%s: write error\n", fbuf);
381 return(0);
382 }
383
384 /* allocate a scrambled index array of the specified length */
385 int *
386 scramble(int n)
387 {
388 int *scarr = (int *)malloc(sizeof(int)*n);
389 int i;
390
391 if (!scarr) {
392 fprintf(stderr, "Out of memory in scramble(%d)\n", n);
393 exit(1);
394 }
395 for (i = n; i--; )
396 scarr[i] = i;
397 /* perform Fisher-Yates shuffle */
398 for (i = 0; i < n-1; i++) {
399 int ix = irandom(n-i) + i;
400 int ndx = scarr[i];
401 scarr[i] = scarr[ix];
402 scarr[ix] = ndx;
403 }
404 return(scarr);
405 }
406
407 /* run calculation on multiple processes, using memory maps and fork() */
408 int
409 multi_process(void)
410 {
411 #if defined(_WIN32) || defined(_WIN64)
412 fputs("Bad call to multi_process()\n", stderr);
413 return(0);
414 #else
415 int coff = nprocs;
416 int odd = 0;
417 char fbuf[512];
418 float *osum;
419 int *syarr;
420 int c;
421 /* sanity check */
422 if (sizeof(float) != sizeof(COLORV)) {
423 fputs("Code Error 1 in multi_process()\n", stderr);
424 return(0);
425 }
426 while (--coff > 0) { /* parent births children */
427 int pid = fork();
428 if (pid < 0) {
429 fputs("fork() call failed!\n", stderr);
430 return(0);
431 }
432 if (pid == 0) break; /* child gets to work */
433 }
434 osum = (float *)calloc((size_t)xres*yres, sizeof(float)*ncomp);
435 if (!osum) {
436 fprintf(stderr, "Cannot allocate %dx%d %d-component accumulator\n",
437 xres, yres, ncomp);
438 return(0);
439 }
440 srandom(113*coff + 5669); /* randomize row access for this process */
441 syarr = scramble(yres);
442 /* run through our unique set of columns */
443 for (c = coff; c < cmtx->ncols; c += nprocs) {
444 FILE *fout;
445 int y;
446 int rc = cmtx->nrows;
447 if (c > coff) /* clear accumulator? */
448 memset(osum, 0, sizeof(float)*ncomp*xres*yres);
449 while (rc-- > 0) { /* map & sum each input file */
450 const int r = odd ? rc : cmtx->nrows-1 - rc;
451 const rmx_dtype *cval = rmx_val(cmtx, r, c);
452 long dstart;
453 size_t maplen;
454 void *imap;
455 FILE *finp;
456 float *dst;
457 int i, x;
458 for (i = ncomp; i--; )
459 if (cval[i] != 0) break;
460 if (i < 0) /* this coefficient is zero, skip */
461 continue;
462 sprintf(fbuf, in_spec, r);
463 finp = open_input(fbuf);
464 if (!finp)
465 return(0);
466 dstart = ftell(finp);
467 if (dstart < 0) {
468 fprintf(stderr, "%s: ftell() failed!\n", fbuf);
469 return(0);
470 }
471 if (in_type == DTfloat && dstart%sizeof(float)) {
472 fprintf(stderr, "%s: float header misalignment\n", fbuf);
473 return(0);
474 }
475 i = in_type==DTfloat ? ncomp*(int)sizeof(float) : ncomp+1;
476 maplen = dstart + yres*xres*i;
477 imap = mmap(NULL, maplen, PROT_READ,
478 MAP_FILE|MAP_SHARED, fileno(finp), 0);
479 fclose(finp); /* will load from map (randomly) */
480 if (imap == MAP_FAILED) {
481 fprintf(stderr, "%s: unable to map input file\n", fbuf);
482 return(0);
483 }
484 if (in_type == DTfloat)
485 for (y = yres; y-- > 0; ) {
486 const float *fvp = (float *)((char *)imap + dstart) +
487 (size_t)ncomp*xres*syarr[y];
488 dst = osum + (size_t)ncomp*xres*syarr[y];
489 for (x = xres; x-- > 0; dst += ncomp, fvp += ncomp)
490 for (i = ncomp; i--; )
491 dst[i] += cval[i]*fvp[i];
492 }
493 else
494 for (y = yres; y-- > 0; ) {
495 const COLRV *cvp = (COLRV *)((char *)imap + dstart) +
496 (ncomp+1L)*xres*syarr[y];
497 dst = osum + (size_t)ncomp*xres*syarr[y];
498 for (x = xres; x-- > 0; dst += ncomp, cvp += ncomp+1) {
499 const rmx_dtype fe = cxponent[cvp[ncomp]];
500 for (i = ncomp; i--; )
501 dst[i] += cval[i]*(cvp[i]+(rmx_dtype).5)*fe;
502 }
503 }
504 munmap(imap, maplen);
505 } /* write out accumulated column result */
506 sprintf(fbuf, out_spec, c);
507 fout = open_output(fbuf, c);
508 if (!fout)
509 return(0); /* assume error was reported */
510 if (out_type != DTfloat) {
511 for (y = 0; y < yres; y++)
512 if (fwritesscan(osum + (size_t)y*xres*ncomp,
513 ncomp, xres, fout) < 0)
514 goto writerr;
515 } else if (fwrite(osum, sizeof(float)*ncomp, (size_t)xres*yres, fout) !=
516 (size_t)xres*yres)
517 goto writerr;
518
519 if (fbuf[0] == '!') {
520 if (pclose(fout) != 0) {
521 fprintf(stderr, "Bad status from: %s\n", fbuf);
522 return(0);
523 }
524 } else if (fclose(fout) == EOF)
525 goto writerr;
526 odd = !odd; /* go back & forth to milk page cache */
527 }
528 free(osum);
529 free(syarr);
530 if (coff) /* children return here... */
531 return(1);
532 c = 0; /* ...but parent waits for children */
533 while (++coff < nprocs) {
534 int st;
535 if (wait(&st) < 0)
536 break;
537 if (st) c = st;
538 }
539 return(c == 0);
540 writerr:
541 fprintf(stderr, "%s: write error\n", fbuf);
542 return(0);
543 #endif
544 }
545
546 int
547 main(int argc, char *argv[])
548 {
549 int a;
550
551 for (a = 1; a < argc-1 && argv[a][0] == '-'; a++)
552 switch (argv[a][1]) {
553 case 'o': /* output spec/format */
554 switch (argv[a][2]) {
555 case '\0':
556 out_spec = argv[++a];
557 break;
558 case 'f':
559 out_type = DTfloat;
560 break;
561 case 'c':
562 out_type = DTrgbe;
563 break;
564 default:
565 goto badopt;
566 }
567 break;
568 case 'N': /* number of desired processes */
569 case 'n': /* quietly supported alternate */
570 nprocs = atoi(argv[++a]);
571 if (nprocs <= 0)
572 goto userr;
573 break;
574 default:;
575 badopt: fprintf(stderr, "%s: bad option: %s\n", argv[0], argv[a]);
576 goto userr;
577 return(1);
578 }
579 if ((argc-a < 1) | (argc-a > 2) || argv[a][0] == '-')
580 goto userr;
581 in_spec = argv[a];
582 cmtx = rmx_load(argv[a+1], RMPnone); /* may load from stdin */
583 if (cmtx == NULL)
584 return(1); /* error reported */
585 #if defined(_WIN32) || defined(_WIN64)
586 if (nprocs > 1) {
587 fprintf(stderr, "%s: warning - Windows only allows -N 1\n", argv[0]);
588 nprocs = 1;
589 }
590 #else
591 if (nprocs > cmtx->ncols)
592 nprocs = cmtx->ncols;
593 if ((nprocs > 1) & !out_spec) {
594 fprintf(stderr, "%s: multi-processing result cannot go to stdout\n",
595 argv[0]);
596 nprocs = 1;
597 }
598 if ((nprocs > 1) & iswapped && (in_type==DTfloat) | (in_type==DTdouble)) {
599 fprintf(stderr, "%s: multi-processing unsupported on swapped input\n",
600 argv[0]);
601 nprocs = 1;
602 }
603 #endif
604 if (cmtx->nrows > 1 && !hasFormat(in_spec)) {
605 fprintf(stderr, "%s: input specification '%s' needs %%d format\n",
606 argv[0], in_spec);
607 goto userr;
608 }
609 if (!get_iotypes())
610 return(1);
611 if (!(nprocs == 1 ? solo_process() : multi_process()))
612 return(1);
613 return(0);
614 userr:
615 fprintf(stderr, "Usage: %s [-oc | -of][-o ospec][-N nproc] inpspec [mtx]\n",
616 argv[0]);
617 return(1);
618 }