ViewVC Help
View File | Revision Log | Show Annotations | Download File | Root Listing
root/radiance/ray/src/util/pvsum.c
Revision: 2.2
Committed: Thu Mar 27 16:35:00 2025 UTC (5 weeks, 1 day ago) by greg
Content type: text/plain
Branch: MAIN
Changes since 2.1: +55 -20 lines
Log Message:
perf(pvsum): Randomized row access in child processes to avoid simultaneous page faults

File Contents

# User Rev Content
1 greg 2.1 #ifndef lint
2 greg 2.2 static const char RCSid[] = "$Id: pvsum.c,v 2.1 2025/03/27 01:26:55 greg Exp $";
3 greg 2.1 #endif
4     /*
5     * pvsum.c - add together spectral and/or float pictures
6     * based on vector or matrix, similar to dctimestep
7     */
8    
9     #include <math.h>
10     #include "rtio.h"
11     #include "resolu.h"
12     #include "platform.h"
13     #include "paths.h"
14 greg 2.2 #include "random.h"
15 greg 2.1 #include "rmatrix.h"
16     #if !defined(_WIN32) && !defined(_WIN64)
17     #include <sys/mman.h>
18     #endif
19    
20     int nprocs = 1; /* # of calculation processes (Unix) */
21     int in_type = DTfromHeader; /* input data type */
22     int out_type = DTfromHeader; /* output data type */
23     char *in_spec = NULL; /* input specification */
24     char *out_spec = NULL; /* output file specification */
25    
26     int iswapped = 0; /* input data is byte-swapped? */
27     int ncomp = 3; /* # input components */
28     int xres=0, yres=0; /* input image dimensions */
29    
30     RMATRIX *cmtx = NULL; /* coefficient matrix */
31    
32     /* does the given spec contain integer format? */
33     int
34     hasFormat(const char *s)
35     {
36     restart:
37     if (s) s = strchr(s, '%');
38     if (!s) return(0);
39     if (s[1] == '%') { /* "%%" ? */
40     s += 2;
41     goto restart;
42     }
43     while (*++s) {
44     if (strchr("diouxX", *s))
45     return(1); /* integer format */
46     if (strchr("%fFeEgGaAcsb", *s))
47     break; /* non-integer format */
48     }
49     return(0);
50     }
51    
52     /* get first input header data we'll need */
53     int
54     iheadline(char *s, void *p)
55     {
56     char fmt[MAXFMTLEN];
57     int i;
58    
59     if (!strncmp(s, "NCOLS=", 6)) {
60     xres = atoi(s+6);
61     return(1);
62     }
63     if (!strncmp(s, "NROWS=", 6)) {
64     yres = atoi(s+6);
65     return(1);
66     }
67     if (isncomp(s)) {
68     ncomp = ncompval(s);
69     return(1);
70     }
71     if (isexpos(s)) {
72     if (fabs(1. - exposval(s)) > 0.04)
73     fputs("Warning - ignoring EXPOSURE setting\n", stderr);
74     return(1);
75     }
76     if (formatval(fmt, s)) {
77     for (in_type = DTend; --in_type > DTfromHeader; )
78     if (!strcmp(fmt, cm_fmt_id[in_type]))
79     return(1);
80     return(-1);
81     }
82     i = isbigendian(s);
83     if (i >= 0) {
84     iswapped = (i != nativebigendian());
85     return(1);
86     }
87     return(0);
88     }
89    
90     /* open initial file and get relevant dimensions and type */
91     int
92     get_iotypes(void)
93     {
94     char fbuf[256];
95     FILE *fp;
96    
97     sprintf(fbuf, in_spec, 0);
98     fp = fopen(fbuf, "rb");
99     if (!fp) {
100     fprintf(stderr, "%s: cannot open for reading\n", fbuf);
101     return(0);
102     }
103     if (getheader(fp, iheadline, NULL) < 0) {
104     fprintf(stderr, "%s: bad header - wrong format?\n", fbuf);
105     fclose(fp);
106     return(0);
107     }
108     if ((xres <= 0) | (yres <= 0) && !fscnresolu(&xres, &yres, fp)) {
109     fprintf(stderr, "%s: missing input resolution\n", fbuf);
110     fclose(fp);
111     return(0);
112     }
113     if (nprocs > 1 && (in_type==DTrgbe) | (in_type==DTxyze)) {
114     long data_start = ftell(fp); /* make sure input flat */
115     off_t dend = lseek(fileno(fp), 0, SEEK_END);
116     if (dend < data_start + 4L*xres*yres) {
117     fputs("Warning - multi-processing requires flat input files\n",
118     stderr);
119     nprocs = 1;
120     }
121     }
122     fclose(fp);
123     if ((cmtx->ncomp == 1) & (ncomp != 1)) {
124     double xfm[MAXCSAMP];
125     RMATRIX *nmtx;
126     int i;
127     for (i = ncomp; i--; )
128     xfm[i] = 1.;
129     nmtx = rmx_transform(cmtx, ncomp, xfm);
130     if (!nmtx)
131     return(0);
132     rmx_free(cmtx);
133     cmtx = nmtx;
134     } else if (cmtx->ncomp != ncomp) {
135     fprintf(stderr, "operation %s needs %d components, has %d\n",
136     cmtx->ncols == 1 ? "vector" : "matrix",
137     ncomp, cmtx->ncomp);
138     return(0);
139     }
140     if ((in_type != DTrgbe) & (in_type != DTxyze) & (in_type != DTspec) &
141     (in_type != DTfloat)) {
142     fprintf(stderr, "%s unsupported input data type: %s\n",
143     fbuf, cm_fmt_id[in_type]);
144     return(0);
145     }
146     if ((out_type == DTrgbe) & (ncomp > 3))
147     out_type = DTspec;
148     else if (out_type == DTfromHeader ||
149     (out_type == DTrgbe) & (in_type != DTfloat))
150     out_type = in_type;
151     return(1);
152     }
153    
154     /* check subsequent headers match initial file */
155     int
156     checkline(char *s, void *p)
157     {
158     int *xyres = (int *)p;
159     char fmt[MAXFMTLEN];
160    
161     if (!strncmp(s, "NCOLS=", 6)) {
162     xyres[0] = atoi(s+6);
163     if (xyres[0] <= 0)
164     return(-1);
165     return(1);
166     }
167     if (!strncmp(s, "NROWS=", 6)) {
168     xyres[1] = atoi(s+6);
169     if (xyres[1] <= 0)
170     return(-1);
171     return(1);
172     }
173     if (isncomp(s)) {
174     if (ncompval(s) != ncomp)
175     return(-1);
176     return(1);
177     }
178     if (isexpos(s)) {
179     if (fabs(1. - exposval(s)) > 0.04)
180     fputs("Warning - ignoring EXPOSURE setting\n", stderr);
181     return(1);
182     }
183     if (formatval(fmt, s)) {
184     if (strcmp(fmt, cm_fmt_id[in_type]))
185     return(-1);
186     return(1);
187     }
188     return(0);
189     }
190    
191     /* open and check input file */
192     FILE *
193     open_input(char *fname)
194     {
195     int xyres[2];
196     FILE *fp = fopen(fname, "rb");
197    
198     if (!fp) {
199     fprintf(stderr, "%s: cannot open for reading\n", fname);
200     return(NULL);
201     }
202     xyres[0] = xyres[1] = 0;
203     if (getheader(fp, checkline, xyres) < 0) {
204     fprintf(stderr, "%s: bad/inconsistent header\n", fname);
205     fclose(fp);
206     return(NULL);
207     }
208     if ((xyres[0] <= 0) | (xyres[1] <= 0) &&
209     !fscnresolu(&xyres[0], &xyres[1], fp)) {
210     fprintf(stderr, "%s: missing resolution\n", fname);
211     fclose(fp);
212     return(NULL);
213     }
214     if ((xyres[0] != xres) | (xyres[1] != yres)) {
215     fprintf(stderr, "%s: mismatched resolution\n", fname);
216     fclose(fp);
217     return(NULL);
218     }
219     return(fp);
220     }
221    
222     /* open output file or command (if !NULL) and write info header */
223     FILE *
224     open_output(char *ospec, int fno)
225     {
226     FILE *fp;
227    
228     if (!ospec) {
229     ospec = "<stdout>";
230     fp = stdout;
231     SET_FILE_BINARY(fp);
232     } else if (ospec[0] == '!') {
233     if (!(fp = popen(ospec+1, "w"))) {
234     fprintf(stderr, "Cannot start: %s\n", ospec);
235     return(NULL);
236     }
237     SET_FILE_BINARY(fp);
238     } else if (!(fp = fopen(ospec, "wb"))) {
239     fprintf(stderr, "%s: cannot open for writing\n", ospec);
240     return(NULL);
241     }
242     newheader("RADIANCE", fp);
243 greg 2.2 if (cmtx->info) /* prepend matrix metadata */
244     fputs(cmtx->info, fp);
245     else
246     fputnow(fp);
247 greg 2.1 if (fno >= 0)
248     fprintf(fp, "FRAME=%d\n", fno);
249     switch (out_type) {
250     case DTfloat:
251     case DTdouble:
252     fprintf(fp, "NCOLS=%d\nNROWS=%d\n", xres, yres);
253     fputncomp(ncomp, fp);
254     fputendian(fp);
255     fputformat(cm_fmt_id[out_type], fp);
256     fputc('\n', fp);
257     break;
258     case DTrgbe:
259     fputformat(COLRFMT, fp);
260     fputc('\n', fp);
261     fprtresolu(xres, yres, fp);
262     break;
263     case DTxyze:
264     fputformat(CIEFMT, fp);
265     fputc('\n', fp);
266     fprtresolu(xres, yres, fp);
267     break;
268     case DTspec:
269     fputncomp(ncomp, fp);
270     fputwlsplit(cmtx->wlpart, fp);
271     fputformat(SPECFMT, fp);
272     fputc('\n', fp);
273     fprtresolu(xres, yres, fp);
274     break;
275     default:
276     fputs("Unsupported output type!\n", stderr);
277     return(NULL);
278     }
279     if (fflush(fp) < 0) {
280     fprintf(stderr, "%s: write error\n", ospec);
281     fclose(fp);
282     return(NULL);
283     }
284     return(fp);
285     }
286    
287     /* run calculation from a single process */
288     int
289     solo_process(void)
290     {
291     float *osum = (float *)calloc((size_t)xres*yres, sizeof(float)*ncomp);
292     COLORV *iscan = (COLORV *)malloc(sizeof(COLORV)*ncomp*xres);
293     char fbuf[512];
294     int c;
295    
296     if (!osum | !iscan) {
297     fprintf(stderr, "Cannot allocate %dx%d %d-component accumulator\n",
298     xres, yres, ncomp);
299     return(0);
300     }
301     if (sizeof(float) != sizeof(COLORV)) {
302     fputs("Code Error 1 in solo_process()\n", stderr);
303     return(0);
304     }
305     for (c = 0; c < cmtx->ncols; c++) { /* run through each column/output */
306     FILE *fout;
307     int y;
308     int rc = cmtx->nrows;
309     if (c > 0) /* clear accumulator? */
310     memset(osum, 0, sizeof(float)*ncomp*xres*yres);
311     while (rc-- > 0) { /* run through each input file */
312     const int r = c&1 ? rc : cmtx->nrows-1 - rc;
313     const rmx_dtype *cval = rmx_val(cmtx, r, c);
314     FILE *finp;
315     int i, x;
316     for (i = ncomp; i--; )
317     if (cval[i] != 0) break;
318     if (i < 0) /* this coefficient is zero, skip */
319     continue;
320     sprintf(fbuf, in_spec, r);
321     finp = open_input(fbuf);
322     if (!finp)
323     return(0);
324     for (y = 0; y < yres; y++) {
325     float *dst = osum + y*xres*ncomp;
326     if (in_type == DTfloat ? getbinary(iscan, sizeof(float)*ncomp,
327     xres, finp) != xres
328     : freadsscan(iscan, ncomp, xres, finp) < 0)
329     goto readerr;
330     if ((in_type == DTfloat) & iswapped)
331     swap32((char *)iscan, ncomp*xres);
332     for (x = 0; x < xres; x++, dst += ncomp)
333     for (i = ncomp; i--; )
334     dst[i] += cval[i]*iscan[x*ncomp + i];
335     }
336     fclose(finp);
337     } /* write out accumulated column result... */
338     if (out_spec) { /* ...to file or command */
339     if (cmtx->ncols > 1 && !hasFormat(out_spec)) {
340     fputs("Sequential result must go to stdout\n", stderr);
341     return(0);
342     }
343     sprintf(fbuf, out_spec, c);
344     fout = open_output(fbuf, c-(cmtx->ncols==1));
345     } else { /* ...to stdout */
346     if ((out_type == DTfloat) & (cmtx->ncols > 1)) {
347     fputs("Float outputs must have separate destinations\n",
348     stderr);
349     return(0);
350     }
351     strcpy(fbuf, "<stdout>");
352     fout = open_output(NULL, c-(cmtx->ncols==1));
353     }
354     if (!fout)
355     return(0); /* assume error was reported */
356     if (out_type != DTfloat) {
357     for (y = 0; y < yres; y++)
358     if (fwritesscan(osum + (size_t)y*xres*ncomp,
359     ncomp, xres, fout) < 0)
360     goto writerr;
361     } else if (fwrite(osum, sizeof(float)*ncomp, (size_t)xres*yres, fout) !=
362     (size_t)xres*yres)
363     goto writerr;
364    
365     if (fbuf[0] == '!') {
366     if (pclose(fout) != 0) {
367     fprintf(stderr, "Bad status from: %s\n", fbuf);
368     return(0);
369     }
370     } else if (fout != stdout && fclose(fout) == EOF)
371     goto writerr;
372     }
373     free(osum); /* clean up on success */
374     free(iscan);
375     return(1);
376     readerr:
377     fprintf(stderr, "%s: read error\n", fbuf);
378     return(0);
379     writerr:
380     fprintf(stderr, "%s: write error\n", fbuf);
381     return(0);
382     }
383    
384 greg 2.2 /* allocate a scrambled index array of the specified length */
385     int *
386     scramble(int n)
387     {
388     int *scarr = (int *)malloc(sizeof(int)*n);
389     int i;
390    
391     if (!scarr) {
392     fprintf(stderr, "Out of memory in scramble(%d)\n", n);
393     exit(1);
394     }
395     for (i = n; i--; )
396     scarr[i] = i;
397     /* perform Fisher-Yates shuffle */
398     for (i = 0; i < n-1; i++) {
399     int ix = irandom(n-i) + i;
400     int ndx = scarr[i];
401     scarr[i] = scarr[ix];
402     scarr[ix] = ndx;
403     }
404     return(scarr);
405     }
406    
407 greg 2.1 /* run calculation on multiple processes, using memory maps and fork() */
408     int
409     multi_process(void)
410     {
411     #if defined(_WIN32) || defined(_WIN64)
412     fputs("Bad call to multi_process()\n", stderr);
413     return(0);
414     #else
415     int coff = nprocs;
416     int odd = 0;
417     char fbuf[512];
418     float *osum;
419 greg 2.2 int *syarr;
420 greg 2.1 int c;
421     /* sanity check */
422     if (sizeof(float) != sizeof(COLORV)) {
423     fputs("Code Error 1 in multi_process()\n", stderr);
424     return(0);
425     }
426     while (--coff > 0) { /* parent births children */
427     int pid = fork();
428     if (pid < 0) {
429     fputs("fork() call failed!\n", stderr);
430     return(0);
431     }
432     if (pid == 0) break; /* child gets to work */
433     }
434     osum = (float *)calloc((size_t)xres*yres, sizeof(float)*ncomp);
435     if (!osum) {
436     fprintf(stderr, "Cannot allocate %dx%d %d-component accumulator\n",
437     xres, yres, ncomp);
438     return(0);
439     }
440 greg 2.2 srandom(113*coff + 5669); /* randomize row access for this process */
441     syarr = scramble(yres);
442 greg 2.1 /* run through our unique set of columns */
443     for (c = coff; c < cmtx->ncols; c += nprocs) {
444     FILE *fout;
445     int y;
446     int rc = cmtx->nrows;
447     if (c > coff) /* clear accumulator? */
448     memset(osum, 0, sizeof(float)*ncomp*xres*yres);
449     while (rc-- > 0) { /* map & sum each input file */
450     const int r = odd ? rc : cmtx->nrows-1 - rc;
451     const rmx_dtype *cval = rmx_val(cmtx, r, c);
452 greg 2.2 long dstart;
453 greg 2.1 size_t maplen;
454     void *imap;
455     FILE *finp;
456     float *dst;
457 greg 2.2 int i, x;
458 greg 2.1 for (i = ncomp; i--; )
459     if (cval[i] != 0) break;
460     if (i < 0) /* this coefficient is zero, skip */
461     continue;
462     sprintf(fbuf, in_spec, r);
463     finp = open_input(fbuf);
464     if (!finp)
465     return(0);
466     dstart = ftell(finp);
467     if (dstart < 0) {
468     fprintf(stderr, "%s: ftell() failed!\n", fbuf);
469     return(0);
470     }
471     if (in_type == DTfloat && dstart%sizeof(float)) {
472     fprintf(stderr, "%s: float header misalignment\n", fbuf);
473     return(0);
474     }
475     i = in_type==DTfloat ? ncomp*(int)sizeof(float) : ncomp+1;
476     maplen = dstart + yres*xres*i;
477     imap = mmap(NULL, maplen, PROT_READ,
478     MAP_FILE|MAP_SHARED, fileno(finp), 0);
479 greg 2.2 fclose(finp); /* will load from map (randomly) */
480 greg 2.1 if (imap == MAP_FAILED) {
481     fprintf(stderr, "%s: unable to map input file\n", fbuf);
482     return(0);
483     }
484 greg 2.2 if (in_type == DTfloat)
485     for (y = yres; y-- > 0; ) {
486     const float *fvp = (float *)((char *)imap + dstart) +
487     (size_t)ncomp*xres*syarr[y];
488     dst = osum + (size_t)ncomp*xres*syarr[y];
489     for (x = xres; x-- > 0; dst += ncomp, fvp += ncomp)
490     for (i = ncomp; i--; )
491     dst[i] += cval[i]*fvp[i];
492     }
493     else
494     for (y = yres; y-- > 0; ) {
495     const COLRV *cvp = (COLRV *)((char *)imap + dstart) +
496     (ncomp+1L)*xres*syarr[y];
497     dst = osum + (size_t)ncomp*xres*syarr[y];
498     for (x = xres; x-- > 0; dst += ncomp, cvp += ncomp+1) {
499     const rmx_dtype fe = cxponent[cvp[ncomp]];
500     for (i = ncomp; i--; )
501     dst[i] += cval[i]*(cvp[i]+(rmx_dtype).5)*fe;
502     }
503 greg 2.1 }
504     munmap(imap, maplen);
505     } /* write out accumulated column result */
506     sprintf(fbuf, out_spec, c);
507     fout = open_output(fbuf, c);
508     if (!fout)
509     return(0); /* assume error was reported */
510     if (out_type != DTfloat) {
511     for (y = 0; y < yres; y++)
512     if (fwritesscan(osum + (size_t)y*xres*ncomp,
513     ncomp, xres, fout) < 0)
514     goto writerr;
515     } else if (fwrite(osum, sizeof(float)*ncomp, (size_t)xres*yres, fout) !=
516     (size_t)xres*yres)
517     goto writerr;
518    
519     if (fbuf[0] == '!') {
520     if (pclose(fout) != 0) {
521     fprintf(stderr, "Bad status from: %s\n", fbuf);
522     return(0);
523     }
524     } else if (fclose(fout) == EOF)
525     goto writerr;
526     odd = !odd; /* go back & forth to milk page cache */
527     }
528     free(osum);
529 greg 2.2 free(syarr);
530 greg 2.1 if (coff) /* children return here... */
531     return(1);
532     c = 0; /* ...but parent waits for children */
533     while (++coff < nprocs) {
534     int st;
535     if (wait(&st) < 0)
536     break;
537     if (st) c = st;
538     }
539     return(c == 0);
540     writerr:
541     fprintf(stderr, "%s: write error\n", fbuf);
542     return(0);
543     #endif
544     }
545    
546     int
547     main(int argc, char *argv[])
548     {
549     int a;
550    
551     for (a = 1; a < argc-1 && argv[a][0] == '-'; a++)
552     switch (argv[a][1]) {
553     case 'o': /* output spec/format */
554     switch (argv[a][2]) {
555     case '\0':
556     out_spec = argv[++a];
557     break;
558     case 'f':
559     out_type = DTfloat;
560     break;
561     case 'c':
562     out_type = DTrgbe;
563     break;
564     default:
565     goto badopt;
566     }
567     break;
568     case 'N': /* number of desired processes */
569     case 'n': /* quietly supported alternate */
570     nprocs = atoi(argv[++a]);
571     if (nprocs <= 0)
572     goto userr;
573     break;
574     default:;
575     badopt: fprintf(stderr, "%s: bad option: %s\n", argv[0], argv[a]);
576     goto userr;
577     return(1);
578     }
579     if ((argc-a < 1) | (argc-a > 2) || argv[a][0] == '-')
580     goto userr;
581     in_spec = argv[a];
582     cmtx = rmx_load(argv[a+1], RMPnone); /* may load from stdin */
583     if (cmtx == NULL)
584     return(1); /* error reported */
585     #if defined(_WIN32) || defined(_WIN64)
586     if (nprocs > 1) {
587     fprintf(stderr, "%s: warning - Windows only allows -N 1\n", argv[0]);
588     nprocs = 1;
589     }
590     #else
591     if (nprocs > cmtx->ncols)
592     nprocs = cmtx->ncols;
593     if ((nprocs > 1) & !out_spec) {
594     fprintf(stderr, "%s: multi-processing result cannot go to stdout\n",
595     argv[0]);
596     nprocs = 1;
597     }
598     if ((nprocs > 1) & iswapped && (in_type==DTfloat) | (in_type==DTdouble)) {
599     fprintf(stderr, "%s: multi-processing unsupported on swapped input\n",
600     argv[0]);
601     nprocs = 1;
602     }
603     #endif
604     if (cmtx->nrows > 1 && !hasFormat(in_spec)) {
605     fprintf(stderr, "%s: input specification '%s' needs %%d format\n",
606     argv[0], in_spec);
607     goto userr;
608     }
609     if (!get_iotypes())
610     return(1);
611     if (!(nprocs == 1 ? solo_process() : multi_process()))
612     return(1);
613     return(0);
614     userr:
615     fprintf(stderr, "Usage: %s [-oc | -of][-o ospec][-N nproc] inpspec [mtx]\n",
616     argv[0]);
617     return(1);
618     }