--- ray/src/rt/ambient.c 1993/08/05 10:02:00 2.20 +++ ray/src/rt/ambient.c 1996/07/09 19:37:09 2.39 @@ -1,4 +1,4 @@ -/* Copyright (c) 1993 Regents of the University of California */ +/* Copyright (c) 1996 Regents of the University of California */ #ifndef lint static char SCCSid[] = "$SunId$ LBL"; @@ -18,7 +18,9 @@ static char SCCSid[] = "$SunId$ LBL"; #include "random.h" -#define OCTSCALE 0.5 /* ceil((valid rad.)/(cube size)) */ +#ifndef OCTSCALE +#define OCTSCALE 1.0 /* ceil((valid rad.)/(cube size)) */ +#endif typedef struct ambtree { AMBVAL *alist; /* ambient value list */ @@ -27,6 +29,8 @@ typedef struct ambtree { extern CUBE thescene; /* contains space boundaries */ +extern char *shm_boundary; /* memory sharing boundary */ + #define MAXASET 511 /* maximum number of elements in ambient set */ OBJECT ambset[MAXASET+1]={0}; /* ambient include/exclude set */ @@ -38,15 +42,42 @@ static AMBTREE atrunk; /* our ambient trunk node */ static FILE *ambfp = NULL; /* ambient file pointer */ static int nunflshed = 0; /* number of unflushed ambient values */ +#ifndef SORT_THRESH +#ifdef BIGMEM +#define SORT_THRESH ((9L<<20)/sizeof(AMBVAL)) +#else +#define SORT_THRESH ((3L<<20)/sizeof(AMBVAL)) +#endif +#endif +#ifndef SORT_INTVL +#define SORT_INTVL (SORT_THRESH*256) +#endif +#ifndef MAX_SORT_INTVL +#define MAX_SORT_INTVL (SORT_INTVL<<4) +#endif + +static COLOR avsum = BLKCOLOR; /* computed ambient value sum */ +static unsigned int nambvals = 0; /* total number of indirect values */ +static unsigned int nambshare = 0; /* number of values from file */ +static unsigned long ambclock = 0; /* ambient access clock */ +static unsigned long lastsort = 0; /* time of last value sort */ +static long sortintvl = SORT_INTVL; /* time until next sort */ + +#define MAXACLOCK (1L<<30) /* clock turnover value */ + /* + * Track access times unless we are sharing ambient values + * through memory on a multiprocessor, when we want to avoid + * claiming our own memory (copy on write). Go ahead anyway + * if more than two thirds of our values are unshared. + */ +#define tracktime (shm_boundary == NULL || nambvals > 3*nambshare) + #define AMBFLUSH (BUFSIZ/AMBVALSIZ) #define newambval() (AMBVAL *)bmalloc(sizeof(AMBVAL)) -#define newambtree() (AMBTREE *)calloc(8, sizeof(AMBTREE)) -#define freeambtree(t) free((char *)(t)) - extern long ftell(), lseek(); -static int initambfile(), avsave(), avinsert(), loadtree(); +static int initambfile(), avsave(), avinsert(), sortambvals(); static AMBVAL *avstore(); #ifdef F_SETLKW static aflock(); @@ -56,51 +87,49 @@ static aflock(); setambres(ar) /* set ambient resolution */ int ar; { - ambres = ar; /* may be done already */ + ambres = ar < 0 ? 0 : ar; /* may be done already */ /* set min & max radii */ if (ar <= 0) { - minarad = 0.0; + minarad = 0; maxarad = thescene.cusize / 2.0; } else { minarad = thescene.cusize / ar; - maxarad = 16.0 * minarad; /* heuristic */ + maxarad = 64 * minarad; /* heuristic */ if (maxarad > thescene.cusize / 2.0) maxarad = thescene.cusize / 2.0; } - if (maxarad <= FTINY) - maxarad = .001; + if (minarad <= FTINY) + minarad = 10*FTINY; + if (maxarad <= minarad) + maxarad = 64 * minarad; } -resetambacc(newa) /* change ambient accuracy setting */ +setambacc(newa) /* set ambient accuracy */ double newa; { - AMBTREE oldatrunk; + double ambdiff; - if (fabs(newa - ambacc) < 0.01) - return; /* insignificant -- don't bother */ - ambacc = newa; - if (ambacc <= FTINY) - return; /* cannot build new tree */ - /* else need to rebuild tree */ - copystruct(&oldatrunk, &atrunk); - atrunk.alist = NULL; - atrunk.kid = NULL; - loadtree(&oldatrunk); + if (newa < 0.0) + newa = 0.0; + ambdiff = fabs(newa - ambacc); + if (ambdiff >= .01 && (ambacc = newa) > FTINY && nambvals > 0) + sortambvals(1); /* rebuild tree */ } setambient(afile) /* initialize calculation */ char *afile; { - long headlen; + long pos, flen; AMBVAL amb; /* init ambient limits */ setambres(ambres); + setambacc(ambacc); if (afile == NULL) return; if (ambacc <= FTINY) { - sprintf(errmsg, "zero ambient accuracy so \"%s\" not loaded", + sprintf(errmsg, "zero ambient accuracy so \"%s\" not opened", afile); error(WARNING, errmsg); return; @@ -108,12 +137,20 @@ char *afile; /* open ambient file */ if ((ambfp = fopen(afile, "r+")) != NULL) { initambfile(0); - headlen = ftell(ambfp); + pos = ftell(ambfp); while (readambval(&amb, ambfp)) - avinsert(avstore(&amb), &atrunk, - thescene.cuorg, thescene.cusize); + avinsert(avstore(&amb)); /* align */ - fseek(ambfp, -((ftell(ambfp)-headlen)%AMBVALSIZ), 1); + pos += (long)nambvals*AMBVALSIZ; + flen = lseek(fileno(ambfp), 0L, 2); + if (flen != pos) { + error(WARNING, + "ignoring last %ld values in ambient file (corrupted)", + (flen - pos)/AMBVALSIZ); + fseek(ambfp, pos, 0); + ftruncate(fileno(ambfp), pos); + } + nambshare = nambvals; } else if ((ambfp = fopen(afile, "w+")) != NULL) initambfile(1); else { @@ -147,9 +184,10 @@ OBJECT obj; } -ambient(acol, r) /* compute ambient component for ray */ +ambient(acol, r, nrm) /* compute ambient component for ray */ COLOR acol; register RAY *r; +FVECT nrm; { static int rdepth = 0; /* ambient recursion */ double d; @@ -168,31 +206,41 @@ register RAY *r; rdepth++; d = doambient(acol, r, r->rweight, NULL, NULL); rdepth--; - if (d == 0.0) + if (d <= FTINY) goto dumbamb; return; } + /* resort memory? */ + sortambvals(0); /* get ambient value */ setcolor(acol, 0.0, 0.0, 0.0); - d = sumambient(acol, r, rdepth, + d = sumambient(acol, r, nrm, rdepth, &atrunk, thescene.cuorg, thescene.cusize); - if (d > FTINY) + if (d > FTINY) { scalecolor(acol, 1.0/d); - else { - d = makeambient(acol, r, rdepth++); - rdepth--; + return; } + rdepth++; /* need to cache new value */ + d = makeambient(acol, r, nrm, rdepth-1); + rdepth--; if (d > FTINY) return; dumbamb: /* return global value */ copycolor(acol, ambval); + if (ambvwt <= 0 | nambvals == 0) + return; + scalecolor(acol, (double)ambvwt); + addcolor(acol, avsum); /* average in computations */ + d = 1.0/(ambvwt+nambvals); + scalecolor(acol, d); } double -sumambient(acol, r, al, at, c0, s) /* get interpolated ambient value */ +sumambient(acol, r, rn, al, at, c0, s) /* get interpolated ambient value */ COLOR acol; register RAY *r; +FVECT rn; int al; AMBTREE *at; FVECT c0; @@ -204,22 +252,28 @@ double s; int i; register int j; register AMBVAL *av; - /* do this node */ + wsum = 0.0; + /* do this node */ for (av = at->alist; av != NULL; av = av->next) { + if (tracktime) + av->latick = ambclock++; /* * Ambient level test. */ - if (av->lvl > al || av->weight < r->rweight-FTINY) + if (av->lvl > al) /* list sorted, so this works */ + break; + if (av->weight < r->rweight-FTINY) continue; /* * Ambient radius test. */ - e1 = 0.0; - for (j = 0; j < 3; j++) { - d = av->pos[j] - r->rop[j]; - e1 += d * d; - } + d = av->pos[0] - r->rop[0]; + e1 = d * d; + d = av->pos[1] - r->rop[1]; + e1 += d * d; + d = av->pos[2] - r->rop[2]; + e1 += d * d; e1 /= av->rad * av->rad; if (e1 > ambacc*ambacc*1.21) continue; @@ -243,15 +297,14 @@ double s; * Jittering final test reduces image artifacts. */ wt = sqrt(e1) + sqrt(e2); - wt *= .9 + .2*urand(9015+samplendx); - if (wt > ambacc) + if (wt > ambacc*(.9+.2*urand(9015+samplendx))) continue; if (wt <= 1e-3) wt = 1e3; else wt = 1.0 / wt; wsum += wt; - extambient(ct, av, r->rop, r->ron); + extambient(ct, av, r->rop, rn); scalecolor(ct, wt); addcolor(acol, ct); } @@ -270,27 +323,28 @@ double s; break; } if (j == 3) - wsum += sumambient(acol, r, al, at->kid+i, ck0, s); + wsum += sumambient(acol, r, rn, al, at->kid+i, ck0, s); } return(wsum); } double -makeambient(acol, r, al) /* make a new ambient value */ +makeambient(acol, r, rn, al) /* make a new ambient value */ COLOR acol; register RAY *r; +FVECT rn; int al; { AMBVAL amb; FVECT gp, gd; /* compute weight */ amb.weight = pow(AVGREFL, (double)al); - if (r->rweight < 0.2*amb.weight) /* heuristic */ + if (r->rweight < 0.1*amb.weight) /* heuristic */ amb.weight = r->rweight; /* compute ambient */ amb.rad = doambient(acol, r, amb.weight, gp, gd); - if (amb.rad == 0.0) + if (amb.rad <= FTINY) return(0.0); /* store it */ VCOPY(amb.pos, r->rop); @@ -301,6 +355,8 @@ int al; VCOPY(amb.gdir, gd); /* insert into tree */ avsave(&amb); /* and save to file */ + if (rn != r->ron) + extambient(acol, &amb, r->rop, rn); /* texture */ return(amb.rad); } @@ -343,8 +399,9 @@ int creat; #ifdef MSDOS setmode(fileno(ambfp), O_BINARY); #endif - setbuf(ambfp, bmalloc(BUFSIZ)); + setbuf(ambfp, bmalloc(BUFSIZ+8)); if (creat) { /* new file */ + newheader("RADIANCE", ambfp); fprintf(ambfp, "%s -av %g %g %g -ab %d -aa %g ", progname, colval(ambval,RED), colval(ambval,GRN), colval(ambval,BLU), @@ -365,7 +422,7 @@ static avsave(av) /* insert and save an ambient value */ AMBVAL *av; { - avinsert(avstore(av), &atrunk, thescene.cuorg, thescene.cusize); + avinsert(avstore(av)); if (ambfp == NULL) return; if (writambval(av, ambfp) < 0) @@ -388,24 +445,68 @@ register AMBVAL *aval; if ((av = newambval()) == NULL) error(SYSTEM, "out of memory in avstore"); copystruct(av, aval); + av->latick = ambclock; + av->next = NULL; + addcolor(avsum, av->val); /* add to sum for averaging */ + nambvals++; return(av); } +#define ATALLOCSZ 512 /* #/8 trees to allocate at once */ + +static AMBTREE *atfreelist = NULL; /* free ambient tree structures */ + + static -avinsert(av, at, c0, s) /* insert ambient value in a tree */ +AMBTREE * +newambtree() /* allocate 8 ambient tree structs */ +{ + register AMBTREE *atp, *upperlim; + + if (atfreelist == NULL) { /* get more nodes */ + atfreelist = (AMBTREE *)bmalloc(ATALLOCSZ*8*sizeof(AMBTREE)); + if (atfreelist == NULL) + return(NULL); + /* link new free list */ + upperlim = atfreelist + 8*(ATALLOCSZ-1); + for (atp = atfreelist; atp < upperlim; atp += 8) + atp->kid = atp + 8; + atp->kid = NULL; + } + atp = atfreelist; + atfreelist = atp->kid; + bzero((char *)atp, 8*sizeof(AMBTREE)); + return(atp); +} + + +static +freeambtree(atp) /* free 8 ambient tree structs */ +AMBTREE *atp; +{ + atp->kid = atfreelist; + atfreelist = atp; +} + + +static +avinsert(av) /* insert ambient value in our tree */ register AMBVAL *av; -register AMBTREE *at; -FVECT c0; -double s; { + register AMBTREE *at; + register AMBVAL *ap; + AMBVAL avh; FVECT ck0; + double s; int branch; register int i; if (av->rad <= FTINY) error(CONSISTENCY, "zero ambient radius in avinsert"); - VCOPY(ck0, c0); + at = &atrunk; + VCOPY(ck0, thescene.cuorg); + s = thescene.cusize; while (s*(OCTSCALE/2) > av->rad*ambacc) { if (at->kid == NULL) if ((at->kid = newambtree()) == NULL) @@ -419,28 +520,191 @@ double s; } at = at->kid + branch; } - av->next = at->alist; - at->alist = av; + avh.next = at->alist; /* order by increasing level */ + for (ap = &avh; ap->next != NULL; ap = ap->next) + if (ap->next->lvl >= av->lvl) + break; + av->next = ap->next; + ap->next = av; + at->alist = avh.next; } static -loadtree(at) /* move tree to main store */ +unloadatree(at, f) /* unload an ambient value tree */ register AMBTREE *at; +int (*f)(); { register AMBVAL *av; register int i; /* transfer values at this node */ for (av = at->alist; av != NULL; av = at->alist) { at->alist = av->next; - avinsert(av, &atrunk, thescene.cuorg, thescene.cusize); + (*f)(av); } + if (at->kid == NULL) + return; for (i = 0; i < 8; i++) /* transfer and free children */ - loadtree(at->kid+i); + unloadatree(at->kid+i, f); freeambtree(at->kid); + at->kid = NULL; } +static struct avl { + AMBVAL *p; + unsigned long t; +} *avlist1; /* ambient value list with ticks */ +static AMBVAL **avlist2; /* memory positions for sorting */ +static int i_avlist; /* index for lists */ + + +static +av2list(av) +register AMBVAL *av; +{ +#ifdef DEBUG + if (i_avlist >= nambvals) + error(CONSISTENCY, "too many ambient values in av2list1"); +#endif + avlist1[i_avlist].p = avlist2[i_avlist] = av; + avlist1[i_avlist++].t = av->latick; +} + + +static int +alatcmp(av1, av2) /* compare ambient values for MRA */ +struct avl *av1, *av2; +{ + register long lc = av2->t - av1->t; + return(lc<0 ? -1 : lc>0 ? 1 : 0); +} + + +static int +aposcmp(avp1, avp2) /* compare ambient value positions */ +AMBVAL **avp1, **avp2; +{ + return(*avp1 - *avp2); +} + + +#if 1 +static int +avlmemi(avaddr) /* find list position from address */ +AMBVAL *avaddr; +{ + register AMBVAL **avlpp; + + avlpp = (AMBVAL **)bsearch((char *)&avaddr, (char *)avlist2, + nambvals, sizeof(AMBVAL *), aposcmp); + if (avlpp == NULL) + error(CONSISTENCY, "address not found in avlmemi"); + return(avlpp - avlist2); +} +#else +#define avlmemi(avaddr) ((AMBVAL **)bsearch((char *)&avaddr,(char *)avlist2, \ + nambvals,sizeof(AMBVAL *),aposcmp) - avlist2) +#endif + + +static +sortambvals(always) /* resort ambient values */ +int always; +{ + AMBTREE oldatrunk; + AMBVAL tav, *tap, *pnext; + register int i, j; + /* see if it's time yet */ + if (!always && (ambclock < lastsort+sortintvl || + nambvals < SORT_THRESH)) + return; + /* + * The idea here is to minimize memory thrashing + * in VM systems by improving reference locality. + * We do this by periodically sorting our stored ambient + * values in memory in order of most recently to least + * recently accessed. This ordering was chosen so that new + * ambient values (which tend to be less important) go into + * higher memory with the infrequently accessed values. + * Since we expect our values to need sorting less + * frequently as the process continues, we double our + * waiting interval after each call. + * This routine is also called by setambacc() with + * the "always" parameter set to 1 so that the ambient + * tree will be rebuilt with the new accuracy parameter. + */ + if (tracktime) { /* allocate pointer arrays to sort */ + avlist2 = (AMBVAL **)malloc(nambvals*sizeof(AMBVAL *)); + avlist1 = (struct avl *)malloc(nambvals*sizeof(struct avl)); + } else { + avlist2 = NULL; + avlist1 = NULL; + } + if (avlist1 == NULL) { /* no time tracking -- rebuild tree? */ + if (avlist2 != NULL) + free((char *)avlist2); + if (always) { /* rebuild without sorting */ + copystruct(&oldatrunk, &atrunk); + atrunk.alist = NULL; + atrunk.kid = NULL; + unloadatree(&oldatrunk, avinsert); + } + } else { /* sort memory by last access time */ + /* + * Sorting memory is tricky because it isn't contiguous. + * We have to sort an array of pointers by MRA and also + * by memory position. We then copy values in "loops" + * to minimize memory hits. Nevertheless, we will visit + * everyone at least twice, and this is an expensive process + * when we're thrashing, which is when we need to do it. + */ +#ifdef DEBUG + sprintf(errmsg, "sorting %u ambient values at ambclock=%lu...", + nambvals, ambclock); + eputs(errmsg); +#endif + i_avlist = 0; + unloadatree(&atrunk, av2list); /* empty current tree */ +#ifdef DEBUG + if (i_avlist < nambvals) + error(CONSISTENCY, "missing ambient values in sortambvals"); +#endif + qsort((char *)avlist1, nambvals, sizeof(struct avl), alatcmp); + qsort((char *)avlist2, nambvals, sizeof(AMBVAL *), aposcmp); + for (i = 0; i < nambvals; i++) { + if (avlist1[i].p == NULL) + continue; + tap = avlist2[i]; + copystruct(&tav, tap); + for (j = i; (pnext = avlist1[j].p) != tap; + j = avlmemi(pnext)) { + copystruct(avlist2[j], pnext); + avinsert(avlist2[j]); + avlist1[j].p = NULL; + } + copystruct(avlist2[j], &tav); + avinsert(avlist2[j]); + avlist1[j].p = NULL; + } + free((char *)avlist1); + free((char *)avlist2); + /* compute new sort interval */ + sortintvl = ambclock - lastsort; + if (sortintvl >= MAX_SORT_INTVL/2) + sortintvl = MAX_SORT_INTVL; + else + sortintvl <<= 1; /* wait twice as long next */ +#ifdef DEBUG + eputs("done\n"); +#endif + } + if (ambclock >= MAXACLOCK) + ambclock = MAXACLOCK/2; + lastsort = ambclock; +} + + #ifdef F_SETLKW static @@ -472,7 +736,7 @@ ambsync() /* synchronize ambient file */ aflock(F_WRLCK); /* see if file has grown */ if ((flen = lseek(fileno(ambfp), 0L, 2)) < 0) - error(SYSTEM, "cannot seek on ambient file"); + goto seekerr; if (n = flen - lastpos) { /* file has grown */ if (ambinp == NULL) { /* use duplicate filedes */ ambinp = fdopen(dup(fileno(ambfp)), "r"); @@ -480,22 +744,40 @@ ambsync() /* synchronize ambient file */ error(SYSTEM, "fdopen failed in ambsync"); } if (fseek(ambinp, lastpos, 0) < 0) - error(SYSTEM, "fseek failed in ambsync"); + goto seekerr; while (n >= AMBVALSIZ) { /* load contributed values */ - readambval(&avs, ambinp); - avinsert(avstore(&avs), &atrunk, - thescene.cuorg, thescene.cusize); + if (!readambval(&avs, ambinp)) { + sprintf(errmsg, + "ambient file corrupted near character %ld", + flen - n); + error(WARNING, errmsg); + break; + } + avinsert(avstore(&avs)); n -= AMBVALSIZ; } - if (n) /* alignment */ - lseek(fileno(ambfp), flen-n, 0); + /*** seek always as safety measure + if (n) ***/ /* alignment */ + if (lseek(fileno(ambfp), flen-n, 0) < 0) + goto seekerr; } +#ifdef DEBUG + if (ambfp->_ptr - ambfp->_base != nunflshed*AMBVALSIZ) { + sprintf(errmsg, "ambient file buffer at %d rather than %d", + ambfp->_ptr - ambfp->_base, + nunflshed*AMBVALSIZ); + error(CONSISTENCY, errmsg); + } +#endif syncend: n = fflush(ambfp); /* calls write() at last */ - lastpos = lseek(fileno(ambfp), 0L, 1); + if ((lastpos = lseek(fileno(ambfp), 0L, 1)) < 0) + goto seekerr; aflock(F_UNLCK); /* release file */ nunflshed = 0; return(n); +seekerr: + error(SYSTEM, "seek failed in ambsync"); } #else