--- ray/src/rt/ambcomp.c	1991/07/24 10:36:13	1.10
+++ ray/src/rt/ambcomp.c	2014/04/19 02:39:44	2.27
@@ -1,91 +1,648 @@
-/* Copyright (c) 1991 Regents of the University of California */
-
 #ifndef lint
-static char SCCSid[] = "$SunId$ LBL";
+static const char	RCSid[] = "$Id: ambcomp.c,v 2.27 2014/04/19 02:39:44 greg Exp $";
 #endif
-
 /*
  * Routines to compute "ambient" values using Monte Carlo
+ *
+ *  Hessian calculations based on "Practical Hessian-Based Error Control
+ *	for Irradiance Caching" by Schwarzhaupt, Wann Jensen, & Jarosz
+ *	from ACM SIGGRAPH Asia 2012 conference proceedings.
+ *
+ *  Declarations of external symbols in ambient.h
  */
 
-#include  "ray.h"
+#include "copyright.h"
 
+#include  "ray.h"
 #include  "ambient.h"
-
 #include  "random.h"
 
-typedef struct {
-	short  t, p;		/* theta, phi indices */
-	COLOR  v;		/* value sum */
-	float  r;		/* 1/distance sum */
-	float  k;		/* variance for this division */
-	int  n;			/* number of subsamples */
-}  AMBSAMP;		/* ambient sample division */
+#ifdef NEWAMB
 
+extern void		SDsquare2disk(double ds[2], double seedx, double seedy);
+
 typedef struct {
-	FVECT  ux, uy, uz;	/* x, y and z axis directions */
-	short  nt, np;		/* number of theta and phi directions */
+	RAY	*rp;		/* originating ray sample */
+	FVECT	ux, uy;		/* tangent axis unit vectors */
+	int	ns;		/* number of samples per axis */
+	COLOR	acoef;		/* division contribution coefficient */
+	struct s_ambsamp {
+		COLOR	v;		/* hemisphere sample value */
+		float	p[3];		/* intersection point */
+	} sa[1];		/* sample array (extends struct) */
 }  AMBHEMI;		/* ambient sample hemisphere */
 
-extern double  sin(), cos(), sqrt();
+#define ambsamp(h,i,j)	(h)->sa[(i)*(h)->ns + (j)]
 
+typedef struct {
+	FVECT	r_i, r_i1, e_i;
+	double	nf, I1, I2, J2;
+} FFTRI;		/* vectors and coefficients for Hessian calculation */
 
+
+static AMBHEMI *
+inithemi(			/* initialize sampling hemisphere */
+	COLOR	ac,
+	RAY	*r,
+	double	wt
+)
+{
+	AMBHEMI	*hp;
+	double	d;
+	int	n, i;
+					/* set number of divisions */
+	if (ambacc <= FTINY &&
+			wt > (d = 0.8*intens(ac)*r->rweight/(ambdiv*minweight)))
+		wt = d;			/* avoid ray termination */
+	n = sqrt(ambdiv * wt) + 0.5;
+	i = 1 + 5*(ambacc > FTINY);	/* minimum number of samples */
+	if (n < i)
+		n = i;
+					/* allocate sampling array */
+	hp = (AMBHEMI *)malloc(sizeof(AMBHEMI) +
+				sizeof(struct s_ambsamp)*(n*n - 1));
+	if (hp == NULL)
+		return(NULL);
+	hp->rp = r;
+	hp->ns = n;
+					/* assign coefficient */
+	copycolor(hp->acoef, ac);
+	d = 1.0/(n*n);
+	scalecolor(hp->acoef, d);
+					/* make tangent axes */
+	hp->uy[0] = hp->uy[1] = hp->uy[2] = 0.0;
+	for (i = 0; i < 3; i++)
+		if (r->ron[i] < 0.6 && r->ron[i] > -0.6)
+			break;
+	if (i >= 3)
+		error(CONSISTENCY, "bad ray direction in inithemi()");
+	hp->uy[i] = 1.0;
+	VCROSS(hp->ux, hp->uy, r->ron);
+	normalize(hp->ux);
+	VCROSS(hp->uy, r->ron, hp->ux);
+					/* we're ready to sample */
+	return(hp);
+}
+
+
 static int
-ambcmp(d1, d2)				/* decreasing order */
-AMBSAMP  *d1, *d2;
+ambsample(				/* sample an ambient direction */
+	AMBHEMI	*hp,
+	int	i,
+	int	j
+)
 {
-	if (d1->k < d2->k)
-		return(1);
-	if (d1->k > d2->k)
-		return(-1);
-	return(0);
+	struct s_ambsamp	*ap = &ambsamp(hp,i,j);
+	RAY			ar;
+	int			hlist[3];
+	double			spt[2], zd;
+	int			ii;
+					/* ambient coefficient for weight */
+	if (ambacc > FTINY)
+		setcolor(ar.rcoef, AVGREFL, AVGREFL, AVGREFL);
+	else
+		copycolor(ar.rcoef, hp->acoef);
+	if (rayorigin(&ar, AMBIENT, hp->rp, ar.rcoef) < 0) {
+		setcolor(ap->v, 0., 0., 0.);
+		VCOPY(ap->p, hp->rp->rop);
+		return(0);		/* no sample taken */
+	}
+	if (ambacc > FTINY) {
+		multcolor(ar.rcoef, hp->acoef);
+		scalecolor(ar.rcoef, 1./AVGREFL);
+	}
+					/* generate hemispherical sample */
+	SDsquare2disk(spt,	(i+.1+.8*frandom())/hp->ns,
+				(j+.1+.8*frandom())/hp->ns);
+	zd = sqrt(1. - spt[0]*spt[0] - spt[1]*spt[1]);
+	for (ii = 3; ii--; )
+		ar.rdir[ii] =	spt[0]*hp->ux[ii] +
+				spt[1]*hp->uy[ii] +
+				zd*hp->rp->ron[ii];
+	checknorm(ar.rdir);
+	dimlist[ndims++] = i*hp->ns + j + 90171;
+	rayvalue(&ar);			/* evaluate ray */
+	ndims--;
+	multcolor(ar.rcol, ar.rcoef);	/* apply coefficient */
+	copycolor(ap->v, ar.rcol);
+	if (ar.rt > 20.0*maxarad)	/* limit vertex distance */
+		ar.rt = 20.0*maxarad;
+	VSUM(ap->p, ar.rorg, ar.rdir, ar.rt);
+	return(1);
 }
 
 
+/* Compute vectors and coefficients for Hessian/gradient calcs */
+static void
+comp_fftri(FFTRI *ftp, float ap0[3], float ap1[3], FVECT rop)
+{
+	FVECT	v1;
+	double	dot_e, dot_er, dot_r, dot_r1;
+
+	VSUB(ftp->r_i, ap0, rop);
+	VSUB(ftp->r_i1, ap1, rop);
+	VSUB(ftp->e_i, ap1, ap0);
+	VCROSS(v1, ftp->e_i, ftp->r_i);
+	ftp->nf = 1.0/DOT(v1,v1);
+	VCROSS(v1, ftp->r_i, ftp->r_i1);
+	ftp->I1 = sqrt(DOT(v1,v1)*ftp->nf);
+	dot_e = DOT(ftp->e_i,ftp->e_i);
+	dot_er = DOT(ftp->e_i, ftp->r_i);
+	dot_r = DOT(ftp->r_i,ftp->r_i);
+	dot_r1 = DOT(ftp->r_i1,ftp->r_i1);
+	ftp->I2 = ( DOT(ftp->e_i, ftp->r_i1)/dot_r1 - dot_er/dot_r +
+			dot_e*ftp->I1 )*0.5*ftp->nf;
+	ftp->J2 =  0.25*ftp->nf*( 1.0/dot_r - 1.0/dot_r1 ) -
+			dot_er/dot_e*ftp->I2;
+}
+
+
+/* Compose matrix from two vectors */
+static void
+compose_matrix(FVECT mat[3], FVECT va, FVECT vb)
+{
+	mat[0][0] = 2.0*va[0]*vb[0];
+	mat[1][1] = 2.0*va[1]*vb[1];
+	mat[2][2] = 2.0*va[2]*vb[2];
+	mat[0][1] = mat[1][0] = va[0]*vb[1] + va[1]*vb[0];
+	mat[0][2] = mat[2][0] = va[0]*vb[2] + va[2]*vb[0];
+	mat[1][2] = mat[2][1] = va[1]*vb[2] + va[2]*vb[1];
+}
+
+
+/* Compute partial 3x3 Hessian matrix for edge */
+static void
+comp_hessian(FVECT hess[3], FFTRI *ftp, FVECT nrm)
+{
+	FVECT	v1, v2;
+	FVECT	m1[3], m2[3], m3[3], m4[3];
+	double	d1, d2, d3, d4;
+	double	I3, J3, K3;
+	int	i, j;
+					/* compute intermediate coefficients */
+	d1 = 1.0/DOT(ftp->r_i,ftp->r_i);
+	d2 = 1.0/DOT(ftp->r_i1,ftp->r_i1);
+	d3 = 1.0/DOT(ftp->e_i,ftp->e_i);
+	d4 = DOT(ftp->e_i, ftp->r_i);
+	I3 = 0.25*ftp->nf*( DOT(ftp->e_i, ftp->r_i1)*d2*d2 - d4*d1*d1 +
+				3.0*ftp->I2*d3 );
+	J3 = 0.25*d3*(d1*d1 - d2*d2) - d4*d3*I3;
+	K3 = d3*(ftp->I2 - I3/d1 - 2.0*d4*J3);
+					/* intermediate matrices */
+	VCROSS(v1, nrm, ftp->e_i);
+	for (j = 3; j--; )
+		v2[i] = ftp->I2*ftp->r_i[j] + ftp->J2*ftp->e_i[j];
+	compose_matrix(m1, v1, v2);
+	compose_matrix(m2, ftp->r_i, ftp->r_i);
+	compose_matrix(m3, ftp->e_i, ftp->e_i);
+	compose_matrix(m4, ftp->r_i, ftp->e_i);
+	VCROSS(v1, ftp->r_i, ftp->e_i);
+	d1 = DOT(nrm, v1);
+	d2 = -d1*ftp->I2;
+	d1 *= 2.0;
+	for (i = 3; i--; )		/* final matrix sum */
+	    for (j = 3; j--; ) {
+		hess[i][j] = m1[i][j] + d1*( I3*m2[i][j] + K3*m3[i][j] +
+						2.0*J3*m4[i][j] );
+		hess[i][j] += d2*(i==j);
+		hess[i][j] *= -1.0/PI;
+	    }
+}
+
+
+/* Reverse hessian calculation result for edge in other direction */
+static void
+rev_hessian(FVECT hess[3])
+{
+	int	i;
+
+	for (i = 3; i--; ) {
+		hess[i][0] = -hess[i][0];
+		hess[i][1] = -hess[i][1];
+		hess[i][2] = -hess[i][2];
+	}
+}
+
+
+/* Add to radiometric Hessian from the given triangle */
+static void
+add2hessian(FVECT hess[3], FVECT ehess1[3],
+		FVECT ehess2[3], FVECT ehess3[3], COLORV v)
+{
+	int	i, j;
+
+	for (i = 3; i--; )
+	    for (j = 3; j--; )
+		hess[i][j] += v*( ehess1[i][j] + ehess2[i][j] + ehess3[i][j] );
+}
+
+
+/* Compute partial displacement form factor gradient for edge */
+static void
+comp_gradient(FVECT grad, FFTRI *ftp, FVECT nrm)
+{
+	FVECT	vcp;
+	double	f1;
+	int	i;
+
+	VCROSS(vcp, ftp->r_i, ftp->r_i1);
+	f1 = 2.0*DOT(nrm, vcp);
+	VCROSS(vcp, nrm, ftp->e_i);
+	for (i = 3; i--; )
+		grad[i] = (0.5/PI)*( ftp->I1*vcp[i] +
+			    f1*(ftp->I2*ftp->r_i[i] + ftp->J2*ftp->e_i[i]) );
+}
+
+
+/* Reverse gradient calculation result for edge in other direction */
+static void
+rev_gradient(FVECT grad)
+{
+	grad[0] = -grad[0];
+	grad[1] = -grad[1];
+	grad[2] = -grad[2];
+}
+
+
+/* Add to displacement gradient from the given triangle */
+static void
+add2gradient(FVECT grad, FVECT egrad1, FVECT egrad2, FVECT egrad3, COLORV v)
+{
+	int	i;
+
+	for (i = 3; i--; )
+		grad[i] += v*( egrad1[i] + egrad2[i] + egrad3[i] );
+}
+
+
+/* Return brightness of furthest ambient sample */
+static COLORV
+back_ambval(struct s_ambsamp *ap1, struct s_ambsamp *ap2,
+		struct s_ambsamp *ap3, FVECT orig)
+{
+	COLORV	vback;
+	FVECT	vec;
+	double	d2, d2best;
+
+	VSUB(vec, ap1->p, orig);
+	d2best = DOT(vec,vec);
+	vback = ap1->v[CIEY];
+	VSUB(vec, ap2->p, orig);
+	d2 = DOT(vec,vec);
+	if (d2 > d2best) {
+		d2best = d2;
+		vback = ap2->v[CIEY];
+	}
+	VSUB(vec, ap3->p, orig);
+	d2 = DOT(vec,vec);
+	if (d2 > d2best)
+		return(ap3->v[CIEY]);
+	return(vback);
+}
+
+
+/* Compute anisotropic radii and eigenvector directions */
 static int
-ambnorm(d1, d2)				/* standard order */
-AMBSAMP  *d1, *d2;
+eigenvectors(FVECT uv[2], float ra[2], FVECT hessian[3])
 {
-	register int  c;
+	double	hess2[2][2];
+	FVECT	a, b;
+	double	evalue[2], slope1, xmag1;
+	int	i;
+					/* project Hessian to sample plane */
+	for (i = 3; i--; ) {
+		a[i] = DOT(hessian[i], uv[0]);
+		b[i] = DOT(hessian[i], uv[1]);
+	}
+	hess2[0][0] = DOT(uv[0], a);
+	hess2[0][1] = DOT(uv[0], b);
+	hess2[1][0] = DOT(uv[1], a);
+	hess2[1][1] = DOT(uv[1], b);
+					/* compute eigenvalues */
+	if (quadratic(evalue, 1.0, -hess2[0][0]-hess2[1][1],
+			hess2[0][0]*hess2[1][1]-hess2[0][1]*hess2[1][0]) != 2 ||
+			(evalue[0] = fabs(evalue[0])) <= FTINY*FTINY*FTINY ||
+			(evalue[1] = fabs(evalue[1])) <= FTINY*FTINY*FTINY)
+		error(INTERNAL, "bad eigenvalue calculation");
 
-	if (c = d1->t - d2->t)
-		return(c);
-	return(d1->p - d2->p);
+	if (evalue[0] > evalue[1]) {
+		ra[0] = 1.0/sqrt(sqrt(evalue[0]));
+		ra[1] = 1.0/sqrt(sqrt(evalue[1]));
+		slope1 = evalue[1];
+	} else {
+		ra[0] = 1.0/sqrt(sqrt(evalue[1]));
+		ra[1] = 1.0/sqrt(sqrt(evalue[0]));
+		slope1 = evalue[0];
+	}
+					/* compute unit eigenvectors */
+	if (fabs(hess2[0][1]) <= FTINY)
+		return;			/* uv OK as is */
+	slope1 = (slope1 - hess2[0][0]) / hess2[0][1];
+	xmag1 = sqrt(1.0/(1.0 + slope1*slope1));
+	for (i = 3; i--; ) {
+		b[i] = xmag1*uv[0][i] + slope1*xmag1*uv[1][i];
+		a[i] = slope1*xmag1*uv[0][i] - xmag1*uv[1][i];
+	}
+	VCOPY(uv[0], a);
+	VCOPY(uv[1], b);
 }
 
 
-divsample(dp, h, r)			/* sample a division */
-register AMBSAMP  *dp;
-AMBHEMI  *h;
-RAY  *r;
+static void
+ambHessian(				/* anisotropic radii & pos. gradient */
+	AMBHEMI	*hp,
+	FVECT	uv[2],			/* returned */
+	float	ra[2],			/* returned */
+	float	pg[2]			/* returned */
+)
 {
+	static char	memerrmsg[] = "out of memory in ambHessian()";
+	FVECT		(*hessrow)[3] = NULL;
+	FVECT		*gradrow = NULL;
+	FVECT		hessian[3];
+	FVECT		gradient;
+	FFTRI		fftr;
+	int		i, j;
+					/* be sure to assign unit vectors */
+	VCOPY(uv[0], hp->ux);
+	VCOPY(uv[1], hp->uy);
+			/* clock-wise vertex traversal from sample POV */
+	if (ra != NULL) {		/* initialize Hessian row buffer */
+		hessrow = (FVECT (*)[3])malloc(sizeof(FVECT)*3*hp->ns);
+		if (hessrow == NULL)
+			error(SYSTEM, memerrmsg);
+		memset(hessian, 0, sizeof(hessian));
+	} else if (pg == NULL)		/* bogus call? */
+		return;
+	if (pg != NULL) {		/* initialize form factor row buffer */
+		gradrow = (FVECT *)malloc(sizeof(FVECT)*hp->ns);
+		if (gradrow == NULL)
+			error(SYSTEM, memerrmsg);
+		memset(gradient, 0, sizeof(gradient));
+	}
+					/* compute first row of edges */
+	for (j = 0; j < hp->ns-1; j++) {
+		comp_fftri(&fftr, ambsamp(hp,0,j).p,
+				ambsamp(hp,0,j+1).p, hp->rp->rop);
+		if (hessrow != NULL)
+			comp_hessian(hessrow[j], &fftr, hp->rp->ron);
+		if (gradrow != NULL)
+			comp_gradient(gradrow[j], &fftr, hp->rp->ron);
+	}
+					/* sum each row of triangles */
+	for (i = 0; i < hp->ns-1; i++) {
+	    FVECT	hesscol[3];	/* compute first vertical edge */
+	    FVECT	gradcol;
+	    comp_fftri(&fftr, ambsamp(hp,i,0).p,
+			ambsamp(hp,i+1,0).p, hp->rp->rop);
+	    if (hessrow != NULL)
+		comp_hessian(hesscol, &fftr, hp->rp->ron);
+	    if (gradrow != NULL)
+		comp_gradient(gradcol, &fftr, hp->rp->ron);
+	    for (j = 0; j < hp->ns-1; j++) {
+		FVECT	hessdia[3];	/* compute triangle contributions */
+		FVECT	graddia;
+		COLORV	backg;
+		backg = back_ambval(&ambsamp(hp,i,j), &ambsamp(hp,i,j+1),
+					&ambsamp(hp,i+1,j), hp->rp->rop);
+					/* diagonal (inner) edge */
+		comp_fftri(&fftr, ambsamp(hp,i,j+1).p,
+				ambsamp(hp,i+1,j).p, hp->rp->rop);
+		if (hessrow != NULL) {
+		    comp_hessian(hessdia, &fftr, hp->rp->ron);
+		    rev_hessian(hesscol);
+		    add2hessian(hessian, hessrow[j], hessdia, hesscol, backg);
+		}
+		if (gradient != NULL) {
+		    comp_gradient(graddia, &fftr, hp->rp->ron);
+		    rev_gradient(gradcol);
+		    add2gradient(gradient, gradrow[j], graddia, gradcol, backg);
+		}
+					/* initialize edge in next row */
+		comp_fftri(&fftr, ambsamp(hp,i+1,j+1).p,
+				ambsamp(hp,i+1,j).p, hp->rp->rop);
+		if (hessrow != NULL)
+		    comp_hessian(hessrow[j], &fftr, hp->rp->ron);
+		if (gradrow != NULL)
+		    comp_gradient(gradrow[j], &fftr, hp->rp->ron);
+					/* new column edge & paired triangle */
+		backg = back_ambval(&ambsamp(hp,i,j+1), &ambsamp(hp,i+1,j+1),
+					&ambsamp(hp,i+1,j), hp->rp->rop);
+		comp_fftri(&fftr, ambsamp(hp,i,j+1).p, ambsamp(hp,i+1,j+1).p,
+				hp->rp->rop);
+		if (hessrow != NULL) {
+		    comp_hessian(hesscol, &fftr, hp->rp->ron);
+		    rev_hessian(hessdia);
+		    add2hessian(hessian, hessrow[j], hessdia, hesscol, backg);
+		    if (i < hp->ns-2)
+			rev_hessian(hessrow[j]);
+		}
+		if (gradrow != NULL) {
+		    comp_gradient(gradcol, &fftr, hp->rp->ron);
+		    rev_gradient(graddia);
+		    add2gradient(gradient, gradrow[j], graddia, gradcol, backg);
+		    if (i < hp->ns-2)
+			rev_gradient(gradrow[j]);
+		}
+	    }
+	}
+					/* release row buffers */
+	if (hessrow != NULL) free(hessrow);
+	if (gradrow != NULL) free(gradrow);
+	
+	if (ra != NULL)			/* extract eigenvectors & radii */
+		eigenvectors(uv, ra, hessian);
+	if (pg != NULL) {		/* project position gradient */
+		pg[0] = DOT(gradient, uv[0]);
+		pg[1] = DOT(gradient, uv[1]);
+	}
+}
+
+
+/* Compute direction gradient from a hemispherical sampling */
+static void
+ambdirgrad(AMBHEMI *hp, FVECT uv[2], float dg[2])
+{
+	struct s_ambsamp	*ap;
+	int			n;
+
+	dg[0] = dg[1] = 0;
+	for (ap = hp->sa, n = hp->ns*hp->ns; n--; ap++) {
+		FVECT	vd;
+		double	gfact;
+					/* use vector for azimuth + 90deg */
+		VSUB(vd, ap->p, hp->rp->rop);
+					/* brightness with tangent factor */
+		gfact = ap->v[CIEY] / DOT(hp->rp->ron, vd);
+					/* sine = proj_radius/vd_length */
+		dg[0] -= DOT(uv[1], vd) * gfact ;
+		dg[1] += DOT(uv[0], vd) * gfact;
+	}
+}
+
+
+int
+doambient(				/* compute ambient component */
+	COLOR	rcol,			/* input/output color */
+	RAY	*r,
+	double	wt,
+	FVECT	uv[2],			/* returned (optional) */
+	float	ra[2],			/* returned (optional) */
+	float	pg[2],			/* returned (optional) */
+	float	dg[2]			/* returned (optional) */
+)
+{
+	int			cnt = 0;
+	FVECT			my_uv[2];
+	AMBHEMI			*hp;
+	double			d, acol[3];
+	struct s_ambsamp	*ap;
+	int			i, j;
+					/* initialize */
+	if ((hp = inithemi(rcol, r, wt)) == NULL)
+		return(0);
+	if (uv != NULL)
+		memset(uv, 0, sizeof(FVECT)*2);
+	if (ra != NULL)
+		ra[0] = ra[1] = 0.0;
+	if (pg != NULL)
+		pg[0] = pg[1] = 0.0;
+	if (dg != NULL)
+		dg[0] = dg[1] = 0.0;
+					/* sample the hemisphere */
+	acol[0] = acol[1] = acol[2] = 0.0;
+	for (i = hp->ns; i--; )
+		for (j = hp->ns; j--; )
+			if (ambsample(hp, i, j)) {
+				ap = &ambsamp(hp,i,j);
+				addcolor(acol, ap->v);
+				++cnt;
+			}
+	if (!cnt) {
+		setcolor(rcol, 0.0, 0.0, 0.0);
+		free(hp);
+		return(0);		/* no valid samples */
+	}
+	d = 1.0 / cnt;			/* final indirect irradiance/PI */
+	acol[0] *= d; acol[1] *= d; acol[2] *= d;
+	copycolor(rcol, acol);
+	if (cnt < hp->ns*hp->ns ||	/* incomplete sampling? */
+			(ra == NULL) & (pg == NULL) & (dg == NULL)) {
+		free(hp);
+		return(-1);		/* no radius or gradient calc. */
+	}
+	d = 0.01 * bright(rcol);	/* add in 1% before Hessian comp. */
+	if (d < FTINY) d = FTINY;
+	ap = hp->sa;			/* using Y channel from here on... */
+	for (i = hp->ns*hp->ns; i--; ap++)
+		colval(ap->v,CIEY) = bright(ap->v) + d;
+
+	if (uv == NULL)			/* make sure we have axis pointers */
+		uv = my_uv;
+					/* compute radii & pos. gradient */
+	ambHessian(hp, uv, ra, pg);
+	if (dg != NULL)			/* compute direction gradient */
+		ambdirgrad(hp, uv, dg);
+	if (ra != NULL) {		/* adjust/clamp radii */
+		d = sqrt(sqrt((4.0/PI)*bright(rcol)/wt));
+		if ((ra[0] *= d) > maxarad)
+			ra[0] = maxarad;
+		if ((ra[1] *= d) > 2.0*ra[0])
+			ra[1] = 2.0*ra[0];
+	}
+	free(hp);			/* clean up and return */
+	return(1);
+}
+
+
+#else /* ! NEWAMB */
+
+
+void
+inithemi(			/* initialize sampling hemisphere */
+	AMBHEMI  *hp,
+	COLOR ac,
+	RAY  *r,
+	double  wt
+)
+{
+	double	d;
+	int  i;
+					/* set number of divisions */
+	if (ambacc <= FTINY &&
+			wt > (d = 0.8*intens(ac)*r->rweight/(ambdiv*minweight)))
+		wt = d;			/* avoid ray termination */
+	hp->nt = sqrt(ambdiv * wt / PI) + 0.5;
+	i = ambacc > FTINY ? 3 : 1;	/* minimum number of samples */
+	if (hp->nt < i)
+		hp->nt = i;
+	hp->np = PI * hp->nt + 0.5;
+					/* set number of super-samples */
+	hp->ns = ambssamp * wt + 0.5;
+					/* assign coefficient */
+	copycolor(hp->acoef, ac);
+	d = 1.0/(hp->nt*hp->np);
+	scalecolor(hp->acoef, d);
+					/* make axes */
+	VCOPY(hp->uz, r->ron);
+	hp->uy[0] = hp->uy[1] = hp->uy[2] = 0.0;
+	for (i = 0; i < 3; i++)
+		if (hp->uz[i] < 0.6 && hp->uz[i] > -0.6)
+			break;
+	if (i >= 3)
+		error(CONSISTENCY, "bad ray direction in inithemi");
+	hp->uy[i] = 1.0;
+	fcross(hp->ux, hp->uy, hp->uz);
+	normalize(hp->ux);
+	fcross(hp->uy, hp->uz, hp->ux);
+}
+
+
+int
+divsample(				/* sample a division */
+	AMBSAMP  *dp,
+	AMBHEMI  *h,
+	RAY  *r
+)
+{
 	RAY  ar;
-	int  hlist[4];
+	int  hlist[3];
+	double  spt[2];
 	double  xd, yd, zd;
 	double  b2;
 	double  phi;
-	register int  i;
-
-	if (rayorigin(&ar, r, AMBIENT, 0.5) < 0)
+	int  i;
+					/* ambient coefficient for weight */
+	if (ambacc > FTINY)
+		setcolor(ar.rcoef, AVGREFL, AVGREFL, AVGREFL);
+	else
+		copycolor(ar.rcoef, h->acoef);
+	if (rayorigin(&ar, AMBIENT, r, ar.rcoef) < 0)
 		return(-1);
+	if (ambacc > FTINY) {
+		multcolor(ar.rcoef, h->acoef);
+		scalecolor(ar.rcoef, 1./AVGREFL);
+	}
 	hlist[0] = r->rno;
 	hlist[1] = dp->t;
 	hlist[2] = dp->p;
-	hlist[3] = 0;
-	zd = sqrt((dp->t+urand(urind(ilhash(hlist,4),dp->n)))/h->nt);
-	hlist[3] = 1;
-	phi = 2.0*PI * (dp->p+urand(urind(ilhash(hlist,4),dp->n)))/h->np;
-	xd = cos(phi) * zd;
-	yd = sin(phi) * zd;
+	multisamp(spt, 2, urand(ilhash(hlist,3)+dp->n));
+	zd = sqrt((dp->t + spt[0])/h->nt);
+	phi = 2.0*PI * (dp->p + spt[1])/h->np;
+	xd = tcos(phi) * zd;
+	yd = tsin(phi) * zd;
 	zd = sqrt(1.0 - zd*zd);
 	for (i = 0; i < 3; i++)
 		ar.rdir[i] =	xd*h->ux[i] +
 				yd*h->uy[i] +
 				zd*h->uz[i];
+	checknorm(ar.rdir);
 	dimlist[ndims++] = dp->t*h->np + dp->p + 90171;
 	rayvalue(&ar);
 	ndims--;
+	multcolor(ar.rcol, ar.rcoef);	/* apply coefficient */
 	addcolor(dp->v, ar.rcol);
+					/* use rt to improve gradient calc */
 	if (ar.rt > FTINY && ar.rt < FHUGE)
 		dp->r += 1.0/ar.rt;
 					/* (re)initialize error */
@@ -99,77 +656,134 @@ RAY  *r;
 }
 
 
+static int
+ambcmp(					/* decreasing order */
+	const void *p1,
+	const void *p2
+)
+{
+	const AMBSAMP	*d1 = (const AMBSAMP *)p1;
+	const AMBSAMP	*d2 = (const AMBSAMP *)p2;
+
+	if (d1->k < d2->k)
+		return(1);
+	if (d1->k > d2->k)
+		return(-1);
+	return(0);
+}
+
+
+static int
+ambnorm(				/* standard order */
+	const void *p1,
+	const void *p2
+)
+{
+	const AMBSAMP	*d1 = (const AMBSAMP *)p1;
+	const AMBSAMP	*d2 = (const AMBSAMP *)p2;
+	int	c;
+
+	if ( (c = d1->t - d2->t) )
+		return(c);
+	return(d1->p - d2->p);
+}
+
+
 double
-doambient(acol, r, pg, dg)		/* compute ambient component */
-COLOR  acol;
-RAY  *r;
-FVECT  pg, dg;
+doambient(				/* compute ambient component */
+	COLOR  rcol,
+	RAY  *r,
+	double  wt,
+	FVECT  pg,
+	FVECT  dg
+)
 {
-	double  b, d;
+	double  b, d=0;
 	AMBHEMI  hemi;
 	AMBSAMP  *div;
 	AMBSAMP  dnew;
-	register AMBSAMP  *dp;
+	double  acol[3];
+	AMBSAMP  *dp;
 	double  arad;
-	int  ndivs, ns;
-	register int  i, j;
-					/* initialize color */
-	setcolor(acol, 0.0, 0.0, 0.0);
+	int  divcnt;
+	int  i, j;
 					/* initialize hemisphere */
-	inithemi(&hemi, r);
-	ndivs = hemi.nt * hemi.np;
-	if (ndivs == 0)
+	inithemi(&hemi, rcol, r, wt);
+	divcnt = hemi.nt * hemi.np;
+					/* initialize */
+	if (pg != NULL)
+		pg[0] = pg[1] = pg[2] = 0.0;
+	if (dg != NULL)
+		dg[0] = dg[1] = dg[2] = 0.0;
+	setcolor(rcol, 0.0, 0.0, 0.0);
+	if (divcnt == 0)
 		return(0.0);
-					/* set number of super-samples */
-	ns = ambssamp * r->rweight + 0.5;
-	if (ns > 0 || pg != NULL || dg != NULL) {
-		div = (AMBSAMP *)malloc(ndivs*sizeof(AMBSAMP));
+					/* allocate super-samples */
+	if (hemi.ns > 0 || pg != NULL || dg != NULL) {
+		div = (AMBSAMP *)malloc(divcnt*sizeof(AMBSAMP));
 		if (div == NULL)
 			error(SYSTEM, "out of memory in doambient");
 	} else
 		div = NULL;
 					/* sample the divisions */
 	arad = 0.0;
+	acol[0] = acol[1] = acol[2] = 0.0;
 	if ((dp = div) == NULL)
 		dp = &dnew;
+	divcnt = 0;
 	for (i = 0; i < hemi.nt; i++)
 		for (j = 0; j < hemi.np; j++) {
 			dp->t = i; dp->p = j;
 			setcolor(dp->v, 0.0, 0.0, 0.0);
 			dp->r = 0.0;
 			dp->n = 0;
-			if (divsample(dp, &hemi, r) < 0)
-				goto oopsy;
+			if (divsample(dp, &hemi, r) < 0) {
+				if (div != NULL)
+					dp++;
+				continue;
+			}
+			arad += dp->r;
+			divcnt++;
 			if (div != NULL)
 				dp++;
-			else {
+			else
 				addcolor(acol, dp->v);
-				arad += dp->r;
-			}
 		}
-	if (ns > 0) {			/* perform super-sampling */
+	if (!divcnt) {
+		if (div != NULL)
+			free((void *)div);
+		return(0.0);		/* no samples taken */
+	}
+	if (divcnt < hemi.nt*hemi.np) {
+		pg = dg = NULL;		/* incomplete sampling */
+		hemi.ns = 0;
+	} else if (arad > FTINY && divcnt/arad < minarad) {
+		hemi.ns = 0;		/* close enough */
+	} else if (hemi.ns > 0) {	/* else perform super-sampling? */
 		comperrs(div, &hemi);			/* compute errors */
-		qsort(div, ndivs, sizeof(AMBSAMP), ambcmp);	/* sort divs */
+		qsort(div, divcnt, sizeof(AMBSAMP), ambcmp);	/* sort divs */
 						/* super-sample */
-		for (i = ns; i > 0; i--) {
-			copystruct(&dnew, div);
-			if (divsample(&dnew, &hemi, r) < 0)
-				goto oopsy;
-							/* reinsert */
-			dp = div;
-			j = ndivs < i ? ndivs : i;
+		for (i = hemi.ns; i > 0; i--) {
+			dnew = *div;
+			if (divsample(&dnew, &hemi, r) < 0) {
+				dp++;
+				continue;
+			}
+			dp = div;		/* reinsert */
+			j = divcnt < i ? divcnt : i;
 			while (--j > 0 && dnew.k < dp[1].k) {
-				copystruct(dp, dp+1);
+				*dp = *(dp+1);
 				dp++;
 			}
-			copystruct(dp, &dnew);
+			*dp = dnew;
 		}
 		if (pg != NULL || dg != NULL)	/* restore order */
-			qsort(div, ndivs, sizeof(AMBSAMP), ambnorm);
+			qsort(div, divcnt, sizeof(AMBSAMP), ambnorm);
 	}
 					/* compute returned values */
 	if (div != NULL) {
-		for (i = ndivs, dp = div; i-- > 0; dp++) {
+		arad = 0.0;		/* note: divcnt may be < nt*np */
+		for (i = hemi.nt*hemi.np, dp = div; i-- > 0; dp++) {
 			arad += dp->r;
 			if (dp->n > 1) {
 				b = 1.0/dp->n;
@@ -181,7 +795,7 @@ FVECT  pg, dg;
 		}
 		b = bright(acol);
 		if (b > FTINY) {
-			b = ndivs/b;
+			b = 1.0/b;	/* compute & normalize gradient(s) */
 			if (pg != NULL) {
 				posgradient(pg, div, &hemi);
 				for (i = 0; i < 3; i++)
@@ -192,73 +806,42 @@ FVECT  pg, dg;
 				for (i = 0; i < 3; i++)
 					dg[i] *= b;
 			}
-		} else {
-			if (pg != NULL)
-				for (i = 0; i < 3; i++)
-					pg[i] = 0.0;
-			if (dg != NULL)
-				for (i = 0; i < 3; i++)
-					dg[i] = 0.0;
 		}
-		free((char *)div);
+		free((void *)div);
 	}
-	b = 1.0/ndivs;
-	scalecolor(acol, b);
+	copycolor(rcol, acol);
 	if (arad <= FTINY)
-		arad = FHUGE;
-	else
-		arad = (ndivs+ns)/arad;
-	if (arad > maxarad)
 		arad = maxarad;
-	else if (arad < minarad)
+	else
+		arad = (divcnt+hemi.ns)/arad;
+	if (pg != NULL) {		/* reduce radius if gradient large */
+		d = DOT(pg,pg);
+		if (d*arad*arad > 1.0)
+			arad = 1.0/sqrt(d);
+	}
+	if (arad < minarad) {
 		arad = minarad;
-	arad /= sqrt(r->rweight);
-	if (pg != NULL) {		/* clip pos. gradient if too large */
-		d = 4.0*DOT(pg,pg)*arad*arad;
-		if (d > 1.0) {
-			d = 1.0/sqrt(d);
+		if (pg != NULL && d*arad*arad > 1.0) {	/* cap gradient */
+			d = 1.0/arad/sqrt(d);
 			for (i = 0; i < 3; i++)
 				pg[i] *= d;
 		}
 	}
+	if ((arad /= sqrt(wt)) > maxarad)
+		arad = maxarad;
 	return(arad);
-oopsy:
-	if (div != NULL)
-		free((char *)div);
-	return(0.0);
 }
 
 
-inithemi(hp, r)			/* initialize sampling hemisphere */
-register AMBHEMI  *hp;
-RAY  *r;
+void
+comperrs(			/* compute initial error estimates */
+	AMBSAMP  *da,	/* assumes standard ordering */
+	AMBHEMI  *hp
+)
 {
-	register int  i;
-					/* set number of divisions */
-	hp->nt = sqrt(ambdiv * r->rweight / PI) + 0.5;
-	hp->np = PI * hp->nt;
-					/* make axes */
-	VCOPY(hp->uz, r->ron);
-	hp->uy[0] = hp->uy[1] = hp->uy[2] = 0.0;
-	for (i = 0; i < 3; i++)
-		if (hp->uz[i] < 0.6 && hp->uz[i] > -0.6)
-			break;
-	if (i >= 3)
-		error(CONSISTENCY, "bad ray direction in inithemi");
-	hp->uy[i] = 1.0;
-	fcross(hp->ux, hp->uy, hp->uz);
-	normalize(hp->ux);
-	fcross(hp->uy, hp->uz, hp->ux);
-}
-
-
-comperrs(da, hp)		/* compute initial error estimates */
-AMBSAMP  *da;		/* assumes standard ordering */
-register AMBHEMI  *hp;
-{
 	double  b, b2;
 	int  i, j;
-	register AMBSAMP  *dp;
+	AMBSAMP  *dp;
 				/* sum differences from neighbors */
 	dp = da;
 	for (i = 0; i < hp->nt; i++)
@@ -302,21 +885,24 @@ register AMBHEMI  *hp;
 }
 
 
-posgradient(gv, da, hp)				/* compute position gradient */
-FVECT  gv;
-AMBSAMP  *da;			/* assumes standard ordering */
-AMBHEMI  *hp;
+void
+posgradient(					/* compute position gradient */
+	FVECT  gv,
+	AMBSAMP  *da,			/* assumes standard ordering */
+	AMBHEMI  *hp
+)
 {
-	register int  i, j;
-	double  b, d;
+	int  i, j;
+	double  nextsine, lastsine, b, d;
 	double  mag0, mag1;
 	double  phi, cosp, sinp, xd, yd;
-	register AMBSAMP  *dp;
+	AMBSAMP  *dp;
 
 	xd = yd = 0.0;
 	for (j = 0; j < hp->np; j++) {
 		dp = da + j;
 		mag0 = mag1 = 0.0;
+		lastsine = 0.0;
 		for (i = 0; i < hp->nt; i++) {
 #ifdef  DEBUG
 			if (dp->t != i || dp->p != j)
@@ -327,43 +913,47 @@ AMBHEMI  *hp;
 			if (i > 0) {
 				d = dp[-hp->np].r;
 				if (dp[0].r > d) d = dp[0].r;
-				d *= 1.0 - sqrt((double)i/hp->nt);
+							/* sin(t)*cos(t)^2 */
+				d *= lastsine * (1.0 - (double)i/hp->nt);
 				mag0 += d*(b - bright(dp[-hp->np].v));
 			}
+			nextsine = sqrt((double)(i+1)/hp->nt);
 			if (j > 0) {
 				d = dp[-1].r;
 				if (dp[0].r > d) d = dp[0].r;
-				mag1 += d*(b - bright(dp[-1].v));
+				mag1 += d * (nextsine - lastsine) *
+						(b - bright(dp[-1].v));
 			} else {
 				d = dp[hp->np-1].r;
 				if (dp[0].r > d) d = dp[0].r;
-				mag1 += d*(b - bright(dp[hp->np-1].v));
+				mag1 += d * (nextsine - lastsine) *
+						(b - bright(dp[hp->np-1].v));
 			}
 			dp += hp->np;
+			lastsine = nextsine;
 		}
-		if (hp->nt > 1) {
-			mag0 /= (double)hp->np;
-			mag1 /= (double)hp->nt;
-		}
+		mag0 *= 2.0*PI / hp->np;
 		phi = 2.0*PI * (double)j/hp->np;
-		cosp = cos(phi); sinp = sin(phi);
+		cosp = tcos(phi); sinp = tsin(phi);
 		xd += mag0*cosp - mag1*sinp;
 		yd += mag0*sinp + mag1*cosp;
 	}
 	for (i = 0; i < 3; i++)
-		gv[i] = (xd*hp->ux[i] + yd*hp->uy[i])/PI;
+		gv[i] = (xd*hp->ux[i] + yd*hp->uy[i])*(hp->nt*hp->np)/PI;
 }
 
 
-dirgradient(gv, da, hp)				/* compute direction gradient */
-FVECT  gv;
-AMBSAMP  *da;			/* assumes standard ordering */
-AMBHEMI  *hp;
+void
+dirgradient(					/* compute direction gradient */
+	FVECT  gv,
+	AMBSAMP  *da,			/* assumes standard ordering */
+	AMBHEMI  *hp
+)
 {
-	register int  i, j;
+	int  i, j;
 	double  mag;
 	double  phi, xd, yd;
-	register AMBSAMP  *dp;
+	AMBSAMP  *dp;
 
 	xd = yd = 0.0;
 	for (j = 0; j < hp->np; j++) {
@@ -375,13 +965,16 @@ AMBHEMI  *hp;
 				error(CONSISTENCY,
 					"division order in dirgradient");
 #endif
-			mag += sqrt((i+.5)/hp->nt)*bright(dp->v);
+							/* tan(t) */
+			mag += bright(dp->v)/sqrt(hp->nt/(i+.5) - 1.0);
 			dp += hp->np;
 		}
 		phi = 2.0*PI * (j+.5)/hp->np + PI/2.0;
-		xd += mag * cos(phi);
-		yd += mag * sin(phi);
+		xd += mag * tcos(phi);
+		yd += mag * tsin(phi);
 	}
 	for (i = 0; i < 3; i++)
-		gv[i] = (xd*hp->ux[i] + yd*hp->uy[i])*PI/(hp->nt*hp->np);
+		gv[i] = xd*hp->ux[i] + yd*hp->uy[i];
 }
+
+#endif	/* ! NEWAMB */