--- ray/src/rt/ambcomp.c	2014/04/24 17:36:43	2.32
+++ ray/src/rt/ambcomp.c	2014/05/11 19:03:37	2.58
@@ -1,5 +1,5 @@
 #ifndef lint
-static const char	RCSid[] = "$Id: ambcomp.c,v 2.32 2014/04/24 17:36:43 greg Exp $";
+static const char	RCSid[] = "$Id: ambcomp.c,v 2.58 2014/05/11 19:03:37 greg Exp $";
 #endif
 /*
  * Routines to compute "ambient" values using Monte Carlo
@@ -8,6 +8,10 @@ static const char	RCSid[] = "$Id: ambcomp.c,v 2.32 201
  *	for Irradiance Caching" by Schwarzhaupt, Wann Jensen, & Jarosz
  *	from ACM SIGGRAPH Asia 2012 conference proceedings.
  *
+ *  Added book-keeping optimization to avoid calculations that would
+ *	cancel due to traversal in both directions on edges that are adjacent
+ *	to same-valued triangles.  This cuts about half of Hessian math.
+ *
  *  Declarations of external symbols in ambient.h
  */
 
@@ -22,21 +26,25 @@ static const char	RCSid[] = "$Id: ambcomp.c,v 2.32 201
 extern void		SDsquare2disk(double ds[2], double seedx, double seedy);
 
 typedef struct {
+	COLOR	v;		/* sample value (ray color after coefficient is applied) */
+	float	d;		/* reciprocal distance (1/rt), 0 if sample failed */
+	FVECT	p;		/* intersection point (distance capped in ambsample) */
+} AMBSAMP;		/* one hemisphere division sample */
+
+typedef struct {		/* sampling state for one hemisphere */
 	RAY	*rp;		/* originating ray sample */
 	FVECT	ux, uy;		/* tangent axis unit vectors */
 	int	ns;		/* number of samples per axis */
 	COLOR	acoef;		/* division contribution coefficient */
-	struct s_ambsamp {
-		COLOR	v;		/* hemisphere sample value */
-		FVECT	p;		/* intersection point */
-	} sa[1];		/* sample array (extends struct) */
+	AMBSAMP	sa[1];		/* sample array indexed by AI(h,i,j) (extends struct) */
 }  AMBHEMI;		/* ambient sample hemisphere */
 
-#define ambsamp(h,i,j)	(h)->sa[(i)*(h)->ns + (j)]
+#define AI(h,i,j)	((i)*(h)->ns + (j))	/* flat sa[] index of division (i,j) */
+#define ambsam(h,i,j)	(h)->sa[AI(h,i,j)]	/* sample lvalue for division (i,j) */
 
 typedef struct {
-	FVECT	r_i, r_i1, e_i, rI2_eJ2;
-	double	nf, I1, I2;
+	FVECT	r_i, r_i1, e_i, rcp, rI2_eJ2;	/* rcp = r_i x r_i1, all set by comp_fftri() */
+	double	I1, I2;		/* scalar coefficients set by comp_fftri() */
 } FFTRI;		/* vectors and coefficients for Hessian calculation */
 
 
@@ -59,8 +67,7 @@ inithemi(			/* initialize sampling hemisphere */
 	if (n < i)
 		n = i;
 					/* allocate sampling array */
-	hp = (AMBHEMI *)malloc(sizeof(AMBHEMI) +
-				sizeof(struct s_ambsamp)*(n*n - 1));
+	hp = (AMBHEMI *)malloc(sizeof(AMBHEMI) + sizeof(AMBSAMP)*(n*n - 1));
 	if (hp == NULL)
 		return(NULL);
 	hp->rp = r;
@@ -70,14 +77,14 @@ inithemi(			/* initialize sampling hemisphere */
 	d = 1.0/(n*n);
 	scalecolor(hp->acoef, d);
 					/* make tangent plane axes */
-	hp->uy[0] = 0.1 - 0.2*frandom();
-	hp->uy[1] = 0.1 - 0.2*frandom();
-	hp->uy[2] = 0.1 - 0.2*frandom();
-	for (i = 0; i < 3; i++)
-		if (r->ron[i] < 0.6 && r->ron[i] > -0.6)
+	hp->uy[0] = 0.5 - frandom();
+	hp->uy[1] = 0.5 - frandom();
+	hp->uy[2] = 0.5 - frandom();
+	for (i = 3; i--; )
+		if ((-0.6 < r->ron[i]) & (r->ron[i] < 0.6))
 			break;
-	if (i >= 3)
-		error(CONSISTENCY, "bad ray direction in inithemi()");
+	if (i < 0)
+		error(CONSISTENCY, "bad ray direction in inithemi");
 	hp->uy[i] = 1.0;
 	VCROSS(hp->ux, hp->uy, r->ron);
 	normalize(hp->ux);
@@ -87,79 +94,206 @@ inithemi(			/* initialize sampling hemisphere */
 }
 
 
-static struct s_ambsamp *
-ambsample(				/* sample an ambient direction */
-	AMBHEMI	*hp,
-	int	i,
-	int	j
-)
+/* Trace sample n of division (i,j) into *arp and scale arp->rcol by arp->rcoef; return 1, or 0 if rayorigin() fails */
+static int
+getambsamp(RAY *arp, AMBHEMI *hp, int i, int j, int n)
 {
-	struct s_ambsamp	*ap = &ambsamp(hp,i,j);
-	RAY			ar;
-	double			spt[2], zd;
-	int			ii;
+	int	hlist[3], ii;	/* hlist: hash key (ray number, j, i) */
+	double	spt[2], zd;	/* spt: 2-D sample point, zd: component along normal */
 					/* ambient coefficient for weight */
 	if (ambacc > FTINY)
-		setcolor(ar.rcoef, AVGREFL, AVGREFL, AVGREFL);
+		setcolor(arp->rcoef, AVGREFL, AVGREFL, AVGREFL);
 	else
-		copycolor(ar.rcoef, hp->acoef);
-	if (rayorigin(&ar, AMBIENT, hp->rp, ar.rcoef) < 0)
-		goto badsample;
+		copycolor(arp->rcoef, hp->acoef);
+	if (rayorigin(arp, AMBIENT, hp->rp, arp->rcoef) < 0)
+		return(0);	/* no ray was traced; *arp holds no result */
 	if (ambacc > FTINY) {
-		multcolor(ar.rcoef, hp->acoef);
-		scalecolor(ar.rcoef, 1./AVGREFL);
+		multcolor(arp->rcoef, hp->acoef);
+		scalecolor(arp->rcoef, 1./AVGREFL);	/* undo AVGREFL used for the weight test */
 	}
-					/* generate hemispherical sample */
-	SDsquare2disk(spt,	(i+.1+.8*frandom())/hp->ns,
-				(j+.1+.8*frandom())/hp->ns );
+	hlist[0] = hp->rp->rno;		/* key the sequence on parent ray and cell */
+	hlist[1] = j;
+	hlist[2] = i;
+	multisamp(spt, 2, urand(ilhash(hlist,3)+n));	/* n offsets the hash, so each n uses a different urand() argument */
+	if (!n) {			/* avoid border samples for n==0 */
+		if ((spt[0] < 0.1) | (spt[0] >= 0.9))
+			spt[0] = 0.1 + 0.8*frandom();	/* redraw inside [0.1,0.9) */
+		if ((spt[1] < 0.1) | (spt[1] >= 0.9))
+			spt[1] = 0.1 + 0.8*frandom();
+	}
+	SDsquare2disk(spt, (j+spt[1])/hp->ns, (i+spt[0])/hp->ns);	/* spt is overwritten with the mapped point */
 	zd = sqrt(1. - spt[0]*spt[0] - spt[1]*spt[1]);
 	for (ii = 3; ii--; )
-		ar.rdir[ii] =	spt[0]*hp->ux[ii] +
+		arp->rdir[ii] =	spt[0]*hp->ux[ii] +
 				spt[1]*hp->uy[ii] +
 				zd*hp->rp->ron[ii];
-	checknorm(ar.rdir);
-	dimlist[ndims++] = i*hp->ns + j + 90171;
-	rayvalue(&ar);			/* evaluate ray */
-	ndims--;
-	if (ar.rt > 20.0*maxarad)	/* limit vertex distance */
-		ar.rt = 20.0*maxarad;
-	else if (ar.rt <= FTINY)	/* should never happen! */
-		goto badsample;
+	checknorm(arp->rdir);
+	dimlist[ndims++] = AI(hp,i,j) + 90171;	/* push a per-cell entry for the duration of rayvalue() */
+	rayvalue(arp);			/* evaluate ray */
+	ndims--;			/* pop the entry, then apply coefficient */
+	multcolor(arp->rcol, arp->rcoef);
+	return(1);
+}
+
+
+static AMBSAMP *		/* returns the filled sample, or NULL on failure */
 ambsample(				/* initial ambient division sample */
 	AMBHEMI	*hp,
 	int	i,
 	int	j
 )
 {
+	AMBSAMP	*ap = &ambsam(hp,i,j);	/* slot for division (i,j) */
+	RAY	ar;
+					/* generate hemispherical sample (n == 0) */
+	if (!getambsamp(&ar, hp, i, j, 0) || ar.rt <= FTINY) {
+		memset(ap, 0, sizeof(AMBSAMP));	/* failed sample: all fields zero */
+		return(NULL);
+	}
+	ap->d = 1.0/ar.rt;		/* store 1/rt before the distance is capped */
+	if (ar.rt > 10.0*thescene.cusize)	/* limit vertex distance */
+		ar.rt = 10.0*thescene.cusize;
 	VSUM(ap->p, ar.rorg, ar.rdir, ar.rt);
-	multcolor(ar.rcol, ar.rcoef);	/* apply coefficient */
 	copycolor(ap->v, ar.rcol);
 	return(ap);
-badsample:
-	setcolor(ap->v, 0., 0., 0.);
-	VCOPY(ap->p, hp->rp->rop);
-	return(NULL);
 }
 
 
+/* Estimate errors based on ambient division differences; returns a calloc'ed ns*ns array (caller frees) or NULL */
+static float *
+getambdiffs(AMBHEMI *hp)
+{
+	float	*earr = (float *)calloc(hp->ns*hp->ns, sizeof(float));
+	float	*ep;		/* error entry matching *ap */
+	AMBSAMP	*ap;		/* current sample, walked in array order */
+	double	b, d2;		/* b: brightness of *ap, d2: squared difference */
+	int	i, j;
+
+	if (earr == NULL)		/* out of memory? */
+		return(NULL);
+					/* compute squared neighbor diffs */
+	for (ap = hp->sa, ep = earr, i = 0; i < hp->ns; i++)
+	    for (j = 0; j < hp->ns; j++, ap++, ep++) {
+		b = bright(ap[0].v);
+		if (i) {		/* from above: cell (i-1,j) */
+			d2 = b - bright(ap[-hp->ns].v);
+			d2 *= d2;
+			ep[0] += d2;	/* each difference is charged to both cells */
+			ep[-hp->ns] += d2;
+		}
+		if (!j) continue;	/* first column has no left or diagonal neighbor */
+					/* from behind: cell (i,j-1) */
+		d2 = b - bright(ap[-1].v);
+		d2 *= d2;
+		ep[0] += d2;
+		ep[-1] += d2;
+		if (!i) continue;	/* first row has no diagonal neighbor */
+					/* diagonal: cell (i-1,j-1) */
+		d2 = b - bright(ap[-hp->ns-1].v);
+		d2 *= d2;
+		ep[0] += d2;
+		ep[-hp->ns-1] += d2;
+	    }
+					/* correct for number of neighbors; NOTE(review): factors assume 8 neighbors but the loop above pairs only 6 -- confirm */
+	earr[0] *= 8./3.;		/* four corners */
+	earr[hp->ns-1] *= 8./3.;
+	earr[(hp->ns-1)*hp->ns] *= 8./3.;
+	earr[(hp->ns-1)*hp->ns + hp->ns-1] *= 8./3.;
+	for (i = 1; i < hp->ns-1; i++) {	/* first and last column, corners excluded */
+		earr[i*hp->ns] *= 8./5.;
+		earr[i*hp->ns + hp->ns-1] *= 8./5.;
+	}
+	for (j = 1; j < hp->ns-1; j++) {	/* first and last row, corners excluded */
+		earr[j] *= 8./5.;
+		earr[(hp->ns-1)*hp->ns + j] *= 8./5.;
+	}
+	return(earr);
+}
+
+
+/* Perform super-sampling on hemisphere (introduces bias): spread about cnt extra rays by estimated error and adjust acol[] */
+static void
+ambsupersamp(double acol[3], AMBHEMI *hp, int cnt)
+{
+	float	*earr = getambdiffs(hp);	/* per-division error estimates */
+	double	e2rem = 0;		/* error total of divisions not yet visited */
+	AMBSAMP	*ap;
+	RAY	ar;
+	double	asum[3];		/* sum of extra sample values for one division */
+	float	*ep;
+	int	i, j, n, nss;		/* nss: extra samples for this division */
+
+	if (earr == NULL)		/* just skip calc. if no memory */
+		return;
+					/* accumulate estimated variances */
+	for (ep = earr + hp->ns*hp->ns; ep > earr; )
+		e2rem += *--ep;
+	ep = earr;			/* perform super-sampling */
+	for (ap = hp->sa, i = 0; i < hp->ns; i++)
+	    for (j = 0; j < hp->ns; j++, ap++) {
+		if (e2rem <= FTINY)
+			goto done;	/* nothing left to do */
+		nss = *ep/e2rem*cnt + frandom();	/* share of remaining rays, randomly rounded */
+		asum[0] = asum[1] = asum[2] = 0.0;
+		for (n = 1; n <= nss; n++) {
+			if (!getambsamp(&ar, hp, i, j, n)) {
+				nss = n-1;	/* count only the rays actually traced */
+				break;
+			}
+			addcolor(asum, ar.rcol);
+		}
+		if (nss) {		/* update returned ambient value */
+			const double	ssf = 1./(nss + 1.);	/* weight of each of the nss+1 samples */
+			for (n = 3; n--; )	/* swap the original value for the mean of all nss+1 */
+				acol[n] += ssf*asum[n] +
+						(ssf - 1.)*colval(ap->v,n);
+		}
+		e2rem -= *ep++;		/* update remainders */
+		cnt -= nss;
+	}
+done:
+	free(earr);
+}
+
+
+/* Return brightness (Y) of the farthest of three ambient samples */
+static double
+back_ambval(AMBHEMI *hp, const int n1, const int n2, const int n3)
+{
+	AMBSAMP	*sa = hp->sa;	/* sample array */
+	int	nfar = n1;	/* index of farthest sample so far */
+					/* d is 1/distance: smaller is farther */
+	if (!(sa[n1].d <= sa[n2].d))
+		nfar = n2;
+	if (!(sa[nfar].d <= sa[n3].d))
+		nfar = n3;
+	return(colval(sa[nfar].v,CIEY));
+}
+
+
 /* Compute vectors and coefficients for Hessian/gradient calcs */
 static void
-comp_fftri(FFTRI *ftp, FVECT ap0, FVECT ap1, FVECT rop)
+comp_fftri(FFTRI *ftp, AMBHEMI *hp, const int n0, const int n1)	/* n0,n1: sa[] indices of the edge endpoints */
 {
-	FVECT	vcp;
-	double	dot_e, dot_er, rdot_r, rdot_r1, J2;
-	int	i;
+	double	rdot_cp, dot_e, dot_er, rdot_r, rdot_r1, J2;	/* rdot_* hold reciprocals of dot products */
+	int	ii;
 
-	VSUB(ftp->r_i, ap0, rop);
-	VSUB(ftp->r_i1, ap1, rop);
-	VSUB(ftp->e_i, ap1, ap0);
-	VCROSS(vcp, ftp->e_i, ftp->r_i);
-	ftp->nf = 1.0/DOT(vcp,vcp);
+	VSUB(ftp->r_i, hp->sa[n0].p, hp->rp->rop);	/* ray origin to first endpoint */
+	VSUB(ftp->r_i1, hp->sa[n1].p, hp->rp->rop);	/* ray origin to second endpoint */
+	VSUB(ftp->e_i, hp->sa[n1].p, hp->sa[n0].p);	/* edge from first to second endpoint */
+	VCROSS(ftp->rcp, ftp->r_i, ftp->r_i1);	/* kept in FFTRI for comp_hessian/comp_gradient */
+	rdot_cp = 1.0/DOT(ftp->rcp,ftp->rcp);	/* NOTE(review): divides by zero if r_i and r_i1 are parallel */
 	dot_e = DOT(ftp->e_i,ftp->e_i);
 	dot_er = DOT(ftp->e_i, ftp->r_i);
 	rdot_r = 1.0/DOT(ftp->r_i,ftp->r_i);
 	rdot_r1 = 1.0/DOT(ftp->r_i1,ftp->r_i1);
 	ftp->I1 = acos( DOT(ftp->r_i, ftp->r_i1) * sqrt(rdot_r*rdot_r1) ) *
-			sqrt( ftp->nf );
+			sqrt( rdot_cp );	/* angle between r_i and r_i1 over |rcp| */
 	ftp->I2 = ( DOT(ftp->e_i, ftp->r_i1)*rdot_r1 - dot_er*rdot_r +
-			dot_e*ftp->I1 )*0.5*ftp->nf;
+			dot_e*ftp->I1 )*0.5*rdot_cp;
 	J2 =  ( 0.5*(rdot_r - rdot_r1) - dot_er*ftp->I2 ) / dot_e;
-	for (i = 3; i--; )
-		ftp->rI2_eJ2[i] = ftp->I2*ftp->r_i[i] + J2*ftp->e_i[i];
+	for (ii = 3; ii--; )		/* combined vector I2*r_i + J2*e_i */
+		ftp->rI2_eJ2[ii] = ftp->I2*ftp->r_i[ii] + J2*ftp->e_i[ii];
 }
 
 
@@ -180,7 +314,7 @@ compose_matrix(FVECT mat[3], FVECT va, FVECT vb)
 static void
 comp_hessian(FVECT hess[3], FFTRI *ftp, FVECT nrm)
 {
-	FVECT	vcp;
+	FVECT	ncp;
 	FVECT	m1[3], m2[3], m3[3], m4[3];
 	double	d1, d2, d3, d4;
 	double	I3, J3, K3;
@@ -190,18 +324,17 @@ comp_hessian(FVECT hess[3], FFTRI *ftp, FVECT nrm)
 	d2 = 1.0/DOT(ftp->r_i1,ftp->r_i1);
 	d3 = 1.0/DOT(ftp->e_i,ftp->e_i);
 	d4 = DOT(ftp->e_i, ftp->r_i);
-	I3 = 0.25*ftp->nf*( DOT(ftp->e_i, ftp->r_i1)*d2*d2 - d4*d1*d1 +
-				3.0/d3*ftp->I2 );
+	I3 = ( DOT(ftp->e_i, ftp->r_i1)*d2*d2 - d4*d1*d1 + 3.0/d3*ftp->I2 )
+			/ ( 4.0*DOT(ftp->rcp,ftp->rcp) );
 	J3 = 0.25*d3*(d1*d1 - d2*d2) - d4*d3*I3;
 	K3 = d3*(ftp->I2 - I3/d1 - 2.0*d4*J3);
 					/* intermediate matrices */
-	VCROSS(vcp, nrm, ftp->e_i);
-	compose_matrix(m1, vcp, ftp->rI2_eJ2);
+	VCROSS(ncp, nrm, ftp->e_i);
+	compose_matrix(m1, ncp, ftp->rI2_eJ2);
 	compose_matrix(m2, ftp->r_i, ftp->r_i);
 	compose_matrix(m3, ftp->e_i, ftp->e_i);
 	compose_matrix(m4, ftp->r_i, ftp->e_i);
-	VCROSS(vcp, ftp->r_i, ftp->e_i);
-	d1 = DOT(nrm, vcp);
+	d1 = DOT(nrm, ftp->rcp);
 	d2 = -d1*ftp->I2;
 	d1 *= 2.0;
 	for (i = 3; i--; )		/* final matrix sum */
@@ -209,7 +342,7 @@ comp_hessian(FVECT hess[3], FFTRI *ftp, FVECT nrm)
 		hess[i][j] = m1[i][j] + d1*( I3*m2[i][j] + K3*m3[i][j] +
 						2.0*J3*m4[i][j] );
 		hess[i][j] += d2*(i==j);
-		hess[i][j] *= 1.0/PI;
+		hess[i][j] *= -1.0/PI;
 	    }
 }
 
@@ -231,7 +364,7 @@ rev_hessian(FVECT hess[3])
 /* Add to radiometric Hessian from the given triangle */
 static void
 add2hessian(FVECT hess[3], FVECT ehess1[3],
-		FVECT ehess2[3], FVECT ehess3[3], COLORV v)
+		FVECT ehess2[3], FVECT ehess3[3], double v)
 {
 	int	i, j;
 
@@ -245,15 +378,14 @@ add2hessian(FVECT hess[3], FVECT ehess1[3],
 static void
 comp_gradient(FVECT grad, FFTRI *ftp, FVECT nrm)
 {
-	FVECT	vcp;
+	FVECT	ncp;		/* nrm cross e_i */
 	double	f1;
 	int	i;
 
-	VCROSS(vcp, ftp->r_i, ftp->r_i1);
-	f1 = 2.0*DOT(nrm, vcp);
-	VCROSS(vcp, nrm, ftp->e_i);
+	f1 = 2.0*DOT(nrm, ftp->rcp);	/* rcp = r_i x r_i1, precomputed by comp_fftri() */
+	VCROSS(ncp, nrm, ftp->e_i);
 	for (i = 3; i--; )
-		grad[i] = (-0.5/PI)*( ftp->I1*vcp[i] + f1*ftp->rI2_eJ2[i] );
+		grad[i] = (0.5/PI)*( ftp->I1*ncp[i] + f1*ftp->rI2_eJ2[i] );	/* sign is opposite to the removed line */
 }
 
 
@@ -269,7 +401,7 @@ rev_gradient(FVECT grad)
 
 /* Add to displacement gradient from the given triangle */
 static void
-add2gradient(FVECT grad, FVECT egrad1, FVECT egrad2, FVECT egrad3, COLORV v)
+add2gradient(FVECT grad, FVECT egrad1, FVECT egrad2, FVECT egrad3, double v)
 {
 	int	i;
 
@@ -278,34 +410,8 @@ add2gradient(FVECT grad, FVECT egrad1, FVECT egrad2, F
 }
 
 
-/* Return brightness of furthest ambient sample */
-static COLORV
-back_ambval(struct s_ambsamp *ap1, struct s_ambsamp *ap2,
-		struct s_ambsamp *ap3, FVECT orig)
-{
-	COLORV	vback;
-	FVECT	vec;
-	double	d2, d2best;
-
-	VSUB(vec, ap1->p, orig);
-	d2best = DOT(vec,vec);
-	vback = colval(ap1->v,CIEY);
-	VSUB(vec, ap2->p, orig);
-	d2 = DOT(vec,vec);
-	if (d2 > d2best) {
-		d2best = d2;
-		vback = colval(ap2->v,CIEY);
-	}
-	VSUB(vec, ap3->p, orig);
-	d2 = DOT(vec,vec);
-	if (d2 > d2best)
-		return(colval(ap3->v,CIEY));
-	return(vback);
-}
-
-
 /* Compute anisotropic radii and eigenvector directions */
-static int
+static void
 eigenvectors(FVECT uv[2], float ra[2], FVECT hessian[3])
 {
 	double	hess2[2][2];
@@ -321,13 +427,16 @@ eigenvectors(FVECT uv[2], float ra[2], FVECT hessian[3
 	hess2[0][1] = DOT(uv[0], b);
 	hess2[1][0] = DOT(uv[1], a);
 	hess2[1][1] = DOT(uv[1], b);
-					/* compute eigenvalues */
-	if ( quadratic(evalue, 1.0, -hess2[0][0]-hess2[1][1],
-			hess2[0][0]*hess2[1][1]-hess2[0][1]*hess2[1][0]) != 2 ||
-			(evalue[0] = fabs(evalue[0])) <= FTINY*FTINY ||
-			(evalue[1] = fabs(evalue[1])) <= FTINY*FTINY )
-		error(INTERNAL, "bad eigenvalue calculation");
-
+					/* compute eigenvalue(s) */
+	i = quadratic(evalue, 1.0, -hess2[0][0]-hess2[1][1],
+			hess2[0][0]*hess2[1][1]-hess2[0][1]*hess2[1][0]);
+	if (i == 1)			/* double-root (circle) */
+		evalue[1] = evalue[0];
+	if (!i || ((evalue[0] = fabs(evalue[0])) <= FTINY*FTINY) |
+			((evalue[1] = fabs(evalue[1])) <= FTINY*FTINY) ) {
+		ra[0] = ra[1] = maxarad;
+		return;
+	}
 	if (evalue[0] > evalue[1]) {
 		ra[0] = sqrt(sqrt(4.0/evalue[0]));
 		ra[1] = sqrt(sqrt(4.0/evalue[1]));
@@ -385,8 +494,7 @@ ambHessian(				/* anisotropic radii & pos. gradient */
 	}
 					/* compute first row of edges */
 	for (j = 0; j < hp->ns-1; j++) {
-		comp_fftri(&fftr, ambsamp(hp,0,j).p,
-				ambsamp(hp,0,j+1).p, hp->rp->rop);
+		comp_fftri(&fftr, hp, AI(hp,0,j), AI(hp,0,j+1));
 		if (hessrow != NULL)
 			comp_hessian(hessrow[j], &fftr, hp->rp->ron);
 		if (gradrow != NULL)
@@ -396,8 +504,7 @@ ambHessian(				/* anisotropic radii & pos. gradient */
 	for (i = 0; i < hp->ns-1; i++) {
 	    FVECT	hesscol[3];	/* compute first vertical edge */
 	    FVECT	gradcol;
-	    comp_fftri(&fftr, ambsamp(hp,i,0).p,
-			ambsamp(hp,i+1,0).p, hp->rp->rop);
+	    comp_fftri(&fftr, hp, AI(hp,i,0), AI(hp,i+1,0));
 	    if (hessrow != NULL)
 		comp_hessian(hesscol, &fftr, hp->rp->ron);
 	    if (gradrow != NULL)
@@ -405,34 +512,31 @@ ambHessian(				/* anisotropic radii & pos. gradient */
 	    for (j = 0; j < hp->ns-1; j++) {
 		FVECT	hessdia[3];	/* compute triangle contributions */
 		FVECT	graddia;
-		COLORV	backg;
-		backg = back_ambval(&ambsamp(hp,i,j), &ambsamp(hp,i,j+1),
-					&ambsamp(hp,i+1,j), hp->rp->rop);
+		double	backg;
+		backg = back_ambval(hp, AI(hp,i,j),
+					AI(hp,i,j+1), AI(hp,i+1,j));
 					/* diagonal (inner) edge */
-		comp_fftri(&fftr, ambsamp(hp,i,j+1).p,
-				ambsamp(hp,i+1,j).p, hp->rp->rop);
+		comp_fftri(&fftr, hp, AI(hp,i,j+1), AI(hp,i+1,j));
 		if (hessrow != NULL) {
 		    comp_hessian(hessdia, &fftr, hp->rp->ron);
 		    rev_hessian(hesscol);
 		    add2hessian(hessian, hessrow[j], hessdia, hesscol, backg);
 		}
-		if (gradient != NULL) {
+		if (gradrow != NULL) {
 		    comp_gradient(graddia, &fftr, hp->rp->ron);
 		    rev_gradient(gradcol);
 		    add2gradient(gradient, gradrow[j], graddia, gradcol, backg);
 		}
 					/* initialize edge in next row */
-		comp_fftri(&fftr, ambsamp(hp,i+1,j+1).p,
-				ambsamp(hp,i+1,j).p, hp->rp->rop);
+		comp_fftri(&fftr, hp, AI(hp,i+1,j+1), AI(hp,i+1,j));
 		if (hessrow != NULL)
 		    comp_hessian(hessrow[j], &fftr, hp->rp->ron);
 		if (gradrow != NULL)
 		    comp_gradient(gradrow[j], &fftr, hp->rp->ron);
 					/* new column edge & paired triangle */
-		backg = back_ambval(&ambsamp(hp,i,j+1), &ambsamp(hp,i+1,j+1),
-					&ambsamp(hp,i+1,j), hp->rp->rop);
-		comp_fftri(&fftr, ambsamp(hp,i,j+1).p, ambsamp(hp,i+1,j+1).p,
-				hp->rp->rop);
+		backg = back_ambval(hp, AI(hp,i+1,j+1),
+					AI(hp,i+1,j), AI(hp,i,j+1));
+		comp_fftri(&fftr, hp, AI(hp,i,j+1), AI(hp,i+1,j+1));
 		if (hessrow != NULL) {
 		    comp_hessian(hesscol, &fftr, hp->rp->ron);
 		    rev_hessian(hessdia);
@@ -466,11 +570,11 @@ ambHessian(				/* anisotropic radii & pos. gradient */
 static void
 ambdirgrad(AMBHEMI *hp, FVECT uv[2], float dg[2])
 {
-	struct s_ambsamp	*ap;
-	double			dgsum[2];
-	int			n;
-	FVECT			vd;
-	double			gfact;
+	AMBSAMP	*ap;
+	double	dgsum[2];
+	int	n;
+	FVECT	vd;
+	double	gfact;
 
 	dgsum[0] = dgsum[1] = 0.0;	/* sum values times -tan(theta) */
 	for (ap = hp->sa, n = hp->ns*hp->ns; n--; ap++) {
@@ -478,15 +582,73 @@ ambdirgrad(AMBHEMI *hp, FVECT uv[2], float dg[2])
 		VSUB(vd, ap->p, hp->rp->rop);
 					/* brightness over cosine factor */
 		gfact = colval(ap->v,CIEY) / DOT(hp->rp->ron, vd);
-					/* -sine = -proj_radius/vd_length */
-		dgsum[0] += DOT(uv[1], vd) * gfact;
-		dgsum[1] -= DOT(uv[0], vd) * gfact;
+					/* sine = proj_radius/vd_length */
+		dgsum[0] -= DOT(uv[1], vd) * gfact;
+		dgsum[1] += DOT(uv[0], vd) * gfact;
 	}
 	dg[0] = dgsum[0] / (hp->ns*hp->ns);
 	dg[1] = dgsum[1] / (hp->ns*hp->ns);
 }
 
 
+/* Compute potential light leak direction flags for cache value: bit k marks sector [k,k+1)*PI/16 of atan2a(v,u) in the uv frame; 0 if none */
+static uint32
+ambcorral(AMBHEMI *hp, FVECT uv[2], const double r0, const double r1)
+{
+	const double	max_d = 1.0/(minarad*ambacc + 0.001);	/* samples with 1/distance at or above this are ignored */
+	const double	ang_res = 0.5*PI/(hp->ns > 1 ? hp->ns-1 : 1);	/* guard ns==1: no inf fed to the int cast below */
+	const double	ang_step = ang_res/((int)(16/PI*ang_res) + (1+FTINY));	/* keeps each step below one sector width */
+	double		avg_d = 0;
+	uint32		flgs = 0;	/* result: one bit per PI/16 sector */
+	FVECT		vec;
+	double		u, v;
+	double		ang, a1;
+	int		i, j;
+					/* don't bother for a few samples */
+	if (hp->ns < 12)
+		return(0);
+					/* check distances overhead (central block of divisions) */
+	for (i = hp->ns*3/4; i-- > hp->ns>>2; )
+	    for (j = hp->ns*3/4; j-- > hp->ns>>2; )
+		avg_d += ambsam(hp,i,j).d;
+	avg_d *= 4.0/(hp->ns*hp->ns);	/* central block holds about ns*ns/4 samples */
+	if (avg_d*r0 >= 1.0)		/* ceiling too low for corral? */
+		return(0);
+	if (avg_d >= max_d)		/* insurance */
+		return(0);
+					/* else circle around perimeter */
+	for (i = 0; i < hp->ns; i++)
+	    for (j = 0; j < hp->ns; j += !i|(i==hp->ns-1) ? 1 : hp->ns-1) {	/* full first/last row, else only end columns */
+		AMBSAMP	*ap = &ambsam(hp,i,j);
+		if ((ap->d <= FTINY) | (ap->d >= max_d))
+			continue;	/* too far or too near */
+		VSUB(vec, ap->p, hp->rp->rop);
+		u = DOT(vec, uv[0]) * ap->d;
+		v = DOT(vec, uv[1]) * ap->d;
+		if ((r0*r0*u*u + r1*r1*v*v) * ap->d*ap->d <= 1.0)
+			continue;	/* occluder outside ellipse */
+		ang = atan2a(v, u);	/* else set direction flags */
+		for (a1 = ang-.5*ang_res; a1 <= ang+.5*ang_res; a1 += ang_step)	/* unsigned shift; &31 wraps a rounded-up index 32 to 0 */
+			flgs |= (uint32)1<<((int)(16/PI*(a1 + 2.*PI*(a1 < 0))) & 31);
+	    }
+					/* add low-angle incident (< 20deg) */
+	if (fabs(hp->rp->rod) <= 0.342) {
+		u = -DOT(hp->rp->rdir, uv[0]);
+		v = -DOT(hp->rp->rdir, uv[1]);
+		if ((r0*r0*u*u + r1*r1*v*v) > hp->rp->rot*hp->rp->rot) {
+			ang = atan2a(v, u);
+			ang += 2.*PI*(ang < 0);	/* map negative angles up by 2*PI */
+			ang *= 16/PI;		/* now in sector units */
+			if ((ang < .5) | (ang >= 31.5))
+				flgs |= 0x80000001;	/* straddles the wrap: sectors 31 and 0 */
+			else
+				flgs |= (uint32)3<<(int)(ang-.5);	/* two adjacent sectors; unsigned so bit 31 is safe */
+		}
+	}
+	return(flgs);
+}
+
+
 int
 doambient(				/* compute ambient component */
 	COLOR	rcol,			/* input/output color */
@@ -495,15 +657,16 @@ doambient(				/* compute ambient component */
 	FVECT	uv[2],			/* returned (optional) */
 	float	ra[2],			/* returned (optional) */
 	float	pg[2],			/* returned (optional) */
-	float	dg[2]			/* returned (optional) */
+	float	dg[2],			/* returned (optional) */
+	uint32	*crlp			/* returned (optional) */
 )
 {
-	AMBHEMI			*hp = inithemi(rcol, r, wt);
-	int			cnt = 0;
-	FVECT			my_uv[2];
-	double			d, acol[3];
-	struct s_ambsamp	*ap;
-	int			i, j;
+	AMBHEMI	*hp = inithemi(rcol, r, wt);
+	int	cnt;
+	FVECT	my_uv[2];
+	double	d, K, acol[3];
+	AMBSAMP	*ap;
+	int	i, j;
 					/* check/initialize */
 	if (hp == NULL)
 		return(0);
@@ -515,8 +678,11 @@ doambient(				/* compute ambient component */
 		pg[0] = pg[1] = 0.0;
 	if (dg != NULL)
 		dg[0] = dg[1] = 0.0;
+	if (crlp != NULL)
+		*crlp = 0;
 					/* sample the hemisphere */
 	acol[0] = acol[1] = acol[2] = 0.0;
+	cnt = 0;
 	for (i = hp->ns; i--; )
 		for (j = hp->ns; j--; )
 			if ((ap = ambsample(hp, i, j)) != NULL) {
@@ -528,19 +694,31 @@ doambient(				/* compute ambient component */
 		free(hp);
 		return(0);		/* no valid samples */
 	}
+	if (cnt < hp->ns*hp->ns) {	/* incomplete sampling? */
+		copycolor(rcol, acol);
+		free(hp);
+		return(-1);		/* return value w/o Hessian */
+	}
+	cnt = ambssamp*wt + 0.5;	/* perform super-sampling? */
+	if (cnt > 8)
+		ambsupersamp(acol, hp, cnt);
 	copycolor(rcol, acol);		/* final indirect irradiance/PI */
-	if (cnt < hp->ns*hp->ns ||	/* incomplete sampling? */
-			(ra == NULL) & (pg == NULL) & (dg == NULL)) {
+	if ((ra == NULL) & (pg == NULL) & (dg == NULL)) {
 		free(hp);
 		return(-1);		/* no radius or gradient calc. */
 	}
-	if (bright(acol) > FTINY)	/* normalize Y values */
-		d = cnt/bright(acol);
-	else
-		d = 0.0;
+	if ((d = bright(acol)) > FTINY) {	/* normalize Y values */
+		d = 0.99*(hp->ns*hp->ns)/d;
+		K = 0.01;
+	} else {			/* or fall back on geometric Hessian */
+		K = 1.0;
+		pg = NULL;
+		dg = NULL;
+		crlp = NULL;
+	}
 	ap = hp->sa;			/* relative Y channel from here on... */
 	for (i = hp->ns*hp->ns; i--; ap++)
-		colval(ap->v,CIEY) = bright(ap->v)*d + 0.01;
+		colval(ap->v,CIEY) = bright(ap->v)*d + K;
 
 	if (uv == NULL)			/* make sure we have axis pointers */
 		uv = my_uv;
@@ -551,6 +729,14 @@ doambient(				/* compute ambient component */
 		ambdirgrad(hp, uv, dg);
 
 	if (ra != NULL) {		/* scale/clamp radii */
+		if (pg != NULL) {
+			if (ra[0]*(d = fabs(pg[0])) > 1.0)
+				ra[0] = 1.0/d;
+			if (ra[1]*(d = fabs(pg[1])) > 1.0)
+				ra[1] = 1.0/d;
+			if (ra[0] > ra[1])
+				ra[0] = ra[1];
+		}
 		if (ra[0] < minarad) {
 			ra[0] = minarad;
 			if (ra[1] < minarad)
@@ -563,6 +749,17 @@ doambient(				/* compute ambient component */
 			ra[1] = maxarad;
 			if (ra[0] > maxarad)
 				ra[0] = maxarad;
+		}
+					/* flag encroached directions */
+		if ((wt >= 0.89*AVGREFL) & (crlp != NULL))
+			*crlp = ambcorral(hp, uv, ra[0]*ambacc, ra[1]*ambacc);
+		if (pg != NULL) {	/* cap gradient if necessary */
+			d = pg[0]*pg[0]*ra[0]*ra[0] + pg[1]*pg[1]*ra[1]*ra[1];
+			if (d > 1.0) {
+				d = 1.0/sqrt(d);
+				pg[0] *= d;
+				pg[1] *= d;
+			}
 		}
 	}
 	free(hp);			/* clean up and return */