--- ray/src/cv/bsdfmesh.c	2012/11/08 23:32:30	2.5
+++ ray/src/cv/bsdfmesh.c	2014/02/18 16:06:51	2.15
@@ -1,5 +1,5 @@
 #ifndef lint
-static const char RCSid[] = "$Id: bsdfmesh.c,v 2.5 2012/11/08 23:32:30 greg Exp $";
+static const char RCSid[] = "$Id: bsdfmesh.c,v 2.15 2014/02/18 16:06:51 greg Exp $";
 #endif
 /*
  * Create BSDF advection mesh from radial basis functions.
@@ -27,6 +27,7 @@ typedef struct {
 	int		nrows, ncols;	/* array size (matches migration) */
 	float		*price;		/* migration prices */
 	short		*sord;		/* sort for each row, low to high */
+	float		*prow;		/* current price row */
 } PRICEMAT;			/* sorted pricing matrix */
 
 #define	pricerow(p,i)	((p)->price + (i)*(p)->ncols)
@@ -119,6 +120,7 @@ run_subprocess(void)
 		if (pid < 0) {
 			fprintf(stderr, "%s: cannot fork subprocess\n",
 					progname);
+			await_children(nchild);
 			exit(1);
 		}
 		++nchild;			/* subprocess started */
@@ -138,9 +140,8 @@ static int
 msrt_cmp(void *b, const void *p1, const void *p2)
 {
 	PRICEMAT	*pm = (PRICEMAT *)b;
-	int		ri = ((const short *)p1 - pm->sord) / pm->ncols;
-	float		c1 = pricerow(pm,ri)[*(const short *)p1];
-	float		c2 = pricerow(pm,ri)[*(const short *)p2];
+	float		c1 = pm->prow[*(const short *)p1];
+	float		c2 = pm->prow[*(const short *)p2];
 
 	if (c1 > c2) return(1);
 	if (c1 < c2) return(-1);
@@ -170,13 +171,20 @@ price_routes(PRICEMAT *pm, const RBFNODE *from_rbf, co
 	for (i = from_rbf->nrbf; i--; ) {
 	    const double	from_ang = R2ANG(from_rbf->rbfa[i].crad);
 	    FVECT		vfrom;
+	    short		*srow;
 	    ovec_from_pos(vfrom, from_rbf->rbfa[i].gx, from_rbf->rbfa[i].gy);
+	    pm->prow = pricerow(pm,i);
+	    srow = psortrow(pm,i);
 	    for (j = to_rbf->nrbf; j--; ) {
-		pricerow(pm,i)[j] = acos(DOT(vfrom, vto[j])) +
-				fabs(R2ANG(to_rbf->rbfa[j].crad) - from_ang);
-		psortrow(pm,i)[j] = j;
+		double		d;		/* quadratic cost function */
+		d = DOT(vfrom, vto[j]);
+		d = (d >= 1.) ? .0 : acos(d);
+		pm->prow[j] = d*d;
+		d = R2ANG(to_rbf->rbfa[j].crad) - from_ang;
+		pm->prow[j] += d*d;	
+		srow[j] = j;
 	    }
-	    qsort_r(psortrow(pm,i), pm->ncols, sizeof(short), pm, &msrt_cmp);
+	    qsort_r(srow, pm->ncols, sizeof(short), pm, &msrt_cmp);
 	}
 	free(vto);
 }
@@ -193,75 +201,112 @@ free_routes(PRICEMAT *pm)
 static double
 min_cost(double amt2move, const double *avail, const PRICEMAT *pm, int s)
 {
+	const short	*srow = psortrow(pm,s);	/* row s destinations; presumably sorted low to high price */
+	const float	*prow = pricerow(pm,s);	/* row s prices, indexed by destination */
 	double		total_cost = 0;
 	int		j;
-
-	if (amt2move <= FTINY)			/* pre-emptive check */
-		return(.0);
 						/* move cheapest first */
+	for (j = 0; (j < pm->ncols) & (amt2move > FTINY); j++) {	/* non-short-circuit '&' of two 0/1 tests */
+		int	d = srow[j];		/* next destination in sorted order */
 		double	amt = (amt2move < avail[d]) ? amt2move : avail[d];
 
+		total_cost += amt * prow[d];	/* amount moved times its price */
 		amt2move -= amt;
 	}
 	return(total_cost);
 }
 
-/* Take a step in migration by choosing optimal bucket to transfer */
+/* Compare {row,column} entries by moving price for qsort_r(); negative columns sort last */
+static int
+rmovcmp(void *b, const void *p1, const void *p2)
+{
+	PRICEMAT	*pm = (PRICEMAT *)b;	/* pricing matrix passed through as sort context */
+	const short	*ij1 = (const short *)p1;	/* [0] is row, [1] is column */
+	const short	*ij2 = (const short *)p2;
+	float		price_diff;
+
+	if (ij1[1] < 0) return(ij2[1] >= 0);	/* no column: after valid entry, tie with another */
+	if (ij2[1] < 0) return(-1);		/* valid entry goes before one with no column */
+	price_diff = pricerow(pm,ij1[0])[ij1[1]] - pricerow(pm,ij2[0])[ij2[1]];
+	if (price_diff > 0) return(1);		/* ascending price order */
+	if (price_diff < 0) return(-1);
+	return(0);
+}
+
+/* Take a step in migration by choosing reasonable bucket to transfer */
 static double
-migration_step(MIGRATION *mig, double *src_rem, double *dst_rem, const PRICEMAT *pm)
+migration_step(MIGRATION *mig, double *src_rem, double *dst_rem, PRICEMAT *pm)
 {
+	const int	max2check = 100;
 	const double	maxamt = 1./(double)pm->ncols;
-	const double	minamt = maxamt*5e-6;
+	const double	minamt = maxamt*1e-4;
 	double		*src_cost;
+	short		(*rord)[2];
 	struct {
 		int	s, d;	/* source and destination */
 		double	price;	/* price estimate per amount moved */
 		double	amt;	/* amount we can move */
 	} cur, best;
-	int		i;
+	int		r2check, i, ri;
+	/*
+	 * Check cheapest available routes only -- a higher adjusted
+	 * destination price implies that another source is closer, so
+	 * we can hold off considering more expensive options until
+	 * some other (hopefully better) moves have been made.
+	 */
+						/* most promising row order */
+	rord = (short (*)[2])malloc(sizeof(short)*2*pm->nrows);
+	if (rord == NULL)
+		goto memerr;
+	for (ri = pm->nrows; ri--; ) {
+	    rord[ri][0] = ri;
+	    rord[ri][1] = -1;
+	    if (src_rem[ri] <= minamt)		/* enough source material? */
+		    continue;
+	    for (i = 0; i < pm->ncols; i++)
+		if (dst_rem[ rord[ri][1] = psortrow(pm,ri)[i] ] > minamt)
+			break;
+	    if (i >= pm->ncols) {		/* moved all we can? */
+		free(rord);
+		return(.0);
+	    }
+	}
+	if (pm->nrows > max2check)		/* sort if too many sources */
+		qsort_r(rord, pm->nrows, sizeof(short)*2, pm, &rmovcmp);
 						/* allocate cost array */
 	src_cost = (double *)malloc(sizeof(double)*pm->nrows);
-	if (src_cost == NULL) {
-		fprintf(stderr, "%s: Out of memory in migration_step()\n",
-				progname);
-		exit(1);
-	}
+	if (src_cost == NULL)
+		goto memerr;
 	for (i = pm->nrows; i--; )		/* starting costs for diff. */
 		src_cost[i] = min_cost(src_rem[i], dst_rem, pm, i);
-
 						/* find best source & dest. */
 	best.s = best.d = -1; best.price = FHUGE; best.amt = 0;
-	for (cur.s = pm->nrows; cur.s--; ) {
+	if ((r2check = pm->nrows) > max2check)
+		r2check = max2check;		/* put a limit on search */
+	for (ri = 0; ri < r2check; ri++) {	/* check each source row */
 	    double	cost_others = 0;
-
-	    if (src_rem[cur.s] <= minamt)
-		    continue;
-						/* examine cheapest dest. */
-	    for (i = 0; i < pm->ncols; i++)
-		if (dst_rem[ cur.d = psortrow(pm,cur.s)[i] ] > minamt)
-			break;
-	    if (i >= pm->ncols)
-		break;
-	    if ((cur.price = pricerow(pm,cur.s)[cur.d]) >= best.price)
-		continue;			/* no point checking further */
+	    cur.s = rord[ri][0];
+	    if ((cur.d = rord[ri][1]) < 0 ||
+			(cur.price = pricerow(pm,cur.s)[cur.d]) >= best.price) {
+		if (pm->nrows > max2check) break;	/* sorted end */
+		continue;			/* else skip this one */
+	    }
 	    cur.amt = (src_rem[cur.s] < dst_rem[cur.d]) ?
 				src_rem[cur.s] : dst_rem[cur.d];
-	    if (cur.amt > maxamt) cur.amt = maxamt;
-	    dst_rem[cur.d] -= cur.amt;		/* add up differential costs */
+						/* don't just leave smidgen */
+	    if (cur.amt > maxamt*1.02) cur.amt = maxamt;
+	    dst_rem[cur.d] -= cur.amt;		/* add up opportunity costs */
 	    for (i = pm->nrows; i--; )
 		if (i != cur.s)
-			cost_others += min_cost(src_rem[i], dst_rem, pm, i)
+		    cost_others += min_cost(src_rem[i], dst_rem, pm, i)
 					- src_cost[i];
 	    dst_rem[cur.d] += cur.amt;		/* undo trial move */
 	    cur.price += cost_others/cur.amt;	/* adjust effective price */
 	    if (cur.price < best.price)		/* are we better than best? */
-		    best = cur;
+		best = cur;
 	}
-	free(src_cost);				/* finish up */
-
+	free(src_cost);				/* clean up */
+	free(rord);
 	if ((best.s < 0) | (best.d < 0))	/* nothing left to move? */
 		return(.0);
 						/* else make the actual move */
@@ -269,23 +314,11 @@ migration_step(MIGRATION *mig, double *src_rem, double
 	src_rem[best.s] -= best.amt;
 	dst_rem[best.d] -= best.amt;
 	return(best.amt);
+memerr:
+	fprintf(stderr, "%s: Out of memory in migration_step()\n", progname);
+	exit(1);
 }
 
-#ifdef DEBUG
-static char *
-thetaphi(const FVECT v)
-{
-	static char	buf[128];
-	double		theta, phi;
-
-	theta = 180./M_PI*acos(v[2]);
-	phi = 180./M_PI*atan2(v[1],v[0]);
-	sprintf(buf, "(%.0f,%.0f)", theta, phi);
-
-	return(buf);
-}
-#endif
-
 /* Compute and insert migration along directed edge (may fork child) */
 static MIGRATION *
 create_migration(RBFNODE *from_rbf, RBFNODE *to_rbf)
@@ -295,13 +328,22 @@ create_migration(RBFNODE *from_rbf, RBFNODE *to_rbf)
 	MIGRATION	*newmig;
 	double		*src_rem, *dst_rem;
 	double		total_rem = 1., move_amt;
-	int		i;
+	int		i, j;
 						/* check if exists already */
 	for (newmig = from_rbf->ejl; newmig != NULL;
 			newmig = nextedge(from_rbf,newmig))
 		if (newmig->rbfv[1] == to_rbf)
 			return(NULL);
 						/* else allocate */
+#ifdef DEBUG
+	fprintf(stderr, "Building path from (theta,phi) (%.1f,%.1f) ",
+			get_theta180(from_rbf->invec),
+			get_phi360(from_rbf->invec));
+	fprintf(stderr, "to (%.1f,%.1f) with %d x %d matrix\n",
+			get_theta180(to_rbf->invec),
+			get_phi360(to_rbf->invec), 
+			from_rbf->nrbf, to_rbf->nrbf);
+#endif
 	newmig = new_migration(from_rbf, to_rbf);
 	if (run_subprocess())
 		return(newmig);			/* child continues */
@@ -313,38 +355,24 @@ create_migration(RBFNODE *from_rbf, RBFNODE *to_rbf)
 				progname);
 		exit(1);
 	}
-#ifdef DEBUG
-	fprintf(stderr, "Building path from (theta,phi) %s ",
-			thetaphi(from_rbf->invec));
-	fprintf(stderr, "to %s with %d x %d matrix\n",
-			thetaphi(to_rbf->invec), 
-			from_rbf->nrbf, to_rbf->nrbf);
-#endif
 						/* starting quantities */
 	memset(newmig->mtx, 0, sizeof(float)*from_rbf->nrbf*to_rbf->nrbf);
 	for (i = from_rbf->nrbf; i--; )
 		src_rem[i] = rbf_volume(&from_rbf->rbfa[i]) / from_rbf->vtotal;
-	for (i = to_rbf->nrbf; i--; )
-		dst_rem[i] = rbf_volume(&to_rbf->rbfa[i]) / to_rbf->vtotal;
+	for (j = to_rbf->nrbf; j--; )
+		dst_rem[j] = rbf_volume(&to_rbf->rbfa[j]) / to_rbf->vtotal;
+
 	do {					/* move a bit at a time */
 		move_amt = migration_step(newmig, src_rem, dst_rem, &pmtx);
 		total_rem -= move_amt;
-#ifdef DEBUG
-		if (!nchild)
-			fprintf(stderr, "\r%.9f remaining...", total_rem);
-#endif
 	} while ((total_rem > end_thresh) & (move_amt > 0));
-#ifdef DEBUG
-	if (!nchild) fputs("done.\n", stderr);
-	else fprintf(stderr, "finished with %.9f remaining\n", total_rem);
-#endif
+
 	for (i = from_rbf->nrbf; i--; ) {	/* normalize final matrix */
-	    float	nf = rbf_volume(&from_rbf->rbfa[i]);
-	    int		j;
+	    double	nf = rbf_volume(&from_rbf->rbfa[i]);
 	    if (nf <= FTINY) continue;
 	    nf = from_rbf->vtotal / nf;
 	    for (j = to_rbf->nrbf; j--; )
-		mtx_coef(newmig,i,j) *= nf;
+		mtx_coef(newmig,i,j) *= nf;	/* row now sums to 1.0 */
 	}
 	end_subprocess();			/* exit here if subprocess */
 	free_routes(&pmtx);			/* free working arrays */
@@ -379,7 +407,7 @@ overlaps_tri(const RBFNODE *bv0, const RBFNODE *bv1, c
 	return(vother[im_rev] != NULL);
 }
 
-/* Find context hull vertex to complete triangle (oriented call) */
+/* Find convex hull vertex to complete triangle (oriented call) */
 static RBFNODE *
 find_chull_vert(const RBFNODE *rbf0, const RBFNODE *rbf1)
 {
@@ -400,7 +428,7 @@ find_chull_vert(const RBFNODE *rbf0, const RBFNODE *rb
 		if (DOT(vp, vmid) <= FTINY)
 			continue;		/* wrong orientation */
 		area2 = .25*DOT(vp,vp);
-		VSUB(vp, rbf->invec, rbf0->invec);
+		VSUB(vp, rbf->invec, vmid);
 		dprod = -DOT(vp, vejn);
 		VSUM(vp, vp, vejn, dprod);	/* above guarantees non-zero */
 		dprod = DOT(vp, vmid) / VLEN(vp);
@@ -456,6 +484,114 @@ mesh_from_edge(MIGRATION *edge)
 		}
 	}
 }
+
+/* Add normal direction if missing (interpolated by mirroring nearest incidence) */
+static void
+check_normal_incidence(void)
+{
+	const int	saved_nprocs = nprocs;	/* restored once migration is computed */
+	RBFNODE		*near_rbf, *mir_rbf, *rbf;
+	double		bestd;
+	int		n, i, j;
+
+	if (dsf_list == NULL)
+		return;				/* XXX should be error? */
+	near_rbf = dsf_list;
+	bestd = input_orient*near_rbf->invec[2];	/* compared against 1 to detect normal */
+	if (single_plane_incident) {		/* ordered plane incidence? */
+		if (bestd >= 1.-2.*FTINY)
+			return;			/* already have normal */
+	} else {
+		switch (inp_coverage) {
+		case INP_QUAD1:
+		case INP_QUAD2:
+		case INP_QUAD3:
+		case INP_QUAD4:
+			break;			/* quadrilateral symmetry? */
+		default:
+			return;			/* else we can interpolate */
+		}
+		for (rbf = near_rbf->next; rbf != NULL; rbf = rbf->next) {
+			const double	d = input_orient*rbf->invec[2];
+			if (d >= 1.-2.*FTINY)
+				return;		/* seems we have normal */
+			if (d > bestd) {	/* keep the largest value seen so far */
+				near_rbf = rbf;
+				bestd = d;
+			}
+		}
+	}
+	if (mig_list != NULL) {			/* must be called before any migration exists */
+		fprintf(stderr, "%s: Late call to check_normal_incidence()\n",
+				progname);
+		exit(1);
+	}
+#ifdef DEBUG
+	fprintf(stderr, "Interpolating normal incidence by mirroring (%.1f,%.1f)\n",
+			get_theta180(near_rbf->invec), get_phi360(near_rbf->invec));
+#endif
+						/* mirror nearest incidence */
+	n = sizeof(RBFNODE) + sizeof(RBFVAL)*(near_rbf->nrbf-1);	/* assumes RBFNODE ends in rbfa[1] -- confirm */
+	mir_rbf = (RBFNODE *)malloc(n);
+	if (mir_rbf == NULL)
+		goto memerr;
+	memcpy(mir_rbf, near_rbf, n);
+	mir_rbf->ord = near_rbf->ord - 1;	/* not used, I think */
+	mir_rbf->next = NULL;
+	rev_rbf_symmetry(mir_rbf, MIRROR_X|MIRROR_Y);
+	nprocs = 1;				/* compute migration matrix (presumably without forking -- confirm) */
+	if (mig_list != create_migration(mir_rbf, near_rbf))
+		exit(1);			/* XXX should never happen! */
+	n = 0;					/* count migrating particles (coefficients > FTINY) */
+	for (i = 0; i < mtx_nrows(mig_list); i++)
+	    for (j = 0; j < mtx_ncols(mig_list); j++)
+		n += (mtx_coef(mig_list,i,j) > FTINY);
+	rbf = (RBFNODE *)malloc(sizeof(RBFNODE) + sizeof(RBFVAL)*(n-1));
+	if (rbf == NULL)
+		goto memerr;
+	rbf->next = NULL; rbf->ejl = NULL;
+	rbf->invec[0] = rbf->invec[1] = 0; rbf->invec[2] = 1.;
+	rbf->nrbf = n;
+	rbf->vtotal = .5 + .5*mig_list->rbfv[1]->vtotal/mig_list->rbfv[0]->vtotal;
+	n = 0;					/* advect RBF lobes halfway */
+	for (i = 0; i < mtx_nrows(mig_list); i++) {
+	    const RBFVAL	*rbf0i = &mig_list->rbfv[0]->rbfa[i];
+	    const float		peak0 = rbf0i->peak;
+	    const double	rad0 = R2ANG(rbf0i->crad);
+	    FVECT		v0;
+	    float		mv;
+	    ovec_from_pos(v0, rbf0i->gx, rbf0i->gy);
+	    for (j = 0; j < mtx_ncols(mig_list); j++)
+		if ((mv = mtx_coef(mig_list,i,j)) > FTINY) {
+			const RBFVAL	*rbf1j = &mig_list->rbfv[1]->rbfa[j];
+			double		rad2;
+			FVECT		v;
+			int		pos[2];
+			rad2 = R2ANG(rbf1j->crad);
+			rad2 = .5*(rad0*rad0 + rad2*rad2);	/* mean of the two squared radii */
+			rbf->rbfa[n].peak = peak0 * mv * rbf->vtotal *
+						rad0*rad0/rad2;
+			rbf->rbfa[n].crad = ANG2R(sqrt(rad2));
+			ovec_from_pos(v, rbf1j->gx, rbf1j->gy);
+			geodesic(v, v0, v, .5, GEOD_REL);	/* presumably halfway from v0 to v -- confirm */
+			pos_from_vec(pos, v);
+			rbf->rbfa[n].gx = pos[0];
+			rbf->rbfa[n].gy = pos[1];
+			++n;
+		}
+	}
+	rbf->vtotal *= mig_list->rbfv[0]->vtotal;	/* now the mean of the two vtotal values */
+	nprocs = saved_nprocs;			/* final clean-up */
+	free(mir_rbf);
+	free(mig_list);
+	mig_list = near_rbf->ejl = NULL;	/* reset after freeing the migration */
+	insert_dsf(rbf);			/* insert interpolated normal */
+	return;
+memerr:
+	fprintf(stderr, "%s: Out of memory in check_normal_incidence()\n",
+				progname);
+	exit(1);
+}
 	
 /* Build our triangle mesh from recorded RBFs */
 void
@@ -464,6 +600,8 @@ build_mesh(void)
 	double		best2 = M_PI*M_PI;
 	RBFNODE		*shrt_edj[2];
 	RBFNODE		*rbf0, *rbf1;
+						/* add normal if needed */
+	check_normal_incidence();
 						/* check if isotropic */
 	if (single_plane_incident) {
 		for (rbf0 = dsf_list; rbf0 != NULL; rbf0 = rbf0->next)