--- ray/src/cv/bsdfmesh.c	2012/10/19 04:14:29	2.1
+++ ray/src/cv/bsdfmesh.c	2013/11/01 18:23:50	2.13
@@ -1,5 +1,5 @@
 #ifndef lint
-static const char RCSid[] = "$Id: bsdfmesh.c,v 2.1 2012/10/19 04:14:29 greg Exp $";
+static const char RCSid[] = "$Id: bsdfmesh.c,v 2.13 2013/11/01 18:23:50 greg Exp $";
 #endif
 /*
  * Create BSDF advection mesh from radial basis functions.
@@ -23,168 +23,16 @@ int			nprocs = 1;
 				/* number of children (-1 in child) */
 static int		nchild = 0;
 
-/* Compute (and allocate) migration price matrix for optimization */
-static float *
-price_routes(const RBFNODE *from_rbf, const RBFNODE *to_rbf)
-{
-	float	*pmtx = (float *)malloc(sizeof(float) *
-					from_rbf->nrbf * to_rbf->nrbf);
-	FVECT	*vto = (FVECT *)malloc(sizeof(FVECT) * to_rbf->nrbf);
-	int	i, j;
+typedef struct {
+	int		nrows, ncols;	/* array size: source rows x destination columns (matches migration) */
+	float		*price;		/* migration prices, nrows*ncols values stored row by row */
+	short		*sord;		/* per-row column indices sorted by price, low to high */
+	float		*prow;		/* current price row (scratch pointer read by the sort comparison) */
+} PRICEMAT;			/* sorted pricing matrix */
 
-	if ((pmtx == NULL) | (vto == NULL)) {
-		fprintf(stderr, "%s: Out of memory in migration_costs()\n",
-				progname);
-		exit(1);
-	}
-	for (j = to_rbf->nrbf; j--; )		/* save repetitive ops. */
-		ovec_from_pos(vto[j], to_rbf->rbfa[j].gx, to_rbf->rbfa[j].gy);
+#define	pricerow(p,i)	((p)->price + (i)*(p)->ncols)	/* pointer to price row i */
+#define psortrow(p,i)	((p)->sord + (i)*(p)->ncols)	/* pointer to sorted column order of row i */
 
-	for (i = from_rbf->nrbf; i--; ) {
-	    const double	from_ang = R2ANG(from_rbf->rbfa[i].crad);
-	    FVECT		vfrom;
-	    ovec_from_pos(vfrom, from_rbf->rbfa[i].gx, from_rbf->rbfa[i].gy);
-	    for (j = to_rbf->nrbf; j--; )
-		pmtx[i*to_rbf->nrbf + j] = acos(DOT(vfrom, vto[j])) +
-				fabs(R2ANG(to_rbf->rbfa[j].crad) - from_ang);
-	}
-	free(vto);
-	return(pmtx);
-}
-
-/* Comparison routine needed for sorting price row */
-static const float	*price_arr;
-static int
-msrt_cmp(const void *p1, const void *p2)
-{
-	float	c1 = price_arr[*(const int *)p1];
-	float	c2 = price_arr[*(const int *)p2];
-
-	if (c1 > c2) return(1);
-	if (c1 < c2) return(-1);
-	return(0);
-}
-
-/* Compute minimum (optimistic) cost for moving the given source material */
-static double
-min_cost(double amt2move, const double *avail, const float *price, int n)
-{
-	static int	*price_sort = NULL;
-	static int	n_alloc = 0;
-	double		total_cost = 0;
-	int		i;
-
-	if (amt2move <= FTINY)			/* pre-emptive check */
-		return(0.);
-	if (n > n_alloc) {			/* (re)allocate sort array */
-		if (n_alloc) free(price_sort);
-		price_sort = (int *)malloc(sizeof(int)*n);
-		if (price_sort == NULL) {
-			fprintf(stderr, "%s: Out of memory in min_cost()\n",
-					progname);
-			exit(1);
-		}
-		n_alloc = n;
-	}
-	for (i = n; i--; )
-		price_sort[i] = i;
-	price_arr = price;
-	qsort(price_sort, n, sizeof(int), &msrt_cmp);
-						/* move cheapest first */
-	for (i = 0; i < n && amt2move > FTINY; i++) {
-		int	d = price_sort[i];
-		double	amt = (amt2move < avail[d]) ? amt2move : avail[d];
-
-		total_cost += amt * price[d];
-		amt2move -= amt;
-	}
-	return(total_cost);
-}
-
-/* Take a step in migration by choosing optimal bucket to transfer */
-static double
-migration_step(MIGRATION *mig, double *src_rem, double *dst_rem, const float *pmtx)
-{
-	const double	maxamt = .1;
-	const double	minamt = maxamt*.0001;
-	static double	*src_cost = NULL;
-	static int	n_alloc = 0;
-	struct {
-		int	s, d;	/* source and destination */
-		double	price;	/* price estimate per amount moved */
-		double	amt;	/* amount we can move */
-	} cur, best;
-	int		i;
-
-	if (mtx_nrows(mig) > n_alloc) {		/* allocate cost array */
-		if (n_alloc)
-			free(src_cost);
-		src_cost = (double *)malloc(sizeof(double)*mtx_nrows(mig));
-		if (src_cost == NULL) {
-			fprintf(stderr, "%s: Out of memory in migration_step()\n",
-					progname);
-			exit(1);
-		}
-		n_alloc = mtx_nrows(mig);
-	}
-	for (i = mtx_nrows(mig); i--; )		/* starting costs for diff. */
-		src_cost[i] = min_cost(src_rem[i], dst_rem,
-					pmtx+i*mtx_ncols(mig), mtx_ncols(mig));
-
-						/* find best source & dest. */
-	best.s = best.d = -1; best.price = FHUGE; best.amt = 0;
-	for (cur.s = mtx_nrows(mig); cur.s--; ) {
-	    const float	*price = pmtx + cur.s*mtx_ncols(mig);
-	    double	cost_others = 0;
-	    if (src_rem[cur.s] < minamt)
-		    continue;
-	    cur.d = -1;				/* examine cheapest dest. */
-	    for (i = mtx_ncols(mig); i--; )
-		if (dst_rem[i] > minamt &&
-				(cur.d < 0 || price[i] < price[cur.d]))
-			cur.d = i;
-	    if (cur.d < 0)
-		    return(.0);
-	    if ((cur.price = price[cur.d]) >= best.price)
-		    continue;			/* no point checking further */
-	    cur.amt = (src_rem[cur.s] < dst_rem[cur.d]) ?
-				src_rem[cur.s] : dst_rem[cur.d];
-	    if (cur.amt > maxamt) cur.amt = maxamt;
-	    dst_rem[cur.d] -= cur.amt;		/* add up differential costs */
-	    for (i = mtx_nrows(mig); i--; )
-		if (i != cur.s)
-			cost_others += min_cost(src_rem[i], dst_rem,
-						price, mtx_ncols(mig))
-					- src_cost[i];
-	    dst_rem[cur.d] += cur.amt;		/* undo trial move */
-	    cur.price += cost_others/cur.amt;	/* adjust effective price */
-	    if (cur.price < best.price)		/* are we better than best? */
-		    best = cur;
-	}
-	if ((best.s < 0) | (best.d < 0))
-		return(.0);
-						/* make the actual move */
-	mig->mtx[mtx_ndx(mig,best.s,best.d)] += best.amt;
-	src_rem[best.s] -= best.amt;
-	dst_rem[best.d] -= best.amt;
-	return(best.amt);
-}
-
-#ifdef DEBUG
-static char *
-thetaphi(const FVECT v)
-{
-	static char	buf[128];
-	double		theta, phi;
-
-	theta = 180./M_PI*acos(v[2]);
-	phi = 180./M_PI*atan2(v[1],v[0]);
-	sprintf(buf, "(%.0f,%.0f)", theta, phi);
-
-	return(buf);
-}
-#endif
-
 /* Create a new migration holder (sharing memory for multiprocessing) */
 static MIGRATION *
 new_migration(RBFNODE *from_rbf, RBFNODE *to_rbf)
@@ -272,6 +120,7 @@ run_subprocess(void)
 		if (pid < 0) {
 			fprintf(stderr, "%s: cannot fork subprocess\n",
 					progname);
+			await_children(nchild);
 			exit(1);
 		}
 		++nchild;			/* subprocess started */
@@ -286,28 +135,219 @@ run_subprocess(void)
 
 #endif	/* ! _WIN32 */
 
+/* Comparison routine for qsort_r(): orders column indices by their price in the current row (pm->prow), low to high */
+static int
+msrt_cmp(void *b, const void *p1, const void *p2)	/* NOTE(review): context-first form as in BSD qsort_r() -- confirm on other platforms */
+{
+	PRICEMAT	*pm = (PRICEMAT *)b;	/* sort context handed through by qsort_r() */
+	float		c1 = pm->prow[*(const short *)p1];	/* price at first column index */
+	float		c2 = pm->prow[*(const short *)p2];	/* price at second column index */
+
+	if (c1 > c2) return(1);
+	if (c1 < c2) return(-1);
+	return(0);			/* equal prices: no tie-breaking */
+}
+
+/* Compute (and allocate) migration price matrix for optimization: fills pm->price and pm->sord; exits on allocation failure */
+static void
+price_routes(PRICEMAT *pm, const RBFNODE *from_rbf, const RBFNODE *to_rbf)
+{
+	FVECT	*vto = (FVECT *)malloc(sizeof(FVECT) * to_rbf->nrbf);
+	int	i, j;
+
+	pm->nrows = from_rbf->nrbf;		/* one row per source lobe */
+	pm->ncols = to_rbf->nrbf;		/* one column per destination lobe */
+	pm->price = (float *)malloc(sizeof(float) * pm->nrows*pm->ncols);
+	pm->sord = (short *)malloc(sizeof(short) * pm->nrows*pm->ncols);
+	
+	if ((pm->price == NULL) | (pm->sord == NULL) | (vto == NULL)) {
+		fprintf(stderr, "%s: Out of memory in price_routes()\n",
+				progname);
+		exit(1);
+	}
+	for (j = to_rbf->nrbf; j--; )		/* save repetitive ops. */
+		ovec_from_pos(vto[j], to_rbf->rbfa[j].gx, to_rbf->rbfa[j].gy);
+
+	for (i = from_rbf->nrbf; i--; ) {
+	    const double	from_ang = R2ANG(from_rbf->rbfa[i].crad);
+	    FVECT		vfrom;
+	    short		*srow;
+	    ovec_from_pos(vfrom, from_rbf->rbfa[i].gx, from_rbf->rbfa[i].gy);
+	    pm->prow = pricerow(pm,i);		/* row being filled; also read by msrt_cmp() */
+	    srow = psortrow(pm,i);
+	    for (j = to_rbf->nrbf; j--; ) {
+		double		d;		/* quadratic cost function */
+		d = DOT(vfrom, vto[j]);
+		d = (d >= 1.) ? .0 : (d <= -1.) ? acos(-1.) : acos(d);	/* clamp both ends: acos() is NaN outside [-1,1] */
+		pm->prow[j] = d*d;		/* squared angle between lobe directions */
+		d = R2ANG(to_rbf->rbfa[j].crad) - from_ang;
+		pm->prow[j] += d*d;		/* plus squared difference in R2ANG(crad) */
+		srow[j] = j;			/* identity order before sorting; NOTE(review): assumes index fits in a short */
+	    }
+	    qsort_r(srow, pm->ncols, sizeof(short), pm, &msrt_cmp);	/* NOTE(review): BSD-style argument order -- confirm on other platforms */
+	}
+	free(vto);
+}
+
+/* Free price matrix arrays (price and sord); the PRICEMAT struct itself is not freed */
+static void
+free_routes(PRICEMAT *pm)
+{
+	free(pm->price); pm->price = NULL;	/* pm->prow is left unchanged and must not be used afterwards */
+	free(pm->sord); pm->sord = NULL;
+}
+
+/* Compute minimum (optimistic) cost for moving amt2move out of source row s, given avail[] per destination (avail is not modified) */
+static double
+min_cost(double amt2move, const double *avail, const PRICEMAT *pm, int s)
+{
+	const short	*srow = psortrow(pm,s);	/* destinations for row s, cheapest first */
+	const float	*prow = pricerow(pm,s);	/* prices for row s, indexed by destination */
+	double		total_cost = 0;
+	int		j;
+						/* move cheapest first */
+	for (j = 0; (j < pm->ncols) & (amt2move > FTINY); j++) {	/* both operands are 0 or 1, so & acts as logical AND */
+		int	d = srow[j];
+		double	amt = (amt2move < avail[d]) ? amt2move : avail[d];	/* take as much as this destination has available */
+
+		total_cost += amt * prow[d];
+		amt2move -= amt;
+	}
+	return(total_cost);			/* any amount that found no room adds no cost */
+}
+
+/* Compare entries by moving price for qsort_r(): each entry is a short pair {row, column}; a negative column sorts last */
+static int
+rmovcmp(void *b, const void *p1, const void *p2)
+{
+	PRICEMAT	*pm = (PRICEMAT *)b;	/* sort context handed through by qsort_r() */
+	const short	*ij1 = (const short *)p1;
+	const short	*ij2 = (const short *)p2;
+	float		price_diff;
+
+	if (ij1[1] < 0) return(ij2[1] >= 0);	/* first has no column: after a valid second, equal otherwise */
+	if (ij2[1] < 0) return(-1);		/* only second has no column: first goes ahead */
+	price_diff = pricerow(pm,ij1[0])[ij1[1]] - pricerow(pm,ij2[0])[ij2[1]];
+	if (price_diff > 0) return(1);
+	if (price_diff < 0) return(-1);
+	return(0);
+}
+
+/* Take a step in migration by choosing reasonable bucket to transfer; returns amount moved (0 if nothing was moved) */
+static double
+migration_step(MIGRATION *mig, double *src_rem, double *dst_rem, PRICEMAT *pm)
+{
+	const int	max2check = 100;	/* most source rows examined per step */
+	const double	maxamt = 1./(double)pm->ncols;	/* nominal cap on amount moved per step */
+	const double	minamt = maxamt*1e-4;	/* remainders at or below this count as used up */
+	double		*src_cost;		/* min_cost() of each source row before any trial move */
+	short		(*rord)[2];		/* per row: {source row, cheapest open destination or -1} */
+	struct {
+		int	s, d;	/* source and destination */
+		double	price;	/* price estimate per amount moved */
+		double	amt;	/* amount we can move */
+	} cur, best;
+	int		r2check, i, ri;
+	/*
+	 * Check cheapest available routes only -- a higher adjusted
+	 * destination price implies that another source is closer, so
+	 * we can hold off considering more expensive options until
+	 * some other (hopefully better) moves have been made.
+	 */
+						/* most promising row order */
+	rord = (short (*)[2])malloc(sizeof(short)*2*pm->nrows);
+	if (rord == NULL)
+		goto memerr;
+	for (ri = pm->nrows; ri--; ) {
+	    rord[ri][0] = ri;
+	    rord[ri][1] = -1;			/* stays -1 if this source is skipped */
+	    if (src_rem[ri] <= minamt)		/* enough source material? */
+		    continue;
+	    for (i = 0; i < pm->ncols; i++)	/* walk destinations cheapest first, recording each candidate */
+		if (dst_rem[ rord[ri][1] = psortrow(pm,ri)[i] ] > minamt)
+			break;
+	    if (i >= pm->ncols) {		/* moved all we can? */
+		free(rord);
+		return(.0);
+	    }
+	}
+	if (pm->nrows > max2check)		/* sort if too many sources */
+		qsort_r(rord, pm->nrows, sizeof(short)*2, pm, &rmovcmp);	/* NOTE(review): BSD-style argument order -- confirm on other platforms */
+						/* allocate cost array */
+	src_cost = (double *)malloc(sizeof(double)*pm->nrows);
+	if (src_cost == NULL)
+		goto memerr;
+	for (i = pm->nrows; i--; )		/* starting costs for diff. */
+		src_cost[i] = min_cost(src_rem[i], dst_rem, pm, i);
+						/* find best source & dest. */
+	best.s = best.d = -1; best.price = FHUGE; best.amt = 0;
+	if ((r2check = pm->nrows) > max2check)
+		r2check = max2check;		/* put a limit on search */
+	for (ri = 0; ri < r2check; ri++) {	/* check each source row */
+	    double	cost_others = 0;
+	    cur.s = rord[ri][0];
+	    if ((cur.d = rord[ri][1]) < 0 ||
+			(cur.price = pricerow(pm,cur.s)[cur.d]) >= best.price) {
+		if (pm->nrows > max2check) break;	/* sorted end */
+		continue;			/* else skip this one */
+	    }
+	    cur.amt = (src_rem[cur.s] < dst_rem[cur.d]) ?
+				src_rem[cur.s] : dst_rem[cur.d];
+						/* don't just leave smidgen */
+	    if (cur.amt > maxamt*1.02) cur.amt = maxamt;
+	    dst_rem[cur.d] -= cur.amt;		/* add up opportunity costs */
+	    for (i = pm->nrows; i--; )
+		if (i != cur.s)
+		    cost_others += min_cost(src_rem[i], dst_rem, pm, i)
+					- src_cost[i];
+	    dst_rem[cur.d] += cur.amt;		/* undo trial move */
+	    cur.price += cost_others/cur.amt;	/* adjust effective price */
+	    if (cur.price < best.price)		/* are we better than best? */
+		best = cur;
+	}
+	free(src_cost);				/* clean up */
+	free(rord);
+	if ((best.s < 0) | (best.d < 0))	/* nothing left to move? */
+		return(.0);
+						/* else make the actual move */
+	mtx_coef(mig,best.s,best.d) += best.amt;
+	src_rem[best.s] -= best.amt;
+	dst_rem[best.d] -= best.amt;
+	return(best.amt);
+memerr:					/* allocation failed: report and exit (nothing is freed here) */
+	fprintf(stderr, "%s: Out of memory in migration_step()\n", progname);
+	exit(1);
+}
+
 /* Compute and insert migration along directed edge (may fork child) */
 static MIGRATION *
 create_migration(RBFNODE *from_rbf, RBFNODE *to_rbf)
 {
-	const double	end_thresh = 0.1/(from_rbf->nrbf*to_rbf->nrbf);
-	const double	check_thresh = 0.01;
-	const double	rel_thresh = 5e-6;
-	float		*pmtx;
+	const double	end_thresh = 5e-6;
+	PRICEMAT	pmtx;
 	MIGRATION	*newmig;
 	double		*src_rem, *dst_rem;
 	double		total_rem = 1., move_amt;
-	int		i;
+	int		i, j;
 						/* check if exists already */
 	for (newmig = from_rbf->ejl; newmig != NULL;
 			newmig = nextedge(from_rbf,newmig))
 		if (newmig->rbfv[1] == to_rbf)
 			return(NULL);
 						/* else allocate */
+#ifdef DEBUG
+	fprintf(stderr, "Building path from (theta,phi) (%.0f,%.0f) ",
+			get_theta180(from_rbf->invec),
+			get_phi360(from_rbf->invec));
+	fprintf(stderr, "to (%.0f,%.0f) with %d x %d matrix\n",
+			get_theta180(to_rbf->invec),
+			get_phi360(to_rbf->invec), 
+			from_rbf->nrbf, to_rbf->nrbf);
+#endif
 	newmig = new_migration(from_rbf, to_rbf);
 	if (run_subprocess())
 		return(newmig);			/* child continues */
-	pmtx = price_routes(from_rbf, to_rbf);
+	price_routes(&pmtx, from_rbf, to_rbf);
 	src_rem = (double *)malloc(sizeof(double)*from_rbf->nrbf);
 	dst_rem = (double *)malloc(sizeof(double)*to_rbf->nrbf);
 	if ((src_rem == NULL) | (dst_rem == NULL)) {
@@ -315,42 +355,27 @@ create_migration(RBFNODE *from_rbf, RBFNODE *to_rbf)
 				progname);
 		exit(1);
 	}
-#ifdef DEBUG
-	fprintf(stderr, "Building path from (theta,phi) %s ",
-			thetaphi(from_rbf->invec));
-	fprintf(stderr, "to %s", thetaphi(to_rbf->invec));
-	/* if (nchild) */ fputc('\n', stderr);
-#endif
 						/* starting quantities */
 	memset(newmig->mtx, 0, sizeof(float)*from_rbf->nrbf*to_rbf->nrbf);
 	for (i = from_rbf->nrbf; i--; )
 		src_rem[i] = rbf_volume(&from_rbf->rbfa[i]) / from_rbf->vtotal;
-	for (i = to_rbf->nrbf; i--; )
-		dst_rem[i] = rbf_volume(&to_rbf->rbfa[i]) / to_rbf->vtotal;
+	for (j = to_rbf->nrbf; j--; )
+		dst_rem[j] = rbf_volume(&to_rbf->rbfa[j]) / to_rbf->vtotal;
+
 	do {					/* move a bit at a time */
-		move_amt = migration_step(newmig, src_rem, dst_rem, pmtx);
+		move_amt = migration_step(newmig, src_rem, dst_rem, &pmtx);
 		total_rem -= move_amt;
-#ifdef DEBUG
-		if (!nchild)
-			/* fputc('.', stderr); */
-			fprintf(stderr, "%.9f remaining...\r", total_rem);
-#endif
-	} while (total_rem > end_thresh && (total_rem > check_thresh) |
-					(move_amt > rel_thresh*total_rem));
-#ifdef DEBUG
-	if (!nchild) fputs("\ndone.\n", stderr);
-	else fprintf(stderr, "finished with %.9f remaining\n", total_rem);
-#endif
+	} while ((total_rem > end_thresh) & (move_amt > 0));
+
 	for (i = from_rbf->nrbf; i--; ) {	/* normalize final matrix */
-	    float	nf = rbf_volume(&from_rbf->rbfa[i]);
-	    int		j;
+	    double	nf = rbf_volume(&from_rbf->rbfa[i]);
 	    if (nf <= FTINY) continue;
 	    nf = from_rbf->vtotal / nf;
 	    for (j = to_rbf->nrbf; j--; )
-		newmig->mtx[mtx_ndx(newmig,i,j)] *= nf;
+		mtx_coef(newmig,i,j) *= nf;	/* row now sums to 1.0 */
 	}
 	end_subprocess();			/* exit here if subprocess */
-	free(pmtx);				/* free working arrays */
+	free_routes(&pmtx);			/* free working arrays */
 	free(src_rem);
 	free(dst_rem);
 	return(newmig);