--- ray/src/rt/raypcalls.c	2003/02/25 02:47:23	2.2
+++ ray/src/rt/raypcalls.c	2009/12/12 00:03:42	2.21
@@ -1,5 +1,5 @@
 #ifndef lint
-static const char	RCSid[] = "$Id: raypcalls.c,v 2.2 2003/02/25 02:47:23 greg Exp $";
+static const char	RCSid[] = "$Id: raypcalls.c,v 2.21 2009/12/12 00:03:42 greg Exp $";
 #endif
 /*
  *  raypcalls.c - interface for parallel rendering using Radiance
@@ -13,24 +13,29 @@ static const char	RCSid[] = "$Id: raypcalls.c,v 2.2 20
  *  These calls are designed similarly to the ones in raycalls.c,
  *  but allow for multiple rendering processes on the same host
  *  machine.  There is no sense in specifying more child processes
- *  than you have processors, but one child may help by allowing
+ *  than you have processor cores, but one child may help by allowing
  *  asynchronous ray computation in an interactive program, and
  *  will protect the caller from fatal rendering errors.
  *
- *  You should first read and undrstand the header in raycalls.c,
+ *  You should first read and understand the header in raycalls.c,
  *  as some things are explained there that are not repated here.
  *
  *  The first step is opening one or more rendering processes
  *  with a call to ray_pinit(oct, nproc).  Before calling fork(),
  *  ray_pinit() loads the octree and data structures into the
- *  caller's memory.  This permits all sorts of queries that
- *  wouldn't be possible otherwise, without causing any real
+ *  caller's memory, and ray_popen() synchronizes the ambient
+ *  file, if any.  Shared memory permits all sorts of queries
+ *  that wouldn't be possible otherwise without causing any real
  *  memory overhead, since all the static data are shared
- *  between processes.  Rays are then traced using a simple
+ *  between processes.  Rays are traced using a simple
  *  queuing mechanism, explained below.
  *
- *  The ray queue holds as many rays as there are rendering
- *  processes.  Rays are queued and returned by a single
+ *  The ray queue buffers RAYQLEN rays before sending to
+ *  children, each of which may internally buffer RAYQLEN rays
+ *  during evaluation.  Rays are not returned in the order
+ *  they are sent when multiple processes are open.
+ *
+ *  Rays are queued and returned by a single
  *  ray_pqueue() call.  A ray_pqueue() return
  *  value of 0 indicates that no rays are ready
  *  and the queue is not yet full.  A return value of 1
@@ -43,7 +48,7 @@ static const char	RCSid[] = "$Id: raypcalls.c,v 2.2 20
  *	myRay.rorg = ( ray origin point )
  *	myRay.rdir = ( normalized ray direction )
  *	myRay.rmax = ( maximum length, or zero for no limit )
- *	rayorigin(&myRay, NULL, PRIMARY, 1.0);
+ *	rayorigin(&myRay, PRIMARY, NULL, NULL);
  *	myRay.rno = ( my personal ray identifier )
  *	if (ray_pqueue(&myRay) == 1)
  *		{ do something with results }
@@ -51,12 +56,12 @@ static const char	RCSid[] = "$Id: raypcalls.c,v 2.2 20
  *  Note the differences between this and the simpler ray_trace()
  *  call.  In particular, the call may or may not return a value
  *  in the passed ray structure.  Also, you need to call rayorigin()
- *  yourself, which is normally for you by ray_trace().  The
- *  great thing is that ray_pqueue() will trace rays faster in
+ *  yourself, which is normally called for you by ray_trace().  The
+ *  benefit is that ray_pqueue() will trace rays faster in
  *  proportion to the number of CPUs you have available on your
  *  system.  If the ray queue is full before the call, ray_pqueue()
  *  will block until a result is ready so it can queue this one.
- *  The global int ray_idle indicates the number of currently idle
+ *  The global int ray_pnidle indicates the number of currently idle
  *  children.  If you want to check for completed rays without blocking,
  *  or get the results from rays that have been queued without
  *  queuing any new ones, the ray_presult() call is for you:
@@ -70,27 +75,28 @@ static const char	RCSid[] = "$Id: raypcalls.c,v 2.2 20
  *  until a value is available, returning 0 only if the
  *  queue is completely empty.  A negative return value
  *  indicates that a rendering process died.  If this
- *  happens, ray_close(0) is automatically called to close
- *  all child processes, and ray_nprocs is set to zero.
+ *  happens, ray_pclose(0) is automatically called to close
+ *  all child processes, and ray_pnprocs is set to zero.
  *
  *  If you just want to fill the ray queue without checking for
- *  results, check ray_idle and call ray_psend():
+ *  results, check ray_pnidle and call ray_psend():
  *
- *	while (ray_idle) {
+ *	while (ray_pnidle) {
  *		( set up ray )
  *		ray_psend(&myRay);
  *	}
  *
- *  The ray_presult() and/or ray_pqueue() functions may then be
- *  called to read back the results.
+ *  Note that it is a fatal error to call ray_psend() when
+ *  ray_pnidle is zero.  The ray_presult() and/or ray_pqueue()
+ *  functions may be called subsequently to read back the results.
  *
  *  When you are done, you may call ray_pdone(1) to close
  *  all child processes and clean up memory used by Radiance.
  *  Any queued ray calculations will be awaited and discarded.
  *  As with ray_done(), ray_pdone(0) hangs onto data files
  *  and fonts that are likely to be used in subsequent renderings.
- *  Whether you want to bother cleaning up memory or not, you
- *  should at least call ray_pclose(0) to clean the child processes.
+ *  Whether you need to clean up memory or not, you should
+ *  at least call ray_pclose(0) to await the child processes.
  *
  *  Warning:  You cannot affect any of the rendering processes
  *  by changing global parameter values onece ray_pinit() has
@@ -99,37 +105,43 @@ static const char	RCSid[] = "$Id: raypcalls.c,v 2.2 20
  *  If you just want to reap children so that you can alter the
  *  rendering parameters without reloading the scene, use the
  *  ray_pclose(0) and ray_popen(nproc) calls to close
- *  then restart the child processes.
+ *  then restart the child processes after the changes are made.
  *
  *  Note:  These routines are written to coordinate with the
  *  definitions in raycalls.c, and in fact depend on them.
  *  If you want to trace a ray and get a result synchronously,
  *  use the ray_trace() call to compute it in the parent process.
+ *  This will not interfere with any subprocess calculations,
+ *  but beware that a fatal error may end with a call to quit().
  *
  *  Note:  One of the advantages of using separate processes
  *  is that it gives the calling program some immunity from
  *  fatal rendering errors.  As discussed in raycalls.c,
  *  Radiance tends to throw up its hands and exit at the
  *  first sign of trouble, calling quit() to return control
- *  to the system.  Although you can avoid exit() with
+ *  to the top level.  Although you can avoid exit() with
  *  your own longjmp() in quit(), the cleanup afterwards
  *  is always suspect.  Through the use of subprocesses,
  *  we avoid this pitfall by closing the processes and
  *  returning a negative value from ray_pqueue() or
  *  ray_presult().  If you get a negative value from either
  *  of these calls, you can assume that the processes have
- *  been cleaned up with a call to ray_close(), though you
+ *  been cleaned up with a call to ray_pclose(), though you
  *  will have to call ray_pdone() yourself if you want to
- *  free memory.  Obviously, you cannot continue rendering,
- *  but otherwise your process should not be compromised.
+ *  free memory.  Obviously, you cannot continue rendering
+ *  without risking further errors, but otherwise your
+ *  process should not be compromised.
  */
 
+#include  "rtprocess.h"
 #include  "ray.h"
-
+#include  "ambient.h"
+#include  <sys/types.h>
+#include  <sys/wait.h>
 #include  "selcall.h"
 
 #ifndef RAYQLEN
-#define RAYQLEN		16		/* # rays to send at once */
+#define RAYQLEN		12		/* # rays to send at once */
 #endif
 
 #ifndef MAX_RPROCS
@@ -142,15 +154,16 @@ static const char	RCSid[] = "$Id: raypcalls.c,v 2.2 20
 
 extern char	*shm_boundary;		/* boundary of shared memory */
 
-int		ray_nprocs = 0;		/* number of child processes */
-int		ray_idle = 0;		/* number of idle children */
+int		ray_pfifo = 0;		/* maintain ray call order? */
+int		ray_pnprocs = 0;	/* number of child processes */
+int		ray_pnidle = 0;		/* number of idle children */
 
 static struct child_proc {
 	int	pid;				/* child process id */
 	int	fd_send;			/* write to child here */
 	int	fd_recv;			/* read from child here */
 	int	npending;			/* # rays in process */
-	unsigned long  rno[RAYQLEN];		/* working on these rays */
+	RNUMBER	rno[RAYQLEN];			/* working on these rays */
 } r_proc[MAX_NPROCS];			/* our child processes */
 
 static RAY	r_queue[2*RAYQLEN];	/* ray i/o buffer */
@@ -160,23 +173,21 @@ static int	r_recv_next;		/* next receive ray placement
 
 #define sendq_full()	(r_send_next >= RAYQLEN)
 
+static int ray_pflush(void);
+static void ray_pchild(int fd_in, int fd_out);
 
-void
-ray_pinit(otnm, nproc)		/* initialize ray-tracing processes */
-char	*otnm;
-int	nproc;
+
+extern void
+ray_pinit(		/* initialize ray-tracing processes */
+	char	*otnm,
+	int	nproc
+)
 {
 	if (nobjects > 0)		/* close old calculation */
 		ray_pdone(0);
 
 	ray_init(otnm);			/* load the shared scene */
 
-	preload_objs();			/* preload auxiliary data */
-
-					/* set shared memory boundary */
-	shm_boundary = (char *)malloc(16);
-	strcpy(shm_boundary, "SHM_BOUNDARY");
-
 	r_send_next = 0;		/* set up queue */
 	r_recv_first = r_recv_next = RAYQLEN;
 
@@ -185,16 +196,16 @@ int	nproc;
 
 
 static int
-ray_pflush()			/* send queued rays to idle children */
+ray_pflush(void)			/* send queued rays to idle children */
 {
 	int	nc, n, nw, i, sfirst;
 
-	if ((ray_idle <= 0 | r_send_next <= 0))
+	if ((ray_pnidle <= 0) | (r_send_next <= 0))
 		return(0);		/* nothing we can send */
 	
 	sfirst = 0;			/* divvy up labor */
-	nc = ray_idle;
-	for (i = ray_nprocs; nc && i--; ) {
+	nc = ray_pnidle;
+	for (i = ray_pnprocs; nc && i--; ) {
 		if (r_proc[i].npending > 0)
 			continue;	/* child looks busy */
 		n = (r_send_next - sfirst)/nc--;
@@ -210,7 +221,7 @@ ray_pflush()			/* send queued rays to idle children */
 		while (n--)		/* record ray IDs */
 			r_proc[i].rno[n] = r_queue[sfirst+n].rno;
 		sfirst += r_proc[i].npending;
-		ray_idle--;		/* now she's busy */
+		ray_pnidle--;		/* now she's busy */
 	}
 	if (sfirst != r_send_next)
 		error(CONSISTENCY, "code screwup in ray_pflush");
@@ -219,9 +230,10 @@ ray_pflush()			/* send queued rays to idle children */
 }
 
 
-void
-ray_psend(r)			/* add a ray to our send queue */
-RAY	*r;
+extern void
+ray_psend(			/* add a ray to our send queue */
+	RAY	*r
+)
 {
 	if (r == NULL)
 		return;
@@ -229,46 +241,44 @@ RAY	*r;
 	if (sendq_full() && ray_pflush() <= 0)
 		error(INTERNAL, "ray_pflush failed in ray_psend");
 
-	copystruct(&r_queue[r_send_next], r);
-	r_send_next++;
+	r_queue[r_send_next++] = *r;
 }
 
 
-int
-ray_pqueue(r)			/* queue a ray for computation */
-RAY	*r;
+extern int
+ray_pqueue(			/* queue a ray for computation */
+	RAY	*r
+)
 {
 	if (r == NULL)
 		return(0);
 					/* check for full send queue */
 	if (sendq_full()) {
-		RAY	mySend;
-		int	rval;
-		copystruct(&mySend, r);
+		RAY	mySend = *r;
 					/* wait for a result */
-		rval = ray_presult(r, 0);
+		if (ray_presult(r, 0) <= 0)
+			return(-1);
 					/* put new ray in queue */
-		copystruct(&r_queue[r_send_next], &mySend);
-		r_send_next++;
-		return(rval);		/* done */
+		r_queue[r_send_next++] = mySend;
+				/* XXX r_send_next may now be > RAYQLEN */
+		return(1);
 	}
-					/* add ray to send queue */
-	copystruct(&r_queue[r_send_next], r);
-	r_send_next++;
+					/* else add ray to send queue */
+	r_queue[r_send_next++] = *r;
 					/* check for returned ray... */
 	if (r_recv_first >= r_recv_next)
 		return(0);
 					/* ...one is sitting in queue */
-	copystruct(r, &r_queue[r_recv_first]);
-	r_recv_first++;
+	*r = r_queue[r_recv_first++];
 	return(1);
 }
 
 
-int
-ray_presult(r, poll)		/* check for a completed ray */
-RAY	*r;
-int	poll;
+extern int
+ray_presult(		/* check for a completed ray */
+	RAY	*r,
+	int	poll
+)
 {
 	static struct timeval	tpoll;	/* zero timeval struct */
 	static fd_set	readset, errset;
@@ -279,11 +289,10 @@ int	poll;
 		return(0);
 					/* check queued results first */
 	if (r_recv_first < r_recv_next) {
-		copystruct(r, &r_queue[r_recv_first]);
-		r_recv_first++;
+		*r = r_queue[r_recv_first++];
 		return(1);
 	}
-	n = ray_nprocs - ray_idle;	/* pending before flush? */
+	n = ray_pnprocs - ray_pnidle;	/* pending before flush? */
 
 	if (ray_pflush() < 0)		/* send new rays to process */
 		return(-1);
@@ -291,18 +300,21 @@ int	poll;
 	r_recv_first = r_recv_next = RAYQLEN;
 
 	if (!poll)			/* count newly sent unless polling */
-		n = ray_nprocs - ray_idle;
+		n = ray_pnprocs - ray_pnidle;
 	if (n <= 0)			/* return if nothing to await */
 		return(0);
+	if (!poll && ray_pnprocs == 1)	/* one process -> skip select() */
+		FD_SET(r_proc[0].fd_recv, &readset);
+
 getready:				/* any children waiting for us? */
-	for (pn = ray_nprocs; pn--; )
+	for (pn = ray_pnprocs; pn--; )
 		if (FD_ISSET(r_proc[pn].fd_recv, &readset) ||
 				FD_ISSET(r_proc[pn].fd_recv, &errset))
 			break;
 					/* call select if we must */
 	if (pn < 0) {
 		FD_ZERO(&readset); FD_ZERO(&errset); n = 0;
-		for (pn = ray_nprocs; pn--; ) {
+		for (pn = ray_pnprocs; pn--; ) {
 			if (r_proc[pn].npending > 0)
 				FD_SET(r_proc[pn].fd_recv, &readset);
 			FD_SET(r_proc[pn].fd_recv, &errset);
@@ -338,7 +350,7 @@ getready:				/* any children waiting for us? */
 	if (n <= 0)
 		FD_CLR(r_proc[pn].fd_recv, &errset);
 	r_proc[pn].npending = 0;
-	ray_idle++;
+	ray_pnidle++;
 					/* check for rendering errors */
 	if (!ok) {
 		ray_pclose(0);		/* process died -- clean up */
@@ -354,15 +366,15 @@ getready:				/* any children waiting for us? */
 		rp->slights = NULL;
 	}
 					/* return first ray received */
-	copystruct(r, &r_queue[r_recv_first]);
-	r_recv_first++;
+	*r = r_queue[r_recv_first++];
 	return(1);
 }
 
 
-void
-ray_pdone(freall)		/* reap children and free data */
-int	freall;
+extern void
+ray_pdone(		/* reap children and free data */
+	int	freall
+)
 {
 	ray_pclose(0);			/* close child processes */
 
@@ -375,36 +387,38 @@ int	freall;
 
 
 static void
-ray_pchild(fd_in, fd_out)	/* process rays (never returns) */
-int	fd_in;
-int	fd_out;
+ray_pchild(	/* process rays (never returns) */
+	int	fd_in,
+	int	fd_out
+)
 {
 	int	n;
 	register int	i;
+					/* flag child process for quit() */
+	ray_pnprocs = -1;
 					/* read each ray request set */
 	while ((n = read(fd_in, (char *)r_queue, sizeof(r_queue))) > 0) {
 		int	n2;
-		if (n % sizeof(RAY))
+		if (n < sizeof(RAY))
 			break;
-		n /= sizeof(RAY);
 					/* get smuggled set length */
-		n2 = r_queue[0].crtype - n;
+		n2 = sizeof(RAY)*r_queue[0].crtype - n;
 		if (n2 < 0)
 			error(INTERNAL, "buffer over-read in ray_pchild");
 		if (n2 > 0) {		/* read the rest of the set */
-			i = readbuf(fd_in, (char *)(r_queue+n),
-					sizeof(RAY)*n2);
-			if (i != sizeof(RAY)*n2)
+			i = readbuf(fd_in, (char *)r_queue + n, n2);
+			if (i != n2)
 				break;
 			n += n2;
 		}
+		n /= sizeof(RAY);
 					/* evaluate rays */
 		for (i = 0; i < n; i++) {
 			r_queue[i].crtype = r_queue[i].rtype;
 			r_queue[i].parent = NULL;
 			r_queue[i].clipset = NULL;
 			r_queue[i].slights = NULL;
-			r_queue[i].revf = raytrace;
+			r_queue[i].rlvl = 0;
 			samplendx++;
 			rayclear(&r_queue[i]);
 			rayvalue(&r_queue[i]);
@@ -421,24 +435,31 @@ int	fd_out;
 }
 
 
-void
-ray_popen(nadd)			/* open the specified # processes */
-int	nadd;
+extern void
+ray_popen(			/* open the specified # processes */
+	int	nadd
+)
 {
 					/* check if our table has room */
-	if (ray_nprocs + nadd > MAX_NPROCS)
-		nadd = MAX_NPROCS - ray_nprocs;
+	if (ray_pnprocs + nadd > MAX_NPROCS)
+		nadd = MAX_NPROCS - ray_pnprocs;
 	if (nadd <= 0)
 		return;
-	fflush(stderr);			/* clear pending output */
-	fflush(stdout);
+	ambsync();			/* load any new ambient values */
+	if (shm_boundary == NULL) {	/* first child process? */
+		preload_objs();		/* preload auxiliary data */
+					/* set shared memory boundary */
+		shm_boundary = (char *)malloc(16);
+		strcpy(shm_boundary, "SHM_BOUNDARY");
+	}
+	fflush(NULL);			/* clear pending output */
 	while (nadd--) {		/* fork each new process */
 		int	p0[2], p1[2];
 		if (pipe(p0) < 0 || pipe(p1) < 0)
 			error(SYSTEM, "cannot create pipe");
-		if ((r_proc[ray_nprocs].pid = fork()) == 0) {
+		if ((r_proc[ray_pnprocs].pid = fork()) == 0) {
 			int	pn;	/* close others' descriptors */
-			for (pn = ray_nprocs; pn--; ) {
+			for (pn = ray_pnprocs; pn--; ) {
 				close(r_proc[pn].fd_send);
 				close(r_proc[pn].fd_recv);
 			}
@@ -446,21 +467,28 @@ int	nadd;
 					/* following call never returns */
 			ray_pchild(p1[0], p0[1]);
 		}
-		if (r_proc[ray_nprocs].pid < 0)
+		if (r_proc[ray_pnprocs].pid < 0)
 			error(SYSTEM, "cannot fork child process");
 		close(p1[0]); close(p0[1]);
-		r_proc[ray_nprocs].fd_send = p1[1];
-		r_proc[ray_nprocs].fd_recv = p0[0];
-		r_proc[ray_nprocs].npending = 0;
-		ray_nprocs++;
-		ray_idle++;
+		/*
+		 * Close write stream on exec to avoid multiprocessing deadlock.
+		 * No use in read stream without it, so set flag there as well.
+		 */
+		fcntl(p1[1], F_SETFD, FD_CLOEXEC);
+		fcntl(p0[0], F_SETFD, FD_CLOEXEC);
+		r_proc[ray_pnprocs].fd_send = p1[1];
+		r_proc[ray_pnprocs].fd_recv = p0[0];
+		r_proc[ray_pnprocs].npending = 0;
+		ray_pnprocs++;
+		ray_pnidle++;
 	}
 }
 
 
-void
-ray_pclose(nsub)		/* close one or more child processes */
-int	nsub;
+extern void
+ray_pclose(		/* close one or more child processes */
+	int	nsub
+)
 {
 	static int	inclose = 0;
 	RAY	res;
@@ -469,26 +497,26 @@ int	nsub;
 		return;
 	inclose++;
 					/* check argument */
-	if ((nsub <= 0 | nsub > ray_nprocs))
-		nsub = ray_nprocs;
+	if ((nsub <= 0) | (nsub > ray_pnprocs))
+		nsub = ray_pnprocs;
 					/* clear our ray queue */
 	while (ray_presult(&res,0) > 0)
 		;
 					/* clean up children */
 	while (nsub--) {
 		int	status;
-		ray_nprocs--;
-		close(r_proc[ray_nprocs].fd_recv);
-		close(r_proc[ray_nprocs].fd_send);
-		while (wait(&status) != r_proc[ray_nprocs].pid)
-			;
+		ray_pnprocs--;
+		close(r_proc[ray_pnprocs].fd_recv);
+		close(r_proc[ray_pnprocs].fd_send);
+		if (waitpid(r_proc[ray_pnprocs].pid, &status, 0) < 0)
+			status = 127<<8;
 		if (status) {
 			sprintf(errmsg,
 				"rendering process %d exited with code %d",
-					r_proc[ray_nprocs].pid, status>>8);
+					r_proc[ray_pnprocs].pid, status>>8);
 			error(WARNING, errmsg);
 		}
-		ray_idle--;
+		ray_pnidle--;
 	}
 	inclose--;
 }
@@ -498,5 +526,7 @@ void
 quit(ec)			/* make sure exit is called */
 int	ec;
 {
+	if (ray_pnprocs > 0)	/* close children if any */
+		ray_pclose(0);		
 	exit(ec);
 }