| 269 |
|
gargv[0]); |
| 270 |
|
return(0); |
| 271 |
|
} |
| 272 |
< |
if (in_type == DTfloat) { |
| 272 |
> |
if (out_type == DTfloat) { |
| 273 |
|
if (fread(osum, sizeof(float)*ncomp, (size_t)xres*yres, fp) != |
| 274 |
|
(size_t)xres*yres) { |
| 275 |
|
fprintf(stderr, "%s: fread() error\n", gargv[0]); |
| 395 |
|
fout = open_iofile(fbuf, c); |
| 396 |
|
if (!reload_data(osum, fout)) |
| 397 |
|
return(0); |
| 398 |
< |
} else /* else new output (clobber prev. files) */ |
| 398 |
> |
} else /* else new output (clobbers prev. file) */ |
| 399 |
|
fout = open_output(fbuf, c-(cmtx->ncols==1)); |
| 400 |
|
} else { /* else stdout */ |
| 401 |
|
if ((out_type == DTfloat) & (cmtx->ncols > 1)) { |
| 516 |
|
} |
| 517 |
|
if (!pid) break; /* new child gets to work */ |
| 518 |
|
} |
| 519 |
< |
if (!row0 | (out_type != DTfloat)) { |
| 520 |
< |
osum = (float *)calloc((size_t)xres*yres, sizeof(float)*ncomp); |
| 521 |
< |
if (!osum) { |
| 522 |
< |
fprintf(stderr, "%s: cannot allocate %dx%d %d-component accumulator\n", |
| 523 |
< |
gargv[0], xres, yres, ncomp); |
| 524 |
< |
return(0); |
| 525 |
< |
} |
| 519 |
> |
osum = (float *)calloc((size_t)xres*yres, sizeof(float)*ncomp); |
| 520 |
> |
if (!osum) { |
| 521 |
> |
fprintf(stderr, "%s: cannot allocate %dx%d %d-component accumulator\n", |
| 522 |
> |
gargv[0], xres, yres, ncomp); |
| 523 |
> |
return(0); |
| 524 |
|
} |
| 525 |
|
srandom(113*coff + 5669); /* randomize row access for this process */ |
| 526 |
|
syarr = scramble(yres); |
| 527 |
|
/* run through our unique set of columns */ |
| 528 |
|
for (c = coff; c < cmtx->ncols; c += nprocs) { |
| 529 |
|
int rc = rowN - row0; |
| 532 |
– |
void *omap = NULL; |
| 533 |
– |
size_t omaplen = 0; |
| 534 |
– |
long dstart; |
| 530 |
|
FILE *fout; |
| 531 |
|
int y; |
| 532 |
|
/* create/load output */ |
| 533 |
|
sprintf(fbuf, out_spec, c); |
| 534 |
|
if (row0) { /* making another pass? */ |
| 535 |
|
fout = open_iofile(fbuf, c); |
| 536 |
< |
if (!fout) return(0); |
| 542 |
< |
if (out_type == DTfloat) { |
| 543 |
< |
dstart = ftell(fout); |
| 544 |
< |
if ((dstart < 0) | (dstart % sizeof(float))) { |
| 545 |
< |
fprintf(stderr, "%s: bad seek/alignment\n", fbuf); |
| 546 |
< |
return(0); |
| 547 |
< |
} |
| 548 |
< |
omaplen = dstart + sizeof(float)*ncomp*xres*yres; |
| 549 |
< |
omap = mmap(NULL, omaplen, PROT_READ|PROT_WRITE, |
| 550 |
< |
MAP_FILE|MAP_SHARED, fileno(fout), 0); |
| 551 |
< |
if (omap == MAP_FAILED) { |
| 552 |
< |
fprintf(stderr, "%s: cannot map file '%s'\n", |
| 553 |
< |
gargv[0], fbuf); |
| 554 |
< |
return(0); |
| 555 |
< |
} |
| 556 |
< |
osum = (float *)((char *)omap + dstart); |
| 557 |
< |
} else if (!reload_data(osum, fout)) |
| 536 |
> |
if (!reload_data(osum, fout)) |
| 537 |
|
return(0); |
| 538 |
< |
} else { /* else new output (clobber prev. files) */ |
| 538 |
> |
} else { /* else new output (clobbers prev. file) */ |
| 539 |
|
fout = open_output(fbuf, c); |
| 540 |
|
if (!fout) return(0); |
| 541 |
|
if (c > coff) /* clear accumulator? */ |
| 544 |
|
while (rc-- > 0) { /* map & sum each input file */ |
| 545 |
|
const int r = odd ? row0 + rc : rowN-1 - rc; |
| 546 |
|
const rmx_dtype *cval = rmx_val(cmtx, r, c); |
| 547 |
+ |
long dstart; |
| 548 |
|
size_t imaplen; |
| 549 |
|
void *imap; |
| 550 |
|
FILE *finp; |
| 603 |
|
if (fwritesscan(osum + (size_t)y*xres*ncomp, |
| 604 |
|
ncomp, xres, fout) < 0) |
| 605 |
|
goto writerr; |
| 626 |
– |
} else if (omap) { |
| 627 |
– |
if (munmap(omap, omaplen) < 0) |
| 628 |
– |
goto writerr; |
| 629 |
– |
osum = NULL; |
| 606 |
|
} else if (fwrite(osum, sizeof(float)*ncomp, (size_t)xres*yres, fout) != |
| 607 |
|
(size_t)xres*yres) |
| 608 |
|
goto writerr; |
| 618 |
|
} |
| 619 |
|
if (coff) _exit(0); /* child exits here */ |
| 620 |
|
/* but parent waits for children */ |
| 621 |
< |
if (osum) free(osum); |
| 621 |
> |
free(osum); |
| 622 |
|
free(syarr); |
| 623 |
|
c = 0; |
| 624 |
|
while (++coff < nprocs) { |
| 625 |
|
int st; |
| 626 |
|
if (wait(&st) < 0) { |
| 627 |
< |
fprintf(stderr, "%s: warning - wait() call failed unexpectedly\n", gargv[0]); |
| 627 |
> |
fprintf(stderr, "%s: warning - child disappeared\n", gargv[0]); |
| 628 |
|
break; |
| 629 |
|
} |
| 630 |
< |
if (st) c = st; |
| 630 |
> |
if (st) { |
| 631 |
> |
fprintf(stderr, "%s: bad exit status from child\n", gargv[0]); |
| 632 |
> |
c = st; |
| 633 |
> |
} |
| 634 |
|
} |
| 635 |
|
return(c == 0); |
| 636 |
|
writerr: |
| 722 |
|
if (cacheGB > 1e-4) { /* figure out # of passes => rintvl */ |
| 723 |
|
size_t inp_bytes = (in_type==DTfloat ? sizeof(float)*ncomp |
| 724 |
|
: (size_t)(ncomp+1)) * xres*yres; |
| 725 |
< |
size_t mem_bytes = sizeof(float)*ncomp*xres*yres; |
| 725 |
> |
size_t over_bytes = rmx_array_size(cmtx) + |
| 726 |
> |
sizeof(float)*ncomp*xres*yres + |
| 727 |
> |
2*(out_type==DTfloat ? sizeof(float)*ncomp |
| 728 |
> |
: (size_t)(ncomp+1)) * xres*yres; |
| 729 |
|
int npasses = (double)inp_bytes*cmtx->nrows / |
| 730 |
< |
(cacheGB*(1L<<30) - (double)mem_bytes*nprocs) + 1; |
| 731 |
< |
if ((npasses <= 0) | (npasses*8 >= cmtx->nrows)) |
| 732 |
< |
npasses = 1; /* let's not go there... */ |
| 730 |
> |
(cacheGB*(1L<<30) - (double)over_bytes*nprocs) + 1; |
| 731 |
> |
if ((npasses <= 0) | (npasses*6 >= cmtx->nrows)) { |
| 732 |
> |
fprintf(stderr, |
| 733 |
> |
"%s: warning - insufficient cache space for multi-pass\n", |
| 734 |
> |
argv[0]); |
| 735 |
> |
npasses = 1; |
| 736 |
> |
} |
| 737 |
|
rintvl = cmtx->nrows / npasses; |
| 738 |
|
rintvl += (rintvl*npasses < cmtx->nrows); |
| 739 |
|
} else |
| 740 |
|
rintvl = cmtx->nrows; |
| 741 |
< |
/* make our passes */ |
| 741 |
> |
/* make our output accumulation passes */ |
| 742 |
|
for (row0 = 0; row0 < cmtx->nrows; row0 += rintvl) { |
| 743 |
|
if ((rowN = row0 + rintvl) > cmtx->nrows) |
| 744 |
|
rowN = cmtx->nrows; |