1 |
/* |
2 |
================================================================== |
3 |
N-way hybrid out-of-core merge sort |
4 |
Sorts N blocks internally using quicksort, followed by N-way |
5 |
merge using priority queue. |
6 |
|
7 |
Roland Schregle (roland.schregle@{hslu.ch, gmail.com}) |
8 |
(c) Fraunhofer Institute for Solar Energy Systems, |
9 |
Lucerne University of Applied Sciences & Arts |
10 |
================================================================== |
11 |
|
12 |
$Id $ |
13 |
*/ |
14 |
|
15 |
|
16 |
#include "oocsort.h" |
17 |
#include <stdio.h> |
18 |
#include <stdlib.h> |
19 |
#include <string.h> |
20 |
|
21 |
|
22 |
/* Priority queue node */ |
23 |
typedef struct { |
24 |
OOC_Sort_Key pri; /* Record's priority (sort key) */ |
25 |
unsigned blk; /* Block containing record */ |
26 |
} OOC_Sort_PQNode; |
27 |
|
28 |
typedef struct { |
29 |
OOC_Sort_PQNode *node; |
30 |
unsigned len, tail; |
31 |
} OOC_Sort_PQueue; |
32 |
|
33 |
/* Block descriptor */ |
34 |
typedef struct { |
35 |
FILE *file; /* Temporary file */ |
36 |
char *buf, *head, *tail; /* Associated I/O buffa, |
37 |
head/tail pointers */ |
38 |
} OOC_Sort_Block; |
39 |
|
40 |
|
41 |
/* Record priority evaluation function */ |
42 |
static OOC_Sort_Key (*pri)(const void *); |
43 |
|
44 |
|
45 |
static int OOC_Sort_Compare (const void *rec1, const void *rec2) |
46 |
/* Comparison function for internal sorting */ |
47 |
{ |
48 |
const OOC_Sort_Key pri1 = pri(rec1), pri2 = pri(rec2); |
49 |
|
50 |
if (!rec1 || !rec2) |
51 |
return 0; |
52 |
|
53 |
if (pri1 < pri2) |
54 |
return -1; |
55 |
else if (pri1 > pri2) |
56 |
return 1; |
57 |
else |
58 |
return 0; |
59 |
} |
60 |
|
61 |
|
62 |
static char *OOC_Sort_Read (OOC_Sort_Block *blk, unsigned recSize) |
63 |
/* Read next record from specified block; return pointer to buffa entry |
64 |
or NULL on error */ |
65 |
{ |
66 |
char *rec = NULL; |
67 |
|
68 |
if (blk -> head >= blk -> tail) { |
69 |
/* Input buffa empty; read next block (potentially truncated if last), |
70 |
* where tail marks end of buffa */ |
71 |
const unsigned long bufSize = blk -> tail - blk -> buf; |
72 |
|
73 |
if (feof(blk -> file)) |
74 |
return NULL; |
75 |
|
76 |
blk -> head = blk -> buf; |
77 |
blk -> tail = blk -> buf + fread(blk -> buf, 1, bufSize, blk -> file); |
78 |
|
79 |
if (blk -> tail == blk -> head) |
80 |
return NULL; |
81 |
} |
82 |
|
83 |
rec = blk -> head; |
84 |
blk -> head += recSize; |
85 |
|
86 |
return rec; |
87 |
} |
88 |
|
89 |
|
90 |
static char *OOC_Sort_Peek (OOC_Sort_Block *blk, unsigned recSize) |
91 |
/* Return next record from specified block WITHOUT removing from buffa */ |
92 |
{ |
93 |
char *rec = NULL; |
94 |
|
95 |
if (rec = OOC_Sort_Read(blk, recSize)) { |
96 |
/* Restore buffa head */ |
97 |
blk -> head -= recSize; |
98 |
} |
99 |
|
100 |
return rec; |
101 |
} |
102 |
|
103 |
|
104 |
static char *OOC_Sort_Write (OOC_Sort_Block *blk, unsigned recSize, |
105 |
const char *rec) |
106 |
/* Output record to specified block and return pointer to buffa entry |
107 |
or NULL on error */ |
108 |
{ |
109 |
char *res = NULL; |
110 |
|
111 |
if (blk -> head >= blk -> tail) { |
112 |
/* Flush output buffa (tail marks end of buffa) */ |
113 |
const unsigned long bufSize = blk -> tail - blk -> buf; |
114 |
|
115 |
blk -> head = blk -> buf; |
116 |
if (fwrite(blk -> buf, 1, bufSize, blk -> file) != bufSize) |
117 |
return NULL; |
118 |
} |
119 |
|
120 |
if (!rec) |
121 |
return NULL; |
122 |
|
123 |
memcpy(blk -> head, rec, recSize); |
124 |
res = blk -> head; |
125 |
blk -> head += recSize; |
126 |
|
127 |
return res; |
128 |
} |
129 |
|
130 |
|
131 |
#ifdef DEBUG |
132 |
static int OOC_Sort_PQCheck (OOC_Sort_PQueue *pq, unsigned root) |
133 |
/* Priority queue sanity check */ |
134 |
{ |
135 |
unsigned kid; |
136 |
|
137 |
if (root < pq -> tail) { |
138 |
if ((kid = (root << 1) + 1) < pq -> tail) |
139 |
if (pq -> node [kid].pri < pq -> node [root].pri) |
140 |
return -1; |
141 |
else return OOC_Sort_PQCheck(pq, kid); |
142 |
|
143 |
if ((kid = kid + 1) < pq -> tail) |
144 |
if (pq -> node [kid].pri < pq -> node [root].pri) |
145 |
return -1; |
146 |
else return OOC_Sort_PQCheck(pq, kid); |
147 |
|
148 |
} |
149 |
|
150 |
return 0; |
151 |
} |
152 |
#endif |
153 |
|
154 |
|
155 |
static int OOC_Sort_Push (OOC_Sort_PQueue *pq, OOC_Sort_Key pri, |
156 |
unsigned blk) |
157 |
/* Insert specified block index into priority queue; return block index |
158 |
* or -1 if queue is full */ |
159 |
{ |
160 |
OOC_Sort_PQNode *pqn = pq -> node; |
161 |
unsigned kid, root; |
162 |
|
163 |
if (pq -> tail >= pq -> len) |
164 |
/* Queue full */ |
165 |
return -1; |
166 |
|
167 |
/* Append node at tail and re-sort */ |
168 |
kid = pq -> tail++; |
169 |
|
170 |
while (kid) { |
171 |
root = kid - 1 >> 1; |
172 |
|
173 |
/* Compare with parent node and swap if necessary, else terminate */ |
174 |
if (pri < pqn [root].pri) { |
175 |
pqn [kid].pri = pqn [root].pri; |
176 |
pqn [kid].blk = pqn [root].blk; |
177 |
kid = root; |
178 |
} |
179 |
else break; |
180 |
} |
181 |
|
182 |
pqn [kid].pri = pri; |
183 |
pqn [kid].blk = blk; |
184 |
|
185 |
#ifdef DEBUG |
186 |
if (OOC_Sort_PQCheck(pq, 0) < 0) { |
187 |
fprintf(stderr, "OOC_Sort: priority queue inconsistency\n"); |
188 |
return -1; |
189 |
} |
190 |
#endif |
191 |
|
192 |
return blk; |
193 |
} |
194 |
|
195 |
|
196 |
static int OOC_Sort_Pop (OOC_Sort_PQueue *pq) |
197 |
/* Remove head of priority queue and return its block index |
198 |
* or -1 if queue is empty */ |
199 |
{ |
200 |
OOC_Sort_PQNode *pqn = pq -> node; |
201 |
OOC_Sort_Key pri; |
202 |
unsigned kid, kid2, root = 0, blk, res; |
203 |
|
204 |
if (!pq -> tail) |
205 |
/* Queue empty */ |
206 |
return -1; |
207 |
|
208 |
res = pqn -> blk; |
209 |
pri = pqn [--pq -> tail].pri; |
210 |
blk = pqn [pq -> tail].blk; |
211 |
|
212 |
/* Replace head with tail node and re-sort */ |
213 |
while ((kid = (root << 1) + 1) < pq -> tail) { |
214 |
/* Compare with with smaller kid and swap if necessary, else |
215 |
* terminate */ |
216 |
if ((kid2 = (kid + 1)) < pq -> tail && pqn [kid2].pri < pqn [kid].pri) |
217 |
kid = kid2; |
218 |
|
219 |
if (pri > pqn [kid].pri) { |
220 |
pqn [root].pri = pqn [kid].pri; |
221 |
pqn [root].blk = pqn [kid].blk; |
222 |
} |
223 |
else break; |
224 |
|
225 |
root = kid; |
226 |
} |
227 |
|
228 |
pqn [root].pri = pri; |
229 |
pqn [root].blk = blk; |
230 |
|
231 |
#ifdef DEBUG |
232 |
if (OOC_Sort_PQCheck(pq, 0) < 0) { |
233 |
fprintf(stderr, "OOC_Sort: priority queue inconsistency\n"); |
234 |
return -1; |
235 |
} |
236 |
#endif |
237 |
|
238 |
return res; |
239 |
} |
240 |
|
241 |
#if 0 |
242 |
int OOC_Sort (const char *inFile, const char *outFile, |
243 |
unsigned long blkSize, unsigned recSize, |
244 |
OOC_Sort_Key (*priority)(const void *)) |
245 |
/* Sort records in inFile and output to outFile by (a) internally |
246 |
* quicksorting block of blkSize bytes at a time, then (b) merging them |
247 |
* via a priority queue. RecSize specifies the size in bytes of each data |
248 |
* record. The priority() callback evaluates a record's priority and must be |
249 |
* supplied by the caller. */ |
250 |
{ |
251 |
FILE *in = NULL; |
252 |
OOC_Sort_PQueue pqueue; /* Priority queue */ |
253 |
OOC_Sort_Block *iBlk = NULL, oBlk; /* Block descriptors */ |
254 |
char *rec, *sortBuf = NULL; /* Internal sort buffa */ |
255 |
unsigned bufSize, numBlk; |
256 |
int b; |
257 |
|
258 |
/* Set record priority evaluation callback */ |
259 |
pri = priority; |
260 |
|
261 |
/* Round block and buffa size down to nearest multiple of record size */ |
262 |
blkSize = (blkSize / recSize) * recSize; |
263 |
bufSize = (OOC_SORT_BUFSIZE / recSize) * recSize; |
264 |
|
265 |
if (!(in = fopen(inFile, "rb"))) { |
266 |
fprintf(stderr, "OOC_Sort: failure opening input file %s\n", inFile); |
267 |
return -1; |
268 |
} |
269 |
|
270 |
/* Get input file size and number of blocks (rounded up to nearest |
271 |
* multiple of block size) */ |
272 |
fseek(in, 0, SEEK_END); |
273 |
numBlk = (ftell(in) + blkSize - 1) / blkSize; |
274 |
rewind(in); |
275 |
#else |
276 |
|
277 |
int OOC_Sort (const char *inFile, const char *outFile, |
278 |
unsigned numBlk, unsigned recSize, |
279 |
OOC_Sort_Key (*priority)(const void *)) |
280 |
/* Sort records in inFile and output to outFile by (a) internally |
281 |
* quicksorting numBlk blocks at a time, then (b) merging them via a priority |
282 |
* queue. RecSize specifies the size in bytes of each data record. The |
283 |
* priority() callback evaluates a record's priority (ordinal index) and |
284 |
* must be supplied by the caller. */ |
285 |
{ |
286 |
FILE *in = NULL; |
287 |
OOC_Sort_PQueue pqueue; /* Priority queue */ |
288 |
OOC_Sort_Block *iBlk = NULL, oBlk; /* Block descriptors */ |
289 |
char *rec, *sortBuf = NULL; /* Internal sort buffa */ |
290 |
unsigned long blkSize; |
291 |
unsigned bufSize; |
292 |
int b; |
293 |
|
294 |
/* Set record priority evaluation callback */ |
295 |
pri = priority; |
296 |
|
297 |
/* Round buffa size down to nearest multiple of record size */ |
298 |
bufSize = (OOC_SORT_BUFSIZE / recSize) * recSize; |
299 |
|
300 |
if (!(in = fopen(inFile, "rb"))) { |
301 |
fprintf(stderr, "OOC_Sort: failure opening input file %s\n", inFile); |
302 |
return -1; |
303 |
} |
304 |
|
305 |
/* Get input file size and block size (in number of records) rounded up |
306 |
* to nearest multiple of number of blocks */ |
307 |
fseek(in, 0, SEEK_END); |
308 |
blkSize = (ftell(in) / recSize + numBlk - 1) / numBlk; |
309 |
rewind(in); |
310 |
#endif |
311 |
|
312 |
/* Allocate buffa for internal sorting */ |
313 |
if (!(sortBuf = malloc(blkSize))) { |
314 |
fprintf(stderr, "OOC_Sort: failure allocating internal sort buffer\n"); |
315 |
return -1; |
316 |
} |
317 |
|
318 |
/* Allocate input blocks */ |
319 |
if (!(iBlk = calloc(numBlk, sizeof(OOC_Sort_Block)))) { |
320 |
fprintf(stderr, "OOC_Sort: failure allocating input blocks\n"); |
321 |
return -1; |
322 |
} |
323 |
|
324 |
/* =================================================================== |
325 |
* Pass 1: Internal quicksort of each input block in sortBuf |
326 |
* =================================================================== */ |
327 |
|
328 |
for (b = 0; b < numBlk; b++) { |
329 |
unsigned long numRec; |
330 |
|
331 |
/* Open temporary file associated with block */ |
332 |
if (!(iBlk [b].file = tmpfile())) { |
333 |
/* fprintf(stderr, "OOC_Sort: failure opening block file\n"); */ |
334 |
perror("OOC_Sort: failure opening block file"); |
335 |
return -1; |
336 |
} |
337 |
|
338 |
if (feof(in)) { |
339 |
fprintf(stderr, "OOC_Sort: unexpected end of input file %s\n", |
340 |
inFile); |
341 |
return -1; |
342 |
} |
343 |
|
344 |
/* Read next block (potentially truncated if last) */ |
345 |
if (!(numRec = fread(sortBuf, 1, blkSize, in) / recSize)) { |
346 |
fprintf(stderr, "OOC_Sort: error reading from input file %s\n", |
347 |
inFile); |
348 |
return -1; |
349 |
} |
350 |
|
351 |
/* Quicksort block internally and write out to temp file */ |
352 |
qsort(sortBuf, numRec, recSize, OOC_Sort_Compare); |
353 |
|
354 |
if (fwrite(sortBuf, recSize, numRec, iBlk [b].file) != numRec) { |
355 |
fprintf(stderr, "OOC_Sort: error writing to block file\n"); |
356 |
return -1; |
357 |
} |
358 |
|
359 |
rewind(iBlk [b].file); |
360 |
} |
361 |
|
362 |
/* Close input file and free sort buffa */ |
363 |
fclose(in); |
364 |
free(sortBuf); |
365 |
|
366 |
/* Allocate priority queue with numBlk nodes */ |
367 |
pqueue.tail = 0; |
368 |
pqueue.len = numBlk; |
369 |
if (!(pqueue.node = calloc(numBlk, sizeof(OOC_Sort_PQNode)))) { |
370 |
fprintf(stderr, "OOC_Sort: failure allocating priority queue\n"); |
371 |
return -1; |
372 |
} |
373 |
|
374 |
/* Prepare for pass 2: allocate buffa for each input block and initialise |
375 |
* fill priority queue with single record from each block */ |
376 |
for (b = 0; b < numBlk; b++) { |
377 |
if (!(iBlk [b].buf = malloc(bufSize))) { |
378 |
fprintf(stderr, "OOC_Sort: failure allocating input buffer\n"); |
379 |
return -1; |
380 |
} |
381 |
|
382 |
/* Peek first record in block file without modifying buffa */ |
383 |
iBlk [b].head = iBlk [b].tail = iBlk [b].buf + bufSize; |
384 |
if (!(rec = OOC_Sort_Peek(iBlk + b, recSize))) { |
385 |
fprintf(stderr, "OOC_Sort: error reading from block file\n"); |
386 |
return -1; |
387 |
} |
388 |
|
389 |
/* Insert record into priority queue */ |
390 |
if (OOC_Sort_Push(&pqueue, priority(rec), b) < 0) { |
391 |
fprintf(stderr, "OOC_Sort: failed priority queue insertion\n"); |
392 |
return -1; |
393 |
} |
394 |
} |
395 |
|
396 |
/* Allocate output buffa and open output file */ |
397 |
if (!(oBlk.file = fopen(outFile, "wb"))) { |
398 |
fprintf(stderr, "OOC_Sort: failure opening output file %s\n", outFile); |
399 |
return -1; |
400 |
} |
401 |
|
402 |
if (!(oBlk.head = oBlk.buf = malloc(bufSize))) { |
403 |
fprintf(stderr, "OOC_Sort: failure allocating output buffer\n"); |
404 |
return -1; |
405 |
} |
406 |
|
407 |
/* tail marks end of output buffa */ |
408 |
oBlk.tail = oBlk.buf + bufSize; |
409 |
|
410 |
/* =================================================================== |
411 |
* Pass 2: External merge of all blocks using priority queue |
412 |
* =================================================================== */ |
413 |
|
414 |
do { |
415 |
/* Get next node in priority queue, read next record in corresponding |
416 |
* block, and send to output */ |
417 |
b = OOC_Sort_Pop(&pqueue); |
418 |
|
419 |
if (b >= 0) { |
420 |
/* Priority queue non-empty */ |
421 |
if (!(rec = OOC_Sort_Read(iBlk + b, recSize))) { |
422 |
/* Corresponding record should still be in the buffa, so EOF |
423 |
* should not happen */ |
424 |
fprintf(stderr, "OOC_Sort: unexpected EOF in block file\n"); |
425 |
return -1; |
426 |
} |
427 |
|
428 |
if (!OOC_Sort_Write(&oBlk, recSize, rec)) { |
429 |
fprintf(stderr, "OOC_Sort; error writing to output file %s\n", |
430 |
outFile); |
431 |
return -1; |
432 |
} |
433 |
|
434 |
/* Peek next record from same block and insert in priority queue */ |
435 |
if (rec = OOC_Sort_Peek(iBlk + b, recSize)) |
436 |
/* Buffa not exhausted */ |
437 |
if (OOC_Sort_Push(&pqueue, priority(rec), b) < 0) { |
438 |
fprintf(stderr, "OOC_Sort: failed priority queue insertion\n"); |
439 |
return -1; |
440 |
} |
441 |
} |
442 |
} while (b >= 0); |
443 |
|
444 |
/* Priority queue now empty; flush output buffa & close file */ |
445 |
oBlk.tail = oBlk.head; |
446 |
OOC_Sort_Write(&oBlk, 0, NULL); |
447 |
fclose(oBlk.file); |
448 |
|
449 |
for (b = 0; b < numBlk; b++) { |
450 |
fclose(iBlk [b].file); |
451 |
free(iBlk [b].buf); |
452 |
} |
453 |
|
454 |
free(iBlk); |
455 |
free(pqueue.node); |
456 |
|
457 |
return 0; |
458 |
} |