Line data Source code
1 : /* Drop in replacement for heapq.py
2 :
3 : C implementation derived directly from heapq.py in Py2.3
4 : which was written by Kevin O'Connor, augmented by Tim Peters,
5 : annotated by François Pinard, and converted to C by Raymond Hettinger.
6 :
7 : */
8 :
9 : #include "Python.h"
10 :
11 : /* Older implementations of heapq used Py_LE for comparisons. Now, it uses
12 : Py_LT so it will match min(), sorted(), and bisect(). Unfortunately, some
13 : client code (Twisted for example) relied on Py_LE, so this little function
14 : restores compatibility by trying both.
15 : */
16 : static int
17 0 : cmp_lt(PyObject *x, PyObject *y)
18 : {
19 : int cmp;
20 : static PyObject *lt = NULL;
21 :
22 0 : if (lt == NULL) {
23 0 : lt = PyString_FromString("__lt__");
24 0 : if (lt == NULL)
25 0 : return -1;
26 : }
27 0 : if (PyObject_HasAttr(x, lt))
28 0 : return PyObject_RichCompareBool(x, y, Py_LT);
29 0 : cmp = PyObject_RichCompareBool(y, x, Py_LE);
30 0 : if (cmp != -1)
31 0 : cmp = 1 - cmp;
32 0 : return cmp;
33 : }
34 :
35 : static int
36 0 : _siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
37 : {
38 : PyObject *newitem, *parent;
39 : Py_ssize_t parentpos, size;
40 : int cmp;
41 :
42 : assert(PyList_Check(heap));
43 0 : size = PyList_GET_SIZE(heap);
44 0 : if (pos >= size) {
45 0 : PyErr_SetString(PyExc_IndexError, "index out of range");
46 0 : return -1;
47 : }
48 :
49 : /* Follow the path to the root, moving parents down until finding
50 : a place newitem fits. */
51 0 : newitem = PyList_GET_ITEM(heap, pos);
52 0 : while (pos > startpos) {
53 0 : parentpos = (pos - 1) >> 1;
54 0 : parent = PyList_GET_ITEM(heap, parentpos);
55 0 : cmp = cmp_lt(newitem, parent);
56 0 : if (cmp == -1)
57 0 : return -1;
58 0 : if (size != PyList_GET_SIZE(heap)) {
59 0 : PyErr_SetString(PyExc_RuntimeError,
60 : "list changed size during iteration");
61 0 : return -1;
62 : }
63 0 : if (cmp == 0)
64 0 : break;
65 0 : parent = PyList_GET_ITEM(heap, parentpos);
66 0 : newitem = PyList_GET_ITEM(heap, pos);
67 0 : PyList_SET_ITEM(heap, parentpos, newitem);
68 0 : PyList_SET_ITEM(heap, pos, parent);
69 0 : pos = parentpos;
70 : }
71 0 : return 0;
72 : }
73 :
74 : static int
75 0 : _siftup(PyListObject *heap, Py_ssize_t pos)
76 : {
77 : Py_ssize_t startpos, endpos, childpos, rightpos, limit;
78 : PyObject *tmp1, *tmp2;
79 : int cmp;
80 :
81 : assert(PyList_Check(heap));
82 0 : endpos = PyList_GET_SIZE(heap);
83 0 : startpos = pos;
84 0 : if (pos >= endpos) {
85 0 : PyErr_SetString(PyExc_IndexError, "index out of range");
86 0 : return -1;
87 : }
88 :
89 : /* Bubble up the smaller child until hitting a leaf. */
90 0 : limit = endpos / 2; /* smallest pos that has no child */
91 0 : while (pos < limit) {
92 : /* Set childpos to index of smaller child. */
93 0 : childpos = 2*pos + 1; /* leftmost child position */
94 0 : rightpos = childpos + 1;
95 0 : if (rightpos < endpos) {
96 0 : cmp = cmp_lt(
97 0 : PyList_GET_ITEM(heap, childpos),
98 0 : PyList_GET_ITEM(heap, rightpos));
99 0 : if (cmp == -1)
100 0 : return -1;
101 0 : if (cmp == 0)
102 0 : childpos = rightpos;
103 0 : if (endpos != PyList_GET_SIZE(heap)) {
104 0 : PyErr_SetString(PyExc_RuntimeError,
105 : "list changed size during iteration");
106 0 : return -1;
107 : }
108 : }
109 : /* Move the smaller child up. */
110 0 : tmp1 = PyList_GET_ITEM(heap, childpos);
111 0 : tmp2 = PyList_GET_ITEM(heap, pos);
112 0 : PyList_SET_ITEM(heap, childpos, tmp2);
113 0 : PyList_SET_ITEM(heap, pos, tmp1);
114 0 : pos = childpos;
115 : }
116 : /* Bubble it up to its final resting place (by sifting its parents down). */
117 0 : return _siftdown(heap, startpos, pos);
118 : }
119 :
120 : static PyObject *
121 0 : heappush(PyObject *self, PyObject *args)
122 : {
123 : PyObject *heap, *item;
124 :
125 0 : if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item))
126 0 : return NULL;
127 :
128 0 : if (!PyList_Check(heap)) {
129 0 : PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
130 0 : return NULL;
131 : }
132 :
133 0 : if (PyList_Append(heap, item) == -1)
134 0 : return NULL;
135 :
136 0 : if (_siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1) == -1)
137 0 : return NULL;
138 0 : Py_INCREF(Py_None);
139 0 : return Py_None;
140 : }
141 :
142 : PyDoc_STRVAR(heappush_doc,
143 : "heappush(heap, item) -> None. Push item onto heap, maintaining the heap invariant.");
144 :
145 : static PyObject *
146 0 : heappop(PyObject *self, PyObject *heap)
147 : {
148 : PyObject *lastelt, *returnitem;
149 : Py_ssize_t n;
150 :
151 0 : if (!PyList_Check(heap)) {
152 0 : PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
153 0 : return NULL;
154 : }
155 :
156 : /* # raises appropriate IndexError if heap is empty */
157 0 : n = PyList_GET_SIZE(heap);
158 0 : if (n == 0) {
159 0 : PyErr_SetString(PyExc_IndexError, "index out of range");
160 0 : return NULL;
161 : }
162 :
163 0 : lastelt = PyList_GET_ITEM(heap, n-1) ;
164 0 : Py_INCREF(lastelt);
165 0 : PyList_SetSlice(heap, n-1, n, NULL);
166 0 : n--;
167 :
168 0 : if (!n)
169 0 : return lastelt;
170 0 : returnitem = PyList_GET_ITEM(heap, 0);
171 0 : PyList_SET_ITEM(heap, 0, lastelt);
172 0 : if (_siftup((PyListObject *)heap, 0) == -1) {
173 0 : Py_DECREF(returnitem);
174 0 : return NULL;
175 : }
176 0 : return returnitem;
177 : }
178 :
179 : PyDoc_STRVAR(heappop_doc,
180 : "Pop the smallest item off the heap, maintaining the heap invariant.");
181 :
182 : static PyObject *
183 0 : heapreplace(PyObject *self, PyObject *args)
184 : {
185 : PyObject *heap, *item, *returnitem;
186 :
187 0 : if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item))
188 0 : return NULL;
189 :
190 0 : if (!PyList_Check(heap)) {
191 0 : PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
192 0 : return NULL;
193 : }
194 :
195 0 : if (PyList_GET_SIZE(heap) < 1) {
196 0 : PyErr_SetString(PyExc_IndexError, "index out of range");
197 0 : return NULL;
198 : }
199 :
200 0 : returnitem = PyList_GET_ITEM(heap, 0);
201 0 : Py_INCREF(item);
202 0 : PyList_SET_ITEM(heap, 0, item);
203 0 : if (_siftup((PyListObject *)heap, 0) == -1) {
204 0 : Py_DECREF(returnitem);
205 0 : return NULL;
206 : }
207 0 : return returnitem;
208 : }
209 :
210 : PyDoc_STRVAR(heapreplace_doc,
211 : "heapreplace(heap, item) -> value. Pop and return the current smallest value, and add the new item.\n\
212 : \n\
213 : This is more efficient than heappop() followed by heappush(), and can be\n\
214 : more appropriate when using a fixed-size heap. Note that the value\n\
215 : returned may be larger than item! That constrains reasonable uses of\n\
216 : this routine unless written as part of a conditional replacement:\n\n\
217 : if item > heap[0]:\n\
218 : item = heapreplace(heap, item)\n");
219 :
220 : static PyObject *
221 0 : heappushpop(PyObject *self, PyObject *args)
222 : {
223 : PyObject *heap, *item, *returnitem;
224 : int cmp;
225 :
226 0 : if (!PyArg_UnpackTuple(args, "heappushpop", 2, 2, &heap, &item))
227 0 : return NULL;
228 :
229 0 : if (!PyList_Check(heap)) {
230 0 : PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
231 0 : return NULL;
232 : }
233 :
234 0 : if (PyList_GET_SIZE(heap) < 1) {
235 0 : Py_INCREF(item);
236 0 : return item;
237 : }
238 :
239 0 : cmp = cmp_lt(PyList_GET_ITEM(heap, 0), item);
240 0 : if (cmp == -1)
241 0 : return NULL;
242 0 : if (cmp == 0) {
243 0 : Py_INCREF(item);
244 0 : return item;
245 : }
246 :
247 0 : if (PyList_GET_SIZE(heap) == 0) {
248 0 : PyErr_SetString(PyExc_IndexError, "index out of range");
249 0 : return NULL;
250 : }
251 :
252 0 : returnitem = PyList_GET_ITEM(heap, 0);
253 0 : Py_INCREF(item);
254 0 : PyList_SET_ITEM(heap, 0, item);
255 0 : if (_siftup((PyListObject *)heap, 0) == -1) {
256 0 : Py_DECREF(returnitem);
257 0 : return NULL;
258 : }
259 0 : return returnitem;
260 : }
261 :
262 : PyDoc_STRVAR(heappushpop_doc,
263 : "heappushpop(heap, item) -> value. Push item on the heap, then pop and return the smallest item\n\
264 : from the heap. The combined action runs more efficiently than\n\
265 : heappush() followed by a separate call to heappop().");
266 :
267 : static PyObject *
268 0 : heapify(PyObject *self, PyObject *heap)
269 : {
270 : Py_ssize_t i, n;
271 :
272 0 : if (!PyList_Check(heap)) {
273 0 : PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
274 0 : return NULL;
275 : }
276 :
277 0 : n = PyList_GET_SIZE(heap);
278 : /* Transform bottom-up. The largest index there's any point to
279 : looking at is the largest with a child index in-range, so must
280 : have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
281 : (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
282 : n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
283 : and that's again n//2-1.
284 : */
285 0 : for (i=n/2-1 ; i>=0 ; i--)
286 0 : if(_siftup((PyListObject *)heap, i) == -1)
287 0 : return NULL;
288 0 : Py_INCREF(Py_None);
289 0 : return Py_None;
290 : }
291 :
292 : PyDoc_STRVAR(heapify_doc,
293 : "Transform list into a heap, in-place, in O(len(heap)) time.");
294 :
295 : static PyObject *
296 0 : nlargest(PyObject *self, PyObject *args)
297 : {
298 0 : PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
299 : Py_ssize_t i, n;
300 : int cmp;
301 :
302 0 : if (!PyArg_ParseTuple(args, "nO:nlargest", &n, &iterable))
303 0 : return NULL;
304 :
305 0 : it = PyObject_GetIter(iterable);
306 0 : if (it == NULL)
307 0 : return NULL;
308 :
309 0 : heap = PyList_New(0);
310 0 : if (heap == NULL)
311 0 : goto fail;
312 :
313 0 : for (i=0 ; i<n ; i++ ){
314 0 : elem = PyIter_Next(it);
315 0 : if (elem == NULL) {
316 0 : if (PyErr_Occurred())
317 0 : goto fail;
318 : else
319 0 : goto sortit;
320 : }
321 0 : if (PyList_Append(heap, elem) == -1) {
322 0 : Py_DECREF(elem);
323 0 : goto fail;
324 : }
325 0 : Py_DECREF(elem);
326 : }
327 0 : if (PyList_GET_SIZE(heap) == 0)
328 0 : goto sortit;
329 :
330 0 : for (i=n/2-1 ; i>=0 ; i--)
331 0 : if(_siftup((PyListObject *)heap, i) == -1)
332 0 : goto fail;
333 :
334 0 : sol = PyList_GET_ITEM(heap, 0);
335 : while (1) {
336 0 : elem = PyIter_Next(it);
337 0 : if (elem == NULL) {
338 0 : if (PyErr_Occurred())
339 0 : goto fail;
340 : else
341 0 : goto sortit;
342 : }
343 0 : cmp = cmp_lt(sol, elem);
344 0 : if (cmp == -1) {
345 0 : Py_DECREF(elem);
346 0 : goto fail;
347 : }
348 0 : if (cmp == 0) {
349 0 : Py_DECREF(elem);
350 0 : continue;
351 : }
352 0 : oldelem = PyList_GET_ITEM(heap, 0);
353 0 : PyList_SET_ITEM(heap, 0, elem);
354 0 : Py_DECREF(oldelem);
355 0 : if (_siftup((PyListObject *)heap, 0) == -1)
356 0 : goto fail;
357 0 : sol = PyList_GET_ITEM(heap, 0);
358 0 : }
359 : sortit:
360 0 : if (PyList_Sort(heap) == -1)
361 0 : goto fail;
362 0 : if (PyList_Reverse(heap) == -1)
363 0 : goto fail;
364 0 : Py_DECREF(it);
365 0 : return heap;
366 :
367 : fail:
368 0 : Py_DECREF(it);
369 0 : Py_XDECREF(heap);
370 0 : return NULL;
371 : }
372 :
373 : PyDoc_STRVAR(nlargest_doc,
374 : "Find the n largest elements in a dataset.\n\
375 : \n\
376 : Equivalent to: sorted(iterable, reverse=True)[:n]\n");
377 :
378 : static int
379 0 : _siftdownmax(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
380 : {
381 : PyObject *newitem, *parent;
382 : int cmp;
383 : Py_ssize_t parentpos;
384 :
385 : assert(PyList_Check(heap));
386 0 : if (pos >= PyList_GET_SIZE(heap)) {
387 0 : PyErr_SetString(PyExc_IndexError, "index out of range");
388 0 : return -1;
389 : }
390 :
391 0 : newitem = PyList_GET_ITEM(heap, pos);
392 0 : Py_INCREF(newitem);
393 : /* Follow the path to the root, moving parents down until finding
394 : a place newitem fits. */
395 0 : while (pos > startpos){
396 0 : parentpos = (pos - 1) >> 1;
397 0 : parent = PyList_GET_ITEM(heap, parentpos);
398 0 : cmp = cmp_lt(parent, newitem);
399 0 : if (cmp == -1) {
400 0 : Py_DECREF(newitem);
401 0 : return -1;
402 : }
403 0 : if (cmp == 0)
404 0 : break;
405 0 : Py_INCREF(parent);
406 0 : Py_DECREF(PyList_GET_ITEM(heap, pos));
407 0 : PyList_SET_ITEM(heap, pos, parent);
408 0 : pos = parentpos;
409 : }
410 0 : Py_DECREF(PyList_GET_ITEM(heap, pos));
411 0 : PyList_SET_ITEM(heap, pos, newitem);
412 0 : return 0;
413 : }
414 :
415 : static int
416 0 : _siftupmax(PyListObject *heap, Py_ssize_t pos)
417 : {
418 : Py_ssize_t startpos, endpos, childpos, rightpos, limit;
419 : int cmp;
420 : PyObject *newitem, *tmp;
421 :
422 : assert(PyList_Check(heap));
423 0 : endpos = PyList_GET_SIZE(heap);
424 0 : startpos = pos;
425 0 : if (pos >= endpos) {
426 0 : PyErr_SetString(PyExc_IndexError, "index out of range");
427 0 : return -1;
428 : }
429 0 : newitem = PyList_GET_ITEM(heap, pos);
430 0 : Py_INCREF(newitem);
431 :
432 : /* Bubble up the smaller child until hitting a leaf. */
433 0 : limit = endpos / 2; /* smallest pos that has no child */
434 0 : while (pos < limit) {
435 : /* Set childpos to index of smaller child. */
436 0 : childpos = 2*pos + 1; /* leftmost child position */
437 0 : rightpos = childpos + 1;
438 0 : if (rightpos < endpos) {
439 0 : cmp = cmp_lt(
440 0 : PyList_GET_ITEM(heap, rightpos),
441 0 : PyList_GET_ITEM(heap, childpos));
442 0 : if (cmp == -1) {
443 0 : Py_DECREF(newitem);
444 0 : return -1;
445 : }
446 0 : if (cmp == 0)
447 0 : childpos = rightpos;
448 : }
449 : /* Move the smaller child up. */
450 0 : tmp = PyList_GET_ITEM(heap, childpos);
451 0 : Py_INCREF(tmp);
452 0 : Py_DECREF(PyList_GET_ITEM(heap, pos));
453 0 : PyList_SET_ITEM(heap, pos, tmp);
454 0 : pos = childpos;
455 : }
456 :
457 : /* The leaf at pos is empty now. Put newitem there, and bubble
458 : it up to its final resting place (by sifting its parents down). */
459 0 : Py_DECREF(PyList_GET_ITEM(heap, pos));
460 0 : PyList_SET_ITEM(heap, pos, newitem);
461 0 : return _siftdownmax(heap, startpos, pos);
462 : }
463 :
464 : static PyObject *
465 0 : nsmallest(PyObject *self, PyObject *args)
466 : {
467 0 : PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
468 : Py_ssize_t i, n;
469 : int cmp;
470 :
471 0 : if (!PyArg_ParseTuple(args, "nO:nsmallest", &n, &iterable))
472 0 : return NULL;
473 :
474 0 : it = PyObject_GetIter(iterable);
475 0 : if (it == NULL)
476 0 : return NULL;
477 :
478 0 : heap = PyList_New(0);
479 0 : if (heap == NULL)
480 0 : goto fail;
481 :
482 0 : for (i=0 ; i<n ; i++ ){
483 0 : elem = PyIter_Next(it);
484 0 : if (elem == NULL) {
485 0 : if (PyErr_Occurred())
486 0 : goto fail;
487 : else
488 0 : goto sortit;
489 : }
490 0 : if (PyList_Append(heap, elem) == -1) {
491 0 : Py_DECREF(elem);
492 0 : goto fail;
493 : }
494 0 : Py_DECREF(elem);
495 : }
496 0 : n = PyList_GET_SIZE(heap);
497 0 : if (n == 0)
498 0 : goto sortit;
499 :
500 0 : for (i=n/2-1 ; i>=0 ; i--)
501 0 : if(_siftupmax((PyListObject *)heap, i) == -1)
502 0 : goto fail;
503 :
504 0 : los = PyList_GET_ITEM(heap, 0);
505 : while (1) {
506 0 : elem = PyIter_Next(it);
507 0 : if (elem == NULL) {
508 0 : if (PyErr_Occurred())
509 0 : goto fail;
510 : else
511 0 : goto sortit;
512 : }
513 0 : cmp = cmp_lt(elem, los);
514 0 : if (cmp == -1) {
515 0 : Py_DECREF(elem);
516 0 : goto fail;
517 : }
518 0 : if (cmp == 0) {
519 0 : Py_DECREF(elem);
520 0 : continue;
521 : }
522 :
523 0 : oldelem = PyList_GET_ITEM(heap, 0);
524 0 : PyList_SET_ITEM(heap, 0, elem);
525 0 : Py_DECREF(oldelem);
526 0 : if (_siftupmax((PyListObject *)heap, 0) == -1)
527 0 : goto fail;
528 0 : los = PyList_GET_ITEM(heap, 0);
529 0 : }
530 :
531 : sortit:
532 0 : if (PyList_Sort(heap) == -1)
533 0 : goto fail;
534 0 : Py_DECREF(it);
535 0 : return heap;
536 :
537 : fail:
538 0 : Py_DECREF(it);
539 0 : Py_XDECREF(heap);
540 0 : return NULL;
541 : }
542 :
543 : PyDoc_STRVAR(nsmallest_doc,
544 : "Find the n smallest elements in a dataset.\n\
545 : \n\
546 : Equivalent to: sorted(iterable)[:n]\n");
547 :
548 : static PyMethodDef heapq_methods[] = {
549 : {"heappush", (PyCFunction)heappush,
550 : METH_VARARGS, heappush_doc},
551 : {"heappushpop", (PyCFunction)heappushpop,
552 : METH_VARARGS, heappushpop_doc},
553 : {"heappop", (PyCFunction)heappop,
554 : METH_O, heappop_doc},
555 : {"heapreplace", (PyCFunction)heapreplace,
556 : METH_VARARGS, heapreplace_doc},
557 : {"heapify", (PyCFunction)heapify,
558 : METH_O, heapify_doc},
559 : {"nlargest", (PyCFunction)nlargest,
560 : METH_VARARGS, nlargest_doc},
561 : {"nsmallest", (PyCFunction)nsmallest,
562 : METH_VARARGS, nsmallest_doc},
563 : {NULL, NULL} /* sentinel */
564 : };
565 :
566 : PyDoc_STRVAR(module_doc,
567 : "Heap queue algorithm (a.k.a. priority queue).\n\
568 : \n\
569 : Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
570 : all k, counting elements from 0. For the sake of comparison,\n\
571 : non-existing elements are considered to be infinite. The interesting\n\
572 : property of a heap is that a[0] is always its smallest element.\n\
573 : \n\
574 : Usage:\n\
575 : \n\
576 : heap = [] # creates an empty heap\n\
577 : heappush(heap, item) # pushes a new item on the heap\n\
578 : item = heappop(heap) # pops the smallest item from the heap\n\
579 : item = heap[0] # smallest item on the heap without popping it\n\
580 : heapify(x) # transforms list into a heap, in-place, in linear time\n\
581 : item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
582 : # new item; the heap size is unchanged\n\
583 : \n\
584 : Our API differs from textbook heap algorithms as follows:\n\
585 : \n\
586 : - We use 0-based indexing. This makes the relationship between the\n\
587 : index for a node and the indexes for its children slightly less\n\
588 : obvious, but is more suitable since Python uses 0-based indexing.\n\
589 : \n\
590 : - Our heappop() method returns the smallest item, not the largest.\n\
591 : \n\
592 : These two make it possible to view the heap as a regular Python list\n\
593 : without surprises: heap[0] is the smallest item, and heap.sort()\n\
594 : maintains the heap invariant!\n");
595 :
596 :
597 : PyDoc_STRVAR(__about__,
598 : "Heap queues\n\
599 : \n\
600 : [explanation by François Pinard]\n\
601 : \n\
602 : Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
603 : all k, counting elements from 0. For the sake of comparison,\n\
604 : non-existing elements are considered to be infinite. The interesting\n\
605 : property of a heap is that a[0] is always its smallest element.\n"
606 : "\n\
607 : The strange invariant above is meant to be an efficient memory\n\
608 : representation for a tournament. The numbers below are `k', not a[k]:\n\
609 : \n\
610 : 0\n\
611 : \n\
612 : 1 2\n\
613 : \n\
614 : 3 4 5 6\n\
615 : \n\
616 : 7 8 9 10 11 12 13 14\n\
617 : \n\
618 : 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
619 : \n\
620 : \n\
621 : In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
622 : a usual binary tournament we see in sports, each cell is the winner\n\
623 : over the two cells it tops, and we can trace the winner down the tree\n\
624 : to see all opponents s/he had. However, in many computer applications\n\
625 : of such tournaments, we do not need to trace the history of a winner.\n\
626 : To be more memory efficient, when a winner is promoted, we try to\n\
627 : replace it by something else at a lower level, and the rule becomes\n\
628 : that a cell and the two cells it tops contain three different items,\n\
629 : but the top cell \"wins\" over the two topped cells.\n"
630 : "\n\
631 : If this heap invariant is protected at all time, index 0 is clearly\n\
632 : the overall winner. The simplest algorithmic way to remove it and\n\
633 : find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
634 : diagram above) into the 0 position, and then percolate this new 0 down\n\
635 : the tree, exchanging values, until the invariant is re-established.\n\
636 : This is clearly logarithmic on the total number of items in the tree.\n\
637 : By iterating over all items, you get an O(n ln n) sort.\n"
638 : "\n\
639 : A nice feature of this sort is that you can efficiently insert new\n\
640 : items while the sort is going on, provided that the inserted items are\n\
641 : not \"better\" than the last 0'th element you extracted. This is\n\
642 : especially useful in simulation contexts, where the tree holds all\n\
643 : incoming events, and the \"win\" condition means the smallest scheduled\n\
644 : time. When an event schedule other events for execution, they are\n\
645 : scheduled into the future, so they can easily go into the heap. So, a\n\
646 : heap is a good structure for implementing schedulers (this is what I\n\
647 : used for my MIDI sequencer :-).\n"
648 : "\n\
649 : Various structures for implementing schedulers have been extensively\n\
650 : studied, and heaps are good for this, as they are reasonably speedy,\n\
651 : the speed is almost constant, and the worst case is not much different\n\
652 : than the average case. However, there are other representations which\n\
653 : are more efficient overall, yet the worst cases might be terrible.\n"
654 : "\n\
655 : Heaps are also very useful in big disk sorts. You most probably all\n\
656 : know that a big sort implies producing \"runs\" (which are pre-sorted\n\
657 : sequences, which size is usually related to the amount of CPU memory),\n\
658 : followed by a merging passes for these runs, which merging is often\n\
659 : very cleverly organised[1]. It is very important that the initial\n\
660 : sort produces the longest runs possible. Tournaments are a good way\n\
661 : to that. If, using all the memory available to hold a tournament, you\n\
662 : replace and percolate items that happen to fit the current run, you'll\n\
663 : produce runs which are twice the size of the memory for random input,\n\
664 : and much better for input fuzzily ordered.\n"
665 : "\n\
666 : Moreover, if you output the 0'th item on disk and get an input which\n\
667 : may not fit in the current tournament (because the value \"wins\" over\n\
668 : the last output value), it cannot fit in the heap, so the size of the\n\
669 : heap decreases. The freed memory could be cleverly reused immediately\n\
670 : for progressively building a second heap, which grows at exactly the\n\
671 : same rate the first heap is melting. When the first heap completely\n\
672 : vanishes, you switch heaps and start a new run. Clever and quite\n\
673 : effective!\n\
674 : \n\
675 : In a word, heaps are useful memory structures to know. I use them in\n\
676 : a few applications, and I think it is good to keep a `heap' module\n\
677 : around. :-)\n"
678 : "\n\
679 : --------------------\n\
680 : [1] The disk balancing algorithms which are current, nowadays, are\n\
681 : more annoying than clever, and this is a consequence of the seeking\n\
682 : capabilities of the disks. On devices which cannot seek, like big\n\
683 : tape drives, the story was quite different, and one had to be very\n\
684 : clever to ensure (far in advance) that each tape movement will be the\n\
685 : most effective possible (that is, will best participate at\n\
686 : \"progressing\" the merge). Some tapes were even able to read\n\
687 : backwards, and this was also used to avoid the rewinding time.\n\
688 : Believe me, real good tape sorts were quite spectacular to watch!\n\
689 : From all times, sorting has always been a Great Art! :-)\n");
690 :
691 : PyMODINIT_FUNC
692 3 : init_heapq(void)
693 : {
694 : PyObject *m;
695 :
696 3 : m = Py_InitModule3("_heapq", heapq_methods, module_doc);
697 3 : if (m == NULL)
698 3 : return;
699 3 : PyModule_AddObject(m, "__about__", PyString_FromString(__about__));
700 : }
701 :
|