LCOV - code coverage report
Current view: top level - Objects/stringlib - fastsearch.h (source / functions) Hit Total Coverage
Test: CPython lcov report Lines: 15 70 21.4 %
Date: 2017-04-19 Functions: 1 1 100.0 %

          Line data    Source code
       1             : /* stringlib: fastsearch implementation */
       2             : 
       3             : #ifndef STRINGLIB_FASTSEARCH_H
       4             : #define STRINGLIB_FASTSEARCH_H
       5             : 
       6             : /* fast search/count implementation, based on a mix between boyer-
       7             :    moore and horspool, with a few more bells and whistles on the top.
       8             :    for some more background, see: http://effbot.org/zone/stringlib.htm */
       9             : 
      10             : /* note: fastsearch may access s[n], which isn't a problem when using
      11             :    Python's ordinary string types, but may cause problems if you're
      12             :    using this code in other contexts.  also, the count mode returns -1
      13             :    if there cannot possible be a match in the target string, and 0 if
      14             :    it has actually checked for matches, but didn't find any.  callers
      15             :    beware! */
      16             : 
      17             : #define FAST_COUNT 0
      18             : #define FAST_SEARCH 1
      19             : #define FAST_RSEARCH 2
      20             : 
      21             : #if LONG_BIT >= 128
      22             : #define STRINGLIB_BLOOM_WIDTH 128
      23             : #elif LONG_BIT >= 64
      24             : #define STRINGLIB_BLOOM_WIDTH 64
      25             : #elif LONG_BIT >= 32
      26             : #define STRINGLIB_BLOOM_WIDTH 32
      27             : #else
      28             : #error "LONG_BIT is smaller than 32"
      29             : #endif
      30             : 
      31             : #define STRINGLIB_BLOOM_ADD(mask, ch) \
      32             :     ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
      33             : #define STRINGLIB_BLOOM(mask, ch)     \
      34             :     ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
      35             : 
      36             : Py_LOCAL_INLINE(Py_ssize_t)
      37        8103 : fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
      38             :            const STRINGLIB_CHAR* p, Py_ssize_t m,
      39             :            Py_ssize_t maxcount, int mode)
      40             : {
      41             :     unsigned long mask;
      42        8103 :     Py_ssize_t skip, count = 0;
      43             :     Py_ssize_t i, j, mlast, w;
      44             : 
      45        8103 :     w = n - m;
      46             : 
      47        8103 :     if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
      48           0 :         return -1;
      49             : 
      50             :     /* look for special cases */
      51        8103 :     if (m <= 1) {
      52        8103 :         if (m <= 0)
      53           0 :             return -1;
      54             :         /* use special case for 1-character strings */
      55        8103 :         if (mode == FAST_COUNT) {
      56           0 :             for (i = 0; i < n; i++)
      57           0 :                 if (s[i] == p[0]) {
      58           0 :                     count++;
      59           0 :                     if (count == maxcount)
      60           0 :                         return maxcount;
      61             :                 }
      62           0 :             return count;
      63        8103 :         } else if (mode == FAST_SEARCH) {
      64      161403 :             for (i = 0; i < n; i++)
      65      158505 :                 if (s[i] == p[0])
      66        5187 :                     return i;
      67             :         } else {    /* FAST_RSEARCH */
      68         114 :             for (i = n - 1; i > -1; i--)
      69         111 :                 if (s[i] == p[0])
      70          15 :                     return i;
      71             :         }
      72        2901 :         return -1;
      73             :     }
      74             : 
      75           0 :     mlast = m - 1;
      76           0 :     skip = mlast - 1;
      77           0 :     mask = 0;
      78             : 
      79           0 :     if (mode != FAST_RSEARCH) {
      80             : 
      81             :         /* create compressed boyer-moore delta 1 table */
      82             : 
      83             :         /* process pattern[:-1] */
      84           0 :         for (i = 0; i < mlast; i++) {
      85           0 :             STRINGLIB_BLOOM_ADD(mask, p[i]);
      86           0 :             if (p[i] == p[mlast])
      87           0 :                 skip = mlast - i - 1;
      88             :         }
      89             :         /* process pattern[-1] outside the loop */
      90           0 :         STRINGLIB_BLOOM_ADD(mask, p[mlast]);
      91             : 
      92           0 :         for (i = 0; i <= w; i++) {
      93             :             /* note: using mlast in the skip path slows things down on x86 */
      94           0 :             if (s[i+m-1] == p[m-1]) {
      95             :                 /* candidate match */
      96           0 :                 for (j = 0; j < mlast; j++)
      97           0 :                     if (s[i+j] != p[j])
      98           0 :                         break;
      99           0 :                 if (j == mlast) {
     100             :                     /* got a match! */
     101           0 :                     if (mode != FAST_COUNT)
     102           0 :                         return i;
     103           0 :                     count++;
     104           0 :                     if (count == maxcount)
     105           0 :                         return maxcount;
     106           0 :                     i = i + mlast;
     107           0 :                     continue;
     108             :                 }
     109             :                 /* miss: check if next character is part of pattern */
     110           0 :                 if (!STRINGLIB_BLOOM(mask, s[i+m]))
     111           0 :                     i = i + m;
     112             :                 else
     113           0 :                     i = i + skip;
     114             :             } else {
     115             :                 /* skip: check if next character is part of pattern */
     116           0 :                 if (!STRINGLIB_BLOOM(mask, s[i+m]))
     117           0 :                     i = i + m;
     118             :             }
     119             :         }
     120             :     } else {    /* FAST_RSEARCH */
     121             : 
     122             :         /* create compressed boyer-moore delta 1 table */
     123             : 
     124             :         /* process pattern[0] outside the loop */
     125           0 :         STRINGLIB_BLOOM_ADD(mask, p[0]);
     126             :         /* process pattern[:0:-1] */
     127           0 :         for (i = mlast; i > 0; i--) {
     128           0 :             STRINGLIB_BLOOM_ADD(mask, p[i]);
     129           0 :             if (p[i] == p[0])
     130           0 :                 skip = i - 1;
     131             :         }
     132             : 
     133           0 :         for (i = w; i >= 0; i--) {
     134           0 :             if (s[i] == p[0]) {
     135             :                 /* candidate match */
     136           0 :                 for (j = mlast; j > 0; j--)
     137           0 :                     if (s[i+j] != p[j])
     138           0 :                         break;
     139           0 :                 if (j == 0)
     140             :                     /* got a match! */
     141           0 :                     return i;
     142             :                 /* miss: check if previous character is part of pattern */
     143           0 :                 if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
     144           0 :                     i = i - m;
     145             :                 else
     146           0 :                     i = i - skip;
     147             :             } else {
     148             :                 /* skip: check if previous character is part of pattern */
     149           0 :                 if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
     150           0 :                     i = i - m;
     151             :             }
     152             :         }
     153             :     }
     154             : 
     155           0 :     if (mode != FAST_COUNT)
     156           0 :         return -1;
     157           0 :     return count;
     158             : }
     159             : 
     160             : #endif

Generated by: LCOV version 1.10