aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCarl Friedrich Bolz-Tereick <cfbolz@gmx.de>2021-02-25 13:02:10 +0100
committerCarl Friedrich Bolz-Tereick <cfbolz@gmx.de>2021-02-25 13:02:10 +0100
commit4c3aee2dcfcc1fa2ddab8b84354a476cd936bad1 (patch)
tree9def6e696246629ab26a853318c4e6064d8bebde
parentsecond optimization: have a fast path in replace for single character strings (diff)
downloadpypy-4c3aee2dcfcc1fa2ddab8b84354a476cd936bad1.tar.gz
pypy-4c3aee2dcfcc1fa2ddab8b84354a476cd936bad1.tar.bz2
pypy-4c3aee2dcfcc1fa2ddab8b84354a476cd936bad1.zip
fix a tiny performance bug in our string search that we ported from cpython.
the condition is a bit complicated: - we need a last character that is unique in the string - we are at a position in the string that matches the last character, but a previous char is a mismatch - the next char in the haystack is in the bloom filter if all this is met, we want to skip a whole needle length, not len(needle) - 1 this was pointed out by Tim Peters here: https://bugs.python.org/msg378301
-rw-r--r--rpython/rlib/rstring.py4
-rw-r--r--rpython/rlib/test/test_rstring.py2
-rw-r--r--rpython/rtyper/lltypesystem/rstr.py4
3 files changed, 6 insertions, 4 deletions
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
index 0f30b6aa2b..ed7a61d734 100644
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -434,7 +434,7 @@ def _search(value, other, start, end, mode):
return -1
mlast = m - 1
- skip = mlast - 1
+ skip = mlast
mask = 0
if mode != SEARCH_RFIND:
@@ -447,7 +447,7 @@ def _search(value, other, start, end, mode):
i = start - 1
while i + 1 <= start + w:
i += 1
- if value[i + m - 1] == other[m - 1]:
+ if value[i + mlast] == other[mlast]:
for j in range(mlast):
if value[i + j] != other[j]:
break
diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py
index b8b0cd8482..10724b07ee 100644
--- a/rpython/rlib/test/test_rstring.py
+++ b/rpython/rlib/test/test_rstring.py
@@ -251,6 +251,8 @@ def test_search():
check_search(find, 'one two three', 'ne', 5, 13, res=-1)
check_search(find, 'one two three', '', 0, 13, res=0)
+ check_search(find, '000000p00000000', 'ap', 0, 15, res=-1)
+
check_search(rfind, 'one two three', 'e', 0, 13, res=12)
check_search(rfind, 'one two three', 'e', 0, 1, res=-1)
check_search(rfind, 'one two three', '', 0, 13, res=13)
diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py
index 0ae4c2f459..3f05629757 100644
--- a/rpython/rtyper/lltypesystem/rstr.py
+++ b/rpython/rtyper/lltypesystem/rstr.py
@@ -794,7 +794,7 @@ class LLHelpers(AbstractLLHelpers):
return -1
mlast = m - 1
- skip = mlast - 1
+ skip = mlast
mask = 0
if mode != FAST_RFIND:
@@ -807,7 +807,7 @@ class LLHelpers(AbstractLLHelpers):
i = start - 1
while i + 1 <= start + w:
i += 1
- if s1.chars[i + m - 1] == s2.chars[m - 1]:
+ if s1.chars[i + mlast] == s2.chars[mlast]:
for j in range(mlast):
if s1.chars[i + j] != s2.chars[j]:
break