1 #!/usr/bin/env python2
2 """
3 string_ops_test.py: Tests for string_ops.py
4 """
5 from __future__ import print_function
6
7 import unittest
8
9 from core import error
10 from osh import string_ops # module under test
11
12
class LibStrTest(unittest.TestCase):
    """Tests for the UTF-8 helpers and pattern substitution in string_ops."""

    def testUtf8Encode(self):
        """Utf8Encode() maps an integer code point to its UTF-8 byte string."""
        CASES = [
            (u'\u0065'.encode('utf-8'), 0x0065),
            (u'\u0100'.encode('utf-8'), 0x0100),
            (u'\u1234'.encode('utf-8'), 0x1234),
            (u'\U00020000'.encode('utf-8'), 0x00020000),
            # Out of range gives Unicode replacement character.
            ('\xef\xbf\xbd', 0x10020000),
        ]

        for expected, code_point in CASES:
            print()
            print('Utf8Encode case %r %r' % (expected, code_point))
            self.assertEqual(expected, string_ops.Utf8Encode(code_point))

    def test_NextUtf8Char(self):
        """Walk each input forward with _NextUtf8Char() and record the results.

        Each case is (expected, input).  `expected` lists every index returned
        by successive calls starting from 0.  If a call raises error.Strict,
        its message is recorded as the last element and the walk stops.
        """
        CASES = [
            # Well-formed: 1-, 2-, 3- and 4-byte sequences.
            ([1, 3, 6, 10], '\x24\xC2\xA2\xE0\xA4\xB9\xF0\x90\x8D\x88'),
            ([1, 3, 'Invalid UTF-8 continuation byte'],
             '\x24\xC2\xA2\xE0\xE0\xA4'),
            ([1, 3, 6, 'Invalid start of UTF-8 character'],
             '\x24\xC2\xA2\xE0\xA4\xA4\xB9'),
            ([1, 3, 'Invalid start of UTF-8 character'], '\x24\xC2\xA2\xFF'),
            ([1, 'Incomplete UTF-8 character'], '\x24\xF0\x90\x8D'),
        ]
        for expected_indexes, input_str in CASES:
            print()
            print('_NextUtf8Char case %r %r' % (expected_indexes, input_str))
            i = 0
            actual_indexes = []
            while True:
                try:
                    i = string_ops._NextUtf8Char(input_str, i)
                    actual_indexes.append(i)
                    # Reached (or passed) the end of the input: done.
                    if i >= len(input_str):
                        break
                except error.Strict as e:
                    # Record the message so the case can assert on it.
                    actual_indexes.append(e.msg)
                    break
            self.assertEqual(expected_indexes, actual_indexes)

    def test_PreviousUtf8Char(self):
        """Walk each input backward with PreviousUtf8Char() and record results.

        Each case is (expected, input).  `expected` lists every index returned
        by successive calls starting from len(input).  The walk stops at
        index 0, or at the first error.Strict, whose message is recorded as
        the last element.
        """
        # The error messages could probably be improved for more consistency
        # with NextUtf8Char, at the expense of more complexity.
        CASES = [
            ([6, 3, 1, 0], '\x24\xC2\xA2\xE0\xA4\xB9\xF0\x90\x8D\x88'),
            ([6, 3, 1, 'Invalid start of UTF-8 character'],
             '\xA2\xC2\xA2\xE0\xA4\xB9\xF0\x90\x8D\x88'),
            ([10, 'Invalid start of UTF-8 character'],
             '\xF0\x90\x8D\x88\x90\x8D\x88\x90\x8D\x88\x24'),
            ([3, 'Invalid start of UTF-8 character'], '\xF0\x90\x8D\x24'),
        ]
        for expected_indexes, input_str in CASES:
            print()
            print('PreviousUtf8Char case %r %r' % (expected_indexes, input_str))
            i = len(input_str)
            actual_indexes = []
            while True:
                try:
                    i = string_ops.PreviousUtf8Char(input_str, i)
                    actual_indexes.append(i)
                    # Reached the start of the input: done.
                    if i == 0:
                        break
                except error.Strict as e:
                    # Record the message so the case can assert on it.
                    actual_indexes.append(e.msg)
                    break
            self.assertEqual(expected_indexes, actual_indexes)

    def testUnarySuffixOpDemo(self):
        """Print the split points tried for each prefix/suffix strip operator.

        This is a demo with no assertions.  For every operator it prints, in
        the order they are tried, the slice to test against the pattern and
        the slice that would be returned if that test matched.

        The headers use the shell operator spelling: `#` / `##` strip the
        shortest / longest matching prefix, `%` / `%%` strip the shortest /
        longest matching suffix.
        """
        print(string_ops)

        s = 'abcd'
        n = len(s)
        row = '%d test %06r return %06r'

        # Each of these loops runs exactly n == 4 times.
        # NOTE: These are manually copied into DoUnarySuffixOp

        # Shortest prefix: try s[:1] first and grow.
        print('# shortest prefix')
        for i in xrange(1, n + 1):
            print(row % (i, s[:i], s[i:]))
        print()

        # Longest prefix: try the whole string first and shrink.
        print('## longest prefix')
        for i in xrange(n, 0, -1):
            print(row % (i, s[:i], s[i:]))
        print()

        # Shortest suffix: try s[n-1:] first and grow leftward.
        print('% shortest suffix')
        for i in xrange(n - 1, -1, -1):
            print(row % (i, s[i:], s[:i]))
        print()

        # Longest suffix: try the whole string first and shrink.
        print('%% longest suffix')
        for i in xrange(0, n):
            print(row % (i, s[i:], s[:i]))
        print()

    def testPatSubAllMatches(self):
        """_AllMatchPositions() and _PatSubAll() on matching and non-matching patterns."""
        s = 'oXooXoooX'

        # Match positions
        self.assertEqual([(1, 3), (4, 6)],
                         string_ops._AllMatchPositions(s, '(X.)'))

        # No match
        self.assertEqual([], string_ops._AllMatchPositions(s, '(z)'))

        # Replacement
        self.assertEqual('o_o_ooX', string_ops._PatSubAll(s, '(X.)', '_'))

        # Replacement with no match
        self.assertEqual(s, string_ops._PatSubAll(s, '(z)', '_'))
127
# Script entry point: run every test in this module when executed directly.
if __name__ == "__main__":
    unittest.main()