osh/string_ops

OILS / osh / string_ops_test.py

1	#!/usr/bin/env python2
2	"""
3	string_ops_test.py: Tests for string_ops.py
4	"""
5	from __future__ import print_function
6
7	import unittest
8
9	from core import error
10	from osh import string_ops # module under test
11
12
class LibStrTest(unittest.TestCase):
    """Table-driven tests for the UTF-8 helpers and pattern substitution
    helpers exposed by osh/string_ops.py."""

    def testUtf8Encode(self):
        # Each case is (expected encoded bytes, code point handed to
        # string_ops.Utf8Encode).
        CASES = [
            (u'\u0065'.encode('utf-8'), 0x0065),
            (u'\u0100'.encode('utf-8'), 0x0100),
            (u'\u1234'.encode('utf-8'), 0x1234),
            (u'\U00020000'.encode('utf-8'), 0x00020000),
            # Out of range gives Unicode replacement character.
            ('\xef\xbf\xbd', 0x10020000),
        ]

        for expected, code_point in CASES:
            print()
            print('Utf8Encode case %r %r' % (expected, code_point))
            self.assertEqual(expected, string_ops.Utf8Encode(code_point))

    def test_NextUtf8Char(self):
        # Each case is (expected, input_str).  `expected` lists the successive
        # values returned by _NextUtf8Char() when walking forward from index
        # 0.  If a call raises error.Strict, the exception's msg becomes the
        # final entry and the walk stops.
        CASES = [
            ([1, 3, 6, 10], '\x24\xC2\xA2\xE0\xA4\xB9\xF0\x90\x8D\x88'),
            ([1, 3, 'Invalid UTF-8 continuation byte'],
             '\x24\xC2\xA2\xE0\xE0\xA4'),
            ([1, 3, 6, 'Invalid start of UTF-8 character'],
             '\x24\xC2\xA2\xE0\xA4\xA4\xB9'),
            ([1, 3, 'Invalid start of UTF-8 character'], '\x24\xC2\xA2\xFF'),
            ([1, 'Incomplete UTF-8 character'], '\x24\xF0\x90\x8D'),
        ]
        for expected_indexes, input_str in CASES:
            print()
            print('_NextUtf8Char case %r %r' % (expected_indexes, input_str))
            i = 0
            actual_indexes = []
            while True:
                try:
                    i = string_ops._NextUtf8Char(input_str, i)
                    actual_indexes.append(i)
                    # Stop once the returned index reaches the end of input.
                    if i >= len(input_str):
                        break
                except error.Strict as e:
                    actual_indexes.append(e.msg)
                    break
            self.assertEqual(expected_indexes, actual_indexes)

    def test_PreviousUtf8Char(self):
        # Mirror image of test_NextUtf8Char: walk backward from
        # len(input_str) until index 0 is returned or error.Strict is raised,
        # recording each returned index (or the error message).
        #
        # The error messages could probably be improved for more consistency
        # with NextUtf8Char, at the expense of more complexity.
        CASES = [
            ([6, 3, 1, 0], '\x24\xC2\xA2\xE0\xA4\xB9\xF0\x90\x8D\x88'),
            ([6, 3, 1, 'Invalid start of UTF-8 character'],
             '\xA2\xC2\xA2\xE0\xA4\xB9\xF0\x90\x8D\x88'),
            ([10, 'Invalid start of UTF-8 character'],
             '\xF0\x90\x8D\x88\x90\x8D\x88\x90\x8D\x88\x24'),
            ([3, 'Invalid start of UTF-8 character'], '\xF0\x90\x8D\x24'),
        ]
        for expected_indexes, input_str in CASES:
            print()
            print('PreviousUtf8Char case %r %r' % (expected_indexes, input_str))
            i = len(input_str)
            actual_indexes = []
            while True:
                try:
                    i = string_ops.PreviousUtf8Char(input_str, i)
                    actual_indexes.append(i)
                    if i == 0:
                        break
                except error.Strict as e:
                    actual_indexes.append(e.msg)
                    break
            self.assertEqual(expected_indexes, actual_indexes)

    def testUnarySuffixOpDemo(self):
        # Demo only (no assertions): prints, for each of the four strip
        # operators, the order in which candidate slices would be tested and
        # the remainder that would be returned.
        print(string_ops)

        s = 'abcd'
        n = len(s)

        # All of these loops test exactly 4.
        # NOTE: These are manually copied into DoUnarySuffixOp

        # Labels follow shell operator spelling: '#' strips the shortest
        # prefix and '##' the longest; '%' strips the shortest suffix and
        # '%%' the longest.
        print('# shortest prefix')
        for i in xrange(1, n + 1):
            print('%d test %06r return %06r' % (i, s[:i], s[i:]))
        print()

        print('## longest prefix')
        for i in xrange(n, 0, -1):
            print('%d test %06r return %06r' % (i, s[:i], s[i:]))
        print()

        print('% shortest suffix')
        for i in xrange(n - 1, -1, -1):
            print('%d test %06r return %06r' % (i, s[i:], s[:i]))
        print()

        print('%% longest suffix')
        for i in xrange(0, n):
            print('%d test %06r return %06r' % (i, s[i:], s[:i]))
        print()

    def testPatSubAllMatches(self):
        # Exercises _AllMatchPositions (list of (start, end) pairs) and
        # _PatSubAll (replace every match) on the same subject string.
        s = 'oXooXoooX'

        # Match positions
        self.assertEqual([(1, 3), (4, 6)],
                         string_ops._AllMatchPositions(s, '(X.)'))

        # No match
        self.assertEqual([], string_ops._AllMatchPositions(s, '(z)'))

        # Replacement
        self.assertEqual('o_o_ooX', string_ops._PatSubAll(s, '(X.)', '_'))

        # Replacement with no match
        self.assertEqual(s, string_ops._PatSubAll(s, '(z)', '_'))
126
127
# Script entry point: run every TestCase defined in this module.
if __name__ == '__main__':
    unittest.main()