FTXUI  5.0.0
C++ functional terminal UI.
string.cpp
Go to the documentation of this file.
1 // Copyright 2020 Arthur Sonzogni. All rights reserved.
2 // Use of this source code is governed by the MIT license that can be found in
3 // the LICENSE file.
4 //
5 // Content of this file was created thanks to:
6 // -
7 // https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/WordBreakProperty.txt
8 // - Markus Kuhn -- 2007-05-26 (Unicode 5.0)
9 // http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
10 // Thank you!
11 
12 #include "ftxui/screen/string.hpp"
13 
14 #include <array> // for array
15 #include <cstddef> // for size_t
16 #include <cstdint> // for uint32_t, uint8_t, uint16_t, int32_t
17 #include <string> // for string, basic_string, wstring
18 #include <tuple> // for _Swallow_assign, ignore
19 #include <vector>
20 
21 #include "ftxui/screen/deprecated.hpp" // for wchar_width, wstring_width
22 #include "ftxui/screen/string_internal.hpp" // for WordBreakProperty, EatCodePoint, CodepointToWordBreakProperty, GlyphCount, GlyphIterate, GlyphNext, GlyphPrevious, IsCombining, IsControl, IsFullWidth, Utf8ToWordBreakProperty
23 
24 namespace {
25 
26 struct Interval {  // A contiguous range of Unicode code points; element type of g_full_width_characters.
27  uint32_t first;  // Lowest code point of the range.
28  uint32_t last;  // Highest code point of the range; presumably inclusive, since single-code-point entries use first == last.
29 };
30 
31 // Code point ranges classified as full width, as of Unicode 13.0.0. Entries are listed in ascending, non-overlapping order. NOTE(review): the lookup code is not visible here; it presumably depends on this ordering, so keep the table sorted and the array size in sync when editing.
32 constexpr std::array<Interval, 116> g_full_width_characters = {{
33  {0x01100, 0x0115f}, {0x0231a, 0x0231b}, {0x02329, 0x0232a},
34  {0x023e9, 0x023ec}, {0x023f0, 0x023f0}, {0x023f3, 0x023f3},
35  {0x025fd, 0x025fe}, {0x02614, 0x02615}, {0x02648, 0x02653},
36  {0x0267f, 0x0267f}, {0x02693, 0x02693}, {0x026a1, 0x026a1},
37  {0x026aa, 0x026ab}, {0x026bd, 0x026be}, {0x026c4, 0x026c5},
38  {0x026ce, 0x026ce}, {0x026d4, 0x026d4}, {0x026ea, 0x026ea},
39  {0x026f2, 0x026f3}, {0x026f5, 0x026f5}, {0x026fa, 0x026fa},
40  {0x026fd, 0x026fd}, {0x02705, 0x02705}, {0x0270a, 0x0270b},
41  {0x02728, 0x02728}, {0x0274c, 0x0274c}, {0x0274e, 0x0274e},
42  {0x02753, 0x02755}, {0x02757, 0x02757}, {0x02795, 0x02797},
43  {0x027b0, 0x027b0}, {0x027bf, 0x027bf}, {0x02b1b, 0x02b1c},
44  {0x02b50, 0x02b50}, {0x02b55, 0x02b55}, {0x02e80, 0x02e99},
45  {0x02e9b, 0x02ef3}, {0x02f00, 0x02fd5}, {0x02ff0, 0x02ffb},
46  {0x03000, 0x0303e}, {0x03041, 0x03096}, {0x03099, 0x030ff},
47  {0x03105, 0x0312f}, {0x03131, 0x0318e}, {0x03190, 0x031e3},
48  {0x031f0, 0x0321e}, {0x03220, 0x03247}, {0x03250, 0x04dbf},
49  {0x04e00, 0x0a48c}, {0x0a490, 0x0a4c6}, {0x0a960, 0x0a97c},
50  {0x0ac00, 0x0d7a3}, {0x0f900, 0x0faff}, {0x0fe10, 0x0fe19},
51  {0x0fe30, 0x0fe52}, {0x0fe54, 0x0fe66}, {0x0fe68, 0x0fe6b},
52  {0x0ff01, 0x0ff60}, {0x0ffe0, 0x0ffe6}, {0x16fe0, 0x16fe4},
53  {0x16ff0, 0x16ff1}, {0x17000, 0x187f7}, {0x18800, 0x18cd5},
54  {0x18d00, 0x18d08}, {0x1b000, 0x1b11e}, {0x1b150, 0x1b152},
55  {0x1b164, 0x1b167}, {0x1b170, 0x1b2fb}, {0x1f004, 0x1f004},
56  {0x1f0cf, 0x1f0cf}, {0x1f18e, 0x1f18e}, {0x1f191, 0x1f19a},
57  {0x1f200, 0x1f202}, {0x1f210, 0x1f23b}, {0x1f240, 0x1f248},
58  {0x1f250, 0x1f251}, {0x1f260, 0x1f265}, {0x1f300, 0x1f320},
59  {0x1f32d, 0x1f335}, {0x1f337, 0x1f37c}, {0x1f37e, 0x1f393},
60  {0x1f3a0, 0x1f3ca}, {0x1f3cf, 0x1f3d3}, {0x1f3e0, 0x1f3f0},
61  {0x1f3f4, 0x1f3f4}, {0x1f3f8, 0x1f43e}, {0x1f440, 0x1f440},
62  {0x1f442, 0x1f4fc}, {0x1f4ff, 0x1f53d}, {0x1f54b, 0x1f54e},
63  {0x1f550, 0x1f567}, {0x1f57a, 0x1f57a}, {0x1f595, 0x1f596},
64  {0x1f5a4, 0x1f5a4}, {0x1f5fb, 0x1f64f}, {0x1f680, 0x1f6c5},
65  {0x1f6cc, 0x1f6cc}, {0x1f6d0, 0x1f6d2}, {0x1f6d5, 0x1f6d7},
66  {0x1f6eb, 0x1f6ec}, {0x1f6f4, 0x1f6fc}, {0x1f7e0, 0x1f7eb},
67  {0x1f90c, 0x1f93a}, {0x1f93c, 0x1f945}, {0x1f947, 0x1f978},
68  {0x1f97a, 0x1f9cb}, {0x1f9cd, 0x1f9ff}, {0x1fa70, 0x1fa74},
69  {0x1fa78, 0x1fa7a}, {0x1fa80, 0x1fa86}, {0x1fa90, 0x1faa8},
70  {0x1fab0, 0x1fab6}, {0x1fac0, 0x1fac2}, {0x1fad0, 0x1fad6},
71  {0x20000, 0x2fffd}, {0x30000, 0x3fffd},
72 }};
73 
74 using WBP = ftxui::WordBreakProperty;  // Short alias so the table entries below stay compact.
75 struct WordBreakPropertyInterval {  // A range of Unicode code points that share one word-break property.
76  uint32_t first;  // Lowest code point of the range.
77  uint32_t last;  // Highest code point of the range; presumably inclusive, since single-code-point entries use first == last.
78  WBP property;  // Word-break property assigned to every code point in the range.
79 };
80 
81 // Properties from:
82 // https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/WordBreakProperty.txt
83 constexpr std::array<WordBreakPropertyInterval, 993> g_word_break_intervals = {{
84  {0x0000A, 0x0000A, WBP::LF},
85  {0x0000B, 0x0000C, WBP::Newline},
86  {0x0000D, 0x0000D, WBP::CR},
87  {0x00020, 0x00020, WBP::WSegSpace},
88  {0x00022, 0x00022, WBP::Double_Quote},
89  {0x00027, 0x00027, WBP::Single_Quote},
90  {0x0002C, 0x0002C, WBP::MidNum},
91  {0x0002E, 0x0002E, WBP::MidNumLet},
92  {0x00030, 0x00039, WBP::Numeric},
93  {0x0003A, 0x0003A, WBP::MidLetter},
94  {0x0003B, 0x0003B, WBP::MidNum},
95  {0x00041, 0x0005A, WBP::ALetter},
96  {0x0005F, 0x0005F, WBP::ExtendNumLet},
97  {0x00061, 0x0007A, WBP::ALetter},
98  {0x00085, 0x00085, WBP::Newline},
99  {0x000AA, 0x000AA, WBP::ALetter},
100  {0x000AD, 0x000AD, WBP::Format},
101  {0x000B5, 0x000B5, WBP::ALetter},
102  {0x000B7, 0x000B7, WBP::MidLetter},
103  {0x000BA, 0x000BA, WBP::ALetter},
104  {0x000C0, 0x000D6, WBP::ALetter},
105  {0x000D8, 0x000F6, WBP::ALetter},
106  {0x000F8, 0x002D7, WBP::ALetter},
107  {0x002DE, 0x002FF, WBP::ALetter},
108  {0x00300, 0x0036F, WBP::Extend},
109  {0x00370, 0x00374, WBP::ALetter},
110  {0x00376, 0x00377, WBP::ALetter},
111  {0x0037A, 0x0037D, WBP::ALetter},
112  {0x0037E, 0x0037E, WBP::MidNum},
113  {0x0037F, 0x0037F, WBP::ALetter},
114  {0x00386, 0x00386, WBP::ALetter},
115  {0x00387, 0x00387, WBP::MidLetter},
116  {0x00388, 0x0038A, WBP::ALetter},
117  {0x0038C, 0x0038C, WBP::ALetter},
118  {0x0038E, 0x003A1, WBP::ALetter},
119  {0x003A3, 0x003F5, WBP::ALetter},
120  {0x003F7, 0x00481, WBP::ALetter},
121  {0x00483, 0x00489, WBP::Extend},
122  {0x0048A, 0x0052F, WBP::ALetter},
123  {0x00531, 0x00556, WBP::ALetter},
124  {0x00559, 0x0055C, WBP::ALetter},
125  {0x0055E, 0x0055E, WBP::ALetter},
126  {0x0055F, 0x0055F, WBP::MidLetter},
127  {0x00560, 0x00588, WBP::ALetter},
128  {0x00589, 0x00589, WBP::MidNum},
129  {0x0058A, 0x0058A, WBP::ALetter},
130  {0x00591, 0x005BD, WBP::Extend},
131  {0x005BF, 0x005BF, WBP::Extend},
132  {0x005C1, 0x005C2, WBP::Extend},
133  {0x005C4, 0x005C5, WBP::Extend},
134  {0x005C7, 0x005C7, WBP::Extend},
135  {0x005D0, 0x005EA, WBP::Hebrew_Letter},
136  {0x005EF, 0x005F2, WBP::Hebrew_Letter},
137  {0x005F3, 0x005F3, WBP::ALetter},
138  {0x005F4, 0x005F4, WBP::MidLetter},
139  {0x00600, 0x00605, WBP::Format},
140  {0x0060C, 0x0060D, WBP::MidNum},
141  {0x00610, 0x0061A, WBP::Extend},
142  {0x0061C, 0x0061C, WBP::Format},
143  {0x00620, 0x0064A, WBP::ALetter},
144  {0x0064B, 0x0065F, WBP::Extend},
145  {0x00660, 0x00669, WBP::Numeric},
146  {0x0066B, 0x0066B, WBP::Numeric},
147  {0x0066C, 0x0066C, WBP::MidNum},
148  {0x0066E, 0x0066F, WBP::ALetter},
149  {0x00670, 0x00670, WBP::Extend},
150  {0x00671, 0x006D3, WBP::ALetter},
151  {0x006D5, 0x006D5, WBP::ALetter},
152  {0x006D6, 0x006DC, WBP::Extend},
153  {0x006DD, 0x006DD, WBP::Format},
154  {0x006DF, 0x006E4, WBP::Extend},
155  {0x006E5, 0x006E6, WBP::ALetter},
156  {0x006E7, 0x006E8, WBP::Extend},
157  {0x006EA, 0x006ED, WBP::Extend},
158  {0x006EE, 0x006EF, WBP::ALetter},
159  {0x006F0, 0x006F9, WBP::Numeric},
160  {0x006FA, 0x006FC, WBP::ALetter},
161  {0x006FF, 0x006FF, WBP::ALetter},
162  {0x0070F, 0x0070F, WBP::Format},
163  {0x00710, 0x00710, WBP::ALetter},
164  {0x00711, 0x00711, WBP::Extend},
165  {0x00712, 0x0072F, WBP::ALetter},
166  {0x00730, 0x0074A, WBP::Extend},
167  {0x0074D, 0x007A5, WBP::ALetter},
168  {0x007A6, 0x007B0, WBP::Extend},
169  {0x007B1, 0x007B1, WBP::ALetter},
170  {0x007C0, 0x007C9, WBP::Numeric},
171  {0x007CA, 0x007EA, WBP::ALetter},
172  {0x007EB, 0x007F3, WBP::Extend},
173  {0x007F4, 0x007F5, WBP::ALetter},
174  {0x007F8, 0x007F8, WBP::MidNum},
175  {0x007FA, 0x007FA, WBP::ALetter},
176  {0x007FD, 0x007FD, WBP::Extend},
177  {0x00800, 0x00815, WBP::ALetter},
178  {0x00816, 0x00819, WBP::Extend},
179  {0x0081A, 0x0081A, WBP::ALetter},
180  {0x0081B, 0x00823, WBP::Extend},
181  {0x00824, 0x00824, WBP::ALetter},
182  {0x00825, 0x00827, WBP::Extend},
183  {0x00828, 0x00828, WBP::ALetter},
184  {0x00829, 0x0082D, WBP::Extend},
185  {0x00840, 0x00858, WBP::ALetter},
186  {0x00859, 0x0085B, WBP::Extend},
187  {0x00860, 0x0086A, WBP::ALetter},
188  {0x008A0, 0x008B4, WBP::ALetter},
189  {0x008B6, 0x008C7, WBP::ALetter},
190  {0x008D3, 0x008E1, WBP::Extend},
191  {0x008E2, 0x008E2, WBP::Format},
192  {0x008E3, 0x00903, WBP::Extend},
193  {0x00904, 0x00939, WBP::ALetter},
194  {0x0093A, 0x0093C, WBP::Extend},
195  {0x0093D, 0x0093D, WBP::ALetter},
196  {0x0093E, 0x0094F, WBP::Extend},
197  {0x00950, 0x00950, WBP::ALetter},
198  {0x00951, 0x00957, WBP::Extend},
199  {0x00958, 0x00961, WBP::ALetter},
200  {0x00962, 0x00963, WBP::Extend},
201  {0x00966, 0x0096F, WBP::Numeric},
202  {0x00971, 0x00980, WBP::ALetter},
203  {0x00981, 0x00983, WBP::Extend},
204  {0x00985, 0x0098C, WBP::ALetter},
205  {0x0098F, 0x00990, WBP::ALetter},
206  {0x00993, 0x009A8, WBP::ALetter},
207  {0x009AA, 0x009B0, WBP::ALetter},
208  {0x009B2, 0x009B2, WBP::ALetter},
209  {0x009B6, 0x009B9, WBP::ALetter},
210  {0x009BC, 0x009BC, WBP::Extend},
211  {0x009BD, 0x009BD, WBP::ALetter},
212  {0x009BE, 0x009C4, WBP::Extend},
213  {0x009C7, 0x009C8, WBP::Extend},
214  {0x009CB, 0x009CD, WBP::Extend},
215  {0x009CE, 0x009CE, WBP::ALetter},
216  {0x009D7, 0x009D7, WBP::Extend},
217  {0x009DC, 0x009DD, WBP::ALetter},
218  {0x009DF, 0x009E1, WBP::ALetter},
219  {0x009E2, 0x009E3, WBP::Extend},
220  {0x009E6, 0x009EF, WBP::Numeric},
221  {0x009F0, 0x009F1, WBP::ALetter},
222  {0x009FC, 0x009FC, WBP::ALetter},
223  {0x009FE, 0x009FE, WBP::Extend},
224  {0x00A01, 0x00A03, WBP::Extend},
225  {0x00A05, 0x00A0A, WBP::ALetter},
226  {0x00A0F, 0x00A10, WBP::ALetter},
227  {0x00A13, 0x00A28, WBP::ALetter},
228  {0x00A2A, 0x00A30, WBP::ALetter},
229  {0x00A32, 0x00A33, WBP::ALetter},
230  {0x00A35, 0x00A36, WBP::ALetter},
231  {0x00A38, 0x00A39, WBP::ALetter},
232  {0x00A3C, 0x00A3C, WBP::Extend},
233  {0x00A3E, 0x00A42, WBP::Extend},
234  {0x00A47, 0x00A48, WBP::Extend},
235  {0x00A4B, 0x00A4D, WBP::Extend},
236  {0x00A51, 0x00A51, WBP::Extend},
237  {0x00A59, 0x00A5C, WBP::ALetter},
238  {0x00A5E, 0x00A5E, WBP::ALetter},
239  {0x00A66, 0x00A6F, WBP::Numeric},
240  {0x00A70, 0x00A71, WBP::Extend},
241  {0x00A72, 0x00A74, WBP::ALetter},
242  {0x00A75, 0x00A75, WBP::Extend},
243  {0x00A81, 0x00A83, WBP::Extend},
244  {0x00A85, 0x00A8D, WBP::ALetter},
245  {0x00A8F, 0x00A91, WBP::ALetter},
246  {0x00A93, 0x00AA8, WBP::ALetter},
247  {0x00AAA, 0x00AB0, WBP::ALetter},
248  {0x00AB2, 0x00AB3, WBP::ALetter},
249  {0x00AB5, 0x00AB9, WBP::ALetter},
250  {0x00ABC, 0x00ABC, WBP::Extend},
251  {0x00ABD, 0x00ABD, WBP::ALetter},
252  {0x00ABE, 0x00AC5, WBP::Extend},
253  {0x00AC7, 0x00AC9, WBP::Extend},
254  {0x00ACB, 0x00ACD, WBP::Extend},
255  {0x00AD0, 0x00AD0, WBP::ALetter},
256  {0x00AE0, 0x00AE1, WBP::ALetter},
257  {0x00AE2, 0x00AE3, WBP::Extend},
258  {0x00AE6, 0x00AEF, WBP::Numeric},
259  {0x00AF9, 0x00AF9, WBP::ALetter},
260  {0x00AFA, 0x00AFF, WBP::Extend},
261  {0x00B01, 0x00B03, WBP::Extend},
262  {0x00B05, 0x00B0C, WBP::ALetter},
263  {0x00B0F, 0x00B10, WBP::ALetter},
264  {0x00B13, 0x00B28, WBP::ALetter},
265  {0x00B2A, 0x00B30, WBP::ALetter},
266  {0x00B32, 0x00B33, WBP::ALetter},
267  {0x00B35, 0x00B39, WBP::ALetter},
268  {0x00B3C, 0x00B3C, WBP::Extend},
269  {0x00B3D, 0x00B3D, WBP::ALetter},
270  {0x00B3E, 0x00B44, WBP::Extend},
271  {0x00B47, 0x00B48, WBP::Extend},
272  {0x00B4B, 0x00B4D, WBP::Extend},
273  {0x00B55, 0x00B57, WBP::Extend},
274  {0x00B5C, 0x00B5D, WBP::ALetter},
275  {0x00B5F, 0x00B61, WBP::ALetter},
276  {0x00B62, 0x00B63, WBP::Extend},
277  {0x00B66, 0x00B6F, WBP::Numeric},
278  {0x00B71, 0x00B71, WBP::ALetter},
279  {0x00B82, 0x00B82, WBP::Extend},
280  {0x00B83, 0x00B83, WBP::ALetter},
281  {0x00B85, 0x00B8A, WBP::ALetter},
282  {0x00B8E, 0x00B90, WBP::ALetter},
283  {0x00B92, 0x00B95, WBP::ALetter},
284  {0x00B99, 0x00B9A, WBP::ALetter},
285  {0x00B9C, 0x00B9C, WBP::ALetter},
286  {0x00B9E, 0x00B9F, WBP::ALetter},
287  {0x00BA3, 0x00BA4, WBP::ALetter},
288  {0x00BA8, 0x00BAA, WBP::ALetter},
289  {0x00BAE, 0x00BB9, WBP::ALetter},
290  {0x00BBE, 0x00BC2, WBP::Extend},
291  {0x00BC6, 0x00BC8, WBP::Extend},
292  {0x00BCA, 0x00BCD, WBP::Extend},
293  {0x00BD0, 0x00BD0, WBP::ALetter},
294  {0x00BD7, 0x00BD7, WBP::Extend},
295  {0x00BE6, 0x00BEF, WBP::Numeric},
296  {0x00C00, 0x00C04, WBP::Extend},
297  {0x00C05, 0x00C0C, WBP::ALetter},
298  {0x00C0E, 0x00C10, WBP::ALetter},
299  {0x00C12, 0x00C28, WBP::ALetter},
300  {0x00C2A, 0x00C39, WBP::ALetter},
301  {0x00C3D, 0x00C3D, WBP::ALetter},
302  {0x00C3E, 0x00C44, WBP::Extend},
303  {0x00C46, 0x00C48, WBP::Extend},
304  {0x00C4A, 0x00C4D, WBP::Extend},
305  {0x00C55, 0x00C56, WBP::Extend},
306  {0x00C58, 0x00C5A, WBP::ALetter},
307  {0x00C60, 0x00C61, WBP::ALetter},
308  {0x00C62, 0x00C63, WBP::Extend},
309  {0x00C66, 0x00C6F, WBP::Numeric},
310  {0x00C80, 0x00C80, WBP::ALetter},
311  {0x00C81, 0x00C83, WBP::Extend},
312  {0x00C85, 0x00C8C, WBP::ALetter},
313  {0x00C8E, 0x00C90, WBP::ALetter},
314  {0x00C92, 0x00CA8, WBP::ALetter},
315  {0x00CAA, 0x00CB3, WBP::ALetter},
316  {0x00CB5, 0x00CB9, WBP::ALetter},
317  {0x00CBC, 0x00CBC, WBP::Extend},
318  {0x00CBD, 0x00CBD, WBP::ALetter},
319  {0x00CBE, 0x00CC4, WBP::Extend},
320  {0x00CC6, 0x00CC8, WBP::Extend},
321  {0x00CCA, 0x00CCD, WBP::Extend},
322  {0x00CD5, 0x00CD6, WBP::Extend},
323  {0x00CDE, 0x00CDE, WBP::ALetter},
324  {0x00CE0, 0x00CE1, WBP::ALetter},
325  {0x00CE2, 0x00CE3, WBP::Extend},
326  {0x00CE6, 0x00CEF, WBP::Numeric},
327  {0x00CF1, 0x00CF2, WBP::ALetter},
328  {0x00D00, 0x00D03, WBP::Extend},
329  {0x00D04, 0x00D0C, WBP::ALetter},
330  {0x00D0E, 0x00D10, WBP::ALetter},
331  {0x00D12, 0x00D3A, WBP::ALetter},
332  {0x00D3B, 0x00D3C, WBP::Extend},
333  {0x00D3D, 0x00D3D, WBP::ALetter},
334  {0x00D3E, 0x00D44, WBP::Extend},
335  {0x00D46, 0x00D48, WBP::Extend},
336  {0x00D4A, 0x00D4D, WBP::Extend},
337  {0x00D4E, 0x00D4E, WBP::ALetter},
338  {0x00D54, 0x00D56, WBP::ALetter},
339  {0x00D57, 0x00D57, WBP::Extend},
340  {0x00D5F, 0x00D61, WBP::ALetter},
341  {0x00D62, 0x00D63, WBP::Extend},
342  {0x00D66, 0x00D6F, WBP::Numeric},
343  {0x00D7A, 0x00D7F, WBP::ALetter},
344  {0x00D81, 0x00D83, WBP::Extend},
345  {0x00D85, 0x00D96, WBP::ALetter},
346  {0x00D9A, 0x00DB1, WBP::ALetter},
347  {0x00DB3, 0x00DBB, WBP::ALetter},
348  {0x00DBD, 0x00DBD, WBP::ALetter},
349  {0x00DC0, 0x00DC6, WBP::ALetter},
350  {0x00DCA, 0x00DCA, WBP::Extend},
351  {0x00DCF, 0x00DD4, WBP::Extend},
352  {0x00DD6, 0x00DD6, WBP::Extend},
353  {0x00DD8, 0x00DDF, WBP::Extend},
354  {0x00DE6, 0x00DEF, WBP::Numeric},
355  {0x00DF2, 0x00DF3, WBP::Extend},
356  {0x00E31, 0x00E31, WBP::Extend},
357  {0x00E34, 0x00E3A, WBP::Extend},
358  {0x00E47, 0x00E4E, WBP::Extend},
359  {0x00E50, 0x00E59, WBP::Numeric},
360  {0x00EB1, 0x00EB1, WBP::Extend},
361  {0x00EB4, 0x00EBC, WBP::Extend},
362  {0x00EC8, 0x00ECD, WBP::Extend},
363  {0x00ED0, 0x00ED9, WBP::Numeric},
364  {0x00F00, 0x00F00, WBP::ALetter},
365  {0x00F18, 0x00F19, WBP::Extend},
366  {0x00F20, 0x00F29, WBP::Numeric},
367  {0x00F35, 0x00F35, WBP::Extend},
368  {0x00F37, 0x00F37, WBP::Extend},
369  {0x00F39, 0x00F39, WBP::Extend},
370  {0x00F3E, 0x00F3F, WBP::Extend},
371  {0x00F40, 0x00F47, WBP::ALetter},
372  {0x00F49, 0x00F6C, WBP::ALetter},
373  {0x00F71, 0x00F84, WBP::Extend},
374  {0x00F86, 0x00F87, WBP::Extend},
375  {0x00F88, 0x00F8C, WBP::ALetter},
376  {0x00F8D, 0x00F97, WBP::Extend},
377  {0x00F99, 0x00FBC, WBP::Extend},
378  {0x00FC6, 0x00FC6, WBP::Extend},
379  {0x0102B, 0x0103E, WBP::Extend},
380  {0x01040, 0x01049, WBP::Numeric},
381  {0x01056, 0x01059, WBP::Extend},
382  {0x0105E, 0x01060, WBP::Extend},
383  {0x01062, 0x01064, WBP::Extend},
384  {0x01067, 0x0106D, WBP::Extend},
385  {0x01071, 0x01074, WBP::Extend},
386  {0x01082, 0x0108D, WBP::Extend},
387  {0x0108F, 0x0108F, WBP::Extend},
388  {0x01090, 0x01099, WBP::Numeric},
389  {0x0109A, 0x0109D, WBP::Extend},
390  {0x010A0, 0x010C5, WBP::ALetter},
391  {0x010C7, 0x010C7, WBP::ALetter},
392  {0x010CD, 0x010CD, WBP::ALetter},
393  {0x010D0, 0x010FA, WBP::ALetter},
394  {0x010FC, 0x01248, WBP::ALetter},
395  {0x0124A, 0x0124D, WBP::ALetter},
396  {0x01250, 0x01256, WBP::ALetter},
397  {0x01258, 0x01258, WBP::ALetter},
398  {0x0125A, 0x0125D, WBP::ALetter},
399  {0x01260, 0x01288, WBP::ALetter},
400  {0x0128A, 0x0128D, WBP::ALetter},
401  {0x01290, 0x012B0, WBP::ALetter},
402  {0x012B2, 0x012B5, WBP::ALetter},
403  {0x012B8, 0x012BE, WBP::ALetter},
404  {0x012C0, 0x012C0, WBP::ALetter},
405  {0x012C2, 0x012C5, WBP::ALetter},
406  {0x012C8, 0x012D6, WBP::ALetter},
407  {0x012D8, 0x01310, WBP::ALetter},
408  {0x01312, 0x01315, WBP::ALetter},
409  {0x01318, 0x0135A, WBP::ALetter},
410  {0x0135D, 0x0135F, WBP::Extend},
411  {0x01380, 0x0138F, WBP::ALetter},
412  {0x013A0, 0x013F5, WBP::ALetter},
413  {0x013F8, 0x013FD, WBP::ALetter},
414  {0x01401, 0x0166C, WBP::ALetter},
415  {0x0166F, 0x0167F, WBP::ALetter},
416  {0x01680, 0x01680, WBP::WSegSpace},
417  {0x01681, 0x0169A, WBP::ALetter},
418  {0x016A0, 0x016EA, WBP::ALetter},
419  {0x016EE, 0x016F8, WBP::ALetter},
420  {0x01700, 0x0170C, WBP::ALetter},
421  {0x0170E, 0x01711, WBP::ALetter},
422  {0x01712, 0x01714, WBP::Extend},
423  {0x01720, 0x01731, WBP::ALetter},
424  {0x01732, 0x01734, WBP::Extend},
425  {0x01740, 0x01751, WBP::ALetter},
426  {0x01752, 0x01753, WBP::Extend},
427  {0x01760, 0x0176C, WBP::ALetter},
428  {0x0176E, 0x01770, WBP::ALetter},
429  {0x01772, 0x01773, WBP::Extend},
430  {0x017B4, 0x017D3, WBP::Extend},
431  {0x017DD, 0x017DD, WBP::Extend},
432  {0x017E0, 0x017E9, WBP::Numeric},
433  {0x0180B, 0x0180D, WBP::Extend},
434  {0x0180E, 0x0180E, WBP::Format},
435  {0x01810, 0x01819, WBP::Numeric},
436  {0x01820, 0x01878, WBP::ALetter},
437  {0x01880, 0x01884, WBP::ALetter},
438  {0x01885, 0x01886, WBP::Extend},
439  {0x01887, 0x018A8, WBP::ALetter},
440  {0x018A9, 0x018A9, WBP::Extend},
441  {0x018AA, 0x018AA, WBP::ALetter},
442  {0x018B0, 0x018F5, WBP::ALetter},
443  {0x01900, 0x0191E, WBP::ALetter},
444  {0x01920, 0x0192B, WBP::Extend},
445  {0x01930, 0x0193B, WBP::Extend},
446  {0x01946, 0x0194F, WBP::Numeric},
447  {0x019D0, 0x019D9, WBP::Numeric},
448  {0x01A00, 0x01A16, WBP::ALetter},
449  {0x01A17, 0x01A1B, WBP::Extend},
450  {0x01A55, 0x01A5E, WBP::Extend},
451  {0x01A60, 0x01A7C, WBP::Extend},
452  {0x01A7F, 0x01A7F, WBP::Extend},
453  {0x01A80, 0x01A89, WBP::Numeric},
454  {0x01A90, 0x01A99, WBP::Numeric},
455  {0x01AB0, 0x01AC0, WBP::Extend},
456  {0x01B00, 0x01B04, WBP::Extend},
457  {0x01B05, 0x01B33, WBP::ALetter},
458  {0x01B34, 0x01B44, WBP::Extend},
459  {0x01B45, 0x01B4B, WBP::ALetter},
460  {0x01B50, 0x01B59, WBP::Numeric},
461  {0x01B6B, 0x01B73, WBP::Extend},
462  {0x01B80, 0x01B82, WBP::Extend},
463  {0x01B83, 0x01BA0, WBP::ALetter},
464  {0x01BA1, 0x01BAD, WBP::Extend},
465  {0x01BAE, 0x01BAF, WBP::ALetter},
466  {0x01BB0, 0x01BB9, WBP::Numeric},
467  {0x01BBA, 0x01BE5, WBP::ALetter},
468  {0x01BE6, 0x01BF3, WBP::Extend},
469  {0x01C00, 0x01C23, WBP::ALetter},
470  {0x01C24, 0x01C37, WBP::Extend},
471  {0x01C40, 0x01C49, WBP::Numeric},
472  {0x01C4D, 0x01C4F, WBP::ALetter},
473  {0x01C50, 0x01C59, WBP::Numeric},
474  {0x01C5A, 0x01C7D, WBP::ALetter},
475  {0x01C80, 0x01C88, WBP::ALetter},
476  {0x01C90, 0x01CBA, WBP::ALetter},
477  {0x01CBD, 0x01CBF, WBP::ALetter},
478  {0x01CD0, 0x01CD2, WBP::Extend},
479  {0x01CD4, 0x01CE8, WBP::Extend},
480  {0x01CE9, 0x01CEC, WBP::ALetter},
481  {0x01CED, 0x01CED, WBP::Extend},
482  {0x01CEE, 0x01CF3, WBP::ALetter},
483  {0x01CF4, 0x01CF4, WBP::Extend},
484  {0x01CF5, 0x01CF6, WBP::ALetter},
485  {0x01CF7, 0x01CF9, WBP::Extend},
486  {0x01CFA, 0x01CFA, WBP::ALetter},
487  {0x01D00, 0x01DBF, WBP::ALetter},
488  {0x01DC0, 0x01DF9, WBP::Extend},
489  {0x01DFB, 0x01DFF, WBP::Extend},
490  {0x01E00, 0x01F15, WBP::ALetter},
491  {0x01F18, 0x01F1D, WBP::ALetter},
492  {0x01F20, 0x01F45, WBP::ALetter},
493  {0x01F48, 0x01F4D, WBP::ALetter},
494  {0x01F50, 0x01F57, WBP::ALetter},
495  {0x01F59, 0x01F59, WBP::ALetter},
496  {0x01F5B, 0x01F5B, WBP::ALetter},
497  {0x01F5D, 0x01F5D, WBP::ALetter},
498  {0x01F5F, 0x01F7D, WBP::ALetter},
499  {0x01F80, 0x01FB4, WBP::ALetter},
500  {0x01FB6, 0x01FBC, WBP::ALetter},
501  {0x01FBE, 0x01FBE, WBP::ALetter},
502  {0x01FC2, 0x01FC4, WBP::ALetter},
503  {0x01FC6, 0x01FCC, WBP::ALetter},
504  {0x01FD0, 0x01FD3, WBP::ALetter},
505  {0x01FD6, 0x01FDB, WBP::ALetter},
506  {0x01FE0, 0x01FEC, WBP::ALetter},
507  {0x01FF2, 0x01FF4, WBP::ALetter},
508  {0x01FF6, 0x01FFC, WBP::ALetter},
509  {0x02000, 0x02006, WBP::WSegSpace},
510  {0x02008, 0x0200A, WBP::WSegSpace},
511  {0x0200C, 0x0200C, WBP::Extend},
512  {0x0200D, 0x0200D, WBP::ZWJ},
513  {0x0200E, 0x0200F, WBP::Format},
514  {0x02018, 0x02019, WBP::MidNumLet},
515  {0x02024, 0x02024, WBP::MidNumLet},
516  {0x02027, 0x02027, WBP::MidLetter},
517  {0x02028, 0x02029, WBP::Newline},
518  {0x0202A, 0x0202E, WBP::Format},
519  {0x0202F, 0x0202F, WBP::ExtendNumLet},
520  {0x0203F, 0x02040, WBP::ExtendNumLet},
521  {0x02044, 0x02044, WBP::MidNum},
522  {0x02054, 0x02054, WBP::ExtendNumLet},
523  {0x0205F, 0x0205F, WBP::WSegSpace},
524  {0x02060, 0x02064, WBP::Format},
525  {0x02066, 0x0206F, WBP::Format},
526  {0x02071, 0x02071, WBP::ALetter},
527  {0x0207F, 0x0207F, WBP::ALetter},
528  {0x02090, 0x0209C, WBP::ALetter},
529  {0x020D0, 0x020F0, WBP::Extend},
530  {0x02102, 0x02102, WBP::ALetter},
531  {0x02107, 0x02107, WBP::ALetter},
532  {0x0210A, 0x02113, WBP::ALetter},
533  {0x02115, 0x02115, WBP::ALetter},
534  {0x02119, 0x0211D, WBP::ALetter},
535  {0x02124, 0x02124, WBP::ALetter},
536  {0x02126, 0x02126, WBP::ALetter},
537  {0x02128, 0x02128, WBP::ALetter},
538  {0x0212A, 0x0212D, WBP::ALetter},
539  {0x0212F, 0x02139, WBP::ALetter},
540  {0x0213C, 0x0213F, WBP::ALetter},
541  {0x02145, 0x02149, WBP::ALetter},
542  {0x0214E, 0x0214E, WBP::ALetter},
543  {0x02160, 0x02188, WBP::ALetter},
544  {0x024B6, 0x024E9, WBP::ALetter},
545  {0x02C00, 0x02C2E, WBP::ALetter},
546  {0x02C30, 0x02C5E, WBP::ALetter},
547  {0x02C60, 0x02CE4, WBP::ALetter},
548  {0x02CEB, 0x02CEE, WBP::ALetter},
549  {0x02CEF, 0x02CF1, WBP::Extend},
550  {0x02CF2, 0x02CF3, WBP::ALetter},
551  {0x02D00, 0x02D25, WBP::ALetter},
552  {0x02D27, 0x02D27, WBP::ALetter},
553  {0x02D2D, 0x02D2D, WBP::ALetter},
554  {0x02D30, 0x02D67, WBP::ALetter},
555  {0x02D6F, 0x02D6F, WBP::ALetter},
556  {0x02D7F, 0x02D7F, WBP::Extend},
557  {0x02D80, 0x02D96, WBP::ALetter},
558  {0x02DA0, 0x02DA6, WBP::ALetter},
559  {0x02DA8, 0x02DAE, WBP::ALetter},
560  {0x02DB0, 0x02DB6, WBP::ALetter},
561  {0x02DB8, 0x02DBE, WBP::ALetter},
562  {0x02DC0, 0x02DC6, WBP::ALetter},
563  {0x02DC8, 0x02DCE, WBP::ALetter},
564  {0x02DD0, 0x02DD6, WBP::ALetter},
565  {0x02DD8, 0x02DDE, WBP::ALetter},
566  {0x02DE0, 0x02DFF, WBP::Extend},
567  {0x02E2F, 0x02E2F, WBP::ALetter},
568  {0x03000, 0x03000, WBP::WSegSpace},
569  {0x03005, 0x03005, WBP::ALetter},
570  {0x0302A, 0x0302F, WBP::Extend},
571  {0x03031, 0x03035, WBP::Katakana},
572  {0x0303B, 0x0303C, WBP::ALetter},
573  {0x03099, 0x0309A, WBP::Extend},
574  {0x0309B, 0x0309C, WBP::Katakana},
575  {0x030A0, 0x030FA, WBP::Katakana},
576  {0x030FC, 0x030FF, WBP::Katakana},
577  {0x03105, 0x0312F, WBP::ALetter},
578  {0x03131, 0x0318E, WBP::ALetter},
579  {0x031A0, 0x031BF, WBP::ALetter},
580  {0x031F0, 0x031FF, WBP::Katakana},
581  {0x032D0, 0x032FE, WBP::Katakana},
582  {0x03300, 0x03357, WBP::Katakana},
583  {0x0A000, 0x0A48C, WBP::ALetter},
584  {0x0A4D0, 0x0A4FD, WBP::ALetter},
585  {0x0A500, 0x0A60C, WBP::ALetter},
586  {0x0A610, 0x0A61F, WBP::ALetter},
587  {0x0A620, 0x0A629, WBP::Numeric},
588  {0x0A62A, 0x0A62B, WBP::ALetter},
589  {0x0A640, 0x0A66E, WBP::ALetter},
590  {0x0A66F, 0x0A672, WBP::Extend},
591  {0x0A674, 0x0A67D, WBP::Extend},
592  {0x0A67F, 0x0A69D, WBP::ALetter},
593  {0x0A69E, 0x0A69F, WBP::Extend},
594  {0x0A6A0, 0x0A6EF, WBP::ALetter},
595  {0x0A6F0, 0x0A6F1, WBP::Extend},
596  {0x0A708, 0x0A7BF, WBP::ALetter},
597  {0x0A7C2, 0x0A7CA, WBP::ALetter},
598  {0x0A7F5, 0x0A801, WBP::ALetter},
599  {0x0A802, 0x0A802, WBP::Extend},
600  {0x0A803, 0x0A805, WBP::ALetter},
601  {0x0A806, 0x0A806, WBP::Extend},
602  {0x0A807, 0x0A80A, WBP::ALetter},
603  {0x0A80B, 0x0A80B, WBP::Extend},
604  {0x0A80C, 0x0A822, WBP::ALetter},
605  {0x0A823, 0x0A827, WBP::Extend},
606  {0x0A82C, 0x0A82C, WBP::Extend},
607  {0x0A840, 0x0A873, WBP::ALetter},
608  {0x0A880, 0x0A881, WBP::Extend},
609  {0x0A882, 0x0A8B3, WBP::ALetter},
610  {0x0A8B4, 0x0A8C5, WBP::Extend},
611  {0x0A8D0, 0x0A8D9, WBP::Numeric},
612  {0x0A8E0, 0x0A8F1, WBP::Extend},
613  {0x0A8F2, 0x0A8F7, WBP::ALetter},
614  {0x0A8FB, 0x0A8FB, WBP::ALetter},
615  {0x0A8FD, 0x0A8FE, WBP::ALetter},
616  {0x0A8FF, 0x0A8FF, WBP::Extend},
617  {0x0A900, 0x0A909, WBP::Numeric},
618  {0x0A90A, 0x0A925, WBP::ALetter},
619  {0x0A926, 0x0A92D, WBP::Extend},
620  {0x0A930, 0x0A946, WBP::ALetter},
621  {0x0A947, 0x0A953, WBP::Extend},
622  {0x0A960, 0x0A97C, WBP::ALetter},
623  {0x0A980, 0x0A983, WBP::Extend},
624  {0x0A984, 0x0A9B2, WBP::ALetter},
625  {0x0A9B3, 0x0A9C0, WBP::Extend},
626  {0x0A9CF, 0x0A9CF, WBP::ALetter},
627  {0x0A9D0, 0x0A9D9, WBP::Numeric},
628  {0x0A9E5, 0x0A9E5, WBP::Extend},
629  {0x0A9F0, 0x0A9F9, WBP::Numeric},
630  {0x0AA00, 0x0AA28, WBP::ALetter},
631  {0x0AA29, 0x0AA36, WBP::Extend},
632  {0x0AA40, 0x0AA42, WBP::ALetter},
633  {0x0AA43, 0x0AA43, WBP::Extend},
634  {0x0AA44, 0x0AA4B, WBP::ALetter},
635  {0x0AA4C, 0x0AA4D, WBP::Extend},
636  {0x0AA50, 0x0AA59, WBP::Numeric},
637  {0x0AA7B, 0x0AA7D, WBP::Extend},
638  {0x0AAB0, 0x0AAB0, WBP::Extend},
639  {0x0AAB2, 0x0AAB4, WBP::Extend},
640  {0x0AAB7, 0x0AAB8, WBP::Extend},
641  {0x0AABE, 0x0AABF, WBP::Extend},
642  {0x0AAC1, 0x0AAC1, WBP::Extend},
643  {0x0AAE0, 0x0AAEA, WBP::ALetter},
644  {0x0AAEB, 0x0AAEF, WBP::Extend},
645  {0x0AAF2, 0x0AAF4, WBP::ALetter},
646  {0x0AAF5, 0x0AAF6, WBP::Extend},
647  {0x0AB01, 0x0AB06, WBP::ALetter},
648  {0x0AB09, 0x0AB0E, WBP::ALetter},
649  {0x0AB11, 0x0AB16, WBP::ALetter},
650  {0x0AB20, 0x0AB26, WBP::ALetter},
651  {0x0AB28, 0x0AB2E, WBP::ALetter},
652  {0x0AB30, 0x0AB69, WBP::ALetter},
653  {0x0AB70, 0x0ABE2, WBP::ALetter},
654  {0x0ABE3, 0x0ABEA, WBP::Extend},
655  {0x0ABEC, 0x0ABED, WBP::Extend},
656  {0x0ABF0, 0x0ABF9, WBP::Numeric},
657  {0x0AC00, 0x0D7A3, WBP::ALetter},
658  {0x0D7B0, 0x0D7C6, WBP::ALetter},
659  {0x0D7CB, 0x0D7FB, WBP::ALetter},
660  {0x0FB00, 0x0FB06, WBP::ALetter},
661  {0x0FB13, 0x0FB17, WBP::ALetter},
662  {0x0FB1D, 0x0FB1D, WBP::Hebrew_Letter},
663  {0x0FB1E, 0x0FB1E, WBP::Extend},
664  {0x0FB1F, 0x0FB28, WBP::Hebrew_Letter},
665  {0x0FB2A, 0x0FB36, WBP::Hebrew_Letter},
666  {0x0FB38, 0x0FB3C, WBP::Hebrew_Letter},
667  {0x0FB3E, 0x0FB3E, WBP::Hebrew_Letter},
668  {0x0FB40, 0x0FB41, WBP::Hebrew_Letter},
669  {0x0FB43, 0x0FB44, WBP::Hebrew_Letter},
670  {0x0FB46, 0x0FB4F, WBP::Hebrew_Letter},
671  {0x0FB50, 0x0FBB1, WBP::ALetter},
672  {0x0FBD3, 0x0FD3D, WBP::ALetter},
673  {0x0FD50, 0x0FD8F, WBP::ALetter},
674  {0x0FD92, 0x0FDC7, WBP::ALetter},
675  {0x0FDF0, 0x0FDFB, WBP::ALetter},
676  {0x0FE00, 0x0FE0F, WBP::Extend},
677  {0x0FE10, 0x0FE10, WBP::MidNum},
678  {0x0FE13, 0x0FE13, WBP::MidLetter},
679  {0x0FE14, 0x0FE14, WBP::MidNum},
680  {0x0FE20, 0x0FE2F, WBP::Extend},
681  {0x0FE33, 0x0FE34, WBP::ExtendNumLet},
682  {0x0FE4D, 0x0FE4F, WBP::ExtendNumLet},
683  {0x0FE50, 0x0FE50, WBP::MidNum},
684  {0x0FE52, 0x0FE52, WBP::MidNumLet},
685  {0x0FE54, 0x0FE54, WBP::MidNum},
686  {0x0FE55, 0x0FE55, WBP::MidLetter},
687  {0x0FE70, 0x0FE74, WBP::ALetter},
688  {0x0FE76, 0x0FEFC, WBP::ALetter},
689  {0x0FEFF, 0x0FEFF, WBP::Format},
690  {0x0FF07, 0x0FF07, WBP::MidNumLet},
691  {0x0FF0C, 0x0FF0C, WBP::MidNum},
692  {0x0FF0E, 0x0FF0E, WBP::MidNumLet},
693  {0x0FF10, 0x0FF19, WBP::Numeric},
694  {0x0FF1A, 0x0FF1A, WBP::MidLetter},
695  {0x0FF1B, 0x0FF1B, WBP::MidNum},
696  {0x0FF21, 0x0FF3A, WBP::ALetter},
697  {0x0FF3F, 0x0FF3F, WBP::ExtendNumLet},
698  {0x0FF41, 0x0FF5A, WBP::ALetter},
699  {0x0FF66, 0x0FF9D, WBP::Katakana},
700  {0x0FF9E, 0x0FF9F, WBP::Extend},
701  {0x0FFA0, 0x0FFBE, WBP::ALetter},
702  {0x0FFC2, 0x0FFC7, WBP::ALetter},
703  {0x0FFCA, 0x0FFCF, WBP::ALetter},
704  {0x0FFD2, 0x0FFD7, WBP::ALetter},
705  {0x0FFDA, 0x0FFDC, WBP::ALetter},
706  {0x0FFF9, 0x0FFFB, WBP::Format},
707  {0x10000, 0x1000B, WBP::ALetter},
708  {0x1000D, 0x10026, WBP::ALetter},
709  {0x10028, 0x1003A, WBP::ALetter},
710  {0x1003C, 0x1003D, WBP::ALetter},
711  {0x1003F, 0x1004D, WBP::ALetter},
712  {0x10050, 0x1005D, WBP::ALetter},
713  {0x10080, 0x100FA, WBP::ALetter},
714  {0x10140, 0x10174, WBP::ALetter},
715  {0x101FD, 0x101FD, WBP::Extend},
716  {0x10280, 0x1029C, WBP::ALetter},
717  {0x102A0, 0x102D0, WBP::ALetter},
718  {0x102E0, 0x102E0, WBP::Extend},
719  {0x10300, 0x1031F, WBP::ALetter},
720  {0x1032D, 0x1034A, WBP::ALetter},
721  {0x10350, 0x10375, WBP::ALetter},
722  {0x10376, 0x1037A, WBP::Extend},
723  {0x10380, 0x1039D, WBP::ALetter},
724  {0x103A0, 0x103C3, WBP::ALetter},
725  {0x103C8, 0x103CF, WBP::ALetter},
726  {0x103D1, 0x103D5, WBP::ALetter},
727  {0x10400, 0x1049D, WBP::ALetter},
728  {0x104A0, 0x104A9, WBP::Numeric},
729  {0x104B0, 0x104D3, WBP::ALetter},
730  {0x104D8, 0x104FB, WBP::ALetter},
731  {0x10500, 0x10527, WBP::ALetter},
732  {0x10530, 0x10563, WBP::ALetter},
733  {0x10600, 0x10736, WBP::ALetter},
734  {0x10740, 0x10755, WBP::ALetter},
735  {0x10760, 0x10767, WBP::ALetter},
736  {0x10800, 0x10805, WBP::ALetter},
737  {0x10808, 0x10808, WBP::ALetter},
738  {0x1080A, 0x10835, WBP::ALetter},
739  {0x10837, 0x10838, WBP::ALetter},
740  {0x1083C, 0x1083C, WBP::ALetter},
741  {0x1083F, 0x10855, WBP::ALetter},
742  {0x10860, 0x10876, WBP::ALetter},
743  {0x10880, 0x1089E, WBP::ALetter},
744  {0x108E0, 0x108F2, WBP::ALetter},
745  {0x108F4, 0x108F5, WBP::ALetter},
746  {0x10900, 0x10915, WBP::ALetter},
747  {0x10920, 0x10939, WBP::ALetter},
748  {0x10980, 0x109B7, WBP::ALetter},
749  {0x109BE, 0x109BF, WBP::ALetter},
750  {0x10A00, 0x10A00, WBP::ALetter},
751  {0x10A01, 0x10A03, WBP::Extend},
752  {0x10A05, 0x10A06, WBP::Extend},
753  {0x10A0C, 0x10A0F, WBP::Extend},
754  {0x10A10, 0x10A13, WBP::ALetter},
755  {0x10A15, 0x10A17, WBP::ALetter},
756  {0x10A19, 0x10A35, WBP::ALetter},
757  {0x10A38, 0x10A3A, WBP::Extend},
758  {0x10A3F, 0x10A3F, WBP::Extend},
759  {0x10A60, 0x10A7C, WBP::ALetter},
760  {0x10A80, 0x10A9C, WBP::ALetter},
761  {0x10AC0, 0x10AC7, WBP::ALetter},
762  {0x10AC9, 0x10AE4, WBP::ALetter},
763  {0x10AE5, 0x10AE6, WBP::Extend},
764  {0x10B00, 0x10B35, WBP::ALetter},
765  {0x10B40, 0x10B55, WBP::ALetter},
766  {0x10B60, 0x10B72, WBP::ALetter},
767  {0x10B80, 0x10B91, WBP::ALetter},
768  {0x10C00, 0x10C48, WBP::ALetter},
769  {0x10C80, 0x10CB2, WBP::ALetter},
770  {0x10CC0, 0x10CF2, WBP::ALetter},
771  {0x10D00, 0x10D23, WBP::ALetter},
772  {0x10D24, 0x10D27, WBP::Extend},
773  {0x10D30, 0x10D39, WBP::Numeric},
774  {0x10E80, 0x10EA9, WBP::ALetter},
775  {0x10EAB, 0x10EAC, WBP::Extend},
776  {0x10EB0, 0x10EB1, WBP::ALetter},
777  {0x10F00, 0x10F1C, WBP::ALetter},
778  {0x10F27, 0x10F27, WBP::ALetter},
779  {0x10F30, 0x10F45, WBP::ALetter},
780  {0x10F46, 0x10F50, WBP::Extend},
781  {0x10FB0, 0x10FC4, WBP::ALetter},
782  {0x10FE0, 0x10FF6, WBP::ALetter},
783  {0x11000, 0x11002, WBP::Extend},
784  {0x11003, 0x11037, WBP::ALetter},
785  {0x11038, 0x11046, WBP::Extend},
786  {0x11066, 0x1106F, WBP::Numeric},
787  {0x1107F, 0x11082, WBP::Extend},
788  {0x11083, 0x110AF, WBP::ALetter},
789  {0x110B0, 0x110BA, WBP::Extend},
790  {0x110BD, 0x110BD, WBP::Format},
791  {0x110CD, 0x110CD, WBP::Format},
792  {0x110D0, 0x110E8, WBP::ALetter},
793  {0x110F0, 0x110F9, WBP::Numeric},
794  {0x11100, 0x11102, WBP::Extend},
795  {0x11103, 0x11126, WBP::ALetter},
796  {0x11127, 0x11134, WBP::Extend},
797  {0x11136, 0x1113F, WBP::Numeric},
798  {0x11144, 0x11144, WBP::ALetter},
799  {0x11145, 0x11146, WBP::Extend},
800  {0x11147, 0x11147, WBP::ALetter},
801  {0x11150, 0x11172, WBP::ALetter},
802  {0x11173, 0x11173, WBP::Extend},
803  {0x11176, 0x11176, WBP::ALetter},
804  {0x11180, 0x11182, WBP::Extend},
805  {0x11183, 0x111B2, WBP::ALetter},
806  {0x111B3, 0x111C0, WBP::Extend},
807  {0x111C1, 0x111C4, WBP::ALetter},
808  {0x111C9, 0x111CC, WBP::Extend},
809  {0x111CE, 0x111CF, WBP::Extend},
810  {0x111D0, 0x111D9, WBP::Numeric},
811  {0x111DA, 0x111DA, WBP::ALetter},
812  {0x111DC, 0x111DC, WBP::ALetter},
813  {0x11200, 0x11211, WBP::ALetter},
814  {0x11213, 0x1122B, WBP::ALetter},
815  {0x1122C, 0x11237, WBP::Extend},
816  {0x1123E, 0x1123E, WBP::Extend},
817  {0x11280, 0x11286, WBP::ALetter},
818  {0x11288, 0x11288, WBP::ALetter},
819  {0x1128A, 0x1128D, WBP::ALetter},
820  {0x1128F, 0x1129D, WBP::ALetter},
821  {0x1129F, 0x112A8, WBP::ALetter},
822  {0x112B0, 0x112DE, WBP::ALetter},
823  {0x112DF, 0x112EA, WBP::Extend},
824  {0x112F0, 0x112F9, WBP::Numeric},
825  {0x11300, 0x11303, WBP::Extend},
826  {0x11305, 0x1130C, WBP::ALetter},
827  {0x1130F, 0x11310, WBP::ALetter},
828  {0x11313, 0x11328, WBP::ALetter},
829  {0x1132A, 0x11330, WBP::ALetter},
830  {0x11332, 0x11333, WBP::ALetter},
831  {0x11335, 0x11339, WBP::ALetter},
832  {0x1133B, 0x1133C, WBP::Extend},
833  {0x1133D, 0x1133D, WBP::ALetter},
834  {0x1133E, 0x11344, WBP::Extend},
835  {0x11347, 0x11348, WBP::Extend},
836  {0x1134B, 0x1134D, WBP::Extend},
837  {0x11350, 0x11350, WBP::ALetter},
838  {0x11357, 0x11357, WBP::Extend},
839  {0x1135D, 0x11361, WBP::ALetter},
840  {0x11362, 0x11363, WBP::Extend},
841  {0x11366, 0x1136C, WBP::Extend},
842  {0x11370, 0x11374, WBP::Extend},
843  {0x11400, 0x11434, WBP::ALetter},
844  {0x11435, 0x11446, WBP::Extend},
845  {0x11447, 0x1144A, WBP::ALetter},
846  {0x11450, 0x11459, WBP::Numeric},
847  {0x1145E, 0x1145E, WBP::Extend},
848  {0x1145F, 0x11461, WBP::ALetter},
849  {0x11480, 0x114AF, WBP::ALetter},
850  {0x114B0, 0x114C3, WBP::Extend},
851  {0x114C4, 0x114C5, WBP::ALetter},
852  {0x114C7, 0x114C7, WBP::ALetter},
853  {0x114D0, 0x114D9, WBP::Numeric},
854  {0x11580, 0x115AE, WBP::ALetter},
855  {0x115AF, 0x115B5, WBP::Extend},
856  {0x115B8, 0x115C0, WBP::Extend},
857  {0x115D8, 0x115DB, WBP::ALetter},
858  {0x115DC, 0x115DD, WBP::Extend},
859  {0x11600, 0x1162F, WBP::ALetter},
860  {0x11630, 0x11640, WBP::Extend},
861  {0x11644, 0x11644, WBP::ALetter},
862  {0x11650, 0x11659, WBP::Numeric},
863  {0x11680, 0x116AA, WBP::ALetter},
864  {0x116AB, 0x116B7, WBP::Extend},
865  {0x116B8, 0x116B8, WBP::ALetter},
866  {0x116C0, 0x116C9, WBP::Numeric},
867  {0x1171D, 0x1172B, WBP::Extend},
868  {0x11730, 0x11739, WBP::Numeric},
869  {0x11800, 0x1182B, WBP::ALetter},
870  {0x1182C, 0x1183A, WBP::Extend},
871  {0x118A0, 0x118DF, WBP::ALetter},
872  {0x118E0, 0x118E9, WBP::Numeric},
873  {0x118FF, 0x11906, WBP::ALetter},
874  {0x11909, 0x11909, WBP::ALetter},
875  {0x1190C, 0x11913, WBP::ALetter},
876  {0x11915, 0x11916, WBP::ALetter},
877  {0x11918, 0x1192F, WBP::ALetter},
878  {0x11930, 0x11935, WBP::Extend},
879  {0x11937, 0x11938, WBP::Extend},
880  {0x1193B, 0x1193E, WBP::Extend},
881  {0x1193F, 0x1193F, WBP::ALetter},
882  {0x11940, 0x11940, WBP::Extend},
883  {0x11941, 0x11941, WBP::ALetter},
884  {0x11942, 0x11943, WBP::Extend},
885  {0x11950, 0x11959, WBP::Numeric},
886  {0x119A0, 0x119A7, WBP::ALetter},
887  {0x119AA, 0x119D0, WBP::ALetter},
888  {0x119D1, 0x119D7, WBP::Extend},
889  {0x119DA, 0x119E0, WBP::Extend},
890  {0x119E1, 0x119E1, WBP::ALetter},
891  {0x119E3, 0x119E3, WBP::ALetter},
892  {0x119E4, 0x119E4, WBP::Extend},
893  {0x11A00, 0x11A00, WBP::ALetter},
894  {0x11A01, 0x11A0A, WBP::Extend},
895  {0x11A0B, 0x11A32, WBP::ALetter},
896  {0x11A33, 0x11A39, WBP::Extend},
897  {0x11A3A, 0x11A3A, WBP::ALetter},
898  {0x11A3B, 0x11A3E, WBP::Extend},
899  {0x11A47, 0x11A47, WBP::Extend},
900  {0x11A50, 0x11A50, WBP::ALetter},
901  {0x11A51, 0x11A5B, WBP::Extend},
902  {0x11A5C, 0x11A89, WBP::ALetter},
903  {0x11A8A, 0x11A99, WBP::Extend},
904  {0x11A9D, 0x11A9D, WBP::ALetter},
905  {0x11AC0, 0x11AF8, WBP::ALetter},
906  {0x11C00, 0x11C08, WBP::ALetter},
907  {0x11C0A, 0x11C2E, WBP::ALetter},
908  {0x11C2F, 0x11C36, WBP::Extend},
909  {0x11C38, 0x11C3F, WBP::Extend},
910  {0x11C40, 0x11C40, WBP::ALetter},
911  {0x11C50, 0x11C59, WBP::Numeric},
912  {0x11C72, 0x11C8F, WBP::ALetter},
913  {0x11C92, 0x11CA7, WBP::Extend},
914  {0x11CA9, 0x11CB6, WBP::Extend},
915  {0x11D00, 0x11D06, WBP::ALetter},
916  {0x11D08, 0x11D09, WBP::ALetter},
917  {0x11D0B, 0x11D30, WBP::ALetter},
918  {0x11D31, 0x11D36, WBP::Extend},
919  {0x11D3A, 0x11D3A, WBP::Extend},
920  {0x11D3C, 0x11D3D, WBP::Extend},
921  {0x11D3F, 0x11D45, WBP::Extend},
922  {0x11D46, 0x11D46, WBP::ALetter},
923  {0x11D47, 0x11D47, WBP::Extend},
924  {0x11D50, 0x11D59, WBP::Numeric},
925  {0x11D60, 0x11D65, WBP::ALetter},
926  {0x11D67, 0x11D68, WBP::ALetter},
927  {0x11D6A, 0x11D89, WBP::ALetter},
928  {0x11D8A, 0x11D8E, WBP::Extend},
929  {0x11D90, 0x11D91, WBP::Extend},
930  {0x11D93, 0x11D97, WBP::Extend},
931  {0x11D98, 0x11D98, WBP::ALetter},
932  {0x11DA0, 0x11DA9, WBP::Numeric},
933  {0x11EE0, 0x11EF2, WBP::ALetter},
934  {0x11EF3, 0x11EF6, WBP::Extend},
935  {0x11FB0, 0x11FB0, WBP::ALetter},
936  {0x12000, 0x12399, WBP::ALetter},
937  {0x12400, 0x1246E, WBP::ALetter},
938  {0x12480, 0x12543, WBP::ALetter},
939  {0x13000, 0x1342E, WBP::ALetter},
940  {0x13430, 0x13438, WBP::Format},
941  {0x14400, 0x14646, WBP::ALetter},
942  {0x16800, 0x16A38, WBP::ALetter},
943  {0x16A40, 0x16A5E, WBP::ALetter},
944  {0x16A60, 0x16A69, WBP::Numeric},
945  {0x16AD0, 0x16AED, WBP::ALetter},
946  {0x16AF0, 0x16AF4, WBP::Extend},
947  {0x16B00, 0x16B2F, WBP::ALetter},
948  {0x16B30, 0x16B36, WBP::Extend},
949  {0x16B40, 0x16B43, WBP::ALetter},
950  {0x16B50, 0x16B59, WBP::Numeric},
951  {0x16B63, 0x16B77, WBP::ALetter},
952  {0x16B7D, 0x16B8F, WBP::ALetter},
953  {0x16E40, 0x16E7F, WBP::ALetter},
954  {0x16F00, 0x16F4A, WBP::ALetter},
955  {0x16F4F, 0x16F4F, WBP::Extend},
956  {0x16F50, 0x16F50, WBP::ALetter},
957  {0x16F51, 0x16F87, WBP::Extend},
958  {0x16F8F, 0x16F92, WBP::Extend},
959  {0x16F93, 0x16F9F, WBP::ALetter},
960  {0x16FE0, 0x16FE1, WBP::ALetter},
961  {0x16FE3, 0x16FE3, WBP::ALetter},
962  {0x16FE4, 0x16FE4, WBP::Extend},
963  {0x16FF0, 0x16FF1, WBP::Extend},
964  {0x1B000, 0x1B000, WBP::Katakana},
965  {0x1B164, 0x1B167, WBP::Katakana},
966  {0x1BC00, 0x1BC6A, WBP::ALetter},
967  {0x1BC70, 0x1BC7C, WBP::ALetter},
968  {0x1BC80, 0x1BC88, WBP::ALetter},
969  {0x1BC90, 0x1BC99, WBP::ALetter},
970  {0x1BC9D, 0x1BC9E, WBP::Extend},
971  {0x1BCA0, 0x1BCA3, WBP::Format},
972  {0x1D165, 0x1D169, WBP::Extend},
973  {0x1D16D, 0x1D172, WBP::Extend},
974  {0x1D173, 0x1D17A, WBP::Format},
975  {0x1D17B, 0x1D182, WBP::Extend},
976  {0x1D185, 0x1D18B, WBP::Extend},
977  {0x1D1AA, 0x1D1AD, WBP::Extend},
978  {0x1D242, 0x1D244, WBP::Extend},
979  {0x1D400, 0x1D454, WBP::ALetter},
980  {0x1D456, 0x1D49C, WBP::ALetter},
981  {0x1D49E, 0x1D49F, WBP::ALetter},
982  {0x1D4A2, 0x1D4A2, WBP::ALetter},
983  {0x1D4A5, 0x1D4A6, WBP::ALetter},
984  {0x1D4A9, 0x1D4AC, WBP::ALetter},
985  {0x1D4AE, 0x1D4B9, WBP::ALetter},
986  {0x1D4BB, 0x1D4BB, WBP::ALetter},
987  {0x1D4BD, 0x1D4C3, WBP::ALetter},
988  {0x1D4C5, 0x1D505, WBP::ALetter},
989  {0x1D507, 0x1D50A, WBP::ALetter},
990  {0x1D50D, 0x1D514, WBP::ALetter},
991  {0x1D516, 0x1D51C, WBP::ALetter},
992  {0x1D51E, 0x1D539, WBP::ALetter},
993  {0x1D53B, 0x1D53E, WBP::ALetter},
994  {0x1D540, 0x1D544, WBP::ALetter},
995  {0x1D546, 0x1D546, WBP::ALetter},
996  {0x1D54A, 0x1D550, WBP::ALetter},
997  {0x1D552, 0x1D6A5, WBP::ALetter},
998  {0x1D6A8, 0x1D6C0, WBP::ALetter},
999  {0x1D6C2, 0x1D6DA, WBP::ALetter},
1000  {0x1D6DC, 0x1D6FA, WBP::ALetter},
1001  {0x1D6FC, 0x1D714, WBP::ALetter},
1002  {0x1D716, 0x1D734, WBP::ALetter},
1003  {0x1D736, 0x1D74E, WBP::ALetter},
1004  {0x1D750, 0x1D76E, WBP::ALetter},
1005  {0x1D770, 0x1D788, WBP::ALetter},
1006  {0x1D78A, 0x1D7A8, WBP::ALetter},
1007  {0x1D7AA, 0x1D7C2, WBP::ALetter},
1008  {0x1D7C4, 0x1D7CB, WBP::ALetter},
1009  {0x1D7CE, 0x1D7FF, WBP::Numeric},
1010  {0x1DA00, 0x1DA36, WBP::Extend},
1011  {0x1DA3B, 0x1DA6C, WBP::Extend},
1012  {0x1DA75, 0x1DA75, WBP::Extend},
1013  {0x1DA84, 0x1DA84, WBP::Extend},
1014  {0x1DA9B, 0x1DA9F, WBP::Extend},
1015  {0x1DAA1, 0x1DAAF, WBP::Extend},
1016  {0x1E000, 0x1E006, WBP::Extend},
1017  {0x1E008, 0x1E018, WBP::Extend},
1018  {0x1E01B, 0x1E021, WBP::Extend},
1019  {0x1E023, 0x1E024, WBP::Extend},
1020  {0x1E026, 0x1E02A, WBP::Extend},
1021  {0x1E100, 0x1E12C, WBP::ALetter},
1022  {0x1E130, 0x1E136, WBP::Extend},
1023  {0x1E137, 0x1E13D, WBP::ALetter},
1024  {0x1E140, 0x1E149, WBP::Numeric},
1025  {0x1E14E, 0x1E14E, WBP::ALetter},
1026  {0x1E2C0, 0x1E2EB, WBP::ALetter},
1027  {0x1E2EC, 0x1E2EF, WBP::Extend},
1028  {0x1E2F0, 0x1E2F9, WBP::Numeric},
1029  {0x1E800, 0x1E8C4, WBP::ALetter},
1030  {0x1E8D0, 0x1E8D6, WBP::Extend},
1031  {0x1E900, 0x1E943, WBP::ALetter},
1032  {0x1E944, 0x1E94A, WBP::Extend},
1033  {0x1E94B, 0x1E94B, WBP::ALetter},
1034  {0x1E950, 0x1E959, WBP::Numeric},
1035  {0x1EE00, 0x1EE03, WBP::ALetter},
1036  {0x1EE05, 0x1EE1F, WBP::ALetter},
1037  {0x1EE21, 0x1EE22, WBP::ALetter},
1038  {0x1EE24, 0x1EE24, WBP::ALetter},
1039  {0x1EE27, 0x1EE27, WBP::ALetter},
1040  {0x1EE29, 0x1EE32, WBP::ALetter},
1041  {0x1EE34, 0x1EE37, WBP::ALetter},
1042  {0x1EE39, 0x1EE39, WBP::ALetter},
1043  {0x1EE3B, 0x1EE3B, WBP::ALetter},
1044  {0x1EE42, 0x1EE42, WBP::ALetter},
1045  {0x1EE47, 0x1EE47, WBP::ALetter},
1046  {0x1EE49, 0x1EE49, WBP::ALetter},
1047  {0x1EE4B, 0x1EE4B, WBP::ALetter},
1048  {0x1EE4D, 0x1EE4F, WBP::ALetter},
1049  {0x1EE51, 0x1EE52, WBP::ALetter},
1050  {0x1EE54, 0x1EE54, WBP::ALetter},
1051  {0x1EE57, 0x1EE57, WBP::ALetter},
1052  {0x1EE59, 0x1EE59, WBP::ALetter},
1053  {0x1EE5B, 0x1EE5B, WBP::ALetter},
1054  {0x1EE5D, 0x1EE5D, WBP::ALetter},
1055  {0x1EE5F, 0x1EE5F, WBP::ALetter},
1056  {0x1EE61, 0x1EE62, WBP::ALetter},
1057  {0x1EE64, 0x1EE64, WBP::ALetter},
1058  {0x1EE67, 0x1EE6A, WBP::ALetter},
1059  {0x1EE6C, 0x1EE72, WBP::ALetter},
1060  {0x1EE74, 0x1EE77, WBP::ALetter},
1061  {0x1EE79, 0x1EE7C, WBP::ALetter},
1062  {0x1EE7E, 0x1EE7E, WBP::ALetter},
1063  {0x1EE80, 0x1EE89, WBP::ALetter},
1064  {0x1EE8B, 0x1EE9B, WBP::ALetter},
1065  {0x1EEA1, 0x1EEA3, WBP::ALetter},
1066  {0x1EEA5, 0x1EEA9, WBP::ALetter},
1067  {0x1EEAB, 0x1EEBB, WBP::ALetter},
1068  {0x1F130, 0x1F149, WBP::ALetter},
1069  {0x1F150, 0x1F169, WBP::ALetter},
1070  {0x1F170, 0x1F189, WBP::ALetter},
1071  {0x1F1E6, 0x1F1FF, WBP::Regional_Indicator},
1072  {0x1F3FB, 0x1F3FF, WBP::Extend},
1073  {0x1FBF0, 0x1FBF9, WBP::Numeric},
1074  {0xE0001, 0xE0001, WBP::Format},
1075  {0xE0020, 0xE007F, WBP::Extend},
1076  {0xE0100, 0xE01EF, WBP::Extend},
1077 }};
1078 
1079 // Construct table of just WBP::Extend character intervals
1080 constexpr auto g_extend_characters{[]() constexpr {
1081  // Compute number of extend character intervals
1082  constexpr size_t size = []() constexpr {
1083  size_t count = 0;
1084  for (auto interval : g_word_break_intervals) {
1085  if (interval.property == WBP::Extend) {
1086  count++;
1087  }
1088  }
1089  return count;
1090  }();
1091 
1092  // Create array of extend character intervals
1093  std::array<Interval, size> result{};
1094  size_t index = 0;
1095  for (auto interval : g_word_break_intervals) {
1096  if (interval.property == WBP::Extend) {
1097  result[index++] = {interval.first, interval.last}; // NOLINT
1098  }
1099  }
1100  return result;
1101 }()};
1102 
1103 // Find a codepoint inside a sorted list of Interval.
1104 template <size_t N>
1105 bool Bisearch(uint32_t ucs, const std::array<Interval, N>& table) {
1106  if (ucs < table.front().first || ucs > table.back().last) { // NOLINT
1107  return false;
1108  }
1109 
1110  int min = 0;
1111  int max = N - 1;
1112  while (max >= min) {
1113  const int mid = (min + max) / 2;
1114  if (ucs > table[mid].last) { // NOLINT
1115  min = mid + 1;
1116  } else if (ucs < table[mid].first) { // NOLINT
1117  max = mid - 1;
1118  } else {
1119  return true;
1120  }
1121  }
1122 
1123  return false;
1124 }
1125 
// Binary search for |ucs| inside |table|, a sorted list of entries exposing
// |first| and |last| bounds (plus any extra payload). When an entry containing
// |ucs| is found, it is copied into |*out| and true is returned. Otherwise
// |*out| is left untouched and false is returned.
template <class C, size_t N>
bool Bisearch(uint32_t ucs, const std::array<C, N>& table, C* out) {
  // Quick rejection: |ucs| lies outside the span covered by the whole table.
  if (ucs < table.front().first || ucs > table.back().last) {  // NOLINT
    return false;
  }

  // Search the half-open index range [lo, hi).
  size_t lo = 0;
  size_t hi = N;
  while (lo < hi) {
    const size_t mid = lo + (hi - lo - 1) / 2;
    const C& candidate = table[mid];  // NOLINT
    if (ucs > candidate.last) {
      lo = mid + 1;
    } else if (ucs < candidate.first) {
      hi = mid;
    } else {
      *out = candidate;
      return true;
    }
  }

  return false;
}
1149 
1150 int codepoint_width(uint32_t ucs) {
1151  if (ftxui::IsControl(ucs)) {
1152  return -1;
1153  }
1154 
1155  if (ftxui::IsCombining(ucs)) {
1156  return 0;
1157  }
1158 
1159  if (ftxui::IsFullWidth(ucs)) {
1160  return 2;
1161  }
1162 
1163  return 1;
1164 }
1165 
1166 } // namespace
1167 
1168 namespace ftxui {
1169 
// Decode one codepoint from the UTF8 encoded string |input|, beginning at byte
// index |start|. A codepoint is encoded using between 1 and 4 bytes.
//
// On success, the codepoint is stored into |ucs|, |end| receives the index of
// the first byte following the sequence, and true is returned.
//
// On failure (|start| is past the end, the lead byte is not a valid sequence
// start, or the sequence is truncated), |end| is set to |start| + 1, |ucs| is
// left untouched and false is returned.
//
// Only the lead byte is inspected: continuation bytes are not validated.
bool EatCodePoint(const std::string& input,
                  size_t start,
                  size_t* end,
                  uint32_t* ucs) {
  if (start >= input.size()) {
    *end = start + 1;
    return false;
  }
  const size_t available = input.size() - start;
  const auto lead = static_cast<uint8_t>(input[start]);

  // The lead byte determines the sequence length and carries the codepoint's
  // most significant bits.
  size_t length = 0;
  uint32_t value = 0;
  if ((lead & 0x80) == 0x00) {         // 0xxxxxxx
    length = 1;
    value = lead & 0x7F;
  } else if ((lead & 0xE0) == 0xC0) {  // 110xxxxx
    length = 2;
    value = lead & 0x1F;
  } else if ((lead & 0xF0) == 0xE0) {  // 1110xxxx
    length = 3;
    value = lead & 0x0F;
  } else if ((lead & 0xF8) == 0xF0) {  // 11110xxx
    length = 4;
    value = lead & 0x07;
  }

  // Invalid lead byte, or not enough bytes left: skip a single byte.
  if (length == 0 || length > available) {
    *end = start + 1;
    return false;
  }

  // Every following byte contributes its six low bits.
  for (size_t i = 1; i < length; ++i) {
    const auto next = static_cast<uint8_t>(input[start + i]);
    value = (value << 6) | (next & 0x3F);
  }

  *ucs = value;
  *end = start + length;
  return true;
}
1239 
// Decode one codepoint from the wide string |input|, beginning at element
// index |start|. Store the codepoint into |ucs| and set |end| to the index of
// the next element to decode for consecutive executions.
//
// When wchar_t is 4 bytes wide, every element is taken as one codepoint.
// Otherwise the string is read as UTF16, where a codepoint uses 1 or 2 words.
//
// Returns false when |start| is past the end (|end| = |start| + 1) or when the
// second word of a 2 word sequence is missing (|end| = |start| + 2). |ucs| is
// left untouched in both cases.
bool EatCodePoint(const std::wstring& input,
                  size_t start,
                  size_t* end,
                  uint32_t* ucs) {
  const size_t length = input.size();
  if (start >= length) {
    *end = start + 1;
    return false;
  }
  const wchar_t first_unit = input[start];

  if constexpr (sizeof(wchar_t) == 4) {
    // On linux, wstring uses the UTF32 encoding: one element, one codepoint.
    *ucs = static_cast<uint32_t>(first_unit);  // NOLINT
    *end = start + 1;
    return true;
  } else {
    // On windows, wstring uses the UTF16 encoding.
    const int32_t high = first_unit;  // NOLINT

    // Anything outside [0xd800, 0xdc00) is taken as a 1 word codepoint.
    const bool starts_pair = high >= 0xd800 && high < 0xdc00;  // NOLINT
    if (!starts_pair) {
      *ucs = high;
      *end = start + 1;
      return true;
    }

    // 2 words sequence: both words are consumed, even when the second one
    // turns out to be missing.
    *end = start + 2;
    if (start + 1 >= length) {
      return false;
    }
    const int32_t low = input[start + 1];                          // NOLINT
    *ucs = ((high & 0x3ff) << 10) + (low & 0x3ff) + 0x10000;  // NOLINT
    return true;
  }
}
1281 
1282 bool IsCombining(uint32_t ucs) {
1283  return Bisearch(ucs, g_extend_characters);
1284 }
1285 
1286 bool IsFullWidth(uint32_t ucs) {
1287  if (ucs < 0x0300) // Quick path: // NOLINT
1288  return false;
1289 
1290  return Bisearch(ucs, g_full_width_characters);
1291 }
1292 
// Whether |ucs| is treated as a control character: any codepoint below 32
// except the line feed (10), and any codepoint in [0x7f, 0xa0).
bool IsControl(uint32_t ucs) {
  constexpr uint32_t kLineFeed = 10;
  constexpr uint32_t kFirstNonC0 = 32;
  constexpr uint32_t kDelete = 0x7f;
  constexpr uint32_t kFirstAfterC1 = 0xa0;

  const bool in_low_range = ucs < kFirstNonC0 && ucs != kLineFeed;
  const bool in_high_range = ucs >= kDelete && ucs < kFirstAfterC1;
  return in_low_range || in_high_range;
}
1306 
1308  WordBreakPropertyInterval interval = {0, 0, WBP::ALetter};
1309  std::ignore = Bisearch(codepoint, g_word_break_intervals, &interval);
1310  return interval.property;
1311 }
1312 
1313 int wchar_width(wchar_t ucs) {
1314  return codepoint_width(uint32_t(ucs));
1315 }
1316 
1317 int wstring_width(const std::wstring& text) {
1318  int width = 0;
1319 
1320  for (const wchar_t& it : text) {
1321  const int w = wchar_width(it);
1322  if (w < 0) {
1323  return -1;
1324  }
1325  width += w;
1326  }
1327  return width;
1328 }
1329 
1330 int string_width(const std::string& input) {
1331  int width = 0;
1332  size_t start = 0;
1333  while (start < input.size()) {
1334  uint32_t codepoint = 0;
1335  if (!EatCodePoint(input, start, &start, &codepoint)) {
1336  continue;
1337  }
1338 
1339  if (IsControl(codepoint)) {
1340  continue;
1341  }
1342 
1343  if (IsCombining(codepoint)) {
1344  continue;
1345  }
1346 
1347  if (IsFullWidth(codepoint)) {
1348  width += 2;
1349  continue;
1350  }
1351 
1352  width += 1;
1353  }
1354  return width;
1355 }
1356 
1357 std::vector<std::string> Utf8ToGlyphs(const std::string& input) {
1358  std::vector<std::string> out;
1359  out.reserve(input.size());
1360  size_t start = 0;
1361  size_t end = 0;
1362  while (start < input.size()) {
1363  uint32_t codepoint = 0;
1364  if (!EatCodePoint(input, start, &end, &codepoint)) {
1365  start = end;
1366  continue;
1367  }
1368 
1369  const std::string append = input.substr(start, end - start);
1370  start = end;
1371 
1372  // Ignore control characters.
1373  if (IsControl(codepoint)) {
1374  continue;
1375  }
1376 
1377  // Combining characters are put with the previous glyph they are modifying.
1378  if (IsCombining(codepoint)) {
1379  if (!out.empty()) {
1380  out.back() += append;
1381  }
1382  continue;
1383  }
1384 
1385  // Fullwidth characters take two cells. The second is made of the empty
1386  // string to reserve the space the first is taking.
1387  if (IsFullWidth(codepoint)) {
1388  out.push_back(append);
1389  out.emplace_back("");
1390  continue;
1391  }
1392 
1393  // Normal characters:
1394  out.push_back(append);
1395  }
1396  return out;
1397 }
1398 
1399 size_t GlyphPrevious(const std::string& input, size_t start) {
1400  while (true) {
1401  if (start == 0) {
1402  return 0;
1403  }
1404  start--;
1405 
1406  // Skip the UTF8 continuation bytes.
1407  if ((input[start] & 0b1100'0000) == 0b1000'0000) {
1408  continue;
1409  }
1410 
1411  uint32_t codepoint = 0;
1412  size_t end = 0;
1413  const bool eaten = EatCodePoint(input, start, &end, &codepoint);
1414 
1415  // Ignore invalid, control characters and combining characters.
1416  if (!eaten || IsControl(codepoint) || IsCombining(codepoint)) {
1417  continue;
1418  }
1419 
1420  return start;
1421  }
1422 }
1423 
1424 size_t GlyphNext(const std::string& input, size_t start) {
1425  bool glyph_found = false;
1426  while (start < input.size()) {
1427  size_t end = 0;
1428  uint32_t codepoint = 0;
1429  const bool eaten = EatCodePoint(input, start, &end, &codepoint);
1430 
1431  // Ignore invalid, control characters and combining characters.
1432  if (!eaten || IsControl(codepoint) || IsCombining(codepoint)) {
1433  start = end;
1434  continue;
1435  }
1436 
1437  // We eat the beginning of the next glyph. If we are eating the one
1438  // requested, return its start position immediately.
1439  if (glyph_found) {
1440  return static_cast<int>(start);
1441  }
1442 
1443  // Otherwise, skip this glyph and iterate:
1444  glyph_found = true;
1445  start = end;
1446  }
1447  return static_cast<int>(input.size());
1448 }
1449 
1450 size_t GlyphIterate(const std::string& input, int glyph_offset, size_t start) {
1451  if (glyph_offset >= 0) {
1452  for (int i = 0; i < glyph_offset; ++i) {
1453  start = GlyphNext(input, start);
1454  }
1455  return start;
1456  } else {
1457  for (int i = 0; i < -glyph_offset; ++i) {
1458  start = GlyphPrevious(input, start);
1459  }
1460  return start;
1461  }
1462 }
1463 
1464 std::vector<int> CellToGlyphIndex(const std::string& input) {
1465  int x = -1;
1466  std::vector<int> out;
1467  out.reserve(input.size());
1468  size_t start = 0;
1469  size_t end = 0;
1470  while (start < input.size()) {
1471  uint32_t codepoint = 0;
1472  const bool eaten = EatCodePoint(input, start, &end, &codepoint);
1473  start = end;
1474 
1475  // Ignore invalid / control characters.
1476  if (!eaten || IsControl(codepoint)) {
1477  continue;
1478  }
1479 
1480  // Combining characters are put with the previous glyph they are modifying.
1481  if (IsCombining(codepoint)) {
1482  if (x == -1) {
1483  ++x;
1484  out.push_back(x);
1485  }
1486  continue;
1487  }
1488 
1489  // Fullwidth characters take two cells. The second is made of the empty
1490  // string to reserve the space the first is taking.
1491  if (IsFullWidth(codepoint)) {
1492  ++x;
1493  out.push_back(x);
1494  out.push_back(x);
1495  continue;
1496  }
1497 
1498  // Normal characters:
1499  ++x;
1500  out.push_back(x);
1501  }
1502  return out;
1503 }
1504 
1505 int GlyphCount(const std::string& input) {
1506  int size = 0;
1507  size_t start = 0;
1508  size_t end = 0;
1509  while (start < input.size()) {
1510  uint32_t codepoint = 0;
1511  const bool eaten = EatCodePoint(input, start, &end, &codepoint);
1512  start = end;
1513 
1514  // Ignore invalid characters:
1515  if (!eaten || IsControl(codepoint)) {
1516  continue;
1517  }
1518 
1519  // Ignore combining characters, except when they don't have a preceding to
1520  // combine with.
1521  if (IsCombining(codepoint)) {
1522  if (size == 0) {
1523  size++;
1524  }
1525  continue;
1526  }
1527 
1528  size++;
1529  }
1530  return size;
1531 }
1532 
1533 std::vector<WordBreakProperty> Utf8ToWordBreakProperty(
1534  const std::string& input) {
1535  std::vector<WordBreakProperty> out;
1536  out.reserve(input.size());
1537  size_t start = 0;
1538  size_t end = 0;
1539  while (start < input.size()) {
1540  uint32_t codepoint = 0;
1541  if (!EatCodePoint(input, start, &end, &codepoint)) {
1542  start = end;
1543  continue;
1544  }
1545  start = end;
1546 
1547  // Ignore control characters.
1548  if (IsControl(codepoint)) {
1549  continue;
1550  }
1551 
1552  // Ignore combining characters.
1553  if (IsCombining(codepoint)) {
1554  continue;
1555  }
1556 
1557  WordBreakPropertyInterval interval = {0, 0, WBP::ALetter};
1558  std::ignore = Bisearch(codepoint, g_word_break_intervals, &interval);
1559  out.push_back(interval.property);
1560  }
1561  return out;
1562 }
1563 
1564 /// Convert a UTF8 std::string into a std::wstring.
1565 std::string to_string(const std::wstring& s) {
1566  std::string out;
1567 
1568  size_t i = 0;
1569  uint32_t codepoint = 0;
1570  while (EatCodePoint(s, i, &i, &codepoint)) {
1571  // Code point <-> UTF-8 conversion
1572  //
1573  // ┏━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
1574  // ┃Byte 1 ┃Byte 2 ┃Byte 3 ┃Byte 4 ┃
1575  // ┡━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
1576  // │0xxxxxxx│ │ │ │
1577  // ├────────┼────────┼────────┼────────┤
1578  // │110xxxxx│10xxxxxx│ │ │
1579  // ├────────┼────────┼────────┼────────┤
1580  // │1110xxxx│10xxxxxx│10xxxxxx│ │
1581  // ├────────┼────────┼────────┼────────┤
1582  // │11110xxx│10xxxxxx│10xxxxxx│10xxxxxx│
1583  // └────────┴────────┴────────┴────────┘
1584 
1585  // 1 byte UTF8
1586  if (codepoint <= 0b000'0000'0111'1111) { // NOLINT
1587  const uint8_t p1 = codepoint;
1588  out.push_back(p1); // NOLINT
1589  continue;
1590  }
1591 
1592  // 2 bytes UTF8
1593  if (codepoint <= 0b000'0111'1111'1111) { // NOLINT
1594  uint8_t p2 = codepoint & 0b111111; // NOLINT
1595  codepoint >>= 6; // NOLINT
1596  uint8_t p1 = codepoint; // NOLINT
1597  out.push_back(0b11000000 + p1); // NOLINT
1598  out.push_back(0b10000000 + p2); // NOLINT
1599  continue;
1600  }
1601 
1602  // 3 bytes UTF8
1603  if (codepoint <= 0b1111'1111'1111'1111) { // NOLINT
1604  uint8_t p3 = codepoint & 0b111111; // NOLINT
1605  codepoint >>= 6; // NOLINT
1606  uint8_t p2 = codepoint & 0b111111; // NOLINT
1607  codepoint >>= 6; // NOLINT
1608  uint8_t p1 = codepoint; // NOLINT
1609  out.push_back(0b11100000 + p1); // NOLINT
1610  out.push_back(0b10000000 + p2); // NOLINT
1611  out.push_back(0b10000000 + p3); // NOLINT
1612  continue;
1613  }
1614 
1615  // 4 bytes UTF8
1616  if (codepoint <= 0b1'0000'1111'1111'1111'1111) { // NOLINT
1617  uint8_t p4 = codepoint & 0b111111; // NOLINT
1618  codepoint >>= 6; // NOLINT
1619  uint8_t p3 = codepoint & 0b111111; // NOLINT
1620  codepoint >>= 6; // NOLINT
1621  uint8_t p2 = codepoint & 0b111111; // NOLINT
1622  codepoint >>= 6; // NOLINT
1623  uint8_t p1 = codepoint; // NOLINT
1624  out.push_back(0b11110000 + p1); // NOLINT
1625  out.push_back(0b10000000 + p2); // NOLINT
1626  out.push_back(0b10000000 + p3); // NOLINT
1627  out.push_back(0b10000000 + p4); // NOLINT
1628  continue;
1629  }
1630 
1631  // Something else?
1632  }
1633  return out;
1634 }
1635 
1636 /// Convert a std::wstring into a UTF8 std::string.
1637 std::wstring to_wstring(const std::string& s) {
1638  std::wstring out;
1639 
1640  size_t i = 0;
1641  uint32_t codepoint = 0;
1642  while (EatCodePoint(s, i, &i, &codepoint)) {
1643  // On linux wstring are UTF32 encoded:
1644  if constexpr (sizeof(wchar_t) == 4) {
1645  out.push_back(codepoint); // NOLINT
1646  continue;
1647  }
1648 
1649  // On Windows, wstring are UTF16 encoded:
1650 
1651  // Codepoint encoded using 1 word:
1652  // NOLINTNEXTLINE
1653  if (codepoint < 0xD800 || (codepoint > 0xDFFF && codepoint < 0x10000)) {
1654  uint16_t p0 = codepoint; // NOLINT
1655  out.push_back(p0); // NOLINT
1656  continue;
1657  }
1658 
1659  // Codepoint encoded using 2 words:
1660  codepoint -= 0x010000; // NOLINT
1661  uint16_t p0 = (((codepoint << 12) >> 22) + 0xD800); // NOLINT
1662  uint16_t p1 = (((codepoint << 22) >> 22) + 0xDC00); // NOLINT
1663  out.push_back(p0); // NOLINT
1664  out.push_back(p1); // NOLINT
1665  }
1666  return out;
1667 }
1668 
1669 } // namespace ftxui
size_t GlyphNext(const std::string &input, size_t start)
Definition: string.cpp:1424
bool IsControl(uint32_t ucs)
Definition: string.cpp:1293
WordBreakProperty CodepointToWordBreakProperty(uint32_t codepoint)
Definition: string.cpp:1307
int wchar_width(wchar_t)
Definition: string.cpp:1313
Decorator size(WidthOrHeight, Constraint, int value)
Apply a constraint on the size of an element.
Definition: size.cpp:89
std::vector< std::string > Utf8ToGlyphs(const std::string &input)
Definition: string.cpp:1357
bool IsCombining(uint32_t ucs)
Definition: string.cpp:1282
int string_width(const std::string &)
Definition: string.cpp:1330
std::wstring to_wstring(const std::string &s)
Convert a UTF8 std::string into a std::wstring.
Definition: string.cpp:1637
std::string to_string(const std::wstring &s)
Convert a std::wstring into a UTF8 std::string.
Definition: string.cpp:1565
Element text(std::wstring text)
Display a piece of unicode text.
Definition: text.cpp:119
bool EatCodePoint(const std::string &input, size_t start, size_t *end, uint32_t *ucs)
Definition: string.cpp:1174
std::vector< int > CellToGlyphIndex(const std::string &input)
Definition: string.cpp:1464
int GlyphCount(const std::string &input)
Definition: string.cpp:1505
bool IsFullWidth(uint32_t ucs)
Definition: string.cpp:1286
std::vector< WordBreakProperty > Utf8ToWordBreakProperty(const std::string &input)
Definition: string.cpp:1533
size_t GlyphIterate(const std::string &input, int glyph_offset, size_t start)
Definition: string.cpp:1450
int wstring_width(const std::wstring &)
Definition: string.cpp:1317
size_t GlyphPrevious(const std::string &input, size_t start)
Definition: string.cpp:1399