unicode_categories/
lib.rs1mod tables;
21
/// Queries for the Unicode general category of a value.
///
/// Implementors supply one predicate per general category (the required
/// methods). The provided methods combine those predicates into the major
/// classes: other, letter, mark, number, punctuation, symbol and separator.
pub trait UnicodeCategories: Sized + Copy {
    /// Returns `true` if this value is in the "Other, control" (Cc) category.
    fn is_other_control(self) -> bool;

    /// Returns `true` if this value is in the "Other, format" (Cf) category.
    fn is_other_format(self) -> bool;

    /// Returns `true` if this value is in the "Other, private use" (Co) category.
    fn is_other_private_use(self) -> bool;

    /// Returns `true` if this value is in the "Letter, lowercase" (Ll) category.
    fn is_letter_lowercase(self) -> bool;

    /// Returns `true` if this value is in the "Letter, modifier" (Lm) category.
    fn is_letter_modifier(self) -> bool;

    /// Returns `true` if this value is in the "Letter, other" (Lo) category.
    fn is_letter_other(self) -> bool;

    /// Returns `true` if this value is in the "Letter, titlecase" (Lt) category.
    fn is_letter_titlecase(self) -> bool;

    /// Returns `true` if this value is in the "Letter, uppercase" (Lu) category.
    fn is_letter_uppercase(self) -> bool;

    /// Returns `true` if this value is in the "Mark, spacing combining" (Mc) category.
    fn is_mark_spacing_combining(self) -> bool;

    /// Returns `true` if this value is in the "Mark, enclosing" (Me) category.
    fn is_mark_enclosing(self) -> bool;

    /// Returns `true` if this value is in the "Mark, nonspacing" (Mn) category.
    fn is_mark_nonspacing(self) -> bool;

    /// Returns `true` if this value is in the "Number, decimal digit" (Nd) category.
    fn is_number_decimal_digit(self) -> bool;

    /// Returns `true` if this value is in the "Number, letter" (Nl) category.
    fn is_number_letter(self) -> bool;

    /// Returns `true` if this value is in the "Number, other" (No) category.
    fn is_number_other(self) -> bool;

    /// Returns `true` if this value is in the "Punctuation, connector" (Pc) category.
    fn is_punctuation_connector(self) -> bool;

    /// Returns `true` if this value is in the "Punctuation, dash" (Pd) category.
    fn is_punctuation_dash(self) -> bool;

    /// Returns `true` if this value is in the "Punctuation, close" (Pe) category.
    fn is_punctuation_close(self) -> bool;

    /// Returns `true` if this value is in the "Punctuation, final quote" (Pf) category.
    fn is_punctuation_final_quote(self) -> bool;

    /// Returns `true` if this value is in the "Punctuation, initial quote" (Pi) category.
    fn is_punctuation_initial_quote(self) -> bool;

    /// Returns `true` if this value is in the "Punctuation, other" (Po) category.
    fn is_punctuation_other(self) -> bool;

    /// Returns `true` if this value is in the "Punctuation, open" (Ps) category.
    fn is_punctuation_open(self) -> bool;

    /// Returns `true` if this value is in the "Symbol, currency" (Sc) category.
    fn is_symbol_currency(self) -> bool;

    /// Returns `true` if this value is in the "Symbol, modifier" (Sk) category.
    fn is_symbol_modifier(self) -> bool;

    /// Returns `true` if this value is in the "Symbol, math" (Sm) category.
    fn is_symbol_math(self) -> bool;

    /// Returns `true` if this value is in the "Symbol, other" (So) category.
    fn is_symbol_other(self) -> bool;

    /// Returns `true` if this value is in the "Separator, line" (Zl) category.
    fn is_separator_line(self) -> bool;

    /// Returns `true` if this value is in the "Separator, paragraph" (Zp) category.
    fn is_separator_paragraph(self) -> bool;

    /// Returns `true` if this value is in the "Separator, space" (Zs) category.
    fn is_separator_space(self) -> bool;

    /// Returns `true` if this value is in any of the "Other" categories
    /// covered by this trait: control, format or private use.
    #[inline]
    fn is_other(self) -> bool {
        self.is_other_control()
            || self.is_other_format()
            || self.is_other_private_use()
    }

    /// Returns `true` if this value is in any of the "Letter" categories:
    /// lowercase, modifier, other, titlecase or uppercase.
    #[inline]
    fn is_letter(self) -> bool {
        self.is_letter_lowercase()
            || self.is_letter_modifier()
            || self.is_letter_other()
            || self.is_letter_titlecase()
            || self.is_letter_uppercase()
    }

    /// Returns `true` if this value is in any of the "Mark" categories:
    /// spacing combining, enclosing or nonspacing.
    #[inline]
    fn is_mark(self) -> bool {
        self.is_mark_spacing_combining()
            || self.is_mark_enclosing()
            || self.is_mark_nonspacing()
    }

    /// Returns `true` if this value is in any of the "Number" categories:
    /// decimal digit, letter or other.
    #[inline]
    fn is_number(self) -> bool {
        self.is_number_decimal_digit()
            || self.is_number_letter()
            || self.is_number_other()
    }

    /// Returns `true` if this value is in any of the "Punctuation" categories:
    /// connector, dash, close, final quote, initial quote, other or open.
    ///
    /// Each sub-category predicate is consulted at most once.
    #[inline]
    fn is_punctuation(self) -> bool {
        self.is_punctuation_connector()
            || self.is_punctuation_dash()
            || self.is_punctuation_close()
            || self.is_punctuation_final_quote()
            || self.is_punctuation_initial_quote()
            || self.is_punctuation_other()
            || self.is_punctuation_open()
    }

    /// Returns `true` if this value is in any of the "Symbol" categories:
    /// currency, modifier, math or other.
    #[inline]
    fn is_symbol(self) -> bool {
        self.is_symbol_currency()
            || self.is_symbol_modifier()
            || self.is_symbol_math()
            || self.is_symbol_other()
    }

    /// Returns `true` if this value is in any of the "Separator" categories:
    /// line, paragraph or space.
    #[inline]
    fn is_separator(self) -> bool {
        self.is_separator_line()
            || self.is_separator_paragraph()
            || self.is_separator_space()
    }
}
209
/// Returns `true` if `target` is present in `table`.
///
/// `table` must be sorted in ascending order, because the lookup is a binary
/// search; on an unsorted slice the result is unspecified.
///
/// The slice may have any lifetime, so both static tables and locally built
/// slices are accepted.
fn table_binary_search(target: char, table: &[char]) -> bool {
    table.binary_search(&target).is_ok()
}
213
214impl UnicodeCategories for char {
215 #[inline]
216 fn is_other_control(self) -> bool {
217 table_binary_search(self, tables::OTHER_CONTROL)
218 }
219
220 #[inline]
221 fn is_other_format(self) -> bool {
222 table_binary_search(self, tables::OTHER_FORMAT)
223 }
224
225 #[inline]
226 fn is_other_private_use(self) -> bool {
227 match self {
228 '\u{E000}'...'\u{F8FF}' => true,
230 '\u{F0000}'...'\u{FFFFD}' => true,
232 '\u{100000}'...'\u{10FFFD}' => true,
234 _ => table_binary_search(self, tables::OTHER_PRIVATE_USE)
235 }
236 }
237
238 #[inline]
239 fn is_letter_lowercase(self) -> bool {
240 table_binary_search(self, tables::LETTER_LOWERCASED)
241 }
242
243 #[inline]
244 fn is_letter_modifier(self) -> bool {
245 table_binary_search(self, tables::LETTER_MODIFIER)
246 }
247
248 #[inline]
249 fn is_letter_other(self) -> bool {
250 match self {
251 '\u{3400}'...'\u{4DB5}' => true,
253 '\u{4E00}'...'\u{9FD5}' => true,
255 '\u{AC00}'...'\u{D7A3}' => true,
257 '\u{17000}'...'\u{187EC}' => true,
259 '\u{20000}'...'\u{2A6D6}' => true,
261 '\u{2A700}'...'\u{2B734}' => true,
263 '\u{2B740}'...'\u{2B81D}' => true,
265 '\u{2B820}'...'\u{2CEA1}' => true,
267 _ => table_binary_search(self, tables::LETTER_OTHER)
268 }
269 }
270
271 #[inline]
272 fn is_letter_titlecase(self) -> bool {
273 table_binary_search(self, tables::LETTER_TITLECASE)
274 }
275
276 #[inline]
277 fn is_letter_uppercase(self) -> bool {
278 table_binary_search(self, tables::LETTER_UPPERCASE)
279 }
280
281 #[inline]
282 fn is_mark_spacing_combining(self) -> bool {
283 table_binary_search(self, tables::MARK_SPACE_COMBINING)
284 }
285
286 #[inline]
287 fn is_mark_enclosing(self) -> bool {
288 table_binary_search(self, tables::MARK_ENCLOSING)
289 }
290
291 #[inline]
292 fn is_mark_nonspacing(self) -> bool {
293 table_binary_search(self, tables::MARK_NONSPACING)
294 }
295
296 #[inline]
297 fn is_number_decimal_digit(self) -> bool {
298 table_binary_search(self, tables::NUMBER_DECIMAL_DIGIT)
299 }
300
301 #[inline]
302 fn is_number_letter(self) -> bool {
303 table_binary_search(self, tables::NUMBER_LETTER)
304 }
305
306 #[inline]
307 fn is_number_other(self) -> bool {
308 table_binary_search(self, tables::NUMBER_OTHER)
309 }
310
311 #[inline]
312 fn is_punctuation_connector(self) -> bool {
313 table_binary_search(self, tables::PUNCTUATION_CONNECTOR)
314 }
315
316 #[inline]
317 fn is_punctuation_dash(self) -> bool {
318 table_binary_search(self, tables::PUNCTUATION_DASH)
319 }
320
321 #[inline]
322 fn is_punctuation_close(self) -> bool {
323 table_binary_search(self, tables::PUNCTUATION_CLOSE)
324 }
325
326 #[inline]
327 fn is_punctuation_final_quote(self) -> bool {
328 table_binary_search(self, tables::PUNCTUATION_FINAL_QUOTE)
329 }
330
331 #[inline]
332 fn is_punctuation_initial_quote(self) -> bool {
333 table_binary_search(self, tables::PUNCTUATION_INITIAL_QUOTE)
334 }
335
336 #[inline]
337 fn is_punctuation_other(self) -> bool {
338 table_binary_search(self, tables::PUNCTUATION_OTHER)
339 }
340
341 #[inline]
342 fn is_punctuation_open(self) -> bool {
343 table_binary_search(self, tables::PUNCTUATION_OPEN)
344 }
345
346 #[inline]
347 fn is_symbol_currency(self) -> bool {
348 table_binary_search(self, tables::SYMBOL_CURRENCY)
349 }
350
351 #[inline]
352 fn is_symbol_modifier(self) -> bool {
353 table_binary_search(self, tables::SYMBOL_MODIFIER)
354 }
355
356 #[inline]
357 fn is_symbol_math(self) -> bool {
358 table_binary_search(self, tables::SYMBOL_MATH)
359 }
360
361 #[inline]
362 fn is_symbol_other(self) -> bool {
363 table_binary_search(self, tables::SYMBOL_OTHER)
364 }
365
366 #[inline]
367 fn is_separator_line(self) -> bool {
368 table_binary_search(self, tables::SEPARATOR_LINE)
369 }
370
371 #[inline]
372 fn is_separator_paragraph(self) -> bool {
373 table_binary_search(self, tables::SEPARATOR_PARAGRAPH)
374 }
375
376 #[inline]
377 fn is_separator_space(self) -> bool {
378 table_binary_search(self, tables::SEPARATOR_SPACE)
379 }
380}
381
/// Spot checks for the `char` implementation of `UnicodeCategories`.
#[cfg(test)]
mod tests {
    use super::UnicodeCategories;

    /// NUL and DEL are control characters; an ASCII letter is not.
    #[test]
    fn is_other_control() {
        assert!('\0'.is_other_control());
        assert!('\u{007F}'.is_other_control());
        assert!(!'f'.is_other_control());
    }

    /// Format characters are invisible, so the sample is spelled as an
    /// escape (U+00AD SOFT HYPHEN) rather than as a raw literal.
    #[test]
    fn is_other_format() {
        assert!('\u{00AD}'.is_other_format());
        assert!(!'0'.is_other_format());
    }

    /// U+F8FF is the last code point of the range matched directly by
    /// `is_other_private_use`.
    #[test]
    fn is_other_private_use() {
        assert!('\u{F8FF}'.is_other_private_use());
        assert!(!'n'.is_other_private_use());
    }

    /// A lowercase ASCII letter matches; an uppercase one does not.
    #[test]
    fn is_letter_lowercase() {
        assert!('q'.is_letter_lowercase());
        assert!(!'N'.is_letter_lowercase());
    }

    /// A modifier letter matches; a plain lowercase letter does not.
    #[test]
    fn is_letter_modifier() {
        assert!('ˢ'.is_letter_modifier());
        assert!(!'m'.is_letter_modifier());
    }

    /// Exercises the range fast path of `is_letter_other` with a CJK ideograph.
    #[test]
    fn is_letter_range() {
        assert!('界'.is_letter_other());
    }
}