base/
text.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5pub use crate::unicode_block::{UnicodeBlock, UnicodeBlockMethod};
6
/// Returns `true` if `c` is one of the Unicode bidirectional control characters.
///
/// The recognised set is:
/// - U+202A..=U+202E — explicit embeddings and overrides, plus their terminator
///   (LRE, RLE, PDF, LRO, RLO);
/// - U+2066..=U+2069 — directional isolates, plus their terminator
///   (LRI, RLI, FSI, PDI);
/// - U+200E, U+200F and U+061C — the implicit directional marks
///   (LRM, RLM, ALM).
pub fn is_bidi_control(c: char) -> bool {
    let is_embedding_or_override = ('\u{202A}'..='\u{202E}').contains(&c);
    let is_isolate = ('\u{2066}'..='\u{2069}').contains(&c);
    let is_directional_mark = c == '\u{200E}' || c == '\u{200F}' || c == '\u{061C}';

    is_embedding_or_override || is_isolate || is_directional_mark
}
10
/// Returns the index of the Unicode plane that contains `codepoint`.
///
/// Each plane covers `0x1_0000` consecutive code points, so the result is
/// the scalar value divided by that size: `0` for the Basic Multilingual
/// Plane, up to `16` for the last plane (`U+10FFFF` is the largest `char`).
pub fn unicode_plane(codepoint: char) -> u32 {
    const CODE_POINTS_PER_PLANE: u32 = 0x1_0000;

    u32::from(codepoint) / CODE_POINTS_PER_PLANE
}
14
15pub fn is_cjk(codepoint: char) -> bool {
16    if let Some(
17        UnicodeBlock::CJKRadicalsSupplement |
18        UnicodeBlock::KangxiRadicals |
19        UnicodeBlock::IdeographicDescriptionCharacters |
20        UnicodeBlock::CJKSymbolsandPunctuation |
21        UnicodeBlock::Hiragana |
22        UnicodeBlock::Katakana |
23        UnicodeBlock::Bopomofo |
24        UnicodeBlock::HangulCompatibilityJamo |
25        UnicodeBlock::Kanbun |
26        UnicodeBlock::BopomofoExtended |
27        UnicodeBlock::CJKStrokes |
28        UnicodeBlock::KatakanaPhoneticExtensions |
29        UnicodeBlock::EnclosedCJKLettersandMonths |
30        UnicodeBlock::CJKCompatibility |
31        UnicodeBlock::CJKUnifiedIdeographsExtensionA |
32        UnicodeBlock::YijingHexagramSymbols |
33        UnicodeBlock::CJKUnifiedIdeographs |
34        UnicodeBlock::CJKCompatibilityIdeographs |
35        UnicodeBlock::CJKCompatibilityForms |
36        UnicodeBlock::HalfwidthandFullwidthForms,
37    ) = codepoint.block()
38    {
39        return true;
40    }
41
42    // https://en.wikipedia.org/wiki/Plane_(Unicode)#Supplementary_Ideographic_Plane
43    // https://en.wikipedia.org/wiki/Plane_(Unicode)#Tertiary_Ideographic_Plane
44    unicode_plane(codepoint) == 2 || unicode_plane(codepoint) == 3
45}
46
#[test]
fn test_is_cjk() {
    // Samples taken from a variety of CJK blocks; every one must be accepted.
    let cjk_samples = ['〇', '㐀', 'あ', 'ア', '㆒', 'ㆣ', '龥', '𰾑', '𰻝'];
    for &sample in &cjk_samples {
        assert!(is_cjk(sample), "{:?} should be classified as CJK", sample);
    }

    // Samples from outside the CJK blocks; every one must be rejected.
    let non_cjk_samples = ['a', '🙂', '©'];
    for &sample in &non_cjk_samples {
        assert!(!is_cjk(sample), "{:?} should not be classified as CJK", sample);
    }
}