1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */

pub use crate::unicode_block::{UnicodeBlock, UnicodeBlockMethod};

/// Returns `true` if `c` is one of the Unicode bidirectional control
/// characters: the Arabic letter mark, the implicit directional marks, the
/// explicit embedding/override controls, or the isolate controls.
pub fn is_bidi_control(c: char) -> bool {
    match c {
        // ARABIC LETTER MARK
        '\u{061C}' => true,
        // LEFT-TO-RIGHT MARK, RIGHT-TO-LEFT MARK
        '\u{200E}' | '\u{200F}' => true,
        // LRE, RLE, PDF, LRO, RLO
        '\u{202A}'..='\u{202E}' => true,
        // LRI, RLI, FSI, PDI
        '\u{2066}'..='\u{2069}' => true,
        _ => false,
    }
}

/// Returns the index of the Unicode plane containing `codepoint`, i.e. the
/// scalar value with its low 16 bits shifted away (0 for U+0000..=U+FFFF,
/// 1 for U+10000..=U+1FFFF, and so on).
pub fn unicode_plane(codepoint: char) -> u32 {
    // Each plane spans 0x1_0000 code points, so the plane index is everything
    // above the low 16 bits.
    const PLANE_SHIFT: u32 = 16;
    u32::from(codepoint) >> PLANE_SHIFT
}

/// Returns `true` if `codepoint` should be treated as a CJK character.
///
/// A code point qualifies when either:
/// * its Unicode block (as reported by `block()`) is one of the CJK-related
///   blocks enumerated below, or
/// * it lies in plane 2 or plane 3, regardless of its block.
pub fn is_cjk(codepoint: char) -> bool {
    let in_cjk_block = codepoint.block().map_or(false, |block| {
        matches!(
            block,
            UnicodeBlock::CJKRadicalsSupplement |
                UnicodeBlock::KangxiRadicals |
                UnicodeBlock::IdeographicDescriptionCharacters |
                UnicodeBlock::CJKSymbolsandPunctuation |
                UnicodeBlock::Hiragana |
                UnicodeBlock::Katakana |
                UnicodeBlock::Bopomofo |
                UnicodeBlock::HangulCompatibilityJamo |
                UnicodeBlock::Kanbun |
                UnicodeBlock::BopomofoExtended |
                UnicodeBlock::CJKStrokes |
                UnicodeBlock::KatakanaPhoneticExtensions |
                UnicodeBlock::EnclosedCJKLettersandMonths |
                UnicodeBlock::CJKCompatibility |
                UnicodeBlock::CJKUnifiedIdeographsExtensionA |
                UnicodeBlock::YijingHexagramSymbols |
                UnicodeBlock::CJKUnifiedIdeographs |
                UnicodeBlock::CJKCompatibilityIdeographs |
                UnicodeBlock::CJKCompatibilityForms |
                UnicodeBlock::HalfwidthandFullwidthForms
        )
    });
    if in_cjk_block {
        return true;
    }

    // Planes 2 and 3 are the Supplementary and Tertiary Ideographic Planes:
    // https://en.wikipedia.org/wiki/Plane_(Unicode)#Supplementary_Ideographic_Plane
    // https://en.wikipedia.org/wiki/Plane_(Unicode)#Tertiary_Ideographic_Plane
    matches!(unicode_plane(codepoint), 2 | 3)
}

/// Checks `is_cjk` against sample characters that must be accepted and
/// sample characters that must be rejected.
#[test]
fn test_is_cjk() {
    // Characters that `is_cjk` must accept.
    let cjk_samples = ['〇', '㐀', 'あ', 'ア', '㆒', 'ㆣ', '龥', '𰾑', '𰻝'];
    for sample in cjk_samples.iter().copied() {
        assert!(is_cjk(sample), "expected {:?} to be CJK", sample);
    }

    // Characters that `is_cjk` must reject.
    let non_cjk_samples = ['a', '🙂', '©'];
    for sample in non_cjk_samples.iter().copied() {
        assert!(!is_cjk(sample), "expected {:?} not to be CJK", sample);
    }
}