1use std::iter::Sum;
6use std::ops::{Add, AddAssign, Sub, SubAssign};
7
8use malloc_size_of_derive::MallocSizeOf;
9
10pub use crate::unicode_block::{UnicodeBlock, UnicodeBlockMethod};
11
/// Returns `true` when `c` is one of the Unicode bidirectional control
/// characters recognized by this module.
///
/// The recognized set is:
/// * U+202A..=U+202E — embeddings, overrides and pop directional formatting,
/// * U+2066..=U+2069 — isolates and pop directional isolate,
/// * U+200E, U+200F and U+061C — the left-to-right, right-to-left and Arabic
///   letter marks.
pub fn is_bidi_control(c: char) -> bool {
    let is_embedding_or_override = ('\u{202A}'..='\u{202E}').contains(&c);
    let is_isolate = ('\u{2066}'..='\u{2069}').contains(&c);
    let is_directional_mark = c == '\u{200E}' || c == '\u{200F}' || c == '\u{061C}';

    is_embedding_or_override || is_isolate || is_directional_mark
}
15
/// Returns the index of the Unicode plane that contains `codepoint`.
///
/// Each plane spans 0x1_0000 code points, so the plane index is the scalar
/// value divided by that size (plane 0 covers U+0000..=U+FFFF, and so on).
pub fn unicode_plane(codepoint: char) -> u32 {
    const CODE_POINTS_PER_PLANE: u32 = 0x1_0000;
    u32::from(codepoint) / CODE_POINTS_PER_PLANE
}
19
20pub fn is_cjk(codepoint: char) -> bool {
21 if let Some(
22 UnicodeBlock::CJKRadicalsSupplement |
23 UnicodeBlock::KangxiRadicals |
24 UnicodeBlock::IdeographicDescriptionCharacters |
25 UnicodeBlock::CJKSymbolsandPunctuation |
26 UnicodeBlock::Hiragana |
27 UnicodeBlock::Katakana |
28 UnicodeBlock::Bopomofo |
29 UnicodeBlock::HangulCompatibilityJamo |
30 UnicodeBlock::Kanbun |
31 UnicodeBlock::BopomofoExtended |
32 UnicodeBlock::CJKStrokes |
33 UnicodeBlock::KatakanaPhoneticExtensions |
34 UnicodeBlock::EnclosedCJKLettersandMonths |
35 UnicodeBlock::CJKCompatibility |
36 UnicodeBlock::CJKUnifiedIdeographsExtensionA |
37 UnicodeBlock::YijingHexagramSymbols |
38 UnicodeBlock::CJKUnifiedIdeographs |
39 UnicodeBlock::CJKCompatibilityIdeographs |
40 UnicodeBlock::CJKCompatibilityForms |
41 UnicodeBlock::HalfwidthandFullwidthForms,
42 ) = codepoint.block()
43 {
44 return true;
45 }
46
47 unicode_plane(codepoint) == 2 || unicode_plane(codepoint) == 3
50}
51
52macro_rules! unicode_length_type {
53 ($type_name:ident) => {
54 #[derive(Clone, Copy, Debug, Default, Eq, MallocSizeOf, Ord, PartialEq, PartialOrd)]
59 pub struct $type_name(pub usize);
60
61 impl $type_name {
62 pub fn zero() -> Self {
63 Self(0)
64 }
65
66 pub fn one() -> Self {
67 Self(1)
68 }
69
70 pub fn saturating_sub(self, value: Self) -> Self {
71 Self(self.0.saturating_sub(value.0))
72 }
73 }
74
75 impl From<u32> for $type_name {
76 fn from(value: u32) -> Self {
77 Self(value as usize)
78 }
79 }
80
81 impl From<isize> for $type_name {
82 fn from(value: isize) -> Self {
83 Self(value as usize)
84 }
85 }
86
87 impl Add for $type_name {
88 type Output = Self;
89 fn add(self, other: Self) -> Self {
90 Self(self.0 + other.0)
91 }
92 }
93
94 impl AddAssign for $type_name {
95 fn add_assign(&mut self, other: Self) {
96 *self = Self(self.0 + other.0)
97 }
98 }
99
100 impl Sub for $type_name {
101 type Output = Self;
102 fn sub(self, value: Self) -> Self {
103 Self(self.0 - value.0)
104 }
105 }
106
107 impl SubAssign for $type_name {
108 fn sub_assign(&mut self, other: Self) {
109 *self = Self(self.0 - other.0)
110 }
111 }
112
113 impl Sum for $type_name {
114 fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
115 iter.fold(Self::zero(), |a, b| Self(a.0 + b.0))
116 }
117 }
118 };
119}
120
121unicode_length_type!(Utf8CodeUnitLength);
122unicode_length_type!(Utf16CodeUnitLength);
123
#[cfg(test)]
mod test {
    use super::*;

    /// Checks `is_cjk` against characters that must be classified as CJK and
    /// characters that must not be.
    #[test]
    fn test_is_cjk() {
        // Each character is asserted individually with a message, so a failure
        // names the offending code point rather than just "left != right".
        let cjk = ['〇', '㐀', 'あ', 'ア', '㆒', 'ㆣ', '龥', '𰾑', '𰻝'];
        for c in cjk.iter().copied() {
            assert!(
                is_cjk(c),
                "expected {:?} (U+{:04X}) to be classified as CJK",
                c,
                c as u32
            );
        }

        let not_cjk = ['a', '🙂', '©'];
        for c in not_cjk.iter().copied() {
            assert!(
                !is_cjk(c),
                "expected {:?} (U+{:04X}) not to be classified as CJK",
                c,
                c as u32
            );
        }
    }
}