1use std::iter::Sum;
6use std::ops::{Add, AddAssign, Range, Sub, SubAssign};
7
8use malloc_size_of_derive::MallocSizeOf;
9
10pub use crate::unicode_block::{UnicodeBlock, UnicodeBlockMethod};
11
/// Returns `true` if `c` is one of the Unicode bidirectional control characters.
///
/// The accepted code points are:
/// - U+202A..=U+202E (embeddings, overrides and pop directional formatting),
/// - U+2066..=U+2069 (isolates and pop directional isolate),
/// - U+200E, U+200F and U+061C (the left-to-right, right-to-left and Arabic letter marks).
pub fn is_bidi_control(c: char) -> bool {
    let is_embedding_or_override = ('\u{202A}'..='\u{202E}').contains(&c);
    let is_isolate = ('\u{2066}'..='\u{2069}').contains(&c);
    let is_mark = c == '\u{200E}' || c == '\u{200F}' || c == '\u{061C}';

    is_embedding_or_override || is_isolate || is_mark
}
15
/// Returns the index of the Unicode plane that contains `codepoint`.
///
/// Every plane spans `0x1_0000` code points, so the plane index is the
/// scalar value divided by that size (plane 0 covers U+0000..=U+FFFF).
pub fn unicode_plane(codepoint: char) -> u32 {
    const CODE_POINTS_PER_PLANE: u32 = 0x1_0000;

    u32::from(codepoint) / CODE_POINTS_PER_PLANE
}
19
20pub fn is_cjk(codepoint: char) -> bool {
21 if let Some(
22 UnicodeBlock::CJKRadicalsSupplement |
23 UnicodeBlock::KangxiRadicals |
24 UnicodeBlock::IdeographicDescriptionCharacters |
25 UnicodeBlock::CJKSymbolsandPunctuation |
26 UnicodeBlock::Hiragana |
27 UnicodeBlock::Katakana |
28 UnicodeBlock::Bopomofo |
29 UnicodeBlock::HangulCompatibilityJamo |
30 UnicodeBlock::Kanbun |
31 UnicodeBlock::BopomofoExtended |
32 UnicodeBlock::CJKStrokes |
33 UnicodeBlock::KatakanaPhoneticExtensions |
34 UnicodeBlock::EnclosedCJKLettersandMonths |
35 UnicodeBlock::CJKCompatibility |
36 UnicodeBlock::CJKUnifiedIdeographsExtensionA |
37 UnicodeBlock::YijingHexagramSymbols |
38 UnicodeBlock::CJKUnifiedIdeographs |
39 UnicodeBlock::CJKCompatibilityIdeographs |
40 UnicodeBlock::CJKCompatibilityForms |
41 UnicodeBlock::HalfwidthandFullwidthForms,
42 ) = codepoint.block()
43 {
44 return true;
45 }
46
47 unicode_plane(codepoint) == 2 || unicode_plane(codepoint) == 3
50}
51
/// Generates a public tuple struct named `$type_name` that wraps a `usize`,
/// together with constructors, integer conversions and the additive
/// operator impls (`Add`, `AddAssign`, `Sub`, `SubAssign`, `Sum`).
///
/// All arithmetic is delegated to plain `usize` operators, so overflow and
/// underflow follow the usual rules for `usize` (a panic when overflow checks
/// are enabled, wrapping otherwise). Only `saturating_sub` clamps at zero.
macro_rules! unicode_length_type {
    ($type_name:ident) => {
        /// A count stored as a `usize`; the wrapped value is publicly accessible.
        #[derive(Clone, Copy, Debug, Default, Eq, MallocSizeOf, Ord, PartialEq, PartialOrd)]
        pub struct $type_name(pub usize);

        impl $type_name {
            /// Returns the value wrapping `0`.
            pub fn zero() -> Self {
                $type_name(0)
            }

            /// Returns the value wrapping `1`.
            pub fn one() -> Self {
                $type_name(1)
            }

            /// Converts a range of this type into the equivalent range of raw `usize` values.
            pub fn unwrap_range(byte_range: Range<Self>) -> Range<usize> {
                let Range { start, end } = byte_range;
                start.0..end.0
            }

            /// Subtracts `value` from `self`, clamping the result at zero instead of underflowing.
            pub fn saturating_sub(self, value: Self) -> Self {
                let Self(minuend) = self;
                let Self(subtrahend) = value;
                Self(minuend.saturating_sub(subtrahend))
            }
        }

        impl From<u32> for $type_name {
            /// Converts the `u32` to `usize` with an `as` cast.
            fn from(value: u32) -> Self {
                let count = value as usize;
                Self(count)
            }
        }

        impl From<isize> for $type_name {
            /// Converts with an `as` cast: a negative input is reinterpreted
            /// as a large `usize` rather than being rejected.
            fn from(value: isize) -> Self {
                let count = value as usize;
                Self(count)
            }
        }

        impl Add for $type_name {
            type Output = Self;

            fn add(self, other: Self) -> Self {
                let (Self(lhs), Self(rhs)) = (self, other);
                Self(lhs + rhs)
            }
        }

        impl AddAssign for $type_name {
            fn add_assign(&mut self, other: Self) {
                self.0 += other.0;
            }
        }

        impl Sub for $type_name {
            type Output = Self;

            fn sub(self, value: Self) -> Self {
                let (Self(lhs), Self(rhs)) = (self, value);
                Self(lhs - rhs)
            }
        }

        impl SubAssign for $type_name {
            fn sub_assign(&mut self, other: Self) {
                self.0 -= other.0;
            }
        }

        impl Sum for $type_name {
            /// Adds up every item, starting from zero.
            fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
                iter.fold(Self::zero(), |total, item| total + item)
            }
        }
    };
}
124
// Expands `unicode_length_type!` into the `usize` newtype `Utf8CodeUnitLength`
// (going by its name, a count of UTF-8 code units).
unicode_length_type!(Utf8CodeUnitLength);
// Expands `unicode_length_type!` into the `usize` newtype `Utf16CodeUnitLength`
// (going by its name, a count of UTF-16 code units).
unicode_length_type!(Utf16CodeUnitLength);
127
#[cfg(test)]
mod test {
    use super::*;

    /// Checks `is_cjk` against a table of characters that must be classified
    /// as CJK and a table of characters that must not.
    #[test]
    fn test_is_cjk() {
        // Characters the classifier is expected to accept.
        let cjk = ['〇', '㐀', 'あ', 'ア', '㆒', 'ㆣ', '龥', '𰾑', '𰻝'];
        for &c in cjk.iter() {
            // `assert!` rather than `assert_eq!(.., true)`; the message names
            // the offending character so a failure is self-explanatory.
            assert!(
                is_cjk(c),
                "expected {:?} (U+{:04X}) to be classified as CJK",
                c,
                c as u32
            );
        }

        // Characters the classifier is expected to reject.
        let not_cjk = ['a', '🙂', '©'];
        for &c in not_cjk.iter() {
            assert!(
                !is_cjk(c),
                "expected {:?} (U+{:04X}) not to be classified as CJK",
                c,
                c as u32
            );
        }
    }
}