1use core::convert::TryFrom;
2
3pub use unicode_ccc::CanonicalCombiningClass;
4pub use unicode_properties::GeneralCategory as hb_unicode_general_category_t;
6
7use crate::Script;
8
/// Space categories produced by `CharExt::space_fallback`.
///
/// The values are plain `u8` tags; `NOT_SPACE` is the "no fallback" answer.
pub mod hb_unicode_funcs_t {
    /// Storage type shared by every space category below.
    pub type space_t = u8;

    /// The character has no space fallback.
    pub const NOT_SPACE: space_t = 0;

    // `SPACE_EM_n` tags: the tag value equals `n` (`SPACE_EM` itself is 1).
    pub const SPACE_EM: space_t = 1;
    pub const SPACE_EM_2: space_t = 2;
    pub const SPACE_EM_3: space_t = 3;
    pub const SPACE_EM_4: space_t = 4;
    pub const SPACE_EM_5: space_t = 5;
    pub const SPACE_EM_6: space_t = 6;
    pub const SPACE_EM_16: space_t = 16;

    // Remaining tags are simply numbered upwards from 17.
    // Presumably "4/18 of an em", going by the name — TODO confirm.
    pub const SPACE_4_EM_18: space_t = 17;
    pub const SPACE: space_t = 18;
    pub const SPACE_FIGURE: space_t = 19;
    pub const SPACE_PUNCTUATION: space_t = 20;
    pub const SPACE_NARROW: space_t = 21;
}
28
/// Replacement values for selected canonical combining classes.
///
/// `CCCn` is the value stored at index `n` of `MODIFIED_COMBINING_CLASS`,
/// i.e. the class reported instead of the canonical class `n`.
#[allow(dead_code)]
pub mod modified_combining_class {
    // Classes 10..=26: a permutation of the same 17 values, which changes
    // the relative order of these marks.
    pub const CCC10: u8 = 22;
    pub const CCC11: u8 = 15;
    pub const CCC12: u8 = 16;
    pub const CCC13: u8 = 17;
    pub const CCC14: u8 = 23;
    pub const CCC15: u8 = 18;
    pub const CCC16: u8 = 19;
    pub const CCC17: u8 = 20;
    pub const CCC18: u8 = 21;
    pub const CCC19: u8 = 14;
    pub const CCC20: u8 = 24;
    pub const CCC21: u8 = 12;
    pub const CCC22: u8 = 25;
    pub const CCC23: u8 = 13;
    pub const CCC24: u8 = 10;
    pub const CCC25: u8 = 11;
    pub const CCC26: u8 = 26;

    // Classes 27..=35: class 33 is pulled to the front (27) and classes
    // 27..=32 each shift up by one; 34 and 35 are unchanged.
    pub const CCC27: u8 = 28;
    pub const CCC28: u8 = 29;
    pub const CCC29: u8 = 30;
    pub const CCC30: u8 = 31;
    pub const CCC31: u8 = 32;
    pub const CCC32: u8 = 33;
    pub const CCC33: u8 = 27;
    pub const CCC34: u8 = 34;
    pub const CCC35: u8 = 35;

    // Class 36: unchanged.
    pub const CCC36: u8 = 36;

    // Classes 84 and 91: both collapsed to 0.
    // NOTE(review): newer HarfBuzz releases appear to assign 4 and 5 here
    // instead of zeroing — confirm which behaviour is intended.
    pub const CCC84: u8 = 0;
    pub const CCC91: u8 = 0;

    // Class 103 is lowered to 3; 107 is unchanged.
    pub const CCC103: u8 = 3;
    pub const CCC107: u8 = 107;

    // Classes 118 and 122: unchanged.
    pub const CCC118: u8 = 118;
    pub const CCC122: u8 = 122;

    // Classes 130 and 132 land on 132 and 131 respectively, so class 132
    // now sorts before class 130; 129 is unchanged.
    pub const CCC129: u8 = 129;
    pub const CCC130: u8 = 132;
    pub const CCC132: u8 = 131;
}
108
// Lookup table indexed by canonical combining class (0..=255) that yields the
// class value used by `CharExt::modified_combining_class`.
//
// Most slots hold their own index (identity mapping); the slots filled from
// `modified_combining_class::CCCn` are the remapped ones. The array type
// `[u8; 256]` makes the compiler enforce that exactly 256 entries are listed.
#[rustfmt::skip]
const MODIFIED_COMBINING_CLASS: &[u8; 256] = &[
    // 0..=9
    CanonicalCombiningClass::NotReordered as u8,
    CanonicalCombiningClass::Overlay as u8,
    2, 3, 4, 5, 6,
    CanonicalCombiningClass::Nukta as u8,
    CanonicalCombiningClass::KanaVoicing as u8,
    CanonicalCombiningClass::Virama as u8,

    // 10..=26 (remapped)
    modified_combining_class::CCC10,
    modified_combining_class::CCC11,
    modified_combining_class::CCC12,
    modified_combining_class::CCC13,
    modified_combining_class::CCC14,
    modified_combining_class::CCC15,
    modified_combining_class::CCC16,
    modified_combining_class::CCC17,
    modified_combining_class::CCC18,
    modified_combining_class::CCC19,
    modified_combining_class::CCC20,
    modified_combining_class::CCC21,
    modified_combining_class::CCC22,
    modified_combining_class::CCC23,
    modified_combining_class::CCC24,
    modified_combining_class::CCC25,
    modified_combining_class::CCC26,

    // 27..=35 (remapped)
    modified_combining_class::CCC27,
    modified_combining_class::CCC28,
    modified_combining_class::CCC29,
    modified_combining_class::CCC30,
    modified_combining_class::CCC31,
    modified_combining_class::CCC32,
    modified_combining_class::CCC33,
    modified_combining_class::CCC34,
    modified_combining_class::CCC35,

    // 36
    modified_combining_class::CCC36,

    // 37..=83 (identity)
    37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
    60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 83,

    // 84..=102, with 84 and 91 remapped
    modified_combining_class::CCC84,
    85, 86, 87, 88, 89, 90,
    modified_combining_class::CCC91,
    92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,

    // 103..=117, with 103 and 107 taken from the constants module
    modified_combining_class::CCC103,
    104, 105, 106,
    modified_combining_class::CCC107,
    108, 109, 110, 111, 112, 113, 114, 115, 116, 117,

    // 118..=128, with 118 and 122 taken from the constants module
    modified_combining_class::CCC118,
    119, 120, 121,
    modified_combining_class::CCC122,
    123, 124, 125, 126, 127, 128,

    // 129..=139, with 129, 130 and 132 taken from the constants module
    modified_combining_class::CCC129,
    modified_combining_class::CCC130,
    131,
    modified_combining_class::CCC132,
    133, 134, 135, 136, 137, 138, 139,


    // 140..=199 (identity)
    140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
    150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
    170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
    180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
    190, 191, 192, 193, 194, 195, 196, 197, 198, 199,

    // 200..=255: named classes are written via the enum, the rest literally
    CanonicalCombiningClass::AttachedBelowLeft as u8,
    201,
    CanonicalCombiningClass::AttachedBelow as u8,
    203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213,
    CanonicalCombiningClass::AttachedAbove as u8,
    215,
    CanonicalCombiningClass::AttachedAboveRight as u8,
    217,
    CanonicalCombiningClass::BelowLeft as u8,
    219,
    CanonicalCombiningClass::Below as u8,
    221,
    CanonicalCombiningClass::BelowRight as u8,
    223,
    CanonicalCombiningClass::Left as u8,
    225,
    CanonicalCombiningClass::Right as u8,
    227,
    CanonicalCombiningClass::AboveLeft as u8,
    229,
    CanonicalCombiningClass::Above as u8,
    231,
    CanonicalCombiningClass::AboveRight as u8,
    CanonicalCombiningClass::DoubleBelow as u8,
    CanonicalCombiningClass::DoubleAbove as u8,
    235, 236, 237, 238, 239,
    CanonicalCombiningClass::IotaSubscript as u8,
    241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
    255, ];
219
/// Extension methods for general-category values: conversion to and from a
/// numeric `u32` code, plus two coarse classification predicates.
pub trait GeneralCategoryExt {
    /// Converts the category to its numeric `u32` code.
    fn to_rb(&self) -> u32;
    /// Builds a category from a numeric `u32` code.
    fn from_rb(gc: u32) -> Self;
    /// Returns `true` for mark categories.
    fn is_mark(&self) -> bool;
    /// Returns `true` for letter categories.
    fn is_letter(&self) -> bool;
}
226
227#[rustfmt::skip]
228impl GeneralCategoryExt for hb_unicode_general_category_t {
229 fn to_rb(&self) -> u32 {
230 match *self {
231 hb_unicode_general_category_t::ClosePunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION,
232 hb_unicode_general_category_t::ConnectorPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION,
233 hb_unicode_general_category_t::Control => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONTROL,
234 hb_unicode_general_category_t::CurrencySymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL,
235 hb_unicode_general_category_t::DashPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION,
236 hb_unicode_general_category_t::DecimalNumber => hb_gc::RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER,
237 hb_unicode_general_category_t::EnclosingMark => hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK,
238 hb_unicode_general_category_t::FinalPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION,
239 hb_unicode_general_category_t::Format => hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT,
240 hb_unicode_general_category_t::InitialPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION,
241 hb_unicode_general_category_t::LetterNumber => hb_gc::RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER,
242 hb_unicode_general_category_t::LineSeparator => hb_gc::RB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR,
243 hb_unicode_general_category_t::LowercaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER,
244 hb_unicode_general_category_t::MathSymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL,
245 hb_unicode_general_category_t::ModifierLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER,
246 hb_unicode_general_category_t::ModifierSymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL,
247 hb_unicode_general_category_t::NonspacingMark => hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK,
248 hb_unicode_general_category_t::OpenPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION,
249 hb_unicode_general_category_t::OtherLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER,
250 hb_unicode_general_category_t::OtherNumber => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER,
251 hb_unicode_general_category_t::OtherPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION,
252 hb_unicode_general_category_t::OtherSymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL,
253 hb_unicode_general_category_t::ParagraphSeparator => hb_gc::RB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR,
254 hb_unicode_general_category_t::PrivateUse => hb_gc::RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE,
255 hb_unicode_general_category_t::SpaceSeparator => hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR,
256 hb_unicode_general_category_t::SpacingMark => hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK,
257 hb_unicode_general_category_t::Surrogate => hb_gc::RB_UNICODE_GENERAL_CATEGORY_SURROGATE,
258 hb_unicode_general_category_t::TitlecaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER,
259 hb_unicode_general_category_t::Unassigned => hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED,
260 hb_unicode_general_category_t::UppercaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER
261 }
262 }
263
264 fn from_rb(gc: u32) -> Self {
265 match gc {
266 hb_gc::RB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION => hb_unicode_general_category_t::ClosePunctuation,
267 hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION => hb_unicode_general_category_t::ConnectorPunctuation,
268 hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONTROL => hb_unicode_general_category_t::Control,
269 hb_gc::RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL => hb_unicode_general_category_t::CurrencySymbol,
270 hb_gc::RB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION => hb_unicode_general_category_t::DashPunctuation,
271 hb_gc::RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER => hb_unicode_general_category_t::DecimalNumber,
272 hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK => hb_unicode_general_category_t::EnclosingMark,
273 hb_gc::RB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION => hb_unicode_general_category_t::FinalPunctuation,
274 hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT => hb_unicode_general_category_t::Format,
275 hb_gc::RB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION => hb_unicode_general_category_t::InitialPunctuation,
276 hb_gc::RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER => hb_unicode_general_category_t::LetterNumber,
277 hb_gc::RB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR => hb_unicode_general_category_t::LineSeparator,
278 hb_gc::RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER => hb_unicode_general_category_t::LowercaseLetter,
279 hb_gc::RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL => hb_unicode_general_category_t::MathSymbol,
280 hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER => hb_unicode_general_category_t::ModifierLetter,
281 hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL => hb_unicode_general_category_t::ModifierSymbol,
282 hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK => hb_unicode_general_category_t::NonspacingMark,
283 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION => hb_unicode_general_category_t::OpenPunctuation,
284 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER => hb_unicode_general_category_t::OtherLetter,
285 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER => hb_unicode_general_category_t::OtherNumber,
286 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION => hb_unicode_general_category_t::OtherPunctuation,
287 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL => hb_unicode_general_category_t::OtherSymbol,
288 hb_gc::RB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR => hb_unicode_general_category_t::ParagraphSeparator,
289 hb_gc::RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE => hb_unicode_general_category_t::PrivateUse,
290 hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR => hb_unicode_general_category_t::SpaceSeparator,
291 hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK => hb_unicode_general_category_t::SpacingMark,
292 hb_gc::RB_UNICODE_GENERAL_CATEGORY_SURROGATE => hb_unicode_general_category_t::Surrogate,
293 hb_gc::RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER => hb_unicode_general_category_t::TitlecaseLetter,
294 hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED => hb_unicode_general_category_t::Unassigned,
295 hb_gc::RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER => hb_unicode_general_category_t::UppercaseLetter,
296 _ => unreachable!()
297 }
298 }
299
300 fn is_mark(&self) -> bool {
301 matches!(*self,
302 hb_unicode_general_category_t::SpacingMark |
303 hb_unicode_general_category_t::EnclosingMark |
304 hb_unicode_general_category_t::NonspacingMark)
305 }
306
307 fn is_letter(&self) -> bool {
308 matches!(*self,
309 hb_unicode_general_category_t::LowercaseLetter |
310 hb_unicode_general_category_t::ModifierLetter |
311 hb_unicode_general_category_t::OtherLetter |
312 hb_unicode_general_category_t::TitlecaseLetter |
313 hb_unicode_general_category_t::UppercaseLetter)
314 }
315}
316
/// Per-character Unicode property lookups.
pub trait CharExt {
    /// Returns the script of the character as a crate-level `Script`.
    fn script(self) -> Script;
    /// Returns the general category of the character.
    fn general_category(self) -> hb_unicode_general_category_t;
    /// Returns one of the `hb_unicode_funcs_t` space tags for the character.
    fn space_fallback(self) -> hb_unicode_funcs_t::space_t;
    /// Returns the combining class of the character after remapping.
    fn modified_combining_class(self) -> u8;
    /// Returns the mirrored counterpart of the character, if there is one.
    fn mirrored(self) -> Option<char>;
    /// Returns whether the character is in the extended-pictographic set.
    fn is_emoji_extended_pictographic(self) -> bool;
    /// Returns whether the character is treated as default-ignorable.
    fn is_default_ignorable(self) -> bool;
    /// Returns whether the character is a variation selector.
    fn is_variation_selector(self) -> bool;
    /// Returns the vertical presentation form of the character, if one is mapped.
    fn vertical(self) -> Option<char>;
}
328
329impl CharExt for char {
330 fn script(self) -> Script {
331 use crate::script;
332 use unicode_script as us;
333
334 match unicode_script::UnicodeScript::script(&self) {
335 us::Script::Common => script::COMMON,
336 us::Script::Inherited => script::INHERITED,
337 us::Script::Adlam => script::ADLAM,
338 us::Script::Ahom => script::AHOM,
339 us::Script::Anatolian_Hieroglyphs => script::ANATOLIAN_HIEROGLYPHS,
340 us::Script::Arabic => script::ARABIC,
341 us::Script::Armenian => script::ARMENIAN,
342 us::Script::Avestan => script::AVESTAN,
343 us::Script::Balinese => script::BALINESE,
344 us::Script::Bamum => script::BAMUM,
345 us::Script::Bassa_Vah => script::BASSA_VAH,
346 us::Script::Batak => script::BATAK,
347 us::Script::Bengali => script::BENGALI,
348 us::Script::Bhaiksuki => script::BHAIKSUKI,
349 us::Script::Bopomofo => script::BOPOMOFO,
350 us::Script::Brahmi => script::BRAHMI,
351 us::Script::Braille => script::BRAILLE,
352 us::Script::Buginese => script::BUGINESE,
353 us::Script::Buhid => script::BUHID,
354 us::Script::Canadian_Aboriginal => script::CANADIAN_SYLLABICS,
355 us::Script::Carian => script::CARIAN,
356 us::Script::Caucasian_Albanian => script::CAUCASIAN_ALBANIAN,
357 us::Script::Chakma => script::CHAKMA,
358 us::Script::Cham => script::CHAM,
359 us::Script::Cherokee => script::CHEROKEE,
360 us::Script::Chorasmian => script::CHORASMIAN,
361 us::Script::Coptic => script::COPTIC,
362 us::Script::Cuneiform => script::CUNEIFORM,
363 us::Script::Cypriot => script::CYPRIOT,
364 us::Script::Cyrillic => script::CYRILLIC,
365 us::Script::Deseret => script::DESERET,
366 us::Script::Devanagari => script::DEVANAGARI,
367 us::Script::Dives_Akuru => script::DIVES_AKURU,
368 us::Script::Dogra => script::DOGRA,
369 us::Script::Duployan => script::DUPLOYAN,
370 us::Script::Egyptian_Hieroglyphs => script::EGYPTIAN_HIEROGLYPHS,
371 us::Script::Elbasan => script::ELBASAN,
372 us::Script::Elymaic => script::ELYMAIC,
373 us::Script::Ethiopic => script::ETHIOPIC,
374 us::Script::Georgian => script::GEORGIAN,
375 us::Script::Glagolitic => script::GLAGOLITIC,
376 us::Script::Gothic => script::GOTHIC,
377 us::Script::Grantha => script::GRANTHA,
378 us::Script::Greek => script::GREEK,
379 us::Script::Gujarati => script::GUJARATI,
380 us::Script::Gunjala_Gondi => script::GUNJALA_GONDI,
381 us::Script::Gurmukhi => script::GURMUKHI,
382 us::Script::Han => script::HAN,
383 us::Script::Hangul => script::HANGUL,
384 us::Script::Hanifi_Rohingya => script::HANIFI_ROHINGYA,
385 us::Script::Hanunoo => script::HANUNOO,
386 us::Script::Hatran => script::HATRAN,
387 us::Script::Hebrew => script::HEBREW,
388 us::Script::Hiragana => script::HIRAGANA,
389 us::Script::Imperial_Aramaic => script::IMPERIAL_ARAMAIC,
390 us::Script::Inscriptional_Pahlavi => script::INSCRIPTIONAL_PAHLAVI,
391 us::Script::Inscriptional_Parthian => script::INSCRIPTIONAL_PARTHIAN,
392 us::Script::Javanese => script::JAVANESE,
393 us::Script::Kaithi => script::KAITHI,
394 us::Script::Kannada => script::KANNADA,
395 us::Script::Katakana => script::KATAKANA,
396 us::Script::Kayah_Li => script::KAYAH_LI,
397 us::Script::Kharoshthi => script::KHAROSHTHI,
398 us::Script::Khitan_Small_Script => script::KHITAN_SMALL_SCRIPT,
399 us::Script::Khmer => script::KHMER,
400 us::Script::Khojki => script::KHOJKI,
401 us::Script::Khudawadi => script::KHUDAWADI,
402 us::Script::Lao => script::LAO,
403 us::Script::Latin => script::LATIN,
404 us::Script::Lepcha => script::LEPCHA,
405 us::Script::Limbu => script::LIMBU,
406 us::Script::Linear_A => script::LINEAR_A,
407 us::Script::Linear_B => script::LINEAR_B,
408 us::Script::Lisu => script::LISU,
409 us::Script::Lycian => script::LYCIAN,
410 us::Script::Lydian => script::LYDIAN,
411 us::Script::Mahajani => script::MAHAJANI,
412 us::Script::Makasar => script::MAKASAR,
413 us::Script::Malayalam => script::MALAYALAM,
414 us::Script::Mandaic => script::MANDAIC,
415 us::Script::Manichaean => script::MANICHAEAN,
416 us::Script::Marchen => script::MARCHEN,
417 us::Script::Masaram_Gondi => script::MASARAM_GONDI,
418 us::Script::Medefaidrin => script::MEDEFAIDRIN,
419 us::Script::Meetei_Mayek => script::MEETEI_MAYEK,
420 us::Script::Mende_Kikakui => script::MENDE_KIKAKUI,
421 us::Script::Meroitic_Cursive => script::MEROITIC_CURSIVE,
422 us::Script::Meroitic_Hieroglyphs => script::MEROITIC_HIEROGLYPHS,
423 us::Script::Miao => script::MIAO,
424 us::Script::Modi => script::MODI,
425 us::Script::Mongolian => script::MONGOLIAN,
426 us::Script::Mro => script::MRO,
427 us::Script::Multani => script::MULTANI,
428 us::Script::Myanmar => script::MYANMAR,
429 us::Script::Nabataean => script::NABATAEAN,
430 us::Script::Nandinagari => script::NANDINAGARI,
431 us::Script::New_Tai_Lue => script::NEW_TAI_LUE,
432 us::Script::Newa => script::NEWA,
433 us::Script::Nko => script::NKO,
434 us::Script::Nushu => script::NUSHU,
435 us::Script::Nyiakeng_Puachue_Hmong => script::NYIAKENG_PUACHUE_HMONG,
436 us::Script::Ogham => script::OGHAM,
437 us::Script::Ol_Chiki => script::OL_CHIKI,
438 us::Script::Old_Hungarian => script::OLD_HUNGARIAN,
439 us::Script::Old_Italic => script::OLD_ITALIC,
440 us::Script::Old_North_Arabian => script::OLD_NORTH_ARABIAN,
441 us::Script::Old_Permic => script::OLD_PERMIC,
442 us::Script::Old_Persian => script::OLD_PERSIAN,
443 us::Script::Old_Sogdian => script::OLD_SOGDIAN,
444 us::Script::Old_South_Arabian => script::OLD_SOUTH_ARABIAN,
445 us::Script::Old_Turkic => script::OLD_TURKIC,
446 us::Script::Oriya => script::ORIYA,
447 us::Script::Osage => script::OSAGE,
448 us::Script::Osmanya => script::OSMANYA,
449 us::Script::Pahawh_Hmong => script::PAHAWH_HMONG,
450 us::Script::Palmyrene => script::PALMYRENE,
451 us::Script::Pau_Cin_Hau => script::PAU_CIN_HAU,
452 us::Script::Phags_Pa => script::PHAGS_PA,
453 us::Script::Phoenician => script::PHOENICIAN,
454 us::Script::Psalter_Pahlavi => script::PSALTER_PAHLAVI,
455 us::Script::Rejang => script::REJANG,
456 us::Script::Runic => script::RUNIC,
457 us::Script::Samaritan => script::SAMARITAN,
458 us::Script::Saurashtra => script::SAURASHTRA,
459 us::Script::Sharada => script::SHARADA,
460 us::Script::Shavian => script::SHAVIAN,
461 us::Script::Siddham => script::SIDDHAM,
462 us::Script::SignWriting => script::SIGNWRITING,
463 us::Script::Sinhala => script::SINHALA,
464 us::Script::Sogdian => script::SOGDIAN,
465 us::Script::Sora_Sompeng => script::SORA_SOMPENG,
466 us::Script::Soyombo => script::SOYOMBO,
467 us::Script::Sundanese => script::SUNDANESE,
468 us::Script::Syloti_Nagri => script::SYLOTI_NAGRI,
469 us::Script::Syriac => script::SYRIAC,
470 us::Script::Tagalog => script::TAGALOG,
471 us::Script::Tagbanwa => script::TAGBANWA,
472 us::Script::Tai_Le => script::TAI_LE,
473 us::Script::Tai_Tham => script::TAI_THAM,
474 us::Script::Tai_Viet => script::TAI_VIET,
475 us::Script::Takri => script::TAKRI,
476 us::Script::Tamil => script::TAMIL,
477 us::Script::Tangut => script::TANGUT,
478 us::Script::Telugu => script::TELUGU,
479 us::Script::Thaana => script::THAANA,
480 us::Script::Thai => script::THAI,
481 us::Script::Tibetan => script::TIBETAN,
482 us::Script::Tifinagh => script::TIFINAGH,
483 us::Script::Tirhuta => script::TIRHUTA,
484 us::Script::Ugaritic => script::UGARITIC,
485 us::Script::Vai => script::VAI,
486 us::Script::Wancho => script::WANCHO,
487 us::Script::Warang_Citi => script::WARANG_CITI,
488 us::Script::Yezidi => script::YEZIDI,
489 us::Script::Yi => script::YI,
490 us::Script::Zanabazar_Square => script::ZANABAZAR_SQUARE,
491 _ => script::UNKNOWN,
492 }
493 }
494
495 fn general_category(self) -> hb_unicode_general_category_t {
496 unicode_properties::general_category::UnicodeGeneralCategory::general_category(self)
497 }
498
499 fn space_fallback(self) -> hb_unicode_funcs_t::space_t {
500 use hb_unicode_funcs_t::*;
501
502 match self {
504 '\u{0020}' => SPACE, '\u{00A0}' => SPACE, '\u{2000}' => SPACE_EM_2, '\u{2001}' => SPACE_EM, '\u{2002}' => SPACE_EM_2, '\u{2003}' => SPACE_EM, '\u{2004}' => SPACE_EM_3, '\u{2005}' => SPACE_EM_4, '\u{2006}' => SPACE_EM_6, '\u{2007}' => SPACE_FIGURE, '\u{2008}' => SPACE_PUNCTUATION, '\u{2009}' => SPACE_EM_5, '\u{200A}' => SPACE_EM_16, '\u{202F}' => SPACE_NARROW, '\u{205F}' => SPACE_4_EM_18, '\u{3000}' => SPACE_EM, _ => NOT_SPACE, }
522 }
523
524 fn modified_combining_class(self) -> u8 {
525 let u = self;
526
527 if u == '\u{1A60}' {
529 return 254;
530 }
531
532 if u == '\u{0FC6}' {
534 return 254;
535 }
536
537 if u == '\u{0F39}' {
539 return 127;
540 }
541
542 let k = unicode_ccc::get_canonical_combining_class(u);
543 MODIFIED_COMBINING_CLASS[k as usize]
544 }
545
546 fn mirrored(self) -> Option<char> {
547 unicode_bidi_mirroring::get_mirrored(self)
548 }
549
550 fn is_emoji_extended_pictographic(self) -> bool {
551 match self as u32 {
553 0x00A9 => true,
554 0x00AE => true,
555 0x203C => true,
556 0x2049 => true,
557 0x2122 => true,
558 0x2139 => true,
559 0x2194..=0x2199 => true,
560 0x21A9..=0x21AA => true,
561 0x231A..=0x231B => true,
562 0x2328 => true,
563 0x2388 => true,
564 0x23CF => true,
565 0x23E9..=0x23F3 => true,
566 0x23F8..=0x23FA => true,
567 0x24C2 => true,
568 0x25AA..=0x25AB => true,
569 0x25B6 => true,
570 0x25C0 => true,
571 0x25FB..=0x25FE => true,
572 0x2600..=0x2605 => true,
573 0x2607..=0x2612 => true,
574 0x2614..=0x2685 => true,
575 0x2690..=0x2705 => true,
576 0x2708..=0x2712 => true,
577 0x2714 => true,
578 0x2716 => true,
579 0x271D => true,
580 0x2721 => true,
581 0x2728 => true,
582 0x2733..=0x2734 => true,
583 0x2744 => true,
584 0x2747 => true,
585 0x274C => true,
586 0x274E => true,
587 0x2753..=0x2755 => true,
588 0x2757 => true,
589 0x2763..=0x2767 => true,
590 0x2795..=0x2797 => true,
591 0x27A1 => true,
592 0x27B0 => true,
593 0x27BF => true,
594 0x2934..=0x2935 => true,
595 0x2B05..=0x2B07 => true,
596 0x2B1B..=0x2B1C => true,
597 0x2B50 => true,
598 0x2B55 => true,
599 0x3030 => true,
600 0x303D => true,
601 0x3297 => true,
602 0x3299 => true,
603 0x1F000..=0x1F0FF => true,
604 0x1F10D..=0x1F10F => true,
605 0x1F12F => true,
606 0x1F16C..=0x1F171 => true,
607 0x1F17E..=0x1F17F => true,
608 0x1F18E => true,
609 0x1F191..=0x1F19A => true,
610 0x1F1AD..=0x1F1E5 => true,
611 0x1F201..=0x1F20F => true,
612 0x1F21A => true,
613 0x1F22F => true,
614 0x1F232..=0x1F23A => true,
615 0x1F23C..=0x1F23F => true,
616 0x1F249..=0x1F3FA => true,
617 0x1F400..=0x1F53D => true,
618 0x1F546..=0x1F64F => true,
619 0x1F680..=0x1F6FF => true,
620 0x1F774..=0x1F77F => true,
621 0x1F7D5..=0x1F7FF => true,
622 0x1F80C..=0x1F80F => true,
623 0x1F848..=0x1F84F => true,
624 0x1F85A..=0x1F85F => true,
625 0x1F888..=0x1F88F => true,
626 0x1F8AE..=0x1F8FF => true,
627 0x1F90C..=0x1F93A => true,
628 0x1F93C..=0x1F945 => true,
629 0x1F947..=0x1FAFF => true,
630 0x1FC00..=0x1FFFD => true,
631 _ => false,
632 }
633 }
634
635 fn is_default_ignorable(self) -> bool {
673 let ch = u32::from(self);
674 let plane = ch >> 16;
675 if plane == 0 {
676 let page = ch >> 8;
678 match page {
679 0x00 => ch == 0x00AD,
680 0x03 => ch == 0x034F,
681 0x06 => ch == 0x061C,
682 0x17 => (0x17B4..=0x17B5).contains(&ch),
683 0x18 => (0x180B..=0x180E).contains(&ch),
684 0x20 => {
685 (0x200B..=0x200F).contains(&ch)
686 || (0x202A..=0x202E).contains(&ch)
687 || (0x2060..=0x206F).contains(&ch)
688 }
689 0xFE => (0xFE00..=0xFE0F).contains(&ch) || ch == 0xFEFF,
690 0xFF => (0xFFF0..=0xFFF8).contains(&ch),
691 _ => false,
692 }
693 } else {
694 match plane {
696 0x01 => (0x1D173..=0x1D17A).contains(&ch),
697 0x0E => (0xE0000..=0xE0FFF).contains(&ch),
698 _ => false,
699 }
700 }
701 }
702
703 fn is_variation_selector(self) -> bool {
704 let ch = u32::from(self);
707 (0x0FE00..=0x0FE0F).contains(&ch) || (0xE0100..=0xE01EF).contains(&ch) }
710
711 fn vertical(self) -> Option<char> {
712 Some(match u32::from(self) >> 8 {
713 0x20 => match self {
714 '\u{2013}' => '\u{fe32}', '\u{2014}' => '\u{fe31}', '\u{2025}' => '\u{fe30}', '\u{2026}' => '\u{fe19}', _ => return None,
719 },
720 0x30 => match self {
721 '\u{3001}' => '\u{fe11}', '\u{3002}' => '\u{fe12}', '\u{3008}' => '\u{fe3f}', '\u{3009}' => '\u{fe40}', '\u{300a}' => '\u{fe3d}', '\u{300b}' => '\u{fe3e}', '\u{300c}' => '\u{fe41}', '\u{300d}' => '\u{fe42}', '\u{300e}' => '\u{fe43}', '\u{300f}' => '\u{fe44}', '\u{3010}' => '\u{fe3b}', '\u{3011}' => '\u{fe3c}', '\u{3014}' => '\u{fe39}', '\u{3015}' => '\u{fe3a}', '\u{3016}' => '\u{fe17}', '\u{3017}' => '\u{fe18}', _ => return None,
738 },
739 0xfe => match self {
740 '\u{fe4f}' => '\u{fe34}', _ => return None,
742 },
743 0xff => match self {
744 '\u{ff01}' => '\u{fe15}', '\u{ff08}' => '\u{fe35}', '\u{ff09}' => '\u{fe36}', '\u{ff0c}' => '\u{fe10}', '\u{ff1a}' => '\u{fe13}', '\u{ff1b}' => '\u{fe14}', '\u{ff1f}' => '\u{fe16}', '\u{ff3b}' => '\u{fe47}', '\u{ff3d}' => '\u{fe48}', '\u{ff3f}' => '\u{fe33}', '\u{ff5b}' => '\u{fe37}', '\u{ff5d}' => '\u{fe38}', _ => return None,
757 },
758 _ => return None,
759 })
760 }
761}
762
// Constants for the arithmetic Hangul composition/decomposition below.
// `*_BASE` values are code points, `*_COUNT` values are range sizes.

// First code point of the syllable range handled by `decompose_hangul`.
const S_BASE: u32 = 0xAC00;
// First code point accepted as the leading part in `compose_hangul`.
const L_BASE: u32 = 0x1100;
// First code point accepted as the vowel part in `compose_hangul`.
const V_BASE: u32 = 0x1161;
// Base added to a syllable's trailing index (`si % T_COUNT`) when decomposing.
const T_BASE: u32 = 0x11A7;
const L_COUNT: u32 = 19;
const V_COUNT: u32 = 21;
const T_COUNT: u32 = 28;
// Number of syllables per leading part.
const N_COUNT: u32 = V_COUNT * T_COUNT;
// Total number of syllables in the range starting at `S_BASE`.
const S_COUNT: u32 = L_COUNT * N_COUNT;
772
773pub fn compose(a: char, b: char) -> Option<char> {
774 if let Some(ab) = compose_hangul(a, b) {
775 return Some(ab);
776 }
777
778 let needle = (a as u64) << 32 | (b as u64);
779 super::unicode_norm::COMPOSITION_TABLE
780 .binary_search_by(|item| item.0.cmp(&needle))
781 .map(|idx| super::unicode_norm::COMPOSITION_TABLE[idx].1)
782 .ok()
783}
784
785fn compose_hangul(a: char, b: char) -> Option<char> {
786 let l = u32::from(a);
787 let v = u32::from(b);
788 if L_BASE <= l && l < (L_BASE + L_COUNT) && V_BASE <= v && v < (V_BASE + V_COUNT) {
789 let r = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT;
790 Some(char::try_from(r).unwrap())
791 } else if S_BASE <= l
792 && l <= (S_BASE + S_COUNT - T_COUNT)
793 && T_BASE <= v
794 && v < (T_BASE + T_COUNT)
795 && (l - S_BASE) % T_COUNT == 0
796 {
797 let r = l + (v - T_BASE);
798 Some(char::try_from(r).unwrap())
799 } else {
800 None
801 }
802}
803
804pub fn decompose(ab: char) -> Option<(char, char)> {
805 if let Some(ab) = decompose_hangul(ab) {
806 return Some(ab);
807 }
808
809 super::unicode_norm::DECOMPOSITION_TABLE
810 .binary_search_by(|item| item.0.cmp(&ab))
811 .map(|idx| {
812 let chars = &super::unicode_norm::DECOMPOSITION_TABLE[idx];
813 (chars.1, chars.2.unwrap_or('\0'))
814 })
815 .ok()
816}
817
818pub fn decompose_hangul(ab: char) -> Option<(char, char)> {
819 let si = u32::from(ab).wrapping_sub(S_BASE);
820 if si >= S_COUNT {
821 return None;
822 }
823
824 let (a, b) = if si % T_COUNT != 0 {
825 (S_BASE + (si / T_COUNT) * T_COUNT, T_BASE + (si % T_COUNT))
827 } else {
828 (L_BASE + (si / N_COUNT), V_BASE + (si % N_COUNT) / T_COUNT)
830 };
831
832 Some((char::try_from(a).unwrap(), char::try_from(b).unwrap()))
833}
834
#[cfg(test)]
mod tests {
    // Guards against a version skew between the Unicode data sources used by
    // this module: each must report Unicode 16.0.0.
    #[test]
    fn check_unicode_version() {
        assert_eq!(unicode_bidi_mirroring::UNICODE_VERSION, (16, 0, 0));
        assert_eq!(unicode_ccc::UNICODE_VERSION, (16, 0, 0));
        assert_eq!(unicode_properties::UNICODE_VERSION, (16, 0, 0));
        assert_eq!(unicode_script::UNICODE_VERSION, (16, 0, 0));
        assert_eq!(crate::hb::unicode_norm::UNICODE_VERSION, (16, 0, 0));
    }
}
846
/// Numeric codes for the general categories, as produced by
/// `GeneralCategoryExt::to_rb` and consumed by `GeneralCategoryExt::from_rb`.
///
/// The codes are consecutive, from 0 to 29.
pub mod hb_gc {
    pub const RB_UNICODE_GENERAL_CATEGORY_CONTROL: u32 = 0;
    pub const RB_UNICODE_GENERAL_CATEGORY_FORMAT: u32 = 1;
    pub const RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED: u32 = 2;
    pub const RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE: u32 = 3;
    pub const RB_UNICODE_GENERAL_CATEGORY_SURROGATE: u32 = 4;
    // Letters: these five are the ones `is_letter` accepts.
    pub const RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER: u32 = 5;
    pub const RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER: u32 = 6;
    pub const RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER: u32 = 7;
    pub const RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER: u32 = 8;
    pub const RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER: u32 = 9;
    // Marks: these three are the ones `is_mark` accepts.
    pub const RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK: u32 = 10;
    pub const RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK: u32 = 11;
    pub const RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK: u32 = 12;
    // Numbers.
    pub const RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER: u32 = 13;
    pub const RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER: u32 = 14;
    pub const RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER: u32 = 15;
    // Punctuation.
    pub const RB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION: u32 = 16;
    pub const RB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION: u32 = 17;
    pub const RB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION: u32 = 18;
    pub const RB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION: u32 = 19;
    pub const RB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION: u32 = 20;
    pub const RB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION: u32 = 21;
    pub const RB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION: u32 = 22;
    // Symbols.
    pub const RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL: u32 = 23;
    pub const RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL: u32 = 24;
    pub const RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL: u32 = 25;
    pub const RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL: u32 = 26;
    // Separators.
    pub const RB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR: u32 = 27;
    pub const RB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR: u32 = 28;
    pub const RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR: u32 = 29;
}