1use core::convert::TryFrom;
2
3pub use unicode_ccc::CanonicalCombiningClass;
4pub use unicode_properties::GeneralCategory as hb_unicode_general_category_t;
6
7use crate::Script;
8
/// Space classes returned by `CharExt::space_fallback`.
///
/// `SPACE_EM_<n>` denotes a space that is 1/n of an em wide (the numeric value
/// of each of those constants equals its divisor); the remaining constants are
/// named classes.
pub mod hb_unicode_funcs_t {
    /// Numeric code of a space class; always one of the constants in this module.
    pub type space_t = u8;

    /// The character is not a space that has a fallback.
    pub const NOT_SPACE: space_t = 0;

    // Fractions of an em.
    pub const SPACE_EM: space_t = 1;
    pub const SPACE_EM_2: space_t = 2;
    pub const SPACE_EM_3: space_t = 3;
    pub const SPACE_EM_4: space_t = 4;
    pub const SPACE_EM_5: space_t = 5;
    pub const SPACE_EM_6: space_t = 6;
    pub const SPACE_EM_16: space_t = 16;
    /// 4/18 of an em.
    pub const SPACE_4_EM_18: space_t = 17;

    // Named space classes.
    pub const SPACE: space_t = 18;
    pub const SPACE_FIGURE: space_t = 19;
    pub const SPACE_PUNCTUATION: space_t = 20;
    pub const SPACE_NARROW: space_t = 21;
}
28
/// Replacement values for selected canonical combining classes.
///
/// `CCC<n>` is the value stored at index `n` of `MODIFIED_COMBINING_CLASS`,
/// i.e. what `CharExt::modified_combining_class` reports for a character whose
/// canonical combining class is `n`.
// NOTE(review): the script labels below follow Unicode's fixed-position class
// assignments and HarfBuzz's `hb-unicode.hh`; confirm against upstream when
// updating the values.
#[allow(dead_code)]
pub mod modified_combining_class {
    // Classes 10..=26 (Hebrew points): permuted among themselves.
    pub const CCC10: u8 = 22;
    pub const CCC11: u8 = 15;
    pub const CCC12: u8 = 16;
    pub const CCC13: u8 = 17;
    pub const CCC14: u8 = 23;
    pub const CCC15: u8 = 18;
    pub const CCC16: u8 = 19;
    pub const CCC17: u8 = 20;
    pub const CCC18: u8 = 21;
    pub const CCC19: u8 = 14;
    pub const CCC20: u8 = 24;
    pub const CCC21: u8 = 12;
    pub const CCC22: u8 = 25;
    pub const CCC23: u8 = 13;
    pub const CCC24: u8 = 10;
    pub const CCC25: u8 = 11;
    pub const CCC26: u8 = 26;

    // Classes 27..=35 (Arabic marks): class 33 is moved in front of the others.
    pub const CCC27: u8 = 28;
    pub const CCC28: u8 = 29;
    pub const CCC29: u8 = 30;
    pub const CCC30: u8 = 31;
    pub const CCC31: u8 = 32;
    pub const CCC32: u8 = 33;
    pub const CCC33: u8 = 27;
    pub const CCC34: u8 = 34;
    pub const CCC35: u8 = 35;

    // Class 36 (Syriac): unchanged.
    pub const CCC36: u8 = 36;

    // Classes 84 and 91 (Telugu length marks): zeroed.
    pub const CCC84: u8 = 0;
    pub const CCC91: u8 = 0;

    // Classes 103 and 107 (Thai): 103 is remapped to 3, 107 is unchanged.
    pub const CCC103: u8 = 3;
    pub const CCC107: u8 = 107;

    // Classes 118 and 122 (Lao): unchanged.
    pub const CCC118: u8 = 118;
    pub const CCC122: u8 = 122;

    // Classes 129, 130 and 132 (Tibetan): 132 now sorts before 130.
    pub const CCC129: u8 = 129;
    pub const CCC130: u8 = 132;
    pub const CCC132: u8 = 131;
}
108
/// Lookup table indexed by a canonical combining class value (0..=255) that
/// yields the value returned by `CharExt::modified_combining_class`.
///
/// Entries written as plain numbers or `CanonicalCombiningClass::*` casts keep
/// the class as is (assuming the enum discriminants equal the standard class
/// numbers — TODO confirm against `unicode_ccc`); entries written as
/// `modified_combining_class::CCC<n>` substitute the value defined there.
/// The position of every entry matters: entry `i` must stay at index `i`.
#[rustfmt::skip]
const MODIFIED_COMBINING_CLASS: &[u8; 256] = &[
    // Indices 0..=9.
    CanonicalCombiningClass::NotReordered as u8,
    CanonicalCombiningClass::Overlay as u8,
    2, 3, 4, 5, 6,
    CanonicalCombiningClass::Nukta as u8,
    CanonicalCombiningClass::KanaVoicing as u8,
    CanonicalCombiningClass::Virama as u8,

    // Indices 10..=26.
    modified_combining_class::CCC10,
    modified_combining_class::CCC11,
    modified_combining_class::CCC12,
    modified_combining_class::CCC13,
    modified_combining_class::CCC14,
    modified_combining_class::CCC15,
    modified_combining_class::CCC16,
    modified_combining_class::CCC17,
    modified_combining_class::CCC18,
    modified_combining_class::CCC19,
    modified_combining_class::CCC20,
    modified_combining_class::CCC21,
    modified_combining_class::CCC22,
    modified_combining_class::CCC23,
    modified_combining_class::CCC24,
    modified_combining_class::CCC25,
    modified_combining_class::CCC26,

    // Indices 27..=35.
    modified_combining_class::CCC27,
    modified_combining_class::CCC28,
    modified_combining_class::CCC29,
    modified_combining_class::CCC30,
    modified_combining_class::CCC31,
    modified_combining_class::CCC32,
    modified_combining_class::CCC33,
    modified_combining_class::CCC34,
    modified_combining_class::CCC35,

    // Index 36.
    modified_combining_class::CCC36,

    // Indices 37..=83.
    37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
    60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 83,

    // Indices 84..=102 (84 and 91 are overridden).
    modified_combining_class::CCC84,
    85, 86, 87, 88, 89, 90,
    modified_combining_class::CCC91,
    92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,

    // Indices 103..=117 (103 and 107 come from the override module).
    modified_combining_class::CCC103,
    104, 105, 106,
    modified_combining_class::CCC107,
    108, 109, 110, 111, 112, 113, 114, 115, 116, 117,

    // Indices 118..=128 (118 and 122 come from the override module).
    modified_combining_class::CCC118,
    119, 120, 121,
    modified_combining_class::CCC122,
    123, 124, 125, 126, 127, 128,

    // Indices 129..=139 (129, 130 and 132 come from the override module).
    modified_combining_class::CCC129,
    modified_combining_class::CCC130,
    131,
    modified_combining_class::CCC132,
    133, 134, 135, 136, 137, 138, 139,


    // Indices 140..=199.
    140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
    150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
    170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
    180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
    190, 191, 192, 193, 194, 195, 196, 197, 198, 199,

    // Indices 200..=255.
    CanonicalCombiningClass::AttachedBelowLeft as u8,
    201,
    CanonicalCombiningClass::AttachedBelow as u8,
    203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213,
    CanonicalCombiningClass::AttachedAbove as u8,
    215,
    CanonicalCombiningClass::AttachedAboveRight as u8,
    217,
    CanonicalCombiningClass::BelowLeft as u8,
    219,
    CanonicalCombiningClass::Below as u8,
    221,
    CanonicalCombiningClass::BelowRight as u8,
    223,
    CanonicalCombiningClass::Left as u8,
    225,
    CanonicalCombiningClass::Right as u8,
    227,
    CanonicalCombiningClass::AboveLeft as u8,
    229,
    CanonicalCombiningClass::Above as u8,
    231,
    CanonicalCombiningClass::AboveRight as u8,
    CanonicalCombiningClass::DoubleBelow as u8,
    CanonicalCombiningClass::DoubleAbove as u8,
    235, 236, 237, 238, 239,
    CanonicalCombiningClass::IotaSubscript as u8,
    241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
    255, ];
219
/// Extension methods for `hb_unicode_general_category_t`.
pub trait GeneralCategoryExt {
    /// Returns the numeric `hb_gc::RB_UNICODE_GENERAL_CATEGORY_*` code for this category.
    fn to_rb(&self) -> u32;
    /// Builds a category from a numeric `hb_gc::RB_UNICODE_GENERAL_CATEGORY_*` code.
    fn from_rb(gc: u32) -> Self;
    /// Returns `true` for the mark categories (spacing, enclosing, non-spacing).
    fn is_mark(&self) -> bool;
    /// Returns `true` for the letter categories (lowercase, modifier, other, titlecase, uppercase).
    fn is_letter(&self) -> bool;
}
226
227#[rustfmt::skip]
228impl GeneralCategoryExt for hb_unicode_general_category_t {
229    fn to_rb(&self) -> u32 {
230        match *self {
231            hb_unicode_general_category_t::ClosePunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION,
232            hb_unicode_general_category_t::ConnectorPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION,
233            hb_unicode_general_category_t::Control => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONTROL,
234            hb_unicode_general_category_t::CurrencySymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL,
235            hb_unicode_general_category_t::DashPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION,
236            hb_unicode_general_category_t::DecimalNumber => hb_gc::RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER,
237            hb_unicode_general_category_t::EnclosingMark => hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK,
238            hb_unicode_general_category_t::FinalPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION,
239            hb_unicode_general_category_t::Format => hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT,
240            hb_unicode_general_category_t::InitialPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION,
241            hb_unicode_general_category_t::LetterNumber => hb_gc::RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER,
242            hb_unicode_general_category_t::LineSeparator => hb_gc::RB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR,
243            hb_unicode_general_category_t::LowercaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER,
244            hb_unicode_general_category_t::MathSymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL,
245            hb_unicode_general_category_t::ModifierLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER,
246            hb_unicode_general_category_t::ModifierSymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL,
247            hb_unicode_general_category_t::NonspacingMark => hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK,
248            hb_unicode_general_category_t::OpenPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION,
249            hb_unicode_general_category_t::OtherLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER,
250            hb_unicode_general_category_t::OtherNumber => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER,
251            hb_unicode_general_category_t::OtherPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION,
252            hb_unicode_general_category_t::OtherSymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL,
253            hb_unicode_general_category_t::ParagraphSeparator => hb_gc::RB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR,
254            hb_unicode_general_category_t::PrivateUse => hb_gc::RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE,
255            hb_unicode_general_category_t::SpaceSeparator => hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR,
256            hb_unicode_general_category_t::SpacingMark => hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK,
257            hb_unicode_general_category_t::Surrogate => hb_gc::RB_UNICODE_GENERAL_CATEGORY_SURROGATE,
258            hb_unicode_general_category_t::TitlecaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER,
259            hb_unicode_general_category_t::Unassigned => hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED,
260            hb_unicode_general_category_t::UppercaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER
261        }
262    }
263
264    fn from_rb(gc: u32) -> Self {
265        match gc {
266            hb_gc::RB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION => hb_unicode_general_category_t::ClosePunctuation,
267            hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION => hb_unicode_general_category_t::ConnectorPunctuation,
268            hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONTROL => hb_unicode_general_category_t::Control,
269            hb_gc::RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL => hb_unicode_general_category_t::CurrencySymbol,
270            hb_gc::RB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION => hb_unicode_general_category_t::DashPunctuation,
271            hb_gc::RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER => hb_unicode_general_category_t::DecimalNumber,
272            hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK => hb_unicode_general_category_t::EnclosingMark,
273            hb_gc::RB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION => hb_unicode_general_category_t::FinalPunctuation,
274            hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT => hb_unicode_general_category_t::Format,
275            hb_gc::RB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION => hb_unicode_general_category_t::InitialPunctuation,
276            hb_gc::RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER => hb_unicode_general_category_t::LetterNumber,
277            hb_gc::RB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR => hb_unicode_general_category_t::LineSeparator,
278            hb_gc::RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER => hb_unicode_general_category_t::LowercaseLetter,
279            hb_gc::RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL => hb_unicode_general_category_t::MathSymbol,
280            hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER => hb_unicode_general_category_t::ModifierLetter,
281            hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL => hb_unicode_general_category_t::ModifierSymbol,
282            hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK => hb_unicode_general_category_t::NonspacingMark,
283            hb_gc::RB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION => hb_unicode_general_category_t::OpenPunctuation,
284            hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER => hb_unicode_general_category_t::OtherLetter,
285            hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER => hb_unicode_general_category_t::OtherNumber,
286            hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION => hb_unicode_general_category_t::OtherPunctuation,
287            hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL => hb_unicode_general_category_t::OtherSymbol,
288            hb_gc::RB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR => hb_unicode_general_category_t::ParagraphSeparator,
289            hb_gc::RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE => hb_unicode_general_category_t::PrivateUse,
290            hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR => hb_unicode_general_category_t::SpaceSeparator,
291            hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK => hb_unicode_general_category_t::SpacingMark,
292            hb_gc::RB_UNICODE_GENERAL_CATEGORY_SURROGATE => hb_unicode_general_category_t::Surrogate,
293            hb_gc::RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER => hb_unicode_general_category_t::TitlecaseLetter,
294            hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED => hb_unicode_general_category_t::Unassigned,
295            hb_gc::RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER => hb_unicode_general_category_t::UppercaseLetter,
296            _ => unreachable!()
297        }
298    }
299
300    fn is_mark(&self) -> bool {
301        matches!(*self, 
302            hb_unicode_general_category_t::SpacingMark |
303            hb_unicode_general_category_t::EnclosingMark |
304            hb_unicode_general_category_t::NonspacingMark)
305    }
306
307    fn is_letter(&self) -> bool {
308        matches!(*self, 
309            hb_unicode_general_category_t::LowercaseLetter |
310            hb_unicode_general_category_t::ModifierLetter |
311            hb_unicode_general_category_t::OtherLetter |
312            hb_unicode_general_category_t::TitlecaseLetter |
313            hb_unicode_general_category_t::UppercaseLetter)
314    }
315}
316
/// Unicode property lookups needed by the shaper, implemented for `char`.
pub trait CharExt {
    /// Returns the crate-level `Script` of the character (`script::UNKNOWN` when unmapped).
    fn script(self) -> Script;
    /// Returns the Unicode general category of the character.
    fn general_category(self) -> hb_unicode_general_category_t;
    /// Returns the `hb_unicode_funcs_t` space class, or `NOT_SPACE`.
    fn space_fallback(self) -> hb_unicode_funcs_t::space_t;
    /// Returns the combining class after the `MODIFIED_COMBINING_CLASS` remapping.
    fn modified_combining_class(self) -> u8;
    /// Returns the bidi-mirrored counterpart of the character, if it has one.
    fn mirrored(self) -> Option<char>;
    /// Returns `true` if the character is in the hard-coded Extended_Pictographic ranges.
    fn is_emoji_extended_pictographic(self) -> bool;
    /// Returns `true` if the character is in the hard-coded default-ignorable ranges.
    fn is_default_ignorable(self) -> bool;
    /// Returns `true` for U+FE00..=U+FE0F and U+E0100..=U+E01EF.
    fn is_variation_selector(self) -> bool;
    /// Returns the vertical presentation form of the character, if it has one.
    fn vertical(self) -> Option<char>;
}
328
329impl CharExt for char {
330    fn script(self) -> Script {
331        use crate::script;
332        use unicode_script as us;
333
334        match unicode_script::UnicodeScript::script(&self) {
335            us::Script::Common => script::COMMON,
336            us::Script::Inherited => script::INHERITED,
337            us::Script::Adlam => script::ADLAM,
338            us::Script::Ahom => script::AHOM,
339            us::Script::Anatolian_Hieroglyphs => script::ANATOLIAN_HIEROGLYPHS,
340            us::Script::Arabic => script::ARABIC,
341            us::Script::Armenian => script::ARMENIAN,
342            us::Script::Avestan => script::AVESTAN,
343            us::Script::Balinese => script::BALINESE,
344            us::Script::Bamum => script::BAMUM,
345            us::Script::Bassa_Vah => script::BASSA_VAH,
346            us::Script::Batak => script::BATAK,
347            us::Script::Bengali => script::BENGALI,
348            us::Script::Bhaiksuki => script::BHAIKSUKI,
349            us::Script::Bopomofo => script::BOPOMOFO,
350            us::Script::Brahmi => script::BRAHMI,
351            us::Script::Braille => script::BRAILLE,
352            us::Script::Buginese => script::BUGINESE,
353            us::Script::Buhid => script::BUHID,
354            us::Script::Canadian_Aboriginal => script::CANADIAN_SYLLABICS,
355            us::Script::Carian => script::CARIAN,
356            us::Script::Caucasian_Albanian => script::CAUCASIAN_ALBANIAN,
357            us::Script::Chakma => script::CHAKMA,
358            us::Script::Cham => script::CHAM,
359            us::Script::Cherokee => script::CHEROKEE,
360            us::Script::Chorasmian => script::CHORASMIAN,
361            us::Script::Coptic => script::COPTIC,
362            us::Script::Cuneiform => script::CUNEIFORM,
363            us::Script::Cypriot => script::CYPRIOT,
364            us::Script::Cyrillic => script::CYRILLIC,
365            us::Script::Deseret => script::DESERET,
366            us::Script::Devanagari => script::DEVANAGARI,
367            us::Script::Dives_Akuru => script::DIVES_AKURU,
368            us::Script::Dogra => script::DOGRA,
369            us::Script::Duployan => script::DUPLOYAN,
370            us::Script::Egyptian_Hieroglyphs => script::EGYPTIAN_HIEROGLYPHS,
371            us::Script::Elbasan => script::ELBASAN,
372            us::Script::Elymaic => script::ELYMAIC,
373            us::Script::Ethiopic => script::ETHIOPIC,
374            us::Script::Georgian => script::GEORGIAN,
375            us::Script::Glagolitic => script::GLAGOLITIC,
376            us::Script::Gothic => script::GOTHIC,
377            us::Script::Grantha => script::GRANTHA,
378            us::Script::Greek => script::GREEK,
379            us::Script::Gujarati => script::GUJARATI,
380            us::Script::Gunjala_Gondi => script::GUNJALA_GONDI,
381            us::Script::Gurmukhi => script::GURMUKHI,
382            us::Script::Han => script::HAN,
383            us::Script::Hangul => script::HANGUL,
384            us::Script::Hanifi_Rohingya => script::HANIFI_ROHINGYA,
385            us::Script::Hanunoo => script::HANUNOO,
386            us::Script::Hatran => script::HATRAN,
387            us::Script::Hebrew => script::HEBREW,
388            us::Script::Hiragana => script::HIRAGANA,
389            us::Script::Imperial_Aramaic => script::IMPERIAL_ARAMAIC,
390            us::Script::Inscriptional_Pahlavi => script::INSCRIPTIONAL_PAHLAVI,
391            us::Script::Inscriptional_Parthian => script::INSCRIPTIONAL_PARTHIAN,
392            us::Script::Javanese => script::JAVANESE,
393            us::Script::Kaithi => script::KAITHI,
394            us::Script::Kannada => script::KANNADA,
395            us::Script::Katakana => script::KATAKANA,
396            us::Script::Kayah_Li => script::KAYAH_LI,
397            us::Script::Kharoshthi => script::KHAROSHTHI,
398            us::Script::Khitan_Small_Script => script::KHITAN_SMALL_SCRIPT,
399            us::Script::Khmer => script::KHMER,
400            us::Script::Khojki => script::KHOJKI,
401            us::Script::Khudawadi => script::KHUDAWADI,
402            us::Script::Lao => script::LAO,
403            us::Script::Latin => script::LATIN,
404            us::Script::Lepcha => script::LEPCHA,
405            us::Script::Limbu => script::LIMBU,
406            us::Script::Linear_A => script::LINEAR_A,
407            us::Script::Linear_B => script::LINEAR_B,
408            us::Script::Lisu => script::LISU,
409            us::Script::Lycian => script::LYCIAN,
410            us::Script::Lydian => script::LYDIAN,
411            us::Script::Mahajani => script::MAHAJANI,
412            us::Script::Makasar => script::MAKASAR,
413            us::Script::Malayalam => script::MALAYALAM,
414            us::Script::Mandaic => script::MANDAIC,
415            us::Script::Manichaean => script::MANICHAEAN,
416            us::Script::Marchen => script::MARCHEN,
417            us::Script::Masaram_Gondi => script::MASARAM_GONDI,
418            us::Script::Medefaidrin => script::MEDEFAIDRIN,
419            us::Script::Meetei_Mayek => script::MEETEI_MAYEK,
420            us::Script::Mende_Kikakui => script::MENDE_KIKAKUI,
421            us::Script::Meroitic_Cursive => script::MEROITIC_CURSIVE,
422            us::Script::Meroitic_Hieroglyphs => script::MEROITIC_HIEROGLYPHS,
423            us::Script::Miao => script::MIAO,
424            us::Script::Modi => script::MODI,
425            us::Script::Mongolian => script::MONGOLIAN,
426            us::Script::Mro => script::MRO,
427            us::Script::Multani => script::MULTANI,
428            us::Script::Myanmar => script::MYANMAR,
429            us::Script::Nabataean => script::NABATAEAN,
430            us::Script::Nandinagari => script::NANDINAGARI,
431            us::Script::New_Tai_Lue => script::NEW_TAI_LUE,
432            us::Script::Newa => script::NEWA,
433            us::Script::Nko => script::NKO,
434            us::Script::Nushu => script::NUSHU,
435            us::Script::Nyiakeng_Puachue_Hmong => script::NYIAKENG_PUACHUE_HMONG,
436            us::Script::Ogham => script::OGHAM,
437            us::Script::Ol_Chiki => script::OL_CHIKI,
438            us::Script::Old_Hungarian => script::OLD_HUNGARIAN,
439            us::Script::Old_Italic => script::OLD_ITALIC,
440            us::Script::Old_North_Arabian => script::OLD_NORTH_ARABIAN,
441            us::Script::Old_Permic => script::OLD_PERMIC,
442            us::Script::Old_Persian => script::OLD_PERSIAN,
443            us::Script::Old_Sogdian => script::OLD_SOGDIAN,
444            us::Script::Old_South_Arabian => script::OLD_SOUTH_ARABIAN,
445            us::Script::Old_Turkic => script::OLD_TURKIC,
446            us::Script::Oriya => script::ORIYA,
447            us::Script::Osage => script::OSAGE,
448            us::Script::Osmanya => script::OSMANYA,
449            us::Script::Pahawh_Hmong => script::PAHAWH_HMONG,
450            us::Script::Palmyrene => script::PALMYRENE,
451            us::Script::Pau_Cin_Hau => script::PAU_CIN_HAU,
452            us::Script::Phags_Pa => script::PHAGS_PA,
453            us::Script::Phoenician => script::PHOENICIAN,
454            us::Script::Psalter_Pahlavi => script::PSALTER_PAHLAVI,
455            us::Script::Rejang => script::REJANG,
456            us::Script::Runic => script::RUNIC,
457            us::Script::Samaritan => script::SAMARITAN,
458            us::Script::Saurashtra => script::SAURASHTRA,
459            us::Script::Sharada => script::SHARADA,
460            us::Script::Shavian => script::SHAVIAN,
461            us::Script::Siddham => script::SIDDHAM,
462            us::Script::SignWriting => script::SIGNWRITING,
463            us::Script::Sinhala => script::SINHALA,
464            us::Script::Sogdian => script::SOGDIAN,
465            us::Script::Sora_Sompeng => script::SORA_SOMPENG,
466            us::Script::Soyombo => script::SOYOMBO,
467            us::Script::Sundanese => script::SUNDANESE,
468            us::Script::Syloti_Nagri => script::SYLOTI_NAGRI,
469            us::Script::Syriac => script::SYRIAC,
470            us::Script::Tagalog => script::TAGALOG,
471            us::Script::Tagbanwa => script::TAGBANWA,
472            us::Script::Tai_Le => script::TAI_LE,
473            us::Script::Tai_Tham => script::TAI_THAM,
474            us::Script::Tai_Viet => script::TAI_VIET,
475            us::Script::Takri => script::TAKRI,
476            us::Script::Tamil => script::TAMIL,
477            us::Script::Tangut => script::TANGUT,
478            us::Script::Telugu => script::TELUGU,
479            us::Script::Thaana => script::THAANA,
480            us::Script::Thai => script::THAI,
481            us::Script::Tibetan => script::TIBETAN,
482            us::Script::Tifinagh => script::TIFINAGH,
483            us::Script::Tirhuta => script::TIRHUTA,
484            us::Script::Ugaritic => script::UGARITIC,
485            us::Script::Vai => script::VAI,
486            us::Script::Wancho => script::WANCHO,
487            us::Script::Warang_Citi => script::WARANG_CITI,
488            us::Script::Yezidi => script::YEZIDI,
489            us::Script::Yi => script::YI,
490            us::Script::Zanabazar_Square => script::ZANABAZAR_SQUARE,
491            _ => script::UNKNOWN,
492        }
493    }
494
495    fn general_category(self) -> hb_unicode_general_category_t {
496        unicode_properties::general_category::UnicodeGeneralCategory::general_category(self)
497    }
498
499    fn space_fallback(self) -> hb_unicode_funcs_t::space_t {
500        use hb_unicode_funcs_t::*;
501
502        match self {
504            '\u{0020}' => SPACE,             '\u{00A0}' => SPACE,             '\u{2000}' => SPACE_EM_2,        '\u{2001}' => SPACE_EM,          '\u{2002}' => SPACE_EM_2,        '\u{2003}' => SPACE_EM,          '\u{2004}' => SPACE_EM_3,        '\u{2005}' => SPACE_EM_4,        '\u{2006}' => SPACE_EM_6,        '\u{2007}' => SPACE_FIGURE,      '\u{2008}' => SPACE_PUNCTUATION, '\u{2009}' => SPACE_EM_5,        '\u{200A}' => SPACE_EM_16,       '\u{202F}' => SPACE_NARROW,      '\u{205F}' => SPACE_4_EM_18,     '\u{3000}' => SPACE_EM,          _ => NOT_SPACE,                  }
522    }
523
524    fn modified_combining_class(self) -> u8 {
525        let u = self;
526
527        if u == '\u{1A60}' {
529            return 254;
530        }
531
532        if u == '\u{0FC6}' {
534            return 254;
535        }
536
537        if u == '\u{0F39}' {
539            return 127;
540        }
541
542        let k = unicode_ccc::get_canonical_combining_class(u);
543        MODIFIED_COMBINING_CLASS[k as usize]
544    }
545
546    fn mirrored(self) -> Option<char> {
547        unicode_bidi_mirroring::get_mirrored(self)
548    }
549
550    fn is_emoji_extended_pictographic(self) -> bool {
551        match self as u32 {
553            0x00A9 => true,
554            0x00AE => true,
555            0x203C => true,
556            0x2049 => true,
557            0x2122 => true,
558            0x2139 => true,
559            0x2194..=0x2199 => true,
560            0x21A9..=0x21AA => true,
561            0x231A..=0x231B => true,
562            0x2328 => true,
563            0x2388 => true,
564            0x23CF => true,
565            0x23E9..=0x23F3 => true,
566            0x23F8..=0x23FA => true,
567            0x24C2 => true,
568            0x25AA..=0x25AB => true,
569            0x25B6 => true,
570            0x25C0 => true,
571            0x25FB..=0x25FE => true,
572            0x2600..=0x2605 => true,
573            0x2607..=0x2612 => true,
574            0x2614..=0x2685 => true,
575            0x2690..=0x2705 => true,
576            0x2708..=0x2712 => true,
577            0x2714 => true,
578            0x2716 => true,
579            0x271D => true,
580            0x2721 => true,
581            0x2728 => true,
582            0x2733..=0x2734 => true,
583            0x2744 => true,
584            0x2747 => true,
585            0x274C => true,
586            0x274E => true,
587            0x2753..=0x2755 => true,
588            0x2757 => true,
589            0x2763..=0x2767 => true,
590            0x2795..=0x2797 => true,
591            0x27A1 => true,
592            0x27B0 => true,
593            0x27BF => true,
594            0x2934..=0x2935 => true,
595            0x2B05..=0x2B07 => true,
596            0x2B1B..=0x2B1C => true,
597            0x2B50 => true,
598            0x2B55 => true,
599            0x3030 => true,
600            0x303D => true,
601            0x3297 => true,
602            0x3299 => true,
603            0x1F000..=0x1F0FF => true,
604            0x1F10D..=0x1F10F => true,
605            0x1F12F => true,
606            0x1F16C..=0x1F171 => true,
607            0x1F17E..=0x1F17F => true,
608            0x1F18E => true,
609            0x1F191..=0x1F19A => true,
610            0x1F1AD..=0x1F1E5 => true,
611            0x1F201..=0x1F20F => true,
612            0x1F21A => true,
613            0x1F22F => true,
614            0x1F232..=0x1F23A => true,
615            0x1F23C..=0x1F23F => true,
616            0x1F249..=0x1F3FA => true,
617            0x1F400..=0x1F53D => true,
618            0x1F546..=0x1F64F => true,
619            0x1F680..=0x1F6FF => true,
620            0x1F774..=0x1F77F => true,
621            0x1F7D5..=0x1F7FF => true,
622            0x1F80C..=0x1F80F => true,
623            0x1F848..=0x1F84F => true,
624            0x1F85A..=0x1F85F => true,
625            0x1F888..=0x1F88F => true,
626            0x1F8AE..=0x1F8FF => true,
627            0x1F90C..=0x1F93A => true,
628            0x1F93C..=0x1F945 => true,
629            0x1F947..=0x1FAFF => true,
630            0x1FC00..=0x1FFFD => true,
631            _ => false,
632        }
633    }
634
635    fn is_default_ignorable(self) -> bool {
673        let ch = u32::from(self);
674        let plane = ch >> 16;
675        if plane == 0 {
676            let page = ch >> 8;
678            match page {
679                0x00 => ch == 0x00AD,
680                0x03 => ch == 0x034F,
681                0x06 => ch == 0x061C,
682                0x17 => (0x17B4..=0x17B5).contains(&ch),
683                0x18 => (0x180B..=0x180E).contains(&ch),
684                0x20 => {
685                    (0x200B..=0x200F).contains(&ch)
686                        || (0x202A..=0x202E).contains(&ch)
687                        || (0x2060..=0x206F).contains(&ch)
688                }
689                0xFE => (0xFE00..=0xFE0F).contains(&ch) || ch == 0xFEFF,
690                0xFF => (0xFFF0..=0xFFF8).contains(&ch),
691                _ => false,
692            }
693        } else {
694            match plane {
696                0x01 => (0x1D173..=0x1D17A).contains(&ch),
697                0x0E => (0xE0000..=0xE0FFF).contains(&ch),
698                _ => false,
699            }
700        }
701    }
702
703    fn is_variation_selector(self) -> bool {
704        let ch = u32::from(self);
707        (0x0FE00..=0x0FE0F).contains(&ch) || (0xE0100..=0xE01EF).contains(&ch) }
710
711    fn vertical(self) -> Option<char> {
712        Some(match u32::from(self) >> 8 {
713            0x20 => match self {
714                '\u{2013}' => '\u{fe32}', '\u{2014}' => '\u{fe31}', '\u{2025}' => '\u{fe30}', '\u{2026}' => '\u{fe19}', _ => return None,
719            },
720            0x30 => match self {
721                '\u{3001}' => '\u{fe11}', '\u{3002}' => '\u{fe12}', '\u{3008}' => '\u{fe3f}', '\u{3009}' => '\u{fe40}', '\u{300a}' => '\u{fe3d}', '\u{300b}' => '\u{fe3e}', '\u{300c}' => '\u{fe41}', '\u{300d}' => '\u{fe42}', '\u{300e}' => '\u{fe43}', '\u{300f}' => '\u{fe44}', '\u{3010}' => '\u{fe3b}', '\u{3011}' => '\u{fe3c}', '\u{3014}' => '\u{fe39}', '\u{3015}' => '\u{fe3a}', '\u{3016}' => '\u{fe17}', '\u{3017}' => '\u{fe18}', _ => return None,
738            },
739            0xfe => match self {
740                '\u{fe4f}' => '\u{fe34}', _ => return None,
742            },
743            0xff => match self {
744                '\u{ff01}' => '\u{fe15}', '\u{ff08}' => '\u{fe35}', '\u{ff09}' => '\u{fe36}', '\u{ff0c}' => '\u{fe10}', '\u{ff1a}' => '\u{fe13}', '\u{ff1b}' => '\u{fe14}', '\u{ff1f}' => '\u{fe16}', '\u{ff3b}' => '\u{fe47}', '\u{ff3d}' => '\u{fe48}', '\u{ff3f}' => '\u{fe33}', '\u{ff5b}' => '\u{fe37}', '\u{ff5d}' => '\u{fe38}', _ => return None,
757            },
758            _ => return None,
759        })
760    }
761}
762
// Constants for the arithmetic Hangul syllable composition/decomposition
// performed by `compose_hangul` and `decompose_hangul`.

/// First precomposed Hangul syllable.
const S_BASE: u32 = 0xAC00;
/// First leading consonant jamo.
const L_BASE: u32 = 0x1100;
/// First vowel jamo.
const V_BASE: u32 = 0x1161;
/// One below the first trailing consonant jamo; a trailing index of 0
/// therefore means "no trailing consonant".
const T_BASE: u32 = 0x11A7;
/// Number of leading consonants.
const L_COUNT: u32 = 19;
/// Number of vowels.
const V_COUNT: u32 = 21;
/// Number of trailing consonant slots, including the "no trailing consonant" slot.
const T_COUNT: u32 = 28;
/// Number of syllables per leading consonant.
const N_COUNT: u32 = V_COUNT * T_COUNT;
/// Total number of precomposed syllables.
const S_COUNT: u32 = L_COUNT * N_COUNT;
772
773pub fn compose(a: char, b: char) -> Option<char> {
774    if let Some(ab) = compose_hangul(a, b) {
775        return Some(ab);
776    }
777
778    let needle = (a as u64) << 32 | (b as u64);
779    super::unicode_norm::COMPOSITION_TABLE
780        .binary_search_by(|item| item.0.cmp(&needle))
781        .map(|idx| super::unicode_norm::COMPOSITION_TABLE[idx].1)
782        .ok()
783}
784
785fn compose_hangul(a: char, b: char) -> Option<char> {
786    let l = u32::from(a);
787    let v = u32::from(b);
788    if L_BASE <= l && l < (L_BASE + L_COUNT) && V_BASE <= v && v < (V_BASE + V_COUNT) {
789        let r = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT;
790        Some(char::try_from(r).unwrap())
791    } else if S_BASE <= l
792        && l <= (S_BASE + S_COUNT - T_COUNT)
793        && T_BASE <= v
794        && v < (T_BASE + T_COUNT)
795        && (l - S_BASE) % T_COUNT == 0
796    {
797        let r = l + (v - T_BASE);
798        Some(char::try_from(r).unwrap())
799    } else {
800        None
801    }
802}
803
804pub fn decompose(ab: char) -> Option<(char, char)> {
805    if let Some(ab) = decompose_hangul(ab) {
806        return Some(ab);
807    }
808
809    super::unicode_norm::DECOMPOSITION_TABLE
810        .binary_search_by(|item| item.0.cmp(&ab))
811        .map(|idx| {
812            let chars = &super::unicode_norm::DECOMPOSITION_TABLE[idx];
813            (chars.1, chars.2.unwrap_or('\0'))
814        })
815        .ok()
816}
817
818pub fn decompose_hangul(ab: char) -> Option<(char, char)> {
819    let si = u32::from(ab).wrapping_sub(S_BASE);
820    if si >= S_COUNT {
821        return None;
822    }
823
824    let (a, b) = if si % T_COUNT != 0 {
825        (S_BASE + (si / T_COUNT) * T_COUNT, T_BASE + (si % T_COUNT))
827    } else {
828        (L_BASE + (si / N_COUNT), V_BASE + (si % N_COUNT) / T_COUNT)
830    };
831
832    Some((char::try_from(a).unwrap(), char::try_from(b).unwrap()))
833}
834
#[cfg(test)]
mod tests {
    /// Checks that every Unicode data source used by this module reports
    /// Unicode version 16.0.0.
    #[test]
    fn check_unicode_version() {
        // A macro rather than a shared constant, so each comparison infers
        // the tuple type of the constant it is compared against.
        macro_rules! assert_unicode_version {
            ($($version:expr),+ $(,)?) => {
                $(assert_eq!($version, (16, 0, 0));)+
            };
        }

        assert_unicode_version!(
            unicode_bidi_mirroring::UNICODE_VERSION,
            unicode_ccc::UNICODE_VERSION,
            unicode_properties::UNICODE_VERSION,
            unicode_script::UNICODE_VERSION,
            crate::hb::unicode_norm::UNICODE_VERSION,
        );
    }
}
846
/// Numeric general-category codes used by `GeneralCategoryExt::to_rb` and
/// `GeneralCategoryExt::from_rb`.
///
/// The codes are contiguous from 0 to 29; `from_rb` treats any other value as
/// unreachable.
pub mod hb_gc {
    // Other
    pub const RB_UNICODE_GENERAL_CATEGORY_CONTROL: u32 = 0;
    pub const RB_UNICODE_GENERAL_CATEGORY_FORMAT: u32 = 1;
    pub const RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED: u32 = 2;
    pub const RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE: u32 = 3;
    pub const RB_UNICODE_GENERAL_CATEGORY_SURROGATE: u32 = 4;
    // Letters
    pub const RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER: u32 = 5;
    pub const RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER: u32 = 6;
    pub const RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER: u32 = 7;
    pub const RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER: u32 = 8;
    pub const RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER: u32 = 9;
    // Marks
    pub const RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK: u32 = 10;
    pub const RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK: u32 = 11;
    pub const RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK: u32 = 12;
    // Numbers
    pub const RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER: u32 = 13;
    pub const RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER: u32 = 14;
    pub const RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER: u32 = 15;
    // Punctuation
    pub const RB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION: u32 = 16;
    pub const RB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION: u32 = 17;
    pub const RB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION: u32 = 18;
    pub const RB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION: u32 = 19;
    pub const RB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION: u32 = 20;
    pub const RB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION: u32 = 21;
    pub const RB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION: u32 = 22;
    // Symbols
    pub const RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL: u32 = 23;
    pub const RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL: u32 = 24;
    pub const RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL: u32 = 25;
    pub const RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL: u32 = 26;
    // Separators
    pub const RB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR: u32 = 27;
    pub const RB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR: u32 = 28;
    pub const RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR: u32 = 29;
}