rustybuzz/hb/
ot_shaper_hebrew.rs

1use super::ot_shape_normalize::*;
2use super::ot_shaper::*;
3use super::{hb_tag_t, unicode};
4use crate::hb::buffer::hb_buffer_t;
5use crate::hb::ot_layout::_hb_glyph_info_get_modified_combining_class;
6use crate::hb::ot_shape_plan::hb_ot_shape_plan_t;
7use crate::hb::unicode::modified_combining_class;
8use unicode_ccc::CanonicalCombiningClass;
9
10pub const HEBREW_SHAPER: hb_ot_shaper_t = hb_ot_shaper_t {
11    collect_features: None,
12    override_features: None,
13    create_data: None,
14    preprocess_text: None,
15    postprocess_glyphs: None,
16    normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_AUTO,
17    decompose: None,
18    compose: Some(compose),
19    setup_masks: None,
20    gpos_tag: Some(hb_tag_t::from_bytes(b"hebr")),
21    reorder_marks: Some(reorder_marks_hebrew),
22    zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
23    fallback_position: true,
24};
25
26fn reorder_marks_hebrew(
27    _: &hb_ot_shape_plan_t,
28    buffer: &mut hb_buffer_t,
29    start: usize,
30    end: usize,
31) {
32    for i in start + 2..end {
33        let c0 = buffer.info[i - 2];
34        let c1 = buffer.info[i - 1];
35        let c2 = buffer.info[i - 0];
36
37        if (_hb_glyph_info_get_modified_combining_class(&c0) == modified_combining_class::CCC17
38                || _hb_glyph_info_get_modified_combining_class(&c0) == modified_combining_class::CCC18) /* patach or qamats */
39                &&
40            (_hb_glyph_info_get_modified_combining_class(&c1) == modified_combining_class::CCC10
41                || _hb_glyph_info_get_modified_combining_class(&c1) == modified_combining_class::CCC14) /* sheva or hiriq */ &&
42            (_hb_glyph_info_get_modified_combining_class(&c2) == modified_combining_class::CCC22
43                || _hb_glyph_info_get_modified_combining_class(&c2) == CanonicalCombiningClass::Below as u8)
44        /* meteg or below */
45        {
46            buffer.merge_clusters(i - 1, i + 1);
47            buffer.info.swap(i - 1, i);
48            break;
49        }
50    }
51}
52
/// Dagesh presentation forms for the letters U+05D0..=U+05EA, indexed by
/// `letter - 0x05D0`.
///
/// A `'\0'` entry marks a letter that has no dagesh presentation form encoded.
const S_DAGESH_FORMS: &[char] = &[
    '\u{FB30}', // U+05D0 ALEF
    '\u{FB31}', // U+05D1 BET
    '\u{FB32}', // U+05D2 GIMEL
    '\u{FB33}', // U+05D3 DALET
    '\u{FB34}', // U+05D4 HE
    '\u{FB35}', // U+05D5 VAV
    '\u{FB36}', // U+05D6 ZAYIN
    '\0', // U+05D7 HET (no form)
    '\u{FB38}', // U+05D8 TET
    '\u{FB39}', // U+05D9 YOD
    '\u{FB3A}', // U+05DA FINAL KAF
    '\u{FB3B}', // U+05DB KAF
    '\u{FB3C}', // U+05DC LAMED
    '\0', // U+05DD FINAL MEM (no form)
    '\u{FB3E}', // U+05DE MEM
    '\0', // U+05DF FINAL NUN (no form)
    '\u{FB40}', // U+05E0 NUN
    '\u{FB41}', // U+05E1 SAMEKH
    '\0', // U+05E2 AYIN (no form)
    '\u{FB43}', // U+05E3 FINAL PE
    '\u{FB44}', // U+05E4 PE
    '\0', // U+05E5 FINAL TSADI (no form)
    '\u{FB46}', // U+05E6 TSADI
    '\u{FB47}', // U+05E7 QOF
    '\u{FB48}', // U+05E8 RESH
    '\u{FB49}', // U+05E9 SHIN
    '\u{FB4A}', // U+05EA TAV
];
82
83fn compose(ctx: &hb_ot_shape_normalize_context_t, a: char, b: char) -> Option<char> {
84    // Hebrew presentation-form shaping.
85    // https://bugzilla.mozilla.org/show_bug.cgi?id=728866
86    // Hebrew presentation forms with dagesh, for characters U+05D0..05EA;
87    // Note that some letters do not have a dagesh presForm encoded.
88    match unicode::compose(a, b) {
89        Some(c) => Some(c),
90        None if !ctx.plan.has_gpos_mark => {
91            // Special-case Hebrew presentation forms that are excluded from
92            // standard normalization, but wanted for old fonts.
93            let a = a as u32;
94            let b = b as u32;
95            match b {
96                0x05B4 => {
97                    // HIRIQ
98                    match a {
99                        0x05D9 => Some('\u{FB1D}'), // YOD
100                        _ => None,
101                    }
102                }
103                0x05B7 => {
104                    // PATAH
105                    match a {
106                        0x05D9 => Some('\u{FB1F}'), // YIDDISH YOD YOD
107                        0x05D0 => Some('\u{FB2E}'), // ALEF
108                        _ => None,
109                    }
110                }
111                0x05B8 => {
112                    // QAMATS
113                    match a {
114                        0x05D0 => Some('\u{FB2F}'), // ALEF
115                        _ => None,
116                    }
117                }
118                0x05B9 => {
119                    // HOLAM
120                    match a {
121                        0x05D5 => Some('\u{FB4B}'), // VAV
122                        _ => None,
123                    }
124                }
125                0x05BC => {
126                    // DAGESH
127                    match a {
128                        0x05D0..=0x05EA => {
129                            let c = S_DAGESH_FORMS[a as usize - 0x05D0];
130                            if c != '\0' {
131                                Some(c)
132                            } else {
133                                None
134                            }
135                        }
136                        0xFB2A => Some('\u{FB2C}'), // SHIN WITH SHIN DOT
137                        0xFB2B => Some('\u{FB2D}'), // SHIN WITH SIN DOT
138                        _ => None,
139                    }
140                }
141                0x05BF => {
142                    // RAFE
143                    match a {
144                        0x05D1 => Some('\u{FB4C}'), // BET
145                        0x05DB => Some('\u{FB4D}'), // KAF
146                        0x05E4 => Some('\u{FB4E}'), // PE
147                        _ => None,
148                    }
149                }
150                0x05C1 => {
151                    // SHIN DOT
152                    match a {
153                        0x05E9 => Some('\u{FB2A}'), // SHIN
154                        0xFB49 => Some('\u{FB2C}'), // SHIN WITH DAGESH
155                        _ => None,
156                    }
157                }
158                0x05C2 => {
159                    // SIN DOT
160                    match a {
161                        0x05E9 => Some('\u{FB2B}'), // SHIN
162                        0xFB49 => Some('\u{FB2D}'), // SHIN WITH DAGESH
163                        _ => None,
164                    }
165                }
166                _ => None,
167            }
168        }
169        None => None,
170    }
171}