rustybuzz/hb/
ot_shaper_khmer.rs

1use alloc::boxed::Box;
2
3use super::buffer::hb_buffer_t;
4use super::ot_map::*;
5use super::ot_shape::*;
6use super::ot_shape_normalize::*;
7use super::ot_shape_plan::hb_ot_shape_plan_t;
8use super::ot_shaper::*;
9use super::ot_shaper_indic::ot_category_t;
10use super::unicode::{CharExt, GeneralCategoryExt};
11use super::{hb_font_t, hb_glyph_info_t, hb_mask_t, hb_tag_t};
12
13pub const KHMER_SHAPER: hb_ot_shaper_t = hb_ot_shaper_t {
14    collect_features: Some(collect_features),
15    override_features: Some(override_features),
16    create_data: Some(|plan| Box::new(KhmerShapePlan::new(plan))),
17    preprocess_text: None,
18    postprocess_glyphs: None,
19    normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
20    decompose: Some(decompose),
21    compose: Some(compose),
22    setup_masks: Some(setup_masks),
23    gpos_tag: None,
24    reorder_marks: None,
25    zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
26    fallback_position: false,
27};
28
29const KHMER_FEATURES: &[(hb_tag_t, hb_ot_map_feature_flags_t)] = &[
30    // Basic features.
31    // These features are applied all at once, before reordering, constrained
32    // to the syllable.
33    (
34        hb_tag_t::from_bytes(b"pref"),
35        F_MANUAL_JOINERS | F_PER_SYLLABLE,
36    ),
37    (
38        hb_tag_t::from_bytes(b"blwf"),
39        F_MANUAL_JOINERS | F_PER_SYLLABLE,
40    ),
41    (
42        hb_tag_t::from_bytes(b"abvf"),
43        F_MANUAL_JOINERS | F_PER_SYLLABLE,
44    ),
45    (
46        hb_tag_t::from_bytes(b"pstf"),
47        F_MANUAL_JOINERS | F_PER_SYLLABLE,
48    ),
49    (
50        hb_tag_t::from_bytes(b"cfar"),
51        F_MANUAL_JOINERS | F_PER_SYLLABLE,
52    ),
53    // Other features.
54    // These features are applied all at once after clearing syllables.
55    (hb_tag_t::from_bytes(b"pres"), F_GLOBAL_MANUAL_JOINERS),
56    (hb_tag_t::from_bytes(b"abvs"), F_GLOBAL_MANUAL_JOINERS),
57    (hb_tag_t::from_bytes(b"blws"), F_GLOBAL_MANUAL_JOINERS),
58    (hb_tag_t::from_bytes(b"psts"), F_GLOBAL_MANUAL_JOINERS),
59];
60
/// Indices of the basic features inside the `KHMER_FEATURES` table.
///
/// Must be kept in the same order as the `KHMER_FEATURES` array.
mod khmer_feature {
    /// Index of the `pref` entry.
    pub const PREF: usize = 0;
    /// Index of the `blwf` entry.
    pub const BLWF: usize = 1;
    /// Index of the `abvf` entry.
    pub const ABVF: usize = 2;
    /// Index of the `pstf` entry.
    pub const PSTF: usize = 3;
    /// Index of the `cfar` entry (the last basic feature).
    pub const CFAR: usize = 4;
}
69
70impl hb_glyph_info_t {
71    fn set_khmer_properties(&mut self) {
72        let u = self.glyph_id;
73        let (cat, _) = crate::hb::ot_shaper_indic_table::get_categories(u);
74
75        self.set_indic_category(cat);
76    }
77}
78
79struct KhmerShapePlan {
80    mask_array: [hb_mask_t; KHMER_FEATURES.len()],
81}
82
83impl KhmerShapePlan {
84    fn new(plan: &hb_ot_shape_plan_t) -> Self {
85        let mut mask_array = [0; KHMER_FEATURES.len()];
86        for (i, feature) in KHMER_FEATURES.iter().enumerate() {
87            mask_array[i] = if feature.1 & F_GLOBAL != 0 {
88                0
89            } else {
90                plan.ot_map.get_1_mask(feature.0)
91            }
92        }
93
94        KhmerShapePlan { mask_array }
95    }
96}
97
98fn collect_features(planner: &mut hb_ot_shape_planner_t) {
99    // Do this before any lookups have been applied.
100    planner.ot_map.add_gsub_pause(Some(setup_syllables));
101    planner.ot_map.add_gsub_pause(Some(reorder_khmer));
102
103    // Testing suggests that Uniscribe does NOT pause between basic
104    // features.  Test with KhmerUI.ttf and the following three
105    // sequences:
106    //
107    //   U+1789,U+17BC
108    //   U+1789,U+17D2,U+1789
109    //   U+1789,U+17D2,U+1789,U+17BC
110    //
111    // https://github.com/harfbuzz/harfbuzz/issues/974
112    planner
113        .ot_map
114        .enable_feature(hb_tag_t::from_bytes(b"locl"), F_PER_SYLLABLE, 1);
115    planner
116        .ot_map
117        .enable_feature(hb_tag_t::from_bytes(b"ccmp"), F_PER_SYLLABLE, 1);
118
119    for feature in KHMER_FEATURES.iter().take(5) {
120        planner.ot_map.add_feature(feature.0, feature.1, 1);
121    }
122
123    /* https://github.com/harfbuzz/harfbuzz/issues/3531 */
124    planner.ot_map.add_gsub_pause(Some(syllabic_clear_var)); // Don't need syllables anymore.
125
126    for feature in KHMER_FEATURES.iter().skip(5) {
127        planner.ot_map.add_feature(feature.0, feature.1, 1);
128    }
129}
130
131fn setup_syllables(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) -> bool {
132    super::ot_shaper_khmer_machine::find_syllables_khmer(buffer);
133
134    let mut start = 0;
135    let mut end = buffer.next_syllable(0);
136    while start < buffer.len {
137        buffer.unsafe_to_break(Some(start), Some(end));
138        start = end;
139        end = buffer.next_syllable(start);
140    }
141
142    false
143}
144
145fn reorder_khmer(plan: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) -> bool {
146    use super::ot_shaper_khmer_machine::SyllableType;
147
148    let mut ret = false;
149
150    if super::ot_shaper_syllabic::insert_dotted_circles(
151        face,
152        buffer,
153        SyllableType::BrokenCluster as u8,
154        ot_category_t::OT_DOTTEDCIRCLE,
155        Some(ot_category_t::OT_Repha),
156        None,
157    ) {
158        ret = true;
159    }
160
161    let khmer_plan = plan.data::<KhmerShapePlan>();
162
163    let mut start = 0;
164    let mut end = buffer.next_syllable(0);
165    while start < buffer.len {
166        reorder_syllable_khmer(khmer_plan, start, end, buffer);
167        start = end;
168        end = buffer.next_syllable(start);
169    }
170
171    ret
172}
173
174fn reorder_syllable_khmer(
175    khmer_plan: &KhmerShapePlan,
176    start: usize,
177    end: usize,
178    buffer: &mut hb_buffer_t,
179) {
180    use super::ot_shaper_khmer_machine::SyllableType;
181
182    let syllable_type = match buffer.info[start].syllable() & 0x0F {
183        0 => SyllableType::ConsonantSyllable,
184        1 => SyllableType::BrokenCluster,
185        2 => SyllableType::NonKhmerCluster,
186        _ => unreachable!(),
187    };
188
189    match syllable_type {
190        SyllableType::ConsonantSyllable | SyllableType::BrokenCluster => {
191            reorder_consonant_syllable(khmer_plan, start, end, buffer);
192        }
193        SyllableType::NonKhmerCluster => {}
194    }
195}
196
197// Rules from:
198// https://docs.microsoft.com/en-us/typography/script-development/devanagari
199fn reorder_consonant_syllable(
200    plan: &KhmerShapePlan,
201    start: usize,
202    end: usize,
203    buffer: &mut hb_buffer_t,
204) {
205    // Setup masks.
206    {
207        // Post-base
208        let mask = plan.mask_array[khmer_feature::BLWF]
209            | plan.mask_array[khmer_feature::ABVF]
210            | plan.mask_array[khmer_feature::PSTF];
211        for info in &mut buffer.info[start + 1..end] {
212            info.mask |= mask;
213        }
214    }
215
216    let mut num_coengs = 0;
217    for i in start + 1..end {
218        // When a COENG + (Cons | IndV) combination are found (and subscript count
219        // is less than two) the character combination is handled according to the
220        // subscript type of the character following the COENG.
221        //
222        // ...
223        //
224        // Subscript Type 2 - The COENG + RO characters are reordered to immediately
225        // before the base glyph. Then the COENG + RO characters are assigned to have
226        // the 'pref' OpenType feature applied to them.
227        if buffer.info[i].indic_category() == ot_category_t::OT_H && num_coengs <= 2 && i + 1 < end
228        {
229            num_coengs += 1;
230
231            if buffer.info[i + 1].indic_category() == ot_category_t::OT_Ra {
232                for j in 0..2 {
233                    buffer.info[i + j].mask |= plan.mask_array[khmer_feature::PREF];
234                }
235
236                // Move the Coeng,Ro sequence to the start.
237                buffer.merge_clusters(start, i + 2);
238                let t0 = buffer.info[i];
239                let t1 = buffer.info[i + 1];
240                for k in (0..i - start).rev() {
241                    buffer.info[k + start + 2] = buffer.info[k + start];
242                }
243
244                buffer.info[start] = t0;
245                buffer.info[start + 1] = t1;
246
247                // Mark the subsequent stuff with 'cfar'.  Used in Khmer.
248                // Read the feature spec.
249                // This allows distinguishing the following cases with MS Khmer fonts:
250                // U+1784,U+17D2,U+179A,U+17D2,U+1782
251                // U+1784,U+17D2,U+1782,U+17D2,U+179A
252                if plan.mask_array[khmer_feature::CFAR] != 0 {
253                    for j in i + 2..end {
254                        buffer.info[j].mask |= plan.mask_array[khmer_feature::CFAR];
255                    }
256                }
257
258                num_coengs = 2; // Done.
259            }
260        } else if buffer.info[i].indic_category() == ot_category_t::OT_VPre {
261            // Reorder left matra piece.
262
263            // Move to the start.
264            buffer.merge_clusters(start, i + 1);
265            let t = buffer.info[i];
266            for k in (0..i - start).rev() {
267                buffer.info[k + start + 1] = buffer.info[k + start];
268            }
269            buffer.info[start] = t;
270        }
271    }
272}
273
274fn override_features(planner: &mut hb_ot_shape_planner_t) {
275    // Khmer spec has 'clig' as part of required shaping features:
276    // "Apply feature 'clig' to form ligatures that are desired for
277    // typographical correctness.", hence in overrides...
278    planner
279        .ot_map
280        .enable_feature(hb_tag_t::from_bytes(b"clig"), F_NONE, 1);
281
282    planner
283        .ot_map
284        .disable_feature(hb_tag_t::from_bytes(b"liga"));
285}
286
287fn decompose(_: &hb_ot_shape_normalize_context_t, ab: char) -> Option<(char, char)> {
288    // Decompose split matras that don't have Unicode decompositions.
289    match ab {
290        '\u{17BE}' | '\u{17BF}' | '\u{17C0}' | '\u{17C4}' | '\u{17C5}' => Some(('\u{17C1}', ab)),
291        _ => crate::hb::unicode::decompose(ab),
292    }
293}
294
295fn compose(_: &hb_ot_shape_normalize_context_t, a: char, b: char) -> Option<char> {
296    // Avoid recomposing split matras.
297    if a.general_category().is_mark() {
298        return None;
299    }
300
301    crate::hb::unicode::compose(a, b)
302}
303
304fn setup_masks(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
305    // We cannot setup masks here.  We save information about characters
306    // and setup masks later on in a pause-callback.
307    for info in buffer.info_slice_mut() {
308        info.set_khmer_properties();
309    }
310}