rustybuzz/hb/
ot_shaper_use.rs

1use alloc::boxed::Box;
2
3use super::algs::*;
4use super::buffer::hb_buffer_t;
5use super::ot_layout::*;
6use super::ot_map::*;
7use super::ot_shape::*;
8use super::ot_shape_normalize::*;
9use super::ot_shape_plan::hb_ot_shape_plan_t;
10use super::ot_shaper::*;
11use super::ot_shaper_arabic::arabic_shape_plan_t;
12use super::unicode::{CharExt, GeneralCategoryExt};
13use super::{hb_font_t, hb_glyph_info_t, hb_mask_t, hb_tag_t, script, Script};
14
/// Shaper descriptor for the Universal Shaping Engine (USE).
///
/// Plugs the callbacks defined in this file into the generic
/// `hb_ot_shaper_t` hook table; hooks set to `None` are not provided
/// by this shaper.
pub const UNIVERSAL_SHAPER: hb_ot_shaper_t = hb_ot_shaper_t {
    collect_features: Some(collect_features),
    override_features: None,
    // Per-plan data is a boxed `UniversalShapePlan`, read back later via `plan.data()`.
    create_data: Some(|plan| Box::new(UniversalShapePlan::new(plan))),
    preprocess_text: Some(preprocess_text),
    postprocess_glyphs: None,
    normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
    decompose: None,
    // Custom composition: refuses to compose when the first character is a mark.
    compose: Some(compose),
    setup_masks: Some(setup_masks),
    gpos_tag: None,
    reorder_marks: None,
    zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
    fallback_position: false,
};
30
/// A USE category code; the known values are the constants in [`category`].
pub type Category = u8;
// Not every category constant is referenced from this file, hence the
// `dead_code` allowance.
// NOTE(review): presumably the remaining ones are used by the generated
// USE table / syllable machine — confirm.
#[allow(dead_code)]
/// Numeric USE (Universal Shaping Engine) glyph categories.
///
/// Commented-out entries are category values that have no constant defined
/// here; they are kept so the numbering gaps are self-explanatory.
pub mod category {
    pub const O: u8 = 0; // OTHER

    pub const B: u8 = 1; // BASE

    // pub const IND: u8     = 3;    // BASE_IND

    pub const N: u8 = 4; // BASE_NUM
    pub const GB: u8 = 5; // BASE_OTHER
    pub const CGJ: u8 = 6; // CGJ (combining grapheme joiner)

    // pub const F: u8       = 7;    // CONS_FINAL
    // pub const FM: u8 = 8;         // CONS_FINAL_MOD
    // pub const M: u8       = 9;    // CONS_MED
    // pub const CM: u8      = 10;   // CONS_MOD

    pub const SUB: u8 = 11; // CONS_SUB
    pub const H: u8 = 12; // HALANT

    pub const HN: u8 = 13; // HALANT_NUM
    pub const ZWNJ: u8 = 14; // Zero width non-joiner

    // pub const ZWJ: u8     = 15;   // Zero width joiner
    pub const WJ: u8 = 16; // Word joiner

    pub const RSV: u8 = 17; // Reserved characters
    pub const R: u8 = 18; // REPHA
    pub const S: u8 = 19; // SYM

    // pub const SM: u8      = 20;   // SYM_MOD
    // pub const VS: u8      = 21;   // VARIATION_SELECTOR
    // pub const V: u8       = 36;   // VOWEL
    // pub const VM: u8      = 40;   // VOWEL_MOD

    pub const CS: u8 = 43; // CONS_WITH_STACKER

    // https://github.com/harfbuzz/harfbuzz/issues/1102
    pub const IS: u8 = 44; // INVISIBLE_STACKER

    pub const Sk: u8 = 48; // SAKOT

    pub const FAbv: u8 = 24; // CONS_FINAL_ABOVE
    pub const FBlw: u8 = 25; // CONS_FINAL_BELOW
    pub const FPst: u8 = 26; // CONS_FINAL_POST
    pub const MAbv: u8 = 27; // CONS_MED_ABOVE
    pub const MBlw: u8 = 28; // CONS_MED_BELOW
    pub const MPst: u8 = 29; // CONS_MED_POST
    pub const MPre: u8 = 30; // CONS_MED_PRE
    pub const CMAbv: u8 = 31; // CONS_MOD_ABOVE
    pub const CMBlw: u8 = 32; // CONS_MOD_BELOW
    pub const VAbv: u8 = 33; // VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
    pub const VBlw: u8 = 34; // VOWEL_BELOW / VOWEL_BELOW_POST
    pub const VPst: u8 = 35; // VOWEL_POST UIPC = Right
    pub const VPre: u8 = 22; // VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
    pub const VMAbv: u8 = 37; // VOWEL_MOD_ABOVE
    pub const VMBlw: u8 = 38; // VOWEL_MOD_BELOW
    pub const VMPst: u8 = 39; // VOWEL_MOD_POST
    pub const VMPre: u8 = 23; // VOWEL_MOD_PRE
    pub const SMAbv: u8 = 41; // SYM_MOD_ABOVE
    pub const SMBlw: u8 = 42; // SYM_MOD_BELOW
    pub const FMAbv: u8 = 45; // CONS_FINAL_MOD UIPC = Top
    pub const FMBlw: u8 = 46; // CONS_FINAL_MOD UIPC = Bottom
    pub const FMPst: u8 = 47; // CONS_FINAL_MOD UIPC = Not_Applicable
    pub const G: u8 = 49; // HIEROGLYPH
    pub const J: u8 = 50; // HIEROGLYPH_JOINER
    pub const SB: u8 = 51; // HIEROGLYPH_SEGMENT_BEGIN
    pub const SE: u8 = 52; // HIEROGLYPH_SEGMENT_END
    pub const HVM: u8 = 53; // HALANT_OR_VOWEL_MODIFIER
    pub const HM: u8 = 54; // HIEROGLYPH_MOD
    pub const HR: u8 = 55; // HIEROGLYPH_MIRROR
}
105
/// Orthographic-unit shaping features.
///
/// These features are applied all at once, before reordering,
/// constrained to the syllable: `collect_features` enables each of them
/// with `F_MANUAL_ZWJ | F_PER_SYLLABLE` ahead of the `reorder_use` pause.
const BASIC_FEATURES: &[hb_tag_t] = &[
    hb_tag_t::from_bytes(b"rkrf"),
    hb_tag_t::from_bytes(b"abvf"),
    hb_tag_t::from_bytes(b"blwf"),
    hb_tag_t::from_bytes(b"half"),
    hb_tag_t::from_bytes(b"pstf"),
    hb_tag_t::from_bytes(b"vatu"),
    hb_tag_t::from_bytes(b"cjct"),
];
117
/// Positional-form features, added (not force-enabled) by `collect_features`.
///
/// The order matters: `setup_topographical_masks` looks up one mask per entry
/// and then indexes those masks with `JoiningForm as usize`, so entry `i`
/// must correspond to the `JoiningForm` variant whose discriminant is `i`.
const TOPOGRAPHICAL_FEATURES: &[hb_tag_t] = &[
    hb_tag_t::from_bytes(b"isol"),
    hb_tag_t::from_bytes(b"init"),
    hb_tag_t::from_bytes(b"medi"),
    hb_tag_t::from_bytes(b"fina"),
];
124
/// Positional form assigned to a syllable by `setup_topographical_masks`.
///
/// The discriminants are used as indices into the per-feature mask array,
/// so they must stay in the same order as `TOPOGRAPHICAL_FEATURES`
/// (`isol`, `init`, `medi`, `fina`).
#[derive(Clone, Copy, PartialEq)]
enum JoiningForm {
    Isolated = 0,
    Initial = 1,
    Medial = 2,
    Terminal = 3,
}
133
/// Standard typographic presentation features.
///
/// These features are applied all at once, after reordering and clearing
/// syllables: `collect_features` enables each of them with `F_MANUAL_ZWJ`
/// after the topographical group.
const OTHER_FEATURES: &[hb_tag_t] = &[
    hb_tag_t::from_bytes(b"abvs"),
    hb_tag_t::from_bytes(b"blws"),
    hb_tag_t::from_bytes(b"haln"),
    hb_tag_t::from_bytes(b"pres"),
    hb_tag_t::from_bytes(b"psts"),
];
142
143impl hb_glyph_info_t {
144    pub(crate) fn use_category(&self) -> Category {
145        self.ot_shaper_var_u8_category()
146    }
147
148    fn set_use_category(&mut self, c: Category) {
149        self.set_ot_shaper_var_u8_category(c)
150    }
151
152    fn is_halant_use(&self) -> bool {
153        matches!(
154            self.use_category(),
155            category::H | category::HVM | category::IS
156        ) && !_hb_glyph_info_ligated(self)
157    }
158}
159
/// Per-plan data of the USE shaper, created through the `create_data` hook.
struct UniversalShapePlan {
    /// Mask of the `rphf` feature as reported by the plan's OT map;
    /// `0` makes the repha-related passes no-ops.
    rphf_mask: hb_mask_t,
    /// Arabic shaper data, present only for scripts accepted by
    /// `has_arabic_joining`.
    arabic_plan: Option<arabic_shape_plan_t>,
}
164
165impl UniversalShapePlan {
166    fn new(plan: &hb_ot_shape_plan_t) -> UniversalShapePlan {
167        let mut arabic_plan = None;
168
169        if plan.script.map_or(false, has_arabic_joining) {
170            arabic_plan = Some(crate::hb::ot_shaper_arabic::data_create_arabic(plan));
171        }
172
173        UniversalShapePlan {
174            rphf_mask: plan.ot_map.get_1_mask(hb_tag_t::from_bytes(b"rphf")),
175            arabic_plan,
176        }
177    }
178}
179
180fn collect_features(planner: &mut hb_ot_shape_planner_t) {
181    // Do this before any lookups have been applied.
182    planner.ot_map.add_gsub_pause(Some(setup_syllables));
183
184    // Default glyph pre-processing group
185    planner
186        .ot_map
187        .enable_feature(hb_tag_t::from_bytes(b"locl"), F_PER_SYLLABLE, 1);
188    planner
189        .ot_map
190        .enable_feature(hb_tag_t::from_bytes(b"ccmp"), F_PER_SYLLABLE, 1);
191    planner
192        .ot_map
193        .enable_feature(hb_tag_t::from_bytes(b"nukt"), F_PER_SYLLABLE, 1);
194    planner.ot_map.enable_feature(
195        hb_tag_t::from_bytes(b"akhn"),
196        F_MANUAL_ZWJ | F_PER_SYLLABLE,
197        1,
198    );
199
200    // Reordering group
201    planner
202        .ot_map
203        .add_gsub_pause(Some(crate::hb::ot_layout::_hb_clear_substitution_flags));
204    planner.ot_map.add_feature(
205        hb_tag_t::from_bytes(b"rphf"),
206        F_MANUAL_ZWJ | F_PER_SYLLABLE,
207        1,
208    );
209    planner.ot_map.add_gsub_pause(Some(record_rphf));
210    planner
211        .ot_map
212        .add_gsub_pause(Some(crate::hb::ot_layout::_hb_clear_substitution_flags));
213    planner.ot_map.enable_feature(
214        hb_tag_t::from_bytes(b"pref"),
215        F_MANUAL_ZWJ | F_PER_SYLLABLE,
216        1,
217    );
218    planner.ot_map.add_gsub_pause(Some(record_pref));
219
220    // Orthographic unit shaping group
221    for feature in BASIC_FEATURES {
222        planner
223            .ot_map
224            .enable_feature(*feature, F_MANUAL_ZWJ | F_PER_SYLLABLE, 1);
225    }
226
227    planner.ot_map.add_gsub_pause(Some(reorder_use));
228    planner.ot_map.add_gsub_pause(Some(syllabic_clear_var)); // Don't need syllables anymore.
229
230    // Topographical features
231    for feature in TOPOGRAPHICAL_FEATURES {
232        planner.ot_map.add_feature(*feature, F_NONE, 1);
233    }
234    planner.ot_map.add_gsub_pause(None);
235
236    // Standard typographic presentation
237    for feature in OTHER_FEATURES {
238        planner.ot_map.enable_feature(*feature, F_MANUAL_ZWJ, 1);
239    }
240}
241
242fn setup_syllables(plan: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) -> bool {
243    super::ot_shaper_use_machine::find_syllables(buffer);
244
245    foreach_syllable!(buffer, start, end, {
246        buffer.unsafe_to_break(Some(start), Some(end));
247    });
248
249    setup_rphf_mask(plan, buffer);
250    setup_topographical_masks(plan, buffer);
251
252    false
253}
254
255fn setup_rphf_mask(plan: &hb_ot_shape_plan_t, buffer: &mut hb_buffer_t) -> bool {
256    let universal_plan = plan.data::<UniversalShapePlan>();
257
258    let mask = universal_plan.rphf_mask;
259    if mask == 0 {
260        return false;
261    }
262
263    let mut start = 0;
264    let mut end = buffer.next_syllable(0);
265    while start < buffer.len {
266        let limit = if buffer.info[start].use_category() == category::R {
267            1
268        } else {
269            core::cmp::min(3, end - start)
270        };
271
272        for i in start..start + limit {
273            buffer.info[i].mask |= mask;
274        }
275
276        start = end;
277        end = buffer.next_syllable(start);
278    }
279
280    false
281}
282
283fn setup_topographical_masks(plan: &hb_ot_shape_plan_t, buffer: &mut hb_buffer_t) {
284    use super::ot_shaper_use_machine::SyllableType;
285
286    if plan.data::<UniversalShapePlan>().arabic_plan.is_some() {
287        return;
288    }
289
290    let mut masks = [0; 4];
291    let mut all_masks = 0;
292    for i in 0..4 {
293        masks[i] = plan.ot_map.get_1_mask(TOPOGRAPHICAL_FEATURES[i]);
294        if masks[i] == plan.ot_map.get_global_mask() {
295            masks[i] = 0;
296        }
297
298        all_masks |= masks[i];
299    }
300
301    if all_masks == 0 {
302        return;
303    }
304
305    let other_masks = !all_masks;
306
307    let mut last_start = 0;
308    let mut last_form = None;
309    let mut start = 0;
310    let mut end = buffer.next_syllable(0);
311    while start < buffer.len {
312        let syllable = buffer.info[start].syllable() & 0x0F;
313        if syllable == SyllableType::HieroglyphCluster as u8
314            || syllable == SyllableType::NonCluster as u8
315        {
316            last_form = None;
317        } else {
318            let join = last_form == Some(JoiningForm::Terminal)
319                || last_form == Some(JoiningForm::Isolated);
320
321            if join {
322                // Fixup previous syllable's form.
323                let form = if last_form == Some(JoiningForm::Terminal) {
324                    JoiningForm::Medial
325                } else {
326                    JoiningForm::Initial
327                };
328
329                for i in last_start..start {
330                    buffer.info[i].mask =
331                        (buffer.info[i].mask & other_masks) | masks[form as usize];
332                }
333            }
334
335            // Form for this syllable.
336            let form = if join {
337                JoiningForm::Terminal
338            } else {
339                JoiningForm::Isolated
340            };
341            last_form = Some(form);
342            for i in start..end {
343                buffer.info[i].mask = (buffer.info[i].mask & other_masks) | masks[form as usize];
344            }
345        }
346
347        last_start = start;
348        start = end;
349        end = buffer.next_syllable(start);
350    }
351}
352
353fn record_rphf(plan: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) -> bool {
354    let universal_plan = plan.data::<UniversalShapePlan>();
355
356    let mask = universal_plan.rphf_mask;
357    if mask == 0 {
358        return false;
359    }
360
361    let mut start = 0;
362    let mut end = buffer.next_syllable(0);
363    while start < buffer.len {
364        // Mark a substituted repha as USE_R.
365        for i in start..end {
366            if buffer.info[i].mask & mask == 0 {
367                break;
368            }
369
370            if _hb_glyph_info_substituted(&buffer.info[i]) {
371                buffer.info[i].set_use_category(category::R);
372                break;
373            }
374        }
375
376        start = end;
377        end = buffer.next_syllable(start);
378    }
379
380    false
381}
382
383fn reorder_use(_: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) -> bool {
384    use super::ot_shaper_use_machine::SyllableType;
385
386    let mut ret = false;
387
388    if crate::hb::ot_shaper_syllabic::insert_dotted_circles(
389        face,
390        buffer,
391        SyllableType::BrokenCluster as u8,
392        category::B,
393        Some(category::R),
394        None,
395    ) {
396        ret = true;
397    }
398
399    let mut start = 0;
400    let mut end = buffer.next_syllable(0);
401    while start < buffer.len {
402        reorder_syllable_use(start, end, buffer);
403        start = end;
404        end = buffer.next_syllable(start);
405    }
406
407    ret
408}
409
410const fn category_flag(c: Category) -> u32 {
411    rb_flag(c as u32)
412}
413
414const fn category_flag64(c: Category) -> u64 {
415    rb_flag64(c as u32)
416}
417
/// Combined 64-bit flags of the categories that `reorder_syllable_use`
/// treats as post-base glyphs: a repha is moved to just before the first
/// glyph whose category flag is in this set (or before a halant).
///
/// 64-bit flags are used because several of these category values exceed 31.
const POST_BASE_FLAGS: u64 = category_flag64(category::FAbv)
    | category_flag64(category::FBlw)
    | category_flag64(category::FPst)
    | category_flag64(category::FMAbv)
    | category_flag64(category::FMBlw)
    | category_flag64(category::FMPst)
    | category_flag64(category::MAbv)
    | category_flag64(category::MBlw)
    | category_flag64(category::MPst)
    | category_flag64(category::MPre)
    | category_flag64(category::VAbv)
    | category_flag64(category::VBlw)
    | category_flag64(category::VPst)
    | category_flag64(category::VPre)
    | category_flag64(category::VMAbv)
    | category_flag64(category::VMBlw)
    | category_flag64(category::VMPst)
    | category_flag64(category::VMPre);
436
437fn reorder_syllable_use(start: usize, end: usize, buffer: &mut hb_buffer_t) {
438    use super::ot_shaper_use_machine::SyllableType;
439
440    let syllable_type = (buffer.info[start].syllable() & 0x0F) as u32;
441    // Only a few syllable types need reordering.
442    if (rb_flag_unsafe(syllable_type)
443        & (rb_flag(SyllableType::ViramaTerminatedCluster as u32)
444            | rb_flag(SyllableType::SakotTerminatedCluster as u32)
445            | rb_flag(SyllableType::StandardCluster as u32)
446            | rb_flag(SyllableType::BrokenCluster as u32)
447            | 0))
448        == 0
449    {
450        return;
451    }
452
453    // Move things forward.
454    if buffer.info[start].use_category() == category::R && end - start > 1 {
455        // Got a repha.  Reorder it towards the end, but before the first post-base glyph.
456        for i in start + 1..end {
457            let is_post_base_glyph =
458                (rb_flag64_unsafe(buffer.info[i].use_category() as u32) & POST_BASE_FLAGS) != 0
459                    || buffer.info[i].is_halant_use();
460
461            if is_post_base_glyph || i == end - 1 {
462                // If we hit a post-base glyph, move before it; otherwise move to the
463                // end. Shift things in between backward.
464
465                let mut i = i;
466                if is_post_base_glyph {
467                    i -= 1;
468                }
469
470                buffer.merge_clusters(start, i + 1);
471                let t = buffer.info[start];
472                for k in 0..i - start {
473                    buffer.info[k + start] = buffer.info[k + start + 1];
474                }
475                buffer.info[i] = t;
476
477                break;
478            }
479        }
480    }
481
482    // Move things back.
483    let mut j = start;
484    for i in start..end {
485        let flag = rb_flag_unsafe(buffer.info[i].use_category() as u32);
486        if buffer.info[i].is_halant_use() {
487            // If we hit a halant, move after it; otherwise move to the beginning, and
488            // shift things in between forward.
489            j = i + 1;
490        } else if (flag & (category_flag(category::VPre) | category_flag(category::VMPre))) != 0
491            && _hb_glyph_info_get_lig_comp(&buffer.info[i]) == 0
492            && j < i
493        {
494            // Only move the first component of a MultipleSubst.
495            buffer.merge_clusters(j, i + 1);
496            let t = buffer.info[i];
497            for k in (0..i - j).rev() {
498                buffer.info[k + j + 1] = buffer.info[k + j];
499            }
500            buffer.info[j] = t;
501        }
502    }
503}
504
505fn record_pref(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) -> bool {
506    let mut start = 0;
507    let mut end = buffer.next_syllable(0);
508    while start < buffer.len {
509        // Mark a substituted pref as VPre, as they behave the same way.
510        for i in start..end {
511            if _hb_glyph_info_substituted(&buffer.info[i]) {
512                buffer.info[i].set_use_category(category::VPre);
513                break;
514            }
515        }
516
517        start = end;
518        end = buffer.next_syllable(start);
519    }
520
521    false
522}
523
524fn has_arabic_joining(script: Script) -> bool {
525    // List of scripts that have data in arabic-table.
526    matches!(
527        script,
528        script::ADLAM
529            | script::ARABIC
530            | script::CHORASMIAN
531            | script::HANIFI_ROHINGYA
532            | script::MANDAIC
533            | script::MANICHAEAN
534            | script::MONGOLIAN
535            | script::NKO
536            | script::OLD_UYGHUR
537            | script::PHAGS_PA
538            | script::PSALTER_PAHLAVI
539            | script::SOGDIAN
540            | script::SYRIAC
541    )
542}
543
544fn preprocess_text(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
545    super::ot_shaper_vowel_constraints::preprocess_text_vowel_constraints(buffer);
546}
547
548fn compose(_: &hb_ot_shape_normalize_context_t, a: char, b: char) -> Option<char> {
549    // Avoid recomposing split matras.
550    if a.general_category().is_mark() {
551        return None;
552    }
553
554    crate::hb::unicode::compose(a, b)
555}
556
557fn setup_masks(plan: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
558    let universal_plan = plan.data::<UniversalShapePlan>();
559
560    // Do this before allocating use_category().
561    if let Some(ref arabic_plan) = universal_plan.arabic_plan {
562        crate::hb::ot_shaper_arabic::setup_masks_inner(arabic_plan, plan.script, buffer);
563    }
564
565    // We cannot setup masks here. We save information about characters
566    // and setup masks later on in a pause-callback.
567    for info in buffer.info_slice_mut() {
568        info.set_use_category(super::ot_shaper_use_table::hb_use_get_category(
569            info.glyph_id,
570        ));
571    }
572}