rustybuzz/hb/
ot_shaper_indic.rs

1use alloc::boxed::Box;
2use core::cmp;
3use core::convert::TryFrom;
4use core::ops::Range;
5
6use ttf_parser::GlyphId;
7
8use super::algs::*;
9use super::buffer::hb_buffer_t;
10use super::ot_layout::*;
11use super::ot_layout_gsubgpos::{WouldApply, WouldApplyContext};
12use super::ot_map::*;
13use super::ot_shape::*;
14use super::ot_shape_normalize::*;
15use super::ot_shape_plan::hb_ot_shape_plan_t;
16use super::ot_shaper::*;
17use super::unicode::{hb_gc, CharExt, GeneralCategoryExt};
18use super::{hb_font_t, hb_glyph_info_t, hb_mask_t, hb_tag_t, script, Script};
19
/// Shaper descriptor for the Indic scripts.
///
/// Plugs the callbacks defined in this file into the generic OpenType shaping
/// pipeline. Slots set to `None` are hooks this shaper does not provide.
pub const INDIC_SHAPER: hb_ot_shaper_t = hb_ot_shaper_t {
    collect_features: Some(collect_features),
    override_features: Some(override_features),
    // The per-plan data is an `IndicShapePlan`, retrieved later via `plan.data::<IndicShapePlan>()`.
    create_data: Some(|plan| Box::new(IndicShapePlan::new(plan))),
    preprocess_text: Some(preprocess_text),
    postprocess_glyphs: None,
    normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
    decompose: Some(decompose),
    compose: Some(compose),
    setup_masks: Some(setup_masks),
    gpos_tag: None,
    reorder_marks: None,
    zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
    fallback_position: false,
};
35
/// Category value stored per glyph and read back through the Indic, Myanmar
/// and Khmer accessors on `hb_glyph_info_t`; one of the `ot_category_t` values.
pub type Category = u8;

// This mod doesn't exist in harfbuzz anymore. Instead, the corresponding values are auto-generated
// by the various machines and stored in `hb-ot-shaper-indic-table`. This means that when updating the
// values in the machines, we also need to update them here.
#[allow(dead_code)]
pub mod ot_category_t {
    pub const OT_X: u8 = 0;
    pub const OT_C: u8 = 1;
    pub const OT_V: u8 = 2;
    pub const OT_N: u8 = 3;
    pub const OT_H: u8 = 4;
    pub const OT_ZWNJ: u8 = 5;
    pub const OT_ZWJ: u8 = 6;
    pub const OT_M: u8 = 7;
    pub const OT_SM: u8 = 8;
    pub const OT_A: u8 = 9;
    // Alias: shares its value with OT_A.
    pub const OT_VD: u8 = OT_A;
    pub const OT_PLACEHOLDER: u8 = 10;
    // Alias: shares its value with OT_PLACEHOLDER.
    pub const OT_GB: u8 = OT_PLACEHOLDER;
    pub const OT_DOTTEDCIRCLE: u8 = 11;
    pub const OT_RS: u8 = 12; // Register Shifter, used in Khmer OT spec.
    pub const OT_MPst: u8 = 13;
    pub const OT_Repha: u8 = 14; // Atomically-encoded logical or visual repha.
    pub const OT_Ra: u8 = 15;
    pub const OT_CM: u8 = 16; // Consonant-Medial.
    pub const OT_Symbol: u8 = 17; // Avagraha, etc that take marks (SM,A,VD).
    pub const OT_CS: u8 = 18;

    /* Khmer & Myanmar shapers. */
    pub const OT_VAbv: u8 = 20;
    pub const OT_VBlw: u8 = 21;
    pub const OT_VPre: u8 = 22;
    pub const OT_VPst: u8 = 23;

    /* Khmer. */
    pub const OT_Robatic: u8 = 25;
    pub const OT_Xgroup: u8 = 26;
    pub const OT_Ygroup: u8 = 27;

    /* Myanmar */
    pub const OT_As: u8 = 32; // Asat
    pub const OT_MH: u8 = 35; // Medial
    pub const OT_MR: u8 = 36; // Medial
    pub const OT_MW: u8 = 37; // Medial
    pub const OT_MY: u8 = 38; // Medial
    pub const OT_PT: u8 = 39; // Pwo and other tones
    pub const OT_VS: u8 = 40; // Variation selectors
    pub const OT_ML: u8 = 41; // Consonant medials

    // This one doesn't exist in ot_category_t in harfbuzz, only in
    // the Myanmar machine. However, in Rust we unfortunately can't export
    // inside the Ragel file, so we have to define it here as well. Needs to
    // be kept in sync with the value in the machine.
    // Note that it has the same numeric value as OT_V.
    pub const IV: u8 = 2;
}
92
/// Position value stored per glyph next to its category; one of the
/// `ot_position_t` values.
pub type Position = u8;
/// Position slots in ascending numeric order, from `POS_START` to `POS_END`.
pub mod ot_position_t {
    pub const POS_START: u8 = 0;

    pub const POS_RA_TO_BECOME_REPH: u8 = 1;
    pub const POS_PRE_M: u8 = 2;
    pub const POS_PRE_C: u8 = 3;

    pub const POS_BASE_C: u8 = 4;
    pub const POS_AFTER_MAIN: u8 = 5;

    pub const POS_ABOVE_C: u8 = 6;

    pub const POS_BEFORE_SUB: u8 = 7;
    pub const POS_BELOW_C: u8 = 8;
    pub const POS_AFTER_SUB: u8 = 9;

    pub const POS_BEFORE_POST: u8 = 10;
    pub const POS_POST_C: u8 = 11;
    pub const POS_AFTER_POST: u8 = 12;

    pub const POS_SMVD: u8 = 13;

    pub const POS_END: u8 = 14;
}
118
/// GSUB features used by the Indic shaper, each paired with its map flags.
///
/// The order matters: `indic_feature` holds the matching indices, and
/// `collect_features` registers the first 11 entries (through `cjct`) with a
/// pause after each one, then registers the remaining entries together after
/// the final-reordering pause.
const INDIC_FEATURES: &[(hb_tag_t, hb_ot_map_feature_flags_t)] = &[
    // Basic features.
    // These features are applied in order, one at a time, after initial_reordering,
    // constrained to the syllable.
    (
        hb_tag_t::from_bytes(b"nukt"),
        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
    ),
    (
        hb_tag_t::from_bytes(b"akhn"),
        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
    ),
    (
        hb_tag_t::from_bytes(b"rphf"),
        F_MANUAL_JOINERS | F_PER_SYLLABLE,
    ),
    (
        hb_tag_t::from_bytes(b"rkrf"),
        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
    ),
    (
        hb_tag_t::from_bytes(b"pref"),
        F_MANUAL_JOINERS | F_PER_SYLLABLE,
    ),
    (
        hb_tag_t::from_bytes(b"blwf"),
        F_MANUAL_JOINERS | F_PER_SYLLABLE,
    ),
    (
        hb_tag_t::from_bytes(b"abvf"),
        F_MANUAL_JOINERS | F_PER_SYLLABLE,
    ),
    (
        hb_tag_t::from_bytes(b"half"),
        F_MANUAL_JOINERS | F_PER_SYLLABLE,
    ),
    (
        hb_tag_t::from_bytes(b"pstf"),
        F_MANUAL_JOINERS | F_PER_SYLLABLE,
    ),
    (
        hb_tag_t::from_bytes(b"vatu"),
        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
    ),
    (
        hb_tag_t::from_bytes(b"cjct"),
        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
    ),
    // Other features.
    // These features are applied all at once, after final_reordering, constrained
    // to the syllable.
    // Default Bengali font in Windows for example has intermixed
    // lookups for init,pres,abvs,blws features.
    (
        hb_tag_t::from_bytes(b"init"),
        F_MANUAL_JOINERS | F_PER_SYLLABLE,
    ),
    (
        hb_tag_t::from_bytes(b"pres"),
        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
    ),
    (
        hb_tag_t::from_bytes(b"abvs"),
        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
    ),
    (
        hb_tag_t::from_bytes(b"blws"),
        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
    ),
    (
        hb_tag_t::from_bytes(b"psts"),
        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
    ),
    (
        hb_tag_t::from_bytes(b"haln"),
        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
    ),
];
197
// Must be in the same order as the INDIC_FEATURES array.
//
// Each constant is the index of the same-named feature in INDIC_FEATURES,
// and therefore also its index in `IndicShapePlan::mask_array`.
#[allow(dead_code)]
mod indic_feature {
    pub const NUKT: usize = 0;
    pub const AKHN: usize = 1;
    pub const RPHF: usize = 2;
    pub const RKRF: usize = 3;
    pub const PREF: usize = 4;
    pub const BLWF: usize = 5;
    pub const ABVF: usize = 6;
    pub const HALF: usize = 7;
    pub const PSTF: usize = 8;
    pub const VATU: usize = 9;
    pub const CJCT: usize = 10;
    // First of the features registered after the final-reordering pause.
    pub const INIT: usize = 11;
    pub const PRES: usize = 12;
    pub const ABVS: usize = 13;
    pub const BLWS: usize = 14;
    pub const PSTS: usize = 15;
    pub const HALN: usize = 16;
}
219
220pub(crate) const fn category_flag(c: Category) -> u32 {
221    rb_flag(c as u32)
222}
223
// Note:
//
// We treat Vowels and placeholders as if they were consonants.  This is safe because Vowels
// cannot happen in a consonant syllable.  The plus side however is, we can call the
// consonant syllable logic from the vowel syllable function and get it all right!
//
// Category mask tested by `hb_glyph_info_t::is_consonant`.
const CONSONANT_FLAGS_INDIC: u32 = category_flag(ot_category_t::OT_C)
    | category_flag(ot_category_t::OT_CS)
    | category_flag(ot_category_t::OT_Ra)
    | category_flag(ot_category_t::OT_CM)
    | category_flag(ot_category_t::OT_V)
    | category_flag(ot_category_t::OT_PLACEHOLDER)
    | category_flag(ot_category_t::OT_DOTTEDCIRCLE);
236
// Category mask tested by `hb_glyph_info_t::is_consonant_myanmar`.
// Unlike the Indic mask it leaves out OT_CM, and uses the Myanmar machine's
// IV and OT_GB in place of OT_V and OT_PLACEHOLDER (which have the same values).
const CONSONANT_FLAGS_MYANMAR: u32 = category_flag(ot_category_t::OT_C)
    | category_flag(ot_category_t::OT_CS)
    | category_flag(ot_category_t::OT_Ra)
    // | category_flag(ot_category_t::OT_CM)
    | category_flag(ot_category_t::IV)
    | category_flag(ot_category_t::OT_GB)
    | category_flag(ot_category_t::OT_DOTTEDCIRCLE);
244
// Category mask matching ZWJ and ZWNJ; tested by `hb_glyph_info_t::is_joiner`.
const JOINER_FLAGS: u32 =
    category_flag(ot_category_t::OT_ZWJ) | category_flag(ot_category_t::OT_ZWNJ);
247
/// Reph placement configured per script.
///
/// Each variant's discriminant is the matching `ot_position_t` slot, so a
/// variant can be cast to a position value.
#[derive(Clone, Copy, PartialEq)]
enum RephPosition {
    AfterMain = ot_position_t::POS_AFTER_MAIN as isize,
    BeforeSub = ot_position_t::POS_BEFORE_SUB as isize,
    AfterSub = ot_position_t::POS_AFTER_SUB as isize,
    BeforePost = ot_position_t::POS_BEFORE_POST as isize,
    AfterPost = ot_position_t::POS_AFTER_POST as isize,
}
256
/// How a script encodes its reph; selected per script in `INDIC_CONFIGS`.
#[derive(Clone, Copy, PartialEq)]
enum RephMode {
    /// Reph formed out of initial Ra,H sequence.
    Implicit,
    /// Reph formed out of initial Ra,H,ZWJ sequence.
    Explicit,
    /// Encoded Repha character, needs reordering.
    LogRepha,
}
266
/// Which consonants receive the below-forms (`blwf`) feature; selected per
/// script in `INDIC_CONFIGS`.
#[derive(Clone, Copy, PartialEq)]
enum BlwfMode {
    /// Below-forms feature applied to pre-base and post-base.
    PreAndPost,
    /// Below-forms feature applied to post-base only.
    PostOnly,
}
274
275#[derive(Clone, Copy)]
276struct IndicConfig {
277    script: Option<Script>,
278    has_old_spec: bool,
279    virama: u32,
280    reph_pos: RephPosition,
281    reph_mode: RephMode,
282    blwf_mode: BlwfMode,
283}
284
285impl IndicConfig {
286    const fn new(
287        script: Option<Script>,
288        has_old_spec: bool,
289        virama: u32,
290        reph_pos: RephPosition,
291        reph_mode: RephMode,
292        blwf_mode: BlwfMode,
293    ) -> Self {
294        IndicConfig {
295            script,
296            has_old_spec,
297            virama,
298            reph_pos,
299            reph_mode,
300            blwf_mode,
301        }
302    }
303}
304
/// Per-script configuration table.
///
/// Entry 0 is the default: `IndicShapePlan::new` skips it when searching by
/// script and falls back to it when no script-specific entry matches, so it
/// must stay first.
///
/// Argument order for each entry: script, has_old_spec, virama code point,
/// reph position, reph mode, blwf mode.
const INDIC_CONFIGS: &[IndicConfig] = &[
    // Default. Must be first.
    IndicConfig::new(
        None,
        false,
        0,
        RephPosition::BeforePost,
        RephMode::Implicit,
        BlwfMode::PreAndPost,
    ),
    IndicConfig::new(
        Some(script::DEVANAGARI),
        true,
        0x094D,
        RephPosition::BeforePost,
        RephMode::Implicit,
        BlwfMode::PreAndPost,
    ),
    IndicConfig::new(
        Some(script::BENGALI),
        true,
        0x09CD,
        RephPosition::AfterSub,
        RephMode::Implicit,
        BlwfMode::PreAndPost,
    ),
    IndicConfig::new(
        Some(script::GURMUKHI),
        true,
        0x0A4D,
        RephPosition::BeforeSub,
        RephMode::Implicit,
        BlwfMode::PreAndPost,
    ),
    IndicConfig::new(
        Some(script::GUJARATI),
        true,
        0x0ACD,
        RephPosition::BeforePost,
        RephMode::Implicit,
        BlwfMode::PreAndPost,
    ),
    IndicConfig::new(
        Some(script::ORIYA),
        true,
        0x0B4D,
        RephPosition::AfterMain,
        RephMode::Implicit,
        BlwfMode::PreAndPost,
    ),
    IndicConfig::new(
        Some(script::TAMIL),
        true,
        0x0BCD,
        RephPosition::AfterPost,
        RephMode::Implicit,
        BlwfMode::PreAndPost,
    ),
    IndicConfig::new(
        Some(script::TELUGU),
        true,
        0x0C4D,
        RephPosition::AfterPost,
        RephMode::Explicit,
        BlwfMode::PostOnly,
    ),
    IndicConfig::new(
        Some(script::KANNADA),
        true,
        0x0CCD,
        RephPosition::AfterPost,
        RephMode::Implicit,
        BlwfMode::PostOnly,
    ),
    IndicConfig::new(
        Some(script::MALAYALAM),
        true,
        0x0D4D,
        RephPosition::AfterMain,
        RephMode::LogRepha,
        BlwfMode::PreAndPost,
    ),
    IndicConfig::new(
        Some(script::SINHALA),
        false,
        0x0DCA,
        RephPosition::AfterPost,
        RephMode::Explicit,
        BlwfMode::PreAndPost,
    ),
];
395
396struct IndicWouldSubstituteFeature {
397    lookups: Range<usize>,
398    zero_context: bool,
399}
400
401impl IndicWouldSubstituteFeature {
402    pub fn new(map: &hb_ot_map_t, feature_tag: hb_tag_t, zero_context: bool) -> Self {
403        IndicWouldSubstituteFeature {
404            lookups: match map.get_feature_stage(TableIndex::GSUB, feature_tag) {
405                Some(stage) => map.stage_lookup_range(TableIndex::GSUB, stage),
406                None => 0..0,
407            },
408            zero_context,
409        }
410    }
411
412    pub fn would_substitute(
413        &self,
414        map: &hb_ot_map_t,
415        face: &hb_font_t,
416        glyphs: &[GlyphId],
417    ) -> bool {
418        for index in self.lookups.clone() {
419            let lookup = map.lookup(TableIndex::GSUB, index);
420            let ctx = WouldApplyContext {
421                glyphs,
422                zero_context: self.zero_context,
423            };
424            if face
425                .gsub
426                .as_ref()
427                .and_then(|table| table.get_lookup(lookup.index))
428                .map_or(false, |lookup| lookup.would_apply(&ctx))
429            {
430                return true;
431            }
432        }
433
434        false
435    }
436}
437
438struct IndicShapePlan {
439    config: IndicConfig,
440    is_old_spec: bool,
441    // virama_glyph: Option<u32>,
442    rphf: IndicWouldSubstituteFeature,
443    pref: IndicWouldSubstituteFeature,
444    blwf: IndicWouldSubstituteFeature,
445    pstf: IndicWouldSubstituteFeature,
446    vatu: IndicWouldSubstituteFeature,
447    mask_array: [hb_mask_t; INDIC_FEATURES.len()],
448}
449
450impl IndicShapePlan {
451    fn new(plan: &hb_ot_shape_plan_t) -> Self {
452        let script = plan.script;
453        let config = if let Some(c) = INDIC_CONFIGS.iter().skip(1).find(|c| c.script == script) {
454            *c
455        } else {
456            INDIC_CONFIGS[0]
457        };
458
459        let is_old_spec = config.has_old_spec
460            && plan
461                .ot_map
462                .chosen_script(TableIndex::GSUB)
463                .map_or(true, |tag| tag.to_bytes()[3] != b'2');
464
465        // Use zero-context would_substitute() matching for new-spec of the main
466        // Indic scripts, and scripts with one spec only, but not for old-specs.
467        // The new-spec for all dual-spec scripts says zero-context matching happens.
468        //
469        // However, testing with Malayalam shows that old and new spec both allow
470        // context.  Testing with Bengali new-spec however shows that it doesn't.
471        // So, the heuristic here is the way it is.  It should *only* be changed,
472        // as we discover more cases of what Windows does.  DON'T TOUCH OTHERWISE.
473        let zero_context = is_old_spec && script != Some(script::MALAYALAM);
474
475        let mut mask_array = [0; INDIC_FEATURES.len()];
476        for (i, feature) in INDIC_FEATURES.iter().enumerate() {
477            mask_array[i] = if feature.1 & F_GLOBAL != 0 {
478                0
479            } else {
480                plan.ot_map.get_1_mask(feature.0)
481            }
482        }
483
484        // TODO: what is this?
485        // let mut virama_glyph = None;
486        // if config.virama != 0 {
487        //     if let Some(g) = face.glyph_index(char::try_from(config.virama).unwrap()) {
488        //         virama_glyph = Some(g.0 as u32);
489        //     }
490        // }
491
492        IndicShapePlan {
493            config,
494            is_old_spec,
495            // virama_glyph,
496            rphf: IndicWouldSubstituteFeature::new(
497                &plan.ot_map,
498                hb_tag_t::from_bytes(b"rphf"),
499                zero_context,
500            ),
501            pref: IndicWouldSubstituteFeature::new(
502                &plan.ot_map,
503                hb_tag_t::from_bytes(b"pref"),
504                zero_context,
505            ),
506            blwf: IndicWouldSubstituteFeature::new(
507                &plan.ot_map,
508                hb_tag_t::from_bytes(b"blwf"),
509                zero_context,
510            ),
511            pstf: IndicWouldSubstituteFeature::new(
512                &plan.ot_map,
513                hb_tag_t::from_bytes(b"pstf"),
514                zero_context,
515            ),
516            vatu: IndicWouldSubstituteFeature::new(
517                &plan.ot_map,
518                hb_tag_t::from_bytes(b"vatu"),
519                zero_context,
520            ),
521            mask_array,
522        }
523    }
524}
525
526impl hb_glyph_info_t {
527    pub(crate) fn indic_category(&self) -> Category {
528        self.ot_shaper_var_u8_category()
529    }
530
531    pub(crate) fn myanmar_category(&self) -> Category {
532        self.ot_shaper_var_u8_category()
533    }
534
535    pub(crate) fn khmer_category(&self) -> Category {
536        self.ot_shaper_var_u8_category()
537    }
538
539    pub(crate) fn set_indic_category(&mut self, c: Category) {
540        self.set_ot_shaper_var_u8_category(c)
541    }
542
543    pub(crate) fn set_myanmar_category(&mut self, c: Category) {
544        self.set_ot_shaper_var_u8_category(c)
545    }
546
547    pub(crate) fn indic_position(&self) -> Position {
548        self.ot_shaper_var_u8_auxiliary()
549    }
550
551    pub(crate) fn myanmar_position(&self) -> Position {
552        self.ot_shaper_var_u8_auxiliary()
553    }
554
555    pub(crate) fn set_indic_position(&mut self, c: Position) {
556        self.set_ot_shaper_var_u8_auxiliary(c)
557    }
558
559    pub(crate) fn set_myanmar_position(&mut self, c: Position) {
560        self.set_ot_shaper_var_u8_auxiliary(c)
561    }
562
563    fn is_one_of(&self, flags: u32) -> bool {
564        // If it ligated, all bets are off.
565        if _hb_glyph_info_ligated(self) {
566            return false;
567        }
568
569        rb_flag_unsafe(self.indic_category() as u32) & flags != 0
570    }
571
572    fn is_joiner(&self) -> bool {
573        self.is_one_of(JOINER_FLAGS)
574    }
575
576    pub(crate) fn is_consonant(&self) -> bool {
577        self.is_one_of(CONSONANT_FLAGS_INDIC)
578    }
579
580    pub(crate) fn is_consonant_myanmar(&self) -> bool {
581        self.is_one_of(CONSONANT_FLAGS_MYANMAR)
582    }
583
584    fn is_halant(&self) -> bool {
585        self.is_one_of(rb_flag(ot_category_t::OT_H as u32))
586    }
587
588    fn set_indic_properties(&mut self) {
589        let u = self.glyph_id;
590        let (cat, pos) = crate::hb::ot_shaper_indic_table::get_categories(u);
591
592        self.set_indic_category(cat);
593        self.set_indic_position(pos);
594    }
595}
596
597fn collect_features(planner: &mut hb_ot_shape_planner_t) {
598    // Do this before any lookups have been applied.
599    planner.ot_map.add_gsub_pause(Some(setup_syllables));
600
601    planner
602        .ot_map
603        .enable_feature(hb_tag_t::from_bytes(b"locl"), F_PER_SYLLABLE, 1);
604    // The Indic specs do not require ccmp, but we apply it here since if
605    // there is a use of it, it's typically at the beginning.
606    planner
607        .ot_map
608        .enable_feature(hb_tag_t::from_bytes(b"ccmp"), F_PER_SYLLABLE, 1);
609
610    planner.ot_map.add_gsub_pause(Some(initial_reordering));
611
612    for feature in INDIC_FEATURES.iter().take(11) {
613        planner.ot_map.add_feature(feature.0, feature.1, 1);
614        planner.ot_map.add_gsub_pause(None);
615    }
616
617    planner.ot_map.add_gsub_pause(Some(final_reordering));
618
619    for feature in INDIC_FEATURES.iter().skip(11) {
620        planner.ot_map.add_feature(feature.0, feature.1, 1);
621    }
622}
623
624fn override_features(planner: &mut hb_ot_shape_planner_t) {
625    planner
626        .ot_map
627        .disable_feature(hb_tag_t::from_bytes(b"liga"));
628    planner.ot_map.add_gsub_pause(Some(syllabic_clear_var)); // Don't need syllables anymore.
629}
630
/// Text preprocessing hook: delegates to the shared vowel-constraints pass.
/// The plan and font arguments are unused.
fn preprocess_text(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
    super::ot_shaper_vowel_constraints::preprocess_text_vowel_constraints(buffer);
}
634
635fn decompose(_: &hb_ot_shape_normalize_context_t, ab: char) -> Option<(char, char)> {
636    // Don't decompose these.
637    match ab {
638        '\u{0931}' |               // DEVANAGARI LETTER RRA
639        // https://github.com/harfbuzz/harfbuzz/issues/779
640        '\u{09DC}' |               // BENGALI LETTER RRA
641        '\u{09DD}' |               // BENGALI LETTER RHA
642        '\u{0B94}' => return None, // TAMIL LETTER AU
643        _ => {}
644    }
645
646    crate::hb::unicode::decompose(ab)
647}
648
649fn compose(_: &hb_ot_shape_normalize_context_t, a: char, b: char) -> Option<char> {
650    // Avoid recomposing split matras.
651    if a.general_category().is_mark() {
652        return None;
653    }
654
655    // Composition-exclusion exceptions that we want to recompose.
656    if a == '\u{09AF}' && b == '\u{09BC}' {
657        return Some('\u{09DF}');
658    }
659
660    crate::hb::unicode::compose(a, b)
661}
662
663fn setup_masks(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
664    // We cannot setup masks here.  We save information about characters
665    // and setup masks later on in a pause-callback.
666    for info in buffer.info_slice_mut() {
667        info.set_indic_properties();
668    }
669}
670
671fn setup_syllables(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) -> bool {
672    super::ot_shaper_indic_machine::find_syllables_indic(buffer);
673
674    let mut start = 0;
675    let mut end = buffer.next_syllable(0);
676    while start < buffer.len {
677        buffer.unsafe_to_break(Some(start), Some(end));
678        start = end;
679        end = buffer.next_syllable(start);
680    }
681
682    false
683}
684
685fn initial_reordering(
686    plan: &hb_ot_shape_plan_t,
687    face: &hb_font_t,
688    buffer: &mut hb_buffer_t,
689) -> bool {
690    use super::ot_shaper_indic_machine::SyllableType;
691
692    let mut ret = false;
693
694    let indic_plan = plan.data::<IndicShapePlan>();
695
696    update_consonant_positions(plan, indic_plan, face, buffer);
697    if super::ot_shaper_syllabic::insert_dotted_circles(
698        face,
699        buffer,
700        SyllableType::BrokenCluster as u8,
701        ot_category_t::OT_DOTTEDCIRCLE,
702        Some(ot_category_t::OT_Repha),
703        Some(ot_position_t::POS_END),
704    ) {
705        ret = true;
706    }
707
708    let mut start = 0;
709    let mut end = buffer.next_syllable(0);
710    while start < buffer.len {
711        initial_reordering_syllable(plan, indic_plan, face, start, end, buffer);
712        start = end;
713        end = buffer.next_syllable(start);
714    }
715
716    ret
717}
718
719fn update_consonant_positions(
720    plan: &hb_ot_shape_plan_t,
721    indic_plan: &IndicShapePlan,
722    face: &hb_font_t,
723    buffer: &mut hb_buffer_t,
724) {
725    let mut virama_glyph = None;
726    if indic_plan.config.virama != 0 {
727        virama_glyph = face.get_nominal_glyph(indic_plan.config.virama);
728    }
729
730    if let Some(virama) = virama_glyph {
731        for info in buffer.info_slice_mut() {
732            if info.indic_position() == ot_position_t::POS_BASE_C {
733                let consonant = info.as_glyph();
734                info.set_indic_position(consonant_position_from_face(
735                    plan, indic_plan, face, consonant, virama,
736                ));
737            }
738        }
739    }
740}
741
742fn consonant_position_from_face(
743    plan: &hb_ot_shape_plan_t,
744    indic_plan: &IndicShapePlan,
745    face: &hb_font_t,
746    consonant: GlyphId,
747    virama: GlyphId,
748) -> u8 {
749    // For old-spec, the order of glyphs is Consonant,Virama,
750    // whereas for new-spec, it's Virama,Consonant.  However,
751    // some broken fonts (like Free Sans) simply copied lookups
752    // from old-spec to new-spec without modification.
753    // And oddly enough, Uniscribe seems to respect those lookups.
754    // Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds
755    // base at 0.  The font however, only has lookups matching
756    // 930,94D in 'blwf', not the expected 94D,930 (with new-spec
757    // table).  As such, we simply match both sequences.  Seems
758    // to work.
759    //
760    // Vatu is done as well, for:
761    // https://github.com/harfbuzz/harfbuzz/issues/1587
762
763    if indic_plan
764        .blwf
765        .would_substitute(&plan.ot_map, face, &[virama, consonant])
766        || indic_plan
767            .blwf
768            .would_substitute(&plan.ot_map, face, &[consonant, virama])
769        || indic_plan
770            .vatu
771            .would_substitute(&plan.ot_map, face, &[virama, consonant])
772        || indic_plan
773            .vatu
774            .would_substitute(&plan.ot_map, face, &[consonant, virama])
775    {
776        return ot_position_t::POS_BELOW_C;
777    }
778
779    if indic_plan
780        .pstf
781        .would_substitute(&plan.ot_map, face, &[virama, consonant])
782        || indic_plan
783            .pstf
784            .would_substitute(&plan.ot_map, face, &[consonant, virama])
785    {
786        return ot_position_t::POS_POST_C;
787    }
788
789    if indic_plan
790        .pref
791        .would_substitute(&plan.ot_map, face, &[virama, consonant])
792        || indic_plan
793            .pref
794            .would_substitute(&plan.ot_map, face, &[consonant, virama])
795    {
796        return ot_position_t::POS_POST_C;
797    }
798
799    ot_position_t::POS_BASE_C
800}
801
802fn initial_reordering_syllable(
803    plan: &hb_ot_shape_plan_t,
804    indic_plan: &IndicShapePlan,
805    face: &hb_font_t,
806    start: usize,
807    end: usize,
808    buffer: &mut hb_buffer_t,
809) {
810    use super::ot_shaper_indic_machine::SyllableType;
811
812    let syllable_type = match buffer.info[start].syllable() & 0x0F {
813        0 => SyllableType::ConsonantSyllable,
814        1 => SyllableType::VowelSyllable,
815        2 => SyllableType::StandaloneCluster,
816        3 => SyllableType::SymbolCluster,
817        4 => SyllableType::BrokenCluster,
818        5 => SyllableType::NonIndicCluster,
819        _ => unreachable!(),
820    };
821
822    match syllable_type {
823        // We made the vowels look like consonants.  So let's call the consonant logic!
824        SyllableType::VowelSyllable | SyllableType::ConsonantSyllable => {
825            initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer);
826        }
827        // We already inserted dotted-circles, so just call the standalone_cluster.
828        SyllableType::BrokenCluster | SyllableType::StandaloneCluster => {
829            initial_reordering_standalone_cluster(plan, indic_plan, face, start, end, buffer);
830        }
831        SyllableType::SymbolCluster | SyllableType::NonIndicCluster => {}
832    }
833}
834
// Rules from:
// https://docs.microsoft.com/en-us/typography/script-development/devanagari
837fn initial_reordering_consonant_syllable(
838    plan: &hb_ot_shape_plan_t,
839    indic_plan: &IndicShapePlan,
840    face: &hb_font_t,
841    start: usize,
842    end: usize,
843    buffer: &mut hb_buffer_t,
844) {
845    // https://github.com/harfbuzz/harfbuzz/issues/435#issuecomment-335560167
846    // For compatibility with legacy usage in Kannada,
847    // Ra+h+ZWJ must behave like Ra+ZWJ+h...
848    if buffer.script == Some(script::KANNADA)
849        && start + 3 <= end
850        && buffer.info[start].is_one_of(category_flag(ot_category_t::OT_Ra))
851        && buffer.info[start + 1].is_one_of(category_flag(ot_category_t::OT_H))
852        && buffer.info[start + 2].is_one_of(category_flag(ot_category_t::OT_ZWJ))
853    {
854        buffer.merge_clusters(start + 1, start + 3);
855        buffer.info.swap(start + 1, start + 2);
856    }
857
858    // 1. Find base consonant:
859    //
860    // The shaping engine finds the base consonant of the syllable, using the
861    // following algorithm: starting from the end of the syllable, move backwards
862    // until a consonant is found that does not have a below-base or post-base
863    // form (post-base forms have to follow below-base forms), or that is not a
864    // pre-base-reordering Ra, or arrive at the first consonant. The consonant
865    // stopped at will be the base.
866    //
867    //   - If the syllable starts with Ra + Halant (in a script that has Reph)
868    //     and has more than one consonant, Ra is excluded from candidates for
869    //     base consonants.
870
871    let mut base = end;
872    let mut has_reph = false;
873
874    {
875        // -> If the syllable starts with Ra + Halant (in a script that has Reph)
876        //    and has more than one consonant, Ra is excluded from candidates for
877        //    base consonants.
878        let mut limit = start;
879        if indic_plan.mask_array[indic_feature::RPHF] != 0
880            && start + 3 <= end
881            && ((indic_plan.config.reph_mode == RephMode::Implicit
882                && !buffer.info[start + 2].is_joiner())
883                || (indic_plan.config.reph_mode == RephMode::Explicit
884                    && buffer.info[start + 2].indic_category() == ot_category_t::OT_ZWJ))
885        {
886            // See if it matches the 'rphf' feature.
887            let glyphs = &[
888                buffer.info[start].as_glyph(),
889                buffer.info[start + 1].as_glyph(),
890                if indic_plan.config.reph_mode == RephMode::Explicit {
891                    buffer.info[start + 2].as_glyph()
892                } else {
893                    GlyphId(0)
894                },
895            ];
896            if indic_plan
897                .rphf
898                .would_substitute(&plan.ot_map, face, &glyphs[0..2])
899                || (indic_plan.config.reph_mode == RephMode::Explicit
900                    && indic_plan.rphf.would_substitute(&plan.ot_map, face, glyphs))
901            {
902                limit += 2;
903                while limit < end && buffer.info[limit].is_joiner() {
904                    limit += 1;
905                }
906                base = start;
907                has_reph = true;
908            }
909        } else if indic_plan.config.reph_mode == RephMode::LogRepha
910            && buffer.info[start].indic_category() == ot_category_t::OT_Repha
911        {
912            limit += 1;
913            while limit < end && buffer.info[limit].is_joiner() {
914                limit += 1;
915            }
916            base = start;
917            has_reph = true;
918        }
919
920        {
921            // -> starting from the end of the syllable, move backwards
922            let mut i = end;
923            let mut seen_below = false;
924            loop {
925                i -= 1;
926                // -> until a consonant is found
927                if buffer.info[i].is_consonant_myanmar() {
928                    // -> that does not have a below-base or post-base form
929                    // (post-base forms have to follow below-base forms),
930                    if buffer.info[i].indic_position() != ot_position_t::POS_BELOW_C
931                        && (buffer.info[i].indic_position() != ot_position_t::POS_POST_C
932                            || seen_below)
933                    {
934                        base = i;
935                        break;
936                    }
937                    if buffer.info[i].indic_position() == ot_position_t::POS_BELOW_C {
938                        seen_below = true;
939                    }
940
941                    // -> or that is not a pre-base-reordering Ra,
942                    //
943                    // IMPLEMENTATION NOTES:
944                    //
945                    // Our pre-base-reordering Ra's are marked position::PostC, so will be skipped
946                    // by the logic above already.
947
948                    // -> or arrive at the first consonant. The consonant stopped at will
949                    // be the base.
950                    base = i;
951                } else {
952                    // A ZWJ after a Halant stops the base search, and requests an explicit
953                    // half form.
954                    // A ZWJ before a Halant, requests a subjoined form instead, and hence
955                    // search continues.  This is particularly important for Bengali
956                    // sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya.
957                    if start < i
958                        && buffer.info[i].indic_category() == ot_category_t::OT_ZWJ
959                        && buffer.info[i - 1].indic_category() == ot_category_t::OT_H
960                    {
961                        break;
962                    }
963                }
964
965                if i <= limit {
966                    break;
967                }
968            }
969        }
970
971        // -> If the syllable starts with Ra + Halant (in a script that has Reph)
972        //    and has more than one consonant, Ra is excluded from candidates for
973        //    base consonants.
974        //
        //  Only do this for unforced Reph. (ie. not for Ra,H,ZWJ.)
976        if has_reph && base == start && limit - base <= 2 {
977            // Have no other consonant, so Reph is not formed and Ra becomes base.
978            has_reph = false;
979        }
980    }
981
982    // 2. Decompose and reorder Matras:
983    //
984    // Each matra and any syllable modifier sign in the syllable are moved to the
985    // appropriate position relative to the consonant(s) in the syllable. The
986    // shaping engine decomposes two- or three-part matras into their constituent
987    // parts before any repositioning. Matra characters are classified by which
988    // consonant in a conjunct they have affinity for and are reordered to the
989    // following positions:
990    //
991    //   - Before first half form in the syllable
992    //   - After subjoined consonants
993    //   - After post-form consonant
994    //   - After main consonant (for above marks)
995    //
996    // IMPLEMENTATION NOTES:
997    //
998    // The normalize() routine has already decomposed matras for us, so we don't
999    // need to worry about that.
1000
1001    // 3.  Reorder marks to canonical order:
1002    //
1003    // Adjacent nukta and halant or nukta and vedic sign are always repositioned
1004    // if necessary, so that the nukta is first.
1005    //
1006    // IMPLEMENTATION NOTES:
1007    //
1008    // We don't need to do this: the normalize() routine already did this for us.
1009
1010    // Reorder characters
1011
1012    for i in start..base {
1013        let pos = buffer.info[i].indic_position();
1014        buffer.info[i].set_indic_position(cmp::min(ot_position_t::POS_PRE_C, pos));
1015    }
1016
1017    if base < end {
1018        buffer.info[base].set_indic_position(ot_position_t::POS_BASE_C);
1019    }
1020
1021    // Handle beginning Ra
1022    if has_reph {
1023        buffer.info[start].set_indic_position(ot_position_t::POS_RA_TO_BECOME_REPH);
1024    }
1025
1026    // For old-style Indic script tags, move the first post-base Halant after
1027    // last consonant.
1028    //
1029    // Reports suggest that in some scripts Uniscribe does this only if there
1030    // is *not* a Halant after last consonant already.  We know that is the
1031    // case for Kannada, while it reorders unconditionally in other scripts,
1032    // eg. Malayalam, Bengali, and Devanagari.  We don't currently know about
1033    // other scripts, so we block Kannada.
1034    //
1035    // Kannada test case:
1036    // U+0C9A,U+0CCD,U+0C9A,U+0CCD
1037    // With some versions of Lohit Kannada.
1038    // https://bugs.freedesktop.org/show_bug.cgi?id=59118
1039    //
1040    // Malayalam test case:
1041    // U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D
1042    // With lohit-ttf-20121122/Lohit-Malayalam.ttf
1043    //
1044    // Bengali test case:
1045    // U+0998,U+09CD,U+09AF,U+09CD
1046    // With Windows XP vrinda.ttf
1047    // https://github.com/harfbuzz/harfbuzz/issues/1073
1048    //
1049    // Devanagari test case:
1050    // U+091F,U+094D,U+0930,U+094D
1051    // With chandas.ttf
1052    // https://github.com/harfbuzz/harfbuzz/issues/1071
1053    if indic_plan.is_old_spec {
1054        let disallow_double_halants = buffer.script == Some(script::KANNADA);
1055        for i in base + 1..end {
1056            if buffer.info[i].indic_category() == ot_category_t::OT_H {
1057                let mut j = end - 1;
1058                while j > i {
1059                    if buffer.info[j].is_consonant()
1060                        || (disallow_double_halants
1061                            && buffer.info[j].indic_category() == ot_category_t::OT_H)
1062                    {
1063                        break;
1064                    }
1065
1066                    j -= 1;
1067                }
1068
1069                if buffer.info[j].indic_category() != ot_category_t::OT_H && j > i {
1070                    // Move Halant to after last consonant.
1071                    let t = buffer.info[i];
1072                    for k in 0..j - i {
1073                        buffer.info[k + i] = buffer.info[k + i + 1];
1074                    }
1075                    buffer.info[j] = t;
1076                }
1077
1078                break;
1079            }
1080        }
1081    }
1082
1083    // Attach misc marks to previous char to move with them.
1084    {
1085        let mut last_pos = ot_position_t::POS_START;
1086        for i in start..end {
1087            let ok = rb_flag_unsafe(buffer.info[i].indic_category() as u32)
1088                & (category_flag(ot_category_t::OT_ZWJ)
1089                    | category_flag(ot_category_t::OT_ZWNJ)
1090                    | category_flag(ot_category_t::OT_N)
1091                    | category_flag(ot_category_t::OT_RS)
1092                    | category_flag(ot_category_t::OT_CM)
1093                    | category_flag(ot_category_t::OT_H))
1094                != 0;
1095            if ok {
1096                buffer.info[i].set_indic_position(last_pos);
1097
1098                if buffer.info[i].indic_category() == ot_category_t::OT_H
1099                    && buffer.info[i].indic_position() == ot_position_t::POS_PRE_M
1100                {
1101                    // Uniscribe doesn't move the Halant with Left Matra.
1102                    // TEST: U+092B,U+093F,U+094DE
1103                    // We follow.
1104                    for j in (start + 1..=i).rev() {
1105                        if buffer.info[j - 1].indic_position() != ot_position_t::POS_PRE_M {
1106                            let pos = buffer.info[j - 1].indic_position();
1107                            buffer.info[i].set_indic_position(pos);
1108                            break;
1109                        }
1110                    }
1111                }
1112            } else if buffer.info[i].indic_position() != ot_position_t::POS_SMVD {
1113                if buffer.info[i].indic_category() == ot_category_t::OT_MPst
1114                    && i > start
1115                    && buffer.info[i - 1].indic_category() == ot_category_t::OT_SM
1116                {
1117                    let val = buffer.info[i].indic_position();
1118                    buffer.info[i - 1].set_indic_position(val);
1119                }
1120
1121                last_pos = buffer.info[i].indic_position();
1122            }
1123        }
1124    }
1125    // For post-base consonants let them own anything before them
1126    // since the last consonant or matra.
1127    {
1128        let mut last = base;
1129        for i in base + 1..end {
1130            if buffer.info[i].is_consonant() {
1131                for j in last + 1..i {
1132                    if buffer.info[j].indic_position() < ot_position_t::POS_SMVD {
1133                        let pos = buffer.info[i].indic_position();
1134                        buffer.info[j].set_indic_position(pos);
1135                    }
1136                }
1137
1138                last = i;
1139            } else if (rb_flag_unsafe(buffer.info[i].indic_category() as u32)
1140                & (rb_flag(ot_category_t::OT_M as u32) | rb_flag(ot_category_t::OT_MPst as u32)))
1141                != 0
1142            {
1143                last = i;
1144            }
1145        }
1146    }
1147
1148    {
1149        // Use syllable() for sort accounting temporarily.
1150        let syllable = buffer.info[start].syllable();
1151        for i in start..end {
1152            buffer.info[i].set_syllable(u8::try_from(i - start).unwrap());
1153        }
1154
1155        buffer.info[start..end].sort_by_key(|a| a.indic_position());
1156
1157        // Find base again; also flip left-matra sequence.
1158        let mut first_left_mantra = end;
1159        let mut last_left_mantra = end;
1160        base = end;
1161
1162        for i in start..end {
1163            if buffer.info[i].indic_position() == ot_position_t::POS_BASE_C {
1164                base = i;
1165                break;
1166            } else if buffer.info[i].indic_position() == ot_position_t::POS_PRE_M {
1167                if first_left_mantra == end {
1168                    first_left_mantra = i;
1169                }
1170
1171                last_left_mantra = i;
1172            }
1173        }
1174
1175        // https://github.com/harfbuzz/harfbuzz/issues/3863
1176        if first_left_mantra < last_left_mantra {
1177            // No need to merge clusters, handled later.
1178            buffer.reverse_range(first_left_mantra, last_left_mantra + 1);
1179            // Reverse back nuktas, etc.
1180            let mut i = first_left_mantra;
1181
1182            for j in i..=last_left_mantra {
1183                if (rb_flag_unsafe(buffer.info[j].indic_category() as u32)
1184                    & (rb_flag(ot_category_t::OT_M as u32)
1185                        | rb_flag(ot_category_t::OT_MPst as u32)))
1186                    != 0
1187                {
1188                    buffer.reverse_range(i, j + 1);
1189                    i = j + 1;
1190                }
1191            }
1192        }
1193
1194        // Things are out-of-control for post base positions, they may shuffle
1195        // around like crazy.  In old-spec mode, we move halants around, so in
1196        // that case merge all clusters after base.  Otherwise, check the sort
1197        // order and merge as needed.
1198        // For pre-base stuff, we handle cluster issues in final reordering.
1199        //
1200        // We could use buffer->sort() for this, if there was no special
1201        // reordering of pre-base stuff happening later...
1202        // We don't want to merge_clusters all of that, which buffer->sort()
1203        // would.  Here's a concrete example:
1204        //
1205        // Assume there's a pre-base consonant and explicit Halant before base,
1206        // followed by a prebase-reordering (left) Matra:
1207        //
1208        //   C,H,ZWNJ,B,M
1209        //
1210        // At this point in reordering we would have:
1211        //
1212        //   M,C,H,ZWNJ,B
1213        //
1214        // whereas in final reordering we will bring the Matra closer to Base:
1215        //
1216        //   C,H,ZWNJ,M,B
1217        //
1218        // That's why we don't want to merge-clusters anything before the Base
1219        // at this point.  But if something moved from after Base to before it,
1220        // we should merge clusters from base to them.  In final-reordering, we
1221        // only move things around before base, and merge-clusters up to base.
1222        // These two merge-clusters from the two sides of base will interlock
1223        // to merge things correctly.  See:
1224        // https://github.com/harfbuzz/harfbuzz/issues/2272
1225        if indic_plan.is_old_spec || end - start > 127 {
1226            buffer.merge_clusters(base, end);
1227        } else {
1228            // Note! syllable() is a one-byte field.
1229            for i in base..end {
1230                if buffer.info[i].syllable() != 255 {
1231                    let mut min = i;
1232                    let mut max = i;
1233                    let mut j = start + buffer.info[i].syllable() as usize;
1234                    while j != i {
1235                        min = cmp::min(min, j);
1236                        max = cmp::max(max, j);
1237                        let next = start + buffer.info[j].syllable() as usize;
1238                        buffer.info[j].set_syllable(255); // So we don't process j later again.
1239                        j = next;
1240                    }
1241
1242                    buffer.merge_clusters(cmp::max(base, min), max + 1);
1243                }
1244            }
1245        }
1246
1247        // Put syllable back in.
1248        for info in &mut buffer.info[start..end] {
1249            info.set_syllable(syllable);
1250        }
1251    }
1252
1253    // Setup masks now
1254
1255    {
1256        // Reph
1257        for info in &mut buffer.info[start..end] {
1258            if info.indic_position() != ot_position_t::POS_RA_TO_BECOME_REPH {
1259                break;
1260            }
1261
1262            info.mask |= indic_plan.mask_array[indic_feature::RPHF];
1263        }
1264
1265        // Pre-base
1266        let mut mask = indic_plan.mask_array[indic_feature::HALF];
1267        if !indic_plan.is_old_spec && indic_plan.config.blwf_mode == BlwfMode::PreAndPost {
1268            mask |= indic_plan.mask_array[indic_feature::BLWF];
1269        }
1270
1271        for info in &mut buffer.info[start..base] {
1272            info.mask |= mask;
1273        }
1274
1275        // Base
1276        mask = 0;
1277        if base < end {
1278            buffer.info[base].mask |= mask;
1279        }
1280
1281        // Post-base
1282        mask = indic_plan.mask_array[indic_feature::BLWF]
1283            | indic_plan.mask_array[indic_feature::ABVF]
1284            | indic_plan.mask_array[indic_feature::PSTF];
1285        for i in base + 1..end {
1286            buffer.info[i].mask |= mask;
1287        }
1288    }
1289
1290    if indic_plan.is_old_spec && buffer.script == Some(script::DEVANAGARI) {
1291        // Old-spec eye-lash Ra needs special handling.  From the
1292        // spec:
1293        //
1294        // "The feature 'below-base form' is applied to consonants
1295        // having below-base forms and following the base consonant.
1296        // The exception is vattu, which may appear below half forms
1297        // as well as below the base glyph. The feature 'below-base
1298        // form' will be applied to all such occurrences of Ra as well."
1299        //
1300        // Test case: U+0924,U+094D,U+0930,U+094d,U+0915
1301        // with Sanskrit 2003 font.
1302        //
1303        // However, note that Ra,Halant,ZWJ is the correct way to
        // request eyelash form of Ra, so we wouldn't inhibit it
1305        // in that sequence.
1306        //
1307        // Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915
1308        for i in start..base.saturating_sub(1) {
1309            if buffer.info[i].indic_category() == ot_category_t::OT_Ra
1310                && buffer.info[i + 1].indic_category() == ot_category_t::OT_H
1311                && (i + 2 == base || buffer.info[i + 2].indic_category() != ot_category_t::OT_ZWJ)
1312            {
1313                buffer.info[i].mask |= indic_plan.mask_array[indic_feature::BLWF];
1314                buffer.info[i + 1].mask |= indic_plan.mask_array[indic_feature::BLWF];
1315            }
1316        }
1317    }
1318
1319    let pref_len = 2;
1320    if indic_plan.mask_array[indic_feature::PREF] != 0 && base + pref_len < end {
1321        // Find a Halant,Ra sequence and mark it for pre-base-reordering processing.
1322        for i in base + 1..end - pref_len + 1 {
1323            let glyphs = &[buffer.info[i + 0].as_glyph(), buffer.info[i + 1].as_glyph()];
1324            if indic_plan.pref.would_substitute(&plan.ot_map, face, glyphs) {
1325                buffer.info[i + 0].mask |= indic_plan.mask_array[indic_feature::PREF];
1326                buffer.info[i + 1].mask |= indic_plan.mask_array[indic_feature::PREF];
1327                break;
1328            }
1329        }
1330    }
1331
1332    // Apply ZWJ/ZWNJ effects
1333    for i in start + 1..end {
1334        if buffer.info[i].is_joiner() {
1335            let non_joiner = buffer.info[i].indic_category() == ot_category_t::OT_ZWNJ;
1336            let mut j = i;
1337
1338            loop {
1339                j -= 1;
1340
1341                // ZWJ/ZWNJ should disable CJCT.  They do that by simply
1342                // being there, since we don't skip them for the CJCT
1343                // feature (ie. F_MANUAL_ZWJ)
1344
1345                // A ZWNJ disables HALF.
1346                if non_joiner {
1347                    buffer.info[j].mask &= !indic_plan.mask_array[indic_feature::HALF];
1348                }
1349
1350                if j <= start || buffer.info[j].is_consonant() {
1351                    break;
1352                }
1353            }
1354        }
1355    }
1356}
1357
1358fn initial_reordering_standalone_cluster(
1359    plan: &hb_ot_shape_plan_t,
1360    indic_plan: &IndicShapePlan,
1361    face: &hb_font_t,
1362    start: usize,
1363    end: usize,
1364    buffer: &mut hb_buffer_t,
1365) {
1366    // We treat placeholder/dotted-circle as if they are consonants, so we
1367    // should just chain.  Only if not in compatibility mode that is...
1368    initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer);
1369}
1370
1371fn final_reordering(plan: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) -> bool {
1372    if buffer.is_empty() {
1373        return false;
1374    }
1375
1376    foreach_syllable!(buffer, start, end, {
1377        final_reordering_impl(plan, face, start, end, buffer);
1378    });
1379
1380    false
1381}
1382
1383fn final_reordering_impl(
1384    plan: &hb_ot_shape_plan_t,
1385    face: &hb_font_t,
1386    start: usize,
1387    end: usize,
1388    buffer: &mut hb_buffer_t,
1389) {
1390    let indic_plan = plan.data::<IndicShapePlan>();
1391
1392    // This function relies heavily on halant glyphs.  Lots of ligation
1393    // and possibly multiple substitutions happened prior to this
1394    // phase, and that might have messed up our properties.  Recover
1395    // from a particular case of that where we're fairly sure that a
1396    // class of OT_H is desired but has been lost.
1397    //
1398    // We don't call load_virama_glyph(), since we know it's already loaded.
1399    let mut virama_glyph = None;
1400    if indic_plan.config.virama != 0 {
1401        if let Some(g) = face.get_nominal_glyph(indic_plan.config.virama) {
1402            virama_glyph = Some(g.0 as u32);
1403        }
1404    }
1405
1406    if let Some(virama_glyph) = virama_glyph {
1407        for info in &mut buffer.info[start..end] {
1408            if info.glyph_id == virama_glyph
1409                && _hb_glyph_info_ligated(info)
1410                && _hb_glyph_info_multiplied(info)
1411            {
1412                // This will make sure that this glyph passes is_halant() test.
1413                info.set_indic_category(ot_category_t::OT_H);
1414                _hb_glyph_info_clear_ligated_and_multiplied(info);
1415            }
1416        }
1417    }
1418
1419    // 4. Final reordering:
1420    //
1421    // After the localized forms and basic shaping forms GSUB features have been
1422    // applied (see below), the shaping engine performs some final glyph
1423    // reordering before applying all the remaining font features to the entire
1424    // syllable.
1425
1426    let mut try_pref = indic_plan.mask_array[indic_feature::PREF] != 0;
1427
1428    let mut base = start;
1429    while base < end {
1430        if buffer.info[base].indic_position() as u32 >= ot_position_t::POS_BASE_C as u32 {
1431            if try_pref && base + 1 < end {
1432                for i in base + 1..end {
1433                    if (buffer.info[i].mask & indic_plan.mask_array[indic_feature::PREF]) != 0 {
1434                        if !(_hb_glyph_info_substituted(&buffer.info[i])
1435                            && _hb_glyph_info_ligated_and_didnt_multiply(&buffer.info[i]))
1436                        {
1437                            // Ok, this was a 'pref' candidate but didn't form any.
1438                            // Base is around here...
1439                            base = i;
1440                            while base < end && buffer.info[base].is_halant() {
1441                                base += 1;
1442                            }
1443
1444                            if base < end {
1445                                buffer.info[base].set_indic_position(ot_position_t::POS_BASE_C);
1446                            }
1447
1448                            try_pref = false;
1449                        }
1450
1451                        break;
1452                    }
1453
1454                    if base == end {
1455                        break;
1456                    }
1457                }
1458            }
1459
1460            // For Malayalam, skip over unformed below- (but NOT post-) forms.
1461            if buffer.script == Some(script::MALAYALAM) {
1462                let mut i = base + 1;
1463                while i < end {
1464                    while i < end && buffer.info[i].is_joiner() {
1465                        i += 1;
1466                    }
1467
1468                    if i == end || !buffer.info[i].is_halant() {
1469                        break;
1470                    }
1471
1472                    i += 1; // Skip halant.
1473
1474                    while i < end && buffer.info[i].is_joiner() {
1475                        i += 1;
1476                    }
1477
1478                    if i < end
1479                        && buffer.info[i].is_consonant()
1480                        && buffer.info[i].indic_position() == ot_position_t::POS_BELOW_C
1481                    {
1482                        base = i;
1483                        buffer.info[base].set_indic_position(ot_position_t::POS_BASE_C);
1484                    }
1485
1486                    i += 1;
1487                }
1488            }
1489
1490            if start < base
1491                && buffer.info[base].indic_position() as u32 > ot_position_t::POS_BASE_C as u32
1492            {
1493                base -= 1;
1494            }
1495
1496            break;
1497        }
1498
1499        base += 1;
1500    }
1501
1502    if base == end
1503        && start < base
1504        && buffer.info[base - 1].is_one_of(rb_flag(ot_category_t::OT_ZWJ as u32))
1505    {
1506        base -= 1;
1507    }
1508
1509    if base < end {
1510        while start < base
1511            && buffer.info[base].is_one_of(
1512                rb_flag(ot_category_t::OT_N as u32) | rb_flag(ot_category_t::OT_H as u32),
1513            )
1514        {
1515            base -= 1;
1516        }
1517    }
1518
1519    // - Reorder matras:
1520    //
1521    //   If a pre-base matra character had been reordered before applying basic
1522    //   features, the glyph can be moved closer to the main consonant based on
1523    //   whether half-forms had been formed. Actual position for the matra is
1524    //   defined as “after last standalone halant glyph, after initial matra
1525    //   position and before the main consonant”. If ZWJ or ZWNJ follow this
1526    //   halant, position is moved after it.
1527    //
1528    // IMPLEMENTATION NOTES:
1529    //
1530    // It looks like the last sentence is wrong.  Testing, with Windows 7 Uniscribe
1531    // and Devanagari shows that the behavior is best described as:
1532    //
1533    // "If ZWJ follows this halant, matra is NOT repositioned after this halant.
1534    //  If ZWNJ follows this halant, position is moved after it."
1535    //
1536    // Test case, with Adobe Devanagari or Nirmala UI:
1537    //
1538    //   U+091F,U+094D,U+200C,U+092F,U+093F
1539    //   (Matra moves to the middle, after ZWNJ.)
1540    //
1541    //   U+091F,U+094D,U+200D,U+092F,U+093F
1542    //   (Matra does NOT move, stays to the left.)
1543    //
1544    // https://github.com/harfbuzz/harfbuzz/issues/1070
1545
1546    // Otherwise there can't be any pre-base matra characters.
1547    if start + 1 < end && start < base {
1548        // If we lost track of base, alas, position before last thingy.
1549        let mut new_pos = if base == end { base - 2 } else { base - 1 };
1550
1551        // Malayalam / Tamil do not have "half" forms or explicit virama forms.
1552        // The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1553        // We want to position matra after them.
1554        if buffer.script != Some(script::MALAYALAM) && buffer.script != Some(script::TAMIL) {
1555            loop {
1556                while new_pos > start
1557                    && !buffer.info[new_pos].is_one_of(
1558                        rb_flag(ot_category_t::OT_M as u32)
1559                            | rb_flag(ot_category_t::OT_MPst as u32)
1560                            | rb_flag(ot_category_t::OT_H as u32),
1561                    )
1562                {
1563                    new_pos -= 1;
1564                }
1565
1566                // If we found no Halant we are done.
1567                // Otherwise only proceed if the Halant does
1568                // not belong to the Matra itself!
1569                if buffer.info[new_pos].is_halant()
1570                    && buffer.info[new_pos].indic_position() != ot_position_t::POS_PRE_M
1571                {
1572                    if new_pos + 1 < end {
1573                        // -> If ZWJ follows this halant, matra is NOT repositioned after this halant.
1574                        if buffer.info[new_pos + 1].indic_category() == ot_category_t::OT_ZWJ {
1575                            // Keep searching.
1576                            if new_pos > start {
1577                                new_pos -= 1;
1578                                continue;
1579                            }
1580                        }
1581
1582                        // -> If ZWNJ follows this halant, position is moved after it.
1583                        //
1584                        // IMPLEMENTATION NOTES:
1585                        //
1586                        // This is taken care of by the state-machine. A Halant,ZWNJ is a terminating
1587                        // sequence for a consonant syllable; any pre-base matras occurring after it
1588                        // will belong to the subsequent syllable.
1589                    }
1590                } else {
1591                    new_pos = start; // No move.
1592                }
1593
1594                break;
1595            }
1596        }
1597
1598        if start < new_pos && buffer.info[new_pos].indic_position() != ot_position_t::POS_PRE_M {
1599            // Now go see if there's actually any matras...
1600            for i in (start + 1..=new_pos).rev() {
1601                if buffer.info[i - 1].indic_position() == ot_position_t::POS_PRE_M {
1602                    let old_pos = i - 1;
1603                    // Shouldn't actually happen.
1604                    if old_pos < base && base <= new_pos {
1605                        base -= 1;
1606                    }
1607
1608                    let tmp = buffer.info[old_pos];
1609                    for i in 0..new_pos - old_pos {
1610                        buffer.info[i + old_pos] = buffer.info[i + old_pos + 1];
1611                    }
1612                    buffer.info[new_pos] = tmp;
1613
1614                    // Note: this merge_clusters() is intentionally *after* the reordering.
1615                    // Indic matra reordering is special and tricky...
1616                    buffer.merge_clusters(new_pos, cmp::min(end, base + 1));
1617
1618                    new_pos -= 1;
1619                }
1620            }
1621        } else {
1622            for i in start..base {
1623                if buffer.info[i].indic_position() == ot_position_t::POS_PRE_M {
1624                    buffer.merge_clusters(i, cmp::min(end, base + 1));
1625                    break;
1626                }
1627            }
1628        }
1629    }
1630
1631    // - Reorder reph:
1632    //
1633    //   Reph’s original position is always at the beginning of the syllable,
1634    //   (i.e. it is not reordered at the character reordering stage). However,
1635    //   it will be reordered according to the basic-forms shaping results.
1636    //   Possible positions for reph, depending on the script, are; after main,
1637    //   before post-base consonant forms, and after post-base consonant forms.
1638
1639    // Two cases:
1640    //
1641    // - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then
1642    //   we should only move it if the sequence ligated to the repha form.
1643    //
1644    // - If repha is encoded separately and in the logical position, we should only
1645    //   move it if it did NOT ligate.  If it ligated, it's probably the font trying
1646    //   to make it work without the reordering.
1647
1648    if start + 1 < end
1649        && buffer.info[start].indic_position() == ot_position_t::POS_RA_TO_BECOME_REPH
1650        && (buffer.info[start].indic_category() == ot_category_t::OT_Repha)
1651            ^ _hb_glyph_info_ligated_and_didnt_multiply(&buffer.info[start])
1652    {
1653        let mut new_reph_pos;
1654        'reph: {
1655            let reph_pos = indic_plan.config.reph_pos;
1656
1657            // 1. If reph should be positioned after post-base consonant forms,
1658            //    proceed to step 5.
1659            if reph_pos != RephPosition::AfterPost {
1660                // 2. If the reph repositioning class is not after post-base: target
1661                //    position is after the first explicit halant glyph between the
1662                //    first post-reph consonant and last main consonant. If ZWJ or ZWNJ
1663                //    are following this halant, position is moved after it. If such
1664                //    position is found, this is the target position. Otherwise,
1665                //    proceed to the next step.
1666                //
1667                //    Note: in old-implementation fonts, where classifications were
1668                //    fixed in shaping engine, there was no case where reph position
1669                //    will be found on this step.
1670                {
1671                    new_reph_pos = start + 1;
1672                    while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() {
1673                        new_reph_pos += 1;
1674                    }
1675
1676                    if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() {
1677                        // ->If ZWJ or ZWNJ are following this halant, position is moved after it.
1678                        if new_reph_pos + 1 < base && buffer.info[new_reph_pos + 1].is_joiner() {
1679                            new_reph_pos += 1;
1680                        }
1681
1682                        break 'reph;
1683                    }
1684                }
1685
1686                // 3. If reph should be repositioned after the main consonant: find the
1687                //    first consonant not ligated with main, or find the first
1688                //    consonant that is not a potential pre-base-reordering Ra.
1689                if reph_pos == RephPosition::AfterMain {
1690                    new_reph_pos = base;
1691                    while new_reph_pos + 1 < end
1692                        && buffer.info[new_reph_pos + 1].indic_position()
1693                            <= ot_position_t::POS_AFTER_MAIN
1694                    {
1695                        new_reph_pos += 1;
1696                    }
1697
1698                    if new_reph_pos < end {
1699                        break 'reph;
1700                    }
1701                }
1702
1703                // 4. If reph should be positioned before post-base consonant, find
1704                //    first post-base classified consonant not ligated with main. If no
1705                //    consonant is found, the target position should be before the
1706                //    first matra, syllable modifier sign or vedic sign.
1707                //
1708                // This is our take on what step 4 is trying to say (and failing, BADLY).
1709                if reph_pos == RephPosition::AfterSub {
1710                    new_reph_pos = base;
1711                    while new_reph_pos + 1 < end
1712                        && (rb_flag_unsafe(buffer.info[new_reph_pos + 1].indic_position() as u32)
1713                            & (rb_flag(ot_position_t::POS_POST_C as u32)
1714                                | rb_flag(ot_position_t::POS_AFTER_POST as u32)
1715                                | rb_flag(ot_position_t::POS_SMVD as u32)))
1716                            == 0
1717                    {
1718                        new_reph_pos += 1;
1719                    }
1720
1721                    if new_reph_pos < end {
1722                        break 'reph;
1723                    }
1724                }
1725            }
1726
1727            // 5. If no consonant is found in steps 3 or 4, move reph to a position
1728            //    immediately before the first post-base matra, syllable modifier
1729            //    sign or vedic sign that has a reordering class after the intended
1730            //    reph position. For example, if the reordering position for reph
1731            //    is post-main, it will skip above-base matras that also have a
1732            //    post-main position.
1733            //
1734            // Copied from step 2.
1735            new_reph_pos = start + 1;
1736            while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() {
1737                new_reph_pos += 1;
1738            }
1739
1740            if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() {
1741                /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
1742                if new_reph_pos + 1 < base && buffer.info[new_reph_pos + 1].is_joiner() {
1743                    new_reph_pos += 1;
1744                }
1745
1746                break 'reph;
1747            }
1748            // See https://github.com/harfbuzz/harfbuzz/issues/2298#issuecomment-615318654
1749
1750            // 6. Otherwise, reorder reph to the end of the syllable.
1751            {
1752                new_reph_pos = end - 1;
1753                while new_reph_pos > start
1754                    && buffer.info[new_reph_pos].indic_position() == ot_position_t::POS_SMVD
1755                {
1756                    new_reph_pos -= 1;
1757                }
1758
1759                // If the Reph is to be ending up after a Matra,Halant sequence,
1760                // position it before that Halant so it can interact with the Matra.
1761                // However, if it's a plain Consonant,Halant we shouldn't do that.
1762                // Uniscribe doesn't do this.
1763                // TEST: U+0930,U+094D,U+0915,U+094B,U+094D
1764                if buffer.info[new_reph_pos].is_halant() {
1765                    for info in &buffer.info[base + 1..new_reph_pos] {
1766                        if (rb_flag_unsafe(info.indic_category() as u32)
1767                            & (rb_flag(ot_category_t::OT_M as u32)
1768                                | rb_flag(ot_category_t::OT_MPst as u32)))
1769                            != 0
1770                        {
1771                            // Ok, got it.
1772                            new_reph_pos -= 1;
1773                        }
1774                    }
1775                }
1776            }
1777
1778            break 'reph;
1779        }
1780
1781        // Move
1782        buffer.merge_clusters(start, new_reph_pos + 1);
1783
1784        let reph = buffer.info[start];
1785        for i in 0..new_reph_pos - start {
1786            buffer.info[i + start] = buffer.info[i + start + 1];
1787        }
1788        buffer.info[new_reph_pos] = reph;
1789
1790        if start < base && base <= new_reph_pos {
1791            base -= 1;
1792        }
1793    }
1794
1795    // - Reorder pre-base-reordering consonants:
1796    //
1797    //   If a pre-base-reordering consonant is found, reorder it according to
1798    //   the following rules:
1799
1800    // Otherwise there can't be any pre-base-reordering Ra.
1801    if try_pref && base + 1 < end {
1802        for i in base + 1..end {
1803            if (buffer.info[i].mask & indic_plan.mask_array[indic_feature::PREF]) != 0 {
1804                // 1. Only reorder a glyph produced by substitution during application
1805                //    of the <pref> feature. (Note that a font may shape a Ra consonant with
1806                //    the feature generally but block it in certain contexts.)
1807                //
1808                // Note: We just check that something got substituted.  We don't check that
1809                // the <pref> feature actually did it...
1810                //
1811                // Reorder pref only if it ligated.
1812                if _hb_glyph_info_ligated_and_didnt_multiply(&buffer.info[i]) {
1813                    // 2. Try to find a target position the same way as for pre-base matra.
1814                    //    If it is found, reorder pre-base consonant glyph.
1815                    //
1816                    // 3. If position is not found, reorder immediately before main consonant.
1817
1818                    let mut new_pos = base;
1819                    // Malayalam / Tamil do not have "half" forms or explicit virama forms.
1820                    // The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1821                    // We want to position matra after them.
1822                    if buffer.script != Some(script::MALAYALAM)
1823                        && buffer.script != Some(script::TAMIL)
1824                    {
1825                        while new_pos > start
1826                            && !buffer.info[new_pos - 1].is_one_of(
1827                                rb_flag(ot_category_t::OT_M as u32)
1828                                    | rb_flag(ot_category_t::OT_MPst as u32)
1829                                    | rb_flag(ot_category_t::OT_H as u32),
1830                            )
1831                        {
1832                            new_pos -= 1;
1833                        }
1834                    }
1835
1836                    if new_pos > start && buffer.info[new_pos - 1].is_halant() {
1837                        // -> If ZWJ or ZWNJ follow this halant, position is moved after it.
1838                        if new_pos < end && buffer.info[new_pos].is_joiner() {
1839                            new_pos += 1;
1840                        }
1841                    }
1842
1843                    {
1844                        let old_pos = i;
1845
1846                        buffer.merge_clusters(new_pos, old_pos + 1);
1847                        let tmp = buffer.info[old_pos];
1848                        for i in (0..old_pos - new_pos).rev() {
1849                            buffer.info[i + new_pos + 1] = buffer.info[i + new_pos];
1850                        }
1851                        buffer.info[new_pos] = tmp;
1852
1853                        if new_pos <= base && base < old_pos {
1854                            // TODO: investigate
1855                            #[allow(unused_assignments)]
1856                            {
1857                                base += 1;
1858                            }
1859                        }
1860                    }
1861                }
1862
1863                break;
1864            }
1865        }
1866    }
1867
1868    // Apply 'init' to the Left Matra if it's a word start.
1869    if buffer.info[start].indic_position() == ot_position_t::POS_PRE_M {
1870        if start == 0
1871            || (rb_flag_unsafe(
1872                _hb_glyph_info_get_general_category(&buffer.info[start - 1]).to_rb(),
1873            ) & rb_flag_range(
1874                hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT,
1875                hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK,
1876            )) == 0
1877        {
1878            buffer.info[start].mask |= indic_plan.mask_array[indic_feature::INIT];
1879        } else {
1880            buffer.unsafe_to_break(Some(start - 1), Some(start + 1));
1881        }
1882    }
1883}