rustybuzz/hb/
ot_shaper_arabic.rs

1use crate::Direction;
2use alloc::boxed::Box;
3
4use super::algs::*;
5use super::buffer::*;
6use super::ot_layout::*;
7use super::ot_map::*;
8use super::ot_shape::*;
9use super::ot_shape_normalize::HB_OT_SHAPE_NORMALIZATION_MODE_AUTO;
10use super::ot_shape_plan::hb_ot_shape_plan_t;
11use super::ot_shaper::*;
12use super::unicode::*;
13use super::{hb_font_t, hb_glyph_info_t, hb_mask_t, hb_tag_t, script, Script};
14
15const HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH: hb_buffer_scratch_flags_t =
16    HB_BUFFER_SCRATCH_FLAG_SHAPER0;
17
18// See:
19// https://github.com/harfbuzz/harfbuzz/commit/6e6f82b6f3dde0fc6c3c7d991d9ec6cfff57823d#commitcomment-14248516
20fn is_word_category(gc: hb_unicode_general_category_t) -> bool {
21    (rb_flag_unsafe(gc.to_rb())
22        & (rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED)
23            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE)
24            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER)
25            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER)
26            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK)
27            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK)
28            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
29            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER)
30            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER)
31            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER)
32            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL)
33            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL)
34            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL)
35            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL)))
36        != 0
37}
38
/// Joining classification of a character as used by the Arabic joining
/// state machine.
///
/// The discriminants `0..=5` double as column indices into `STATE_TABLE`
/// (`this_type as usize`), so their numeric values must not change.
/// `T` and `X` never index the table: `T` characters are skipped by
/// `arabic_joining`, and `X` is resolved to `T` or `U` by `get_joining_type`.
#[derive(Clone, Copy, PartialEq, PartialOrd, Debug)]
pub enum hb_arabic_joining_type_t {
    /// State-table column 0 (`jt_U`).
    U = 0,
    /// State-table column 1 (`jt_L`).
    L = 1,
    /// State-table column 2 (`jt_R`).
    R = 2,
    /// State-table column 3 (`jt_D`).
    D = 3,
    // We don't have C, like harfbuzz, because Rust doesn't allow duplicated enum variants.
    /// State-table column 4 (`jg_ALAPH`).
    GroupAlaph = 4,
    /// State-table column 5 (`jg_DALATH_RISH`).
    GroupDalathRish = 5,
    /// Skipped by the joining state machine: it neither reads nor changes the state.
    T = 7,
    /// Placeholder returned by the joining table; resolved via general category.
    X = 8, // means: use general-category to choose between U or T.
}
51
52fn get_joining_type(u: char, gc: hb_unicode_general_category_t) -> hb_arabic_joining_type_t {
53    let j_type = super::ot_shaper_arabic_table::joining_type(u);
54    if j_type != hb_arabic_joining_type_t::X {
55        return j_type;
56    }
57
58    let ok = rb_flag_unsafe(gc.to_rb())
59        & (rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
60            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK)
61            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT));
62
63    if ok != 0 {
64        hb_arabic_joining_type_t::T
65    } else {
66        hb_arabic_joining_type_t::U
67    }
68}
69
70fn feature_is_syriac(tag: hb_tag_t) -> bool {
71    matches!(tag.to_bytes()[3], b'2' | b'3')
72}
73
/// Positional-form features applied by the Arabic shaper.
///
/// The order is significant: `data_create_arabic` stores the mask of
/// `ARABIC_FEATURES[i]` at `mask_array[i]`, and `setup_masks_inner` indexes
/// that array with a glyph's shaping action, so entry `i` must correspond to
/// the `arabic_action_t` constant with value `i` (`ISOL` = 0 ... `INIT` = 6).
const ARABIC_FEATURES: &[hb_tag_t] = &[
    hb_tag_t::from_bytes(b"isol"),
    hb_tag_t::from_bytes(b"fina"),
    hb_tag_t::from_bytes(b"fin2"),
    hb_tag_t::from_bytes(b"fin3"),
    hb_tag_t::from_bytes(b"medi"),
    hb_tag_t::from_bytes(b"med2"),
    hb_tag_t::from_bytes(b"init"),
];
83
/// Per-glyph shaping actions, stored in a single byte on each glyph.
///
/// Values `0..=6` line up with the entries of `ARABIC_FEATURES`; `NONE`
/// follows directly after them.
mod arabic_action_t {
    pub const ISOL: u8 = 0;
    pub const FINA: u8 = 1;
    pub const FIN2: u8 = 2;
    pub const FIN3: u8 = 3;
    pub const MEDI: u8 = 4;
    pub const MED2: u8 = 5;
    pub const INIT: u8 = 6;
    /// No positional feature applies.
    pub const NONE: u8 = 7;

    // We abuse the same byte for other things...
    /// `stch` tile that is placed once.
    pub const STRETCHING_FIXED: u8 = 8;
    /// `stch` tile that may be repeated to fill the available width.
    pub const STRETCHING_REPEATING: u8 = 9;

    /// Returns `true` if `n` is one of the two `stch` stretching markers.
    #[inline]
    pub fn is_stch(n: u8) -> bool {
        n == STRETCHING_FIXED || n == STRETCHING_REPEATING
    }
}
103
/// Joining state machine used by `arabic_joining`.
///
/// Indexed as `STATE_TABLE[state][joining_type as usize]`. Each entry is
/// `(prev_action, curr_action, next_state)`:
/// - `.0` is the action written onto the previous non-transparent glyph
///   (ignored when it is `NONE` or there is no previous glyph),
/// - `.1` is the action written onto the current glyph,
/// - `.2` is the state to continue with.
const STATE_TABLE: &[[(u8, u8, u16); 6]] = &[
    // jt_U,          jt_L,          jt_R,
    // jt_D,          jg_ALAPH,      jg_DALATH_RISH

    // State 0: prev was U, not willing to join.
    [
        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 1),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 1),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 6),
    ],
    // State 1: prev was R or action::ISOL/ALAPH, not willing to join.
    [
        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 1),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::NONE, arabic_action_t::FIN2, 5),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 6),
    ],
    // State 2: prev was D/L in action::ISOL form, willing to join.
    [
        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::INIT, arabic_action_t::FINA, 1),
        (arabic_action_t::INIT, arabic_action_t::FINA, 3),
        (arabic_action_t::INIT, arabic_action_t::FINA, 4),
        (arabic_action_t::INIT, arabic_action_t::FINA, 6),
    ],
    // State 3: prev was D in action::FINA form, willing to join.
    [
        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::MEDI, arabic_action_t::FINA, 1),
        (arabic_action_t::MEDI, arabic_action_t::FINA, 3),
        (arabic_action_t::MEDI, arabic_action_t::FINA, 4),
        (arabic_action_t::MEDI, arabic_action_t::FINA, 6),
    ],
    // State 4: prev was action::FINA ALAPH, not willing to join.
    [
        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::MED2, arabic_action_t::ISOL, 1),
        (arabic_action_t::MED2, arabic_action_t::ISOL, 2),
        (arabic_action_t::MED2, arabic_action_t::FIN2, 5),
        (arabic_action_t::MED2, arabic_action_t::ISOL, 6),
    ],
    // State 5: prev was FIN2/FIN3 ALAPH, not willing to join.
    [
        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::ISOL, arabic_action_t::ISOL, 1),
        (arabic_action_t::ISOL, arabic_action_t::ISOL, 2),
        (arabic_action_t::ISOL, arabic_action_t::FIN2, 5),
        (arabic_action_t::ISOL, arabic_action_t::ISOL, 6),
    ],
    // State 6: prev was DALATH/RISH, not willing to join.
    [
        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 1),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::NONE, arabic_action_t::FIN3, 5),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 6),
    ],
];
172
173impl hb_glyph_info_t {
174    fn arabic_shaping_action(&self) -> u8 {
175        self.ot_shaper_var_u8_auxiliary()
176    }
177
178    fn set_arabic_shaping_action(&mut self, action: u8) {
179        self.set_ot_shaper_var_u8_auxiliary(action)
180    }
181}
182
183fn collect_features(planner: &mut hb_ot_shape_planner_t) {
184    // We apply features according to the Arabic spec, with pauses
185    // in between most.
186    //
187    // The pause between init/medi/... and rlig is required.  See eg:
188    // https://bugzilla.mozilla.org/show_bug.cgi?id=644184
189    //
190    // The pauses between init/medi/... themselves are not necessarily
191    // needed as only one of those features is applied to any character.
192    // The only difference it makes is when fonts have contextual
193    // substitutions.  We now follow the order of the spec, which makes
194    // for better experience if that's what Uniscribe is doing.
195    //
196    // At least for Arabic, looks like Uniscribe has a pause between
197    // rlig and calt.  Otherwise the IranNastaliq's ALLAH ligature won't
198    // work.  However, testing shows that rlig and calt are applied
199    // together for Mongolian in Uniscribe.  As such, we only add a
200    // pause for Arabic, not other scripts.
201    //
202    // A pause after calt is required to make KFGQPC Uthmanic Script HAFS
203    // work correctly.  See https://github.com/harfbuzz/harfbuzz/issues/505
204
205    planner
206        .ot_map
207        .enable_feature(hb_tag_t::from_bytes(b"stch"), F_NONE, 1);
208    planner.ot_map.add_gsub_pause(Some(record_stch));
209
210    planner
211        .ot_map
212        .enable_feature(hb_tag_t::from_bytes(b"ccmp"), F_MANUAL_ZWJ, 1);
213    planner
214        .ot_map
215        .enable_feature(hb_tag_t::from_bytes(b"locl"), F_MANUAL_ZWJ, 1);
216
217    planner.ot_map.add_gsub_pause(None);
218
219    for feature in ARABIC_FEATURES {
220        let has_fallback = planner.script == Some(script::ARABIC) && !feature_is_syriac(*feature);
221        let flags = if has_fallback { F_HAS_FALLBACK } else { F_NONE };
222        planner
223            .ot_map
224            .add_feature(*feature, F_MANUAL_ZWJ | flags, 1);
225        planner.ot_map.add_gsub_pause(None);
226    }
227
228    // Normally, Unicode says a ZWNJ means "don't ligate".  In Arabic script
229    // however, it says a ZWJ should also mean "don't ligate".  So we run
230    // the main ligating features as MANUAL_ZWJ.
231
232    planner.ot_map.enable_feature(
233        hb_tag_t::from_bytes(b"rlig"),
234        F_MANUAL_ZWJ | F_HAS_FALLBACK,
235        1,
236    );
237
238    if planner.script == Some(script::ARABIC) {
239        planner.ot_map.add_gsub_pause(Some(arabic_fallback_shape));
240    }
241
242    // No pause after rclt.
243    // See 98460779bae19e4d64d29461ff154b3527bf8420
244    planner
245        .ot_map
246        .enable_feature(hb_tag_t::from_bytes(b"calt"), F_MANUAL_ZWJ, 1);
247    /* https://github.com/harfbuzz/harfbuzz/issues/1573 */
248    if !planner.ot_map.has_feature(hb_tag_t::from_bytes(b"rclt")) {
249        planner.ot_map.add_gsub_pause(None);
250    }
251
252    planner
253        .ot_map
254        .enable_feature(hb_tag_t::from_bytes(b"liga"), F_MANUAL_ZWJ, 1);
255    planner
256        .ot_map
257        .enable_feature(hb_tag_t::from_bytes(b"clig"), F_MANUAL_ZWJ, 1);
258
259    // The spec includes 'cswh'.  Earlier versions of Windows
260    // used to enable this by default, but testing suggests
261    // that Windows 8 and later do not enable it by default,
262    // and spec now says 'Off by default'.
263    // We disabled this in ae23c24c32.
264    // Note that IranNastaliq uses this feature extensively
265    // to fixup broken glyph sequences.  Oh well...
266    // Test case: U+0643,U+0640,U+0631.
267
268    // planner.ot_map.enable_feature(feature::CONTEXTUAL_SWASH, F_MANUAL_ZWJ, 1);
269    planner
270        .ot_map
271        .enable_feature(hb_tag_t::from_bytes(b"mset"), F_MANUAL_ZWJ, 1);
272}
273
/// Shaper-specific plan data created by `data_create_arabic`.
pub struct arabic_shape_plan_t {
    // The "+ 1" in the next array is to accommodate for the "NONE" command,
    // which is not an OpenType feature, but this simplifies the code by not
    // having to do a "if (... < NONE) ..." and just rely on the fact that
    // mask_array[NONE] == 0.
    /// Feature mask per shaping action; slot `i` holds the mask of `ARABIC_FEATURES[i]`.
    mask_array: [hb_mask_t; ARABIC_FEATURES.len() + 1],
    /// `true` when the plan's map reports a non-zero mask for the `stch` feature.
    has_stch: bool,
}
282
283pub fn data_create_arabic(plan: &hb_ot_shape_plan_t) -> arabic_shape_plan_t {
284    let has_stch = plan.ot_map.get_1_mask(hb_tag_t::from_bytes(b"stch")) != 0;
285
286    let mut mask_array = [0; ARABIC_FEATURES.len() + 1];
287    for i in 0..ARABIC_FEATURES.len() {
288        mask_array[i] = plan.ot_map.get_1_mask(ARABIC_FEATURES[i]);
289    }
290
291    arabic_shape_plan_t {
292        mask_array,
293        has_stch,
294    }
295}
296
297fn arabic_joining(buffer: &mut hb_buffer_t) {
298    let mut prev: Option<usize> = None;
299    let mut state = 0;
300
301    // Check pre-context.
302    for i in 0..buffer.context_len[0] {
303        let c = buffer.context[0][i];
304        let this_type = get_joining_type(c, c.general_category());
305        if this_type == hb_arabic_joining_type_t::T {
306            continue;
307        }
308
309        state = STATE_TABLE[state][this_type as usize].2 as usize;
310        break;
311    }
312
313    for i in 0..buffer.len {
314        let this_type = get_joining_type(
315            buffer.info[i].as_char(),
316            _hb_glyph_info_get_general_category(&buffer.info[i]),
317        );
318        if this_type == hb_arabic_joining_type_t::T {
319            buffer.info[i].set_arabic_shaping_action(arabic_action_t::NONE);
320            continue;
321        }
322
323        let entry = &STATE_TABLE[state][this_type as usize];
324        if entry.0 != arabic_action_t::NONE && prev.is_some() {
325            if let Some(prev) = prev {
326                buffer.info[prev].set_arabic_shaping_action(entry.0);
327                buffer.safe_to_insert_tatweel(Some(prev), Some(i + 1));
328            }
329        }
330        // States that have a possible prev_action.
331        else {
332            if let Some(prev) = prev {
333                if this_type >= hb_arabic_joining_type_t::R || (2 <= state && state <= 5) {
334                    buffer.unsafe_to_concat(Some(prev), Some(i + 1));
335                }
336            } else {
337                if this_type >= hb_arabic_joining_type_t::R {
338                    buffer.unsafe_to_concat_from_outbuffer(Some(0), Some(i + 1));
339                }
340            }
341        }
342
343        buffer.info[i].set_arabic_shaping_action(entry.1);
344
345        prev = Some(i);
346        state = entry.2 as usize;
347    }
348
349    for i in 0..buffer.context_len[1] {
350        let c = buffer.context[1][i];
351        let this_type = get_joining_type(c, c.general_category());
352        if this_type == hb_arabic_joining_type_t::T {
353            continue;
354        }
355
356        let entry = &STATE_TABLE[state][this_type as usize];
357        if entry.0 != arabic_action_t::NONE && prev.is_some() {
358            if let Some(prev) = prev {
359                buffer.info[prev].set_arabic_shaping_action(entry.0);
360                buffer.safe_to_insert_tatweel(Some(prev), Some(buffer.len));
361            }
362        }
363        // States that have a possible prev_action.
364        else if 2 <= state && state <= 5 {
365            if let Some(prev) = prev {
366                buffer.unsafe_to_concat(Some(prev), Some(buffer.len));
367            }
368        }
369
370        break;
371    }
372}
373
374fn mongolian_variation_selectors(buffer: &mut hb_buffer_t) {
375    // Copy arabic_shaping_action() from base to Mongolian variation selectors.
376    let len = buffer.len;
377    let info = &mut buffer.info;
378    for i in 1..len {
379        if (0x180B..=0x180D).contains(&info[i].glyph_id) || info[i].glyph_id == 0x180F {
380            let a = info[i - 1].arabic_shaping_action();
381            info[i].set_arabic_shaping_action(a);
382        }
383    }
384}
385
386fn setup_masks_arabic_plan(plan: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
387    let arabic_plan = plan.data::<arabic_shape_plan_t>();
388    setup_masks_inner(arabic_plan, plan.script, buffer)
389}
390
391pub fn setup_masks_inner(
392    arabic_plan: &arabic_shape_plan_t,
393    script: Option<Script>,
394    buffer: &mut hb_buffer_t,
395) {
396    arabic_joining(buffer);
397    if script == Some(script::MONGOLIAN) {
398        mongolian_variation_selectors(buffer);
399    }
400
401    for info in buffer.info_slice_mut() {
402        info.mask |= arabic_plan.mask_array[info.arabic_shaping_action() as usize];
403    }
404}
405
/// GSUB pause callback registered after `rlig` for Arabic script.
///
/// This implementation ignores all of its arguments, leaves the buffer
/// untouched and always returns `false`.
fn arabic_fallback_shape(_: &hb_ot_shape_plan_t, _: &hb_font_t, _: &mut hb_buffer_t) -> bool {
    false
}
409
410// Stretch feature: "stch".
411// See example here:
412// https://docs.microsoft.com/en-us/typography/script-development/syriac
413// We implement this in a generic way, such that the Arabic subtending
414// marks can use it as well.
415fn record_stch(plan: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) -> bool {
416    let arabic_plan = plan.data::<arabic_shape_plan_t>();
417    if !arabic_plan.has_stch {
418        return false;
419    }
420
421    // 'stch' feature was just applied.  Look for anything that multiplied,
422    // and record it for stch treatment later.  Note that rtlm, frac, etc
423    // are applied before stch, but we assume that they didn't result in
424    // anything multiplying into 5 pieces, so it's safe-ish...
425
426    let len = buffer.len;
427    let info = &mut buffer.info;
428    let mut has_stch = false;
429    for glyph_info in &mut info[..len] {
430        if _hb_glyph_info_multiplied(glyph_info) {
431            let comp = if _hb_glyph_info_get_lig_comp(glyph_info) % 2 != 0 {
432                arabic_action_t::STRETCHING_REPEATING
433            } else {
434                arabic_action_t::STRETCHING_FIXED
435            };
436
437            glyph_info.set_arabic_shaping_action(comp);
438            has_stch = true;
439        }
440    }
441
442    if has_stch {
443        buffer.scratch_flags |= HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH;
444    }
445
446    false
447}
448
/// Expands the stretch tiles recorded by `record_stch`.
///
/// Does nothing unless `HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH` is set on the
/// buffer. Otherwise each run of stretch-tagged glyphs is rewritten so that
/// its repeating tiles are duplicated enough times to span the summed
/// `x_advance` of the word-like glyphs that precede the run in the buffer.
/// The buffer grows by the number of extra tile copies.
fn apply_stch(face: &hb_font_t, buffer: &mut hb_buffer_t) {
    if buffer.scratch_flags & HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH == 0 {
        return;
    }

    let rtl = buffer.direction == Direction::RightToLeft;

    // The loop below works on one fixed glyph order; non-RTL buffers are
    // reversed here and reversed back at the end.
    if !rtl {
        buffer.reverse();
    }

    // We do a two pass implementation:
    // First pass calculates the exact number of extra glyphs we need,
    // We then enlarge buffer to have that much room,
    // Second pass applies the stretch, copying things to the end of buffer.

    let mut extra_glyphs_needed: usize = 0; // Set during MEASURE, used during CUT
    const MEASURE: usize = 0;
    const CUT: usize = 1;

    for step in 0..2 {
        let new_len = buffer.len + extra_glyphs_needed; // write head during CUT
        // `i` is the read cursor and `j` the write cursor; both walk from the
        // end towards index 0, with `j` starting `extra_glyphs_needed` ahead.
        let mut i = buffer.len;
        let mut j = new_len;
        while i != 0 {
            // Ordinary glyph: during CUT just move it to its final slot.
            if !arabic_action_t::is_stch(buffer.info[i - 1].arabic_shaping_action()) {
                if step == CUT {
                    j -= 1;
                    buffer.info[j] = buffer.info[i - 1];
                    buffer.pos[j] = buffer.pos[i - 1];
                }

                i -= 1;
                continue;
            }

            // Yay, justification!

            let mut w_total = 0; // Total to be filled
            let mut w_fixed = 0; // Sum of fixed tiles
            let mut w_repeating = 0; // Sum of repeating tiles
            let mut n_repeating: i32 = 0;

            // Consume the whole run of stretch tiles `[start, end)` and add
            // up the widths of its fixed and repeating tiles.
            let end = i;
            while i != 0 && arabic_action_t::is_stch(buffer.info[i - 1].arabic_shaping_action()) {
                i -= 1;
                let width = face.glyph_h_advance(buffer.info[i].as_glyph());

                if buffer.info[i].arabic_shaping_action() == arabic_action_t::STRETCHING_FIXED {
                    w_fixed += width;
                } else {
                    w_repeating += width;
                    n_repeating += 1;
                }
            }

            // Extend `context` backwards over the glyphs to be covered:
            // non-tile glyphs that are default-ignorable or word-category.
            let start = i;
            let mut context = i;
            while context != 0
                && !arabic_action_t::is_stch(buffer.info[context - 1].arabic_shaping_action())
                && (_hb_glyph_info_is_default_ignorable(&buffer.info[context - 1])
                    || is_word_category(_hb_glyph_info_get_general_category(
                        &buffer.info[context - 1],
                    )))
            {
                context -= 1;
                w_total += buffer.pos[context].x_advance;
            }

            // Compensates for the `i -= 1` at the bottom of the outer loop,
            // so the next iteration resumes at `start`.
            i += 1; // Don't touch i again.

            // Number of additional times to repeat each repeating tile.
            let mut n_copies: i32 = 0;

            let mut w_remaining = w_total - w_fixed;
            if w_remaining > w_repeating && w_repeating > 0 {
                n_copies = w_remaining / (w_repeating) - 1;
            }

            // See if we can improve the fit by adding an extra repeat and squeezing them together a bit.
            let mut extra_repeat_overlap = 0;
            let shortfall = w_remaining - w_repeating * (n_copies + 1);
            if shortfall > 0 && n_repeating > 0 {
                n_copies += 1;
                let excess = (n_copies + 1) * w_repeating - w_remaining;
                if excess > 0 {
                    extra_repeat_overlap = excess / (n_copies * n_repeating);
                    w_remaining = 0;
                }
            }

            if step == MEASURE {
                // Only count how much room the CUT pass will need.
                extra_glyphs_needed += (n_copies * n_repeating) as usize;
            } else {
                buffer.unsafe_to_break(Some(context), Some(end));
                let mut x_offset = w_remaining / 2;
                // Emit the tiles of the run from its last glyph to its first.
                for k in (start + 1..=end).rev() {
                    let width = face.glyph_h_advance(buffer.info[k - 1].as_glyph());

                    // Fixed tiles are emitted once, repeating tiles `1 + n_copies` times.
                    let mut repeat = 1;
                    if buffer.info[k - 1].arabic_shaping_action()
                        == arabic_action_t::STRETCHING_REPEATING
                    {
                        repeat += n_copies;
                    }

                    // Tiles are placed purely through `x_offset`.
                    buffer.pos[k - 1].x_advance = 0;

                    for n in 0..repeat {
                        if rtl {
                            x_offset -= width;
                            if n > 0 {
                                x_offset += extra_repeat_overlap;
                            }
                        }

                        buffer.pos[k - 1].x_offset = x_offset;

                        // Append copy.
                        j -= 1;
                        buffer.info[j] = buffer.info[k - 1];
                        buffer.pos[j] = buffer.pos[k - 1];

                        if !rtl {
                            x_offset += width;

                            if n > 0 {
                                x_offset -= extra_repeat_overlap;
                            }
                        }
                    }
                }
            }

            i -= 1;
        }

        if step == MEASURE {
            // Grow the buffer before CUT; skip the CUT pass if that fails.
            if !buffer.ensure(buffer.len + extra_glyphs_needed) {
                break;
            }
        } else {
            // Every slot of the enlarged buffer must have been written.
            debug_assert_eq!(j, 0);
            buffer.len = new_len;
        }
    }

    if !rtl {
        buffer.reverse();
    }
}
600
601fn postprocess_glyphs_arabic(_: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) {
602    apply_stch(face, buffer)
603}
604
// http://www.unicode.org/reports/tr53/
/// Code points that `reorder_marks_arabic` treats as modifier combining
/// marks: a leading run of them with combining class 220 or 230 is moved to
/// the front of the mark sequence.
const MODIFIER_COMBINING_MARKS: &[u32] = &[
    0x0654, // ARABIC HAMZA ABOVE
    0x0655, // ARABIC HAMZA BELOW
    0x0658, // ARABIC MARK NOON GHUNNA
    0x06DC, // ARABIC SMALL HIGH SEEN
    0x06E3, // ARABIC SMALL LOW SEEN
    0x06E7, // ARABIC SMALL HIGH YEH
    0x06E8, // ARABIC SMALL HIGH NOON
    0x08CA, // ARABIC SMALL HIGH FARSI YEH
    0x08CB, // ARABIC SMALL HIGH YEH BARREE WITH TWO DOTS BELOW
    0x08CD, // ARABIC SMALL HIGH ZAH
    0x08CE, // ARABIC LARGE ROUND DOT ABOVE
    0x08CF, // ARABIC LARGE ROUND DOT BELOW
    0x08D3, // ARABIC SMALL LOW WAW
    0x08F3, // ARABIC SMALL HIGH WAW
];
622
623fn reorder_marks_arabic(
624    _: &hb_ot_shape_plan_t,
625    buffer: &mut hb_buffer_t,
626    mut start: usize,
627    end: usize,
628) {
629    let mut i = start;
630    for cc in [220u8, 230].iter().cloned() {
631        while i < end && _hb_glyph_info_get_modified_combining_class(&buffer.info[i]) < cc {
632            i += 1;
633        }
634
635        if i == end {
636            break;
637        }
638
639        if _hb_glyph_info_get_modified_combining_class(&buffer.info[i]) > cc {
640            continue;
641        }
642
643        let mut j = i;
644        while j < end
645            && _hb_glyph_info_get_modified_combining_class(&buffer.info[j]) == cc
646            && MODIFIER_COMBINING_MARKS.contains(&buffer.info[j].glyph_id)
647        {
648            j += 1;
649        }
650
651        if i == j {
652            continue;
653        }
654
655        // Shift it!
656        let mut temp = [hb_glyph_info_t::default(); MAX_COMBINING_MARKS];
657        debug_assert!(j - i <= MAX_COMBINING_MARKS);
658        buffer.merge_clusters(start, j);
659
660        temp[..j - i].copy_from_slice(&buffer.info[i..j]);
661
662        for k in (0..i - start).rev() {
663            buffer.info[k + start + j - i] = buffer.info[k + start];
664        }
665
666        buffer.info[start..][..j - i].copy_from_slice(&temp[..j - i]);
667
668        // Renumber CC such that the reordered sequence is still sorted.
669        // 22 and 26 are chosen because they are smaller than all Arabic categories,
670        // and are folded back to 220/230 respectively during fallback mark positioning.
671        //
672        // We do this because the CGJ-handling logic in the normalizer relies on
673        // mark sequences having an increasing order even after this reordering.
674        // https://github.com/harfbuzz/harfbuzz/issues/554
675        // This, however, does break some obscure sequences, where the normalizer
676        // might compose a sequence that it should not.  For example, in the seequence
677        // ALEF, HAMZAH, MADDAH, we should NOT try to compose ALEF+MADDAH, but with this
678        // renumbering, we will.
679        let new_start = start + j - i;
680        let new_cc = if cc == 220 {
681            modified_combining_class::CCC22
682        } else {
683            modified_combining_class::CCC26
684        };
685
686        while start < new_start {
687            _hb_glyph_info_set_modified_combining_class(&mut buffer.info[start], new_cc);
688            start += 1;
689        }
690
691        i = j;
692    }
693}
694
/// Shaper descriptor wiring the Arabic-specific hooks defined in this file
/// into the generic OpenType shaping pipeline.
pub const ARABIC_SHAPER: hb_ot_shaper_t = hb_ot_shaper_t {
    collect_features: Some(collect_features),
    override_features: None,
    // Plan data is boxed; the hooks read it back via `plan.data::<arabic_shape_plan_t>()`.
    create_data: Some(|plan| Box::new(data_create_arabic(plan))),
    preprocess_text: None,
    postprocess_glyphs: Some(postprocess_glyphs_arabic),
    normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_AUTO,
    decompose: None,
    compose: None,
    setup_masks: Some(setup_masks_arabic_plan),
    gpos_tag: None,
    reorder_marks: Some(reorder_marks_arabic),
    zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
    fallback_position: true,
};
707    zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
708    fallback_position: true,
709};