fonts/shapers/
mod.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5mod harfbuzz;
6use std::cmp;
7
8use app_units::Au;
9use base::text::is_bidi_control;
10use euclid::default::Point2D;
11use fonts_traits::ByteIndex;
12pub(crate) use harfbuzz::Shaper;
13use log::debug;
14use num_traits::Zero as _;
15
16const NO_GLYPH: i32 = -1;
17
18use crate::{Font, GlyphData, GlyphId, GlyphStore, ShapingOptions, advance_for_shaped_glyph};
19
20/// Utility function to convert a `unicode_script::Script` enum into the corresponding `c_uint` tag that
21/// harfbuzz uses to represent unicode scipts.
22fn unicode_script_to_iso15924_tag(script: unicode_script::Script) -> u32 {
23    let bytes: [u8; 4] = match script {
24        unicode_script::Script::Unknown => *b"Zzzz",
25        _ => {
26            let short_name = script.short_name();
27            short_name.as_bytes().try_into().unwrap()
28        },
29    };
30
31    u32::from_be_bytes(bytes)
32}
33
/// The shaping result for a single glyph, as returned by
/// `HarfBuzzShapedGlyphData::entry_for_glyph`.
struct ShapedGlyphEntry {
    /// Identifier of the shaped glyph; forwarded as the glyph id to `GlyphData::new`.
    codepoint: GlyphId,
    /// Advance of the shaped glyph. On the single-glyph path it is first adjusted by
    /// `advance_for_shaped_glyph`; on the multi-glyph path it is used as-is.
    advance: Au,
    /// Optional positional offset of the glyph; forwarded unchanged to `GlyphData::new`.
    // NOTE(review): presumably `None` when the shaper applied no offset -- confirm
    // against the implementations of `entry_for_glyph`.
    offset: Option<Point2D<Au>>,
}
39
/// Holds the results of shaping. Abstracts over HarfBuzz and HarfRust, which return data in
/// very similar form but with different types.
///
/// Glyphs are addressed by index `i`; `shape_text_harfbuzz` only calls the per-glyph methods
/// with indices in `0..len()`.
trait HarfBuzzShapedGlyphData {
    /// The number of shaped glyphs.
    fn len(&self) -> usize;
    /// The byte offset of the shaped glyph in the source text.
    ///
    /// `shape_text_harfbuzz` compares this value against the byte length of the text and
    /// uses it to index a per-byte table.
    fn byte_offset_of_glyph(&self, i: usize) -> u32;
    /// Returns shaped glyph data for one glyph, and updates the y-position of the pen.
    ///
    /// `y_pos` is carried by the caller from one call to the next.
    fn entry_for_glyph(&self, i: usize, y_pos: &mut Au) -> ShapedGlyphEntry;
}
50
51/// Shape text using an `impl HarfBuzzShaper`
52fn shape_text_harfbuzz<ShapedGlyphData: HarfBuzzShapedGlyphData>(
53    glyph_data: &ShapedGlyphData,
54    font: &Font,
55    text: &str,
56    options: &ShapingOptions,
57    glyphs: &mut GlyphStore,
58) {
59    let glyph_count = glyph_data.len();
60    let byte_max = text.len();
61
62    debug!(
63        "Shaped text[byte count={}], got back {} glyph info records.",
64        byte_max, glyph_count
65    );
66
67    // make map of what chars have glyphs
68    let mut byte_to_glyph = vec![NO_GLYPH; byte_max];
69
70    debug!("(glyph idx) -> (text byte offset)");
71    for i in 0..glyph_data.len() {
72        let loc = glyph_data.byte_offset_of_glyph(i) as usize;
73        if loc < byte_max {
74            byte_to_glyph[loc] = i as i32;
75        } else {
76            debug!(
77                "ERROR: tried to set out of range byte_to_glyph: idx={}, glyph idx={}",
78                loc, i
79            );
80        }
81        debug!("{} -> {}", i, loc);
82    }
83
84    debug!("text: {:?}", text);
85    debug!("(char idx): char->(glyph index):");
86    for (i, ch) in text.char_indices() {
87        debug!("{}: {:?} --> {}", i, ch, byte_to_glyph[i]);
88    }
89
90    let mut glyph_span = 0..0;
91    let mut byte_range = 0..0;
92
93    let mut y_pos = Au::zero();
94
95    // main loop over each glyph. each iteration usually processes 1 glyph and 1+ chars.
96    // in cases with complex glyph-character associations, 2+ glyphs and 1+ chars can be
97    // processed.
98    while glyph_span.start < glyph_count {
99        debug!("Processing glyph at idx={}", glyph_span.start);
100        glyph_span.end = glyph_span.start;
101        byte_range.end = glyph_data.byte_offset_of_glyph(glyph_span.start) as usize;
102
103        while byte_range.end < byte_max {
104            byte_range.end += 1;
105            // Extend the byte range to include any following byte without its own glyph.
106            while byte_range.end < byte_max && byte_to_glyph[byte_range.end] == NO_GLYPH {
107                byte_range.end += 1;
108            }
109
110            // Extend the glyph range to include all glyphs covered by bytes processed so far.
111            let mut max_glyph_idx = glyph_span.end;
112            for glyph_idx in &byte_to_glyph[byte_range.clone()] {
113                if *glyph_idx != NO_GLYPH {
114                    max_glyph_idx = cmp::max(*glyph_idx as usize + 1, max_glyph_idx);
115                }
116            }
117            if max_glyph_idx > glyph_span.end {
118                glyph_span.end = max_glyph_idx;
119                debug!("Extended glyph span to {:?}", glyph_span);
120            }
121
122            // if there's just one glyph, then we don't need further checks.
123            if glyph_span.len() == 1 {
124                break;
125            }
126
127            // if no glyphs were found yet, extend the char byte range more.
128            if glyph_span.is_empty() {
129                continue;
130            }
131
132            // If byte_range now includes all the byte offsets found in glyph_span, then we
133            // have found a contiguous "cluster" and can stop extending it.
134            let mut all_glyphs_are_within_cluster: bool = true;
135            for j in glyph_span.clone() {
136                let loc = glyph_data.byte_offset_of_glyph(j) as usize;
137                if !(byte_range.start <= loc && loc < byte_range.end) {
138                    all_glyphs_are_within_cluster = false;
139                    break;
140                }
141            }
142            if all_glyphs_are_within_cluster {
143                break;
144            }
145
146            // Otherwise, the bytes we have seen so far correspond to a non-contiguous set of
147            // glyphs.  Keep extending byte_range until we fill in all the holes in the glyph
148            // span or reach the end of the text.
149        }
150
151        assert!(!byte_range.is_empty());
152        assert!(!glyph_span.is_empty());
153
154        // Now byte_range is the ligature clump formed by the glyphs in glyph_span.
155        // We will save these glyphs to the glyph store at the index of the first byte.
156        let byte_idx = ByteIndex(byte_range.start as isize);
157
158        if glyph_span.len() == 1 {
159            // Fast path: 1-to-1 mapping of byte offset to single glyph.
160            //
161            // TODO(Issue #214): cluster ranges need to be computed before
162            // shaping, and then consulted here.
163            // for now, just pretend that every character is a cluster start.
164            // (i.e., pretend there are no combining character sequences).
165            // 1-to-1 mapping of character to glyph also treated as ligature start.
166            //
167            // NB: When we acquire the ability to handle ligatures that cross word boundaries,
168            // we'll need to do something special to handle `word-spacing` properly.
169            let character = text[byte_range.clone()].chars().next().unwrap();
170            if is_bidi_control(character) {
171                // Don't add any glyphs for bidi control chars
172            } else {
173                let (glyph_id, advance, offset) = if character == '\t' {
174                    // Treat tabs in pre-formatted text as a fixed number of spaces. The glyph id does
175                    // not matter here as Servo doesn't render any glyphs for whitespace.
176                    //
177                    // TODO: Proper tab stops. This should happen in layout and be based on the
178                    // size of the space character of the inline formatting context.
179                    (
180                        font.glyph_index(' ').unwrap_or(0),
181                        font.metrics.space_advance * 8,
182                        Default::default(),
183                    )
184                } else {
185                    let shape = glyph_data.entry_for_glyph(glyph_span.start, &mut y_pos);
186                    let advance = advance_for_shaped_glyph(shape.advance, character, options);
187                    (shape.codepoint, advance, shape.offset)
188                };
189
190                let data = GlyphData::new(glyph_id, advance, offset, true, true);
191                glyphs.add_glyph_for_byte_index(byte_idx, character, &data);
192            }
193        } else {
194            // collect all glyphs to be assigned to the first character.
195            let mut datas = vec![];
196
197            for glyph_i in glyph_span.clone() {
198                let shape = glyph_data.entry_for_glyph(glyph_i, &mut y_pos);
199                datas.push(GlyphData::new(
200                    shape.codepoint,
201                    shape.advance,
202                    shape.offset,
203                    true, // treat as cluster start
204                    glyph_i > glyph_span.start,
205                ));
206                // all but first are ligature continuations
207            }
208            // now add the detailed glyph entry.
209            glyphs.add_glyphs_for_byte_index(byte_idx, &datas);
210        }
211
212        glyph_span.start = glyph_span.end;
213        byte_range.start = byte_range.end;
214    }
215
216    // this must be called after adding all glyph data; it sorts the
217    // lookup table for finding detailed glyphs by associated char index.
218    glyphs.finalize_changes();
219}