fonts/shapers/mod.rs
1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5mod harfbuzz;
6use std::cmp;
7
8use app_units::Au;
9use base::text::is_bidi_control;
10use euclid::default::Point2D;
11use fonts_traits::ByteIndex;
12pub(crate) use harfbuzz::Shaper;
13use log::debug;
14use num_traits::Zero as _;
15
16const NO_GLYPH: i32 = -1;
17
18use crate::{Font, GlyphData, GlyphId, GlyphStore, ShapingOptions, advance_for_shaped_glyph};
19
20/// Utility function to convert a `unicode_script::Script` enum into the corresponding `c_uint` tag that
21/// harfbuzz uses to represent unicode scipts.
22fn unicode_script_to_iso15924_tag(script: unicode_script::Script) -> u32 {
23 let bytes: [u8; 4] = match script {
24 unicode_script::Script::Unknown => *b"Zzzz",
25 _ => {
26 let short_name = script.short_name();
27 short_name.as_bytes().try_into().unwrap()
28 },
29 };
30
31 u32::from_be_bytes(bytes)
32}
33
34struct ShapedGlyphEntry {
35 codepoint: GlyphId,
36 advance: Au,
37 offset: Option<Point2D<Au>>,
38}
39
40/// Holds the results of shaping. Abstracts over HarfBuzz and HarfRust which return data in very similar
41/// form but with different types
42trait HarfBuzzShapedGlyphData {
43 /// The number of shaped glyphs
44 fn len(&self) -> usize;
45 /// The byte offset of the shaped glyph in the souce text
46 fn byte_offset_of_glyph(&self, i: usize) -> u32;
47 /// Returns shaped glyph data for one glyph, and updates the y-position of the pen.
48 fn entry_for_glyph(&self, i: usize, y_pos: &mut Au) -> ShapedGlyphEntry;
49}
50
51/// Shape text using an `impl HarfBuzzShaper`
52fn shape_text_harfbuzz<ShapedGlyphData: HarfBuzzShapedGlyphData>(
53 glyph_data: &ShapedGlyphData,
54 font: &Font,
55 text: &str,
56 options: &ShapingOptions,
57 glyphs: &mut GlyphStore,
58) {
59 let glyph_count = glyph_data.len();
60 let byte_max = text.len();
61
62 debug!(
63 "Shaped text[byte count={}], got back {} glyph info records.",
64 byte_max, glyph_count
65 );
66
67 // make map of what chars have glyphs
68 let mut byte_to_glyph = vec![NO_GLYPH; byte_max];
69
70 debug!("(glyph idx) -> (text byte offset)");
71 for i in 0..glyph_data.len() {
72 let loc = glyph_data.byte_offset_of_glyph(i) as usize;
73 if loc < byte_max {
74 byte_to_glyph[loc] = i as i32;
75 } else {
76 debug!(
77 "ERROR: tried to set out of range byte_to_glyph: idx={}, glyph idx={}",
78 loc, i
79 );
80 }
81 debug!("{} -> {}", i, loc);
82 }
83
84 debug!("text: {:?}", text);
85 debug!("(char idx): char->(glyph index):");
86 for (i, ch) in text.char_indices() {
87 debug!("{}: {:?} --> {}", i, ch, byte_to_glyph[i]);
88 }
89
90 let mut glyph_span = 0..0;
91 let mut byte_range = 0..0;
92
93 let mut y_pos = Au::zero();
94
95 // main loop over each glyph. each iteration usually processes 1 glyph and 1+ chars.
96 // in cases with complex glyph-character associations, 2+ glyphs and 1+ chars can be
97 // processed.
98 while glyph_span.start < glyph_count {
99 debug!("Processing glyph at idx={}", glyph_span.start);
100 glyph_span.end = glyph_span.start;
101 byte_range.end = glyph_data.byte_offset_of_glyph(glyph_span.start) as usize;
102
103 while byte_range.end < byte_max {
104 byte_range.end += 1;
105 // Extend the byte range to include any following byte without its own glyph.
106 while byte_range.end < byte_max && byte_to_glyph[byte_range.end] == NO_GLYPH {
107 byte_range.end += 1;
108 }
109
110 // Extend the glyph range to include all glyphs covered by bytes processed so far.
111 let mut max_glyph_idx = glyph_span.end;
112 for glyph_idx in &byte_to_glyph[byte_range.clone()] {
113 if *glyph_idx != NO_GLYPH {
114 max_glyph_idx = cmp::max(*glyph_idx as usize + 1, max_glyph_idx);
115 }
116 }
117 if max_glyph_idx > glyph_span.end {
118 glyph_span.end = max_glyph_idx;
119 debug!("Extended glyph span to {:?}", glyph_span);
120 }
121
122 // if there's just one glyph, then we don't need further checks.
123 if glyph_span.len() == 1 {
124 break;
125 }
126
127 // if no glyphs were found yet, extend the char byte range more.
128 if glyph_span.is_empty() {
129 continue;
130 }
131
132 // If byte_range now includes all the byte offsets found in glyph_span, then we
133 // have found a contiguous "cluster" and can stop extending it.
134 let mut all_glyphs_are_within_cluster: bool = true;
135 for j in glyph_span.clone() {
136 let loc = glyph_data.byte_offset_of_glyph(j) as usize;
137 if !(byte_range.start <= loc && loc < byte_range.end) {
138 all_glyphs_are_within_cluster = false;
139 break;
140 }
141 }
142 if all_glyphs_are_within_cluster {
143 break;
144 }
145
146 // Otherwise, the bytes we have seen so far correspond to a non-contiguous set of
147 // glyphs. Keep extending byte_range until we fill in all the holes in the glyph
148 // span or reach the end of the text.
149 }
150
151 assert!(!byte_range.is_empty());
152 assert!(!glyph_span.is_empty());
153
154 // Now byte_range is the ligature clump formed by the glyphs in glyph_span.
155 // We will save these glyphs to the glyph store at the index of the first byte.
156 let byte_idx = ByteIndex(byte_range.start as isize);
157
158 if glyph_span.len() == 1 {
159 // Fast path: 1-to-1 mapping of byte offset to single glyph.
160 //
161 // TODO(Issue #214): cluster ranges need to be computed before
162 // shaping, and then consulted here.
163 // for now, just pretend that every character is a cluster start.
164 // (i.e., pretend there are no combining character sequences).
165 // 1-to-1 mapping of character to glyph also treated as ligature start.
166 //
167 // NB: When we acquire the ability to handle ligatures that cross word boundaries,
168 // we'll need to do something special to handle `word-spacing` properly.
169 let character = text[byte_range.clone()].chars().next().unwrap();
170 if is_bidi_control(character) {
171 // Don't add any glyphs for bidi control chars
172 } else {
173 let (glyph_id, advance, offset) = if character == '\t' {
174 // Treat tabs in pre-formatted text as a fixed number of spaces. The glyph id does
175 // not matter here as Servo doesn't render any glyphs for whitespace.
176 //
177 // TODO: Proper tab stops. This should happen in layout and be based on the
178 // size of the space character of the inline formatting context.
179 (
180 font.glyph_index(' ').unwrap_or(0),
181 font.metrics.space_advance * 8,
182 Default::default(),
183 )
184 } else {
185 let shape = glyph_data.entry_for_glyph(glyph_span.start, &mut y_pos);
186 let advance = advance_for_shaped_glyph(shape.advance, character, options);
187 (shape.codepoint, advance, shape.offset)
188 };
189
190 let data = GlyphData::new(glyph_id, advance, offset, true, true);
191 glyphs.add_glyph_for_byte_index(byte_idx, character, &data);
192 }
193 } else {
194 // collect all glyphs to be assigned to the first character.
195 let mut datas = vec![];
196
197 for glyph_i in glyph_span.clone() {
198 let shape = glyph_data.entry_for_glyph(glyph_i, &mut y_pos);
199 datas.push(GlyphData::new(
200 shape.codepoint,
201 shape.advance,
202 shape.offset,
203 true, // treat as cluster start
204 glyph_i > glyph_span.start,
205 ));
206 // all but first are ligature continuations
207 }
208 // now add the detailed glyph entry.
209 glyphs.add_glyphs_for_byte_index(byte_idx, &datas);
210 }
211
212 glyph_span.start = glyph_span.end;
213 byte_range.start = byte_range.end;
214 }
215
216 // this must be called after adding all glyph data; it sorts the
217 // lookup table for finding detailed glyphs by associated char index.
218 glyphs.finalize_changes();
219}