base/
rope.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::iter::once;
6use std::ops::Range;
7
8use malloc_size_of_derive::MallocSizeOf;
9use rayon::iter::Either;
10use unicode_segmentation::UnicodeSegmentation;
11
12use crate::text::{Utf8CodeUnitLength, Utf16CodeUnitLength};
13
14fn contents_vec(contents: impl Into<String>) -> Vec<String> {
15    let mut contents: Vec<_> = contents
16        .into()
17        .split('\n')
18        .map(|line| format!("{line}\n"))
19        .collect();
20    // The last line should not have a newline.
21    if let Some(last_line) = contents.last_mut() {
22        last_line.truncate(last_line.len() - 1);
23    }
24    contents
25}
26
27/// Describes a unit of movement for [`Rope::move_by`].
28pub enum RopeMovement {
29    Character,
30    Grapheme,
31    Word,
32    Line,
33    LineStartOrEnd,
34    RopeStartOrEnd,
35}
36
37/// An implementation of a [rope data structure], composed of lines of
38/// owned strings. This is used to implement text controls in Servo.
39///
40/// [rope data structure]: https://en.wikipedia.org/wiki/Rope_(data_structure)
41#[derive(MallocSizeOf)]
42pub struct Rope {
43    /// The lines of the rope. Each line is an owned string that ends with a newline
44    /// (`\n`), apart from the last line which has no trailing newline.
45    lines: Vec<String>,
46}
47
48impl Rope {
49    pub fn new(contents: impl Into<String>) -> Self {
50        Self {
51            lines: contents_vec(contents),
52        }
53    }
54
55    pub fn contents(&self) -> String {
56        self.lines.join("")
57    }
58
59    pub fn last_index(&self) -> RopeIndex {
60        let line_index = self.lines.len() - 1;
61        RopeIndex::new(line_index, self.line(line_index).len())
62    }
63
64    /// Replace the given range of [`RopeIndex`]s with the given string. Returns the
65    /// [`RopeIndex`] of the end of the insertion.
66    pub fn replace_range(
67        &mut self,
68        mut range: Range<RopeIndex>,
69        string: impl Into<String>,
70    ) -> RopeIndex {
71        range.start = self.normalize_index(range.start);
72        range.end = self.normalize_index(range.end);
73        assert!(range.start <= range.end);
74
75        let start_index = range.start;
76        self.delete_range(range);
77
78        let mut new_contents = contents_vec(string);
79        let Some(first_line_of_new_contents) = new_contents.first() else {
80            return start_index;
81        };
82
83        if new_contents.len() == 1 {
84            self.line_for_index_mut(start_index)
85                .insert_str(start_index.code_point, first_line_of_new_contents);
86            return RopeIndex::new(
87                start_index.line,
88                start_index.code_point + first_line_of_new_contents.len(),
89            );
90        }
91
92        let start_line = self.line_for_index_mut(start_index);
93        let last_line = new_contents.last().expect("Should have at least one line");
94        let last_index = RopeIndex::new(
95            start_index.line + new_contents.len().saturating_sub(1),
96            last_line.len(),
97        );
98
99        let remaining_string = start_line.split_off(start_index.code_point);
100        start_line.push_str(first_line_of_new_contents);
101        new_contents
102            .last_mut()
103            .expect("Should have at least one line")
104            .push_str(&remaining_string);
105
106        let splice_index = start_index.line + 1;
107        self.lines
108            .splice(splice_index..splice_index, new_contents.into_iter().skip(1));
109        last_index
110    }
111
112    fn delete_range(&mut self, mut range: Range<RopeIndex>) {
113        range.start = self.normalize_index(range.start);
114        range.end = self.normalize_index(range.end);
115        assert!(range.start <= range.end);
116
117        if range.start.line == range.end.line {
118            self.line_for_index_mut(range.start)
119                .replace_range(range.start.code_point..range.end.code_point, "");
120            return;
121        }
122
123        // Remove the start line and any before the last line.
124        let removed_lines = self.lines.splice(range.start.line..range.end.line, []);
125        let first_line = removed_lines
126            .into_iter()
127            .nth(0)
128            .expect("Should have removed at least one line");
129
130        let first_line_prefix = &first_line[0..range.start.code_point];
131        let new_end_line = range.start.line;
132        self.lines[new_end_line].replace_range(0..range.end.code_point, first_line_prefix);
133    }
134
135    /// Create a [`RopeSlice`] for this [`Rope`] from `start` to `end`. If either of
136    /// these is `None`, then the slice will extend to the extent of the rope.
137    pub fn slice<'a>(&'a self, start: Option<RopeIndex>, end: Option<RopeIndex>) -> RopeSlice<'a> {
138        RopeSlice {
139            rope: self,
140            start: start.unwrap_or_default(),
141            end: end.unwrap_or_else(|| self.last_index()),
142        }
143    }
144
145    pub fn chars<'a>(&'a self) -> RopeChars<'a> {
146        self.slice(None, None).chars()
147    }
148
149    /// Return `true` if the [`Rope`] is empty or false otherwise. This will also
150    /// return `true` if the contents of the [`Rope`] are a single empty line.
151    pub fn is_empty(&self) -> bool {
152        self.lines.first().is_none_or(String::is_empty)
153    }
154
155    /// The total number of code units required to encode the content in utf16.
156    pub fn len_utf16(&self) -> Utf16CodeUnitLength {
157        Utf16CodeUnitLength(self.chars().map(char::len_utf16).sum())
158    }
159
160    fn line(&self, index: usize) -> &str {
161        &self.lines[index]
162    }
163
164    fn line_for_index(&self, index: RopeIndex) -> &String {
165        &self.lines[index.line]
166    }
167
168    fn line_for_index_mut(&mut self, index: RopeIndex) -> &mut String {
169        &mut self.lines[index.line]
170    }
171
172    fn last_index_in_line(&self, line: usize) -> RopeIndex {
173        if line >= self.lines.len() - 1 {
174            return self.last_index();
175        }
176        RopeIndex {
177            line,
178            code_point: self.line(line).len() - 1,
179        }
180    }
181
182    /// Return a [`RopeIndex`] which points to the start of the subsequent line.
183    /// If the given [`RopeIndex`] is already on the final line, this will return
184    /// the final index of the entire [`Rope`].
185    fn start_of_following_line(&self, index: RopeIndex) -> RopeIndex {
186        if index.line >= self.lines.len() - 1 {
187            return self.last_index();
188        }
189        RopeIndex::new(index.line + 1, 0)
190    }
191
192    /// Return a [`RopeIndex`] which points to the end of preceding line. If already
193    /// at the end of the first line, this will return the start index of the entire
194    /// [`Rope`].
195    fn end_of_preceding_line(&self, index: RopeIndex) -> RopeIndex {
196        if index.line == 0 {
197            return Default::default();
198        }
199        let line_index = index.line.saturating_sub(1);
200        RopeIndex::new(line_index, self.line(line_index).len())
201    }
202
203    pub fn move_by(&self, origin: RopeIndex, unit: RopeMovement, amount: isize) -> RopeIndex {
204        if amount == 0 {
205            return origin;
206        }
207
208        match unit {
209            RopeMovement::Character | RopeMovement::Grapheme | RopeMovement::Word => {
210                self.move_by_iterator(origin, unit, amount)
211            },
212            RopeMovement::Line => self.move_by_lines(origin, amount),
213            RopeMovement::LineStartOrEnd => {
214                if amount >= 0 {
215                    self.last_index_in_line(origin.line)
216                } else {
217                    RopeIndex::new(origin.line, 0)
218                }
219            },
220            RopeMovement::RopeStartOrEnd => {
221                if amount >= 0 {
222                    self.last_index()
223                } else {
224                    Default::default()
225                }
226            },
227        }
228    }
229
230    fn move_by_lines(&self, origin: RopeIndex, lines_to_move: isize) -> RopeIndex {
231        let new_line_index = (origin.line as isize) + lines_to_move;
232        if new_line_index < 0 {
233            return Default::default();
234        }
235        if new_line_index > (self.lines.len() - 1) as isize {
236            return self.last_index();
237        }
238
239        let new_line_index = new_line_index.unsigned_abs();
240        let char_count = self.line(origin.line)[0..origin.code_point].chars().count();
241        let new_code_point_index = self
242            .line(new_line_index)
243            .char_indices()
244            .take(char_count)
245            .last()
246            .map(|(byte_index, character)| byte_index + character.len_utf8())
247            .unwrap_or_default();
248        RopeIndex::new(new_line_index, new_code_point_index)
249            .min(self.last_index_in_line(new_line_index))
250    }
251
252    fn move_by_iterator(&self, origin: RopeIndex, unit: RopeMovement, amount: isize) -> RopeIndex {
253        assert_ne!(amount, 0);
254        let (boundary_value, slice) = if amount > 0 {
255            (self.last_index(), self.slice(Some(origin), None))
256        } else {
257            (RopeIndex::default(), self.slice(None, Some(origin)))
258        };
259
260        let iterator = match unit {
261            RopeMovement::Character => slice.char_indices(),
262            RopeMovement::Grapheme => slice.grapheme_indices(),
263            RopeMovement::Word => slice.word_indices(),
264            _ => unreachable!("Should not be called for other movement types"),
265        };
266        let iterator = if amount > 0 {
267            Either::Left(iterator)
268        } else {
269            Either::Right(iterator.rev())
270        };
271
272        let mut iterations = amount.unsigned_abs();
273        for mut index in iterator {
274            iterations = iterations.saturating_sub(1);
275            if iterations == 0 {
276                // Instead of returning offsets for the absolute end of a line, return the
277                // start offset for the next line.
278                if index.code_point >= self.line_for_index(index).len() {
279                    index = self.start_of_following_line(index);
280                }
281                return index;
282            }
283        }
284
285        boundary_value
286    }
287
288    /// Given a [`RopeIndex`], clamp it and ensure that it is on a character boundary,
289    /// meaning that its indices are all bound by the actual size of the line and the
290    /// number of lines in this [`Rope`].
291    pub fn normalize_index(&self, rope_index: RopeIndex) -> RopeIndex {
292        let last_line = self.lines.len().saturating_sub(1);
293        let line_index = rope_index.line.min(last_line);
294
295        // This may appear a bit odd as we are adding an index to the end of the line,
296        // but `RopeIndex` isn't just an offset to a UTF-8 code point, but also can
297        // serve as the end of an exclusive range so there is one more index at the end
298        // that is still valid.
299        //
300        // Lines other than the last line have a trailing newline. We do not want to allow
301        // an index past the trailing newline.
302        let line = self.line(line_index);
303        let line_length_utf8 = if line_index == last_line {
304            line.len()
305        } else {
306            line.len() - 1
307        };
308
309        let mut code_point = rope_index.code_point.min(line_length_utf8);
310        while code_point < line.len() && !line.is_char_boundary(code_point) {
311            code_point += 1;
312        }
313
314        RopeIndex::new(line_index, code_point)
315    }
316
317    /// Convert a [`RopeIndex`] into a byte offset from the start of the content.
318    pub fn index_to_utf8_offset(&self, rope_index: RopeIndex) -> Utf8CodeUnitLength {
319        let rope_index = self.normalize_index(rope_index);
320        Utf8CodeUnitLength(
321            self.lines
322                .iter()
323                .take(rope_index.line)
324                .map(String::len)
325                .sum::<usize>() +
326                rope_index.code_point,
327        )
328    }
329
330    pub fn index_to_utf16_offset(&self, rope_index: RopeIndex) -> Utf16CodeUnitLength {
331        let rope_index = self.normalize_index(rope_index);
332        let final_line = self.line(rope_index.line);
333
334        // The offset might be past the end of the line due to being an exclusive offset.
335        let final_line_offset = Utf16CodeUnitLength(
336            final_line[0..rope_index.code_point]
337                .chars()
338                .map(char::len_utf16)
339                .sum(),
340        );
341
342        self.lines
343            .iter()
344            .take(rope_index.line)
345            .map(|line| Utf16CodeUnitLength(line.chars().map(char::len_utf16).sum()))
346            .sum::<Utf16CodeUnitLength>() +
347            final_line_offset
348    }
349
350    /// Convert a [`RopeIndex`] into a character offset from the start of the content.
351    pub fn index_to_character_offset(&self, rope_index: RopeIndex) -> usize {
352        let rope_index = self.normalize_index(rope_index);
353
354        // The offset might be past the end of the line due to being an exclusive offset.
355        let final_line = self.line(rope_index.line);
356        let final_line_offset = final_line[0..rope_index.code_point].chars().count();
357        self.lines
358            .iter()
359            .take(rope_index.line)
360            .map(|line| line.chars().count())
361            .sum::<usize>() +
362            final_line_offset
363    }
364
365    /// Convert a byte offset from the start of the content into a [`RopeIndex`].
366    pub fn utf8_offset_to_rope_index(&self, utf8_offset: Utf8CodeUnitLength) -> RopeIndex {
367        let mut current_utf8_offset = utf8_offset.0;
368        for (line_index, line) in self.lines.iter().enumerate() {
369            if current_utf8_offset == 0 || current_utf8_offset < line.len() {
370                return RopeIndex::new(line_index, current_utf8_offset);
371            }
372            current_utf8_offset -= line.len();
373        }
374        self.last_index()
375    }
376
377    pub fn utf16_offset_to_utf8_offset(
378        &self,
379        utf16_offset: Utf16CodeUnitLength,
380    ) -> Utf8CodeUnitLength {
381        let mut current_utf16_offset = Utf16CodeUnitLength::zero();
382        let mut current_utf8_offset = Utf8CodeUnitLength::zero();
383
384        for character in self.chars() {
385            let utf16_length = character.len_utf16();
386            if current_utf16_offset + Utf16CodeUnitLength(utf16_length) > utf16_offset {
387                return current_utf8_offset;
388            }
389            current_utf8_offset += Utf8CodeUnitLength(character.len_utf8());
390            current_utf16_offset += Utf16CodeUnitLength(utf16_length);
391        }
392        current_utf8_offset
393    }
394
395    /// Find the boundaries of the word most relevant to the given [`RopeIndex`]. Word
396    /// returned in order or precedence:
397    ///
398    /// - If the index intersects the word or is the index directly preceding a word,
399    ///   the boundaries of that word are returned.
400    /// - The word preceding the cursor.
401    /// - If there is no word preceding the cursor, the start of the line to the end
402    ///   of the next word.
403    pub fn relevant_word_boundaries<'a>(&'a self, index: RopeIndex) -> RopeSlice<'a> {
404        let line = self.line_for_index(index);
405        let mut result_start = 0;
406        let mut result_end = None;
407        for (word_start, word) in line.unicode_word_indices() {
408            if word_start > index.code_point {
409                result_end = result_end.or_else(|| Some(word_start + word.len()));
410                break;
411            }
412            result_start = word_start;
413            result_end = Some(word_start + word.len());
414        }
415
416        let result_end = result_end.unwrap_or(result_start);
417        self.slice(
418            Some(RopeIndex::new(index.line, result_start)),
419            Some(RopeIndex::new(index.line, result_end)),
420        )
421    }
422
423    /// Return the boundaries of the line that contains the given [`RopeIndex`].
424    pub fn line_boundaries<'a>(&'a self, index: RopeIndex) -> RopeSlice<'a> {
425        self.slice(
426            Some(RopeIndex::new(index.line, 0)),
427            Some(self.last_index_in_line(index.line)),
428        )
429    }
430
431    fn character_at(&self, index: RopeIndex) -> Option<char> {
432        let line = self.line_for_index(index);
433        line[index.code_point..].chars().next()
434    }
435
436    fn character_before(&self, index: RopeIndex) -> Option<char> {
437        let line = self.line_for_index(index);
438        line[..index.code_point].chars().next_back()
439    }
440}
441
442/// An index into a [`Rope`] data structure. Used to efficiently identify a particular
443/// position in a [`Rope`]. As [`Rope`] always uses Rust strings interally, code point
444/// indices represented in a [`RopeIndex`] are assumed to be UTF-8 code points (one byte
445/// each).
446///
447/// Note that it is possible for a [`RopeIndex`] to point past the end of the last line,
448/// as it can be used in exclusive ranges. In lines other than the last line, it should
449/// always refer to offsets before the trailing newline.
450#[derive(Clone, Copy, Debug, Default, Eq, MallocSizeOf, PartialEq, PartialOrd, Ord)]
451pub struct RopeIndex {
452    /// The index of the line that this [`RopeIndex`] refers to.
453    pub line: usize,
454    /// The index of the code point on the [`RopeIndex`]'s line in UTF-8 code
455    /// points.
456    ///
457    /// Note: This is not a `Utf8CodeUnitLength` in order to avoid continually having
458    /// to unpack the inner value.
459    pub code_point: usize,
460}
461
462impl RopeIndex {
463    pub fn new(line: usize, code_point: usize) -> Self {
464        Self { line, code_point }
465    }
466}
467
468/// A slice of a [`Rope`]. This can be used to to iterate over a subset of characters of a
469/// [`Rope`] or to return the content of the [`RopeSlice`] as a `String`.
470pub struct RopeSlice<'a> {
471    /// The underlying [`Rope`] of this [`RopeSlice`]
472    rope: &'a Rope,
473    /// The inclusive `RopeIndex` of the start of this [`RopeSlice`].
474    pub start: RopeIndex,
475    /// The exclusive end `RopeIndex` of this [`RopeSlice`].
476    pub end: RopeIndex,
477}
478
479impl From<RopeSlice<'_>> for String {
480    fn from(slice: RopeSlice<'_>) -> Self {
481        if slice.start.line == slice.end.line {
482            slice.rope.line_for_index(slice.start)[slice.start.code_point..slice.end.code_point]
483                .into()
484        } else {
485            once(&slice.rope.line_for_index(slice.start)[slice.start.code_point..])
486                .chain(
487                    (slice.start.line + 1..slice.end.line)
488                        .map(|line_index| slice.rope.line(line_index)),
489                )
490                .chain(once(
491                    &slice.rope.line_for_index(slice.end)[..slice.end.code_point],
492                ))
493                .collect()
494        }
495    }
496}
497
498impl<'a> RopeSlice<'a> {
499    pub fn chars(self) -> RopeChars<'a> {
500        RopeChars {
501            movement_iterator: RopeMovementIterator {
502                slice: self,
503                end_of_forward_motion: |_, string| {
504                    let (offset, character) = string.char_indices().next()?;
505                    Some(offset + character.len_utf8())
506                },
507                start_of_backward_motion: |_, string: &str| {
508                    Some(string.char_indices().next_back()?.0)
509                },
510            },
511        }
512    }
513
514    fn char_indices(self) -> RopeMovementIterator<'a> {
515        RopeMovementIterator {
516            slice: self,
517            end_of_forward_motion: |_, string| {
518                let (offset, character) = string.char_indices().next()?;
519                Some(offset + character.len_utf8())
520            },
521            start_of_backward_motion: |_, string: &str| Some(string.char_indices().next_back()?.0),
522        }
523    }
524
525    fn grapheme_indices(self) -> RopeMovementIterator<'a> {
526        RopeMovementIterator {
527            slice: self,
528            end_of_forward_motion: |_, string| {
529                let (offset, grapheme) = string.grapheme_indices(true).next()?;
530                Some(offset + grapheme.len())
531            },
532            start_of_backward_motion: |_, string| {
533                Some(string.grapheme_indices(true).next_back()?.0)
534            },
535        }
536    }
537
538    fn word_indices(self) -> RopeMovementIterator<'a> {
539        RopeMovementIterator {
540            slice: self,
541            end_of_forward_motion: |_, string| {
542                let (offset, word) = string.unicode_word_indices().next()?;
543                Some(offset + word.len())
544            },
545            start_of_backward_motion: |_, string| {
546                Some(string.unicode_word_indices().next_back()?.0)
547            },
548        }
549    }
550}
551
552/// A generic movement iterator for a [`Rope`]. This can move in both directions. Note
553/// than when moving forward and backward, the indices returned for each unit are
554/// different. When moving forward, the end of the unit of movement is returned and when
555/// moving backward the start of the unit of movement is returned. This matches the
556/// expected behavior when interactively moving through editable text.
557struct RopeMovementIterator<'a> {
558    slice: RopeSlice<'a>,
559    end_of_forward_motion: fn(&RopeSlice, &'a str) -> Option<usize>,
560    start_of_backward_motion: fn(&RopeSlice, &'a str) -> Option<usize>,
561}
562
563impl Iterator for RopeMovementIterator<'_> {
564    type Item = RopeIndex;
565
566    fn next(&mut self) -> Option<Self::Item> {
567        // If the two indices have crossed over, iteration is done.
568        if self.slice.start >= self.slice.end {
569            return None;
570        }
571
572        assert!(self.slice.start.line < self.slice.rope.lines.len());
573        let line = self.slice.rope.line_for_index(self.slice.start);
574
575        if self.slice.start.code_point < line.len() + 1 {
576            if let Some(end_offset) =
577                (self.end_of_forward_motion)(&self.slice, &line[self.slice.start.code_point..])
578            {
579                self.slice.start.code_point += end_offset;
580                return Some(self.slice.start);
581            }
582        }
583
584        // Advance the line as we are at the end of the line.
585        self.slice.start = self.slice.rope.start_of_following_line(self.slice.start);
586        self.next()
587    }
588}
589
590impl DoubleEndedIterator for RopeMovementIterator<'_> {
591    fn next_back(&mut self) -> Option<Self::Item> {
592        // If the two indices have crossed over, iteration is done.
593        if self.slice.end <= self.slice.start {
594            return None;
595        }
596
597        let line = self.slice.rope.line_for_index(self.slice.end);
598        if self.slice.end.code_point > 0 {
599            if let Some(new_start_index) =
600                (self.start_of_backward_motion)(&self.slice, &line[..self.slice.end.code_point])
601            {
602                self.slice.end.code_point = new_start_index;
603                return Some(self.slice.end);
604            }
605        }
606
607        // Decrease the line index as we are at the start of the line.
608        self.slice.end = self.slice.rope.end_of_preceding_line(self.slice.end);
609        self.next_back()
610    }
611}
612
613/// A `Chars`-like iterator for [`Rope`].
614pub struct RopeChars<'a> {
615    movement_iterator: RopeMovementIterator<'a>,
616}
617
618impl Iterator for RopeChars<'_> {
619    type Item = char;
620    fn next(&mut self) -> Option<Self::Item> {
621        self.movement_iterator
622            .next()
623            .and_then(|index| self.movement_iterator.slice.rope.character_before(index))
624    }
625}
626
627impl DoubleEndedIterator for RopeChars<'_> {
628    fn next_back(&mut self) -> Option<Self::Item> {
629        self.movement_iterator
630            .next_back()
631            .and_then(|index| self.movement_iterator.slice.rope.character_at(index))
632    }
633}
634
635#[test]
636fn test_rope_index_conversion_to_utf8_offset() {
637    let rope = Rope::new("A\nBB\nCCC\nDDDD");
638    assert_eq!(
639        rope.index_to_utf8_offset(RopeIndex::new(0, 0)),
640        Utf8CodeUnitLength(0),
641    );
642    assert_eq!(
643        rope.index_to_utf8_offset(RopeIndex::new(0, 1)),
644        Utf8CodeUnitLength(1),
645    );
646    assert_eq!(
647        rope.index_to_utf8_offset(RopeIndex::new(0, 10)),
648        Utf8CodeUnitLength(1),
649        "RopeIndex with offset past the end of the line should return final offset in line",
650    );
651    assert_eq!(
652        rope.index_to_utf8_offset(RopeIndex::new(1, 0)),
653        Utf8CodeUnitLength(2),
654    );
655    assert_eq!(
656        rope.index_to_utf8_offset(RopeIndex::new(1, 2)),
657        Utf8CodeUnitLength(4),
658    );
659
660    assert_eq!(
661        rope.index_to_utf8_offset(RopeIndex::new(3, 0)),
662        Utf8CodeUnitLength(9),
663    );
664    assert_eq!(
665        rope.index_to_utf8_offset(RopeIndex::new(3, 3)),
666        Utf8CodeUnitLength(12),
667    );
668    assert_eq!(
669        rope.index_to_utf8_offset(RopeIndex::new(3, 4)),
670        Utf8CodeUnitLength(13),
671        "There should be no newline at the end of the TextInput",
672    );
673    assert_eq!(
674        rope.index_to_utf8_offset(RopeIndex::new(3, 40)),
675        Utf8CodeUnitLength(13),
676        "There should be no newline at the end of the TextInput",
677    );
678}
679
680#[test]
681fn test_rope_index_conversion_to_utf16_offset() {
682    let rope = Rope::new("A\nBB\nCCC\n家家");
683    assert_eq!(
684        rope.index_to_utf16_offset(RopeIndex::new(0, 0)),
685        Utf16CodeUnitLength(0),
686    );
687    assert_eq!(
688        rope.index_to_utf16_offset(RopeIndex::new(0, 1)),
689        Utf16CodeUnitLength(1),
690    );
691    assert_eq!(
692        rope.index_to_utf16_offset(RopeIndex::new(0, 10)),
693        Utf16CodeUnitLength(1),
694        "RopeIndex with offset past the end of the line should return final offset in line",
695    );
696    assert_eq!(
697        rope.index_to_utf16_offset(RopeIndex::new(3, 0)),
698        Utf16CodeUnitLength(9),
699    );
700
701    assert_eq!(
702        rope.index_to_utf16_offset(RopeIndex::new(3, 3)),
703        Utf16CodeUnitLength(10),
704        "3 code unit UTF-8 encodede character"
705    );
706    assert_eq!(
707        rope.index_to_utf16_offset(RopeIndex::new(3, 6)),
708        Utf16CodeUnitLength(11),
709    );
710    assert_eq!(
711        rope.index_to_utf16_offset(RopeIndex::new(3, 20)),
712        Utf16CodeUnitLength(11),
713    );
714}
715
716#[test]
717fn test_utf16_offset_to_utf8_offset() {
718    let rope = Rope::new("A\nBB\nCCC\n家家");
719    assert_eq!(
720        rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(0)),
721        Utf8CodeUnitLength(0),
722    );
723    assert_eq!(
724        rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(1)),
725        Utf8CodeUnitLength(1),
726    );
727    assert_eq!(
728        rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(2)),
729        Utf8CodeUnitLength(2),
730        "Offset past the end of the line",
731    );
732    assert_eq!(
733        rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(9)),
734        Utf8CodeUnitLength(9),
735    );
736
737    assert_eq!(
738        rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(10)),
739        Utf8CodeUnitLength(12),
740        "3 code unit UTF-8 encodede character"
741    );
742    assert_eq!(
743        rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(11)),
744        Utf8CodeUnitLength(15),
745    );
746    assert_eq!(
747        rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(300)),
748        Utf8CodeUnitLength(15),
749    );
750}
751
752#[test]
753fn test_rope_delete_slice() {
754    let mut rope = Rope::new("ABC\nDEF\n");
755    rope.delete_range(RopeIndex::new(0, 1)..RopeIndex::new(0, 2));
756    assert_eq!(rope.contents(), "AC\nDEF\n");
757
758    // Trying to delete beyond the last index of the line should note remove any trailing
759    // newlines from the rope.
760    let mut rope = Rope::new("ABC\nDEF\n");
761    rope.delete_range(RopeIndex::new(0, 3)..RopeIndex::new(0, 4));
762    assert_eq!(rope.lines, ["ABC\n", "DEF\n", ""]);
763
764    let mut rope = Rope::new("ABC\nDEF\n");
765    rope.delete_range(RopeIndex::new(0, 0)..RopeIndex::new(0, 4));
766    assert_eq!(rope.lines, ["\n", "DEF\n", ""]);
767
768    let mut rope = Rope::new("A\nBB\nCCC");
769    rope.delete_range(RopeIndex::new(0, 2)..RopeIndex::new(1, 0));
770    assert_eq!(rope.lines, ["ABB\n", "CCC"]);
771}
772
773#[test]
774fn test_rope_replace_slice() {
775    let mut rope = Rope::new("AAA\nBBB\nCCC");
776    rope.replace_range(RopeIndex::new(0, 1)..RopeIndex::new(0, 2), "x");
777    assert_eq!(rope.contents(), "AxA\nBBB\nCCC",);
778
779    let mut rope = Rope::new("A\nBB\nCCC");
780    rope.replace_range(RopeIndex::new(0, 2)..RopeIndex::new(1, 0), "D");
781    assert_eq!(rope.lines, ["ADBB\n", "CCC"]);
782
783    let mut rope = Rope::new("AAA\nBBB\nCCC\nDDD");
784    rope.replace_range(RopeIndex::new(0, 2)..RopeIndex::new(2, 1), "x");
785    assert_eq!(rope.lines, ["AAxCC\n", "DDD"]);
786}
787
788#[test]
789fn test_rope_relevant_word() {
790    let rope = Rope::new("AAA    BBB   CCC");
791    let boundaries = rope.relevant_word_boundaries(RopeIndex::new(0, 0));
792    assert_eq!(boundaries.start, RopeIndex::new(0, 0));
793    assert_eq!(boundaries.end, RopeIndex::new(0, 3));
794
795    // Choose previous word if starting on whitespace.
796    let boundaries = rope.relevant_word_boundaries(RopeIndex::new(0, 4));
797    assert_eq!(boundaries.start, RopeIndex::new(0, 0));
798    assert_eq!(boundaries.end, RopeIndex::new(0, 3));
799
800    // Choose next word if starting at word start.
801    let boundaries = rope.relevant_word_boundaries(RopeIndex::new(0, 7));
802    assert_eq!(boundaries.start, RopeIndex::new(0, 7));
803    assert_eq!(boundaries.end, RopeIndex::new(0, 10));
804
805    // Choose word if starting at in middle.
806    let boundaries = rope.relevant_word_boundaries(RopeIndex::new(0, 8));
807    assert_eq!(boundaries.start, RopeIndex::new(0, 7));
808    assert_eq!(boundaries.end, RopeIndex::new(0, 10));
809
810    // Choose start of line to end of first word if in whitespace at start of line.
811    let rope = Rope::new("         AAA    BBB   CCC");
812    let boundaries = rope.relevant_word_boundaries(RopeIndex::new(0, 3));
813    assert_eq!(boundaries.start, RopeIndex::new(0, 0));
814    assert_eq!(boundaries.end, RopeIndex::new(0, 12));
815
816    // Works properly if line is empty.
817    let rope = Rope::new("");
818    let boundaries = rope.relevant_word_boundaries(RopeIndex::new(0, 0));
819    assert_eq!(boundaries.start, RopeIndex::new(0, 0));
820    assert_eq!(boundaries.end, RopeIndex::new(0, 0));
821}
822
823#[test]
824fn test_rope_index_intersects_character() {
825    let rope = Rope::new("񉡚");
826    let rope_index = RopeIndex::new(0, 1);
827    assert_eq!(rope.normalize_index(rope_index), RopeIndex::new(0, 4));
828    assert_eq!(
829        rope.index_to_utf16_offset(rope_index),
830        Utf16CodeUnitLength(2)
831    );
832    assert_eq!(rope.index_to_utf8_offset(rope_index), Utf8CodeUnitLength(4));
833
834    let rope = Rope::new("abc\ndef");
835    assert_eq!(
836        rope.normalize_index(RopeIndex::new(0, 100)),
837        RopeIndex::new(0, 3),
838        "Normalizing index past end of line should just clamp to line length."
839    );
840    assert_eq!(
841        rope.normalize_index(RopeIndex::new(1, 100)),
842        RopeIndex::new(1, 3),
843        "Normalizing index past end of line should just clamp to line length."
844    );
845}