1use std::iter::once;
6use std::ops::Range;
7
8use malloc_size_of_derive::MallocSizeOf;
9use rayon::iter::Either;
10use unicode_segmentation::UnicodeSegmentation;
11
12use crate::text::{Utf8CodeUnitLength, Utf16CodeUnitLength};
13
14fn contents_vec(contents: impl Into<String>) -> Vec<String> {
15 let mut contents: Vec<_> = contents
16 .into()
17 .split('\n')
18 .map(|line| format!("{line}\n"))
19 .collect();
20 if let Some(last_line) = contents.last_mut() {
22 last_line.truncate(last_line.len() - 1);
23 }
24 contents
25}
26
27pub enum RopeMovement {
29 Grapheme,
30 Word,
31 Line,
32 LineStartOrEnd,
33 RopeStartOrEnd,
34}
35
36#[derive(MallocSizeOf)]
41pub struct Rope {
42 lines: Vec<String>,
45}
46
47impl Rope {
48 pub fn new(contents: impl Into<String>) -> Self {
49 Self {
50 lines: contents_vec(contents),
51 }
52 }
53
54 pub fn contents(&self) -> String {
55 self.lines.join("")
56 }
57
58 pub fn last_index(&self) -> RopeIndex {
59 let line_index = self.lines.len() - 1;
60 RopeIndex::new(line_index, self.line(line_index).len())
61 }
62
63 pub fn replace_range(
66 &mut self,
67 mut range: Range<RopeIndex>,
68 string: impl Into<String>,
69 ) -> RopeIndex {
70 range.start = self.clamp_index(range.start);
71 range.end = self.clamp_index(range.end);
72 assert!(range.start <= range.end);
73
74 let start_index = range.start;
75 self.delete_range(range);
76
77 let mut new_contents = contents_vec(string);
78 let Some(first_line_of_new_contents) = new_contents.first() else {
79 return start_index;
80 };
81
82 if new_contents.len() == 1 {
83 self.line_for_index_mut(start_index)
84 .insert_str(start_index.code_point, first_line_of_new_contents);
85 return RopeIndex::new(
86 start_index.line,
87 start_index.code_point + first_line_of_new_contents.len(),
88 );
89 }
90
91 let start_line = self.line_for_index_mut(start_index);
92 let last_line = new_contents.last().expect("Should have at least one line");
93 let last_index = RopeIndex::new(
94 start_index.line + new_contents.len().saturating_sub(1),
95 last_line.len(),
96 );
97
98 let remaining_string = start_line.split_off(start_index.code_point);
99 start_line.push_str(first_line_of_new_contents);
100 new_contents
101 .last_mut()
102 .expect("Should have at least one line")
103 .push_str(&remaining_string);
104
105 let splice_index = start_index.line + 1;
106 self.lines
107 .splice(splice_index..splice_index, new_contents.into_iter().skip(1));
108 last_index
109 }
110
111 fn delete_range(&mut self, mut range: Range<RopeIndex>) {
112 range.start = self.clamp_index(range.start);
113 range.end = self.clamp_index(range.end);
114 assert!(range.start <= range.end);
115
116 if range.start.line == range.end.line {
117 self.line_for_index_mut(range.start)
118 .replace_range(range.start.code_point..range.end.code_point, "");
119 return;
120 }
121
122 let removed_lines = self.lines.splice(range.start.line..range.end.line, []);
124 let first_line = removed_lines
125 .into_iter()
126 .nth(0)
127 .expect("Should have removed at least one line");
128
129 let first_line_prefix = &first_line[0..range.start.code_point];
130 let new_end_line = range.start.line;
131 self.lines[new_end_line].replace_range(0..range.end.code_point, first_line_prefix);
132 }
133
134 pub fn slice<'a>(&'a self, start: Option<RopeIndex>, end: Option<RopeIndex>) -> RopeSlice<'a> {
137 RopeSlice {
138 rope: self,
139 start: start.unwrap_or_default(),
140 end: end.unwrap_or_else(|| self.last_index()),
141 }
142 }
143
144 pub fn chars<'a>(&'a self) -> RopeChars<'a> {
145 self.slice(None, None).chars()
146 }
147
148 pub fn is_empty(&self) -> bool {
151 self.lines.first().is_none_or(String::is_empty)
152 }
153
154 pub fn len_utf16(&self) -> Utf16CodeUnitLength {
156 Utf16CodeUnitLength(self.chars().map(char::len_utf16).sum())
157 }
158
159 fn line(&self, index: usize) -> &str {
160 &self.lines[index]
161 }
162
163 fn line_for_index(&self, index: RopeIndex) -> &String {
164 &self.lines[index.line]
165 }
166
167 fn line_for_index_mut(&mut self, index: RopeIndex) -> &mut String {
168 &mut self.lines[index.line]
169 }
170
171 fn last_index_in_line(&self, line: usize) -> RopeIndex {
172 if line >= self.lines.len() - 1 {
173 return self.last_index();
174 }
175 RopeIndex {
176 line,
177 code_point: self.line(line).len() - 1,
178 }
179 }
180
181 fn start_of_following_line(&self, index: RopeIndex) -> RopeIndex {
185 if index.line >= self.lines.len() - 1 {
186 return self.last_index();
187 }
188 RopeIndex::new(index.line + 1, 0)
189 }
190
191 fn end_of_preceding_line(&self, index: RopeIndex) -> RopeIndex {
195 if index.line == 0 {
196 return Default::default();
197 }
198 let line_index = index.line.saturating_sub(1);
199 RopeIndex::new(line_index, self.line(line_index).len())
200 }
201
202 pub fn move_by(&self, origin: RopeIndex, unit: RopeMovement, amount: isize) -> RopeIndex {
203 if amount == 0 {
204 return origin;
205 }
206
207 match unit {
208 RopeMovement::Grapheme | RopeMovement::Word => {
209 self.move_by_iterator(origin, unit, amount)
210 },
211 RopeMovement::Line => self.move_by_lines(origin, amount),
212 RopeMovement::LineStartOrEnd => {
213 if amount >= 0 {
214 self.last_index_in_line(origin.line)
215 } else {
216 RopeIndex::new(origin.line, 0)
217 }
218 },
219 RopeMovement::RopeStartOrEnd => {
220 if amount >= 0 {
221 self.last_index()
222 } else {
223 Default::default()
224 }
225 },
226 }
227 }
228
229 fn move_by_lines(&self, origin: RopeIndex, lines_to_move: isize) -> RopeIndex {
230 let new_line_index = (origin.line as isize) + lines_to_move;
231 if new_line_index < 0 {
232 return Default::default();
233 }
234 if new_line_index > (self.lines.len() - 1) as isize {
235 return self.last_index();
236 }
237
238 let new_line_index = new_line_index.unsigned_abs();
239 let char_count = self.line(origin.line)[0..origin.code_point].chars().count();
240 let new_code_point_index = self
241 .line(new_line_index)
242 .char_indices()
243 .take(char_count)
244 .last()
245 .map(|(byte_index, character)| byte_index + character.len_utf8())
246 .unwrap_or_default();
247 RopeIndex::new(new_line_index, new_code_point_index)
248 .min(self.last_index_in_line(new_line_index))
249 }
250
251 fn move_by_iterator(&self, origin: RopeIndex, unit: RopeMovement, amount: isize) -> RopeIndex {
252 assert_ne!(amount, 0);
253 let (boundary_value, slice) = if amount > 0 {
254 (self.last_index(), self.slice(Some(origin), None))
255 } else {
256 (RopeIndex::default(), self.slice(None, Some(origin)))
257 };
258
259 let iterator = match unit {
260 RopeMovement::Grapheme => slice.grapheme_indices(),
261 RopeMovement::Word => slice.word_indices(),
262 _ => unreachable!("Should not be called for other movement types"),
263 };
264 let iterator = if amount > 0 {
265 Either::Left(iterator)
266 } else {
267 Either::Right(iterator.rev())
268 };
269
270 let mut iterations = amount.unsigned_abs();
271 for (mut index, _) in iterator {
272 iterations = iterations.saturating_sub(1);
273 if iterations == 0 {
274 if index.code_point >= self.line_for_index(index).len() {
277 index = self.start_of_following_line(index);
278 }
279 return index;
280 }
281 }
282
283 boundary_value
284 }
285
286 pub fn clamp_index(&self, rope_index: RopeIndex) -> RopeIndex {
289 let last_line = self.lines.len().saturating_sub(1);
290 let line_index = rope_index.line.min(last_line);
291
292 let line_length_utf8 = if line_index == last_line {
300 self.lines[line_index].len()
301 } else {
302 self.lines[line_index].len() - 1
303 };
304
305 RopeIndex::new(line_index, rope_index.code_point.min(line_length_utf8))
306 }
307
308 pub fn index_to_utf8_offset(&self, rope_index: RopeIndex) -> Utf8CodeUnitLength {
310 let rope_index = self.clamp_index(rope_index);
311 Utf8CodeUnitLength(
312 self.lines
313 .iter()
314 .take(rope_index.line)
315 .map(String::len)
316 .sum::<usize>() +
317 rope_index.code_point,
318 )
319 }
320
321 pub fn index_to_utf16_offset(&self, rope_index: RopeIndex) -> Utf16CodeUnitLength {
322 let rope_index = self.clamp_index(rope_index);
323 let final_line = self.line(rope_index.line);
324
325 let final_line_offset = Utf16CodeUnitLength(
327 final_line[0..rope_index.code_point]
328 .chars()
329 .map(char::len_utf16)
330 .sum(),
331 );
332
333 self.lines
334 .iter()
335 .take(rope_index.line)
336 .map(|line| Utf16CodeUnitLength(line.chars().map(char::len_utf16).sum()))
337 .sum::<Utf16CodeUnitLength>() +
338 final_line_offset
339 }
340
341 pub fn utf8_offset_to_rope_index(&self, utf8_offset: Utf8CodeUnitLength) -> RopeIndex {
343 let mut current_utf8_offset = utf8_offset.0;
344 for (line_index, line) in self.lines.iter().enumerate() {
345 if current_utf8_offset == 0 || current_utf8_offset < line.len() {
346 return RopeIndex::new(line_index, current_utf8_offset);
347 }
348 current_utf8_offset -= line.len();
349 }
350 self.last_index()
351 }
352
353 pub fn utf16_offset_to_utf8_offset(
354 &self,
355 utf16_offset: Utf16CodeUnitLength,
356 ) -> Utf8CodeUnitLength {
357 let mut current_utf16_offset = Utf16CodeUnitLength::zero();
358 let mut current_utf8_offset = Utf8CodeUnitLength::zero();
359
360 for character in self.chars() {
361 let utf16_length = character.len_utf16();
362 if current_utf16_offset + Utf16CodeUnitLength(utf16_length) > utf16_offset {
363 return current_utf8_offset;
364 }
365 current_utf8_offset += Utf8CodeUnitLength(character.len_utf8());
366 current_utf16_offset += Utf16CodeUnitLength(utf16_length);
367 }
368 current_utf8_offset
369 }
370
371 pub fn relevant_word_boundaries<'a>(&'a self, index: RopeIndex) -> RopeSlice<'a> {
380 let line = self.line_for_index(index);
381 let mut result_start = 0;
382 let mut result_end = None;
383 for (word_start, word) in line.unicode_word_indices() {
384 if word_start > index.code_point {
385 result_end = result_end.or_else(|| Some(word_start + word.len()));
386 break;
387 }
388 result_start = word_start;
389 result_end = Some(word_start + word.len());
390 }
391
392 let result_end = result_end.unwrap_or(result_start);
393 self.slice(
394 Some(RopeIndex::new(index.line, result_start)),
395 Some(RopeIndex::new(index.line, result_end)),
396 )
397 }
398
399 pub fn line_boundaries<'a>(&'a self, index: RopeIndex) -> RopeSlice<'a> {
401 self.slice(
402 Some(RopeIndex::new(index.line, 0)),
403 Some(self.last_index_in_line(index.line)),
404 )
405 }
406}
407
408#[derive(Clone, Copy, Debug, Default, Eq, MallocSizeOf, PartialEq, PartialOrd, Ord)]
417pub struct RopeIndex {
418 pub line: usize,
420 pub code_point: usize,
426}
427
428impl RopeIndex {
429 pub fn new(line: usize, code_point: usize) -> Self {
430 Self { line, code_point }
431 }
432}
433
434pub struct RopeSlice<'a> {
437 rope: &'a Rope,
439 pub start: RopeIndex,
441 pub end: RopeIndex,
443}
444
445impl From<RopeSlice<'_>> for String {
446 fn from(slice: RopeSlice<'_>) -> Self {
447 if slice.start.line == slice.end.line {
448 slice.rope.line_for_index(slice.start)[slice.start.code_point..slice.end.code_point]
449 .into()
450 } else {
451 once(&slice.rope.line_for_index(slice.start)[slice.start.code_point..])
452 .chain(
453 (slice.start.line + 1..slice.end.line)
454 .map(|line_index| slice.rope.line(line_index)),
455 )
456 .chain(once(
457 &slice.rope.line_for_index(slice.end)[..slice.end.code_point],
458 ))
459 .collect()
460 }
461 }
462}
463
464impl<'a> RopeSlice<'a> {
465 pub fn chars(self) -> RopeChars<'a> {
466 RopeChars {
467 movement_iterator: RopeMovementIterator {
468 slice: self,
469 end_of_forward_motion: |_, string| {
470 let (offset, character) = string.char_indices().next()?;
471 Some((offset + character.len_utf8(), character))
472 },
473 start_of_backward_motion: |_, string: &str| string.char_indices().next_back(),
474 },
475 }
476 }
477
478 fn grapheme_indices(self) -> RopeMovementIterator<'a, &'a str> {
479 RopeMovementIterator {
480 slice: self,
481 end_of_forward_motion: |_, string| {
482 let (offset, grapheme) = string.grapheme_indices(true).next()?;
483 Some((offset + grapheme.len(), grapheme))
484 },
485 start_of_backward_motion: |_, string| string.grapheme_indices(true).next_back(),
486 }
487 }
488
489 fn word_indices(self) -> RopeMovementIterator<'a, &'a str> {
490 RopeMovementIterator {
491 slice: self,
492 end_of_forward_motion: |_, string| {
493 let (offset, word) = string.unicode_word_indices().next()?;
494 Some((offset + word.len(), word))
495 },
496 start_of_backward_motion: |_, string| string.unicode_word_indices().next_back(),
497 }
498 }
499}
500
501struct RopeMovementIterator<'a, T> {
507 slice: RopeSlice<'a>,
508 end_of_forward_motion: fn(&RopeSlice, &'a str) -> Option<(usize, T)>,
509 start_of_backward_motion: fn(&RopeSlice, &'a str) -> Option<(usize, T)>,
510}
511
512impl<T> Iterator for RopeMovementIterator<'_, T> {
513 type Item = (RopeIndex, T);
514
515 fn next(&mut self) -> Option<Self::Item> {
516 if self.slice.start >= self.slice.end {
518 return None;
519 }
520
521 assert!(self.slice.start.line < self.slice.rope.lines.len());
522 let line = self.slice.rope.line_for_index(self.slice.start);
523
524 if self.slice.start.code_point < line.len() + 1 {
525 if let Some((end_offset, value)) =
526 (self.end_of_forward_motion)(&self.slice, &line[self.slice.start.code_point..])
527 {
528 self.slice.start.code_point += end_offset;
529 return Some((self.slice.start, value));
530 }
531 }
532
533 self.slice.start = self.slice.rope.start_of_following_line(self.slice.start);
535 self.next()
536 }
537}
538
539impl<T> DoubleEndedIterator for RopeMovementIterator<'_, T> {
540 fn next_back(&mut self) -> Option<Self::Item> {
541 if self.slice.end <= self.slice.start {
543 return None;
544 }
545
546 let line = self.slice.rope.line_for_index(self.slice.end);
547 if self.slice.end.code_point > 0 {
548 if let Some((new_start_index, value)) =
549 (self.start_of_backward_motion)(&self.slice, &line[..self.slice.end.code_point])
550 {
551 self.slice.end.code_point = new_start_index;
552 return Some((self.slice.end, value));
553 }
554 }
555
556 self.slice.end = self.slice.rope.end_of_preceding_line(self.slice.end);
558 self.next_back()
559 }
560}
561
562pub struct RopeChars<'a> {
564 movement_iterator: RopeMovementIterator<'a, char>,
565}
566
567impl Iterator for RopeChars<'_> {
568 type Item = char;
569 fn next(&mut self) -> Option<Self::Item> {
570 Some(self.movement_iterator.next()?.1)
571 }
572}
573
574impl DoubleEndedIterator for RopeChars<'_> {
575 fn next_back(&mut self) -> Option<Self::Item> {
576 Some(self.movement_iterator.next_back()?.1)
577 }
578}
579
580#[test]
581fn test_rope_index_conversion_to_utf8_offset() {
582 let rope = Rope::new("A\nBB\nCCC\nDDDD");
583 assert_eq!(
584 rope.index_to_utf8_offset(RopeIndex::new(0, 0)),
585 Utf8CodeUnitLength(0),
586 );
587 assert_eq!(
588 rope.index_to_utf8_offset(RopeIndex::new(0, 1)),
589 Utf8CodeUnitLength(1),
590 );
591 assert_eq!(
592 rope.index_to_utf8_offset(RopeIndex::new(0, 10)),
593 Utf8CodeUnitLength(1),
594 "RopeIndex with offset past the end of the line should return final offset in line",
595 );
596 assert_eq!(
597 rope.index_to_utf8_offset(RopeIndex::new(1, 0)),
598 Utf8CodeUnitLength(2),
599 );
600 assert_eq!(
601 rope.index_to_utf8_offset(RopeIndex::new(1, 2)),
602 Utf8CodeUnitLength(4),
603 );
604
605 assert_eq!(
606 rope.index_to_utf8_offset(RopeIndex::new(3, 0)),
607 Utf8CodeUnitLength(9),
608 );
609 assert_eq!(
610 rope.index_to_utf8_offset(RopeIndex::new(3, 3)),
611 Utf8CodeUnitLength(12),
612 );
613 assert_eq!(
614 rope.index_to_utf8_offset(RopeIndex::new(3, 4)),
615 Utf8CodeUnitLength(13),
616 "There should be no newline at the end of the TextInput",
617 );
618 assert_eq!(
619 rope.index_to_utf8_offset(RopeIndex::new(3, 40)),
620 Utf8CodeUnitLength(13),
621 "There should be no newline at the end of the TextInput",
622 );
623}
624
625#[test]
626fn test_rope_index_conversion_to_utf16_offset() {
627 let rope = Rope::new("A\nBB\nCCC\n家家");
628 assert_eq!(
629 rope.index_to_utf16_offset(RopeIndex::new(0, 0)),
630 Utf16CodeUnitLength(0),
631 );
632 assert_eq!(
633 rope.index_to_utf16_offset(RopeIndex::new(0, 1)),
634 Utf16CodeUnitLength(1),
635 );
636 assert_eq!(
637 rope.index_to_utf16_offset(RopeIndex::new(0, 10)),
638 Utf16CodeUnitLength(1),
639 "RopeIndex with offset past the end of the line should return final offset in line",
640 );
641 assert_eq!(
642 rope.index_to_utf16_offset(RopeIndex::new(3, 0)),
643 Utf16CodeUnitLength(9),
644 );
645
646 assert_eq!(
647 rope.index_to_utf16_offset(RopeIndex::new(3, 3)),
648 Utf16CodeUnitLength(10),
649 "3 code unit UTF-8 encodede character"
650 );
651 assert_eq!(
652 rope.index_to_utf16_offset(RopeIndex::new(3, 6)),
653 Utf16CodeUnitLength(11),
654 );
655 assert_eq!(
656 rope.index_to_utf16_offset(RopeIndex::new(3, 20)),
657 Utf16CodeUnitLength(11),
658 );
659}
660
661#[test]
662fn test_utf16_offset_to_utf8_offset() {
663 let rope = Rope::new("A\nBB\nCCC\n家家");
664 assert_eq!(
665 rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(0)),
666 Utf8CodeUnitLength(0),
667 );
668 assert_eq!(
669 rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(1)),
670 Utf8CodeUnitLength(1),
671 );
672 assert_eq!(
673 rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(2)),
674 Utf8CodeUnitLength(2),
675 "Offset past the end of the line",
676 );
677 assert_eq!(
678 rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(9)),
679 Utf8CodeUnitLength(9),
680 );
681
682 assert_eq!(
683 rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(10)),
684 Utf8CodeUnitLength(12),
685 "3 code unit UTF-8 encodede character"
686 );
687 assert_eq!(
688 rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(11)),
689 Utf8CodeUnitLength(15),
690 );
691 assert_eq!(
692 rope.utf16_offset_to_utf8_offset(Utf16CodeUnitLength(300)),
693 Utf8CodeUnitLength(15),
694 );
695}
696
697#[test]
698fn test_rope_delete_slice() {
699 let mut rope = Rope::new("ABC\nDEF\n");
700 rope.delete_range(RopeIndex::new(0, 1)..RopeIndex::new(0, 2));
701 assert_eq!(rope.contents(), "AC\nDEF\n");
702
703 let mut rope = Rope::new("ABC\nDEF\n");
706 rope.delete_range(RopeIndex::new(0, 3)..RopeIndex::new(0, 4));
707 assert_eq!(rope.lines, ["ABC\n", "DEF\n", ""]);
708
709 let mut rope = Rope::new("ABC\nDEF\n");
710 rope.delete_range(RopeIndex::new(0, 0)..RopeIndex::new(0, 4));
711 assert_eq!(rope.lines, ["\n", "DEF\n", ""]);
712
713 let mut rope = Rope::new("A\nBB\nCCC");
714 rope.delete_range(RopeIndex::new(0, 2)..RopeIndex::new(1, 0));
715 assert_eq!(rope.lines, ["ABB\n", "CCC"]);
716}
717
718#[test]
719fn test_rope_replace_slice() {
720 let mut rope = Rope::new("AAA\nBBB\nCCC");
721 rope.replace_range(RopeIndex::new(0, 1)..RopeIndex::new(0, 2), "x");
722 assert_eq!(rope.contents(), "AxA\nBBB\nCCC",);
723
724 let mut rope = Rope::new("A\nBB\nCCC");
725 rope.replace_range(RopeIndex::new(0, 2)..RopeIndex::new(1, 0), "D");
726 assert_eq!(rope.lines, ["ADBB\n", "CCC"]);
727
728 let mut rope = Rope::new("AAA\nBBB\nCCC\nDDD");
729 rope.replace_range(RopeIndex::new(0, 2)..RopeIndex::new(2, 1), "x");
730 assert_eq!(rope.lines, ["AAxCC\n", "DDD"]);
731}
732
733#[test]
734fn test_rope_relevant_word() {
735 let rope = Rope::new("AAA BBB CCC");
736 let boundaries = rope.relevant_word_boundaries(RopeIndex::new(0, 0));
737 assert_eq!(boundaries.start, RopeIndex::new(0, 0));
738 assert_eq!(boundaries.end, RopeIndex::new(0, 3));
739
740 let boundaries = rope.relevant_word_boundaries(RopeIndex::new(0, 4));
742 assert_eq!(boundaries.start, RopeIndex::new(0, 0));
743 assert_eq!(boundaries.end, RopeIndex::new(0, 3));
744
745 let boundaries = rope.relevant_word_boundaries(RopeIndex::new(0, 7));
747 assert_eq!(boundaries.start, RopeIndex::new(0, 7));
748 assert_eq!(boundaries.end, RopeIndex::new(0, 10));
749
750 let boundaries = rope.relevant_word_boundaries(RopeIndex::new(0, 8));
752 assert_eq!(boundaries.start, RopeIndex::new(0, 7));
753 assert_eq!(boundaries.end, RopeIndex::new(0, 10));
754
755 let rope = Rope::new(" AAA BBB CCC");
757 let boundaries = rope.relevant_word_boundaries(RopeIndex::new(0, 3));
758 assert_eq!(boundaries.start, RopeIndex::new(0, 0));
759 assert_eq!(boundaries.end, RopeIndex::new(0, 12));
760
761 let rope = Rope::new("");
763 let boundaries = rope.relevant_word_boundaries(RopeIndex::new(0, 0));
764 assert_eq!(boundaries.start, RopeIndex::new(0, 0));
765 assert_eq!(boundaries.end, RopeIndex::new(0, 0));
766}