1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */

use std::ops::Range;

use icu_segmenter::LineSegmenter;

/// Caches the line break opportunities of a string and hands them out incrementally through
/// [`LineBreaker::advance_to_linebreaks_in_range`].
pub(crate) struct LineBreaker {
    /// Offsets of the line break opportunities in the text, as reported by the segmenter (minus
    /// its leading break at offset 0). The lookups below assume ascending order.
    linebreaks: Vec<usize>,
    /// Index into `linebreaks` at which the next lookup starts. Everything before it has
    /// already been handed out or skipped.
    current_offset: usize,
}

impl LineBreaker {
    /// Segments `string` once up front and positions the cursor at the first stored linebreak.
    pub(crate) fn new(string: &str) -> Self {
        let line_segmenter = LineSegmenter::new_auto();
        Self {
            // From https://docs.rs/icu_segmenter/1.5.0/icu_segmenter/struct.LineSegmenter.html
            // > For consistency with the grapheme, word, and sentence segmenters, there is always a
            // > breakpoint returned at index 0, but this breakpoint is not a meaningful line break
            // > opportunity.
            //
            // Skip this first line break opportunity, as it isn't interesting to us.
            linebreaks: line_segmenter.segment_str(string).skip(1).collect(),
            current_offset: 0,
        }
    }

    /// Returns the not-yet-consumed linebreak offsets that fall inside `text_range` and moves
    /// the cursor past them, so that later calls never report them again.
    ///
    /// The last stored linebreak is never part of the returned slice. If there are no
    /// linebreaks at all, the returned slice is empty.
    ///
    /// # Panics
    ///
    /// Panics if `text_range.start > text_range.end`.
    pub(crate) fn advance_to_linebreaks_in_range(&mut self, text_range: Range<usize>) -> &[usize] {
        let linebreaks_in_range = self.linebreaks_in_range_after_current_offset(text_range);
        self.current_offset = linebreaks_in_range.end;
        &self.linebreaks[linebreaks_in_range]
    }

    /// Computes the range of indices into `self.linebreaks`, starting at the cursor, whose
    /// offsets lie in `text_range`. Does not move the cursor.
    ///
    /// Neither end of the result ever moves past the index of the last stored linebreak, which
    /// is why that final linebreak is never included.
    ///
    /// # Panics
    ///
    /// Panics if `text_range.start > text_range.end`.
    fn linebreaks_in_range_after_current_offset(&self, text_range: Range<usize>) -> Range<usize> {
        assert!(text_range.start <= text_range.end);

        let last_index = match self.linebreaks.len().checked_sub(1) {
            Some(last_index) if self.current_offset <= last_index => last_index,
            // Either there are no linebreaks (for instance the text was empty) or the cursor
            // is already past all of them. Indexing would panic, so report an empty range that
            // is still valid for slicing `self.linebreaks`.
            _ => {
                let clamped_offset = self.current_offset.min(self.linebreaks.len());
                return clamped_offset..clamped_offset;
            },
        };

        // First index at or after `from` whose linebreak is not before `text_offset`, capped
        // at `last_index`.
        let first_index_not_before = |from: usize, text_offset: usize| {
            (from..last_index)
                .find(|&index| self.linebreaks[index] >= text_offset)
                .unwrap_or(last_index)
        };

        let start = first_index_not_before(self.current_offset, text_range.start);
        let end = first_index_not_before(start, text_range.end);
        start..end
    }
}

/// Checks the index ranges computed for various text ranges when the cursor is at the start.
#[test]
fn test_linebreaker_ranges() {
    let two_words = LineBreaker::new("abc def");
    assert_eq!(two_words.linebreaks, [4, 7]);
    // Whether or not the text range reaches the end of the string, the final linebreak must be
    // left out of the result.
    for text_range in [0..5, 0..7] {
        assert_eq!(
            two_words.linebreaks_in_range_after_current_offset(text_range),
            0..1
        );
    }

    let three_words = LineBreaker::new("abc d def");
    assert_eq!(three_words.linebreaks, [4, 6, 9]);
    // Pairs of (queried text range, expected range of indices into `linebreaks`).
    let cases = [(0..5, 0..1), (0..7, 0..2), (0..9, 0..2), (4..9, 0..2)];
    for (text_range, expected) in cases {
        assert_eq!(
            three_words.linebreaks_in_range_after_current_offset(text_range),
            expected
        );
    }

    // A range whose start is after its end must trip the assertion in the lookup.
    let reversed_lookup = std::panic::catch_unwind(|| {
        LineBreaker::new("abc def").linebreaks_in_range_after_current_offset(5..2);
    });
    reversed_lookup.expect_err("Reversed range should cause an assertion failure.");
}

/// Checks that `advance_to_linebreaks_in_range` moves the cursor forward and never hands out
/// the same linebreaks twice.
#[test]
fn test_linebreaker_stateful_advance() {
    let mut linebreaker = LineBreaker::new("abc d def");
    assert_eq!(linebreaker.linebreaks, [4, 6, 9]);
    // `assert_eq!` rather than `assert!(a == b)` so a failure prints both slices.
    assert_eq!(linebreaker.advance_to_linebreaks_in_range(0..7), &[4, 6]);
    assert!(linebreaker.advance_to_linebreaks_in_range(8..9).is_empty());

    // We've already advanced, so a range from the beginning shouldn't affect things.
    assert!(linebreaker.advance_to_linebreaks_in_range(0..9).is_empty());

    // Rewind the cursor so the next lookup starts from the first linebreak again.
    linebreaker.current_offset = 0;

    // Sending a value out of range shouldn't break things.
    assert_eq!(linebreaker.advance_to_linebreaks_in_range(0..999), &[4, 6]);

    // The closure builds its own `LineBreaker`, so the one above needs no further reset.
    std::panic::catch_unwind(|| {
        let mut linebreaker = LineBreaker::new("abc d def");
        linebreaker.advance_to_linebreaks_in_range(2..0);
    })
    .expect_err("Reversed range should cause an assertion failure.");
}