bpaf/buffer/
splitter.rs

1pub(super) struct Splitter<'a> {
2    input: &'a str,
3
4    #[cfg(feature = "docgen")]
5    code: Code,
6}
7
/// Where the splitter currently is relative to a block fenced with triple backticks.
#[cfg(feature = "docgen")]
enum Code {
    /// Not inside a fenced block, regular formatting rules apply.
    No,
    /// An opening fence was detected, its line is the next one to be emitted.
    First,
    /// The opening fence line was emitted, following lines are passed through until a line
    /// starting with triple backticks closes the block.
    Rest,
}
14
15/// Split payload into chunks annotated with character width and containing no newlines according
16/// to text formatting rules
17pub(super) fn split(input: &str) -> Splitter {
18    Splitter {
19        input,
20        #[cfg(feature = "docgen")]
21        code: Code::No,
22    }
23}
24
25#[cfg_attr(test, derive(Debug, Clone, Copy, Eq, PartialEq))]
26pub(super) enum Chunk<'a> {
27    Raw(&'a str, usize),
28    Paragraph,
29    LineBreak,
30}
31
32impl Chunk<'_> {
33    pub(crate) const CODE: usize = 1_000_000;
34    pub(crate) const TICKED_CODE: usize = 1_000_001;
35}
36
37impl<'a> Iterator for Splitter<'a> {
38    type Item = Chunk<'a>;
39
40    // 1. paragraphs are separated by a blank line.
41    // 2. code blocks are aligned by 4 spaces and are kept intact
42    // 3. linebreaks followed by space are preserved
43    // 4. leftovers are fed word by word
44    // 5. everything between "^```" is passed as is
45
46    // 1. "\n\n" = Paragraph
47    // 2. "\n " = LineBreak
48    // 3. "\n" = " "
49    // 4. "\n    " = code block
50    // 5. take next word
51    //
52
53    fn next(&mut self) -> Option<Self::Item> {
54        if self.input.is_empty() {
55            return None;
56        }
57
58        #[cfg(feature = "docgen")]
59        if matches!(self.code, Code::First | Code::Rest) {
60            if matches!(self.code, Code::Rest) && self.input.starts_with("```") {
61                self.code = Code::No;
62            }
63            if matches!(self.code, Code::First) {
64                self.code = Code::Rest;
65            }
66
67            let tail = self.input;
68            let code = if let Some((code, rest)) = self.input.split_once('\n') {
69                let tail = &tail[code.len()..];
70                if tail.starts_with("\n\n") && matches!(self.code, Code::No) {
71                    self.input = tail;
72                } else {
73                    self.input = rest;
74                }
75                code
76            } else {
77                self.input = "";
78                tail
79            };
80            return Some(Chunk::Raw(code, Chunk::TICKED_CODE));
81        }
82
83        if let Some(tail) = self.input.strip_prefix('\n') {
84            if let Some(tail) = tail.strip_prefix("    ") {
85                let code = if let Some((code, _rest)) = tail.split_once('\n') {
86                    self.input = &tail[code.len()..];
87                    code
88                } else {
89                    self.input = "";
90                    tail
91                };
92                Some(Chunk::Raw(code, Chunk::CODE))
93            } else if tail.starts_with("\n```") {
94                #[cfg(feature = "docgen")]
95                {
96                    self.code = Code::First;
97                }
98                self.input = &tail[1..];
99                Some(Chunk::Paragraph)
100            } else if tail.starts_with("\n    ") {
101                self.input = tail;
102                Some(Chunk::Paragraph)
103            } else if let Some(tail) = tail.strip_prefix('\n') {
104                self.input = tail;
105                Some(Chunk::Paragraph)
106            } else if let Some(tail) = tail.strip_prefix(' ') {
107                self.input = tail;
108                Some(Chunk::LineBreak)
109            } else {
110                self.input = tail;
111                Some(Chunk::Raw(" ", 1))
112            }
113        } else if let Some(tail) = self.input.strip_prefix(' ') {
114            self.input = tail;
115            Some(Chunk::Raw(" ", 1))
116        } else {
117            let mut char_ix = 0;
118
119            // there's iterator position but it won't give me character length of the rest of the input
120            for (byte_ix, chr) in self.input.char_indices() {
121                if chr == '\n' || chr == ' ' {
122                    let head = &self.input[..byte_ix];
123                    let tail = &self.input[byte_ix..];
124                    self.input = tail;
125                    return Some(Chunk::Raw(head, char_ix));
126                }
127                char_ix += 1;
128            }
129            let head = self.input;
130            self.input = "";
131            Some(Chunk::Raw(head, char_ix))
132        }
133    }
134}
135
/// Code blocks indented with four spaces are passed line by line and are separated from the
/// surrounding text by paragraph breaks.
#[test]
fn space_code_block() {
    let input = "a\n\n    a\n    b\n\ndf\n\n    c\n    d\n";
    let expected = [
        Chunk::Raw("a", 1),
        Chunk::Paragraph,
        Chunk::Raw("a", Chunk::CODE),
        Chunk::Raw("b", Chunk::CODE),
        Chunk::Paragraph,
        Chunk::Raw("df", 2),
        Chunk::Paragraph,
        Chunk::Raw("c", Chunk::CODE),
        Chunk::Raw("d", Chunk::CODE),
        // the trailing newline is reported as a single space
        Chunk::Raw(" ", 1),
    ];

    let actual = split(input).collect::<Vec<_>>();
    assert_eq!(actual, expected);
}
156
/// Blocks fenced with triple backticks are passed through line by line, fences included.
///
/// Fence detection is compiled in only with the `docgen` feature. Without it the fenced lines
/// are split into regular words, so the expectations below hold only when the feature is on.
#[cfg(feature = "docgen")]
#[test]
fn ticks_code_block() {
    use Chunk::*;
    let a = "a\n\n```text\na\nb\n```\n\ndf\n\n```\nc\nd\n```\n";
    let xs = split(a).collect::<Vec<_>>();
    assert_eq!(
        xs,
        [
            Raw("a", 1),
            Paragraph,
            Raw("```text", Chunk::TICKED_CODE),
            Raw("a", Chunk::TICKED_CODE),
            Raw("b", Chunk::TICKED_CODE),
            Raw("```", Chunk::TICKED_CODE),
            Paragraph,
            Raw("df", 2),
            Paragraph,
            Raw("```", Chunk::TICKED_CODE),
            Raw("c", Chunk::TICKED_CODE),
            Raw("d", Chunk::TICKED_CODE),
            Raw("```", Chunk::TICKED_CODE),
        ],
    );
}