tendril/
utf8_decode.rs

1// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
2// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
3// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
4// option. This file may not be copied, modified, or distributed
5// except according to those terms.
6
7use fmt;
8use tendril::{Atomicity, Tendril};
9use utf8;
10
/// Opaque wrapper around the `utf8::Incomplete` state that
/// `decode_utf8_lossy` returns when the decoder reports that the input ends
/// with an incomplete suffix.
///
/// Pass the next chunk of bytes to `try_complete` to continue from it.
pub struct IncompleteUtf8(utf8::Incomplete);
12
13impl<A> Tendril<fmt::Bytes, A>
14where
15    A: Atomicity,
16{
17    pub fn decode_utf8_lossy<F>(mut self, mut push_utf8: F) -> Option<IncompleteUtf8>
18    where
19        F: FnMut(Tendril<fmt::UTF8, A>),
20    {
21        loop {
22            if self.is_empty() {
23                return None;
24            }
25            let unborrowed_result = match utf8::decode(&self) {
26                Ok(s) => {
27                    debug_assert!(s.as_ptr() == self.as_ptr());
28                    debug_assert!(s.len() == self.len());
29                    Ok(())
30                }
31                Err(utf8::DecodeError::Invalid {
32                    valid_prefix,
33                    invalid_sequence,
34                    ..
35                }) => {
36                    debug_assert!(valid_prefix.as_ptr() == self.as_ptr());
37                    debug_assert!(valid_prefix.len() <= self.len());
38                    Err((
39                        valid_prefix.len(),
40                        Err(valid_prefix.len() + invalid_sequence.len()),
41                    ))
42                }
43                Err(utf8::DecodeError::Incomplete {
44                    valid_prefix,
45                    incomplete_suffix,
46                }) => {
47                    debug_assert!(valid_prefix.as_ptr() == self.as_ptr());
48                    debug_assert!(valid_prefix.len() <= self.len());
49                    Err((valid_prefix.len(), Ok(incomplete_suffix)))
50                }
51            };
52            match unborrowed_result {
53                Ok(()) => {
54                    unsafe { push_utf8(self.reinterpret_without_validating()) }
55                    return None;
56                }
57                Err((valid_len, and_then)) => {
58                    if valid_len > 0 {
59                        let subtendril = self.subtendril(0, valid_len as u32);
60                        unsafe { push_utf8(subtendril.reinterpret_without_validating()) }
61                    }
62                    match and_then {
63                        Ok(incomplete) => return Some(IncompleteUtf8(incomplete)),
64                        Err(offset) => {
65                            push_utf8(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER));
66                            self.pop_front(offset as u32)
67                        }
68                    }
69                }
70            }
71        }
72    }
73}
74
75impl IncompleteUtf8 {
76    pub fn try_complete<A, F>(
77        &mut self,
78        mut input: Tendril<fmt::Bytes, A>,
79        mut push_utf8: F,
80    ) -> Result<Tendril<fmt::Bytes, A>, ()>
81    where
82        A: Atomicity,
83        F: FnMut(Tendril<fmt::UTF8, A>),
84    {
85        let resume_at;
86        match self.0.try_complete(&input) {
87            None => return Err(()),
88            Some((result, rest)) => {
89                push_utf8(Tendril::from_slice(
90                    result.unwrap_or(utf8::REPLACEMENT_CHARACTER),
91                ));
92                resume_at = input.len() - rest.len();
93            }
94        }
95        input.pop_front(resume_at as u32);
96        Ok(input)
97    }
98}