tendril/
utf8_decode.rs

1// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
2// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
3// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
4// option. This file may not be copied, modified, or distributed
5// except according to those terms.
6
7use crate::fmt;
8use crate::{Atomicity, Tendril};
9
/// Opaque wrapper around a `utf8::Incomplete` value.
///
/// `Tendril::decode_utf8_lossy` returns one of these when `utf8::decode`
/// reports that its input ended with an incomplete suffix. Feed it further
/// bytes with [`IncompleteUtf8::try_complete`].
pub struct IncompleteUtf8(utf8::Incomplete);
11
12impl<A> Tendril<fmt::Bytes, A>
13where
14    A: Atomicity,
15{
16    pub fn decode_utf8_lossy<F>(mut self, mut push_utf8: F) -> Option<IncompleteUtf8>
17    where
18        F: FnMut(Tendril<fmt::UTF8, A>),
19    {
20        loop {
21            if self.is_empty() {
22                return None;
23            }
24            let unborrowed_result = match utf8::decode(&self) {
25                Ok(s) => {
26                    debug_assert!(s.as_ptr() == self.as_ptr());
27                    debug_assert!(s.len() == self.len());
28                    Ok(())
29                },
30                Err(utf8::DecodeError::Invalid {
31                    valid_prefix,
32                    invalid_sequence,
33                    ..
34                }) => {
35                    debug_assert!(valid_prefix.as_ptr() == self.as_ptr());
36                    debug_assert!(valid_prefix.len() <= self.len());
37                    Err((
38                        valid_prefix.len(),
39                        Err(valid_prefix.len() + invalid_sequence.len()),
40                    ))
41                },
42                Err(utf8::DecodeError::Incomplete {
43                    valid_prefix,
44                    incomplete_suffix,
45                }) => {
46                    debug_assert!(valid_prefix.as_ptr() == self.as_ptr());
47                    debug_assert!(valid_prefix.len() <= self.len());
48                    Err((valid_prefix.len(), Ok(incomplete_suffix)))
49                },
50            };
51            match unborrowed_result {
52                Ok(()) => {
53                    unsafe { push_utf8(self.reinterpret_without_validating()) }
54                    return None;
55                },
56                Err((valid_len, and_then)) => {
57                    if valid_len > 0 {
58                        let subtendril = self.subtendril(0, valid_len as u32);
59                        unsafe { push_utf8(subtendril.reinterpret_without_validating()) }
60                    }
61                    match and_then {
62                        Ok(incomplete) => return Some(IncompleteUtf8(incomplete)),
63                        Err(offset) => {
64                            push_utf8(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER));
65                            self.pop_front(offset as u32)
66                        },
67                    }
68                },
69            }
70        }
71    }
72}
73
74impl IncompleteUtf8 {
75    pub fn try_complete<A, F>(
76        &mut self,
77        mut input: Tendril<fmt::Bytes, A>,
78        mut push_utf8: F,
79    ) -> Result<Tendril<fmt::Bytes, A>, ()>
80    where
81        A: Atomicity,
82        F: FnMut(Tendril<fmt::UTF8, A>),
83    {
84        let resume_at = match self.0.try_complete(&input) {
85            None => return Err(()),
86            Some((result, rest)) => {
87                push_utf8(Tendril::from_slice(
88                    result.unwrap_or(utf8::REPLACEMENT_CHARACTER),
89                ));
90                input.len() - rest.len()
91            },
92        };
93        input.pop_front(resume_at as u32);
94        Ok(input)
95    }
96}