1use crate::tendril::StrTendril;
11
12pub(crate) fn extract_a_character_encoding_from_a_meta_element(
14 input: StrTendril,
15) -> Option<StrTendril> {
16 let mut position = 0;
18 loop {
19 loop {
22 let candidate = input.as_bytes().get(position..position + "charset".len())?;
23 if candidate.eq_ignore_ascii_case(b"charset") {
24 break;
25 }
26
27 position += 1;
28 }
29 position += "charset".len();
30
31 position += input.as_bytes()[position..]
33 .iter()
34 .take_while(|byte| byte.is_ascii_whitespace())
35 .count();
36
37 if input.as_bytes()[position] == b'=' {
40 break;
41 }
42 }
43 position += 1;
45
46 position += input.as_bytes()[position..]
48 .iter()
49 .take_while(|byte| byte.is_ascii_whitespace())
50 .count();
51
52 match input.as_bytes().get(position)? {
54 quote @ (b'"' | b'\'') => {
55 let length = input.as_bytes()[position + 1..]
58 .iter()
59 .position(|byte| byte == quote)?;
60 Some(input.subtendril(position as u32 + 1, length as u32))
61 },
62 _ => {
63 let length = input.as_bytes()[position..]
67 .iter()
68 .position(|byte| byte.is_ascii_whitespace() || *byte == b';');
69 if let Some(length) = length {
70 Some(input.subtendril(position as u32, length as u32))
71 } else {
72 Some(input.subtendril(position as u32, (input.len() - position) as u32))
73 }
74 },
75 }
76}
77
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the extraction on `content` and checks that the outcome equals
    /// `expected` (`None` meaning that no encoding label is found).
    ///
    /// `#[track_caller]` makes a failure point at the calling test line.
    #[track_caller]
    fn assert_extracts(content: &str, expected: Option<&str>) {
        let actual =
            extract_a_character_encoding_from_a_meta_element(StrTendril::from_slice(content));
        assert_eq!(actual, expected.map(StrTendril::from_slice));
    }

    /// Input that never mentions `charset` yields nothing.
    #[test]
    fn meta_element_without_charset() {
        assert_extracts("foobar", None);
    }

    /// The `charset` keyword is matched case-insensitively.
    #[test]
    fn meta_element_with_capitalized_charset() {
        assert_extracts("cHarSet=utf8", Some("utf8"));
    }

    /// A `charset` keyword that is not followed by `=` is ignored.
    #[test]
    fn meta_element_with_no_equals_after_charset() {
        assert_extracts("charset utf8", None);
    }

    /// Whitespace on either side of the `=` is skipped.
    #[test]
    fn meta_element_with_whitespace_around_equals() {
        assert_extracts("charset \t=\tutf8", Some("utf8"));
    }

    /// Quoted values are returned without their quotes; an unterminated
    /// quote yields nothing.
    #[test]
    fn meta_element_with_quoted_value() {
        assert_extracts("charset='utf8'", Some("utf8"));
        assert_extracts("charset=\"utf8\"", Some("utf8"));
        assert_extracts("charset='utf8", None);
        assert_extracts("charset=\"utf8", None);
    }

    /// Unquoted values end at the first whitespace or semicolon.
    #[test]
    fn meta_element_with_implicit_terminator() {
        assert_extracts("charset=utf8 foo", Some("utf8"));
        assert_extracts("charset=utf8;foo", Some("utf8"));
    }

    /// The keyword is found even when it is preceded by a content type.
    #[test]
    fn meta_element_with_content_type() {
        assert_extracts("text/html; charset=utf8", Some("utf8"));
    }
}