use self::Token::*;
use crate::cow_rc_str::CowRcStr;
use crate::parser::{ArbitrarySubstitutionFunctions, ParserState};
use std::char;
use std::ops::Range;

#[cfg(not(feature = "dummy_match_byte"))]
use cssparser_macros::match_byte;

#[cfg(feature = "dummy_match_byte")]
macro_rules! match_byte {
    ($value:expr, $($rest:tt)* ) => {
        match $value {
            $(
                $rest
            )+
        }
    };
}

/// One of the tokens defined by the CSS syntax.
#[derive(PartialEq, Debug, Clone)]
pub enum Token<'a> {
    /// An identifier.
    Ident(CowRcStr<'a>),

    /// An at-keyword; the value does not include the `@` marker.
    AtKeyword(CowRcStr<'a>),

    /// A `#...` hash whose value does not start like an identifier;
    /// the value does not include the `#` marker.
    Hash(CowRcStr<'a>),

    /// A `#...` hash whose value starts like an identifier;
    /// the value does not include the `#` marker.
    IDHash(CowRcStr<'a>),

    /// A quoted string; the value does not include the quotes.
    QuotedString(CowRcStr<'a>),

    /// The value of an unquoted `url(...)`; does not include the
    /// `url(` and `)` markers.
    UnquotedUrl(CowRcStr<'a>),

    /// Any other single character.
    Delim(char),

    /// A number.
    Number {
        /// Whether the number had a `+` or `-` sign.
        has_sign: bool,
        /// The value as a float.
        value: f32,
        /// If the source did not include a fractional part, the value as an integer.
        int_value: Option<i32>,
    },

    /// A percentage.
    Percentage {
        /// Whether the number had a `+` or `-` sign.
        has_sign: bool,
        /// The value as a float, divided by 100 (so `50%` is `0.5`).
        unit_value: f32,
        /// If the source did not include a fractional part, the value as an
        /// integer (the raw number, before division by 100).
        int_value: Option<i32>,
    },

    /// A dimension: a number immediately followed by a unit identifier.
    Dimension {
        /// Whether the number had a `+` or `-` sign.
        has_sign: bool,
        /// The value as a float.
        value: f32,
        /// If the source did not include a fractional part, the value as an integer.
        int_value: Option<i32>,
        /// The unit, e.g. `"px"` in `12px`.
        unit: CowRcStr<'a>,
    },

    /// A run of whitespace.
    WhiteSpace(&'a str),

    /// A comment; the value does not include the `/*` and `*/` markers.
    Comment(&'a str),

    /// `:`
    Colon,

    /// `;`
    Semicolon,

    /// `,`
    Comma,

    /// `~=`
    IncludeMatch,

    /// `|=`
    DashMatch,

    /// `^=`
    PrefixMatch,

    /// `$=`
    SuffixMatch,

    /// `*=`
    SubstringMatch,

    /// `<!--`
    CDO,

    /// `-->`
    CDC,

    /// A function call: a name immediately followed by `(`;
    /// the value does not include the parenthesis.
    Function(CowRcStr<'a>),

    /// `(`
    ParenthesisBlock,

    /// `[`
    SquareBracketBlock,

    /// `{`
    CurlyBracketBlock,

    /// The contents of a `url(...)` that could not be tokenized as an
    /// unquoted URL.
    BadUrl(CowRcStr<'a>),

    /// A string that was ended prematurely by an unescaped newline.
    BadString(CowRcStr<'a>),

    /// An unmatched `)`.
    CloseParenthesis,

    /// An unmatched `]`.
    CloseSquareBracket,

    /// An unmatched `}`.
    CloseCurlyBracket,
}

impl Token<'_> {
    /// Returns whether this token represents a parse error.
    ///
    /// `BadUrl` and `BadString` are tokenizer-level parse errors, while the
    /// unmatched `)`, `]`, and `}` tokens are parser-level parse errors.
    pub fn is_parse_error(&self) -> bool {
        matches!(
            *self,
            BadUrl(_) | BadString(_) | CloseParenthesis | CloseSquareBracket | CloseCurlyBracket
        )
    }
}
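
// A minimal smoke test, added here as an illustrative sketch (not part of the
// original source): it shows the tokens produced for a tiny declaration and
// that `is_parse_error` holds for the error tokens.
#[cfg(test)]
mod token_examples {
    use super::*;

    #[test]
    fn tokenizes_a_simple_declaration() {
        let mut tokenizer = Tokenizer::new("color: #fff;");
        assert_eq!(tokenizer.next(), Ok(Ident("color".into())));
        assert_eq!(tokenizer.next(), Ok(Colon));
        assert_eq!(tokenizer.next(), Ok(WhiteSpace(" ")));
        // "#fff" starts like an identifier, so it lexes as `IDHash`, not `Hash`.
        assert_eq!(tokenizer.next(), Ok(IDHash("fff".into())));
        assert_eq!(tokenizer.next(), Ok(Semicolon));
        // End of input is reported as `Err(())`, not as a token.
        assert!(tokenizer.next().is_err());

        assert!(BadString("x".into()).is_parse_error());
        assert!(!Comma.is_parse_error());
    }
}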

#[derive(Clone)]
pub struct Tokenizer<'a> {
    input: &'a str,
    /// Counted in bytes, not code points. From 0.
    position: usize,
    /// The position at the start of the current line; but adjusted as if the
    /// line were measured in UTF-16 code units rather than UTF-8 bytes, so
    /// that column numbers can be computed by simple subtraction.
    current_line_start_position: usize,
    current_line_number: u32,
    arbitrary_substitution_functions: SeenStatus<'a>,
    source_map_url: Option<&'a str>,
    source_url: Option<&'a str>,
}

#[derive(Copy, Clone, PartialEq, Eq)]
enum SeenStatus<'a> {
    /// The caller is not looking for arbitrary substitution functions.
    DontCare,
    /// The caller wants to know whether any of these function names is seen.
    LookingForThem(ArbitrarySubstitutionFunctions<'a>),
    /// At least one of the functions has been seen.
    SeenAtLeastOne,
}

impl<'a> Tokenizer<'a> {
    #[inline]
    pub fn new(input: &'a str) -> Self {
        Tokenizer {
            input,
            position: 0,
            current_line_start_position: 0,
            current_line_number: 0,
            arbitrary_substitution_functions: SeenStatus::DontCare,
            source_map_url: None,
            source_url: None,
        }
    }

    #[inline]
    pub fn look_for_arbitrary_substitution_functions(
        &mut self,
        fns: ArbitrarySubstitutionFunctions<'a>,
    ) {
        self.arbitrary_substitution_functions = SeenStatus::LookingForThem(fns);
    }

    #[inline]
    pub fn seen_arbitrary_substitution_functions(&mut self) -> bool {
        let seen = self.arbitrary_substitution_functions == SeenStatus::SeenAtLeastOne;
        self.arbitrary_substitution_functions = SeenStatus::DontCare;
        seen
    }

    #[inline]
    pub fn see_function(&mut self, name: &str) {
        if let SeenStatus::LookingForThem(fns) = self.arbitrary_substitution_functions {
            if fns.iter().any(|a| name.eq_ignore_ascii_case(a)) {
                self.arbitrary_substitution_functions = SeenStatus::SeenAtLeastOne;
            }
        }
    }

    #[inline]
    pub fn next(&mut self) -> Result<Token<'a>, ()> {
        next_token(self)
    }

    #[inline]
    pub fn position(&self) -> SourcePosition {
        debug_assert!(self.input.is_char_boundary(self.position));
        SourcePosition(self.position)
    }

    #[inline]
    pub fn current_source_location(&self) -> SourceLocation {
        SourceLocation {
            line: self.current_line_number,
            column: (self.position - self.current_line_start_position + 1) as u32,
        }
    }

    #[inline]
    pub fn current_source_map_url(&self) -> Option<&'a str> {
        self.source_map_url
    }

    #[inline]
    pub fn current_source_url(&self) -> Option<&'a str> {
        self.source_url
    }

    #[inline]
    pub fn state(&self) -> ParserState {
        ParserState {
            position: self.position,
            current_line_start_position: self.current_line_start_position,
            current_line_number: self.current_line_number,
            at_start_of: None,
        }
    }

    #[inline]
    pub fn reset(&mut self, state: &ParserState) {
        self.position = state.position;
        self.current_line_start_position = state.current_line_start_position;
        self.current_line_number = state.current_line_number;
    }

    #[inline]
    pub(crate) fn slice_from(&self, start_pos: SourcePosition) -> &'a str {
        self.slice(start_pos..self.position())
    }

    #[inline]
    pub(crate) fn slice(&self, range: Range<SourcePosition>) -> &'a str {
        debug_assert!(self.input.is_char_boundary(range.start.0));
        debug_assert!(self.input.is_char_boundary(range.end.0));
        unsafe { self.input.get_unchecked(range.start.0..range.end.0) }
    }

    pub fn current_source_line(&self) -> &'a str {
        let current = self.position();
        let start = self
            .slice(SourcePosition(0)..current)
            .rfind(['\r', '\n', '\x0C'])
            .map_or(0, |start| start + 1);
        let end = self
            .slice(current..SourcePosition(self.input.len()))
            .find(['\r', '\n', '\x0C'])
            .map_or(self.input.len(), |end| current.0 + end);
        self.slice(SourcePosition(start)..SourcePosition(end))
    }

    #[inline]
    pub fn next_byte(&self) -> Option<u8> {
        if self.is_eof() {
            None
        } else {
            Some(self.input.as_bytes()[self.position])
        }
    }

    // If false, `tokenizer.next_byte()` will not return `None`.
    #[inline]
    fn is_eof(&self) -> bool {
        !self.has_at_least(0)
    }

    // If true, the input has at least `n` bytes left *after* the current one.
    // That is, `tokenizer.byte_at(n)` will not panic.
    #[inline]
    fn has_at_least(&self, n: usize) -> bool {
        self.position + n < self.input.len()
    }

    // Advance over `n` bytes in the input. This function can only be used
    // for bytes that do not affect the line/column bookkeeping: ASCII bytes
    // other than newlines, or UTF-8 sequence leaders for sequences shorter
    // than 4 bytes.
    #[inline]
    pub fn advance(&mut self, n: usize) {
        if cfg!(debug_assertions) {
            for i in 0..n {
                let b = self.byte_at(i);
                debug_assert!(b.is_ascii() || (b & 0xF0 != 0xF0 && b & 0xC0 != 0x80));
                debug_assert!(b != b'\r' && b != b'\n' && b != b'\x0C');
            }
        }
        self.position += n
    }

    // Assumes non-EOF.
    #[inline]
    fn next_byte_unchecked(&self) -> u8 {
        self.byte_at(0)
    }

    #[inline]
    fn byte_at(&self, offset: usize) -> u8 {
        self.input.as_bytes()[self.position + offset]
    }

    // Advance over a single byte; the byte must be the leader of a 4-byte
    // UTF-8 sequence.
    #[inline]
    fn consume_4byte_intro(&mut self) {
        debug_assert!(self.next_byte_unchecked() & 0xF0 == 0xF0);
        // This code point takes two UTF-16 code units to represent, so the
        // column counter would otherwise undercount by one.
        self.current_line_start_position = self.current_line_start_position.wrapping_sub(1);
        self.position += 1;
    }

    // Advance over a single byte; the byte must be a UTF-8 continuation byte.
    #[inline]
    fn consume_continuation_byte(&mut self) {
        debug_assert!(self.next_byte_unchecked() & 0xC0 == 0x80);
        // Continuation bytes contribute to a column overcount; note that due
        // to the special case for 4-byte sequence leaders, this must use a
        // wrapping add.
        self.current_line_start_position = self.current_line_start_position.wrapping_add(1);
        self.position += 1;
    }

    // Advance over any kind of byte, excluding newlines.
    #[inline(never)]
    fn consume_known_byte(&mut self, byte: u8) {
        debug_assert!(byte != b'\r' && byte != b'\n' && byte != b'\x0C');
        self.position += 1;
        // Keep the UTF-16 column adjustment consistent with the two
        // specialized methods above.
        if byte & 0xF0 == 0xF0 {
            self.current_line_start_position = self.current_line_start_position.wrapping_sub(1);
        } else if byte & 0xC0 == 0x80 {
            self.current_line_start_position = self.current_line_start_position.wrapping_add(1);
        }
    }

    #[inline]
    fn next_char(&self) -> char {
        unsafe { self.input.get_unchecked(self.position().0..) }
            .chars()
            .next()
            .unwrap()
    }

    // Given that a newline has been seen, advance over it (treating "\r\n"
    // as one newline) and update the line bookkeeping.
    #[inline]
    fn consume_newline(&mut self) {
        let byte = self.next_byte_unchecked();
        debug_assert!(byte == b'\r' || byte == b'\n' || byte == b'\x0C');
        self.position += 1;
        if byte == b'\r' && self.next_byte() == Some(b'\n') {
            self.position += 1;
        }
        self.current_line_start_position = self.position;
        self.current_line_number += 1;
    }

    #[inline]
    fn has_newline_at(&self, offset: usize) -> bool {
        self.position + offset < self.input.len()
            && matches!(self.byte_at(offset), b'\n' | b'\r' | b'\x0C')
    }

    #[inline]
    fn consume_char(&mut self) -> char {
        let c = self.next_char();
        let len_utf8 = c.len_utf8();
        self.position += len_utf8;
        // Adjust the column counter by the difference between the UTF-8 and
        // UTF-16 lengths of the code point, with the usual wrapping add.
        self.current_line_start_position = self
            .current_line_start_position
            .wrapping_add(len_utf8 - c.len_utf16());
        c
    }

    #[inline]
    fn starts_with(&self, needle: &[u8]) -> bool {
        self.input.as_bytes()[self.position..].starts_with(needle)
    }

    pub fn skip_whitespace(&mut self) {
        while !self.is_eof() {
            match_byte! { self.next_byte_unchecked(),
                b' ' | b'\t' => {
                    self.advance(1)
                },
                b'\n' | b'\x0C' | b'\r' => {
                    self.consume_newline();
                },
                b'/' => {
                    if self.starts_with(b"/*") {
                        consume_comment(self);
                    } else {
                        return
                    }
                }
                _ => return,
            }
        }
    }

    pub fn skip_cdc_and_cdo(&mut self) {
        while !self.is_eof() {
            match_byte! { self.next_byte_unchecked(),
                b' ' | b'\t' => {
                    self.advance(1)
                },
                b'\n' | b'\x0C' | b'\r' => {
                    self.consume_newline();
                },
                b'/' => {
                    if self.starts_with(b"/*") {
                        consume_comment(self);
                    } else {
                        return
                    }
                }
                b'<' => {
                    if self.starts_with(b"<!--") {
                        self.advance(4)
                    } else {
                        return
                    }
                }
                b'-' => {
                    if self.starts_with(b"-->") {
                        self.advance(3)
                    } else {
                        return
                    }
                }
                _ => {
                    return
                }
            }
        }
    }
}
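
// An illustrative sketch (not from the original source) of the position and
// line/column bookkeeping: lines are 0-based, columns are 1-based, and
// `consume_newline` resets the column at each line break.
#[cfg(test)]
mod location_examples {
    use super::*;

    #[test]
    fn tracks_lines_and_columns() {
        let mut tokenizer = Tokenizer::new("a\nbc");
        assert_eq!(
            tokenizer.current_source_location(),
            SourceLocation { line: 0, column: 1 }
        );
        assert_eq!(tokenizer.next(), Ok(Ident("a".into())));
        assert_eq!(tokenizer.next(), Ok(WhiteSpace("\n")));
        // After the newline we are at the start of line 1, column 1.
        assert_eq!(
            tokenizer.current_source_location(),
            SourceLocation { line: 1, column: 1 }
        );
        assert_eq!(tokenizer.current_source_line(), "bc");
    }
}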

/// A position from the start of the input, counted in UTF-8 bytes.
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
pub struct SourcePosition(pub(crate) usize);

#[cfg(feature = "malloc_size_of")]
malloc_size_of::malloc_size_of_is_0!(SourcePosition);

impl SourcePosition {
    /// Returns the current byte index in the original input.
    #[inline]
    pub fn byte_index(&self) -> usize {
        self.0
    }
}

/// The line and column number for a given position within the input.
#[derive(PartialEq, Eq, Debug, Clone, Copy, Default)]
pub struct SourceLocation {
    /// The line number, starting at 0 for the first line.
    pub line: u32,

    /// The column number within a line, starting at 1 for the first character
    /// of the line. Columns are counted in UTF-16 code units.
    pub column: u32,
}

#[cfg(feature = "malloc_size_of")]
malloc_size_of::malloc_size_of_is_0!(SourceLocation);

fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
    if tokenizer.is_eof() {
        return Err(());
    }
    let b = tokenizer.next_byte_unchecked();
    let token = match_byte! { b,
        b' ' | b'\t' => {
            consume_whitespace(tokenizer, false)
        },
        b'\n' | b'\x0C' | b'\r' => consume_whitespace(tokenizer, true),
        b'"' => consume_string(tokenizer, false),
        b'#' => {
            tokenizer.advance(1);
            if is_ident_start(tokenizer) { IDHash(consume_name(tokenizer)) }
            else if !tokenizer.is_eof() &&
                matches!(tokenizer.next_byte_unchecked(), b'0'..=b'9' | b'-') {
                Hash(consume_name(tokenizer))
            }
            else { Delim('#') }
        },
        b'$' => {
            if tokenizer.starts_with(b"$=") { tokenizer.advance(2); SuffixMatch }
            else { tokenizer.advance(1); Delim('$') }
        },
        b'\'' => consume_string(tokenizer, true),
        b'(' => { tokenizer.advance(1); ParenthesisBlock },
        b')' => { tokenizer.advance(1); CloseParenthesis },
        b'*' => {
            if tokenizer.starts_with(b"*=") { tokenizer.advance(2); SubstringMatch }
            else { tokenizer.advance(1); Delim('*') }
        },
        b'+' => {
            if (
                tokenizer.has_at_least(1)
                && tokenizer.byte_at(1).is_ascii_digit()
            ) || (
                tokenizer.has_at_least(2)
                && tokenizer.byte_at(1) == b'.'
                && tokenizer.byte_at(2).is_ascii_digit()
            ) {
                consume_numeric(tokenizer)
            } else {
                tokenizer.advance(1);
                Delim('+')
            }
        },
        b',' => { tokenizer.advance(1); Comma },
        b'-' => {
            if (
                tokenizer.has_at_least(1)
                && tokenizer.byte_at(1).is_ascii_digit()
            ) || (
                tokenizer.has_at_least(2)
                && tokenizer.byte_at(1) == b'.'
                && tokenizer.byte_at(2).is_ascii_digit()
            ) {
                consume_numeric(tokenizer)
            } else if tokenizer.starts_with(b"-->") {
                tokenizer.advance(3);
                CDC
            } else if is_ident_start(tokenizer) {
                consume_ident_like(tokenizer)
            } else {
                tokenizer.advance(1);
                Delim('-')
            }
        },
        b'.' => {
            if tokenizer.has_at_least(1)
                && tokenizer.byte_at(1).is_ascii_digit() {
                consume_numeric(tokenizer)
            } else {
                tokenizer.advance(1);
                Delim('.')
            }
        }
        b'/' => {
            if tokenizer.starts_with(b"/*") {
                Comment(consume_comment(tokenizer))
            } else {
                tokenizer.advance(1);
                Delim('/')
            }
        }
        b'0'..=b'9' => consume_numeric(tokenizer),
        b':' => { tokenizer.advance(1); Colon },
        b';' => { tokenizer.advance(1); Semicolon },
        b'<' => {
            if tokenizer.starts_with(b"<!--") {
                tokenizer.advance(4);
                CDO
            } else {
                tokenizer.advance(1);
                Delim('<')
            }
        },
        b'@' => {
            tokenizer.advance(1);
            if is_ident_start(tokenizer) { AtKeyword(consume_name(tokenizer)) }
            else { Delim('@') }
        },
        b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'\0' => consume_ident_like(tokenizer),
        b'[' => { tokenizer.advance(1); SquareBracketBlock },
        b'\\' => {
            if !tokenizer.has_newline_at(1) { consume_ident_like(tokenizer) }
            else { tokenizer.advance(1); Delim('\\') }
        },
        b']' => { tokenizer.advance(1); CloseSquareBracket },
        b'^' => {
            if tokenizer.starts_with(b"^=") { tokenizer.advance(2); PrefixMatch }
            else { tokenizer.advance(1); Delim('^') }
        },
        b'{' => { tokenizer.advance(1); CurlyBracketBlock },
        b'|' => {
            if tokenizer.starts_with(b"|=") { tokenizer.advance(2); DashMatch }
            else { tokenizer.advance(1); Delim('|') }
        },
        b'}' => { tokenizer.advance(1); CloseCurlyBracket },
        b'~' => {
            if tokenizer.starts_with(b"~=") { tokenizer.advance(2); IncludeMatch }
            else { tokenizer.advance(1); Delim('~') }
        },
        _ => {
            if !b.is_ascii() {
                consume_ident_like(tokenizer)
            } else {
                tokenizer.advance(1);
                Delim(b as char)
            }
        },
    };
    Ok(token)
}

fn consume_whitespace<'a>(tokenizer: &mut Tokenizer<'a>, newline: bool) -> Token<'a> {
    let start_position = tokenizer.position();
    if newline {
        tokenizer.consume_newline();
    } else {
        tokenizer.advance(1);
    }
    while !tokenizer.is_eof() {
        let b = tokenizer.next_byte_unchecked();
        match_byte! { b,
            b' ' | b'\t' => {
                tokenizer.advance(1);
            }
            b'\n' | b'\x0C' | b'\r' => {
                tokenizer.consume_newline();
            }
            _ => {
                break
            }
        }
    }
    WhiteSpace(tokenizer.slice_from(start_position))
}

// Check for sourceMappingURL or sourceURL comments and update the
// tokenizer's state appropriately.
fn check_for_source_map<'a>(tokenizer: &mut Tokenizer<'a>, contents: &'a str) {
    let directive = "# sourceMappingURL=";
    let directive_old = "@ sourceMappingURL=";

    // If there is a source map directive, extract the URL. Note that
    // `directive` and `directive_old` have the same length, so slicing by
    // `directive.len()` is correct for either prefix.
    if contents.starts_with(directive) || contents.starts_with(directive_old) {
        let contents = &contents[directive.len()..];
        tokenizer.source_map_url = contents.split([' ', '\t', '\x0C', '\r', '\n']).next();
    }

    let directive = "# sourceURL=";
    let directive_old = "@ sourceURL=";

    // If there is a source URL directive, extract the URL.
    if contents.starts_with(directive) || contents.starts_with(directive_old) {
        let contents = &contents[directive.len()..];
        tokenizer.source_url = contents.split([' ', '\t', '\x0C', '\r', '\n']).next()
    }
}

fn consume_comment<'a>(tokenizer: &mut Tokenizer<'a>) -> &'a str {
    tokenizer.advance(2); // consume "/*"
    let start_position = tokenizer.position();
    while !tokenizer.is_eof() {
        match_byte! { tokenizer.next_byte_unchecked(),
            b'*' => {
                let end_position = tokenizer.position();
                tokenizer.advance(1);
                if tokenizer.next_byte() == Some(b'/') {
                    tokenizer.advance(1);
                    let contents = tokenizer.slice(start_position..end_position);
                    check_for_source_map(tokenizer, contents);
                    return contents
                }
            }
            b'\n' | b'\x0C' | b'\r' => {
                tokenizer.consume_newline();
            }
            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
            _ => {
                // ASCII or other leading byte.
                tokenizer.advance(1);
            }
        }
    }
    // Unterminated comment: everything up to EOF is the contents.
    let contents = tokenizer.slice_from(start_position);
    check_for_source_map(tokenizer, contents);
    contents
}
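
// A sketch (added for illustration, not part of the original source) of the
// source-map directive handling: consuming a comment that carries a
// `sourceMappingURL` directive records the URL on the tokenizer.
#[cfg(test)]
mod source_map_examples {
    use super::*;

    #[test]
    fn records_source_map_url_from_comment() {
        let mut tokenizer = Tokenizer::new("/*# sourceMappingURL=foo.map */");
        // The comment's value excludes the "/*" and "*/" markers.
        assert_eq!(tokenizer.next(), Ok(Comment("# sourceMappingURL=foo.map ")));
        assert_eq!(tokenizer.current_source_map_url(), Some("foo.map"));
        assert_eq!(tokenizer.current_source_url(), None);
    }
}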

fn consume_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Token<'a> {
    match consume_quoted_string(tokenizer, single_quote) {
        Ok(value) => QuotedString(value),
        Err(value) => BadString(value),
    }
}

/// Returns `Err(..)` on syntax error (ie. unescaped newline)
fn consume_quoted_string<'a>(
    tokenizer: &mut Tokenizer<'a>,
    single_quote: bool,
) -> Result<CowRcStr<'a>, CowRcStr<'a>> {
    tokenizer.advance(1); // Skip the initial quote
    // start_pos is at a code point boundary, after " or '
    let start_pos = tokenizer.position();
    let mut string_bytes;
    loop {
        if tokenizer.is_eof() {
            return Ok(tokenizer.slice_from(start_pos).into());
        }
        match_byte! { tokenizer.next_byte_unchecked(),
            b'"' => {
                if !single_quote {
                    let value = tokenizer.slice_from(start_pos);
                    tokenizer.advance(1);
                    return Ok(value.into())
                }
                tokenizer.advance(1);
            }
            b'\'' => {
                if single_quote {
                    let value = tokenizer.slice_from(start_pos);
                    tokenizer.advance(1);
                    return Ok(value.into())
                }
                tokenizer.advance(1);
            }
            b'\\' | b'\0' => {
                // The input is UTF-8 since it comes from a `&str`, and both
                // start_pos and the current position are at code point
                // boundaries, so `string_bytes` is well-formed UTF-8.
                string_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
                break
            }
            b'\n' | b'\r' | b'\x0C' => {
                return Err(tokenizer.slice_from(start_pos).into())
            },
            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
            _ => {
                // ASCII or other leading byte.
                tokenizer.advance(1);
            }
        }
    }

    while !tokenizer.is_eof() {
        let b = tokenizer.next_byte_unchecked();
        match_byte! { b,
            b'\n' | b'\r' | b'\x0C' => {
                return Err(
                    // string_bytes is well-formed UTF-8, see other comments.
                    unsafe {
                        from_utf8_release_unchecked(string_bytes)
                    }.into()
                );
            }
            b'"' => {
                tokenizer.advance(1);
                if !single_quote {
                    break;
                }
            }
            b'\'' => {
                tokenizer.advance(1);
                if single_quote {
                    break;
                }
            }
            b'\\' => {
                tokenizer.advance(1);
                if !tokenizer.is_eof() {
                    match tokenizer.next_byte_unchecked() {
                        // Escaped newline
                        b'\n' | b'\x0C' | b'\r' => {
                            tokenizer.consume_newline();
                        }
                        // This pushes one well-formed code point.
                        _ => consume_escape_and_write(tokenizer, &mut string_bytes)
                    }
                }
                // else: escaped EOF, do nothing.
                continue;
            }
            b'\0' => {
                tokenizer.advance(1);
                string_bytes.extend("\u{FFFD}".as_bytes());
                continue;
            }
            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
            _ => {
                // ASCII or other leading byte.
                tokenizer.advance(1);
            },
        }

        // If this byte is part of a multi-byte code point, we end up copying
        // the whole code point byte by byte before this loop does anything else.
        string_bytes.push(b);
    }

    Ok(
        // string_bytes is well-formed UTF-8, see other comments.
        unsafe { from_utf8_release_unchecked(string_bytes) }.into(),
    )
}
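
// An illustrative sketch (not part of the original source): quoted strings,
// escapes within strings, and the bad-string recovery path.
#[cfg(test)]
mod string_examples {
    use super::*;

    #[test]
    fn consumes_quoted_strings() {
        // A backslash-escaped quote is resolved by `consume_escape_and_write`.
        let mut tokenizer = Tokenizer::new(r#""a\"b""#);
        assert_eq!(tokenizer.next(), Ok(QuotedString("a\"b".into())));

        // An unescaped newline inside a string yields `BadString` containing
        // what was consumed before the newline.
        let mut tokenizer = Tokenizer::new("\"a\nb\"");
        assert_eq!(tokenizer.next(), Ok(BadString("a".into())));
    }
}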

#[inline]
fn is_ident_start(tokenizer: &Tokenizer) -> bool {
    !tokenizer.is_eof()
        && match_byte! { tokenizer.next_byte_unchecked(),
            b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'\0' => true,
            b'-' => {
                tokenizer.has_at_least(1) && match_byte! { tokenizer.byte_at(1),
                    b'a'..=b'z' | b'A'..=b'Z' | b'-' | b'_' | b'\0' => {
                        true
                    }
                    b'\\' => !tokenizer.has_newline_at(1),
                    b => !b.is_ascii(),
                }
            },
            b'\\' => !tokenizer.has_newline_at(1),
            b => !b.is_ascii(),
        }
}

fn consume_ident_like<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
    let value = consume_name(tokenizer);
    if !tokenizer.is_eof() && tokenizer.next_byte_unchecked() == b'(' {
        tokenizer.advance(1);
        if value.eq_ignore_ascii_case("url") {
            consume_unquoted_url(tokenizer).unwrap_or(Function(value))
        } else {
            tokenizer.see_function(&value);
            Function(value)
        }
    } else {
        Ident(value)
    }
}

fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowRcStr<'a> {
    // start_pos is the end of the previous token, therefore at a code point boundary.
    let start_pos = tokenizer.position();
    let mut value_bytes;
    loop {
        if tokenizer.is_eof() {
            return tokenizer.slice_from(start_pos).into();
        }
        match_byte! { tokenizer.next_byte_unchecked(),
            b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' => tokenizer.advance(1),
            b'\\' | b'\0' => {
                // The input is UTF-8 since it comes from a `&str`, and both
                // start_pos and the current position are at code point
                // boundaries, so `value_bytes` is well-formed UTF-8.
                value_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
                break
            }
            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
            b'\xC0'..=b'\xEF' => { tokenizer.advance(1); }
            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
            _b => {
                return tokenizer.slice_from(start_pos).into();
            }
        }
    }

    while !tokenizer.is_eof() {
        let b = tokenizer.next_byte_unchecked();
        match_byte! { b,
            b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' => {
                tokenizer.advance(1);
                value_bytes.push(b) // ASCII
            }
            b'\\' => {
                if tokenizer.has_newline_at(1) { break }
                tokenizer.advance(1);
                // This pushes one well-formed code point.
                consume_escape_and_write(tokenizer, &mut value_bytes)
            }
            b'\0' => {
                tokenizer.advance(1);
                value_bytes.extend("\u{FFFD}".as_bytes());
            },
            b'\x80'..=b'\xBF' => {
                // This byte *is* part of a multi-byte code point; the whole
                // code point gets copied before this loop does anything else.
                tokenizer.consume_continuation_byte();
                value_bytes.push(b)
            }
            b'\xC0'..=b'\xEF' => {
                // This byte *starts* a multi-byte code point; the whole
                // code point gets copied before this loop does anything else.
                tokenizer.advance(1);
                value_bytes.push(b)
            }
            b'\xF0'..=b'\xFF' => {
                // This byte *starts* a 4-byte code point; the whole
                // code point gets copied before this loop does anything else.
                tokenizer.consume_4byte_intro();
                value_bytes.push(b)
            }
            _ => {
                // ASCII
                break;
            }
        }
    }
    // value_bytes is well-formed UTF-8, see other comments.
    unsafe { from_utf8_release_unchecked(value_bytes) }.into()
}
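
// A sketch (for illustration, not part of the original source) of
// `consume_name` edge cases: U+0000 is replaced with U+FFFD, and escapes
// become part of the name.
#[cfg(test)]
mod name_examples {
    use super::*;

    #[test]
    fn replaces_nul_and_resolves_escapes() {
        let mut tokenizer = Tokenizer::new("a\0b");
        assert_eq!(tokenizer.next(), Ok(Ident("a\u{FFFD}b".into())));

        // "\41 x" escapes U+0041 ('A'); the space terminates the escape.
        let mut tokenizer = Tokenizer::new("\\41 x");
        assert_eq!(tokenizer.next(), Ok(Ident("Ax".into())));
    }
}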

fn byte_to_hex_digit(b: u8) -> Option<u32> {
    Some(match_byte! { b,
        b'0' ..= b'9' => b - b'0',
        b'a' ..= b'f' => b - b'a' + 10,
        b'A' ..= b'F' => b - b'A' + 10,
        _ => {
            return None
        }
    } as u32)
}

fn byte_to_decimal_digit(b: u8) -> Option<u32> {
    if b.is_ascii_digit() {
        Some((b - b'0') as u32)
    } else {
        None
    }
}

fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
    // Parse [+-]?\d*(\.\d+)?([eE][+-]?\d+)?
    // This is only called when the caller has verified that the input starts
    // with a number, so there is at least one digit in \d*(\.\d+)?.
    let (has_sign, sign) = match tokenizer.next_byte_unchecked() {
        b'-' => (true, -1.),
        b'+' => (true, 1.),
        _ => (false, 1.),
    };
    if has_sign {
        tokenizer.advance(1);
    }

    let mut integral_part: f64 = 0.;
    while let Some(digit) = byte_to_decimal_digit(tokenizer.next_byte_unchecked()) {
        integral_part = integral_part * 10. + digit as f64;
        tokenizer.advance(1);
        if tokenizer.is_eof() {
            break;
        }
    }

    let mut is_integer = true;

    let mut fractional_part: f64 = 0.;
    if tokenizer.has_at_least(1)
        && tokenizer.next_byte_unchecked() == b'.'
        && tokenizer.byte_at(1).is_ascii_digit()
    {
        is_integer = false;
        tokenizer.advance(1); // Consume '.'
        let mut factor = 0.1;
        while let Some(digit) = byte_to_decimal_digit(tokenizer.next_byte_unchecked()) {
            fractional_part += digit as f64 * factor;
            factor *= 0.1;
            tokenizer.advance(1);
            if tokenizer.is_eof() {
                break;
            }
        }
    }

    let mut value = sign * (integral_part + fractional_part);

    if tokenizer.has_at_least(1)
        && matches!(tokenizer.next_byte_unchecked(), b'e' | b'E')
        && (tokenizer.byte_at(1).is_ascii_digit()
            || (tokenizer.has_at_least(2)
                && matches!(tokenizer.byte_at(1), b'+' | b'-')
                && tokenizer.byte_at(2).is_ascii_digit()))
    {
        is_integer = false;
        tokenizer.advance(1);
        let (has_sign, sign) = match tokenizer.next_byte_unchecked() {
            b'-' => (true, -1.),
            b'+' => (true, 1.),
            _ => (false, 1.),
        };
        if has_sign {
            tokenizer.advance(1);
        }
        let mut exponent: f64 = 0.;
        while let Some(digit) = byte_to_decimal_digit(tokenizer.next_byte_unchecked()) {
            exponent = exponent * 10. + digit as f64;
            tokenizer.advance(1);
            if tokenizer.is_eof() {
                break;
            }
        }
        value *= f64::powf(10., sign * exponent);
    }

    let int_value = if is_integer {
        Some(if value >= i32::MAX as f64 {
            i32::MAX
        } else if value <= i32::MIN as f64 {
            i32::MIN
        } else {
            value as i32
        })
    } else {
        None
    };

    if !tokenizer.is_eof() && tokenizer.next_byte_unchecked() == b'%' {
        tokenizer.advance(1);
        return Percentage {
            unit_value: (value / 100.) as f32,
            int_value,
            has_sign,
        };
    }
    let value = value as f32;
    if is_ident_start(tokenizer) {
        let unit = consume_name(tokenizer);
        Dimension {
            value,
            int_value,
            has_sign,
            unit,
        }
    } else {
        Number {
            value,
            int_value,
            has_sign,
        }
    }
}
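
// An illustrative sketch (not from the original source) of numeric
// tokenization: sign, fraction, and exponent fold into `value`, while
// `int_value` is only `Some` for integer notation.
#[cfg(test)]
mod numeric_examples {
    use super::*;

    #[test]
    fn numbers_percentages_and_dimensions() {
        let mut tokenizer = Tokenizer::new("+1.5e2px 50% 42");
        assert_eq!(
            tokenizer.next(),
            Ok(Dimension {
                has_sign: true,
                value: 150.0,
                int_value: None,
                unit: "px".into(),
            })
        );
        assert_eq!(tokenizer.next(), Ok(WhiteSpace(" ")));
        // Percentages keep the raw integer (50), but `unit_value` is divided by 100.
        assert_eq!(
            tokenizer.next(),
            Ok(Percentage {
                has_sign: false,
                unit_value: 0.5,
                int_value: Some(50),
            })
        );
        assert_eq!(tokenizer.next(), Ok(WhiteSpace(" ")));
        assert_eq!(
            tokenizer.next(),
            Ok(Number {
                has_sign: false,
                value: 42.0,
                int_value: Some(42),
            })
        );
    }
}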

#[inline]
unsafe fn from_utf8_release_unchecked(string_bytes: Vec<u8>) -> String {
    if cfg!(debug_assertions) {
        String::from_utf8(string_bytes).unwrap()
    } else {
        String::from_utf8_unchecked(string_bytes)
    }
}

fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
    // This is only called after "url(", so the current position is a code point boundary.
    let start_position = tokenizer.position;
    let from_start = &tokenizer.input[tokenizer.position..];
    let mut newlines = 0;
    let mut last_newline = 0;
    let mut found_printable_char = false;
    let mut iter = from_start.bytes().enumerate();
    loop {
        let (offset, b) = match iter.next() {
            Some(item) => item,
            None => {
                tokenizer.position = tokenizer.input.len();
                break;
            }
        };
        match_byte! { b,
            b' ' | b'\t' => {},
            b'\n' | b'\x0C' => {
                newlines += 1;
                last_newline = offset;
            }
            b'\r' => {
                if from_start.as_bytes().get(offset + 1) != Some(&b'\n') {
                    newlines += 1;
                    last_newline = offset;
                }
            }
            b'"' | b'\'' => return Err(()), // Do not advance
            b')' => {
                // Don't use advance, because we may be skipping
                // newlines here, and we want to avoid the assert.
                tokenizer.position += offset + 1;
                break
            }
            _ => {
                // Don't use advance, because we may be skipping
                // newlines here, and we want to avoid the assert.
                tokenizer.position += offset;
                found_printable_char = true;
                break
            }
        }
    }

    if newlines > 0 {
        tokenizer.current_line_number += newlines;
        // No need for wrapping_add here, because only ASCII bytes were
        // skipped, so there is no possible way to wrap.
        tokenizer.current_line_start_position = start_position + last_newline + 1;
    }

    if found_printable_char {
        // This function only consumed ASCII (whitespace) bytes,
        // so the current position is a code point boundary.
        return Ok(consume_unquoted_url_internal(tokenizer));
    } else {
        return Ok(UnquotedUrl("".into()));
    }

    fn consume_unquoted_url_internal<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
        // This function is only called with start_pos at a code point boundary.
        let start_pos = tokenizer.position();
        let mut string_bytes: Vec<u8>;
        loop {
            if tokenizer.is_eof() {
                return UnquotedUrl(tokenizer.slice_from(start_pos).into());
            }
            match_byte! { tokenizer.next_byte_unchecked(),
                b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
                    let value = tokenizer.slice_from(start_pos);
                    return consume_url_end(tokenizer, start_pos, value.into())
                }
                b')' => {
                    let value = tokenizer.slice_from(start_pos);
                    tokenizer.advance(1);
                    return UnquotedUrl(value.into())
                }
                b'\x01'..=b'\x08' | b'\x0B' | b'\x0E'..=b'\x1F' | b'\x7F' // non-printable
                    | b'"' | b'\'' | b'(' => {
                    tokenizer.advance(1);
                    return consume_bad_url(tokenizer, start_pos)
                },
                b'\\' | b'\0' => {
                    // The input is UTF-8 since it comes from a `&str`, and
                    // both start_pos and the current position are at code
                    // point boundaries, so `string_bytes` is well-formed UTF-8.
                    string_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
                    break
                }
                b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
                b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
                _ => {
                    // ASCII or other leading byte.
                    tokenizer.advance(1);
                }
            }
        }
        while !tokenizer.is_eof() {
            let b = tokenizer.next_byte_unchecked();
            match_byte! { b,
                b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
                    // string_bytes is well-formed UTF-8, see other comments.
                    let string = unsafe { from_utf8_release_unchecked(string_bytes) }.into();
                    return consume_url_end(tokenizer, start_pos, string)
                }
                b')' => {
                    tokenizer.advance(1);
                    break;
                }
                b'\x01'..=b'\x08' | b'\x0B' | b'\x0E'..=b'\x1F' | b'\x7F' // non-printable
                    | b'"' | b'\'' | b'(' => {
                    tokenizer.advance(1);
                    return consume_bad_url(tokenizer, start_pos);
                }
                b'\\' => {
                    tokenizer.advance(1);
                    if tokenizer.has_newline_at(0) {
                        return consume_bad_url(tokenizer, start_pos)
                    }

                    // This pushes one well-formed code point to string_bytes.
                    consume_escape_and_write(tokenizer, &mut string_bytes)
                },
                b'\0' => {
                    tokenizer.advance(1);
                    string_bytes.extend("\u{FFFD}".as_bytes());
                }
                b'\x80'..=b'\xBF' => {
                    // The whole code point gets copied byte by byte
                    // before this loop does anything else.
                    tokenizer.consume_continuation_byte();
                    string_bytes.push(b);
                }
                b'\xF0'..=b'\xFF' => {
                    // The whole code point gets copied byte by byte
                    // before this loop does anything else.
                    tokenizer.consume_4byte_intro();
                    string_bytes.push(b);
                }
                b => {
                    // ASCII or other leading byte.
                    tokenizer.advance(1);
                    string_bytes.push(b)
                }
            }
        }
        UnquotedUrl(
            // string_bytes is well-formed UTF-8, see other comments.
            unsafe { from_utf8_release_unchecked(string_bytes) }.into(),
        )
    }

    fn consume_url_end<'a>(
        tokenizer: &mut Tokenizer<'a>,
        start_pos: SourcePosition,
        string: CowRcStr<'a>,
    ) -> Token<'a> {
        while !tokenizer.is_eof() {
            match_byte! { tokenizer.next_byte_unchecked(),
                b')' => {
                    tokenizer.advance(1);
                    break
                }
                b' ' | b'\t' => { tokenizer.advance(1); }
                b'\n' | b'\x0C' | b'\r' => {
                    tokenizer.consume_newline();
                }
                b => {
                    tokenizer.consume_known_byte(b);
                    return consume_bad_url(tokenizer, start_pos);
                }
            }
        }
        UnquotedUrl(string)
    }

    fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>, start_pos: SourcePosition) -> Token<'a> {
        // Consume up to the closing )
        while !tokenizer.is_eof() {
            match_byte! { tokenizer.next_byte_unchecked(),
                b')' => {
                    let contents = tokenizer.slice_from(start_pos).into();
                    tokenizer.advance(1);
                    return BadUrl(contents)
                }
                b'\\' => {
                    tokenizer.advance(1);
                    if matches!(tokenizer.next_byte(), Some(b')') | Some(b'\\')) {
                        tokenizer.advance(1); // Skip an escaped ')' or '\'
                    }
                }
                b'\n' | b'\x0C' | b'\r' => {
                    tokenizer.consume_newline();
                }
                b => {
                    tokenizer.consume_known_byte(b);
                }
            }
        }
        BadUrl(tokenizer.slice_from(start_pos).into())
    }
}
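
// A sketch (added for illustration, not part of the original source) of
// unquoted URL handling: surrounding whitespace is trimmed, and an interior
// space forces the bad-url recovery path, which skips to the closing `)`.
#[cfg(test)]
mod url_examples {
    use super::*;

    #[test]
    fn unquoted_and_bad_urls() {
        let mut tokenizer = Tokenizer::new("url(  foo.png  )");
        assert_eq!(tokenizer.next(), Ok(UnquotedUrl("foo.png".into())));

        let mut tokenizer = Tokenizer::new("url(a b)");
        assert_eq!(tokenizer.next(), Ok(BadUrl("a b".into())));

        // A quote directly inside `url(` is not an unquoted URL; the caller
        // falls back to a `Function` token and parses a quoted argument.
        let mut tokenizer = Tokenizer::new("url('x')");
        assert_eq!(tokenizer.next(), Ok(Function("url".into())));
        assert_eq!(tokenizer.next(), Ok(QuotedString("x".into())));
    }
}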

// Returns (value, number of digits up to 6).
fn consume_hex_digits(tokenizer: &mut Tokenizer<'_>) -> (u32, u32) {
    let mut value = 0;
    let mut digits = 0;
    while digits < 6 && !tokenizer.is_eof() {
        match byte_to_hex_digit(tokenizer.next_byte_unchecked()) {
            Some(digit) => {
                value = value * 16 + digit;
                digits += 1;
                tokenizer.advance(1);
            }
            None => break,
        }
    }
    (value, digits)
}

// Same constraints as `consume_escape`, except the result is written into
// `bytes` instead of being returned.
fn consume_escape_and_write(tokenizer: &mut Tokenizer, bytes: &mut Vec<u8>) {
    bytes.extend(
        consume_escape(tokenizer)
            .encode_utf8(&mut [0; 4])
            .as_bytes(),
    )
}

// Assumes that the U+005C REVERSE SOLIDUS (\) has already been consumed and
// that the next input code point has already been verified to not be a newline.
fn consume_escape(tokenizer: &mut Tokenizer) -> char {
    if tokenizer.is_eof() {
        return '\u{FFFD}'; // Escaped EOF
    }
    match_byte! { tokenizer.next_byte_unchecked(),
        b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
            let (c, _) = consume_hex_digits(tokenizer);
            // A single whitespace character terminates the escape and is consumed.
            if !tokenizer.is_eof() {
                match_byte! { tokenizer.next_byte_unchecked(),
                    b' ' | b'\t' => {
                        tokenizer.advance(1)
                    }
                    b'\n' | b'\x0C' | b'\r' => {
                        tokenizer.consume_newline();
                    }
                    _ => {}
                }
            }
            static REPLACEMENT_CHAR: char = '\u{FFFD}';
            if c != 0 {
                let c = char::from_u32(c);
                c.unwrap_or(REPLACEMENT_CHAR)
            } else {
                REPLACEMENT_CHAR
            }
        },
        b'\0' => {
            tokenizer.advance(1);
            '\u{FFFD}'
        }
        _ => tokenizer.consume_char(),
    }
}
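
// An illustrative sketch (not from the original source) of escape decoding:
// hex escapes outside the valid range and the zero code point decode to
// U+FFFD REPLACEMENT CHARACTER.
#[cfg(test)]
mod escape_examples {
    use super::*;

    #[test]
    fn decodes_hex_escapes() {
        // U+00E9 (é) from a hex escape terminated by a space.
        let mut tokenizer = Tokenizer::new("\\e9 x");
        assert_eq!(tokenizer.next(), Ok(Ident("\u{e9}x".into())));

        // "\0" is a hex escape for the zero code point, which becomes U+FFFD.
        let mut tokenizer = Tokenizer::new("\\0");
        assert_eq!(tokenizer.next(), Ok(Ident("\u{FFFD}".into())));
    }
}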