use self::Token::*;
use crate::cow_rc_str::CowRcStr;
use crate::parser::ParserState;
use std::char;
use std::ops::Range;

#[cfg(not(feature = "dummy_match_byte"))]
use cssparser_macros::match_byte;

#[cfg(feature = "dummy_match_byte")]
macro_rules! match_byte {
    ($value:expr, $($rest:tt)* ) => {
        match $value {
            $(
                $rest
            )+
        }
    };
}

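// An illustration (not part of the build): both versions of `match_byte!`
// are used like a plain `match` on a byte, e.g.
//
//     match_byte! { b,
//         b'a'..=b'z' => handle_letter(),  // `handle_letter` is hypothetical
//         _ => handle_other(),             // so is `handle_other`
//     }
//
// The dummy macro above expands to exactly the equivalent `match`; the
// procedural macro from `cssparser_macros` is assumed to generate a more
// optimizable byte dispatch with the same semantics.
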
/// One of the tokens defined by the CSS Syntax Level 3 specification.
#[derive(PartialEq, Debug, Clone)]
pub enum Token<'a> {
    /// An `<ident-token>`.
    Ident(CowRcStr<'a>),

    /// An `<at-keyword-token>`. The value does not include the `@` marker.
    AtKeyword(CowRcStr<'a>),

    /// A `<hash-token>` with the type flag set to "unrestricted".
    /// The value does not include the `#` marker.
    Hash(CowRcStr<'a>),

    /// A `<hash-token>` with the type flag set to "id" (the value is a valid
    /// identifier). The value does not include the `#` marker.
    IDHash(CowRcStr<'a>),

    /// A `<string-token>`. The value does not include the quotes.
    QuotedString(CowRcStr<'a>),

    /// A `<url-token>`. The value does not include the `url(` and `)` markers.
    UnquotedUrl(CowRcStr<'a>),

    /// A `<delim-token>`: a single code point not matched by any other token.
    Delim(char),

    /// A `<number-token>`.
    Number {
        /// Whether the number had a `+` or `-` sign.
        has_sign: bool,

        /// The value as a float.
        value: f32,

        /// If the source did not include a fractional part or an exponent,
        /// the value as an integer.
        int_value: Option<i32>,
    },

    /// A `<percentage-token>`.
    Percentage {
        /// Whether the number had a `+` or `-` sign.
        has_sign: bool,

        /// The value as a float, divided by 100 so that `50%` is `0.5`.
        unit_value: f32,

        /// If the source number was an integer, that value. It is *not*
        /// divided by 100.
        int_value: Option<i32>,
    },

    /// A `<dimension-token>`.
    Dimension {
        /// Whether the number had a `+` or `-` sign.
        has_sign: bool,

        /// The numeric value as a float.
        value: f32,

        /// If the source number was an integer, the value as an integer.
        int_value: Option<i32>,

        /// The unit, e.g. "px" in `12px`.
        unit: CowRcStr<'a>,
    },

    /// A `<whitespace-token>`, with the whitespace as written in the source.
    WhiteSpace(&'a str),

    /// A comment. The value does not include the `/*` and `*/` markers.
    Comment(&'a str),

    /// A `:` `<colon-token>`.
    Colon,

    /// A `;` `<semicolon-token>`.
    Semicolon,

    /// A `,` `<comma-token>`.
    Comma,

    /// A `~=` include-match token.
    IncludeMatch,

    /// A `|=` dash-match token.
    DashMatch,

    /// A `^=` prefix-match token.
    PrefixMatch,

    /// A `$=` suffix-match token.
    SuffixMatch,

    /// A `*=` substring-match token.
    SubstringMatch,

    /// A `<!--` `<CDO-token>`.
    CDO,

    /// A `-->` `<CDC-token>`.
    CDC,

    /// A `<function-token>`. The value (the function name) does not include
    /// the `(` marker.
    Function(CowRcStr<'a>),

    /// A `<(-token>`.
    ParenthesisBlock,

    /// A `<[-token>`.
    SquareBracketBlock,

    /// A `<{-token>`.
    CurlyBracketBlock,

    /// A `<bad-url-token>`. This token always indicates a parse error.
    BadUrl(CowRcStr<'a>),

    /// A `<bad-string-token>`. This token always indicates a parse error.
    BadString(CowRcStr<'a>),

    /// A `<)-token>`. When not matching an opening token, this is a parse error.
    CloseParenthesis,

    /// A `<]-token>`. When not matching an opening token, this is a parse error.
    CloseSquareBracket,

    /// A `<}-token>`. When not matching an opening token, this is a parse error.
    CloseCurlyBracket,
}

impl Token<'_> {
    /// Returns true if the token represents a parse error: a `BadUrl` or
    /// `BadString`, or an unmatched closing token.
    pub fn is_parse_error(&self) -> bool {
        matches!(
            *self,
            BadUrl(_) | BadString(_) | CloseParenthesis | CloseSquareBracket | CloseCurlyBracket
        )
    }
}

#[derive(Clone)]
pub struct Tokenizer<'a> {
    input: &'a str,
    /// Counted in UTF-8 bytes, not code points. From 0.
    position: usize,
    /// The position at the start of the current line, adjusted so that
    /// `position - current_line_start_position` gives the column in units
    /// of UTF-16 code units.
    current_line_start_position: usize,
    current_line_number: u32,
    var_or_env_functions: SeenStatus,
    source_map_url: Option<&'a str>,
    source_url: Option<&'a str>,
}

#[derive(Copy, Clone, PartialEq, Eq)]
enum SeenStatus {
    DontCare,
    LookingForThem,
    SeenAtLeastOne,
}
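
// An illustration (not part of the original file) of the var()/env()
// detection flow, using the methods defined below:
//
//     let mut tokenizer = Tokenizer::new("color: var(--x)");
//     tokenizer.look_for_var_or_env_functions();
//     while tokenizer.next().is_ok() {}  // `see_function` fires on "var"
//     assert!(tokenizer.seen_var_or_env_functions());
//
// `see_function` is called from `consume_ident_like` for every function
// token other than `url()`.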

impl<'a> Tokenizer<'a> {
    #[inline]
    pub fn new(input: &str) -> Tokenizer {
        Tokenizer {
            input,
            position: 0,
            current_line_start_position: 0,
            current_line_number: 0,
            var_or_env_functions: SeenStatus::DontCare,
            source_map_url: None,
            source_url: None,
        }
    }

    #[inline]
    pub fn look_for_var_or_env_functions(&mut self) {
        self.var_or_env_functions = SeenStatus::LookingForThem;
    }

    #[inline]
    pub fn seen_var_or_env_functions(&mut self) -> bool {
        let seen = self.var_or_env_functions == SeenStatus::SeenAtLeastOne;
        self.var_or_env_functions = SeenStatus::DontCare;
        seen
    }

    #[inline]
    pub fn see_function(&mut self, name: &str) {
        if self.var_or_env_functions == SeenStatus::LookingForThem
            && (name.eq_ignore_ascii_case("var") || name.eq_ignore_ascii_case("env"))
        {
            self.var_or_env_functions = SeenStatus::SeenAtLeastOne;
        }
    }

    #[inline]
    pub fn next(&mut self) -> Result<Token<'a>, ()> {
        next_token(self)
    }

    #[inline]
    pub fn position(&self) -> SourcePosition {
        debug_assert!(self.input.is_char_boundary(self.position));
        SourcePosition(self.position)
    }

    #[inline]
    pub fn current_source_location(&self) -> SourceLocation {
        SourceLocation {
            line: self.current_line_number,
            column: (self.position - self.current_line_start_position + 1) as u32,
        }
    }

    #[inline]
    pub fn current_source_map_url(&self) -> Option<&'a str> {
        self.source_map_url
    }

    #[inline]
    pub fn current_source_url(&self) -> Option<&'a str> {
        self.source_url
    }

    #[inline]
    pub fn state(&self) -> ParserState {
        ParserState {
            position: self.position,
            current_line_start_position: self.current_line_start_position,
            current_line_number: self.current_line_number,
            at_start_of: None,
        }
    }

    #[inline]
    pub fn reset(&mut self, state: &ParserState) {
        self.position = state.position;
        self.current_line_start_position = state.current_line_start_position;
        self.current_line_number = state.current_line_number;
    }

    #[inline]
    pub(crate) fn slice_from(&self, start_pos: SourcePosition) -> &'a str {
        self.slice(start_pos..self.position())
    }

    #[inline]
    pub(crate) fn slice(&self, range: Range<SourcePosition>) -> &'a str {
        debug_assert!(self.input.is_char_boundary(range.start.0));
        debug_assert!(self.input.is_char_boundary(range.end.0));
        unsafe { self.input.get_unchecked(range.start.0..range.end.0) }
    }

    /// The source text of the line containing the current position,
    /// without the line terminator.
    pub fn current_source_line(&self) -> &'a str {
        let current = self.position();
        let start = self
            .slice(SourcePosition(0)..current)
            .rfind(['\r', '\n', '\x0C'])
            .map_or(0, |start| start + 1);
        let end = self
            .slice(current..SourcePosition(self.input.len()))
            .find(['\r', '\n', '\x0C'])
            .map_or(self.input.len(), |end| current.0 + end);
        self.slice(SourcePosition(start)..SourcePosition(end))
    }

    #[inline]
    pub fn next_byte(&self) -> Option<u8> {
        if self.is_eof() {
            None
        } else {
            Some(self.input.as_bytes()[self.position])
        }
    }

    #[inline]
    fn is_eof(&self) -> bool {
        !self.has_at_least(0)
    }

    // If true, `self.byte_at(n)` is in bounds; that is, at least `n + 1`
    // bytes of input remain.
    #[inline]
    fn has_at_least(&self, n: usize) -> bool {
        self.position + n < self.input.len()
    }

    // Advance over `n` bytes in the input. This function can only advance
    // over ASCII bytes (excluding newlines), or UTF-8 sequence leaders
    // (excluding leaders for 4-byte sequences), because those are the only
    // bytes that need no column-count adjustment.
    #[inline]
    pub fn advance(&mut self, n: usize) {
        if cfg!(debug_assertions) {
            for i in 0..n {
                let b = self.byte_at(i);
                debug_assert!(b.is_ascii() || (b & 0xF0 != 0xF0 && b & 0xC0 != 0x80));
                debug_assert!(b != b'\r' && b != b'\n' && b != b'\x0C');
            }
        }
        self.position += n
    }

    // Assumes (and depends on) the input not being exhausted.
    #[inline]
    fn next_byte_unchecked(&self) -> u8 {
        self.byte_at(0)
    }

    #[inline]
    fn byte_at(&self, offset: usize) -> u8 {
        self.input.as_bytes()[self.position + offset]
    }

    // Advance over a single byte; the byte must be a UTF-8 sequence leader
    // for a 4-byte sequence. Such a code point is two UTF-16 code units
    // wide, so nudge the line start back by one to keep the column count
    // in UTF-16 units.
    #[inline]
    fn consume_4byte_intro(&mut self) {
        debug_assert!(self.next_byte_unchecked() & 0xF0 == 0xF0);
        self.current_line_start_position = self.current_line_start_position.wrapping_sub(1);
        self.position += 1;
    }

    // Advance over a single byte; the byte must be a UTF-8 continuation
    // byte. Continuation bytes do not contribute to the column count, so
    // push the line start forward by one to compensate.
    #[inline]
    fn consume_continuation_byte(&mut self) {
        debug_assert!(self.next_byte_unchecked() & 0xC0 == 0x80);
        self.current_line_start_position = self.current_line_start_position.wrapping_add(1);
        self.position += 1;
    }

    // Advance over any kind of byte, excluding newlines, applying the same
    // column adjustments as the dedicated methods above.
    #[inline(never)]
    fn consume_known_byte(&mut self, byte: u8) {
        debug_assert!(byte != b'\r' && byte != b'\n' && byte != b'\x0C');
        self.position += 1;
        if byte & 0xF0 == 0xF0 {
            self.current_line_start_position = self.current_line_start_position.wrapping_sub(1);
        } else if byte & 0xC0 == 0x80 {
            self.current_line_start_position = self.current_line_start_position.wrapping_add(1);
        }
    }
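
    // Worked example of the column bookkeeping above, for the input "aé𝄞"
    // ("a": 1 UTF-8 byte / 1 UTF-16 unit; "é": 2 bytes / 1 unit;
    // "𝄞": 4 bytes / 2 units):
    //
    //   consume "a": position += 1, line start += 0      -> column grows by 1
    //   consume "é": position += 2, line start += 1      -> column grows by 1
    //   consume "𝄞": position += 4, line start += 3 - 1  -> column grows by 2
    //
    // In every case `position - current_line_start_position` grows by the
    // code point's UTF-16 length, which is what `current_source_location`
    // reports.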

    #[inline]
    fn next_char(&self) -> char {
        unsafe { self.input.get_unchecked(self.position().0..) }
            .chars()
            .next()
            .unwrap()
    }

    // Given that a newline byte is at `self.position`, advance over the
    // newline (treating "\r\n" as a single newline) and update the line
    // bookkeeping.
    #[inline]
    fn consume_newline(&mut self) {
        let byte = self.next_byte_unchecked();
        debug_assert!(byte == b'\r' || byte == b'\n' || byte == b'\x0C');
        self.position += 1;
        if byte == b'\r' && self.next_byte() == Some(b'\n') {
            self.position += 1;
        }
        self.current_line_start_position = self.position;
        self.current_line_number += 1;
    }

    #[inline]
    fn has_newline_at(&self, offset: usize) -> bool {
        self.position + offset < self.input.len()
            && matches!(self.byte_at(offset), b'\n' | b'\r' | b'\x0C')
    }

    #[inline]
    fn consume_char(&mut self) -> char {
        let c = self.next_char();
        let len_utf8 = c.len_utf8();
        self.position += len_utf8;
        // Note: because of the 4-byte-intro special case, the line start
        // may conceptually be "behind" the buffer, so use wrapping add.
        self.current_line_start_position = self
            .current_line_start_position
            .wrapping_add(len_utf8 - c.len_utf16());
        c
    }

    #[inline]
    fn starts_with(&self, needle: &[u8]) -> bool {
        self.input.as_bytes()[self.position..].starts_with(needle)
    }

    pub fn skip_whitespace(&mut self) {
        while !self.is_eof() {
            match_byte! { self.next_byte_unchecked(),
                b' ' | b'\t' => {
                    self.advance(1)
                },
                b'\n' | b'\x0C' | b'\r' => {
                    self.consume_newline();
                },
                b'/' => {
                    if self.starts_with(b"/*") {
                        consume_comment(self);
                    } else {
                        return
                    }
                }
                _ => return,
            }
        }
    }

    pub fn skip_cdc_and_cdo(&mut self) {
        while !self.is_eof() {
            match_byte! { self.next_byte_unchecked(),
                b' ' | b'\t' => {
                    self.advance(1)
                },
                b'\n' | b'\x0C' | b'\r' => {
                    self.consume_newline();
                },
                b'/' => {
                    if self.starts_with(b"/*") {
                        consume_comment(self);
                    } else {
                        return
                    }
                }
                b'<' => {
                    if self.starts_with(b"<!--") {
                        self.advance(4)
                    } else {
                        return
                    }
                }
                b'-' => {
                    if self.starts_with(b"-->") {
                        self.advance(3)
                    } else {
                        return
                    }
                }
                _ => {
                    return
                }
            }
        }
    }
}

/// A position from the start of the input, counted in UTF-8 bytes.
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
pub struct SourcePosition(pub(crate) usize);

#[cfg(feature = "malloc_size_of")]
malloc_size_of::malloc_size_of_is_0!(SourcePosition);

impl SourcePosition {
    /// The distance in UTF-8 bytes from the start of the input.
    #[inline]
    pub fn byte_index(&self) -> usize {
        self.0
    }
}

/// The line and column number of a given character within the input.
#[derive(PartialEq, Eq, Debug, Clone, Copy, Default)]
pub struct SourceLocation {
    /// The line number, starting at 0 for the first line.
    pub line: u32,

    /// The column number within a line, starting at 1 for the first
    /// character of the line. Columns are counted in UTF-16 code units.
    pub column: u32,
}

#[cfg(feature = "malloc_size_of")]
malloc_size_of::malloc_size_of_is_0!(SourceLocation);

fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
    if tokenizer.is_eof() {
        return Err(());
    }
    let b = tokenizer.next_byte_unchecked();
    let token = match_byte! { b,
        b' ' | b'\t' => {
            consume_whitespace(tokenizer, false)
        },
        b'\n' | b'\x0C' | b'\r' => consume_whitespace(tokenizer, true),
        b'"' => consume_string(tokenizer, false),
        b'#' => {
            tokenizer.advance(1);
            if is_ident_start(tokenizer) { IDHash(consume_name(tokenizer)) }
            else if !tokenizer.is_eof()
                && matches!(tokenizer.next_byte_unchecked(), b'0'..=b'9' | b'-')
            {
                Hash(consume_name(tokenizer))
            }
            else { Delim('#') }
        },
        b'$' => {
            if tokenizer.starts_with(b"$=") { tokenizer.advance(2); SuffixMatch }
            else { tokenizer.advance(1); Delim('$') }
        },
        b'\'' => consume_string(tokenizer, true),
        b'(' => { tokenizer.advance(1); ParenthesisBlock },
        b')' => { tokenizer.advance(1); CloseParenthesis },
        b'*' => {
            if tokenizer.starts_with(b"*=") { tokenizer.advance(2); SubstringMatch }
            else { tokenizer.advance(1); Delim('*') }
        },
        b'+' => {
            if (
                tokenizer.has_at_least(1)
                && tokenizer.byte_at(1).is_ascii_digit()
            ) || (
                tokenizer.has_at_least(2)
                && tokenizer.byte_at(1) == b'.'
                && tokenizer.byte_at(2).is_ascii_digit()
            ) {
                consume_numeric(tokenizer)
            } else {
                tokenizer.advance(1);
                Delim('+')
            }
        },
        b',' => { tokenizer.advance(1); Comma },
        b'-' => {
            if (
                tokenizer.has_at_least(1)
                && tokenizer.byte_at(1).is_ascii_digit()
            ) || (
                tokenizer.has_at_least(2)
                && tokenizer.byte_at(1) == b'.'
                && tokenizer.byte_at(2).is_ascii_digit()
            ) {
                consume_numeric(tokenizer)
            } else if tokenizer.starts_with(b"-->") {
                tokenizer.advance(3);
                CDC
            } else if is_ident_start(tokenizer) {
                consume_ident_like(tokenizer)
            } else {
                tokenizer.advance(1);
                Delim('-')
            }
        },
        b'.' => {
            if tokenizer.has_at_least(1)
                && tokenizer.byte_at(1).is_ascii_digit() {
                consume_numeric(tokenizer)
            } else {
                tokenizer.advance(1);
                Delim('.')
            }
        }
        b'/' => {
            if tokenizer.starts_with(b"/*") {
                Comment(consume_comment(tokenizer))
            } else {
                tokenizer.advance(1);
                Delim('/')
            }
        }
        b'0'..=b'9' => consume_numeric(tokenizer),
        b':' => { tokenizer.advance(1); Colon },
        b';' => { tokenizer.advance(1); Semicolon },
        b'<' => {
            if tokenizer.starts_with(b"<!--") {
                tokenizer.advance(4);
                CDO
            } else {
                tokenizer.advance(1);
                Delim('<')
            }
        },
        b'@' => {
            tokenizer.advance(1);
            if is_ident_start(tokenizer) { AtKeyword(consume_name(tokenizer)) }
            else { Delim('@') }
        },
        b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'\0' => consume_ident_like(tokenizer),
        b'[' => { tokenizer.advance(1); SquareBracketBlock },
        b'\\' => {
            if !tokenizer.has_newline_at(1) { consume_ident_like(tokenizer) }
            else { tokenizer.advance(1); Delim('\\') }
        },
        b']' => { tokenizer.advance(1); CloseSquareBracket },
        b'^' => {
            if tokenizer.starts_with(b"^=") { tokenizer.advance(2); PrefixMatch }
            else { tokenizer.advance(1); Delim('^') }
        },
        b'{' => { tokenizer.advance(1); CurlyBracketBlock },
        b'|' => {
            if tokenizer.starts_with(b"|=") { tokenizer.advance(2); DashMatch }
            else { tokenizer.advance(1); Delim('|') }
        },
        b'}' => { tokenizer.advance(1); CloseCurlyBracket },
        b'~' => {
            if tokenizer.starts_with(b"~=") { tokenizer.advance(2); IncludeMatch }
            else { tokenizer.advance(1); Delim('~') }
        },
        _ => {
            if !b.is_ascii() {
                consume_ident_like(tokenizer)
            } else {
                tokenizer.advance(1);
                Delim(b as char)
            }
        },
    };
    Ok(token)
}
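
// For illustration, `next_token` maps the input `a{b:1}` to the sequence
// Ident("a"), CurlyBracketBlock, Ident("b"), Colon,
// Number { value: 1.0, int_value: Some(1), has_sign: false },
// CloseCurlyBracket, and then Err(()) at end of input.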

fn consume_whitespace<'a>(tokenizer: &mut Tokenizer<'a>, newline: bool) -> Token<'a> {
    let start_position = tokenizer.position();
    if newline {
        tokenizer.consume_newline();
    } else {
        tokenizer.advance(1);
    }
    while !tokenizer.is_eof() {
        let b = tokenizer.next_byte_unchecked();
        match_byte! { b,
            b' ' | b'\t' => {
                tokenizer.advance(1);
            }
            b'\n' | b'\x0C' | b'\r' => {
                tokenizer.consume_newline();
            }
            _ => {
                break
            }
        }
    }
    WhiteSpace(tokenizer.slice_from(start_position))
}

fn check_for_source_map<'a>(tokenizer: &mut Tokenizer<'a>, contents: &'a str) {
    let directive = "# sourceMappingURL=";
    let directive_old = "@ sourceMappingURL=";

    // If the comment is a source map directive, extract the URL. Note that
    // the current and legacy directives have the same length, so a single
    // `directive.len()` works for both.
    if contents.starts_with(directive) || contents.starts_with(directive_old) {
        let contents = &contents[directive.len()..];
        tokenizer.source_map_url = contents.split([' ', '\t', '\x0C', '\r', '\n']).next();
    }

    let directive = "# sourceURL=";
    let directive_old = "@ sourceURL=";

    // If the comment is a source URL directive, extract the URL.
    if contents.starts_with(directive) || contents.starts_with(directive_old) {
        let contents = &contents[directive.len()..];
        tokenizer.source_url = contents.split([' ', '\t', '\x0C', '\r', '\n']).next()
    }
}

fn consume_comment<'a>(tokenizer: &mut Tokenizer<'a>) -> &'a str {
    tokenizer.advance(2); // consume "/*"
    let start_position = tokenizer.position();
    while !tokenizer.is_eof() {
        match_byte! { tokenizer.next_byte_unchecked(),
            b'*' => {
                let end_position = tokenizer.position();
                tokenizer.advance(1);
                if tokenizer.next_byte() == Some(b'/') {
                    tokenizer.advance(1);
                    let contents = tokenizer.slice(start_position..end_position);
                    check_for_source_map(tokenizer, contents);
                    return contents
                }
            }
            b'\n' | b'\x0C' | b'\r' => {
                tokenizer.consume_newline();
            }
            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
            _ => {
                // ASCII or other leading byte.
                tokenizer.advance(1);
            }
        }
    }
    let contents = tokenizer.slice_from(start_position);
    check_for_source_map(tokenizer, contents);
    contents
}
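
// For illustration, consuming the comment `/*# sourceMappingURL=foo.map */`
// returns the contents `# sourceMappingURL=foo.map ` and, via
// `check_for_source_map`, records `Some("foo.map")` as the source map URL.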

fn consume_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Token<'a> {
    match consume_quoted_string(tokenizer, single_quote) {
        Ok(value) => QuotedString(value),
        Err(value) => BadString(value),
    }
}

/// Returns `Err(..)` on syntax error (i.e. an unescaped newline).
fn consume_quoted_string<'a>(
    tokenizer: &mut Tokenizer<'a>,
    single_quote: bool,
) -> Result<CowRcStr<'a>, CowRcStr<'a>> {
    tokenizer.advance(1); // Skip the initial quote
    // start_pos is at a code point boundary, just after " or '
    let start_pos = tokenizer.position();
    let mut string_bytes;
    loop {
        if tokenizer.is_eof() {
            return Ok(tokenizer.slice_from(start_pos).into());
        }
        match_byte! { tokenizer.next_byte_unchecked(),
            b'"' => {
                if !single_quote {
                    let value = tokenizer.slice_from(start_pos);
                    tokenizer.advance(1);
                    return Ok(value.into())
                }
                tokenizer.advance(1);
            }
            b'\'' => {
                if single_quote {
                    let value = tokenizer.slice_from(start_pos);
                    tokenizer.advance(1);
                    return Ok(value.into())
                }
                tokenizer.advance(1);
            }
            b'\\' | b'\0' => {
                // The input is UTF-8 (it is a `&str`), and both start_pos
                // and the current position (before '\\' or '\0') are at
                // code point boundaries, so `string_bytes` is well-formed
                // UTF-8.
                string_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
                break
            }
            b'\n' | b'\r' | b'\x0C' => {
                return Err(tokenizer.slice_from(start_pos).into())
            },
            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
            _ => {
                // ASCII or other leading byte.
                tokenizer.advance(1);
            }
        }
    }

    while !tokenizer.is_eof() {
        let b = tokenizer.next_byte_unchecked();
        match_byte! { b,
            b'\n' | b'\r' | b'\x0C' => {
                return Err(
                    // string_bytes is well-formed UTF-8, see other comments.
                    unsafe {
                        from_utf8_release_unchecked(string_bytes)
                    }.into()
                );
            }
            b'"' => {
                tokenizer.advance(1);
                if !single_quote {
                    break;
                }
            }
            b'\'' => {
                tokenizer.advance(1);
                if single_quote {
                    break;
                }
            }
            b'\\' => {
                tokenizer.advance(1);
                if !tokenizer.is_eof() {
                    match tokenizer.next_byte_unchecked() {
                        // Escaped newline
                        b'\n' | b'\x0C' | b'\r' => {
                            tokenizer.consume_newline();
                        }
                        // This pushes one well-formed code point
                        _ => consume_escape_and_write(tokenizer, &mut string_bytes)
                    }
                }
                // else: escaped EOF, do nothing.
                continue;
            }
            b'\0' => {
                tokenizer.advance(1);
                string_bytes.extend("\u{FFFD}".as_bytes());
                continue;
            }
            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
            _ => {
                // ASCII or other leading byte.
                tokenizer.advance(1);
            },
        }

        // If this byte is part of a multi-byte code point, we end up copying
        // the whole code point before this loop does anything else.
        string_bytes.push(b)
    }

    Ok(
        // string_bytes is well-formed UTF-8, see other comments.
        unsafe { from_utf8_release_unchecked(string_bytes) }.into(),
    )
}

#[inline]
fn is_ident_start(tokenizer: &mut Tokenizer) -> bool {
    !tokenizer.is_eof()
        && match_byte! { tokenizer.next_byte_unchecked(),
            b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'\0' => true,
            b'-' => {
                tokenizer.has_at_least(1) && match_byte! { tokenizer.byte_at(1),
                    b'a'..=b'z' | b'A'..=b'Z' | b'-' | b'_' | b'\0' => {
                        true
                    }
                    b'\\' => !tokenizer.has_newline_at(1),
                    b => !b.is_ascii(),
                }
            },
            b'\\' => !tokenizer.has_newline_at(1),
            b => !b.is_ascii(),
        }
}
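
// For illustration, `is_ident_start` accepts positions starting with `foo`,
// `_foo`, `--custom`, `-x`, `\66 oo` (an escape), or a non-ASCII code point
// such as `é`; it rejects `-1`, `-` followed by a space, and a backslash
// followed by a newline.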

fn consume_ident_like<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
    let value = consume_name(tokenizer);
    if !tokenizer.is_eof() && tokenizer.next_byte_unchecked() == b'(' {
        tokenizer.advance(1);
        if value.eq_ignore_ascii_case("url") {
            consume_unquoted_url(tokenizer).unwrap_or(Function(value))
        } else {
            tokenizer.see_function(&value);
            Function(value)
        }
    } else {
        Ident(value)
    }
}

fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowRcStr<'a> {
    // start_pos is the end of the previous token, so it is at a code point
    // boundary.
    let start_pos = tokenizer.position();
    let mut value_bytes;
    loop {
        if tokenizer.is_eof() {
            return tokenizer.slice_from(start_pos).into();
        }
        match_byte! { tokenizer.next_byte_unchecked(),
            b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' => tokenizer.advance(1),
            b'\\' | b'\0' => {
                // The input is UTF-8, and both start_pos and the current
                // position (before '\\' or '\0') are at code point
                // boundaries, so `value_bytes` is well-formed UTF-8.
                value_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
                break
            }
            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
            b'\xC0'..=b'\xEF' => { tokenizer.advance(1); }
            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
            _b => {
                return tokenizer.slice_from(start_pos).into();
            }
        }
    }

    while !tokenizer.is_eof() {
        let b = tokenizer.next_byte_unchecked();
        match_byte! { b,
            b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' => {
                tokenizer.advance(1);
                value_bytes.push(b) // ASCII
            }
            b'\\' => {
                if tokenizer.has_newline_at(1) { break }
                tokenizer.advance(1);
                // This pushes one well-formed code point
                consume_escape_and_write(tokenizer, &mut value_bytes)
            }
            b'\0' => {
                tokenizer.advance(1);
                value_bytes.extend("\u{FFFD}".as_bytes());
            },
            b'\x80'..=b'\xBF' => {
                // This byte is part of a multi-byte code point; the whole
                // code point gets copied before this loop does anything else.
                tokenizer.consume_continuation_byte();
                value_bytes.push(b)
            }
            b'\xC0'..=b'\xEF' => {
                // Same as above: a multi-byte sequence leader.
                tokenizer.advance(1);
                value_bytes.push(b)
            }
            b'\xF0'..=b'\xFF' => {
                tokenizer.consume_4byte_intro();
                value_bytes.push(b)
            }
            _ => {
                // ASCII
                break;
            }
        }
    }
    // value_bytes is well-formed UTF-8, see other comments.
    unsafe { from_utf8_release_unchecked(value_bytes) }.into()
}

fn byte_to_hex_digit(b: u8) -> Option<u32> {
    Some(match_byte! { b,
        b'0' ..= b'9' => b - b'0',
        b'a' ..= b'f' => b - b'a' + 10,
        b'A' ..= b'F' => b - b'A' + 10,
        _ => {
            return None
        }
    } as u32)
}

fn byte_to_decimal_digit(b: u8) -> Option<u32> {
    if b.is_ascii_digit() {
        Some((b - b'0') as u32)
    } else {
        None
    }
}

fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
    // Parse [+-]?\d*(\.\d+)?([eE][+-]?\d+)?
    // This is only called when there is at least one digit in \d*(\.\d+)?
    //
    // Do all the math in f64 so that large numbers overflow gracefully,
    // then round to f32 at the end.
    let (has_sign, sign) = match tokenizer.next_byte_unchecked() {
        b'-' => (true, -1.),
        b'+' => (true, 1.),
        _ => (false, 1.),
    };
    if has_sign {
        tokenizer.advance(1);
    }

    let mut integral_part: f64 = 0.;
    while let Some(digit) = byte_to_decimal_digit(tokenizer.next_byte_unchecked()) {
        integral_part = integral_part * 10. + digit as f64;
        tokenizer.advance(1);
        if tokenizer.is_eof() {
            break;
        }
    }

    let mut is_integer = true;

    let mut fractional_part: f64 = 0.;
    if tokenizer.has_at_least(1)
        && tokenizer.next_byte_unchecked() == b'.'
        && tokenizer.byte_at(1).is_ascii_digit()
    {
        is_integer = false;
        tokenizer.advance(1); // Consume '.'
        let mut factor = 0.1;
        while let Some(digit) = byte_to_decimal_digit(tokenizer.next_byte_unchecked()) {
            fractional_part += digit as f64 * factor;
            factor *= 0.1;
            tokenizer.advance(1);
            if tokenizer.is_eof() {
                break;
            }
        }
    }

    let mut value = sign * (integral_part + fractional_part);

    if tokenizer.has_at_least(1)
        && matches!(tokenizer.next_byte_unchecked(), b'e' | b'E')
        && (tokenizer.byte_at(1).is_ascii_digit()
            || (tokenizer.has_at_least(2)
                && matches!(tokenizer.byte_at(1), b'+' | b'-')
                && tokenizer.byte_at(2).is_ascii_digit()))
    {
        is_integer = false;
        tokenizer.advance(1);
        let (has_sign, sign) = match tokenizer.next_byte_unchecked() {
            b'-' => (true, -1.),
            b'+' => (true, 1.),
            _ => (false, 1.),
        };
        if has_sign {
            tokenizer.advance(1);
        }
        let mut exponent: f64 = 0.;
        while let Some(digit) = byte_to_decimal_digit(tokenizer.next_byte_unchecked()) {
            exponent = exponent * 10. + digit as f64;
            tokenizer.advance(1);
            if tokenizer.is_eof() {
                break;
            }
        }
        value *= f64::powf(10., sign * exponent);
    }

    let int_value = if is_integer {
        // Clamp to the i32 range rather than letting the cast saturate silently.
        Some(if value >= i32::MAX as f64 {
            i32::MAX
        } else if value <= i32::MIN as f64 {
            i32::MIN
        } else {
            value as i32
        })
    } else {
        None
    };

    if !tokenizer.is_eof() && tokenizer.next_byte_unchecked() == b'%' {
        tokenizer.advance(1);
        return Percentage {
            unit_value: (value / 100.) as f32,
            int_value,
            has_sign,
        };
    }
    let value = value as f32;
    if is_ident_start(tokenizer) {
        let unit = consume_name(tokenizer);
        Dimension {
            value,
            int_value,
            has_sign,
            unit,
        }
    } else {
        Number {
            value,
            int_value,
            has_sign,
        }
    }
}
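
// For illustration, tracing `consume_numeric` over the input "-1.5e2%":
// sign = -1, integral_part = 1, fractional_part = 0.5, exponent = 2, so
// value = -150 and is_integer = false (int_value = None); the trailing '%'
// yields Percentage { unit_value: -1.5, .. }. By contrast, "12px" yields
// Dimension { value: 12.0, int_value: Some(12), has_sign: false, unit: "px" }.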

#[inline]
unsafe fn from_utf8_release_unchecked(string_bytes: Vec<u8>) -> String {
    if cfg!(debug_assertions) {
        String::from_utf8(string_bytes).unwrap()
    } else {
        String::from_utf8_unchecked(string_bytes)
    }
}

fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
    // This is only called after "url(", so the current position is a code
    // point boundary.
    let start_position = tokenizer.position;
    let from_start = &tokenizer.input[tokenizer.position..];
    let mut newlines = 0;
    let mut last_newline = 0;
    let mut found_printable_char = false;
    let mut iter = from_start.bytes().enumerate();
    loop {
        let (offset, b) = match iter.next() {
            Some(item) => item,
            None => {
                tokenizer.position = tokenizer.input.len();
                break;
            }
        };
        match_byte! { b,
            b' ' | b'\t' => {},
            b'\n' | b'\x0C' => {
                newlines += 1;
                last_newline = offset;
            }
            b'\r' => {
                if from_start.as_bytes().get(offset + 1) != Some(&b'\n') {
                    newlines += 1;
                    last_newline = offset;
                }
            }
            b'"' | b'\'' => return Err(()), // Do not advance
            b')' => {
                // Don't use advance, because we may be skipping
                // newlines here, and we want to avoid the debug assert.
                tokenizer.position += offset + 1;
                break
            }
            _ => {
                // Don't use advance, because we may be skipping
                // newlines here, and we want to avoid the debug assert.
                tokenizer.position += offset;
                found_printable_char = true;
                break
            }
        }
    }

    if newlines > 0 {
        tokenizer.current_line_number += newlines;
        // No need for a wrapping add here: only ASCII whitespace was
        // skipped, so no column adjustment is pending.
        tokenizer.current_line_start_position = start_position + last_newline + 1;
    }

    if found_printable_char {
        // This function only consumed ASCII (whitespace) bytes, so the
        // current position is still a code point boundary.
        return Ok(consume_unquoted_url_internal(tokenizer));
    } else {
        return Ok(UnquotedUrl("".into()));
    }

    fn consume_unquoted_url_internal<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
        // This function is only called when start_pos is at a code point
        // boundary.
        let start_pos = tokenizer.position();
        let mut string_bytes: Vec<u8>;
        loop {
            if tokenizer.is_eof() {
                return UnquotedUrl(tokenizer.slice_from(start_pos).into());
            }
            match_byte! { tokenizer.next_byte_unchecked(),
                b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
                    let value = tokenizer.slice_from(start_pos);
                    return consume_url_end(tokenizer, start_pos, value.into())
                }
                b')' => {
                    let value = tokenizer.slice_from(start_pos);
                    tokenizer.advance(1);
                    return UnquotedUrl(value.into())
                }
                b'\x01'..=b'\x08' | b'\x0B' | b'\x0E'..=b'\x1F' | b'\x7F' | b'"' | b'\'' | b'(' => {
                    tokenizer.advance(1);
                    return consume_bad_url(tokenizer, start_pos)
                },
                b'\\' | b'\0' => {
                    // The input is UTF-8, and both start_pos and the current
                    // position (before '\\' or '\0') are at code point
                    // boundaries, so `string_bytes` is well-formed UTF-8.
                    string_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
                    break
                }
                b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
                b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
                _ => {
                    // ASCII or other leading byte.
                    tokenizer.advance(1);
                }
            }
        }
        while !tokenizer.is_eof() {
            let b = tokenizer.next_byte_unchecked();
            match_byte! { b,
                b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
                    // string_bytes is well-formed UTF-8, see other comments.
                    let string = unsafe { from_utf8_release_unchecked(string_bytes) }.into();
                    return consume_url_end(tokenizer, start_pos, string)
                }
                b')' => {
                    tokenizer.advance(1);
                    break;
                }
                b'\x01'..=b'\x08' | b'\x0B' | b'\x0E'..=b'\x1F' | b'\x7F' | b'"' | b'\'' | b'(' => {
                    tokenizer.advance(1);
                    return consume_bad_url(tokenizer, start_pos);
                }
                b'\\' => {
                    tokenizer.advance(1);
                    if tokenizer.has_newline_at(0) {
                        return consume_bad_url(tokenizer, start_pos)
                    }

                    // This pushes one well-formed code point to string_bytes
                    consume_escape_and_write(tokenizer, &mut string_bytes)
                },
                b'\0' => {
                    tokenizer.advance(1);
                    string_bytes.extend("\u{FFFD}".as_bytes());
                }
                b'\x80'..=b'\xBF' => {
                    // The whole code point gets copied before this loop
                    // does anything else.
                    tokenizer.consume_continuation_byte();
                    string_bytes.push(b);
                }
                b'\xF0'..=b'\xFF' => {
                    // The whole code point gets copied before this loop
                    // does anything else.
                    tokenizer.consume_4byte_intro();
                    string_bytes.push(b);
                }
                b => {
                    // ASCII or other leading byte.
                    tokenizer.advance(1);
                    string_bytes.push(b)
                }
            }
        }
        UnquotedUrl(
            // string_bytes is well-formed UTF-8, see other comments.
            unsafe { from_utf8_release_unchecked(string_bytes) }.into(),
        )
    }

    fn consume_url_end<'a>(
        tokenizer: &mut Tokenizer<'a>,
        start_pos: SourcePosition,
        string: CowRcStr<'a>,
    ) -> Token<'a> {
        while !tokenizer.is_eof() {
            match_byte! { tokenizer.next_byte_unchecked(),
                b')' => {
                    tokenizer.advance(1);
                    break
                }
                b' ' | b'\t' => { tokenizer.advance(1); }
                b'\n' | b'\x0C' | b'\r' => {
                    tokenizer.consume_newline();
                }
                b => {
                    tokenizer.consume_known_byte(b);
                    return consume_bad_url(tokenizer, start_pos);
                }
            }
        }
        UnquotedUrl(string)
    }

    fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>, start_pos: SourcePosition) -> Token<'a> {
        // Consume up to the closing )
        while !tokenizer.is_eof() {
            match_byte! { tokenizer.next_byte_unchecked(),
                b')' => {
                    let contents = tokenizer.slice_from(start_pos).into();
                    tokenizer.advance(1);
                    return BadUrl(contents)
                }
                b'\\' => {
                    tokenizer.advance(1);
                    if matches!(tokenizer.next_byte(), Some(b')') | Some(b'\\')) {
                        tokenizer.advance(1); // Skip an escaped ')' or '\'
                    }
                }
                b'\n' | b'\x0C' | b'\r' => {
                    tokenizer.consume_newline();
                }
                b => {
                    tokenizer.consume_known_byte(b);
                }
            }
        }
        BadUrl(tokenizer.slice_from(start_pos).into())
    }
}

// Consume up to 6 hex digits, returning (value, number_of_digits_consumed).
fn consume_hex_digits(tokenizer: &mut Tokenizer<'_>) -> (u32, u32) {
    let mut value = 0;
    let mut digits = 0;
    while digits < 6 && !tokenizer.is_eof() {
        match byte_to_hex_digit(tokenizer.next_byte_unchecked()) {
            Some(digit) => {
                value = value * 16 + digit;
                digits += 1;
                tokenizer.advance(1);
            }
            None => break,
        }
    }
    (value, digits)
}
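
// For illustration, with remaining input `26 B`, `consume_hex_digits`
// returns (0x26, 2): it stops at the space, which `consume_escape` then
// eats as the escape terminator, leaving `B` as ordinary input.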

// Same constraints as consume_escape, except the result is written into
// `bytes`, UTF-8 encoded.
fn consume_escape_and_write(tokenizer: &mut Tokenizer, bytes: &mut Vec<u8>) {
    bytes.extend(
        consume_escape(tokenizer)
            .encode_utf8(&mut [0; 4])
            .as_bytes(),
    )
}

// Assumes that the `\` has already been consumed and that the next input
// character is not a newline.
fn consume_escape(tokenizer: &mut Tokenizer) -> char {
    if tokenizer.is_eof() {
        return '\u{FFFD}'; // Escaped EOF
    }
    match_byte! { tokenizer.next_byte_unchecked(),
        b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
            let (c, _) = consume_hex_digits(tokenizer);
            // A single whitespace character after the hex digits is
            // consumed as part of the escape.
            if !tokenizer.is_eof() {
                match_byte! { tokenizer.next_byte_unchecked(),
                    b' ' | b'\t' => {
                        tokenizer.advance(1)
                    }
                    b'\n' | b'\x0C' | b'\r' => {
                        tokenizer.consume_newline();
                    }
                    _ => {}
                }
            }
            static REPLACEMENT_CHAR: char = '\u{FFFD}';
            if c != 0 {
                // `char::from_u32` returns `None` for surrogates and
                // values above the maximum code point.
                let c = char::from_u32(c);
                c.unwrap_or(REPLACEMENT_CHAR)
            } else {
                REPLACEMENT_CHAR
            }
        },
        b'\0' => {
            tokenizer.advance(1);
            '\u{FFFD}'
        }
        _ => tokenizer.consume_char(),
    }
}
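
// A minimal smoke-test sketch (an illustrative addition, not part of the
// original file), exercising `Tokenizer::new`, `next`, and
// `current_source_location` as defined above.
#[cfg(test)]
mod sketch_tests {
    use super::Token::*;
    use super::*;

    #[test]
    fn tokenizes_a_simple_declaration() {
        let mut tokenizer = Tokenizer::new("width: 50%;");
        assert_eq!(tokenizer.next(), Ok(Ident("width".into())));
        assert_eq!(tokenizer.next(), Ok(Colon));
        assert_eq!(tokenizer.next(), Ok(WhiteSpace(" ")));
        assert_eq!(
            tokenizer.next(),
            Ok(Percentage {
                has_sign: false,
                unit_value: 0.5,
                int_value: Some(50),
            })
        );
        assert_eq!(tokenizer.next(), Ok(Semicolon));
        // End of input is reported as Err(()).
        assert_eq!(tokenizer.next(), Err(()));
    }

    #[test]
    fn counts_lines_and_utf16_columns() {
        let mut tokenizer = Tokenizer::new("a\nbc");
        while tokenizer.next().is_ok() {}
        // One newline was consumed, and "bc" is two UTF-16 units wide,
        // so we end at line 1 (0-based), column 3 (1-based).
        assert_eq!(
            tokenizer.current_source_location(),
            SourceLocation { line: 1, column: 3 }
        );
    }
}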