1use core::cell::{Cell, RefCell};
6
7use alloc::{boxed::Box, string::ToString, vec, vec::Vec};
8
9use crate::{
10 ast::{self, Ast, Span, Visitor},
11 either::Either,
12 hir::{self, Error, ErrorKind, Hir, HirKind},
13 unicode::{self, ClassQuery},
14};
15
16type Result<T> = core::result::Result<T, Error>;
17
18#[derive(Clone, Debug)]
20pub struct TranslatorBuilder {
21 utf8: bool,
22 line_terminator: u8,
23 flags: Flags,
24}
25
26impl Default for TranslatorBuilder {
27 fn default() -> TranslatorBuilder {
28 TranslatorBuilder::new()
29 }
30}
31
32impl TranslatorBuilder {
33 pub fn new() -> TranslatorBuilder {
35 TranslatorBuilder {
36 utf8: true,
37 line_terminator: b'\n',
38 flags: Flags::default(),
39 }
40 }
41
42 pub fn build(&self) -> Translator {
44 Translator {
45 stack: RefCell::new(vec![]),
46 flags: Cell::new(self.flags),
47 utf8: self.utf8,
48 line_terminator: self.line_terminator,
49 }
50 }
51
52 pub fn utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
68 self.utf8 = yes;
69 self
70 }
71
72 pub fn line_terminator(&mut self, byte: u8) -> &mut TranslatorBuilder {
93 self.line_terminator = byte;
94 self
95 }
96
97 pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
99 self.flags.case_insensitive = if yes { Some(true) } else { None };
100 self
101 }
102
103 pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
105 self.flags.multi_line = if yes { Some(true) } else { None };
106 self
107 }
108
109 pub fn dot_matches_new_line(
112 &mut self,
113 yes: bool,
114 ) -> &mut TranslatorBuilder {
115 self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
116 self
117 }
118
119 pub fn crlf(&mut self, yes: bool) -> &mut TranslatorBuilder {
121 self.flags.crlf = if yes { Some(true) } else { None };
122 self
123 }
124
125 pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
127 self.flags.swap_greed = if yes { Some(true) } else { None };
128 self
129 }
130
131 pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
133 self.flags.unicode = if yes { None } else { Some(false) };
134 self
135 }
136}
137
138#[derive(Clone, Debug)]
147pub struct Translator {
148 stack: RefCell<Vec<HirFrame>>,
150 flags: Cell<Flags>,
152 utf8: bool,
154 line_terminator: u8,
156}
157
158impl Translator {
159 pub fn new() -> Translator {
161 TranslatorBuilder::new().build()
162 }
163
164 pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
174 ast::visit(ast, TranslatorI::new(self, pattern))
175 }
176}
177
178#[derive(Clone, Debug)]
185enum HirFrame {
186 Expr(Hir),
190 Literal(Vec<u8>),
196 ClassUnicode(hir::ClassUnicode),
200 ClassBytes(hir::ClassBytes),
208 Repetition,
215 Group {
219 old_flags: Flags,
232 },
233 Concat,
237 Alternation,
241 AlternationBranch,
249}
250
251impl HirFrame {
252 fn unwrap_expr(self) -> Hir {
254 match self {
255 HirFrame::Expr(expr) => expr,
256 HirFrame::Literal(lit) => Hir::literal(lit),
257 _ => panic!("tried to unwrap expr from HirFrame, got: {self:?}"),
258 }
259 }
260
261 fn unwrap_class_unicode(self) -> hir::ClassUnicode {
264 match self {
265 HirFrame::ClassUnicode(cls) => cls,
266 _ => panic!(
267 "tried to unwrap Unicode class \
268 from HirFrame, got: {:?}",
269 self
270 ),
271 }
272 }
273
274 fn unwrap_class_bytes(self) -> hir::ClassBytes {
277 match self {
278 HirFrame::ClassBytes(cls) => cls,
279 _ => panic!(
280 "tried to unwrap byte class \
281 from HirFrame, got: {:?}",
282 self
283 ),
284 }
285 }
286
287 fn unwrap_repetition(self) {
290 match self {
291 HirFrame::Repetition => {}
292 _ => {
293 panic!(
294 "tried to unwrap repetition from HirFrame, got: {self:?}"
295 )
296 }
297 }
298 }
299
300 fn unwrap_group(self) -> Flags {
304 match self {
305 HirFrame::Group { old_flags } => old_flags,
306 _ => {
307 panic!("tried to unwrap group from HirFrame, got: {self:?}")
308 }
309 }
310 }
311
312 fn unwrap_alternation_pipe(self) {
315 match self {
316 HirFrame::AlternationBranch => {}
317 _ => {
318 panic!("tried to unwrap alt pipe from HirFrame, got: {self:?}")
319 }
320 }
321 }
322}
323
324impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
325 type Output = Hir;
326 type Err = Error;
327
328 fn finish(self) -> Result<Hir> {
329 assert_eq!(self.trans().stack.borrow().len(), 1);
331 Ok(self.pop().unwrap().unwrap_expr())
332 }
333
334 fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
335 match *ast {
336 Ast::ClassBracketed(_) => {
337 if self.flags().unicode() {
338 let cls = hir::ClassUnicode::empty();
339 self.push(HirFrame::ClassUnicode(cls));
340 } else {
341 let cls = hir::ClassBytes::empty();
342 self.push(HirFrame::ClassBytes(cls));
343 }
344 }
345 Ast::Repetition(_) => self.push(HirFrame::Repetition),
346 Ast::Group(ref x) => {
347 let old_flags = x
348 .flags()
349 .map(|ast| self.set_flags(ast))
350 .unwrap_or_else(|| self.flags());
351 self.push(HirFrame::Group { old_flags });
352 }
353 Ast::Concat(_) => {
354 self.push(HirFrame::Concat);
355 }
356 Ast::Alternation(ref x) => {
357 self.push(HirFrame::Alternation);
358 if !x.asts.is_empty() {
359 self.push(HirFrame::AlternationBranch);
360 }
361 }
362 _ => {}
363 }
364 Ok(())
365 }
366
367 fn visit_post(&mut self, ast: &Ast) -> Result<()> {
368 match *ast {
369 Ast::Empty(_) => {
370 self.push(HirFrame::Expr(Hir::empty()));
371 }
372 Ast::Flags(ref x) => {
373 self.set_flags(&x.flags);
374 self.push(HirFrame::Expr(Hir::empty()));
384 }
385 Ast::Literal(ref x) => match self.ast_literal_to_scalar(x)? {
386 Either::Right(byte) => self.push_byte(byte),
387 Either::Left(ch) => match self.case_fold_char(x.span, ch)? {
388 None => self.push_char(ch),
389 Some(expr) => self.push(HirFrame::Expr(expr)),
390 },
391 },
392 Ast::Dot(ref span) => {
393 self.push(HirFrame::Expr(self.hir_dot(**span)?));
394 }
395 Ast::Assertion(ref x) => {
396 self.push(HirFrame::Expr(self.hir_assertion(x)?));
397 }
398 Ast::ClassPerl(ref x) => {
399 if self.flags().unicode() {
400 let cls = self.hir_perl_unicode_class(x)?;
401 let hcls = hir::Class::Unicode(cls);
402 self.push(HirFrame::Expr(Hir::class(hcls)));
403 } else {
404 let cls = self.hir_perl_byte_class(x)?;
405 let hcls = hir::Class::Bytes(cls);
406 self.push(HirFrame::Expr(Hir::class(hcls)));
407 }
408 }
409 Ast::ClassUnicode(ref x) => {
410 let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
411 self.push(HirFrame::Expr(Hir::class(cls)));
412 }
413 Ast::ClassBracketed(ref ast) => {
414 if self.flags().unicode() {
415 let mut cls = self.pop().unwrap().unwrap_class_unicode();
416 self.unicode_fold_and_negate(
417 &ast.span,
418 ast.negated,
419 &mut cls,
420 )?;
421 let expr = Hir::class(hir::Class::Unicode(cls));
422 self.push(HirFrame::Expr(expr));
423 } else {
424 let mut cls = self.pop().unwrap().unwrap_class_bytes();
425 self.bytes_fold_and_negate(
426 &ast.span,
427 ast.negated,
428 &mut cls,
429 )?;
430 let expr = Hir::class(hir::Class::Bytes(cls));
431 self.push(HirFrame::Expr(expr));
432 }
433 }
434 Ast::Repetition(ref x) => {
435 let expr = self.pop().unwrap().unwrap_expr();
436 self.pop().unwrap().unwrap_repetition();
437 self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
438 }
439 Ast::Group(ref x) => {
440 let expr = self.pop().unwrap().unwrap_expr();
441 let old_flags = self.pop().unwrap().unwrap_group();
442 self.trans().flags.set(old_flags);
443 self.push(HirFrame::Expr(self.hir_capture(x, expr)));
444 }
445 Ast::Concat(_) => {
446 let mut exprs = vec![];
447 while let Some(expr) = self.pop_concat_expr() {
448 if !matches!(*expr.kind(), HirKind::Empty) {
449 exprs.push(expr);
450 }
451 }
452 exprs.reverse();
453 self.push(HirFrame::Expr(Hir::concat(exprs)));
454 }
455 Ast::Alternation(_) => {
456 let mut exprs = vec![];
457 while let Some(expr) = self.pop_alt_expr() {
458 self.pop().unwrap().unwrap_alternation_pipe();
459 exprs.push(expr);
460 }
461 exprs.reverse();
462 self.push(HirFrame::Expr(Hir::alternation(exprs)));
463 }
464 }
465 Ok(())
466 }
467
468 fn visit_alternation_in(&mut self) -> Result<()> {
469 self.push(HirFrame::AlternationBranch);
470 Ok(())
471 }
472
473 fn visit_class_set_item_pre(
474 &mut self,
475 ast: &ast::ClassSetItem,
476 ) -> Result<()> {
477 match *ast {
478 ast::ClassSetItem::Bracketed(_) => {
479 if self.flags().unicode() {
480 let cls = hir::ClassUnicode::empty();
481 self.push(HirFrame::ClassUnicode(cls));
482 } else {
483 let cls = hir::ClassBytes::empty();
484 self.push(HirFrame::ClassBytes(cls));
485 }
486 }
487 _ => {}
490 }
491 Ok(())
492 }
493
494 fn visit_class_set_item_post(
495 &mut self,
496 ast: &ast::ClassSetItem,
497 ) -> Result<()> {
498 match *ast {
499 ast::ClassSetItem::Empty(_) => {}
500 ast::ClassSetItem::Literal(ref x) => {
501 if self.flags().unicode() {
502 let mut cls = self.pop().unwrap().unwrap_class_unicode();
503 cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
504 self.push(HirFrame::ClassUnicode(cls));
505 } else {
506 let mut cls = self.pop().unwrap().unwrap_class_bytes();
507 let byte = self.class_literal_byte(x)?;
508 cls.push(hir::ClassBytesRange::new(byte, byte));
509 self.push(HirFrame::ClassBytes(cls));
510 }
511 }
512 ast::ClassSetItem::Range(ref x) => {
513 if self.flags().unicode() {
514 let mut cls = self.pop().unwrap().unwrap_class_unicode();
515 cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
516 self.push(HirFrame::ClassUnicode(cls));
517 } else {
518 let mut cls = self.pop().unwrap().unwrap_class_bytes();
519 let start = self.class_literal_byte(&x.start)?;
520 let end = self.class_literal_byte(&x.end)?;
521 cls.push(hir::ClassBytesRange::new(start, end));
522 self.push(HirFrame::ClassBytes(cls));
523 }
524 }
525 ast::ClassSetItem::Ascii(ref x) => {
526 if self.flags().unicode() {
527 let xcls = self.hir_ascii_unicode_class(x)?;
528 let mut cls = self.pop().unwrap().unwrap_class_unicode();
529 cls.union(&xcls);
530 self.push(HirFrame::ClassUnicode(cls));
531 } else {
532 let xcls = self.hir_ascii_byte_class(x)?;
533 let mut cls = self.pop().unwrap().unwrap_class_bytes();
534 cls.union(&xcls);
535 self.push(HirFrame::ClassBytes(cls));
536 }
537 }
538 ast::ClassSetItem::Unicode(ref x) => {
539 let xcls = self.hir_unicode_class(x)?;
540 let mut cls = self.pop().unwrap().unwrap_class_unicode();
541 cls.union(&xcls);
542 self.push(HirFrame::ClassUnicode(cls));
543 }
544 ast::ClassSetItem::Perl(ref x) => {
545 if self.flags().unicode() {
546 let xcls = self.hir_perl_unicode_class(x)?;
547 let mut cls = self.pop().unwrap().unwrap_class_unicode();
548 cls.union(&xcls);
549 self.push(HirFrame::ClassUnicode(cls));
550 } else {
551 let xcls = self.hir_perl_byte_class(x)?;
552 let mut cls = self.pop().unwrap().unwrap_class_bytes();
553 cls.union(&xcls);
554 self.push(HirFrame::ClassBytes(cls));
555 }
556 }
557 ast::ClassSetItem::Bracketed(ref ast) => {
558 if self.flags().unicode() {
559 let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
560 self.unicode_fold_and_negate(
561 &ast.span,
562 ast.negated,
563 &mut cls1,
564 )?;
565
566 let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
567 cls2.union(&cls1);
568 self.push(HirFrame::ClassUnicode(cls2));
569 } else {
570 let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
571 self.bytes_fold_and_negate(
572 &ast.span,
573 ast.negated,
574 &mut cls1,
575 )?;
576
577 let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
578 cls2.union(&cls1);
579 self.push(HirFrame::ClassBytes(cls2));
580 }
581 }
582 ast::ClassSetItem::Union(_) => {}
584 }
585 Ok(())
586 }
587
588 fn visit_class_set_binary_op_pre(
589 &mut self,
590 _op: &ast::ClassSetBinaryOp,
591 ) -> Result<()> {
592 if self.flags().unicode() {
593 let cls = hir::ClassUnicode::empty();
594 self.push(HirFrame::ClassUnicode(cls));
595 } else {
596 let cls = hir::ClassBytes::empty();
597 self.push(HirFrame::ClassBytes(cls));
598 }
599 Ok(())
600 }
601
602 fn visit_class_set_binary_op_in(
603 &mut self,
604 _op: &ast::ClassSetBinaryOp,
605 ) -> Result<()> {
606 if self.flags().unicode() {
607 let cls = hir::ClassUnicode::empty();
608 self.push(HirFrame::ClassUnicode(cls));
609 } else {
610 let cls = hir::ClassBytes::empty();
611 self.push(HirFrame::ClassBytes(cls));
612 }
613 Ok(())
614 }
615
616 fn visit_class_set_binary_op_post(
617 &mut self,
618 op: &ast::ClassSetBinaryOp,
619 ) -> Result<()> {
620 use crate::ast::ClassSetBinaryOpKind::*;
621
622 if self.flags().unicode() {
623 let mut rhs = self.pop().unwrap().unwrap_class_unicode();
624 let mut lhs = self.pop().unwrap().unwrap_class_unicode();
625 let mut cls = self.pop().unwrap().unwrap_class_unicode();
626 if self.flags().case_insensitive() {
627 rhs.try_case_fold_simple().map_err(|_| {
628 self.error(
629 op.rhs.span().clone(),
630 ErrorKind::UnicodeCaseUnavailable,
631 )
632 })?;
633 lhs.try_case_fold_simple().map_err(|_| {
634 self.error(
635 op.lhs.span().clone(),
636 ErrorKind::UnicodeCaseUnavailable,
637 )
638 })?;
639 }
640 match op.kind {
641 Intersection => lhs.intersect(&rhs),
642 Difference => lhs.difference(&rhs),
643 SymmetricDifference => lhs.symmetric_difference(&rhs),
644 }
645 cls.union(&lhs);
646 self.push(HirFrame::ClassUnicode(cls));
647 } else {
648 let mut rhs = self.pop().unwrap().unwrap_class_bytes();
649 let mut lhs = self.pop().unwrap().unwrap_class_bytes();
650 let mut cls = self.pop().unwrap().unwrap_class_bytes();
651 if self.flags().case_insensitive() {
652 rhs.case_fold_simple();
653 lhs.case_fold_simple();
654 }
655 match op.kind {
656 Intersection => lhs.intersect(&rhs),
657 Difference => lhs.difference(&rhs),
658 SymmetricDifference => lhs.symmetric_difference(&rhs),
659 }
660 cls.union(&lhs);
661 self.push(HirFrame::ClassBytes(cls));
662 }
663 Ok(())
664 }
665}
666
667#[derive(Clone, Debug)]
674struct TranslatorI<'t, 'p> {
675 trans: &'t Translator,
676 pattern: &'p str,
677}
678
679impl<'t, 'p> TranslatorI<'t, 'p> {
680 fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
682 TranslatorI { trans, pattern }
683 }
684
685 fn trans(&self) -> &Translator {
687 &self.trans
688 }
689
690 fn push(&self, frame: HirFrame) {
692 self.trans().stack.borrow_mut().push(frame);
693 }
694
695 fn push_char(&self, ch: char) {
701 let mut buf = [0; 4];
702 let bytes = ch.encode_utf8(&mut buf).as_bytes();
703 let mut stack = self.trans().stack.borrow_mut();
704 if let Some(HirFrame::Literal(ref mut literal)) = stack.last_mut() {
705 literal.extend_from_slice(bytes);
706 } else {
707 stack.push(HirFrame::Literal(bytes.to_vec()));
708 }
709 }
710
711 fn push_byte(&self, byte: u8) {
717 let mut stack = self.trans().stack.borrow_mut();
718 if let Some(HirFrame::Literal(ref mut literal)) = stack.last_mut() {
719 literal.push(byte);
720 } else {
721 stack.push(HirFrame::Literal(vec![byte]));
722 }
723 }
724
725 fn pop(&self) -> Option<HirFrame> {
727 self.trans().stack.borrow_mut().pop()
728 }
729
730 fn pop_concat_expr(&self) -> Option<Hir> {
735 let frame = self.pop()?;
736 match frame {
737 HirFrame::Concat => None,
738 HirFrame::Expr(expr) => Some(expr),
739 HirFrame::Literal(lit) => Some(Hir::literal(lit)),
740 HirFrame::ClassUnicode(_) => {
741 unreachable!("expected expr or concat, got Unicode class")
742 }
743 HirFrame::ClassBytes(_) => {
744 unreachable!("expected expr or concat, got byte class")
745 }
746 HirFrame::Repetition => {
747 unreachable!("expected expr or concat, got repetition")
748 }
749 HirFrame::Group { .. } => {
750 unreachable!("expected expr or concat, got group")
751 }
752 HirFrame::Alternation => {
753 unreachable!("expected expr or concat, got alt marker")
754 }
755 HirFrame::AlternationBranch => {
756 unreachable!("expected expr or concat, got alt branch marker")
757 }
758 }
759 }
760
761 fn pop_alt_expr(&self) -> Option<Hir> {
766 let frame = self.pop()?;
767 match frame {
768 HirFrame::Alternation => None,
769 HirFrame::Expr(expr) => Some(expr),
770 HirFrame::Literal(lit) => Some(Hir::literal(lit)),
771 HirFrame::ClassUnicode(_) => {
772 unreachable!("expected expr or alt, got Unicode class")
773 }
774 HirFrame::ClassBytes(_) => {
775 unreachable!("expected expr or alt, got byte class")
776 }
777 HirFrame::Repetition => {
778 unreachable!("expected expr or alt, got repetition")
779 }
780 HirFrame::Group { .. } => {
781 unreachable!("expected expr or alt, got group")
782 }
783 HirFrame::Concat => {
784 unreachable!("expected expr or alt, got concat marker")
785 }
786 HirFrame::AlternationBranch => {
787 unreachable!("expected expr or alt, got alt branch marker")
788 }
789 }
790 }
791
792 fn error(&self, span: Span, kind: ErrorKind) -> Error {
794 Error { kind, pattern: self.pattern.to_string(), span }
795 }
796
797 fn flags(&self) -> Flags {
799 self.trans().flags.get()
800 }
801
802 fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
805 let old_flags = self.flags();
806 let mut new_flags = Flags::from_ast(ast_flags);
807 new_flags.merge(&old_flags);
808 self.trans().flags.set(new_flags);
809 old_flags
810 }
811
812 fn ast_literal_to_scalar(
822 &self,
823 lit: &ast::Literal,
824 ) -> Result<Either<char, u8>> {
825 if self.flags().unicode() {
826 return Ok(Either::Left(lit.c));
827 }
828 let byte = match lit.byte() {
829 None => return Ok(Either::Left(lit.c)),
830 Some(byte) => byte,
831 };
832 if byte <= 0x7F {
833 return Ok(Either::Left(char::try_from(byte).unwrap()));
834 }
835 if self.trans().utf8 {
836 return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
837 }
838 Ok(Either::Right(byte))
839 }
840
841 fn case_fold_char(&self, span: Span, c: char) -> Result<Option<Hir>> {
842 if !self.flags().case_insensitive() {
843 return Ok(None);
844 }
845 if self.flags().unicode() {
846 let map = unicode::SimpleCaseFolder::new()
848 .map(|f| f.overlaps(c, c))
849 .map_err(|_| {
850 self.error(span, ErrorKind::UnicodeCaseUnavailable)
851 })?;
852 if !map {
853 return Ok(None);
854 }
855 let mut cls =
856 hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
857 c, c,
858 )]);
859 cls.try_case_fold_simple().map_err(|_| {
860 self.error(span, ErrorKind::UnicodeCaseUnavailable)
861 })?;
862 Ok(Some(Hir::class(hir::Class::Unicode(cls))))
863 } else {
864 if !c.is_ascii() {
865 return Ok(None);
866 }
867 match c {
869 'A'..='Z' | 'a'..='z' => {}
870 _ => return Ok(None),
871 }
872 let mut cls =
873 hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
874 u8::try_from(c).unwrap(),
877 u8::try_from(c).unwrap(),
878 )]);
879 cls.case_fold_simple();
880 Ok(Some(Hir::class(hir::Class::Bytes(cls))))
881 }
882 }
883
884 fn hir_dot(&self, span: Span) -> Result<Hir> {
885 let (utf8, lineterm, flags) =
886 (self.trans().utf8, self.trans().line_terminator, self.flags());
887 if utf8 && (!flags.unicode() || !lineterm.is_ascii()) {
888 return Err(self.error(span, ErrorKind::InvalidUtf8));
889 }
890 let dot = if flags.dot_matches_new_line() {
891 if flags.unicode() {
892 hir::Dot::AnyChar
893 } else {
894 hir::Dot::AnyByte
895 }
896 } else {
897 if flags.unicode() {
898 if flags.crlf() {
899 hir::Dot::AnyCharExceptCRLF
900 } else {
901 if !lineterm.is_ascii() {
902 return Err(
903 self.error(span, ErrorKind::InvalidLineTerminator)
904 );
905 }
906 hir::Dot::AnyCharExcept(char::from(lineterm))
907 }
908 } else {
909 if flags.crlf() {
910 hir::Dot::AnyByteExceptCRLF
911 } else {
912 hir::Dot::AnyByteExcept(lineterm)
913 }
914 }
915 };
916 Ok(Hir::dot(dot))
917 }
918
919 fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
920 let unicode = self.flags().unicode();
921 let multi_line = self.flags().multi_line();
922 let crlf = self.flags().crlf();
923 Ok(match asst.kind {
924 ast::AssertionKind::StartLine => Hir::look(if multi_line {
925 if crlf {
926 hir::Look::StartCRLF
927 } else {
928 hir::Look::StartLF
929 }
930 } else {
931 hir::Look::Start
932 }),
933 ast::AssertionKind::EndLine => Hir::look(if multi_line {
934 if crlf {
935 hir::Look::EndCRLF
936 } else {
937 hir::Look::EndLF
938 }
939 } else {
940 hir::Look::End
941 }),
942 ast::AssertionKind::StartText => Hir::look(hir::Look::Start),
943 ast::AssertionKind::EndText => Hir::look(hir::Look::End),
944 ast::AssertionKind::WordBoundary => Hir::look(if unicode {
945 hir::Look::WordUnicode
946 } else {
947 hir::Look::WordAscii
948 }),
949 ast::AssertionKind::NotWordBoundary => Hir::look(if unicode {
950 hir::Look::WordUnicodeNegate
951 } else {
952 hir::Look::WordAsciiNegate
953 }),
954 ast::AssertionKind::WordBoundaryStart
955 | ast::AssertionKind::WordBoundaryStartAngle => {
956 Hir::look(if unicode {
957 hir::Look::WordStartUnicode
958 } else {
959 hir::Look::WordStartAscii
960 })
961 }
962 ast::AssertionKind::WordBoundaryEnd
963 | ast::AssertionKind::WordBoundaryEndAngle => {
964 Hir::look(if unicode {
965 hir::Look::WordEndUnicode
966 } else {
967 hir::Look::WordEndAscii
968 })
969 }
970 ast::AssertionKind::WordBoundaryStartHalf => {
971 Hir::look(if unicode {
972 hir::Look::WordStartHalfUnicode
973 } else {
974 hir::Look::WordStartHalfAscii
975 })
976 }
977 ast::AssertionKind::WordBoundaryEndHalf => Hir::look(if unicode {
978 hir::Look::WordEndHalfUnicode
979 } else {
980 hir::Look::WordEndHalfAscii
981 }),
982 })
983 }
984
985 fn hir_capture(&self, group: &ast::Group, expr: Hir) -> Hir {
986 let (index, name) = match group.kind {
987 ast::GroupKind::CaptureIndex(index) => (index, None),
988 ast::GroupKind::CaptureName { ref name, .. } => {
989 (name.index, Some(name.name.clone().into_boxed_str()))
990 }
991 ast::GroupKind::NonCapturing(_) => return expr,
994 };
995 Hir::capture(hir::Capture { index, name, sub: Box::new(expr) })
996 }
997
998 fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
999 let (min, max) = match rep.op.kind {
1000 ast::RepetitionKind::ZeroOrOne => (0, Some(1)),
1001 ast::RepetitionKind::ZeroOrMore => (0, None),
1002 ast::RepetitionKind::OneOrMore => (1, None),
1003 ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
1004 (m, Some(m))
1005 }
1006 ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
1007 (m, None)
1008 }
1009 ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
1010 m,
1011 n,
1012 )) => (m, Some(n)),
1013 };
1014 let greedy =
1015 if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
1016 Hir::repetition(hir::Repetition {
1017 min,
1018 max,
1019 greedy,
1020 sub: Box::new(expr),
1021 })
1022 }
1023
1024 fn hir_unicode_class(
1025 &self,
1026 ast_class: &ast::ClassUnicode,
1027 ) -> Result<hir::ClassUnicode> {
1028 use crate::ast::ClassUnicodeKind::*;
1029
1030 if !self.flags().unicode() {
1031 return Err(
1032 self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
1033 );
1034 }
1035 let query = match ast_class.kind {
1036 OneLetter(name) => ClassQuery::OneLetter(name),
1037 Named(ref name) => ClassQuery::Binary(name),
1038 NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
1039 property_name: name,
1040 property_value: value,
1041 },
1042 };
1043 let mut result = self.convert_unicode_class_error(
1044 &ast_class.span,
1045 unicode::class(query),
1046 );
1047 if let Ok(ref mut class) = result {
1048 self.unicode_fold_and_negate(
1049 &ast_class.span,
1050 ast_class.negated,
1051 class,
1052 )?;
1053 }
1054 result
1055 }
1056
1057 fn hir_ascii_unicode_class(
1058 &self,
1059 ast: &ast::ClassAscii,
1060 ) -> Result<hir::ClassUnicode> {
1061 let mut cls = hir::ClassUnicode::new(
1062 ascii_class_as_chars(&ast.kind)
1063 .map(|(s, e)| hir::ClassUnicodeRange::new(s, e)),
1064 );
1065 self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
1066 Ok(cls)
1067 }
1068
1069 fn hir_ascii_byte_class(
1070 &self,
1071 ast: &ast::ClassAscii,
1072 ) -> Result<hir::ClassBytes> {
1073 let mut cls = hir::ClassBytes::new(
1074 ascii_class(&ast.kind)
1075 .map(|(s, e)| hir::ClassBytesRange::new(s, e)),
1076 );
1077 self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
1078 Ok(cls)
1079 }
1080
1081 fn hir_perl_unicode_class(
1082 &self,
1083 ast_class: &ast::ClassPerl,
1084 ) -> Result<hir::ClassUnicode> {
1085 use crate::ast::ClassPerlKind::*;
1086
1087 assert!(self.flags().unicode());
1088 let result = match ast_class.kind {
1089 Digit => unicode::perl_digit(),
1090 Space => unicode::perl_space(),
1091 Word => unicode::perl_word(),
1092 };
1093 let mut class =
1094 self.convert_unicode_class_error(&ast_class.span, result)?;
1095 if ast_class.negated {
1098 class.negate();
1099 }
1100 Ok(class)
1101 }
1102
1103 fn hir_perl_byte_class(
1104 &self,
1105 ast_class: &ast::ClassPerl,
1106 ) -> Result<hir::ClassBytes> {
1107 use crate::ast::ClassPerlKind::*;
1108
1109 assert!(!self.flags().unicode());
1110 let mut class = match ast_class.kind {
1111 Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
1112 Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
1113 Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
1114 };
1115 if ast_class.negated {
1118 class.negate();
1119 }
1120 if self.trans().utf8 && !class.is_ascii() {
1124 return Err(self.error(ast_class.span, ErrorKind::InvalidUtf8));
1125 }
1126 Ok(class)
1127 }
1128
1129 fn convert_unicode_class_error(
1134 &self,
1135 span: &Span,
1136 result: core::result::Result<hir::ClassUnicode, unicode::Error>,
1137 ) -> Result<hir::ClassUnicode> {
1138 result.map_err(|err| {
1139 let sp = span.clone();
1140 match err {
1141 unicode::Error::PropertyNotFound => {
1142 self.error(sp, ErrorKind::UnicodePropertyNotFound)
1143 }
1144 unicode::Error::PropertyValueNotFound => {
1145 self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
1146 }
1147 unicode::Error::PerlClassNotFound => {
1148 self.error(sp, ErrorKind::UnicodePerlClassNotFound)
1149 }
1150 }
1151 })
1152 }
1153
1154 fn unicode_fold_and_negate(
1155 &self,
1156 span: &Span,
1157 negated: bool,
1158 class: &mut hir::ClassUnicode,
1159 ) -> Result<()> {
1160 if self.flags().case_insensitive() {
1165 class.try_case_fold_simple().map_err(|_| {
1166 self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
1167 })?;
1168 }
1169 if negated {
1170 class.negate();
1171 }
1172 Ok(())
1173 }
1174
1175 fn bytes_fold_and_negate(
1176 &self,
1177 span: &Span,
1178 negated: bool,
1179 class: &mut hir::ClassBytes,
1180 ) -> Result<()> {
1181 if self.flags().case_insensitive() {
1186 class.case_fold_simple();
1187 }
1188 if negated {
1189 class.negate();
1190 }
1191 if self.trans().utf8 && !class.is_ascii() {
1192 return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
1193 }
1194 Ok(())
1195 }
1196
1197 fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
1200 match self.ast_literal_to_scalar(ast)? {
1201 Either::Right(byte) => Ok(byte),
1202 Either::Left(ch) => {
1203 if ch.is_ascii() {
1204 Ok(u8::try_from(ch).unwrap())
1205 } else {
1206 Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
1210 }
1211 }
1212 }
1213 }
1214}
1215
1216#[derive(Clone, Copy, Debug, Default)]
1222struct Flags {
1223 case_insensitive: Option<bool>,
1224 multi_line: Option<bool>,
1225 dot_matches_new_line: Option<bool>,
1226 swap_greed: Option<bool>,
1227 unicode: Option<bool>,
1228 crlf: Option<bool>,
1229 }
1232
1233impl Flags {
1234 fn from_ast(ast: &ast::Flags) -> Flags {
1235 let mut flags = Flags::default();
1236 let mut enable = true;
1237 for item in &ast.items {
1238 match item.kind {
1239 ast::FlagsItemKind::Negation => {
1240 enable = false;
1241 }
1242 ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
1243 flags.case_insensitive = Some(enable);
1244 }
1245 ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
1246 flags.multi_line = Some(enable);
1247 }
1248 ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
1249 flags.dot_matches_new_line = Some(enable);
1250 }
1251 ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
1252 flags.swap_greed = Some(enable);
1253 }
1254 ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
1255 flags.unicode = Some(enable);
1256 }
1257 ast::FlagsItemKind::Flag(ast::Flag::CRLF) => {
1258 flags.crlf = Some(enable);
1259 }
1260 ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
1261 }
1262 }
1263 flags
1264 }
1265
1266 fn merge(&mut self, previous: &Flags) {
1267 if self.case_insensitive.is_none() {
1268 self.case_insensitive = previous.case_insensitive;
1269 }
1270 if self.multi_line.is_none() {
1271 self.multi_line = previous.multi_line;
1272 }
1273 if self.dot_matches_new_line.is_none() {
1274 self.dot_matches_new_line = previous.dot_matches_new_line;
1275 }
1276 if self.swap_greed.is_none() {
1277 self.swap_greed = previous.swap_greed;
1278 }
1279 if self.unicode.is_none() {
1280 self.unicode = previous.unicode;
1281 }
1282 if self.crlf.is_none() {
1283 self.crlf = previous.crlf;
1284 }
1285 }
1286
1287 fn case_insensitive(&self) -> bool {
1288 self.case_insensitive.unwrap_or(false)
1289 }
1290
1291 fn multi_line(&self) -> bool {
1292 self.multi_line.unwrap_or(false)
1293 }
1294
1295 fn dot_matches_new_line(&self) -> bool {
1296 self.dot_matches_new_line.unwrap_or(false)
1297 }
1298
1299 fn swap_greed(&self) -> bool {
1300 self.swap_greed.unwrap_or(false)
1301 }
1302
1303 fn unicode(&self) -> bool {
1304 self.unicode.unwrap_or(true)
1305 }
1306
1307 fn crlf(&self) -> bool {
1308 self.crlf.unwrap_or(false)
1309 }
1310}
1311
1312fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
1313 let ranges: Vec<_> = ascii_class(kind)
1314 .map(|(s, e)| hir::ClassBytesRange::new(s, e))
1315 .collect();
1316 hir::ClassBytes::new(ranges)
1317}
1318
1319fn ascii_class(kind: &ast::ClassAsciiKind) -> impl Iterator<Item = (u8, u8)> {
1320 use crate::ast::ClassAsciiKind::*;
1321
1322 let slice: &'static [(u8, u8)] = match *kind {
1323 Alnum => &[(b'0', b'9'), (b'A', b'Z'), (b'a', b'z')],
1324 Alpha => &[(b'A', b'Z'), (b'a', b'z')],
1325 Ascii => &[(b'\x00', b'\x7F')],
1326 Blank => &[(b'\t', b'\t'), (b' ', b' ')],
1327 Cntrl => &[(b'\x00', b'\x1F'), (b'\x7F', b'\x7F')],
1328 Digit => &[(b'0', b'9')],
1329 Graph => &[(b'!', b'~')],
1330 Lower => &[(b'a', b'z')],
1331 Print => &[(b' ', b'~')],
1332 Punct => &[(b'!', b'/'), (b':', b'@'), (b'[', b'`'), (b'{', b'~')],
1333 Space => &[
1334 (b'\t', b'\t'),
1335 (b'\n', b'\n'),
1336 (b'\x0B', b'\x0B'),
1337 (b'\x0C', b'\x0C'),
1338 (b'\r', b'\r'),
1339 (b' ', b' '),
1340 ],
1341 Upper => &[(b'A', b'Z')],
1342 Word => &[(b'0', b'9'), (b'A', b'Z'), (b'_', b'_'), (b'a', b'z')],
1343 Xdigit => &[(b'0', b'9'), (b'A', b'F'), (b'a', b'f')],
1344 };
1345 slice.iter().copied()
1346}
1347
1348fn ascii_class_as_chars(
1349 kind: &ast::ClassAsciiKind,
1350) -> impl Iterator<Item = (char, char)> {
1351 ascii_class(kind).map(|(s, e)| (char::from(s), char::from(e)))
1352}
1353
1354#[cfg(test)]
1355mod tests {
1356 use crate::{
1357 ast::{parse::ParserBuilder, Position},
1358 hir::{Look, Properties},
1359 };
1360
1361 use super::*;
1362
1363 #[derive(Clone, Debug)]
1367 struct TestError {
1368 span: Span,
1369 kind: hir::ErrorKind,
1370 }
1371
1372 impl PartialEq<hir::Error> for TestError {
1373 fn eq(&self, other: &hir::Error) -> bool {
1374 self.span == other.span && self.kind == other.kind
1375 }
1376 }
1377
1378 impl PartialEq<TestError> for hir::Error {
1379 fn eq(&self, other: &TestError) -> bool {
1380 self.span == other.span && self.kind == other.kind
1381 }
1382 }
1383
1384 fn parse(pattern: &str) -> Ast {
1385 ParserBuilder::new().octal(true).build().parse(pattern).unwrap()
1386 }
1387
1388 fn t(pattern: &str) -> Hir {
1389 TranslatorBuilder::new()
1390 .utf8(true)
1391 .build()
1392 .translate(pattern, &parse(pattern))
1393 .unwrap()
1394 }
1395
1396 fn t_err(pattern: &str) -> hir::Error {
1397 TranslatorBuilder::new()
1398 .utf8(true)
1399 .build()
1400 .translate(pattern, &parse(pattern))
1401 .unwrap_err()
1402 }
1403
1404 fn t_bytes(pattern: &str) -> Hir {
1405 TranslatorBuilder::new()
1406 .utf8(false)
1407 .build()
1408 .translate(pattern, &parse(pattern))
1409 .unwrap()
1410 }
1411
1412 fn props(pattern: &str) -> Properties {
1413 t(pattern).properties().clone()
1414 }
1415
1416 fn props_bytes(pattern: &str) -> Properties {
1417 t_bytes(pattern).properties().clone()
1418 }
1419
1420 fn hir_lit(s: &str) -> Hir {
1421 hir_blit(s.as_bytes())
1422 }
1423
1424 fn hir_blit(s: &[u8]) -> Hir {
1425 Hir::literal(s)
1426 }
1427
1428 fn hir_capture(index: u32, expr: Hir) -> Hir {
1429 Hir::capture(hir::Capture { index, name: None, sub: Box::new(expr) })
1430 }
1431
1432 fn hir_capture_name(index: u32, name: &str, expr: Hir) -> Hir {
1433 Hir::capture(hir::Capture {
1434 index,
1435 name: Some(name.into()),
1436 sub: Box::new(expr),
1437 })
1438 }
1439
1440 fn hir_quest(greedy: bool, expr: Hir) -> Hir {
1441 Hir::repetition(hir::Repetition {
1442 min: 0,
1443 max: Some(1),
1444 greedy,
1445 sub: Box::new(expr),
1446 })
1447 }
1448
1449 fn hir_star(greedy: bool, expr: Hir) -> Hir {
1450 Hir::repetition(hir::Repetition {
1451 min: 0,
1452 max: None,
1453 greedy,
1454 sub: Box::new(expr),
1455 })
1456 }
1457
1458 fn hir_plus(greedy: bool, expr: Hir) -> Hir {
1459 Hir::repetition(hir::Repetition {
1460 min: 1,
1461 max: None,
1462 greedy,
1463 sub: Box::new(expr),
1464 })
1465 }
1466
1467 fn hir_range(greedy: bool, min: u32, max: Option<u32>, expr: Hir) -> Hir {
1468 Hir::repetition(hir::Repetition {
1469 min,
1470 max,
1471 greedy,
1472 sub: Box::new(expr),
1473 })
1474 }
1475
1476 fn hir_alt(alts: Vec<Hir>) -> Hir {
1477 Hir::alternation(alts)
1478 }
1479
1480 fn hir_cat(exprs: Vec<Hir>) -> Hir {
1481 Hir::concat(exprs)
1482 }
1483
1484 #[allow(dead_code)]
1485 fn hir_uclass_query(query: ClassQuery<'_>) -> Hir {
1486 Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
1487 }
1488
1489 #[allow(dead_code)]
1490 fn hir_uclass_perl_word() -> Hir {
1491 Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
1492 }
1493
1494 fn hir_ascii_uclass(kind: &ast::ClassAsciiKind) -> Hir {
1495 Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(
1496 ascii_class_as_chars(kind)
1497 .map(|(s, e)| hir::ClassUnicodeRange::new(s, e)),
1498 )))
1499 }
1500
1501 fn hir_ascii_bclass(kind: &ast::ClassAsciiKind) -> Hir {
1502 Hir::class(hir::Class::Bytes(hir::ClassBytes::new(
1503 ascii_class(kind).map(|(s, e)| hir::ClassBytesRange::new(s, e)),
1504 )))
1505 }
1506
1507 fn hir_uclass(ranges: &[(char, char)]) -> Hir {
1508 Hir::class(uclass(ranges))
1509 }
1510
1511 fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
1512 Hir::class(bclass(ranges))
1513 }
1514
1515 fn hir_case_fold(expr: Hir) -> Hir {
1516 match expr.into_kind() {
1517 HirKind::Class(mut cls) => {
1518 cls.case_fold_simple();
1519 Hir::class(cls)
1520 }
1521 _ => panic!("cannot case fold non-class Hir expr"),
1522 }
1523 }
1524
1525 fn hir_negate(expr: Hir) -> Hir {
1526 match expr.into_kind() {
1527 HirKind::Class(mut cls) => {
1528 cls.negate();
1529 Hir::class(cls)
1530 }
1531 _ => panic!("cannot negate non-class Hir expr"),
1532 }
1533 }
1534
1535 fn uclass(ranges: &[(char, char)]) -> hir::Class {
1536 let ranges: Vec<hir::ClassUnicodeRange> = ranges
1537 .iter()
1538 .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
1539 .collect();
1540 hir::Class::Unicode(hir::ClassUnicode::new(ranges))
1541 }
1542
1543 fn bclass(ranges: &[(u8, u8)]) -> hir::Class {
1544 let ranges: Vec<hir::ClassBytesRange> = ranges
1545 .iter()
1546 .map(|&(s, e)| hir::ClassBytesRange::new(s, e))
1547 .collect();
1548 hir::Class::Bytes(hir::ClassBytes::new(ranges))
1549 }
1550
1551 #[cfg(feature = "unicode-case")]
1552 fn class_case_fold(mut cls: hir::Class) -> Hir {
1553 cls.case_fold_simple();
1554 Hir::class(cls)
1555 }
1556
1557 fn class_negate(mut cls: hir::Class) -> Hir {
1558 cls.negate();
1559 Hir::class(cls)
1560 }
1561
1562 #[allow(dead_code)]
1563 fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
1564 use crate::hir::Class::{Bytes, Unicode};
1565
1566 match (expr1.into_kind(), expr2.into_kind()) {
1567 (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1568 c1.union(&c2);
1569 Hir::class(hir::Class::Unicode(c1))
1570 }
1571 (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1572 c1.union(&c2);
1573 Hir::class(hir::Class::Bytes(c1))
1574 }
1575 _ => panic!("cannot union non-class Hir exprs"),
1576 }
1577 }
1578
1579 #[allow(dead_code)]
1580 fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
1581 use crate::hir::Class::{Bytes, Unicode};
1582
1583 match (expr1.into_kind(), expr2.into_kind()) {
1584 (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1585 c1.difference(&c2);
1586 Hir::class(hir::Class::Unicode(c1))
1587 }
1588 (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1589 c1.difference(&c2);
1590 Hir::class(hir::Class::Bytes(c1))
1591 }
1592 _ => panic!("cannot difference non-class Hir exprs"),
1593 }
1594 }
1595
1596 fn hir_look(look: hir::Look) -> Hir {
1597 Hir::look(look)
1598 }
1599
1600 #[test]
1601 fn empty() {
1602 assert_eq!(t(""), Hir::empty());
1603 assert_eq!(t("(?i)"), Hir::empty());
1604 assert_eq!(t("()"), hir_capture(1, Hir::empty()));
1605 assert_eq!(t("(?:)"), Hir::empty());
1606 assert_eq!(t("(?P<wat>)"), hir_capture_name(1, "wat", Hir::empty()));
1607 assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()]));
1608 assert_eq!(
1609 t("()|()"),
1610 hir_alt(vec![
1611 hir_capture(1, Hir::empty()),
1612 hir_capture(2, Hir::empty()),
1613 ])
1614 );
1615 assert_eq!(
1616 t("(|b)"),
1617 hir_capture(1, hir_alt(vec![Hir::empty(), hir_lit("b"),]))
1618 );
1619 assert_eq!(
1620 t("(a|)"),
1621 hir_capture(1, hir_alt(vec![hir_lit("a"), Hir::empty(),]))
1622 );
1623 assert_eq!(
1624 t("(a||c)"),
1625 hir_capture(
1626 1,
1627 hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),])
1628 )
1629 );
1630 assert_eq!(
1631 t("(||)"),
1632 hir_capture(
1633 1,
1634 hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),])
1635 )
1636 );
1637 }
1638
1639 #[test]
1640 fn literal() {
1641 assert_eq!(t("a"), hir_lit("a"));
1642 assert_eq!(t("(?-u)a"), hir_lit("a"));
1643 assert_eq!(t("☃"), hir_lit("☃"));
1644 assert_eq!(t("abcd"), hir_lit("abcd"));
1645
1646 assert_eq!(t_bytes("(?-u)a"), hir_lit("a"));
1647 assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a"));
1648 assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
1649 assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));
1650
1651 assert_eq!(t("(?-u)☃"), hir_lit("☃"));
1652 assert_eq!(
1653 t_err(r"(?-u)\xFF"),
1654 TestError {
1655 kind: hir::ErrorKind::InvalidUtf8,
1656 span: Span::new(
1657 Position::new(5, 1, 6),
1658 Position::new(9, 1, 10)
1659 ),
1660 }
1661 );
1662 }
1663
1664 #[test]
1665 fn literal_case_insensitive() {
1666 #[cfg(feature = "unicode-case")]
1667 assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
1668 #[cfg(feature = "unicode-case")]
1669 assert_eq!(t("(?i:a)"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
1670 #[cfg(feature = "unicode-case")]
1671 assert_eq!(
1672 t("a(?i)a(?-i)a"),
1673 hir_cat(vec![
1674 hir_lit("a"),
1675 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1676 hir_lit("a"),
1677 ])
1678 );
1679 #[cfg(feature = "unicode-case")]
1680 assert_eq!(
1681 t("(?i)ab@c"),
1682 hir_cat(vec![
1683 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1684 hir_uclass(&[('B', 'B'), ('b', 'b')]),
1685 hir_lit("@"),
1686 hir_uclass(&[('C', 'C'), ('c', 'c')]),
1687 ])
1688 );
1689 #[cfg(feature = "unicode-case")]
1690 assert_eq!(
1691 t("(?i)β"),
1692 hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
1693 );
1694
1695 assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
1696 #[cfg(feature = "unicode-case")]
1697 assert_eq!(
1698 t("(?-u)a(?i)a(?-i)a"),
1699 hir_cat(vec![
1700 hir_lit("a"),
1701 hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
1702 hir_lit("a"),
1703 ])
1704 );
1705 assert_eq!(
1706 t("(?i-u)ab@c"),
1707 hir_cat(vec![
1708 hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
1709 hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
1710 hir_lit("@"),
1711 hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
1712 ])
1713 );
1714
1715 assert_eq!(
1716 t_bytes("(?i-u)a"),
1717 hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
1718 );
1719 assert_eq!(
1720 t_bytes("(?i-u)\x61"),
1721 hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
1722 );
1723 assert_eq!(
1724 t_bytes(r"(?i-u)\x61"),
1725 hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
1726 );
1727 assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));
1728
1729 assert_eq!(t("(?i-u)β"), hir_lit("β"),);
1730 }
1731
1732 #[test]
1733 fn dot() {
1734 assert_eq!(
1735 t("."),
1736 hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}')])
1737 );
1738 assert_eq!(
1739 t("(?R)."),
1740 hir_uclass(&[
1741 ('\0', '\t'),
1742 ('\x0B', '\x0C'),
1743 ('\x0E', '\u{10FFFF}'),
1744 ])
1745 );
1746 assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}')]));
1747 assert_eq!(t("(?Rs)."), hir_uclass(&[('\0', '\u{10FFFF}')]));
1748 assert_eq!(
1749 t_bytes("(?-u)."),
1750 hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF')])
1751 );
1752 assert_eq!(
1753 t_bytes("(?R-u)."),
1754 hir_bclass(&[
1755 (b'\0', b'\t'),
1756 (b'\x0B', b'\x0C'),
1757 (b'\x0E', b'\xFF'),
1758 ])
1759 );
1760 assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));
1761 assert_eq!(t_bytes("(?Rs-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));
1762
1763 assert_eq!(
1765 t_err("(?-u)."),
1766 TestError {
1767 kind: hir::ErrorKind::InvalidUtf8,
1768 span: Span::new(
1769 Position::new(5, 1, 6),
1770 Position::new(6, 1, 7)
1771 ),
1772 }
1773 );
1774 assert_eq!(
1775 t_err("(?R-u)."),
1776 TestError {
1777 kind: hir::ErrorKind::InvalidUtf8,
1778 span: Span::new(
1779 Position::new(6, 1, 7),
1780 Position::new(7, 1, 8)
1781 ),
1782 }
1783 );
1784 assert_eq!(
1785 t_err("(?s-u)."),
1786 TestError {
1787 kind: hir::ErrorKind::InvalidUtf8,
1788 span: Span::new(
1789 Position::new(6, 1, 7),
1790 Position::new(7, 1, 8)
1791 ),
1792 }
1793 );
1794 assert_eq!(
1795 t_err("(?Rs-u)."),
1796 TestError {
1797 kind: hir::ErrorKind::InvalidUtf8,
1798 span: Span::new(
1799 Position::new(7, 1, 8),
1800 Position::new(8, 1, 9)
1801 ),
1802 }
1803 );
1804 }
1805
1806 #[test]
1807 fn assertions() {
1808 assert_eq!(t("^"), hir_look(hir::Look::Start));
1809 assert_eq!(t("$"), hir_look(hir::Look::End));
1810 assert_eq!(t(r"\A"), hir_look(hir::Look::Start));
1811 assert_eq!(t(r"\z"), hir_look(hir::Look::End));
1812 assert_eq!(t("(?m)^"), hir_look(hir::Look::StartLF));
1813 assert_eq!(t("(?m)$"), hir_look(hir::Look::EndLF));
1814 assert_eq!(t(r"(?m)\A"), hir_look(hir::Look::Start));
1815 assert_eq!(t(r"(?m)\z"), hir_look(hir::Look::End));
1816
1817 assert_eq!(t(r"\b"), hir_look(hir::Look::WordUnicode));
1818 assert_eq!(t(r"\B"), hir_look(hir::Look::WordUnicodeNegate));
1819 assert_eq!(t(r"(?-u)\b"), hir_look(hir::Look::WordAscii));
1820 assert_eq!(t(r"(?-u)\B"), hir_look(hir::Look::WordAsciiNegate));
1821 }
1822
1823 #[test]
1824 fn group() {
1825 assert_eq!(t("(a)"), hir_capture(1, hir_lit("a")));
1826 assert_eq!(
1827 t("(a)(b)"),
1828 hir_cat(vec![
1829 hir_capture(1, hir_lit("a")),
1830 hir_capture(2, hir_lit("b")),
1831 ])
1832 );
1833 assert_eq!(
1834 t("(a)|(b)"),
1835 hir_alt(vec![
1836 hir_capture(1, hir_lit("a")),
1837 hir_capture(2, hir_lit("b")),
1838 ])
1839 );
1840 assert_eq!(t("(?P<foo>)"), hir_capture_name(1, "foo", Hir::empty()));
1841 assert_eq!(t("(?P<foo>a)"), hir_capture_name(1, "foo", hir_lit("a")));
1842 assert_eq!(
1843 t("(?P<foo>a)(?P<bar>b)"),
1844 hir_cat(vec![
1845 hir_capture_name(1, "foo", hir_lit("a")),
1846 hir_capture_name(2, "bar", hir_lit("b")),
1847 ])
1848 );
1849 assert_eq!(t("(?:)"), Hir::empty());
1850 assert_eq!(t("(?:a)"), hir_lit("a"));
1851 assert_eq!(
1852 t("(?:a)(b)"),
1853 hir_cat(vec![hir_lit("a"), hir_capture(1, hir_lit("b")),])
1854 );
1855 assert_eq!(
1856 t("(a)(?:b)(c)"),
1857 hir_cat(vec![
1858 hir_capture(1, hir_lit("a")),
1859 hir_lit("b"),
1860 hir_capture(2, hir_lit("c")),
1861 ])
1862 );
1863 assert_eq!(
1864 t("(a)(?P<foo>b)(c)"),
1865 hir_cat(vec![
1866 hir_capture(1, hir_lit("a")),
1867 hir_capture_name(2, "foo", hir_lit("b")),
1868 hir_capture(3, hir_lit("c")),
1869 ])
1870 );
1871 assert_eq!(t("()"), hir_capture(1, Hir::empty()));
1872 assert_eq!(t("((?i))"), hir_capture(1, Hir::empty()));
1873 assert_eq!(t("((?x))"), hir_capture(1, Hir::empty()));
1874 assert_eq!(
1875 t("(((?x)))"),
1876 hir_capture(1, hir_capture(2, Hir::empty()))
1877 );
1878 }
1879
1880 #[test]
1881 fn line_anchors() {
1882 assert_eq!(t("^"), hir_look(hir::Look::Start));
1883 assert_eq!(t("$"), hir_look(hir::Look::End));
1884 assert_eq!(t(r"\A"), hir_look(hir::Look::Start));
1885 assert_eq!(t(r"\z"), hir_look(hir::Look::End));
1886
1887 assert_eq!(t(r"(?m)\A"), hir_look(hir::Look::Start));
1888 assert_eq!(t(r"(?m)\z"), hir_look(hir::Look::End));
1889 assert_eq!(t("(?m)^"), hir_look(hir::Look::StartLF));
1890 assert_eq!(t("(?m)$"), hir_look(hir::Look::EndLF));
1891
1892 assert_eq!(t(r"(?R)\A"), hir_look(hir::Look::Start));
1893 assert_eq!(t(r"(?R)\z"), hir_look(hir::Look::End));
1894 assert_eq!(t("(?R)^"), hir_look(hir::Look::Start));
1895 assert_eq!(t("(?R)$"), hir_look(hir::Look::End));
1896
1897 assert_eq!(t(r"(?Rm)\A"), hir_look(hir::Look::Start));
1898 assert_eq!(t(r"(?Rm)\z"), hir_look(hir::Look::End));
1899 assert_eq!(t("(?Rm)^"), hir_look(hir::Look::StartCRLF));
1900 assert_eq!(t("(?Rm)$"), hir_look(hir::Look::EndCRLF));
1901 }
1902
1903 #[test]
1904 fn flags() {
1905 #[cfg(feature = "unicode-case")]
1906 assert_eq!(
1907 t("(?i:a)a"),
1908 hir_cat(
1909 vec![hir_uclass(&[('A', 'A'), ('a', 'a')]), hir_lit("a"),]
1910 )
1911 );
1912 assert_eq!(
1913 t("(?i-u:a)β"),
1914 hir_cat(vec![
1915 hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
1916 hir_lit("β"),
1917 ])
1918 );
1919 assert_eq!(
1920 t("(?:(?i-u)a)b"),
1921 hir_cat(vec![
1922 hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
1923 hir_lit("b"),
1924 ])
1925 );
1926 assert_eq!(
1927 t("((?i-u)a)b"),
1928 hir_cat(vec![
1929 hir_capture(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
1930 hir_lit("b"),
1931 ])
1932 );
1933 #[cfg(feature = "unicode-case")]
1934 assert_eq!(
1935 t("(?i)(?-i:a)a"),
1936 hir_cat(
1937 vec![hir_lit("a"), hir_uclass(&[('A', 'A'), ('a', 'a')]),]
1938 )
1939 );
1940 #[cfg(feature = "unicode-case")]
1941 assert_eq!(
1942 t("(?im)a^"),
1943 hir_cat(vec![
1944 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1945 hir_look(hir::Look::StartLF),
1946 ])
1947 );
1948 #[cfg(feature = "unicode-case")]
1949 assert_eq!(
1950 t("(?im)a^(?i-m)a^"),
1951 hir_cat(vec![
1952 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1953 hir_look(hir::Look::StartLF),
1954 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1955 hir_look(hir::Look::Start),
1956 ])
1957 );
1958 assert_eq!(
1959 t("(?U)a*a*?(?-U)a*a*?"),
1960 hir_cat(vec![
1961 hir_star(false, hir_lit("a")),
1962 hir_star(true, hir_lit("a")),
1963 hir_star(true, hir_lit("a")),
1964 hir_star(false, hir_lit("a")),
1965 ])
1966 );
1967 #[cfg(feature = "unicode-case")]
1968 assert_eq!(
1969 t("(?:a(?i)a)a"),
1970 hir_cat(vec![
1971 hir_cat(vec![
1972 hir_lit("a"),
1973 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1974 ]),
1975 hir_lit("a"),
1976 ])
1977 );
1978 #[cfg(feature = "unicode-case")]
1979 assert_eq!(
1980 t("(?i)(?:a(?-i)a)a"),
1981 hir_cat(vec![
1982 hir_cat(vec![
1983 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1984 hir_lit("a"),
1985 ]),
1986 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1987 ])
1988 );
1989 }
1990
1991 #[test]
1992 fn escape() {
1993 assert_eq!(
1994 t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
1995 hir_lit(r"\.+*?()|[]{}^$#")
1996 );
1997 }
1998
1999 #[test]
2000 fn repetition() {
2001 assert_eq!(t("a?"), hir_quest(true, hir_lit("a")));
2002 assert_eq!(t("a*"), hir_star(true, hir_lit("a")));
2003 assert_eq!(t("a+"), hir_plus(true, hir_lit("a")));
2004 assert_eq!(t("a??"), hir_quest(false, hir_lit("a")));
2005 assert_eq!(t("a*?"), hir_star(false, hir_lit("a")));
2006 assert_eq!(t("a+?"), hir_plus(false, hir_lit("a")));
2007
2008 assert_eq!(t("a{1}"), hir_range(true, 1, Some(1), hir_lit("a"),));
2009 assert_eq!(t("a{1,}"), hir_range(true, 1, None, hir_lit("a"),));
2010 assert_eq!(t("a{1,2}"), hir_range(true, 1, Some(2), hir_lit("a"),));
2011 assert_eq!(t("a{1}?"), hir_range(false, 1, Some(1), hir_lit("a"),));
2012 assert_eq!(t("a{1,}?"), hir_range(false, 1, None, hir_lit("a"),));
2013 assert_eq!(t("a{1,2}?"), hir_range(false, 1, Some(2), hir_lit("a"),));
2014
2015 assert_eq!(
2016 t("ab?"),
2017 hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
2018 );
2019 assert_eq!(t("(ab)?"), hir_quest(true, hir_capture(1, hir_lit("ab"))));
2020 assert_eq!(
2021 t("a|b?"),
2022 hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
2023 );
2024 }
2025
2026 #[test]
2027 fn cat_alt() {
2028 let a = || hir_look(hir::Look::Start);
2029 let b = || hir_look(hir::Look::End);
2030 let c = || hir_look(hir::Look::WordUnicode);
2031 let d = || hir_look(hir::Look::WordUnicodeNegate);
2032
2033 assert_eq!(t("(^$)"), hir_capture(1, hir_cat(vec![a(), b()])));
2034 assert_eq!(t("^|$"), hir_alt(vec![a(), b()]));
2035 assert_eq!(t(r"^|$|\b"), hir_alt(vec![a(), b(), c()]));
2036 assert_eq!(
2037 t(r"^$|$\b|\b\B"),
2038 hir_alt(vec![
2039 hir_cat(vec![a(), b()]),
2040 hir_cat(vec![b(), c()]),
2041 hir_cat(vec![c(), d()]),
2042 ])
2043 );
2044 assert_eq!(t("(^|$)"), hir_capture(1, hir_alt(vec![a(), b()])));
2045 assert_eq!(
2046 t(r"(^|$|\b)"),
2047 hir_capture(1, hir_alt(vec![a(), b(), c()]))
2048 );
2049 assert_eq!(
2050 t(r"(^$|$\b|\b\B)"),
2051 hir_capture(
2052 1,
2053 hir_alt(vec![
2054 hir_cat(vec![a(), b()]),
2055 hir_cat(vec![b(), c()]),
2056 hir_cat(vec![c(), d()]),
2057 ])
2058 )
2059 );
2060 assert_eq!(
2061 t(r"(^$|($\b|(\b\B)))"),
2062 hir_capture(
2063 1,
2064 hir_alt(vec![
2065 hir_cat(vec![a(), b()]),
2066 hir_capture(
2067 2,
2068 hir_alt(vec![
2069 hir_cat(vec![b(), c()]),
2070 hir_capture(3, hir_cat(vec![c(), d()])),
2071 ])
2072 ),
2073 ])
2074 )
2075 );
2076 }
2077
2078 #[test]
2084 fn cat_class_flattened() {
2085 assert_eq!(t(r"[a-z]|[A-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
2086 #[cfg(feature = "unicode-gencat")]
2089 assert_eq!(
2090 t(r"(?x)
2091 \p{Lowercase_Letter}
2092 |\p{Uppercase_Letter}
2093 |\p{Titlecase_Letter}
2094 |\p{Modifier_Letter}
2095 |\p{Other_Letter}
2096 "),
2097 hir_uclass_query(ClassQuery::Binary("letter"))
2098 );
2099 assert_eq!(
2102 t_bytes(r"[Δδ]|(?-u:[\x90-\xFF])|[Λλ]"),
2103 hir_alt(vec![
2104 hir_uclass(&[('Δ', 'Δ'), ('δ', 'δ')]),
2105 hir_bclass(&[(b'\x90', b'\xFF')]),
2106 hir_uclass(&[('Λ', 'Λ'), ('λ', 'λ')]),
2107 ])
2108 );
2109 assert_eq!(
2112 t_bytes(r"[a-z]|(?-u:[\x90-\xFF])|[A-Z]"),
2113 hir_bclass(&[(b'A', b'Z'), (b'a', b'z'), (b'\x90', b'\xFF')]),
2114 );
2115 }
2116
2117 #[test]
2118 fn class_ascii() {
2119 assert_eq!(
2120 t("[[:alnum:]]"),
2121 hir_ascii_uclass(&ast::ClassAsciiKind::Alnum)
2122 );
2123 assert_eq!(
2124 t("[[:alpha:]]"),
2125 hir_ascii_uclass(&ast::ClassAsciiKind::Alpha)
2126 );
2127 assert_eq!(
2128 t("[[:ascii:]]"),
2129 hir_ascii_uclass(&ast::ClassAsciiKind::Ascii)
2130 );
2131 assert_eq!(
2132 t("[[:blank:]]"),
2133 hir_ascii_uclass(&ast::ClassAsciiKind::Blank)
2134 );
2135 assert_eq!(
2136 t("[[:cntrl:]]"),
2137 hir_ascii_uclass(&ast::ClassAsciiKind::Cntrl)
2138 );
2139 assert_eq!(
2140 t("[[:digit:]]"),
2141 hir_ascii_uclass(&ast::ClassAsciiKind::Digit)
2142 );
2143 assert_eq!(
2144 t("[[:graph:]]"),
2145 hir_ascii_uclass(&ast::ClassAsciiKind::Graph)
2146 );
2147 assert_eq!(
2148 t("[[:lower:]]"),
2149 hir_ascii_uclass(&ast::ClassAsciiKind::Lower)
2150 );
2151 assert_eq!(
2152 t("[[:print:]]"),
2153 hir_ascii_uclass(&ast::ClassAsciiKind::Print)
2154 );
2155 assert_eq!(
2156 t("[[:punct:]]"),
2157 hir_ascii_uclass(&ast::ClassAsciiKind::Punct)
2158 );
2159 assert_eq!(
2160 t("[[:space:]]"),
2161 hir_ascii_uclass(&ast::ClassAsciiKind::Space)
2162 );
2163 assert_eq!(
2164 t("[[:upper:]]"),
2165 hir_ascii_uclass(&ast::ClassAsciiKind::Upper)
2166 );
2167 assert_eq!(
2168 t("[[:word:]]"),
2169 hir_ascii_uclass(&ast::ClassAsciiKind::Word)
2170 );
2171 assert_eq!(
2172 t("[[:xdigit:]]"),
2173 hir_ascii_uclass(&ast::ClassAsciiKind::Xdigit)
2174 );
2175
2176 assert_eq!(
2177 t("[[:^lower:]]"),
2178 hir_negate(hir_ascii_uclass(&ast::ClassAsciiKind::Lower))
2179 );
2180 #[cfg(feature = "unicode-case")]
2181 assert_eq!(
2182 t("(?i)[[:lower:]]"),
2183 hir_uclass(&[
2184 ('A', 'Z'),
2185 ('a', 'z'),
2186 ('\u{17F}', '\u{17F}'),
2187 ('\u{212A}', '\u{212A}'),
2188 ])
2189 );
2190
2191 assert_eq!(
2192 t("(?-u)[[:lower:]]"),
2193 hir_ascii_bclass(&ast::ClassAsciiKind::Lower)
2194 );
2195 assert_eq!(
2196 t("(?i-u)[[:lower:]]"),
2197 hir_case_fold(hir_ascii_bclass(&ast::ClassAsciiKind::Lower))
2198 );
2199
2200 assert_eq!(
2201 t_err("(?-u)[[:^lower:]]"),
2202 TestError {
2203 kind: hir::ErrorKind::InvalidUtf8,
2204 span: Span::new(
2205 Position::new(6, 1, 7),
2206 Position::new(16, 1, 17)
2207 ),
2208 }
2209 );
2210 assert_eq!(
2211 t_err("(?i-u)[[:^lower:]]"),
2212 TestError {
2213 kind: hir::ErrorKind::InvalidUtf8,
2214 span: Span::new(
2215 Position::new(7, 1, 8),
2216 Position::new(17, 1, 18)
2217 ),
2218 }
2219 );
2220 }
2221
2222 #[test]
2223 fn class_ascii_multiple() {
2224 assert_eq!(
2226 t("[[:alnum:][:^ascii:]]"),
2227 hir_union(
2228 hir_ascii_uclass(&ast::ClassAsciiKind::Alnum),
2229 hir_uclass(&[('\u{80}', '\u{10FFFF}')]),
2230 ),
2231 );
2232 assert_eq!(
2233 t_bytes("(?-u)[[:alnum:][:^ascii:]]"),
2234 hir_union(
2235 hir_ascii_bclass(&ast::ClassAsciiKind::Alnum),
2236 hir_bclass(&[(0x80, 0xFF)]),
2237 ),
2238 );
2239 }
2240
2241 #[test]
2242 #[cfg(feature = "unicode-perl")]
2243 fn class_perl_unicode() {
2244 assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit")));
2246 assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space")));
2247 assert_eq!(t(r"\w"), hir_uclass_perl_word());
2248 #[cfg(feature = "unicode-case")]
2249 assert_eq!(
2250 t(r"(?i)\d"),
2251 hir_uclass_query(ClassQuery::Binary("digit"))
2252 );
2253 #[cfg(feature = "unicode-case")]
2254 assert_eq!(
2255 t(r"(?i)\s"),
2256 hir_uclass_query(ClassQuery::Binary("space"))
2257 );
2258 #[cfg(feature = "unicode-case")]
2259 assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());
2260
2261 assert_eq!(
2263 t(r"\D"),
2264 hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2265 );
2266 assert_eq!(
2267 t(r"\S"),
2268 hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
2269 );
2270 assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
2271 #[cfg(feature = "unicode-case")]
2272 assert_eq!(
2273 t(r"(?i)\D"),
2274 hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2275 );
2276 #[cfg(feature = "unicode-case")]
2277 assert_eq!(
2278 t(r"(?i)\S"),
2279 hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
2280 );
2281 #[cfg(feature = "unicode-case")]
2282 assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));
2283 }
2284
2285 #[test]
2286 fn class_perl_ascii() {
2287 assert_eq!(
2289 t(r"(?-u)\d"),
2290 hir_ascii_bclass(&ast::ClassAsciiKind::Digit)
2291 );
2292 assert_eq!(
2293 t(r"(?-u)\s"),
2294 hir_ascii_bclass(&ast::ClassAsciiKind::Space)
2295 );
2296 assert_eq!(
2297 t(r"(?-u)\w"),
2298 hir_ascii_bclass(&ast::ClassAsciiKind::Word)
2299 );
2300 assert_eq!(
2301 t(r"(?i-u)\d"),
2302 hir_ascii_bclass(&ast::ClassAsciiKind::Digit)
2303 );
2304 assert_eq!(
2305 t(r"(?i-u)\s"),
2306 hir_ascii_bclass(&ast::ClassAsciiKind::Space)
2307 );
2308 assert_eq!(
2309 t(r"(?i-u)\w"),
2310 hir_ascii_bclass(&ast::ClassAsciiKind::Word)
2311 );
2312
2313 assert_eq!(
2315 t_bytes(r"(?-u)\D"),
2316 hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
2317 );
2318 assert_eq!(
2319 t_bytes(r"(?-u)\S"),
2320 hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Space))
2321 );
2322 assert_eq!(
2323 t_bytes(r"(?-u)\W"),
2324 hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
2325 );
2326 assert_eq!(
2327 t_bytes(r"(?i-u)\D"),
2328 hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
2329 );
2330 assert_eq!(
2331 t_bytes(r"(?i-u)\S"),
2332 hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Space))
2333 );
2334 assert_eq!(
2335 t_bytes(r"(?i-u)\W"),
2336 hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
2337 );
2338
2339 assert_eq!(
2343 t_err(r"(?-u)\D"),
2344 TestError {
2345 kind: hir::ErrorKind::InvalidUtf8,
2346 span: Span::new(
2347 Position::new(5, 1, 6),
2348 Position::new(7, 1, 8),
2349 ),
2350 },
2351 );
2352 assert_eq!(
2353 t_err(r"(?-u)\S"),
2354 TestError {
2355 kind: hir::ErrorKind::InvalidUtf8,
2356 span: Span::new(
2357 Position::new(5, 1, 6),
2358 Position::new(7, 1, 8),
2359 ),
2360 },
2361 );
2362 assert_eq!(
2363 t_err(r"(?-u)\W"),
2364 TestError {
2365 kind: hir::ErrorKind::InvalidUtf8,
2366 span: Span::new(
2367 Position::new(5, 1, 6),
2368 Position::new(7, 1, 8),
2369 ),
2370 },
2371 );
2372 assert_eq!(
2373 t_err(r"(?i-u)\D"),
2374 TestError {
2375 kind: hir::ErrorKind::InvalidUtf8,
2376 span: Span::new(
2377 Position::new(6, 1, 7),
2378 Position::new(8, 1, 9),
2379 ),
2380 },
2381 );
2382 assert_eq!(
2383 t_err(r"(?i-u)\S"),
2384 TestError {
2385 kind: hir::ErrorKind::InvalidUtf8,
2386 span: Span::new(
2387 Position::new(6, 1, 7),
2388 Position::new(8, 1, 9),
2389 ),
2390 },
2391 );
2392 assert_eq!(
2393 t_err(r"(?i-u)\W"),
2394 TestError {
2395 kind: hir::ErrorKind::InvalidUtf8,
2396 span: Span::new(
2397 Position::new(6, 1, 7),
2398 Position::new(8, 1, 9),
2399 ),
2400 },
2401 );
2402 }
2403
2404 #[test]
2405 #[cfg(not(feature = "unicode-perl"))]
2406 fn class_perl_word_disabled() {
2407 assert_eq!(
2408 t_err(r"\w"),
2409 TestError {
2410 kind: hir::ErrorKind::UnicodePerlClassNotFound,
2411 span: Span::new(
2412 Position::new(0, 1, 1),
2413 Position::new(2, 1, 3)
2414 ),
2415 }
2416 );
2417 }
2418
2419 #[test]
2420 #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
2421 fn class_perl_space_disabled() {
2422 assert_eq!(
2423 t_err(r"\s"),
2424 TestError {
2425 kind: hir::ErrorKind::UnicodePerlClassNotFound,
2426 span: Span::new(
2427 Position::new(0, 1, 1),
2428 Position::new(2, 1, 3)
2429 ),
2430 }
2431 );
2432 }
2433
2434 #[test]
2435 #[cfg(all(
2436 not(feature = "unicode-perl"),
2437 not(feature = "unicode-gencat")
2438 ))]
2439 fn class_perl_digit_disabled() {
2440 assert_eq!(
2441 t_err(r"\d"),
2442 TestError {
2443 kind: hir::ErrorKind::UnicodePerlClassNotFound,
2444 span: Span::new(
2445 Position::new(0, 1, 1),
2446 Position::new(2, 1, 3)
2447 ),
2448 }
2449 );
2450 }
2451
2452 #[test]
2453 #[cfg(feature = "unicode-gencat")]
2454 fn class_unicode_gencat() {
2455 assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z")));
2456 assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z")));
2457 assert_eq!(
2458 t(r"\p{Separator}"),
2459 hir_uclass_query(ClassQuery::Binary("Z"))
2460 );
2461 assert_eq!(
2462 t(r"\p{se PaRa ToR}"),
2463 hir_uclass_query(ClassQuery::Binary("Z"))
2464 );
2465 assert_eq!(
2466 t(r"\p{gc:Separator}"),
2467 hir_uclass_query(ClassQuery::Binary("Z"))
2468 );
2469 assert_eq!(
2470 t(r"\p{gc=Separator}"),
2471 hir_uclass_query(ClassQuery::Binary("Z"))
2472 );
2473 assert_eq!(
2474 t(r"\p{Other}"),
2475 hir_uclass_query(ClassQuery::Binary("Other"))
2476 );
2477 assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other")));
2478
2479 assert_eq!(
2480 t(r"\PZ"),
2481 hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2482 );
2483 assert_eq!(
2484 t(r"\P{separator}"),
2485 hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2486 );
2487 assert_eq!(
2488 t(r"\P{gc!=separator}"),
2489 hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2490 );
2491
2492 assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any")));
2493 assert_eq!(
2494 t(r"\p{assigned}"),
2495 hir_uclass_query(ClassQuery::Binary("Assigned"))
2496 );
2497 assert_eq!(
2498 t(r"\p{ascii}"),
2499 hir_uclass_query(ClassQuery::Binary("ASCII"))
2500 );
2501 assert_eq!(
2502 t(r"\p{gc:any}"),
2503 hir_uclass_query(ClassQuery::Binary("Any"))
2504 );
2505 assert_eq!(
2506 t(r"\p{gc:assigned}"),
2507 hir_uclass_query(ClassQuery::Binary("Assigned"))
2508 );
2509 assert_eq!(
2510 t(r"\p{gc:ascii}"),
2511 hir_uclass_query(ClassQuery::Binary("ASCII"))
2512 );
2513
2514 assert_eq!(
2515 t_err(r"(?-u)\pZ"),
2516 TestError {
2517 kind: hir::ErrorKind::UnicodeNotAllowed,
2518 span: Span::new(
2519 Position::new(5, 1, 6),
2520 Position::new(8, 1, 9)
2521 ),
2522 }
2523 );
2524 assert_eq!(
2525 t_err(r"(?-u)\p{Separator}"),
2526 TestError {
2527 kind: hir::ErrorKind::UnicodeNotAllowed,
2528 span: Span::new(
2529 Position::new(5, 1, 6),
2530 Position::new(18, 1, 19)
2531 ),
2532 }
2533 );
2534 assert_eq!(
2535 t_err(r"\pE"),
2536 TestError {
2537 kind: hir::ErrorKind::UnicodePropertyNotFound,
2538 span: Span::new(
2539 Position::new(0, 1, 1),
2540 Position::new(3, 1, 4)
2541 ),
2542 }
2543 );
2544 assert_eq!(
2545 t_err(r"\p{Foo}"),
2546 TestError {
2547 kind: hir::ErrorKind::UnicodePropertyNotFound,
2548 span: Span::new(
2549 Position::new(0, 1, 1),
2550 Position::new(7, 1, 8)
2551 ),
2552 }
2553 );
2554 assert_eq!(
2555 t_err(r"\p{gc:Foo}"),
2556 TestError {
2557 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2558 span: Span::new(
2559 Position::new(0, 1, 1),
2560 Position::new(10, 1, 11)
2561 ),
2562 }
2563 );
2564 }
2565
2566 #[test]
2567 #[cfg(not(feature = "unicode-gencat"))]
2568 fn class_unicode_gencat_disabled() {
2569 assert_eq!(
2570 t_err(r"\p{Separator}"),
2571 TestError {
2572 kind: hir::ErrorKind::UnicodePropertyNotFound,
2573 span: Span::new(
2574 Position::new(0, 1, 1),
2575 Position::new(13, 1, 14)
2576 ),
2577 }
2578 );
2579
2580 assert_eq!(
2581 t_err(r"\p{Any}"),
2582 TestError {
2583 kind: hir::ErrorKind::UnicodePropertyNotFound,
2584 span: Span::new(
2585 Position::new(0, 1, 1),
2586 Position::new(7, 1, 8)
2587 ),
2588 }
2589 );
2590 }
2591
2592 #[test]
2593 #[cfg(feature = "unicode-script")]
2594 fn class_unicode_script() {
2595 assert_eq!(
2596 t(r"\p{Greek}"),
2597 hir_uclass_query(ClassQuery::Binary("Greek"))
2598 );
2599 #[cfg(feature = "unicode-case")]
2600 assert_eq!(
2601 t(r"(?i)\p{Greek}"),
2602 hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek")))
2603 );
2604 #[cfg(feature = "unicode-case")]
2605 assert_eq!(
2606 t(r"(?i)\P{Greek}"),
2607 hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2608 "Greek"
2609 ))))
2610 );
2611
2612 assert_eq!(
2613 t_err(r"\p{sc:Foo}"),
2614 TestError {
2615 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2616 span: Span::new(
2617 Position::new(0, 1, 1),
2618 Position::new(10, 1, 11)
2619 ),
2620 }
2621 );
2622 assert_eq!(
2623 t_err(r"\p{scx:Foo}"),
2624 TestError {
2625 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2626 span: Span::new(
2627 Position::new(0, 1, 1),
2628 Position::new(11, 1, 12)
2629 ),
2630 }
2631 );
2632 }
2633
2634 #[test]
2635 #[cfg(not(feature = "unicode-script"))]
2636 fn class_unicode_script_disabled() {
2637 assert_eq!(
2638 t_err(r"\p{Greek}"),
2639 TestError {
2640 kind: hir::ErrorKind::UnicodePropertyNotFound,
2641 span: Span::new(
2642 Position::new(0, 1, 1),
2643 Position::new(9, 1, 10)
2644 ),
2645 }
2646 );
2647
2648 assert_eq!(
2649 t_err(r"\p{scx:Greek}"),
2650 TestError {
2651 kind: hir::ErrorKind::UnicodePropertyNotFound,
2652 span: Span::new(
2653 Position::new(0, 1, 1),
2654 Position::new(13, 1, 14)
2655 ),
2656 }
2657 );
2658 }
2659
2660 #[test]
2661 #[cfg(feature = "unicode-age")]
2662 fn class_unicode_age() {
2663 assert_eq!(
2664 t_err(r"\p{age:Foo}"),
2665 TestError {
2666 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2667 span: Span::new(
2668 Position::new(0, 1, 1),
2669 Position::new(11, 1, 12)
2670 ),
2671 }
2672 );
2673 }
2674
2675 #[test]
2676 #[cfg(feature = "unicode-gencat")]
2677 fn class_unicode_any_empty() {
2678 assert_eq!(t(r"\P{any}"), hir_uclass(&[]),);
2679 }
2680
2681 #[test]
2682 #[cfg(not(feature = "unicode-age"))]
2683 fn class_unicode_age_disabled() {
2684 assert_eq!(
2685 t_err(r"\p{age:3.0}"),
2686 TestError {
2687 kind: hir::ErrorKind::UnicodePropertyNotFound,
2688 span: Span::new(
2689 Position::new(0, 1, 1),
2690 Position::new(11, 1, 12)
2691 ),
2692 }
2693 );
2694 }
2695
2696 #[test]
2697 fn class_bracketed() {
2698 assert_eq!(t("[a]"), hir_lit("a"));
2699 assert_eq!(t("[ab]"), hir_uclass(&[('a', 'b')]));
2700 assert_eq!(t("[^[a]]"), class_negate(uclass(&[('a', 'a')])));
2701 assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
2702 assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
2703 assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
2704 assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
2705 assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
2706 assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
2707 #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2708 assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
2709 #[cfg(feature = "unicode-gencat")]
2710 assert_eq!(
2711 t(r"[\pZ]"),
2712 hir_uclass_query(ClassQuery::Binary("separator"))
2713 );
2714 #[cfg(feature = "unicode-gencat")]
2715 assert_eq!(
2716 t(r"[\p{separator}]"),
2717 hir_uclass_query(ClassQuery::Binary("separator"))
2718 );
2719 #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2720 assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
2721 #[cfg(feature = "unicode-gencat")]
2722 assert_eq!(
2723 t(r"[^\PZ]"),
2724 hir_uclass_query(ClassQuery::Binary("separator"))
2725 );
2726 #[cfg(feature = "unicode-gencat")]
2727 assert_eq!(
2728 t(r"[^\P{separator}]"),
2729 hir_uclass_query(ClassQuery::Binary("separator"))
2730 );
2731 #[cfg(all(
2732 feature = "unicode-case",
2733 any(feature = "unicode-perl", feature = "unicode-gencat")
2734 ))]
2735 assert_eq!(
2736 t(r"(?i)[^\D]"),
2737 hir_uclass_query(ClassQuery::Binary("digit"))
2738 );
2739 #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2740 assert_eq!(
2741 t(r"(?i)[^\P{greek}]"),
2742 hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
2743 );
2744
2745 assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
2746 assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
2747 assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));
2748
2749 #[cfg(feature = "unicode-case")]
2750 assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
2751 #[cfg(feature = "unicode-case")]
2752 assert_eq!(
2753 t("(?i)[k]"),
2754 hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
2755 );
2756 #[cfg(feature = "unicode-case")]
2757 assert_eq!(
2758 t("(?i)[β]"),
2759 hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
2760 );
2761 assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));
2762
2763 assert_eq!(t("[^a]"), class_negate(uclass(&[('a', 'a')])));
2764 assert_eq!(t(r"[^\x00]"), class_negate(uclass(&[('\0', '\0')])));
2765 assert_eq!(
2766 t_bytes("(?-u)[^a]"),
2767 class_negate(bclass(&[(b'a', b'a')]))
2768 );
2769 #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2770 assert_eq!(
2771 t(r"[^\d]"),
2772 hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2773 );
2774 #[cfg(feature = "unicode-gencat")]
2775 assert_eq!(
2776 t(r"[^\pZ]"),
2777 hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2778 );
2779 #[cfg(feature = "unicode-gencat")]
2780 assert_eq!(
2781 t(r"[^\p{separator}]"),
2782 hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2783 );
2784 #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2785 assert_eq!(
2786 t(r"(?i)[^\p{greek}]"),
2787 hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2788 "greek"
2789 ))))
2790 );
2791 #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2792 assert_eq!(
2793 t(r"(?i)[\P{greek}]"),
2794 hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2795 "greek"
2796 ))))
2797 );
2798
2799 assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));
2801
2802 assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
2803 assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
2804 assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
2805 assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
2806 assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));
2807
2808 assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
2809 assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
2810 assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
2811 assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
2812 assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));
2813
2814 assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
2815 assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
2816 assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
2817 assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
2818 assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));
2819
2820 assert_eq!(
2821 t_err("(?-u)[^a]"),
2822 TestError {
2823 kind: hir::ErrorKind::InvalidUtf8,
2824 span: Span::new(
2825 Position::new(5, 1, 6),
2826 Position::new(9, 1, 10)
2827 ),
2828 }
2829 );
2830 #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2831 assert_eq!(t(r"[^\s\S]"), hir_uclass(&[]),);
2832 #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2833 assert_eq!(t_bytes(r"(?-u)[^\s\S]"), hir_bclass(&[]),);
2834 }
2835
2836 #[test]
2837 fn class_bracketed_union() {
2838 assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
2839 #[cfg(feature = "unicode-gencat")]
2840 assert_eq!(
2841 t(r"[a\pZb]"),
2842 hir_union(
2843 hir_uclass(&[('a', 'b')]),
2844 hir_uclass_query(ClassQuery::Binary("separator"))
2845 )
2846 );
2847 #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
2848 assert_eq!(
2849 t(r"[\pZ\p{Greek}]"),
2850 hir_union(
2851 hir_uclass_query(ClassQuery::Binary("greek")),
2852 hir_uclass_query(ClassQuery::Binary("separator"))
2853 )
2854 );
2855 #[cfg(all(
2856 feature = "unicode-age",
2857 feature = "unicode-gencat",
2858 feature = "unicode-script"
2859 ))]
2860 assert_eq!(
2861 t(r"[\p{age:3.0}\pZ\p{Greek}]"),
2862 hir_union(
2863 hir_uclass_query(ClassQuery::ByValue {
2864 property_name: "age",
2865 property_value: "3.0",
2866 }),
2867 hir_union(
2868 hir_uclass_query(ClassQuery::Binary("greek")),
2869 hir_uclass_query(ClassQuery::Binary("separator"))
2870 )
2871 )
2872 );
2873 #[cfg(all(
2874 feature = "unicode-age",
2875 feature = "unicode-gencat",
2876 feature = "unicode-script"
2877 ))]
2878 assert_eq!(
2879 t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
2880 hir_union(
2881 hir_uclass_query(ClassQuery::ByValue {
2882 property_name: "age",
2883 property_value: "3.0",
2884 }),
2885 hir_union(
2886 hir_uclass_query(ClassQuery::Binary("cyrillic")),
2887 hir_union(
2888 hir_uclass_query(ClassQuery::Binary("greek")),
2889 hir_uclass_query(ClassQuery::Binary("separator"))
2890 )
2891 )
2892 )
2893 );
2894
2895 #[cfg(all(
2896 feature = "unicode-age",
2897 feature = "unicode-case",
2898 feature = "unicode-gencat",
2899 feature = "unicode-script"
2900 ))]
2901 assert_eq!(
2902 t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
2903 hir_case_fold(hir_union(
2904 hir_uclass_query(ClassQuery::ByValue {
2905 property_name: "age",
2906 property_value: "3.0",
2907 }),
2908 hir_union(
2909 hir_uclass_query(ClassQuery::Binary("greek")),
2910 hir_uclass_query(ClassQuery::Binary("separator"))
2911 )
2912 ))
2913 );
2914 #[cfg(all(
2915 feature = "unicode-age",
2916 feature = "unicode-gencat",
2917 feature = "unicode-script"
2918 ))]
2919 assert_eq!(
2920 t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
2921 hir_negate(hir_union(
2922 hir_uclass_query(ClassQuery::ByValue {
2923 property_name: "age",
2924 property_value: "3.0",
2925 }),
2926 hir_union(
2927 hir_uclass_query(ClassQuery::Binary("greek")),
2928 hir_uclass_query(ClassQuery::Binary("separator"))
2929 )
2930 ))
2931 );
2932 #[cfg(all(
2933 feature = "unicode-age",
2934 feature = "unicode-case",
2935 feature = "unicode-gencat",
2936 feature = "unicode-script"
2937 ))]
2938 assert_eq!(
2939 t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
2940 hir_negate(hir_case_fold(hir_union(
2941 hir_uclass_query(ClassQuery::ByValue {
2942 property_name: "age",
2943 property_value: "3.0",
2944 }),
2945 hir_union(
2946 hir_uclass_query(ClassQuery::Binary("greek")),
2947 hir_uclass_query(ClassQuery::Binary("separator"))
2948 )
2949 )))
2950 );
2951 }
2952
2953 #[test]
2954 fn class_bracketed_nested() {
2955 assert_eq!(t(r"[a[^c]]"), class_negate(uclass(&[('c', 'c')])));
2956 assert_eq!(t(r"[a-b[^c]]"), class_negate(uclass(&[('c', 'c')])));
2957 assert_eq!(t(r"[a-c[^c]]"), class_negate(uclass(&[])));
2958
2959 assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')]));
2960 assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')]));
2961
2962 #[cfg(feature = "unicode-case")]
2963 assert_eq!(
2964 t(r"(?i)[a[^c]]"),
2965 hir_negate(class_case_fold(uclass(&[('c', 'c')])))
2966 );
2967 #[cfg(feature = "unicode-case")]
2968 assert_eq!(
2969 t(r"(?i)[a-b[^c]]"),
2970 hir_negate(class_case_fold(uclass(&[('c', 'c')])))
2971 );
2972
2973 #[cfg(feature = "unicode-case")]
2974 assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
2975 #[cfg(feature = "unicode-case")]
2976 assert_eq!(
2977 t(r"(?i)[^a-b[^c]]"),
2978 hir_uclass(&[('C', 'C'), ('c', 'c')])
2979 );
2980
2981 assert_eq!(t(r"[^a-c[^c]]"), hir_uclass(&[]),);
2982 #[cfg(feature = "unicode-case")]
2983 assert_eq!(t(r"(?i)[^a-c[^c]]"), hir_uclass(&[]),);
2984 }
2985
2986 #[test]
2987 fn class_bracketed_intersect() {
2988 assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')]));
2989 assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')]));
2990 assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')]));
2991 assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')]));
2992 assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')]));
2993 assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')]));
2994 assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
2995 assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
2996 assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
2997
2998 assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')]));
2999 assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
3000 assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
3001 assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')]));
3002 assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')]));
3003 assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')]));
3004
3005 #[cfg(feature = "unicode-case")]
3006 assert_eq!(
3007 t("(?i)[abc&&b-c]"),
3008 hir_case_fold(hir_uclass(&[('b', 'c')]))
3009 );
3010 #[cfg(feature = "unicode-case")]
3011 assert_eq!(
3012 t("(?i)[abc&&[b-c]]"),
3013 hir_case_fold(hir_uclass(&[('b', 'c')]))
3014 );
3015 #[cfg(feature = "unicode-case")]
3016 assert_eq!(
3017 t("(?i)[[abc]&&[b-c]]"),
3018 hir_case_fold(hir_uclass(&[('b', 'c')]))
3019 );
3020 #[cfg(feature = "unicode-case")]
3021 assert_eq!(
3022 t("(?i)[a-z&&b-y&&c-x]"),
3023 hir_case_fold(hir_uclass(&[('c', 'x')]))
3024 );
3025 #[cfg(feature = "unicode-case")]
3026 assert_eq!(
3027 t("(?i)[c-da-b&&a-d]"),
3028 hir_case_fold(hir_uclass(&[('a', 'd')]))
3029 );
3030 #[cfg(feature = "unicode-case")]
3031 assert_eq!(
3032 t("(?i)[a-d&&c-da-b]"),
3033 hir_case_fold(hir_uclass(&[('a', 'd')]))
3034 );
3035
3036 assert_eq!(
3037 t("(?i-u)[abc&&b-c]"),
3038 hir_case_fold(hir_bclass(&[(b'b', b'c')]))
3039 );
3040 assert_eq!(
3041 t("(?i-u)[abc&&[b-c]]"),
3042 hir_case_fold(hir_bclass(&[(b'b', b'c')]))
3043 );
3044 assert_eq!(
3045 t("(?i-u)[[abc]&&[b-c]]"),
3046 hir_case_fold(hir_bclass(&[(b'b', b'c')]))
3047 );
3048 assert_eq!(
3049 t("(?i-u)[a-z&&b-y&&c-x]"),
3050 hir_case_fold(hir_bclass(&[(b'c', b'x')]))
3051 );
3052 assert_eq!(
3053 t("(?i-u)[c-da-b&&a-d]"),
3054 hir_case_fold(hir_bclass(&[(b'a', b'd')]))
3055 );
3056 assert_eq!(
3057 t("(?i-u)[a-d&&c-da-b]"),
3058 hir_case_fold(hir_bclass(&[(b'a', b'd')]))
3059 );
3060
3061 assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
3064 assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
3066 assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
3067 assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
3068 assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
3069 assert_eq!(
3071 t(r"[a-w&&[^c-g]z]"),
3072 hir_uclass(&[('a', 'b'), ('h', 'w')])
3073 );
3074 }
3075
3076 #[test]
3077 fn class_bracketed_intersect_negate() {
3078 #[cfg(feature = "unicode-perl")]
3079 assert_eq!(
3080 t(r"[^\w&&\d]"),
3081 hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
3082 );
3083 assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
3084 #[cfg(feature = "unicode-perl")]
3085 assert_eq!(
3086 t(r"[^[\w&&\d]]"),
3087 hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
3088 );
3089 #[cfg(feature = "unicode-perl")]
3090 assert_eq!(
3091 t(r"[^[^\w&&\d]]"),
3092 hir_uclass_query(ClassQuery::Binary("digit"))
3093 );
3094 #[cfg(feature = "unicode-perl")]
3095 assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));
3096
3097 #[cfg(feature = "unicode-perl")]
3098 assert_eq!(
3099 t_bytes(r"(?-u)[^\w&&\d]"),
3100 hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
3101 );
3102 assert_eq!(
3103 t_bytes(r"(?-u)[^[a-z&&a-c]]"),
3104 hir_negate(hir_bclass(&[(b'a', b'c')]))
3105 );
3106 assert_eq!(
3107 t_bytes(r"(?-u)[^[\w&&\d]]"),
3108 hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
3109 );
3110 assert_eq!(
3111 t_bytes(r"(?-u)[^[^\w&&\d]]"),
3112 hir_ascii_bclass(&ast::ClassAsciiKind::Digit)
3113 );
3114 assert_eq!(
3115 t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
3116 hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
3117 );
3118 }
3119
3120 #[test]
3121 fn class_bracketed_difference() {
3122 #[cfg(feature = "unicode-gencat")]
3123 assert_eq!(
3124 t(r"[\pL--[:ascii:]]"),
3125 hir_difference(
3126 hir_uclass_query(ClassQuery::Binary("letter")),
3127 hir_uclass(&[('\0', '\x7F')])
3128 )
3129 );
3130
3131 assert_eq!(
3132 t(r"(?-u)[[:alpha:]--[:lower:]]"),
3133 hir_bclass(&[(b'A', b'Z')])
3134 );
3135 }
3136
3137 #[test]
3138 fn class_bracketed_symmetric_difference() {
3139 #[cfg(feature = "unicode-script")]
3140 assert_eq!(
3141 t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
3142 hir_uclass(&[
3156 ('·', '·'),
3157 ('\u{0300}', '\u{0301}'),
3158 ('\u{0304}', '\u{0304}'),
3159 ('\u{0306}', '\u{0306}'),
3160 ('\u{0308}', '\u{0308}'),
3161 ('\u{0313}', '\u{0313}'),
3162 ('\u{0342}', '\u{0342}'),
3163 ('\u{0345}', '\u{0345}'),
3164 ('ʹ', 'ʹ'),
3165 ('\u{1DC0}', '\u{1DC1}'),
3166 ('⁝', '⁝'),
3167 ])
3168 );
3169 assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));
3170
3171 assert_eq!(
3172 t(r"(?-u)[a-g~~c-j]"),
3173 hir_bclass(&[(b'a', b'b'), (b'h', b'j')])
3174 );
3175 }
3176
3177 #[test]
3178 fn ignore_whitespace() {
3179 assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
3180 assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
3181 assert_eq!(
3182 t(r"(?x)\x # comment
3183{ # comment
3184 53 # comment
3185} #comment"),
3186 hir_lit("S")
3187 );
3188
3189 assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
3190 assert_eq!(
3191 t(r"(?x)\x # comment
3192 53 # comment"),
3193 hir_lit("S")
3194 );
3195 assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));
3196
3197 #[cfg(feature = "unicode-gencat")]
3198 assert_eq!(
3199 t(r"(?x)\p # comment
3200{ # comment
3201 Separator # comment
3202} # comment"),
3203 hir_uclass_query(ClassQuery::Binary("separator"))
3204 );
3205
3206 assert_eq!(
3207 t(r"(?x)a # comment
3208{ # comment
3209 5 # comment
3210 , # comment
3211 10 # comment
3212} # comment"),
3213 hir_range(true, 5, Some(10), hir_lit("a"))
3214 );
3215
3216 assert_eq!(t(r"(?x)a\ # hi there"), hir_lit("a "));
3217 }
3218
3219 #[test]
3220 fn analysis_is_utf8() {
3221 assert!(props_bytes(r"a").is_utf8());
3223 assert!(props_bytes(r"ab").is_utf8());
3224 assert!(props_bytes(r"(?-u)a").is_utf8());
3225 assert!(props_bytes(r"(?-u)ab").is_utf8());
3226 assert!(props_bytes(r"\xFF").is_utf8());
3227 assert!(props_bytes(r"\xFF\xFF").is_utf8());
3228 assert!(props_bytes(r"[^a]").is_utf8());
3229 assert!(props_bytes(r"[^a][^a]").is_utf8());
3230 assert!(props_bytes(r"\b").is_utf8());
3231 assert!(props_bytes(r"\B").is_utf8());
3232 assert!(props_bytes(r"(?-u)\b").is_utf8());
3233 assert!(props_bytes(r"(?-u)\B").is_utf8());
3234
3235 assert!(!props_bytes(r"(?-u)\xFF").is_utf8());
3237 assert!(!props_bytes(r"(?-u)\xFF\xFF").is_utf8());
3238 assert!(!props_bytes(r"(?-u)[^a]").is_utf8());
3239 assert!(!props_bytes(r"(?-u)[^a][^a]").is_utf8());
3240 }
3241
3242 #[test]
3243 fn analysis_captures_len() {
3244 assert_eq!(0, props(r"a").explicit_captures_len());
3245 assert_eq!(0, props(r"(?:a)").explicit_captures_len());
3246 assert_eq!(0, props(r"(?i-u:a)").explicit_captures_len());
3247 assert_eq!(0, props(r"(?i-u)a").explicit_captures_len());
3248 assert_eq!(1, props(r"(a)").explicit_captures_len());
3249 assert_eq!(1, props(r"(?P<foo>a)").explicit_captures_len());
3250 assert_eq!(1, props(r"()").explicit_captures_len());
3251 assert_eq!(1, props(r"()a").explicit_captures_len());
3252 assert_eq!(1, props(r"(a)+").explicit_captures_len());
3253 assert_eq!(2, props(r"(a)(b)").explicit_captures_len());
3254 assert_eq!(2, props(r"(a)|(b)").explicit_captures_len());
3255 assert_eq!(2, props(r"((a))").explicit_captures_len());
3256 assert_eq!(1, props(r"([a&&b])").explicit_captures_len());
3257 }
3258
3259 #[test]
3260 fn analysis_static_captures_len() {
3261 let len = |pattern| props(pattern).static_explicit_captures_len();
3262 assert_eq!(Some(0), len(r""));
3263 assert_eq!(Some(0), len(r"foo|bar"));
3264 assert_eq!(None, len(r"(foo)|bar"));
3265 assert_eq!(None, len(r"foo|(bar)"));
3266 assert_eq!(Some(1), len(r"(foo|bar)"));
3267 assert_eq!(Some(1), len(r"(a|b|c|d|e|f)"));
3268 assert_eq!(Some(1), len(r"(a)|(b)|(c)|(d)|(e)|(f)"));
3269 assert_eq!(Some(2), len(r"(a)(b)|(c)(d)|(e)(f)"));
3270 assert_eq!(Some(6), len(r"(a)(b)(c)(d)(e)(f)"));
3271 assert_eq!(Some(3), len(r"(a)(b)(extra)|(a)(b)()"));
3272 assert_eq!(Some(3), len(r"(a)(b)((?:extra)?)"));
3273 assert_eq!(None, len(r"(a)(b)(extra)?"));
3274 assert_eq!(Some(1), len(r"(foo)|(bar)"));
3275 assert_eq!(Some(2), len(r"(foo)(bar)"));
3276 assert_eq!(Some(2), len(r"(foo)+(bar)"));
3277 assert_eq!(None, len(r"(foo)*(bar)"));
3278 assert_eq!(Some(0), len(r"(foo)?{0}"));
3279 assert_eq!(None, len(r"(foo)?{1}"));
3280 assert_eq!(Some(1), len(r"(foo){1}"));
3281 assert_eq!(Some(1), len(r"(foo){1,}"));
3282 assert_eq!(Some(1), len(r"(foo){1,}?"));
3283 assert_eq!(None, len(r"(foo){1,}??"));
3284 assert_eq!(None, len(r"(foo){0,}"));
3285 assert_eq!(Some(1), len(r"(foo)(?:bar)"));
3286 assert_eq!(Some(2), len(r"(foo(?:bar)+)(?:baz(boo))"));
3287 assert_eq!(Some(2), len(r"(?P<bar>foo)(?:bar)(bal|loon)"));
3288 assert_eq!(
3289 Some(2),
3290 len(r#"<(a)[^>]+href="([^"]+)"|<(img)[^>]+src="([^"]+)""#)
3291 );
3292 }
3293
3294 #[test]
3295 fn analysis_is_all_assertions() {
3296 let p = props(r"\b");
3298 assert!(!p.look_set().is_empty());
3299 assert_eq!(p.minimum_len(), Some(0));
3300
3301 let p = props(r"\B");
3302 assert!(!p.look_set().is_empty());
3303 assert_eq!(p.minimum_len(), Some(0));
3304
3305 let p = props(r"^");
3306 assert!(!p.look_set().is_empty());
3307 assert_eq!(p.minimum_len(), Some(0));
3308
3309 let p = props(r"$");
3310 assert!(!p.look_set().is_empty());
3311 assert_eq!(p.minimum_len(), Some(0));
3312
3313 let p = props(r"\A");
3314 assert!(!p.look_set().is_empty());
3315 assert_eq!(p.minimum_len(), Some(0));
3316
3317 let p = props(r"\z");
3318 assert!(!p.look_set().is_empty());
3319 assert_eq!(p.minimum_len(), Some(0));
3320
3321 let p = props(r"$^\z\A\b\B");
3322 assert!(!p.look_set().is_empty());
3323 assert_eq!(p.minimum_len(), Some(0));
3324
3325 let p = props(r"$|^|\z|\A|\b|\B");
3326 assert!(!p.look_set().is_empty());
3327 assert_eq!(p.minimum_len(), Some(0));
3328
3329 let p = props(r"^$|$^");
3330 assert!(!p.look_set().is_empty());
3331 assert_eq!(p.minimum_len(), Some(0));
3332
3333 let p = props(r"((\b)+())*^");
3334 assert!(!p.look_set().is_empty());
3335 assert_eq!(p.minimum_len(), Some(0));
3336
3337 let p = props(r"^a");
3339 assert!(!p.look_set().is_empty());
3340 assert_eq!(p.minimum_len(), Some(1));
3341 }
3342
3343 #[test]
3344 fn analysis_look_set_prefix_any() {
3345 let p = props(r"(?-u)(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))");
3346 assert!(p.look_set_prefix_any().contains(Look::WordAscii));
3347 }
3348
3349 #[test]
3350 fn analysis_is_anchored() {
3351 let is_start = |p| props(p).look_set_prefix().contains(Look::Start);
3352 let is_end = |p| props(p).look_set_suffix().contains(Look::End);
3353
3354 assert!(is_start(r"^"));
3356 assert!(is_end(r"$"));
3357
3358 assert!(is_start(r"^^"));
3359 assert!(props(r"$$").look_set_suffix().contains(Look::End));
3360
3361 assert!(is_start(r"^$"));
3362 assert!(is_end(r"^$"));
3363
3364 assert!(is_start(r"^foo"));
3365 assert!(is_end(r"foo$"));
3366
3367 assert!(is_start(r"^foo|^bar"));
3368 assert!(is_end(r"foo$|bar$"));
3369
3370 assert!(is_start(r"^(foo|bar)"));
3371 assert!(is_end(r"(foo|bar)$"));
3372
3373 assert!(is_start(r"^+"));
3374 assert!(is_end(r"$+"));
3375 assert!(is_start(r"^++"));
3376 assert!(is_end(r"$++"));
3377 assert!(is_start(r"(^)+"));
3378 assert!(is_end(r"($)+"));
3379
3380 assert!(is_start(r"$^"));
3381 assert!(is_start(r"$^"));
3382 assert!(is_start(r"$^|^$"));
3383 assert!(is_end(r"$^|^$"));
3384
3385 assert!(is_start(r"\b^"));
3386 assert!(is_end(r"$\b"));
3387 assert!(is_start(r"^(?m:^)"));
3388 assert!(is_end(r"(?m:$)$"));
3389 assert!(is_start(r"(?m:^)^"));
3390 assert!(is_end(r"$(?m:$)"));
3391
3392 assert!(!is_start(r"(?m)^"));
3394 assert!(!is_end(r"(?m)$"));
3395 assert!(!is_start(r"(?m:^$)|$^"));
3396 assert!(!is_end(r"(?m:^$)|$^"));
3397 assert!(!is_start(r"$^|(?m:^$)"));
3398 assert!(!is_end(r"$^|(?m:^$)"));
3399
3400 assert!(!is_start(r"a^"));
3401 assert!(!is_start(r"$a"));
3402
3403 assert!(!is_end(r"a^"));
3404 assert!(!is_end(r"$a"));
3405
3406 assert!(!is_start(r"^foo|bar"));
3407 assert!(!is_end(r"foo|bar$"));
3408
3409 assert!(!is_start(r"^*"));
3410 assert!(!is_end(r"$*"));
3411 assert!(!is_start(r"^*+"));
3412 assert!(!is_end(r"$*+"));
3413 assert!(!is_start(r"^+*"));
3414 assert!(!is_end(r"$+*"));
3415 assert!(!is_start(r"(^)*"));
3416 assert!(!is_end(r"($)*"));
3417 }
3418
3419 #[test]
3420 fn analysis_is_any_anchored() {
3421 let is_start = |p| props(p).look_set().contains(Look::Start);
3422 let is_end = |p| props(p).look_set().contains(Look::End);
3423
3424 assert!(is_start(r"^"));
3426 assert!(is_end(r"$"));
3427 assert!(is_start(r"\A"));
3428 assert!(is_end(r"\z"));
3429
3430 assert!(!is_start(r"(?m)^"));
3432 assert!(!is_end(r"(?m)$"));
3433 assert!(!is_start(r"$"));
3434 assert!(!is_end(r"^"));
3435 }
3436
3437 #[test]
3438 fn analysis_can_empty() {
3439 let assert_empty =
3441 |p| assert_eq!(Some(0), props_bytes(p).minimum_len());
3442 assert_empty(r"");
3443 assert_empty(r"()");
3444 assert_empty(r"()*");
3445 assert_empty(r"()+");
3446 assert_empty(r"()?");
3447 assert_empty(r"a*");
3448 assert_empty(r"a?");
3449 assert_empty(r"a{0}");
3450 assert_empty(r"a{0,}");
3451 assert_empty(r"a{0,1}");
3452 assert_empty(r"a{0,10}");
3453 #[cfg(feature = "unicode-gencat")]
3454 assert_empty(r"\pL*");
3455 assert_empty(r"a*|b");
3456 assert_empty(r"b|a*");
3457 assert_empty(r"a|");
3458 assert_empty(r"|a");
3459 assert_empty(r"a||b");
3460 assert_empty(r"a*a?(abcd)*");
3461 assert_empty(r"^");
3462 assert_empty(r"$");
3463 assert_empty(r"(?m)^");
3464 assert_empty(r"(?m)$");
3465 assert_empty(r"\A");
3466 assert_empty(r"\z");
3467 assert_empty(r"\B");
3468 assert_empty(r"(?-u)\B");
3469 assert_empty(r"\b");
3470 assert_empty(r"(?-u)\b");
3471
3472 let assert_non_empty =
3474 |p| assert_ne!(Some(0), props_bytes(p).minimum_len());
3475 assert_non_empty(r"a+");
3476 assert_non_empty(r"a{1}");
3477 assert_non_empty(r"a{1,}");
3478 assert_non_empty(r"a{1,2}");
3479 assert_non_empty(r"a{1,10}");
3480 assert_non_empty(r"b|a");
3481 assert_non_empty(r"a*a+(abcd)*");
3482 #[cfg(feature = "unicode-gencat")]
3483 assert_non_empty(r"\P{any}");
3484 assert_non_empty(r"[a--a]");
3485 assert_non_empty(r"[a&&b]");
3486 }
3487
3488 #[test]
3489 fn analysis_is_literal() {
3490 assert!(props(r"a").is_literal());
3492 assert!(props(r"ab").is_literal());
3493 assert!(props(r"abc").is_literal());
3494 assert!(props(r"(?m)abc").is_literal());
3495 assert!(props(r"(?:a)").is_literal());
3496 assert!(props(r"foo(?:a)").is_literal());
3497 assert!(props(r"(?:a)foo").is_literal());
3498 assert!(props(r"[a]").is_literal());
3499
3500 assert!(!props(r"").is_literal());
3502 assert!(!props(r"^").is_literal());
3503 assert!(!props(r"a|b").is_literal());
3504 assert!(!props(r"(a)").is_literal());
3505 assert!(!props(r"a+").is_literal());
3506 assert!(!props(r"foo(a)").is_literal());
3507 assert!(!props(r"(a)foo").is_literal());
3508 assert!(!props(r"[ab]").is_literal());
3509 }
3510
3511 #[test]
3512 fn analysis_is_alternation_literal() {
3513 assert!(props(r"a").is_alternation_literal());
3515 assert!(props(r"ab").is_alternation_literal());
3516 assert!(props(r"abc").is_alternation_literal());
3517 assert!(props(r"(?m)abc").is_alternation_literal());
3518 assert!(props(r"foo|bar").is_alternation_literal());
3519 assert!(props(r"foo|bar|baz").is_alternation_literal());
3520 assert!(props(r"[a]").is_alternation_literal());
3521 assert!(props(r"(?:ab)|cd").is_alternation_literal());
3522 assert!(props(r"ab|(?:cd)").is_alternation_literal());
3523
3524 assert!(!props(r"").is_alternation_literal());
3526 assert!(!props(r"^").is_alternation_literal());
3527 assert!(!props(r"(a)").is_alternation_literal());
3528 assert!(!props(r"a+").is_alternation_literal());
3529 assert!(!props(r"foo(a)").is_alternation_literal());
3530 assert!(!props(r"(a)foo").is_alternation_literal());
3531 assert!(!props(r"[ab]").is_alternation_literal());
3532 assert!(!props(r"[ab]|b").is_alternation_literal());
3533 assert!(!props(r"a|[ab]").is_alternation_literal());
3534 assert!(!props(r"(a)|b").is_alternation_literal());
3535 assert!(!props(r"a|(b)").is_alternation_literal());
3536 assert!(!props(r"a|b").is_alternation_literal());
3537 assert!(!props(r"a|b|c").is_alternation_literal());
3538 assert!(!props(r"[a]|b").is_alternation_literal());
3539 assert!(!props(r"a|[b]").is_alternation_literal());
3540 assert!(!props(r"(?:a)|b").is_alternation_literal());
3541 assert!(!props(r"a|(?:b)").is_alternation_literal());
3542 assert!(!props(r"(?:z|xx)@|xx").is_alternation_literal());
3543 }
3544
3545 #[test]
3548 fn smart_repetition() {
3549 assert_eq!(t(r"a{0}"), Hir::empty());
3550 assert_eq!(t(r"a{1}"), hir_lit("a"));
3551 assert_eq!(t(r"\B{32111}"), hir_look(hir::Look::WordUnicodeNegate));
3552 }
3553
3554 #[test]
3557 fn smart_concat() {
3558 assert_eq!(t(""), Hir::empty());
3559 assert_eq!(t("(?:)"), Hir::empty());
3560 assert_eq!(t("abc"), hir_lit("abc"));
3561 assert_eq!(t("(?:foo)(?:bar)"), hir_lit("foobar"));
3562 assert_eq!(t("quux(?:foo)(?:bar)baz"), hir_lit("quuxfoobarbaz"));
3563 assert_eq!(
3564 t("foo(?:bar^baz)quux"),
3565 hir_cat(vec![
3566 hir_lit("foobar"),
3567 hir_look(hir::Look::Start),
3568 hir_lit("bazquux"),
3569 ])
3570 );
3571 assert_eq!(
3572 t("foo(?:ba(?:r^b)az)quux"),
3573 hir_cat(vec![
3574 hir_lit("foobar"),
3575 hir_look(hir::Look::Start),
3576 hir_lit("bazquux"),
3577 ])
3578 );
3579 }
3580
3581 #[test]
3584 fn smart_alternation() {
3585 assert_eq!(
3586 t("(?:foo)|(?:bar)"),
3587 hir_alt(vec![hir_lit("foo"), hir_lit("bar")])
3588 );
3589 assert_eq!(
3590 t("quux|(?:abc|def|xyz)|baz"),
3591 hir_alt(vec![
3592 hir_lit("quux"),
3593 hir_lit("abc"),
3594 hir_lit("def"),
3595 hir_lit("xyz"),
3596 hir_lit("baz"),
3597 ])
3598 );
3599 assert_eq!(
3600 t("quux|(?:abc|(?:def|mno)|xyz)|baz"),
3601 hir_alt(vec![
3602 hir_lit("quux"),
3603 hir_lit("abc"),
3604 hir_lit("def"),
3605 hir_lit("mno"),
3606 hir_lit("xyz"),
3607 hir_lit("baz"),
3608 ])
3609 );
3610 assert_eq!(
3611 t("a|b|c|d|e|f|x|y|z"),
3612 hir_uclass(&[('a', 'f'), ('x', 'z')]),
3613 );
3614 assert_eq!(
3616 t("[A-Z]foo|[A-Z]quux"),
3617 hir_cat(vec![
3618 hir_uclass(&[('A', 'Z')]),
3619 hir_alt(vec![hir_lit("foo"), hir_lit("quux")]),
3620 ]),
3621 );
3622 assert_eq!(
3623 t("[A-Z][A-Z]|[A-Z]quux"),
3624 hir_cat(vec![
3625 hir_uclass(&[('A', 'Z')]),
3626 hir_alt(vec![hir_uclass(&[('A', 'Z')]), hir_lit("quux")]),
3627 ]),
3628 );
3629 assert_eq!(
3630 t("[A-Z][A-Z]|[A-Z][A-Z]quux"),
3631 hir_cat(vec![
3632 hir_uclass(&[('A', 'Z')]),
3633 hir_uclass(&[('A', 'Z')]),
3634 hir_alt(vec![Hir::empty(), hir_lit("quux")]),
3635 ]),
3636 );
3637 assert_eq!(
3638 t("[A-Z]foo|[A-Z]foobar"),
3639 hir_cat(vec![
3640 hir_uclass(&[('A', 'Z')]),
3641 hir_alt(vec![hir_lit("foo"), hir_lit("foobar")]),
3642 ]),
3643 );
3644 }
3645
3646 #[test]
3647 fn regression_alt_empty_concat() {
3648 use crate::ast::{self, Ast};
3649
3650 let span = Span::splat(Position::new(0, 0, 0));
3651 let ast = Ast::alternation(ast::Alternation {
3652 span,
3653 asts: vec![Ast::concat(ast::Concat { span, asts: vec![] })],
3654 });
3655
3656 let mut t = Translator::new();
3657 assert_eq!(Ok(Hir::empty()), t.translate("", &ast));
3658 }
3659
3660 #[test]
3661 fn regression_empty_alt() {
3662 use crate::ast::{self, Ast};
3663
3664 let span = Span::splat(Position::new(0, 0, 0));
3665 let ast = Ast::concat(ast::Concat {
3666 span,
3667 asts: vec![Ast::alternation(ast::Alternation {
3668 span,
3669 asts: vec![],
3670 })],
3671 });
3672
3673 let mut t = Translator::new();
3674 assert_eq!(Ok(Hir::fail()), t.translate("", &ast));
3675 }
3676
3677 #[test]
3678 fn regression_singleton_alt() {
3679 use crate::{
3680 ast::{self, Ast},
3681 hir::Dot,
3682 };
3683
3684 let span = Span::splat(Position::new(0, 0, 0));
3685 let ast = Ast::concat(ast::Concat {
3686 span,
3687 asts: vec![Ast::alternation(ast::Alternation {
3688 span,
3689 asts: vec![Ast::dot(span)],
3690 })],
3691 });
3692
3693 let mut t = Translator::new();
3694 assert_eq!(Ok(Hir::dot(Dot::AnyCharExceptLF)), t.translate("", &ast));
3695 }
3696
3697 #[test]
3699 fn regression_fuzz_match() {
3700 let pat = "[(\u{6} \0-\u{afdf5}] \0 ";
3701 let ast = ParserBuilder::new()
3702 .octal(false)
3703 .ignore_whitespace(true)
3704 .build()
3705 .parse(pat)
3706 .unwrap();
3707 let hir = TranslatorBuilder::new()
3708 .utf8(true)
3709 .case_insensitive(false)
3710 .multi_line(false)
3711 .dot_matches_new_line(false)
3712 .swap_greed(true)
3713 .unicode(true)
3714 .build()
3715 .translate(pat, &ast)
3716 .unwrap();
3717 assert_eq!(
3718 hir,
3719 Hir::concat(vec![
3720 hir_uclass(&[('\0', '\u{afdf5}')]),
3721 hir_lit("\0"),
3722 ])
3723 );
3724 }
3725
3726 #[cfg(feature = "unicode")]
3728 #[test]
3729 fn regression_fuzz_difference1() {
3730 let pat = r"\W\W|\W[^\v--\W\W\P{Script_Extensions:Pau_Cin_Hau}\u10A1A1-\U{3E3E3}--~~~~--~~~~~~~~------~~~~~~--~~~~~~]*";
3731 let _ = t(pat); }
3733
3734 #[test]
3736 fn regression_fuzz_char_decrement1() {
3737 let pat = "w[w[^w?\rw\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\r\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0*\0\0\u{1}\0]\0\0-*\0][^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0x\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\0\0*??\0\u{7f}{2}\u{10}??\0\0\0\0\0\0\0\0\0\u{3}\0\0\0}\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\u{1}\0]\0\u{1}\u{1}H-i]-]\0\0\0\0\u{1}\0]\0\0\0\u{1}\0]\0\0-*\0\0\0\0\u{1}9-\u{7f}]\0'|-\u{7f}]\0'|(?i-ux)[-\u{7f}]\0'\u{3}\0\0\0}\0-*\0]<D\0\0\0\0\0\0\u{1}]\0\0\0\0]\0\0-*\0]\0\0 ";
3738 let _ = t(pat); }
3740}