#![allow(dead_code)]
use std::fmt::Write;
use std::iter::Iterator;

/// A character-level tokenizer that splits an input string into whitespace,
/// unquoted, quoted, punctuation, and comment tokens, each borrowing a slice
/// of the original input.
#[derive(Debug)]
pub struct Tokenizer<'a> {
    input: &'a str,
    chars: std::str::Chars<'a>,
    c: Option<char>,
    p: usize,
}

/// A single lexical unit produced by [`Tokenizer`]. Each variant holds the
/// exact input slice it was read from.
#[derive(Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum Token<'a> {
    Quoted(&'a str),
    Unquoted(&'a str),
    Space(&'a str),
    Punctuation(&'a str),
    Comment(&'a str),
}

impl<'a> Tokenizer<'a> {
    pub fn new(string: &'a str) -> Self {
        let mut chars = string.chars();
        let c = chars.next();
        Self {
            input: string,
            chars,
            c,
            p: 0,
        }
    }

    /// Consume the tokenizer and iterate over its tokens; the tokenizer is
    /// itself the iterator.
    pub fn iter(self) -> impl Iterator<Item = Token<'a>> {
        self
    }

    /// Current character; panics if the input is exhausted.
    fn get(&self) -> char {
        self.c.unwrap()
    }

    /// Current character, or '\0' once the input is exhausted.
    fn peek(&self) -> char {
        self.c.unwrap_or('\0')
    }

    /// Advance to the next character and update the byte position `p`.
    fn inc(&mut self) {
        let c = self.get();
        self.c = self.chars.next();
        self.p += c.len_utf8();
    }

    fn end(&self) -> bool {
        self.c.is_none()
    }

    /// Byte offset just past the character `c` at the current position.
    fn p_c(&self, c: char) -> usize {
        self.p + c.len_utf8()
    }

    /// Consume a run of whitespace and return it as a single `Space` token.
    fn space(&mut self) -> Option<Token<'a>> {
        let a = self.p;
        let mut b = a;

        while !self.end() {
            let c = self.get();
            if Self::is_space(c) {
                b = self.p_c(c);
            } else {
                break;
            }
            self.inc();
        }

        if a != b {
            Some(Token::Space(&self.input[a..b]))
        } else {
            None
        }
    }

    /// Consume an identifier-like run: letters and digits anywhere, with `_`
    /// and `$` allowed only after the first character.
    fn unquoted(&mut self) -> Option<Token<'a>> {
        let a = self.p;
        let mut b = a;

        let mut first = true;
        while !self.end() {
            let c = self.get();
            if Self::is_alphanumeric(c) {
                b = self.p_c(c);
                first = false;
                self.inc();
            } else if !first && Self::is_identifier(c) {
                b = self.p_c(c);
                self.inc();
            } else {
                break;
            }
        }

        if a != b {
            Some(Token::Unquoted(&self.input[a..b]))
        } else {
            None
        }
    }

    /// Consume a quoted string or quoted identifier. Backslash escapes and
    /// doubled closing delimiters stay inside the token; an unterminated
    /// quote consumes the rest of the input.
    fn quoted(&mut self) -> Option<Token<'a>> {
        let a = self.p;
        let mut b = a;

        let mut first = true;
        let mut escape = false;
        let mut start = ' ';
        while !self.end() {
            let c = self.get();
            if first && Self::is_string_delimiter_start(c) {
                b = self.p_c(c);
                first = false;
                start = c;
                self.inc();
            } else if !first && !escape && Self::is_string_delimiter_end_for(start, c) {
                b = self.p_c(c);
                self.inc();
                if self.end() {
                    break;
                }
                if !Self::is_string_escape_for(start, self.get()) {
                    break;
                } else {
                    b = self.p_c(c);
                    self.inc();
                }
            } else if !first {
                escape = !escape && Self::is_escape_char(c);
                b = self.p_c(c);
                self.inc();
            } else {
                break;
            }
        }
        if a != b {
            Some(Token::Quoted(&self.input[a..b]))
        } else {
            None
        }
    }

    /// Strip the surrounding delimiters and collapse doubled closing
    /// delimiters; backslash escape sequences are copied through unchanged.
    fn unquote(mut self) -> String {
        let mut string = String::new();
        let mut first = true;
        let mut escape = false;
        let mut start = ' ';
        while !self.end() {
            let c = self.get();
            if first && Self::is_string_delimiter_start(c) {
                first = false;
                start = c;
                self.inc();
            } else if !first && !escape && Self::is_string_delimiter_end_for(start, c) {
                self.inc();
                if self.end() {
                    break;
                }
                if !Self::is_string_escape_for(start, self.get()) {
                    break;
                } else {
                    string.write_char(c).unwrap();
                    self.inc();
                }
            } else if !first {
                escape = !escape && Self::is_escape_char(c);
                string.write_char(c).unwrap();
                self.inc();
            } else {
                break;
            }
        }
        string
    }

    /// Consume a single punctuation character. `--` starts a line comment
    /// that runs up to (but not including) the next newline; `/*` starts a
    /// block comment that runs through `*/` or to the end of the input.
    fn punctuation(&mut self) -> Option<Token<'a>> {
        let a = self.p;
        let mut b = a;

        if !self.end() {
            let c = self.get();
            if !Self::is_space(c) && !Self::is_alphanumeric(c) {
                b = self.p_c(c);
                self.inc();
            }
        }

        if a != b {
            let string = &self.input[a..b];
            if string == "-" && self.peek() == '-' {
                b = self.p_c('-');
                self.inc();
                while !self.end() {
                    let c = self.get();
                    if c == '\n' {
                        break;
                    } else {
                        b = self.p_c(c);
                    }
                    self.inc();
                }
                let string = &self.input[a..b];
                return Some(Token::Comment(string));
            } else if string == "/" && self.peek() == '*' {
                b = self.p_c('*');
                self.inc();
                while !self.end() {
                    let c = self.get();
                    b = self.p_c(c);
                    self.inc();
                    if c == '*' && self.peek() == '/' {
                        b = self.p_c('/');
                        self.inc();
                        break;
                    }
                }
                let string = &self.input[a..b];
                return Some(Token::Comment(string));
            }
            Some(Token::Punctuation(string))
        } else {
            None
        }
    }

    fn is_space(c: char) -> bool {
        matches!(c, ' ' | '\t' | '\r' | '\n')
    }

    fn is_identifier(c: char) -> bool {
        matches!(c, '_' | '$')
    }

    fn is_alphanumeric(c: char) -> bool {
        c.is_alphabetic() || c.is_ascii_digit()
    }

    fn is_string_delimiter_start(c: char) -> bool {
        matches!(c, '`' | '[' | '\'' | '"')
    }

    fn is_string_escape_for(start: char, c: char) -> bool {
        match start {
            '`' => c == '`',
            '\'' => c == '\'',
            '"' => c == '"',
            _ => false,
        }
    }

    fn is_string_delimiter_end_for(start: char, c: char) -> bool {
        match start {
            '`' => c == '`',
            '[' => c == ']',
            '\'' => c == '\'',
            '"' => c == '"',
            _ => false,
        }
    }

    fn is_escape_char(c: char) -> bool {
        c == '\\'
    }
}

impl<'a> Iterator for Tokenizer<'a> {
    type Item = Token<'a>;

    fn next(&mut self) -> Option<Self::Item> {
        // Try each token class in turn: whitespace, unquoted word, quoted
        // string, then punctuation (which may turn into a comment).
        if let Some(space) = self.space() {
            return Some(space);
        }
        if let Some(unquoted) = self.unquoted() {
            return Some(unquoted);
        }
        if let Some(quoted) = self.quoted() {
            return Some(quoted);
        }
        if let Some(punctuation) = self.punctuation() {
            return Some(punctuation);
        }
        None
    }
}

impl Token<'_> {
    pub fn is_quoted(&self) -> bool {
        matches!(self, Self::Quoted(_))
    }

    pub fn is_unquoted(&self) -> bool {
        matches!(self, Self::Unquoted(_))
    }

    pub fn is_space(&self) -> bool {
        matches!(self, Self::Space(_))
    }

    pub fn is_punctuation(&self) -> bool {
        matches!(self, Self::Punctuation(_))
    }

    /// The exact input slice this token was read from.
    pub fn as_str(&self) -> &str {
        match self {
            Self::Quoted(string) => string,
            Self::Unquoted(string) => string,
            Self::Space(string) => string,
            Self::Punctuation(string) => string,
            Self::Comment(string) => string,
        }
    }

    /// For a `Quoted` token, return its content with the delimiters removed
    /// and doubled closing delimiters collapsed; `None` for other variants.
    pub fn unquote(&self) -> Option<String> {
        if self.is_quoted() {
            let tokenizer = Tokenizer::new(self.as_str());
            Some(tokenizer.unquote())
        } else {
            None
        }
    }
}

impl std::fmt::Display for Token<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_0() {
        let tokenizer = Tokenizer::new("");
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(tokens, vec![]);
    }

    #[test]
    fn test_1() {
        let string = "SELECT * FROM `character`";
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Unquoted("SELECT"),
                Token::Space(" "),
                Token::Punctuation("*"),
                Token::Space(" "),
                Token::Unquoted("FROM"),
                Token::Space(" "),
                Token::Quoted("`character`"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }
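
    // Added illustrative test (not from the original suite): since `Tokenizer`
    // implements `Iterator`, standard adapters such as `filter` can drop
    // insignificant tokens. The expected values simply mirror `test_1` above
    // with the `Space` tokens removed; the test name is new.
    #[test]
    fn test_example_filter_spaces() {
        let significant: Vec<Token> = Tokenizer::new("SELECT * FROM `character`")
            .iter()
            .filter(|token| !token.is_space())
            .collect();
        assert_eq!(
            significant,
            vec![
                Token::Unquoted("SELECT"),
                Token::Punctuation("*"),
                Token::Unquoted("FROM"),
                Token::Quoted("`character`"),
            ]
        );
    }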

    #[test]
    fn test_2() {
        let string = "SELECT * FROM `character` WHERE id = ?";
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Unquoted("SELECT"),
                Token::Space(" "),
                Token::Punctuation("*"),
                Token::Space(" "),
                Token::Unquoted("FROM"),
                Token::Space(" "),
                Token::Quoted("`character`"),
                Token::Space(" "),
                Token::Unquoted("WHERE"),
                Token::Space(" "),
                Token::Unquoted("id"),
                Token::Space(" "),
                Token::Punctuation("="),
                Token::Space(" "),
                Token::Punctuation("?"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_3() {
        let string = r#"? = "?" "#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Punctuation("?"),
                Token::Space(" "),
                Token::Punctuation("="),
                Token::Space(" "),
                Token::Quoted(r#""?""#),
                Token::Space(" "),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_4() {
        let string = r#""a\"bc""#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(tokens, vec![Token::Quoted("\"a\\\"bc\"")]);
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_5() {
        let string = "abc123";
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(tokens, vec![Token::Unquoted(string)]);
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_6() {
        let string = "2.3*4/5";
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Unquoted("2"),
                Token::Punctuation("."),
                Token::Unquoted("3"),
                Token::Punctuation("*"),
                Token::Unquoted("4"),
                Token::Punctuation("/"),
                Token::Unquoted("5"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_7() {
        let string = r#""a\\" B"#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Quoted("\"a\\\\\""),
                Token::Space(" "),
                Token::Unquoted("B"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_8() {
        let string = r#"`a"b` "#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(tokens, vec![Token::Quoted("`a\"b`"), Token::Space(" ")]);
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_9() {
        let string = r"[ab] ";
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(tokens, vec![Token::Quoted("[ab]"), Token::Space(" ")]);
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_10() {
        let string = r#" 'a"b' "#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Space(" "),
                Token::Quoted("'a\"b'"),
                Token::Space(" "),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_11() {
        let string = r" `a``b` ";
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Space(" "),
                Token::Quoted("`a``b`"),
                Token::Space(" "),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_12() {
        let string = r" 'a''b' ";
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Space(" "),
                Token::Quoted("'a''b'"),
                Token::Space(" "),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }
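
    // Added illustrative test (not from the original suite): an opening quote
    // with no matching closing delimiter is still returned as a single
    // `Quoted` token covering the rest of the input, analogous to how an
    // unterminated block comment is handled in `test_block_comment` below.
    #[test]
    fn test_example_unterminated_quote() {
        let string = r#""abc"#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(tokens, vec![Token::Quoted("\"abc")]);
    }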

    #[test]
    fn test_13() {
        let string = r"(?)";
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Punctuation("("),
                Token::Punctuation("?"),
                Token::Punctuation(")"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_14() {
        let string = r"($1 = $2)";
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Punctuation("("),
                Token::Punctuation("$"),
                Token::Unquoted("1"),
                Token::Space(" "),
                Token::Punctuation("="),
                Token::Space(" "),
                Token::Punctuation("$"),
                Token::Unquoted("2"),
                Token::Punctuation(")"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_15() {
        let string = r#" "Hello World" "#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Space(" "),
                Token::Quoted("\"Hello World\""),
                Token::Space(" "),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_16() {
        let string = "abc_$123";
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(tokens, vec![Token::Unquoted(string)]);
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_17() {
        let string = "$abc$123";
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![Token::Punctuation("$"), Token::Unquoted("abc$123")]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_18() {
        let string = "_$abc_123$";
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Punctuation("_"),
                Token::Punctuation("$"),
                Token::Unquoted("abc_123$"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_19() {
        let string = r#""a\"bc""#;
        let tokenizer = Tokenizer::new(string);
        assert_eq!(tokenizer.unquote(), "a\\\"bc".to_owned());
    }

    #[test]
    fn test_20() {
        let string = r#""a""bc""#;
        let tokenizer = Tokenizer::new(string);
        assert_eq!(tokenizer.unquote(), "a\"bc".to_owned());
    }

    #[test]
    fn test_21() {
        assert_eq!(
            Token::Quoted("'a\\nb'").unquote().unwrap(),
            "a\\nb".to_owned()
        );
    }
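
    // Added illustrative test (not from the original suite): `unquote` also
    // strips square-bracket delimiters and collapses doubled single quotes,
    // matching the delimiter pairs recognized by the tokenizer.
    #[test]
    fn test_example_unquote_variants() {
        assert_eq!(Token::Quoted("[ab]").unquote().unwrap(), "ab".to_owned());
        assert_eq!(Token::Quoted("'a''b'").unquote().unwrap(), "a'b".to_owned());
    }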

    #[test]
    fn test_22() {
        let string = r#" "Hello\nWorld" "#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Space(" "),
                Token::Quoted("\"Hello\\nWorld\""),
                Token::Space(" "),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_23() {
        let string = "{ab} '{cd}'";
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Punctuation("{"),
                Token::Unquoted("ab"),
                Token::Punctuation("}"),
                Token::Space(" "),
                Token::Quoted("'{cd}'"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_24() {
        let string = r#"新"老虎","#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Unquoted("新"),
                Token::Quoted("\"老虎\""),
                Token::Punctuation(","),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_25() {
        let string = r#"{a.1:2}"#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Punctuation("{"),
                Token::Unquoted("a"),
                Token::Punctuation("."),
                Token::Unquoted("1"),
                Token::Punctuation(":"),
                Token::Unquoted("2"),
                Token::Punctuation("}"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_26() {
        let string = r#"{..(a.1:2)}"#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Punctuation("{"),
                Token::Punctuation("."),
                Token::Punctuation("."),
                Token::Punctuation("("),
                Token::Unquoted("a"),
                Token::Punctuation("."),
                Token::Unquoted("1"),
                Token::Punctuation(":"),
                Token::Unquoted("2"),
                Token::Punctuation(")"),
                Token::Punctuation("}"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }
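
    // Added illustrative test (not from the original suite): `Display` writes
    // back the exact input slice, so formatting every token in order rebuilds
    // the original string, just like the `as_str` round trips asserted
    // throughout this module.
    #[test]
    fn test_example_display_roundtrip() {
        let string = "SELECT * FROM `character` WHERE id = ?";
        let rebuilt: String = Tokenizer::new(string)
            .iter()
            .map(|token| token.to_string())
            .collect();
        assert_eq!(rebuilt, string);
    }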

    #[test]
    fn test_single_line_comment() {
        let string = r#"SELECT
        -- hello 
        1"#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Unquoted("SELECT"),
                Token::Space("\n        "),
                Token::Comment("-- hello "),
                Token::Space("\n        "),
                Token::Unquoted("1"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );

        let string = r#"SELECT -- hello
        1"#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Unquoted("SELECT"),
                Token::Space(" "),
                Token::Comment("-- hello"),
                Token::Space("\n        "),
                Token::Unquoted("1"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );

        let string = r#"SELECT 1 -- hello"#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Unquoted("SELECT"),
                Token::Space(" "),
                Token::Unquoted("1"),
                Token::Space(" "),
                Token::Comment("-- hello"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );

        let string = r#"SELECT 1 --"#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Unquoted("SELECT"),
                Token::Space(" "),
                Token::Unquoted("1"),
                Token::Space(" "),
                Token::Comment("--"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );

        let string = r#"SELECT 1 -"#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Unquoted("SELECT"),
                Token::Space(" "),
                Token::Unquoted("1"),
                Token::Space(" "),
                Token::Punctuation("-"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }

    #[test]
    fn test_block_comment() {
        let string = r#"SELECT /* hello */ 1"#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Unquoted("SELECT"),
                Token::Space(" "),
                Token::Comment("/* hello */"),
                Token::Space(" "),
                Token::Unquoted("1"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );

        let string = r#"SELECT /*hello*/ 1"#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Unquoted("SELECT"),
                Token::Space(" "),
                Token::Comment("/*hello*/"),
                Token::Space(" "),
                Token::Unquoted("1"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );

        let string = r#"SELECT /* --hello */ 1"#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Unquoted("SELECT"),
                Token::Space(" "),
                Token::Comment("/* --hello */"),
                Token::Space(" "),
                Token::Unquoted("1"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );

        let string = r#"SELECT
        /* hello */
        1"#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Unquoted("SELECT"),
                Token::Space("\n        "),
                Token::Comment("/* hello */"),
                Token::Space("\n        "),
                Token::Unquoted("1"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );

        let string = r#"SELECT /*
        -- hello */
        1"#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Unquoted("SELECT"),
                Token::Space(" "),
                Token::Comment("/*\n        -- hello */"),
                Token::Space("\n        "),
                Token::Unquoted("1"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );

        let string = r#"SELECT 1/*hello*/"#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Unquoted("SELECT"),
                Token::Space(" "),
                Token::Unquoted("1"),
                Token::Comment("/*hello*/"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );

        let string = r#"SELECT 1/*hello*"#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Unquoted("SELECT"),
                Token::Space(" "),
                Token::Unquoted("1"),
                Token::Comment("/*hello*"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );

        let string = r#"SELECT 1/*hello"#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Unquoted("SELECT"),
                Token::Space(" "),
                Token::Unquoted("1"),
                Token::Comment("/*hello"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );

        let string = r#"SELECT 1 /*"#;
        let tokenizer = Tokenizer::new(string);
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Unquoted("SELECT"),
                Token::Space(" "),
                Token::Unquoted("1"),
                Token::Space(" "),
                Token::Comment("/*"),
            ]
        );
        assert_eq!(
            string,
            tokens.iter().map(|x| x.as_str()).collect::<String>()
        );
    }
}