1use mime::{self, Mime};
6
7use crate::LoadContext;
8
9pub struct MimeClassifier {
10 image_classifier: GroupedClassifier,
11 audio_video_classifier: GroupedClassifier,
12 scriptable_classifier: GroupedClassifier,
13 plaintext_classifier: GroupedClassifier,
14 archive_classifier: GroupedClassifier,
15 binary_or_plaintext: BinaryOrPlaintextClassifier,
16 font_classifier: GroupedClassifier,
17}
18
/// Coarse family a MIME type belongs to, as reported by
/// `MimeClassifier::get_media_type`.
///
/// The type is a plain tag, so it is cheap to copy and can be printed in
/// diagnostics and assertions.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MediaType {
    Xml,
    Html,
    AudioVideo,
    Image,
    JavaScript,
    Json,
    Font,
    Text,
    Css,
}
31
/// Flag that, when `On`, makes browsing-context classification fall back to
/// the text-versus-binary check instead of signature sniffing.
///
/// The type is a plain tag, so it is cheap to copy and can be printed in
/// diagnostics and assertions.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ApacheBugFlag {
    On,
    Off,
}
37
38impl ApacheBugFlag {
39 pub fn from_content_type(mime_type: Option<&Mime>) -> ApacheBugFlag {
41 if mime_type.is_some_and(|mime_type| {
43 *mime_type == mime::TEXT_PLAIN || *mime_type == mime::TEXT_PLAIN_UTF_8
44 }) {
45 ApacheBugFlag::On
46 } else {
47 ApacheBugFlag::Off
48 }
49 }
50}
51
/// Flag that, when `On`, tells the classifier to keep a known supplied type
/// and to skip the scriptable signatures when the type is unknown.
///
/// The type is a plain tag, so it is cheap to copy and can be printed in
/// diagnostics and assertions.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum NoSniffFlag {
    On,
    Off,
}

impl From<bool> for NoSniffFlag {
    /// Maps `true` to [`NoSniffFlag::On`] and `false` to [`NoSniffFlag::Off`].
    fn from(boolean: bool) -> Self {
        match boolean {
            true => NoSniffFlag::On,
            false => NoSniffFlag::Off,
        }
    }
}
67
68impl Default for MimeClassifier {
69 fn default() -> Self {
70 Self {
71 image_classifier: GroupedClassifier::image_classifer(),
72 audio_video_classifier: GroupedClassifier::audio_video_classifier(),
73 scriptable_classifier: GroupedClassifier::scriptable_classifier(),
74 plaintext_classifier: GroupedClassifier::plaintext_classifier(),
75 archive_classifier: GroupedClassifier::archive_classifier(),
76 binary_or_plaintext: BinaryOrPlaintextClassifier,
77 font_classifier: GroupedClassifier::font_classifier(),
78 }
79 }
80}
81
82impl MimeClassifier {
83 pub fn classify<'a>(
85 &'a self,
86 context: LoadContext,
87 no_sniff_flag: NoSniffFlag,
88 apache_bug_flag: ApacheBugFlag,
89 supplied_type: &Option<Mime>,
90 data: &'a [u8],
91 ) -> Mime {
92 let supplied_type_or_octet_stream = supplied_type
93 .clone()
94 .unwrap_or(mime::APPLICATION_OCTET_STREAM);
95 if Self::is_xml(&supplied_type_or_octet_stream) ||
98 Self::is_html(&supplied_type_or_octet_stream)
99 {
100 return supplied_type_or_octet_stream;
101 }
102 match context {
103 LoadContext::Browsing => match *supplied_type {
104 None => self.sniff_unknown_type(no_sniff_flag, data),
108 Some(ref supplied_type) => {
109 if MimeClassifier::is_explicit_unknown(supplied_type) {
110 return self.sniff_unknown_type(no_sniff_flag, data);
111 }
112 if no_sniff_flag == NoSniffFlag::On {
115 return supplied_type.clone();
116 }
117 if apache_bug_flag == ApacheBugFlag::On {
120 return self.sniff_text_or_data(data);
121 }
122 match MimeClassifier::get_media_type(supplied_type) {
123 Some(MediaType::Image) => {
127 self.image_classifier.classify(data)
129 },
130 Some(MediaType::AudioVideo) => {
134 self.audio_video_classifier.classify(data)
136 },
137 Some(MediaType::Html) | Some(MediaType::Xml) => unreachable!(),
138 _ => None,
139 }
140 .unwrap_or(supplied_type.clone())
142 },
143 },
144 LoadContext::Image => {
145 match MimeClassifier::maybe_get_media_type(supplied_type) {
147 Some(MediaType::Xml) => None,
148 _ => self.image_classifier.classify(data),
149 }
150 .unwrap_or(supplied_type_or_octet_stream)
151 },
152 LoadContext::AudioVideo => {
153 match MimeClassifier::maybe_get_media_type(supplied_type) {
155 Some(MediaType::Xml) => None,
156 _ => self.audio_video_classifier.classify(data),
157 }
158 .unwrap_or(supplied_type_or_octet_stream)
159 },
160 LoadContext::Plugin => {
161 match *supplied_type {
166 None => mime::APPLICATION_OCTET_STREAM,
167 _ => supplied_type_or_octet_stream,
168 }
169 },
170 LoadContext::Style => {
171 supplied_type.clone().unwrap_or_else(|| {
176 if no_sniff_flag == NoSniffFlag::On {
177 mime::APPLICATION_OCTET_STREAM
178 } else {
179 mime::TEXT_CSS
180 }
181 })
182 },
183 LoadContext::Script => {
184 match *supplied_type {
189 None => mime::TEXT_JAVASCRIPT,
190 _ => supplied_type_or_octet_stream,
191 }
192 },
193 LoadContext::Font => {
194 match MimeClassifier::maybe_get_media_type(supplied_type) {
196 Some(MediaType::Xml) => None,
197 _ => self.font_classifier.classify(data),
198 }
199 .unwrap_or(supplied_type_or_octet_stream)
200 },
201 LoadContext::TextTrack => {
202 "text/vtt".parse().unwrap()
207 },
208 LoadContext::CacheManifest => {
209 "text/cache-manifest".parse().unwrap()
214 },
215 }
216 }
217
218 pub fn validate(&self) -> Result<(), String> {
219 self.image_classifier.validate()?;
220 self.audio_video_classifier.validate()?;
221 self.scriptable_classifier.validate()?;
222 self.plaintext_classifier.validate()?;
223 self.archive_classifier.validate()?;
224 self.binary_or_plaintext.validate()?;
225 self.font_classifier.validate()?;
226 Ok(())
227 }
228
229 fn sniff_unknown_type(&self, no_sniff_flag: NoSniffFlag, data: &[u8]) -> Mime {
231 let should_sniff_scriptable = no_sniff_flag == NoSniffFlag::Off;
232 let sniffed = if should_sniff_scriptable {
233 self.scriptable_classifier.classify(data)
234 } else {
235 None
236 };
237
238 sniffed
239 .or_else(|| self.plaintext_classifier.classify(data))
240 .or_else(|| self.image_classifier.classify(data))
241 .or_else(|| self.audio_video_classifier.classify(data))
242 .or_else(|| self.archive_classifier.classify(data))
243 .or_else(|| self.binary_or_plaintext.classify(data))
244 .expect("BinaryOrPlaintextClassifier always succeeds")
245 }
246
247 fn sniff_text_or_data<'a>(&'a self, data: &'a [u8]) -> Mime {
248 self.binary_or_plaintext
249 .classify(data)
250 .expect("BinaryOrPlaintextClassifier always succeeds")
251 }
252
253 fn is_xml(mt: &Mime) -> bool {
257 !Self::is_image(mt) &&
258 (mt.suffix() == Some(mime::XML) ||
259 mt.essence_str() == "text/xml" ||
260 mt.essence_str() == "application/xml")
261 }
262
263 fn is_html(mt: &Mime) -> bool {
265 mt.essence_str() == "text/html"
266 }
267
268 fn is_image(mt: &Mime) -> bool {
270 mt.type_() == mime::IMAGE
271 }
272
273 fn is_audio_video(mt: &Mime) -> bool {
275 mt.type_() == mime::AUDIO ||
276 mt.type_() == mime::VIDEO ||
277 mt.essence_str() == "application/ogg"
278 }
279
280 fn is_explicit_unknown(mt: &Mime) -> bool {
281 mt.type_().as_str() == "unknown" && mt.subtype().as_str() == "unknown" ||
282 mt.type_() == mime::APPLICATION && mt.subtype().as_str() == "unknown" ||
283 mt.type_() == mime::STAR && mt.subtype() == mime::STAR
284 }
285
286 pub fn is_javascript(mt: &Mime) -> bool {
288 (mt.type_() == mime::APPLICATION &&
289 (["ecmascript", "javascript", "x-ecmascript", "x-javascript"]
290 .contains(&mt.subtype().as_str()))) ||
291 (mt.type_() == mime::TEXT &&
292 ([
293 "ecmascript",
294 "javascript",
295 "javascript1.0",
296 "javascript1.1",
297 "javascript1.2",
298 "javascript1.3",
299 "javascript1.4",
300 "javascript1.5",
301 "jscript",
302 "livescript",
303 "x-ecmascript",
304 "x-javascript",
305 ]
306 .contains(&mt.subtype().as_str())))
307 }
308
309 pub fn is_json(mt: &Mime) -> bool {
311 mt.suffix() == Some(mime::JSON) ||
312 mt.essence_str() == "application/json" ||
313 mt.essence_str() == "text/json"
314 }
315
316 fn is_font(mt: &Mime) -> bool {
318 mt.type_() == mime::FONT ||
319 (mt.type_() == mime::APPLICATION &&
320 ([
321 "font-cff",
322 "font-off",
323 "font-sfnt",
324 "font-ttf",
325 "font-woff",
326 "vnd.ms-fontobject",
327 "vnd.ms-opentype",
328 ]
329 .contains(&mt.subtype().as_str())))
330 }
331
332 fn is_text(mt: &Mime) -> bool {
333 *mt == mime::TEXT_PLAIN || mt.essence_str() == "text/vtt"
334 }
335
336 pub fn is_css(mt: &Mime) -> bool {
337 mt.essence_str() == "text/css"
338 }
339
340 pub fn get_media_type(mime: &Mime) -> Option<MediaType> {
341 if MimeClassifier::is_xml(mime) {
342 Some(MediaType::Xml)
343 } else if MimeClassifier::is_html(mime) {
344 Some(MediaType::Html)
345 } else if MimeClassifier::is_image(mime) {
346 Some(MediaType::Image)
347 } else if MimeClassifier::is_audio_video(mime) {
348 Some(MediaType::AudioVideo)
349 } else if MimeClassifier::is_javascript(mime) {
350 Some(MediaType::JavaScript)
351 } else if MimeClassifier::is_font(mime) {
352 Some(MediaType::Font)
353 } else if MimeClassifier::is_json(mime) {
354 Some(MediaType::Json)
355 } else if MimeClassifier::is_text(mime) {
356 Some(MediaType::Text)
357 } else if MimeClassifier::is_css(mime) {
358 Some(MediaType::Css)
359 } else {
360 None
361 }
362 }
363
364 fn maybe_get_media_type(supplied_type: &Option<Mime>) -> Option<MediaType> {
365 supplied_type
366 .as_ref()
367 .and_then(MimeClassifier::get_media_type)
368 }
369}
370
371trait MIMEChecker {
373 fn classify(&self, data: &[u8]) -> Option<Mime>;
374 fn validate(&self) -> Result<(), String>;
376}
377
378struct ByteMatcher {
379 pattern: &'static [u8],
380 mask: &'static [u8],
381 leading_ignore: &'static [u8],
382 content_type: Mime,
383}
384
385impl ByteMatcher {
386 fn matches(&self, data: &[u8]) -> Option<usize> {
387 if data.len() < self.pattern.len() {
388 None
389 } else if data == self.pattern {
390 Some(self.pattern.len())
391 } else {
392 data[..data.len() - self.pattern.len() + 1]
393 .iter()
394 .position(|x| !self.leading_ignore.contains(x))
395 .and_then(|start| {
396 if data[start..]
397 .iter()
398 .zip(self.pattern.iter())
399 .zip(self.mask.iter())
400 .all(|((&data, &pattern), &mask)| (data & mask) == pattern)
401 {
402 Some(start + self.pattern.len())
403 } else {
404 None
405 }
406 })
407 }
408 }
409}
410
411impl MIMEChecker for ByteMatcher {
412 fn classify(&self, data: &[u8]) -> Option<Mime> {
413 self.matches(data).map(|_| self.content_type.clone())
414 }
415
416 fn validate(&self) -> Result<(), String> {
417 if self.pattern.is_empty() {
418 return Err(format!("Zero length pattern for {:?}", self.content_type));
419 }
420 if self.pattern.len() != self.mask.len() {
421 return Err(format!(
422 "Unequal pattern and mask length for {:?}",
423 self.content_type
424 ));
425 }
426 if self
427 .pattern
428 .iter()
429 .zip(self.mask.iter())
430 .any(|(&pattern, &mask)| pattern & mask != pattern)
431 {
432 return Err(format!(
433 "Pattern not pre-masked for {:?}",
434 self.content_type
435 ));
436 }
437 Ok(())
438 }
439}
440
441struct TagTerminatedByteMatcher {
442 matcher: ByteMatcher,
443}
444
445impl MIMEChecker for TagTerminatedByteMatcher {
446 fn classify(&self, data: &[u8]) -> Option<Mime> {
447 self.matcher.matches(data).and_then(|j| {
448 if j < data.len() && (data[j] == b' ' || data[j] == b'>') {
449 Some(self.matcher.content_type.clone())
450 } else {
451 None
452 }
453 })
454 }
455
456 fn validate(&self) -> Result<(), String> {
457 self.matcher.validate()
458 }
459}
460
/// Recognises MP4 data from a leading `ftyp` box.
pub struct Mp4Matcher;

impl Mp4Matcher {
    /// Returns `true` when `data` starts with an `ftyp` box that names an
    /// `mp4` brand.
    ///
    /// The first four bytes are read as a big-endian box size. The input is
    /// rejected when it is shorter than 12 bytes or than the declared box,
    /// when the size is not a multiple of four, or when bytes 4..8 are not
    /// `ftyp`. Otherwise the brand at offset 8 is checked first, followed by
    /// every four-byte brand from offset 16 up to the end of the box.
    ///
    /// Never panics: a box that ends before offset 16 simply has no further
    /// brands to inspect.
    pub fn matches(&self, data: &[u8]) -> bool {
        const FTYP: &[u8] = b"ftyp";
        const MP4: &[u8] = b"mp4";

        if data.len() < 12 {
            return false;
        }

        let box_size = u32::from_be_bytes([data[0], data[1], data[2], data[3]]) as usize;
        if data.len() < box_size || box_size % 4 != 0 {
            return false;
        }

        if !data[4..].starts_with(FTYP) {
            return false;
        }

        if data[8..].starts_with(MP4) {
            return true;
        }

        // Checked slicing: `box_size` may be smaller than 16, in which case
        // the range would be inverted and plain indexing would panic.
        data.get(16..box_size)
            .is_some_and(|brands| brands.chunks(4).any(|brand| brand.starts_with(MP4)))
    }
}
505impl MIMEChecker for Mp4Matcher {
506 fn classify(&self, data: &[u8]) -> Option<Mime> {
507 if self.matches(data) {
508 Some("video/mp4".parse().unwrap())
509 } else {
510 None
511 }
512 }
513
514 fn validate(&self) -> Result<(), String> {
515 Ok(())
516 }
517}
518
519struct BinaryOrPlaintextClassifier;
520
521impl BinaryOrPlaintextClassifier {
522 fn classify_impl(&self, data: &[u8]) -> Mime {
524 if data.starts_with(&[0xFFu8, 0xFEu8]) ||
532 data.starts_with(&[0xFEu8, 0xFFu8]) ||
533 data.starts_with(&[0xEFu8, 0xBBu8, 0xBFu8])
534 {
535 mime::TEXT_PLAIN
536 } else if data.iter().any(|&x| {
537 x <= 0x08u8 ||
538 x == 0x0Bu8 ||
539 (0x0Eu8..=0x1Au8).contains(&x) ||
540 (0x1Cu8..=0x1Fu8).contains(&x)
541 }) {
542 mime::APPLICATION_OCTET_STREAM
544 } else {
545 mime::TEXT_PLAIN
548 }
549 }
550}
551impl MIMEChecker for BinaryOrPlaintextClassifier {
552 fn classify(&self, data: &[u8]) -> Option<Mime> {
553 Some(self.classify_impl(data))
554 }
555
556 fn validate(&self) -> Result<(), String> {
557 Ok(())
558 }
559}
560struct GroupedClassifier {
561 byte_matchers: Vec<Box<dyn MIMEChecker + Send + Sync>>,
562}
563impl GroupedClassifier {
564 fn image_classifer() -> GroupedClassifier {
565 GroupedClassifier {
566 byte_matchers: vec![
567 Box::new(ByteMatcher::image_x_icon()),
570 Box::new(ByteMatcher::image_x_icon_cursor()),
571 Box::new(ByteMatcher::image_bmp()),
572 Box::new(ByteMatcher::image_gif89a()),
573 Box::new(ByteMatcher::image_gif87a()),
574 Box::new(ByteMatcher::image_webp()),
575 Box::new(ByteMatcher::image_png()),
576 Box::new(ByteMatcher::image_jpeg()),
577 ],
578 }
579 }
580 fn audio_video_classifier() -> GroupedClassifier {
581 GroupedClassifier {
582 byte_matchers: vec![
583 Box::new(ByteMatcher::video_webm()),
584 Box::new(ByteMatcher::audio_basic()),
585 Box::new(ByteMatcher::audio_aiff()),
586 Box::new(ByteMatcher::audio_mpeg()),
587 Box::new(ByteMatcher::application_ogg()),
588 Box::new(ByteMatcher::audio_midi()),
589 Box::new(ByteMatcher::video_avi()),
590 Box::new(ByteMatcher::audio_wave()),
591 Box::new(Mp4Matcher),
592 ],
593 }
594 }
595 fn scriptable_classifier() -> GroupedClassifier {
596 GroupedClassifier {
597 byte_matchers: vec![
598 Box::new(ByteMatcher::text_html_doctype()),
599 Box::new(ByteMatcher::text_html_page()),
600 Box::new(ByteMatcher::text_html_head()),
601 Box::new(ByteMatcher::text_html_script()),
602 Box::new(ByteMatcher::text_html_iframe()),
603 Box::new(ByteMatcher::text_html_h1()),
604 Box::new(ByteMatcher::text_html_div()),
605 Box::new(ByteMatcher::text_html_font()),
606 Box::new(ByteMatcher::text_html_table()),
607 Box::new(ByteMatcher::text_html_a()),
608 Box::new(ByteMatcher::text_html_style()),
609 Box::new(ByteMatcher::text_html_title()),
610 Box::new(ByteMatcher::text_html_b()),
611 Box::new(ByteMatcher::text_html_body()),
612 Box::new(ByteMatcher::text_html_br()),
613 Box::new(ByteMatcher::text_html_p()),
614 Box::new(ByteMatcher::text_html_comment()),
615 Box::new(ByteMatcher::text_xml()),
616 Box::new(ByteMatcher::application_pdf()),
617 ],
618 }
619 }
620 fn plaintext_classifier() -> GroupedClassifier {
621 GroupedClassifier {
622 byte_matchers: vec![
623 Box::new(ByteMatcher::text_plain_utf_8_bom()),
624 Box::new(ByteMatcher::text_plain_utf_16le_bom()),
625 Box::new(ByteMatcher::text_plain_utf_16be_bom()),
626 Box::new(ByteMatcher::application_postscript()),
627 ],
628 }
629 }
630 fn archive_classifier() -> GroupedClassifier {
631 GroupedClassifier {
632 byte_matchers: vec![
633 Box::new(ByteMatcher::application_x_gzip()),
634 Box::new(ByteMatcher::application_zip()),
635 Box::new(ByteMatcher::application_x_rar_compressed()),
636 ],
637 }
638 }
639
640 fn font_classifier() -> GroupedClassifier {
641 GroupedClassifier {
642 byte_matchers: vec![
643 Box::new(ByteMatcher::application_font_woff()),
644 Box::new(ByteMatcher::true_type_collection()),
645 Box::new(ByteMatcher::open_type()),
646 Box::new(ByteMatcher::true_type()),
647 Box::new(ByteMatcher::application_vnd_ms_font_object()),
648 ],
649 }
650 }
651}
652impl MIMEChecker for GroupedClassifier {
653 fn classify(&self, data: &[u8]) -> Option<Mime> {
654 self.byte_matchers
655 .iter()
656 .find_map(|matcher| matcher.classify(data))
657 }
658
659 fn validate(&self) -> Result<(), String> {
660 for byte_matcher in &self.byte_matchers {
661 byte_matcher.validate()?
662 }
663 Ok(())
664 }
665}
666
667impl ByteMatcher {
670 fn image_x_icon() -> ByteMatcher {
672 ByteMatcher {
673 pattern: b"\x00\x00\x01\x00",
674 mask: b"\xFF\xFF\xFF\xFF",
675 content_type: "image/x-icon".parse().unwrap(),
676 leading_ignore: &[],
677 }
678 }
679 fn image_x_icon_cursor() -> ByteMatcher {
681 ByteMatcher {
682 pattern: b"\x00\x00\x02\x00",
683 mask: b"\xFF\xFF\xFF\xFF",
684 content_type: "image/x-icon".parse().unwrap(),
685 leading_ignore: &[],
686 }
687 }
688 fn image_bmp() -> ByteMatcher {
690 ByteMatcher {
691 pattern: b"BM",
692 mask: b"\xFF\xFF",
693 content_type: mime::IMAGE_BMP,
694 leading_ignore: &[],
695 }
696 }
697 fn image_gif89a() -> ByteMatcher {
699 ByteMatcher {
700 pattern: b"GIF89a",
701 mask: b"\xFF\xFF\xFF\xFF\xFF\xFF",
702 content_type: mime::IMAGE_GIF,
703 leading_ignore: &[],
704 }
705 }
706 fn image_gif87a() -> ByteMatcher {
708 ByteMatcher {
709 pattern: b"GIF87a",
710 mask: b"\xFF\xFF\xFF\xFF\xFF\xFF",
711 content_type: mime::IMAGE_GIF,
712 leading_ignore: &[],
713 }
714 }
715 fn image_webp() -> ByteMatcher {
717 ByteMatcher {
718 pattern: b"RIFF\x00\x00\x00\x00WEBPVP",
719 mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF",
720 content_type: "image/webp".parse().unwrap(),
721 leading_ignore: &[],
722 }
723 }
724 fn image_png() -> ByteMatcher {
727 ByteMatcher {
728 pattern: b"\x89PNG\r\n\x1A\n",
729 mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
730 content_type: mime::IMAGE_PNG,
731 leading_ignore: &[],
732 }
733 }
734 fn image_jpeg() -> ByteMatcher {
736 ByteMatcher {
737 pattern: b"\xFF\xD8\xFF",
738 mask: b"\xFF\xFF\xFF",
739 content_type: mime::IMAGE_JPEG,
740 leading_ignore: &[],
741 }
742 }
743 fn video_webm() -> ByteMatcher {
745 ByteMatcher {
746 pattern: b"\x1A\x45\xDF\xA3",
747 mask: b"\xFF\xFF\xFF\xFF",
748 content_type: "video/webm".parse().unwrap(),
749 leading_ignore: &[],
750 }
751 }
752 fn audio_basic() -> ByteMatcher {
754 ByteMatcher {
755 pattern: b".snd",
756 mask: b"\xFF\xFF\xFF\xFF",
757 content_type: "audio/basic".parse().unwrap(),
758 leading_ignore: &[],
759 }
760 }
761 fn audio_aiff() -> ByteMatcher {
763 ByteMatcher {
764 pattern: b"FORM\x00\x00\x00\x00AIFF",
765 mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
766 content_type: "audio/aiff".parse().unwrap(),
767 leading_ignore: &[],
768 }
769 }
770 fn audio_mpeg() -> ByteMatcher {
772 ByteMatcher {
773 pattern: b"ID3",
774 mask: b"\xFF\xFF\xFF",
775 content_type: "audio/mpeg".parse().unwrap(),
776 leading_ignore: &[],
777 }
778 }
779 fn application_ogg() -> ByteMatcher {
781 ByteMatcher {
782 pattern: b"OggS\x00",
783 mask: b"\xFF\xFF\xFF\xFF\xFF",
784 content_type: "application/ogg".parse().unwrap(),
785 leading_ignore: &[],
786 }
787 }
788 fn audio_midi() -> ByteMatcher {
791 ByteMatcher {
792 pattern: b"MThd\x00\x00\x00\x06",
793 mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
794 content_type: "audio/midi".parse().unwrap(),
795 leading_ignore: &[],
796 }
797 }
798 fn video_avi() -> ByteMatcher {
800 ByteMatcher {
801 pattern: b"RIFF\x00\x00\x00\x00AVI ",
802 mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
803 content_type: "video/avi".parse().unwrap(),
804 leading_ignore: &[],
805 }
806 }
807 fn audio_wave() -> ByteMatcher {
809 ByteMatcher {
810 pattern: b"RIFF\x00\x00\x00\x00WAVE",
811 mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
812 content_type: "audio/wave".parse().unwrap(),
813 leading_ignore: &[],
814 }
815 }
816 fn text_html_doctype() -> TagTerminatedByteMatcher {
818 TagTerminatedByteMatcher {
819 matcher: ByteMatcher {
820 pattern: b"<!DOCTYPE HTML",
821 mask: b"\xFF\xFF\xDF\xDF\xDF\xDF\xDF\xDF\xDF\xFF\xDF\xDF\xDF\xDF",
822 content_type: mime::TEXT_HTML,
823 leading_ignore: b"\t\n\x0C\r ",
824 },
825 }
826 }
827
828 fn text_html_page() -> TagTerminatedByteMatcher {
830 TagTerminatedByteMatcher {
831 matcher: ByteMatcher {
832 pattern: b"<HTML",
833 mask: b"\xFF\xDF\xDF\xDF\xDF",
834 content_type: mime::TEXT_HTML,
835 leading_ignore: b"\t\n\x0C\r ",
836 },
837 }
838 }
839
840 fn text_html_head() -> TagTerminatedByteMatcher {
842 TagTerminatedByteMatcher {
843 matcher: ByteMatcher {
844 pattern: b"<HEAD",
845 mask: b"\xFF\xDF\xDF\xDF\xDF",
846 content_type: mime::TEXT_HTML,
847 leading_ignore: b"\t\n\x0C\r ",
848 },
849 }
850 }
851
852 fn text_html_script() -> TagTerminatedByteMatcher {
854 TagTerminatedByteMatcher {
855 matcher: ByteMatcher {
856 pattern: b"<SCRIPT",
857 mask: b"\xFF\xDF\xDF\xDF\xDF\xDF\xDF",
858 content_type: mime::TEXT_HTML,
859 leading_ignore: b"\t\n\x0C\r ",
860 },
861 }
862 }
863
864 fn text_html_iframe() -> TagTerminatedByteMatcher {
866 TagTerminatedByteMatcher {
867 matcher: ByteMatcher {
868 pattern: b"<IFRAME",
869 mask: b"\xFF\xDF\xDF\xDF\xDF\xDF\xDF",
870 content_type: mime::TEXT_HTML,
871 leading_ignore: b"\t\n\x0C\r ",
872 },
873 }
874 }
875
876 fn text_html_h1() -> TagTerminatedByteMatcher {
878 TagTerminatedByteMatcher {
879 matcher: ByteMatcher {
880 pattern: b"<H1",
881 mask: b"\xFF\xDF\xFF",
882 content_type: mime::TEXT_HTML,
883 leading_ignore: b"\t\n\x0C\r ",
884 },
885 }
886 }
887
888 fn text_html_div() -> TagTerminatedByteMatcher {
890 TagTerminatedByteMatcher {
891 matcher: ByteMatcher {
892 pattern: b"<DIV",
893 mask: b"\xFF\xDF\xDF\xDF",
894 content_type: mime::TEXT_HTML,
895 leading_ignore: b"\t\n\x0C\r ",
896 },
897 }
898 }
899
900 fn text_html_font() -> TagTerminatedByteMatcher {
902 TagTerminatedByteMatcher {
903 matcher: ByteMatcher {
904 pattern: b"<FONT",
905 mask: b"\xFF\xDF\xDF\xDF\xDF",
906 content_type: mime::TEXT_HTML,
907 leading_ignore: b"\t\n\x0C\r ",
908 },
909 }
910 }
911
912 fn text_html_table() -> TagTerminatedByteMatcher {
914 TagTerminatedByteMatcher {
915 matcher: ByteMatcher {
916 pattern: b"<TABLE",
917 mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
918 content_type: mime::TEXT_HTML,
919 leading_ignore: b"\t\n\x0C\r ",
920 },
921 }
922 }
923
924 fn text_html_a() -> TagTerminatedByteMatcher {
926 TagTerminatedByteMatcher {
927 matcher: ByteMatcher {
928 pattern: b"<A",
929 mask: b"\xFF\xDF",
930 content_type: mime::TEXT_HTML,
931 leading_ignore: b"\t\n\x0C\r ",
932 },
933 }
934 }
935
936 fn text_html_style() -> TagTerminatedByteMatcher {
938 TagTerminatedByteMatcher {
939 matcher: ByteMatcher {
940 pattern: b"<STYLE",
941 mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
942 content_type: mime::TEXT_HTML,
943 leading_ignore: b"\t\n\x0C\r ",
944 },
945 }
946 }
947
948 fn text_html_title() -> TagTerminatedByteMatcher {
950 TagTerminatedByteMatcher {
951 matcher: ByteMatcher {
952 pattern: b"<TITLE",
953 mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
954 content_type: mime::TEXT_HTML,
955 leading_ignore: b"\t\n\x0C\r ",
956 },
957 }
958 }
959
960 fn text_html_b() -> TagTerminatedByteMatcher {
962 TagTerminatedByteMatcher {
963 matcher: ByteMatcher {
964 pattern: b"<B",
965 mask: b"\xFF\xDF",
966 content_type: mime::TEXT_HTML,
967 leading_ignore: b"\t\n\x0C\r ",
968 },
969 }
970 }
971
972 fn text_html_body() -> TagTerminatedByteMatcher {
974 TagTerminatedByteMatcher {
975 matcher: ByteMatcher {
976 pattern: b"<BODY",
977 mask: b"\xFF\xDF\xDF\xDF\xDF",
978 content_type: mime::TEXT_HTML,
979 leading_ignore: b"\t\n\x0C\r ",
980 },
981 }
982 }
983
984 fn text_html_br() -> TagTerminatedByteMatcher {
986 TagTerminatedByteMatcher {
987 matcher: ByteMatcher {
988 pattern: b"<BR",
989 mask: b"\xFF\xDF\xDF",
990 content_type: mime::TEXT_HTML,
991 leading_ignore: b"\t\n\x0C\r ",
992 },
993 }
994 }
995
996 fn text_html_p() -> TagTerminatedByteMatcher {
998 TagTerminatedByteMatcher {
999 matcher: ByteMatcher {
1000 pattern: b"<P",
1001 mask: b"\xFF\xDF",
1002 content_type: mime::TEXT_HTML,
1003 leading_ignore: b"\t\n\x0C\r ",
1004 },
1005 }
1006 }
1007
1008 fn text_html_comment() -> TagTerminatedByteMatcher {
1010 TagTerminatedByteMatcher {
1011 matcher: ByteMatcher {
1012 pattern: b"<!--",
1013 mask: b"\xFF\xFF\xFF\xFF",
1014 content_type: mime::TEXT_HTML,
1015 leading_ignore: b"\t\n\x0C\r ",
1016 },
1017 }
1018 }
1019
1020 fn text_xml() -> ByteMatcher {
1022 ByteMatcher {
1023 pattern: b"<?xml",
1024 mask: b"\xFF\xFF\xFF\xFF\xFF",
1025 content_type: mime::TEXT_XML,
1026 leading_ignore: b"\t\n\x0C\r ",
1027 }
1028 }
1029 fn application_pdf() -> ByteMatcher {
1031 ByteMatcher {
1032 pattern: b"%PDF-",
1033 mask: b"\xFF\xFF\xFF\xFF\xFF",
1034 content_type: mime::APPLICATION_PDF,
1035 leading_ignore: &[],
1036 }
1037 }
1038 fn application_vnd_ms_font_object() -> ByteMatcher {
1040 ByteMatcher {
1041 pattern: b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
1042 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
1043 \x00\x00LP",
1044 mask: b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
1045 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
1046 \x00\x00\xFF\xFF",
1047 content_type: "application/vnd.ms-fontobject".parse().unwrap(),
1048 leading_ignore: &[],
1049 }
1050 }
1051 fn true_type() -> ByteMatcher {
1053 ByteMatcher {
1054 pattern: b"\x00\x01\x00\x00",
1055 mask: b"\xFF\xFF\xFF\xFF",
1056 content_type: "application/font-sfnt".parse().unwrap(),
1057 leading_ignore: &[],
1058 }
1059 }
1060 fn open_type() -> ByteMatcher {
1062 ByteMatcher {
1063 pattern: b"OTTO",
1064 mask: b"\xFF\xFF\xFF\xFF",
1065 content_type: "application/font-sfnt".parse().unwrap(),
1066 leading_ignore: &[],
1067 }
1068 }
1069 fn true_type_collection() -> ByteMatcher {
1071 ByteMatcher {
1072 pattern: b"ttcf",
1073 mask: b"\xFF\xFF\xFF\xFF",
1074 content_type: "application/font-sfnt".parse().unwrap(),
1075 leading_ignore: &[],
1076 }
1077 }
1078 fn application_font_woff() -> ByteMatcher {
1080 ByteMatcher {
1081 pattern: b"wOFF",
1082 mask: b"\xFF\xFF\xFF\xFF",
1083 content_type: "application/font-woff".parse().unwrap(),
1084 leading_ignore: &[],
1085 }
1086 }
1087 fn application_x_gzip() -> ByteMatcher {
1089 ByteMatcher {
1090 pattern: b"\x1F\x8B\x08",
1091 mask: b"\xFF\xFF\xFF",
1092 content_type: "application/x-gzip".parse().unwrap(),
1093 leading_ignore: &[],
1094 }
1095 }
1096 fn application_zip() -> ByteMatcher {
1098 ByteMatcher {
1099 pattern: b"PK\x03\x04",
1100 mask: b"\xFF\xFF\xFF\xFF",
1101 content_type: "application/zip".parse().unwrap(),
1102 leading_ignore: &[],
1103 }
1104 }
1105 fn application_x_rar_compressed() -> ByteMatcher {
1107 ByteMatcher {
1108 pattern: b"Rar \x1A\x07\x00",
1109 mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
1110 content_type: "application/x-rar-compressed".parse().unwrap(),
1111 leading_ignore: &[],
1112 }
1113 }
1114 fn application_postscript() -> ByteMatcher {
1116 ByteMatcher {
1117 pattern: b"%!PS-Adobe-",
1118 mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
1119 content_type: "application/postscript".parse().unwrap(),
1120 leading_ignore: &[],
1121 }
1122 }
1123 fn text_plain_utf_16be_bom() -> ByteMatcher {
1125 ByteMatcher {
1126 pattern: b"\xFE\xFF\x00\x00",
1127 mask: b"\xFF\xFF\x00\x00",
1128 content_type: mime::TEXT_PLAIN,
1129 leading_ignore: &[],
1130 }
1131 }
1132 fn text_plain_utf_16le_bom() -> ByteMatcher {
1134 ByteMatcher {
1135 pattern: b"\xFF\xFE\x00\x00",
1136 mask: b"\xFF\xFF\x00\x00",
1137 content_type: mime::TEXT_PLAIN,
1138 leading_ignore: &[],
1139 }
1140 }
1141 fn text_plain_utf_8_bom() -> ByteMatcher {
1143 ByteMatcher {
1144 pattern: b"\xEF\xBB\xBF\x00",
1145 mask: b"\xFF\xFF\xFF\x00",
1146 content_type: mime::TEXT_PLAIN,
1147 leading_ignore: &[],
1148 }
1149 }
1150}