1use mime::{self, Mime};
6
7use crate::LoadContext;
8
/// Sniffs the MIME type of a resource from its supplied type and its bytes.
///
/// Holds one classifier per family of byte signatures; `classify` decides
/// which of them to consult based on the load context.
pub struct MimeClassifier {
    /// Signatures of image formats (icon, BMP, GIF, WebP, PNG, JPEG).
    image_classifier: GroupedClassifier,
    /// Signatures of audio and video formats, including the MP4 matcher.
    audio_video_classifier: GroupedClassifier,
    /// Signatures of HTML markup, XML and PDF.
    scriptable_classifier: GroupedClassifier,
    /// Byte-order-mark and PostScript signatures.
    plaintext_classifier: GroupedClassifier,
    /// Signatures of gzip, zip and rar archives.
    archive_classifier: GroupedClassifier,
    /// Last-resort classifier that always answers with either plain text or
    /// octet-stream.
    binary_or_plaintext: BinaryOrPlaintextClassifier,
    /// Signatures of font formats.
    font_classifier: GroupedClassifier,
}
18
/// Coarse category of a MIME type, as computed by
/// `MimeClassifier::get_media_type`.
///
/// Derives the common value-type traits so the category can be printed,
/// copied and compared in assertions.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MediaType {
    /// An XML MIME type (`text/xml`, `application/xml` or a `+xml` suffix).
    Xml,
    /// `text/html`.
    Html,
    /// An `audio/*` or `video/*` type, or `application/ogg`.
    AudioVideo,
    /// An `image/*` type.
    Image,
    /// One of the recognized JavaScript MIME types.
    JavaScript,
    /// A JSON MIME type (`application/json`, `text/json` or a `+json` suffix).
    Json,
    /// A `font/*` type or one of the recognized `application/*` font types.
    Font,
    /// `text/plain` or `text/vtt`.
    Text,
    /// `text/css`.
    Css,
}
31
/// Flag selecting the "text or binary" sniffing path of
/// `MimeClassifier::classify`.
///
/// Derives the common value-type traits so the flag can be printed, copied
/// and compared in assertions.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ApacheBugFlag {
    /// In a browsing context the resource is only classified as plain text or
    /// binary data.
    On,
    /// Regular classification applies.
    Off,
}
37
38impl ApacheBugFlag {
39 pub fn from_content_type(mime_type: Option<&Mime>) -> ApacheBugFlag {
41 if mime_type.is_some_and(|mime_type| {
43 *mime_type == mime::TEXT_PLAIN || *mime_type == mime::TEXT_PLAIN_UTF_8
44 }) {
45 ApacheBugFlag::On
46 } else {
47 ApacheBugFlag::Off
48 }
49 }
50}
51
/// Flag restricting how much content sniffing `MimeClassifier::classify` may
/// perform.
///
/// Derives the common value-type traits so the flag can be printed, copied
/// and compared in assertions.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum NoSniffFlag {
    /// A known supplied type is returned as-is in a browsing context, and
    /// unknown types are not checked against the scriptable signatures.
    On,
    /// Sniffing is unrestricted.
    Off,
}
57
58impl Default for MimeClassifier {
59 fn default() -> Self {
60 Self {
61 image_classifier: GroupedClassifier::image_classifer(),
62 audio_video_classifier: GroupedClassifier::audio_video_classifier(),
63 scriptable_classifier: GroupedClassifier::scriptable_classifier(),
64 plaintext_classifier: GroupedClassifier::plaintext_classifier(),
65 archive_classifier: GroupedClassifier::archive_classifier(),
66 binary_or_plaintext: BinaryOrPlaintextClassifier,
67 font_classifier: GroupedClassifier::font_classifier(),
68 }
69 }
70}
71
72impl MimeClassifier {
73 pub fn classify<'a>(
75 &'a self,
76 context: LoadContext,
77 no_sniff_flag: NoSniffFlag,
78 apache_bug_flag: ApacheBugFlag,
79 supplied_type: &Option<Mime>,
80 data: &'a [u8],
81 ) -> Mime {
82 let supplied_type_or_octet_stream = supplied_type
83 .clone()
84 .unwrap_or(mime::APPLICATION_OCTET_STREAM);
85 if Self::is_xml(&supplied_type_or_octet_stream) ||
88 Self::is_html(&supplied_type_or_octet_stream)
89 {
90 return supplied_type_or_octet_stream;
91 }
92 match context {
93 LoadContext::Browsing => match *supplied_type {
94 None => self.sniff_unknown_type(no_sniff_flag, data),
98 Some(ref supplied_type) => {
99 if MimeClassifier::is_explicit_unknown(supplied_type) {
100 return self.sniff_unknown_type(no_sniff_flag, data);
101 }
102 if no_sniff_flag == NoSniffFlag::On {
105 return supplied_type.clone();
106 }
107 if apache_bug_flag == ApacheBugFlag::On {
110 return self.sniff_text_or_data(data);
111 }
112 match MimeClassifier::get_media_type(supplied_type) {
113 Some(MediaType::Image) => {
117 self.image_classifier.classify(data)
119 },
120 Some(MediaType::AudioVideo) => {
124 self.audio_video_classifier.classify(data)
126 },
127 Some(MediaType::Html) | Some(MediaType::Xml) => unreachable!(),
128 _ => None,
129 }
130 .unwrap_or(supplied_type.clone())
132 },
133 },
134 LoadContext::Image => {
135 match MimeClassifier::maybe_get_media_type(supplied_type) {
137 Some(MediaType::Xml) => None,
138 _ => self.image_classifier.classify(data),
139 }
140 .unwrap_or(supplied_type_or_octet_stream)
141 },
142 LoadContext::AudioVideo => {
143 match MimeClassifier::maybe_get_media_type(supplied_type) {
145 Some(MediaType::Xml) => None,
146 _ => self.audio_video_classifier.classify(data),
147 }
148 .unwrap_or(supplied_type_or_octet_stream)
149 },
150 LoadContext::Plugin => {
151 match *supplied_type {
156 None => mime::APPLICATION_OCTET_STREAM,
157 _ => supplied_type_or_octet_stream,
158 }
159 },
160 LoadContext::Style => {
161 match *supplied_type {
166 None => mime::TEXT_CSS,
167 _ => supplied_type_or_octet_stream,
168 }
169 },
170 LoadContext::Script => {
171 match *supplied_type {
176 None => mime::TEXT_JAVASCRIPT,
177 _ => supplied_type_or_octet_stream,
178 }
179 },
180 LoadContext::Font => {
181 match MimeClassifier::maybe_get_media_type(supplied_type) {
183 Some(MediaType::Xml) => None,
184 _ => self.font_classifier.classify(data),
185 }
186 .unwrap_or(supplied_type_or_octet_stream)
187 },
188 LoadContext::TextTrack => {
189 "text/vtt".parse().unwrap()
194 },
195 LoadContext::CacheManifest => {
196 "text/cache-manifest".parse().unwrap()
201 },
202 }
203 }
204
205 pub fn validate(&self) -> Result<(), String> {
206 self.image_classifier.validate()?;
207 self.audio_video_classifier.validate()?;
208 self.scriptable_classifier.validate()?;
209 self.plaintext_classifier.validate()?;
210 self.archive_classifier.validate()?;
211 self.binary_or_plaintext.validate()?;
212 self.font_classifier.validate()?;
213 Ok(())
214 }
215
216 fn sniff_unknown_type(&self, no_sniff_flag: NoSniffFlag, data: &[u8]) -> Mime {
218 let should_sniff_scriptable = no_sniff_flag == NoSniffFlag::Off;
219 let sniffed = if should_sniff_scriptable {
220 self.scriptable_classifier.classify(data)
221 } else {
222 None
223 };
224
225 sniffed
226 .or_else(|| self.plaintext_classifier.classify(data))
227 .or_else(|| self.image_classifier.classify(data))
228 .or_else(|| self.audio_video_classifier.classify(data))
229 .or_else(|| self.archive_classifier.classify(data))
230 .or_else(|| self.binary_or_plaintext.classify(data))
231 .expect("BinaryOrPlaintextClassifier always succeeds")
232 }
233
234 fn sniff_text_or_data<'a>(&'a self, data: &'a [u8]) -> Mime {
235 self.binary_or_plaintext
236 .classify(data)
237 .expect("BinaryOrPlaintextClassifier always succeeds")
238 }
239
240 fn is_xml(mt: &Mime) -> bool {
242 mt.suffix() == Some(mime::XML) ||
243 mt.essence_str() == "text/xml" ||
244 mt.essence_str() == "application/xml"
245 }
246
247 fn is_html(mt: &Mime) -> bool {
249 mt.essence_str() == "text/html"
250 }
251
252 fn is_image(mt: &Mime) -> bool {
254 mt.type_() == mime::IMAGE
255 }
256
257 fn is_audio_video(mt: &Mime) -> bool {
259 mt.type_() == mime::AUDIO ||
260 mt.type_() == mime::VIDEO ||
261 mt.essence_str() == "application/ogg"
262 }
263
264 fn is_explicit_unknown(mt: &Mime) -> bool {
265 mt.type_().as_str() == "unknown" && mt.subtype().as_str() == "unknown" ||
266 mt.type_() == mime::APPLICATION && mt.subtype().as_str() == "unknown" ||
267 mt.type_() == mime::STAR && mt.subtype() == mime::STAR
268 }
269
270 fn is_javascript(mt: &Mime) -> bool {
272 (mt.type_() == mime::APPLICATION &&
273 (["ecmascript", "javascript", "x-ecmascript", "x-javascript"]
274 .contains(&mt.subtype().as_str()))) ||
275 (mt.type_() == mime::TEXT &&
276 ([
277 "ecmascript",
278 "javascript",
279 "javascript1.0",
280 "javascript1.1",
281 "javascript1.2",
282 "javascript1.3",
283 "javascript1.4",
284 "javascript1.5",
285 "jscript",
286 "livescript",
287 "x-ecmascript",
288 "x-javascript",
289 ]
290 .contains(&mt.subtype().as_str())))
291 }
292
293 fn is_json(mt: &Mime) -> bool {
295 mt.suffix() == Some(mime::JSON) ||
296 (mt.subtype() == mime::JSON &&
297 (mt.type_() == mime::APPLICATION || mt.type_() == mime::TEXT))
298 }
299
300 fn is_font(mt: &Mime) -> bool {
302 mt.type_() == mime::FONT ||
303 (mt.type_() == mime::APPLICATION &&
304 ([
305 "font-cff",
306 "font-off",
307 "font-sfnt",
308 "font-ttf",
309 "font-woff",
310 "vnd.ms-fontobject",
311 "vnd.ms-opentype",
312 ]
313 .contains(&mt.subtype().as_str())))
314 }
315
316 fn is_text(mt: &Mime) -> bool {
317 *mt == mime::TEXT_PLAIN || mt.essence_str() == "text/vtt"
318 }
319
320 fn is_css(mt: &Mime) -> bool {
321 mt.essence_str() == "text/css"
322 }
323
324 pub fn get_media_type(mime: &Mime) -> Option<MediaType> {
325 if MimeClassifier::is_xml(mime) {
326 Some(MediaType::Xml)
327 } else if MimeClassifier::is_html(mime) {
328 Some(MediaType::Html)
329 } else if MimeClassifier::is_image(mime) {
330 Some(MediaType::Image)
331 } else if MimeClassifier::is_audio_video(mime) {
332 Some(MediaType::AudioVideo)
333 } else if MimeClassifier::is_javascript(mime) {
334 Some(MediaType::JavaScript)
335 } else if MimeClassifier::is_font(mime) {
336 Some(MediaType::Font)
337 } else if MimeClassifier::is_json(mime) {
338 Some(MediaType::Json)
339 } else if MimeClassifier::is_text(mime) {
340 Some(MediaType::Text)
341 } else if MimeClassifier::is_css(mime) {
342 Some(MediaType::Css)
343 } else {
344 None
345 }
346 }
347
348 fn maybe_get_media_type(supplied_type: &Option<Mime>) -> Option<MediaType> {
349 supplied_type
350 .as_ref()
351 .and_then(MimeClassifier::get_media_type)
352 }
353}
354
/// Common interface of every signature matcher used by the classifiers.
trait MIMEChecker {
    /// Returns the MIME type detected for `data`, or `None` when this checker
    /// does not recognize it.
    fn classify(&self, data: &[u8]) -> Option<Mime>;
    /// Checks the checker's own configuration, returning a description of the
    /// problem when it is inconsistent.
    fn validate(&self) -> Result<(), String>;
}
361
362struct ByteMatcher {
363 pattern: &'static [u8],
364 mask: &'static [u8],
365 leading_ignore: &'static [u8],
366 content_type: Mime,
367}
368
369impl ByteMatcher {
370 fn matches(&self, data: &[u8]) -> Option<usize> {
371 if data.len() < self.pattern.len() {
372 None
373 } else if data == self.pattern {
374 Some(self.pattern.len())
375 } else {
376 data[..data.len() - self.pattern.len() + 1]
377 .iter()
378 .position(|x| !self.leading_ignore.contains(x))
379 .and_then(|start| {
380 if data[start..]
381 .iter()
382 .zip(self.pattern.iter())
383 .zip(self.mask.iter())
384 .all(|((&data, &pattern), &mask)| (data & mask) == pattern)
385 {
386 Some(start + self.pattern.len())
387 } else {
388 None
389 }
390 })
391 }
392 }
393}
394
395impl MIMEChecker for ByteMatcher {
396 fn classify(&self, data: &[u8]) -> Option<Mime> {
397 self.matches(data).map(|_| self.content_type.clone())
398 }
399
400 fn validate(&self) -> Result<(), String> {
401 if self.pattern.is_empty() {
402 return Err(format!("Zero length pattern for {:?}", self.content_type));
403 }
404 if self.pattern.len() != self.mask.len() {
405 return Err(format!(
406 "Unequal pattern and mask length for {:?}",
407 self.content_type
408 ));
409 }
410 if self
411 .pattern
412 .iter()
413 .zip(self.mask.iter())
414 .any(|(&pattern, &mask)| pattern & mask != pattern)
415 {
416 return Err(format!(
417 "Pattern not pre-masked for {:?}",
418 self.content_type
419 ));
420 }
421 Ok(())
422 }
423}
424
425struct TagTerminatedByteMatcher {
426 matcher: ByteMatcher,
427}
428
429impl MIMEChecker for TagTerminatedByteMatcher {
430 fn classify(&self, data: &[u8]) -> Option<Mime> {
431 self.matcher.matches(data).and_then(|j| {
432 if j < data.len() && (data[j] == b' ' || data[j] == b'>') {
433 Some(self.matcher.content_type.clone())
434 } else {
435 None
436 }
437 })
438 }
439
440 fn validate(&self) -> Result<(), String> {
441 self.matcher.validate()
442 }
443}
444
/// Detects MP4 data by inspecting a leading `ftyp` box.
pub struct Mp4Matcher;

impl Mp4Matcher {
    /// Returns `true` when `data` starts with an `ftyp` box whose major brand,
    /// or one of whose compatible brands, begins with `mp4`.
    ///
    /// The first four bytes are read as the big-endian box size. The input is
    /// rejected when it is shorter than 12 bytes, shorter than the declared
    /// box size, or when the box size is not a multiple of four.
    ///
    /// This never panics: a declared box size below 16 leaves no room for
    /// compatible brands, so the brand list is simply treated as empty.
    pub fn matches(&self, data: &[u8]) -> bool {
        const FTYP: &[u8] = b"ftyp";
        const MP4: &[u8] = b"mp4";

        // Size field (4 bytes) + box type (4 bytes) + major brand (4 bytes).
        if data.len() < 12 {
            return false;
        }

        let box_size = u32::from_be_bytes([data[0], data[1], data[2], data[3]]) as usize;
        if data.len() < box_size || box_size % 4 != 0 {
            return false;
        }

        if !data[4..].starts_with(FTYP) {
            return false;
        }

        if data[8..].starts_with(MP4) {
            return true;
        }

        // Compatible brands start at offset 16, after the 4-byte minor
        // version. `get` returns `None` instead of panicking when the declared
        // box ends before that offset.
        data.get(16..box_size).is_some_and(|brands| {
            brands
                .chunks_exact(4)
                .any(|brand| brand.starts_with(MP4))
        })
    }
}
489impl MIMEChecker for Mp4Matcher {
490 fn classify(&self, data: &[u8]) -> Option<Mime> {
491 if self.matches(data) {
492 Some("video/mp4".parse().unwrap())
493 } else {
494 None
495 }
496 }
497
498 fn validate(&self) -> Result<(), String> {
499 Ok(())
500 }
501}
502
503struct BinaryOrPlaintextClassifier;
504
505impl BinaryOrPlaintextClassifier {
506 fn classify_impl(&self, data: &[u8]) -> Mime {
508 if data.starts_with(&[0xFFu8, 0xFEu8]) ||
516 data.starts_with(&[0xFEu8, 0xFFu8]) ||
517 data.starts_with(&[0xEFu8, 0xBBu8, 0xBFu8])
518 {
519 mime::TEXT_PLAIN
520 } else if data.iter().any(|&x| {
521 x <= 0x08u8 ||
522 x == 0x0Bu8 ||
523 (0x0Eu8..=0x1Au8).contains(&x) ||
524 (0x1Cu8..=0x1Fu8).contains(&x)
525 }) {
526 mime::APPLICATION_OCTET_STREAM
528 } else {
529 mime::TEXT_PLAIN
532 }
533 }
534}
535impl MIMEChecker for BinaryOrPlaintextClassifier {
536 fn classify(&self, data: &[u8]) -> Option<Mime> {
537 Some(self.classify_impl(data))
538 }
539
540 fn validate(&self) -> Result<(), String> {
541 Ok(())
542 }
543}
544struct GroupedClassifier {
545 byte_matchers: Vec<Box<dyn MIMEChecker + Send + Sync>>,
546}
547impl GroupedClassifier {
548 fn image_classifer() -> GroupedClassifier {
549 GroupedClassifier {
550 byte_matchers: vec![
551 Box::new(ByteMatcher::image_x_icon()),
554 Box::new(ByteMatcher::image_x_icon_cursor()),
555 Box::new(ByteMatcher::image_bmp()),
556 Box::new(ByteMatcher::image_gif89a()),
557 Box::new(ByteMatcher::image_gif87a()),
558 Box::new(ByteMatcher::image_webp()),
559 Box::new(ByteMatcher::image_png()),
560 Box::new(ByteMatcher::image_jpeg()),
561 ],
562 }
563 }
564 fn audio_video_classifier() -> GroupedClassifier {
565 GroupedClassifier {
566 byte_matchers: vec![
567 Box::new(ByteMatcher::video_webm()),
568 Box::new(ByteMatcher::audio_basic()),
569 Box::new(ByteMatcher::audio_aiff()),
570 Box::new(ByteMatcher::audio_mpeg()),
571 Box::new(ByteMatcher::application_ogg()),
572 Box::new(ByteMatcher::audio_midi()),
573 Box::new(ByteMatcher::video_avi()),
574 Box::new(ByteMatcher::audio_wave()),
575 Box::new(Mp4Matcher),
576 ],
577 }
578 }
579 fn scriptable_classifier() -> GroupedClassifier {
580 GroupedClassifier {
581 byte_matchers: vec![
582 Box::new(ByteMatcher::text_html_doctype()),
583 Box::new(ByteMatcher::text_html_page()),
584 Box::new(ByteMatcher::text_html_head()),
585 Box::new(ByteMatcher::text_html_script()),
586 Box::new(ByteMatcher::text_html_iframe()),
587 Box::new(ByteMatcher::text_html_h1()),
588 Box::new(ByteMatcher::text_html_div()),
589 Box::new(ByteMatcher::text_html_font()),
590 Box::new(ByteMatcher::text_html_table()),
591 Box::new(ByteMatcher::text_html_a()),
592 Box::new(ByteMatcher::text_html_style()),
593 Box::new(ByteMatcher::text_html_title()),
594 Box::new(ByteMatcher::text_html_b()),
595 Box::new(ByteMatcher::text_html_body()),
596 Box::new(ByteMatcher::text_html_br()),
597 Box::new(ByteMatcher::text_html_p()),
598 Box::new(ByteMatcher::text_html_comment()),
599 Box::new(ByteMatcher::text_xml()),
600 Box::new(ByteMatcher::application_pdf()),
601 ],
602 }
603 }
604 fn plaintext_classifier() -> GroupedClassifier {
605 GroupedClassifier {
606 byte_matchers: vec![
607 Box::new(ByteMatcher::text_plain_utf_8_bom()),
608 Box::new(ByteMatcher::text_plain_utf_16le_bom()),
609 Box::new(ByteMatcher::text_plain_utf_16be_bom()),
610 Box::new(ByteMatcher::application_postscript()),
611 ],
612 }
613 }
614 fn archive_classifier() -> GroupedClassifier {
615 GroupedClassifier {
616 byte_matchers: vec![
617 Box::new(ByteMatcher::application_x_gzip()),
618 Box::new(ByteMatcher::application_zip()),
619 Box::new(ByteMatcher::application_x_rar_compressed()),
620 ],
621 }
622 }
623
624 fn font_classifier() -> GroupedClassifier {
625 GroupedClassifier {
626 byte_matchers: vec![
627 Box::new(ByteMatcher::application_font_woff()),
628 Box::new(ByteMatcher::true_type_collection()),
629 Box::new(ByteMatcher::open_type()),
630 Box::new(ByteMatcher::true_type()),
631 Box::new(ByteMatcher::application_vnd_ms_font_object()),
632 ],
633 }
634 }
635}
636impl MIMEChecker for GroupedClassifier {
637 fn classify(&self, data: &[u8]) -> Option<Mime> {
638 self.byte_matchers
639 .iter()
640 .filter_map(|matcher| matcher.classify(data))
641 .next()
642 }
643
644 fn validate(&self) -> Result<(), String> {
645 for byte_matcher in &self.byte_matchers {
646 byte_matcher.validate()?
647 }
648 Ok(())
649 }
650}
651
652impl ByteMatcher {
655 fn image_x_icon() -> ByteMatcher {
657 ByteMatcher {
658 pattern: b"\x00\x00\x01\x00",
659 mask: b"\xFF\xFF\xFF\xFF",
660 content_type: "image/x-icon".parse().unwrap(),
661 leading_ignore: &[],
662 }
663 }
664 fn image_x_icon_cursor() -> ByteMatcher {
666 ByteMatcher {
667 pattern: b"\x00\x00\x02\x00",
668 mask: b"\xFF\xFF\xFF\xFF",
669 content_type: "image/x-icon".parse().unwrap(),
670 leading_ignore: &[],
671 }
672 }
673 fn image_bmp() -> ByteMatcher {
675 ByteMatcher {
676 pattern: b"BM",
677 mask: b"\xFF\xFF",
678 content_type: mime::IMAGE_BMP,
679 leading_ignore: &[],
680 }
681 }
682 fn image_gif89a() -> ByteMatcher {
684 ByteMatcher {
685 pattern: b"GIF89a",
686 mask: b"\xFF\xFF\xFF\xFF\xFF\xFF",
687 content_type: mime::IMAGE_GIF,
688 leading_ignore: &[],
689 }
690 }
691 fn image_gif87a() -> ByteMatcher {
693 ByteMatcher {
694 pattern: b"GIF87a",
695 mask: b"\xFF\xFF\xFF\xFF\xFF\xFF",
696 content_type: mime::IMAGE_GIF,
697 leading_ignore: &[],
698 }
699 }
700 fn image_webp() -> ByteMatcher {
702 ByteMatcher {
703 pattern: b"RIFF\x00\x00\x00\x00WEBPVP",
704 mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF",
705 content_type: "image/webp".parse().unwrap(),
706 leading_ignore: &[],
707 }
708 }
709 fn image_png() -> ByteMatcher {
712 ByteMatcher {
713 pattern: b"\x89PNG\r\n\x1A\n",
714 mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
715 content_type: mime::IMAGE_PNG,
716 leading_ignore: &[],
717 }
718 }
719 fn image_jpeg() -> ByteMatcher {
721 ByteMatcher {
722 pattern: b"\xFF\xD8\xFF",
723 mask: b"\xFF\xFF\xFF",
724 content_type: mime::IMAGE_JPEG,
725 leading_ignore: &[],
726 }
727 }
728 fn video_webm() -> ByteMatcher {
730 ByteMatcher {
731 pattern: b"\x1A\x45\xDF\xA3",
732 mask: b"\xFF\xFF\xFF\xFF",
733 content_type: "video/webm".parse().unwrap(),
734 leading_ignore: &[],
735 }
736 }
737 fn audio_basic() -> ByteMatcher {
739 ByteMatcher {
740 pattern: b".snd",
741 mask: b"\xFF\xFF\xFF\xFF",
742 content_type: "audio/basic".parse().unwrap(),
743 leading_ignore: &[],
744 }
745 }
746 fn audio_aiff() -> ByteMatcher {
748 ByteMatcher {
749 pattern: b"FORM\x00\x00\x00\x00AIFF",
750 mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
751 content_type: "audio/aiff".parse().unwrap(),
752 leading_ignore: &[],
753 }
754 }
755 fn audio_mpeg() -> ByteMatcher {
757 ByteMatcher {
758 pattern: b"ID3",
759 mask: b"\xFF\xFF\xFF",
760 content_type: "audio/mpeg".parse().unwrap(),
761 leading_ignore: &[],
762 }
763 }
764 fn application_ogg() -> ByteMatcher {
766 ByteMatcher {
767 pattern: b"OggS\x00",
768 mask: b"\xFF\xFF\xFF\xFF\xFF",
769 content_type: "application/ogg".parse().unwrap(),
770 leading_ignore: &[],
771 }
772 }
773 fn audio_midi() -> ByteMatcher {
776 ByteMatcher {
777 pattern: b"MThd\x00\x00\x00\x06",
778 mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
779 content_type: "audio/midi".parse().unwrap(),
780 leading_ignore: &[],
781 }
782 }
783 fn video_avi() -> ByteMatcher {
785 ByteMatcher {
786 pattern: b"RIFF\x00\x00\x00\x00AVI ",
787 mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
788 content_type: "video/avi".parse().unwrap(),
789 leading_ignore: &[],
790 }
791 }
792 fn audio_wave() -> ByteMatcher {
794 ByteMatcher {
795 pattern: b"RIFF\x00\x00\x00\x00WAVE",
796 mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
797 content_type: "audio/wave".parse().unwrap(),
798 leading_ignore: &[],
799 }
800 }
801 fn text_html_doctype() -> TagTerminatedByteMatcher {
803 TagTerminatedByteMatcher {
804 matcher: ByteMatcher {
805 pattern: b"<!DOCTYPE HTML",
806 mask: b"\xFF\xFF\xDF\xDF\xDF\xDF\xDF\xDF\xDF\xFF\xDF\xDF\xDF\xDF",
807 content_type: mime::TEXT_HTML,
808 leading_ignore: b"\t\n\x0C\r ",
809 },
810 }
811 }
812
813 fn text_html_page() -> TagTerminatedByteMatcher {
815 TagTerminatedByteMatcher {
816 matcher: ByteMatcher {
817 pattern: b"<HTML",
818 mask: b"\xFF\xDF\xDF\xDF\xDF",
819 content_type: mime::TEXT_HTML,
820 leading_ignore: b"\t\n\x0C\r ",
821 },
822 }
823 }
824
825 fn text_html_head() -> TagTerminatedByteMatcher {
827 TagTerminatedByteMatcher {
828 matcher: ByteMatcher {
829 pattern: b"<HEAD",
830 mask: b"\xFF\xDF\xDF\xDF\xDF",
831 content_type: mime::TEXT_HTML,
832 leading_ignore: b"\t\n\x0C\r ",
833 },
834 }
835 }
836
837 fn text_html_script() -> TagTerminatedByteMatcher {
839 TagTerminatedByteMatcher {
840 matcher: ByteMatcher {
841 pattern: b"<SCRIPT",
842 mask: b"\xFF\xDF\xDF\xDF\xDF\xDF\xDF",
843 content_type: mime::TEXT_HTML,
844 leading_ignore: b"\t\n\x0C\r ",
845 },
846 }
847 }
848
849 fn text_html_iframe() -> TagTerminatedByteMatcher {
851 TagTerminatedByteMatcher {
852 matcher: ByteMatcher {
853 pattern: b"<IFRAME",
854 mask: b"\xFF\xDF\xDF\xDF\xDF\xDF\xDF",
855 content_type: mime::TEXT_HTML,
856 leading_ignore: b"\t\n\x0C\r ",
857 },
858 }
859 }
860
861 fn text_html_h1() -> TagTerminatedByteMatcher {
863 TagTerminatedByteMatcher {
864 matcher: ByteMatcher {
865 pattern: b"<H1",
866 mask: b"\xFF\xDF\xFF",
867 content_type: mime::TEXT_HTML,
868 leading_ignore: b"\t\n\x0C\r ",
869 },
870 }
871 }
872
873 fn text_html_div() -> TagTerminatedByteMatcher {
875 TagTerminatedByteMatcher {
876 matcher: ByteMatcher {
877 pattern: b"<DIV",
878 mask: b"\xFF\xDF\xDF\xDF",
879 content_type: mime::TEXT_HTML,
880 leading_ignore: b"\t\n\x0C\r ",
881 },
882 }
883 }
884
885 fn text_html_font() -> TagTerminatedByteMatcher {
887 TagTerminatedByteMatcher {
888 matcher: ByteMatcher {
889 pattern: b"<FONT",
890 mask: b"\xFF\xDF\xDF\xDF\xDF",
891 content_type: mime::TEXT_HTML,
892 leading_ignore: b"\t\n\x0C\r ",
893 },
894 }
895 }
896
897 fn text_html_table() -> TagTerminatedByteMatcher {
899 TagTerminatedByteMatcher {
900 matcher: ByteMatcher {
901 pattern: b"<TABLE",
902 mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
903 content_type: mime::TEXT_HTML,
904 leading_ignore: b"\t\n\x0C\r ",
905 },
906 }
907 }
908
909 fn text_html_a() -> TagTerminatedByteMatcher {
911 TagTerminatedByteMatcher {
912 matcher: ByteMatcher {
913 pattern: b"<A",
914 mask: b"\xFF\xDF",
915 content_type: mime::TEXT_HTML,
916 leading_ignore: b"\t\n\x0C\r ",
917 },
918 }
919 }
920
921 fn text_html_style() -> TagTerminatedByteMatcher {
923 TagTerminatedByteMatcher {
924 matcher: ByteMatcher {
925 pattern: b"<STYLE",
926 mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
927 content_type: mime::TEXT_HTML,
928 leading_ignore: b"\t\n\x0C\r ",
929 },
930 }
931 }
932
933 fn text_html_title() -> TagTerminatedByteMatcher {
935 TagTerminatedByteMatcher {
936 matcher: ByteMatcher {
937 pattern: b"<TITLE",
938 mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
939 content_type: mime::TEXT_HTML,
940 leading_ignore: b"\t\n\x0C\r ",
941 },
942 }
943 }
944
945 fn text_html_b() -> TagTerminatedByteMatcher {
947 TagTerminatedByteMatcher {
948 matcher: ByteMatcher {
949 pattern: b"<B",
950 mask: b"\xFF\xDF",
951 content_type: mime::TEXT_HTML,
952 leading_ignore: b"\t\n\x0C\r ",
953 },
954 }
955 }
956
957 fn text_html_body() -> TagTerminatedByteMatcher {
959 TagTerminatedByteMatcher {
960 matcher: ByteMatcher {
961 pattern: b"<BODY",
962 mask: b"\xFF\xDF\xDF\xDF\xDF",
963 content_type: mime::TEXT_HTML,
964 leading_ignore: b"\t\n\x0C\r ",
965 },
966 }
967 }
968
969 fn text_html_br() -> TagTerminatedByteMatcher {
971 TagTerminatedByteMatcher {
972 matcher: ByteMatcher {
973 pattern: b"<BR",
974 mask: b"\xFF\xDF\xDF",
975 content_type: mime::TEXT_HTML,
976 leading_ignore: b"\t\n\x0C\r ",
977 },
978 }
979 }
980
981 fn text_html_p() -> TagTerminatedByteMatcher {
983 TagTerminatedByteMatcher {
984 matcher: ByteMatcher {
985 pattern: b"<P",
986 mask: b"\xFF\xDF",
987 content_type: mime::TEXT_HTML,
988 leading_ignore: b"\t\n\x0C\r ",
989 },
990 }
991 }
992
993 fn text_html_comment() -> TagTerminatedByteMatcher {
995 TagTerminatedByteMatcher {
996 matcher: ByteMatcher {
997 pattern: b"<!--",
998 mask: b"\xFF\xFF\xFF\xFF",
999 content_type: mime::TEXT_HTML,
1000 leading_ignore: b"\t\n\x0C\r ",
1001 },
1002 }
1003 }
1004
1005 fn text_xml() -> ByteMatcher {
1007 ByteMatcher {
1008 pattern: b"<?xml",
1009 mask: b"\xFF\xFF\xFF\xFF\xFF",
1010 content_type: mime::TEXT_XML,
1011 leading_ignore: b"\t\n\x0C\r ",
1012 }
1013 }
1014 fn application_pdf() -> ByteMatcher {
1016 ByteMatcher {
1017 pattern: b"%PDF-",
1018 mask: b"\xFF\xFF\xFF\xFF\xFF",
1019 content_type: mime::APPLICATION_PDF,
1020 leading_ignore: &[],
1021 }
1022 }
1023 fn application_vnd_ms_font_object() -> ByteMatcher {
1025 ByteMatcher {
1026 pattern: b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
1027 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
1028 \x00\x00LP",
1029 mask: b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
1030 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
1031 \x00\x00\xFF\xFF",
1032 content_type: "application/vnd.ms-fontobject".parse().unwrap(),
1033 leading_ignore: &[],
1034 }
1035 }
1036 fn true_type() -> ByteMatcher {
1038 ByteMatcher {
1039 pattern: b"\x00\x01\x00\x00",
1040 mask: b"\xFF\xFF\xFF\xFF",
1041 content_type: "application/font-sfnt".parse().unwrap(),
1042 leading_ignore: &[],
1043 }
1044 }
1045 fn open_type() -> ByteMatcher {
1047 ByteMatcher {
1048 pattern: b"OTTO",
1049 mask: b"\xFF\xFF\xFF\xFF",
1050 content_type: "application/font-sfnt".parse().unwrap(),
1051 leading_ignore: &[],
1052 }
1053 }
1054 fn true_type_collection() -> ByteMatcher {
1056 ByteMatcher {
1057 pattern: b"ttcf",
1058 mask: b"\xFF\xFF\xFF\xFF",
1059 content_type: "application/font-sfnt".parse().unwrap(),
1060 leading_ignore: &[],
1061 }
1062 }
1063 fn application_font_woff() -> ByteMatcher {
1065 ByteMatcher {
1066 pattern: b"wOFF",
1067 mask: b"\xFF\xFF\xFF\xFF",
1068 content_type: "application/font-woff".parse().unwrap(),
1069 leading_ignore: &[],
1070 }
1071 }
1072 fn application_x_gzip() -> ByteMatcher {
1074 ByteMatcher {
1075 pattern: b"\x1F\x8B\x08",
1076 mask: b"\xFF\xFF\xFF",
1077 content_type: "application/x-gzip".parse().unwrap(),
1078 leading_ignore: &[],
1079 }
1080 }
1081 fn application_zip() -> ByteMatcher {
1083 ByteMatcher {
1084 pattern: b"PK\x03\x04",
1085 mask: b"\xFF\xFF\xFF\xFF",
1086 content_type: "application/zip".parse().unwrap(),
1087 leading_ignore: &[],
1088 }
1089 }
1090 fn application_x_rar_compressed() -> ByteMatcher {
1092 ByteMatcher {
1093 pattern: b"Rar \x1A\x07\x00",
1094 mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
1095 content_type: "application/x-rar-compressed".parse().unwrap(),
1096 leading_ignore: &[],
1097 }
1098 }
1099 fn application_postscript() -> ByteMatcher {
1101 ByteMatcher {
1102 pattern: b"%!PS-Adobe-",
1103 mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
1104 content_type: "application/postscript".parse().unwrap(),
1105 leading_ignore: &[],
1106 }
1107 }
1108 fn text_plain_utf_16be_bom() -> ByteMatcher {
1110 ByteMatcher {
1111 pattern: b"\xFE\xFF\x00\x00",
1112 mask: b"\xFF\xFF\x00\x00",
1113 content_type: mime::TEXT_PLAIN,
1114 leading_ignore: &[],
1115 }
1116 }
1117 fn text_plain_utf_16le_bom() -> ByteMatcher {
1119 ByteMatcher {
1120 pattern: b"\xFF\xFE\x00\x00",
1121 mask: b"\xFF\xFF\x00\x00",
1122 content_type: mime::TEXT_PLAIN,
1123 leading_ignore: &[],
1124 }
1125 }
1126 fn text_plain_utf_8_bom() -> ByteMatcher {
1128 ByteMatcher {
1129 pattern: b"\xEF\xBB\xBF\x00",
1130 mask: b"\xFF\xFF\xFF\x00",
1131 content_type: mime::TEXT_PLAIN,
1132 leading_ignore: &[],
1133 }
1134 }
1135}