1use mime::{self, Mime};
6
7use crate::LoadContext;
8
/// Sniffs the MIME type of a resource from its leading bytes.
///
/// Each field is an independent classifier for one family of byte signatures;
/// `classify` and `sniff_unknown_type` decide which of them are consulted and
/// in which order.
pub struct MimeClassifier {
    /// Image signatures (icon, BMP, GIF, WebP, PNG, JPEG).
    image_classifier: GroupedClassifier,
    /// Audio and video signatures (WebM, AIFF, MP3, Ogg, MIDI, AVI, WAVE, MP4, ...).
    audio_video_classifier: GroupedClassifier,
    /// Signatures of content that could execute script: HTML tags, XML and PDF.
    scriptable_classifier: GroupedClassifier,
    /// Byte-order-mark and PostScript signatures.
    plaintext_classifier: GroupedClassifier,
    /// Archive signatures (gzip, zip, rar).
    archive_classifier: GroupedClassifier,
    /// Last-resort classifier that picks between `text/plain` and
    /// `application/octet-stream`; it always yields a result.
    binary_or_plaintext: BinaryOrPlaintextClassifier,
    /// Font signatures (WOFF, TrueType, OpenType, EOT).
    font_classifier: GroupedClassifier,
}
18
/// Coarse category of a MIME type, as computed by
/// `MimeClassifier::get_media_type`.
///
/// `Debug`, `Clone`, `Copy` and `Eq` are derived in addition to `PartialEq`
/// so the value can be logged, compared in assertions and passed around
/// freely; the enum has no payload, so all of these are free.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MediaType {
    /// XML types (`text/xml`, `application/xml`, or a non-image `+xml` suffix).
    Xml,
    /// `text/html`.
    Html,
    /// `audio/*`, `video/*` or `application/ogg`.
    AudioVideo,
    /// `image/*`.
    Image,
    /// One of the recognised JavaScript MIME types.
    JavaScript,
    /// JSON types (`application/json`, `text/json`, or a `+json` suffix).
    Json,
    /// `font/*` or one of the legacy `application/*` font types.
    Font,
    /// `text/plain` or `text/vtt`.
    Text,
    /// `text/css`.
    Css,
}
31
/// Whether the "Apache bug" workaround applies to a response.
///
/// When `On`, `MimeClassifier::classify` ignores the supplied type in a
/// browsing context and only decides between text and binary data.
///
/// `Debug`, `Clone`, `Copy` and `Eq` are derived in addition to `PartialEq`
/// so the flag can be logged and used in assertions.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ApacheBugFlag {
    /// The workaround applies.
    On,
    /// The workaround does not apply.
    Off,
}
37
38impl ApacheBugFlag {
39 pub fn from_content_type(mime_type: Option<&Mime>) -> ApacheBugFlag {
41 if mime_type.is_some_and(|mime_type| {
43 *mime_type == mime::TEXT_PLAIN || *mime_type == mime::TEXT_PLAIN_UTF_8
44 }) {
45 ApacheBugFlag::On
46 } else {
47 ApacheBugFlag::Off
48 }
49 }
50}
51
/// Whether content sniffing has been disabled for a response.
///
/// When `On`, `MimeClassifier` trusts a known supplied type as-is and skips
/// the scriptable (HTML/XML/PDF) signatures when sniffing an unknown type.
///
/// `Debug`, `Clone`, `Copy` and `Eq` are derived in addition to `PartialEq`
/// so the flag can be logged and used in assertions.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum NoSniffFlag {
    /// Sniffing is disabled.
    On,
    /// Sniffing is allowed.
    Off,
}
57
58impl Default for MimeClassifier {
59 fn default() -> Self {
60 Self {
61 image_classifier: GroupedClassifier::image_classifer(),
62 audio_video_classifier: GroupedClassifier::audio_video_classifier(),
63 scriptable_classifier: GroupedClassifier::scriptable_classifier(),
64 plaintext_classifier: GroupedClassifier::plaintext_classifier(),
65 archive_classifier: GroupedClassifier::archive_classifier(),
66 binary_or_plaintext: BinaryOrPlaintextClassifier,
67 font_classifier: GroupedClassifier::font_classifier(),
68 }
69 }
70}
71
72impl MimeClassifier {
73 pub fn classify<'a>(
75 &'a self,
76 context: LoadContext,
77 no_sniff_flag: NoSniffFlag,
78 apache_bug_flag: ApacheBugFlag,
79 supplied_type: &Option<Mime>,
80 data: &'a [u8],
81 ) -> Mime {
82 let supplied_type_or_octet_stream = supplied_type
83 .clone()
84 .unwrap_or(mime::APPLICATION_OCTET_STREAM);
85 if Self::is_xml(&supplied_type_or_octet_stream) ||
88 Self::is_html(&supplied_type_or_octet_stream)
89 {
90 return supplied_type_or_octet_stream;
91 }
92 match context {
93 LoadContext::Browsing => match *supplied_type {
94 None => self.sniff_unknown_type(no_sniff_flag, data),
98 Some(ref supplied_type) => {
99 if MimeClassifier::is_explicit_unknown(supplied_type) {
100 return self.sniff_unknown_type(no_sniff_flag, data);
101 }
102 if no_sniff_flag == NoSniffFlag::On {
105 return supplied_type.clone();
106 }
107 if apache_bug_flag == ApacheBugFlag::On {
110 return self.sniff_text_or_data(data);
111 }
112 match MimeClassifier::get_media_type(supplied_type) {
113 Some(MediaType::Image) => {
117 self.image_classifier.classify(data)
119 },
120 Some(MediaType::AudioVideo) => {
124 self.audio_video_classifier.classify(data)
126 },
127 Some(MediaType::Html) | Some(MediaType::Xml) => unreachable!(),
128 _ => None,
129 }
130 .unwrap_or(supplied_type.clone())
132 },
133 },
134 LoadContext::Image => {
135 match MimeClassifier::maybe_get_media_type(supplied_type) {
137 Some(MediaType::Xml) => None,
138 _ => self.image_classifier.classify(data),
139 }
140 .unwrap_or(supplied_type_or_octet_stream)
141 },
142 LoadContext::AudioVideo => {
143 match MimeClassifier::maybe_get_media_type(supplied_type) {
145 Some(MediaType::Xml) => None,
146 _ => self.audio_video_classifier.classify(data),
147 }
148 .unwrap_or(supplied_type_or_octet_stream)
149 },
150 LoadContext::Plugin => {
151 match *supplied_type {
156 None => mime::APPLICATION_OCTET_STREAM,
157 _ => supplied_type_or_octet_stream,
158 }
159 },
160 LoadContext::Style => {
161 match *supplied_type {
166 None => mime::TEXT_CSS,
167 _ => supplied_type_or_octet_stream,
168 }
169 },
170 LoadContext::Script => {
171 match *supplied_type {
176 None => mime::TEXT_JAVASCRIPT,
177 _ => supplied_type_or_octet_stream,
178 }
179 },
180 LoadContext::Font => {
181 match MimeClassifier::maybe_get_media_type(supplied_type) {
183 Some(MediaType::Xml) => None,
184 _ => self.font_classifier.classify(data),
185 }
186 .unwrap_or(supplied_type_or_octet_stream)
187 },
188 LoadContext::TextTrack => {
189 "text/vtt".parse().unwrap()
194 },
195 LoadContext::CacheManifest => {
196 "text/cache-manifest".parse().unwrap()
201 },
202 }
203 }
204
205 pub fn validate(&self) -> Result<(), String> {
206 self.image_classifier.validate()?;
207 self.audio_video_classifier.validate()?;
208 self.scriptable_classifier.validate()?;
209 self.plaintext_classifier.validate()?;
210 self.archive_classifier.validate()?;
211 self.binary_or_plaintext.validate()?;
212 self.font_classifier.validate()?;
213 Ok(())
214 }
215
216 fn sniff_unknown_type(&self, no_sniff_flag: NoSniffFlag, data: &[u8]) -> Mime {
218 let should_sniff_scriptable = no_sniff_flag == NoSniffFlag::Off;
219 let sniffed = if should_sniff_scriptable {
220 self.scriptable_classifier.classify(data)
221 } else {
222 None
223 };
224
225 sniffed
226 .or_else(|| self.plaintext_classifier.classify(data))
227 .or_else(|| self.image_classifier.classify(data))
228 .or_else(|| self.audio_video_classifier.classify(data))
229 .or_else(|| self.archive_classifier.classify(data))
230 .or_else(|| self.binary_or_plaintext.classify(data))
231 .expect("BinaryOrPlaintextClassifier always succeeds")
232 }
233
234 fn sniff_text_or_data<'a>(&'a self, data: &'a [u8]) -> Mime {
235 self.binary_or_plaintext
236 .classify(data)
237 .expect("BinaryOrPlaintextClassifier always succeeds")
238 }
239
240 fn is_xml(mt: &Mime) -> bool {
244 !Self::is_image(mt) &&
245 (mt.suffix() == Some(mime::XML) ||
246 mt.essence_str() == "text/xml" ||
247 mt.essence_str() == "application/xml")
248 }
249
250 fn is_html(mt: &Mime) -> bool {
252 mt.essence_str() == "text/html"
253 }
254
255 fn is_image(mt: &Mime) -> bool {
257 mt.type_() == mime::IMAGE
258 }
259
260 fn is_audio_video(mt: &Mime) -> bool {
262 mt.type_() == mime::AUDIO ||
263 mt.type_() == mime::VIDEO ||
264 mt.essence_str() == "application/ogg"
265 }
266
267 fn is_explicit_unknown(mt: &Mime) -> bool {
268 mt.type_().as_str() == "unknown" && mt.subtype().as_str() == "unknown" ||
269 mt.type_() == mime::APPLICATION && mt.subtype().as_str() == "unknown" ||
270 mt.type_() == mime::STAR && mt.subtype() == mime::STAR
271 }
272
273 pub fn is_javascript(mt: &Mime) -> bool {
275 (mt.type_() == mime::APPLICATION &&
276 (["ecmascript", "javascript", "x-ecmascript", "x-javascript"]
277 .contains(&mt.subtype().as_str()))) ||
278 (mt.type_() == mime::TEXT &&
279 ([
280 "ecmascript",
281 "javascript",
282 "javascript1.0",
283 "javascript1.1",
284 "javascript1.2",
285 "javascript1.3",
286 "javascript1.4",
287 "javascript1.5",
288 "jscript",
289 "livescript",
290 "x-ecmascript",
291 "x-javascript",
292 ]
293 .contains(&mt.subtype().as_str())))
294 }
295
296 pub fn is_json(mt: &Mime) -> bool {
298 mt.suffix() == Some(mime::JSON) ||
299 (mt.subtype() == mime::JSON &&
300 (mt.type_() == mime::APPLICATION || mt.type_() == mime::TEXT))
301 }
302
303 fn is_font(mt: &Mime) -> bool {
305 mt.type_() == mime::FONT ||
306 (mt.type_() == mime::APPLICATION &&
307 ([
308 "font-cff",
309 "font-off",
310 "font-sfnt",
311 "font-ttf",
312 "font-woff",
313 "vnd.ms-fontobject",
314 "vnd.ms-opentype",
315 ]
316 .contains(&mt.subtype().as_str())))
317 }
318
319 fn is_text(mt: &Mime) -> bool {
320 *mt == mime::TEXT_PLAIN || mt.essence_str() == "text/vtt"
321 }
322
323 fn is_css(mt: &Mime) -> bool {
324 mt.essence_str() == "text/css"
325 }
326
327 pub fn get_media_type(mime: &Mime) -> Option<MediaType> {
328 if MimeClassifier::is_xml(mime) {
329 Some(MediaType::Xml)
330 } else if MimeClassifier::is_html(mime) {
331 Some(MediaType::Html)
332 } else if MimeClassifier::is_image(mime) {
333 Some(MediaType::Image)
334 } else if MimeClassifier::is_audio_video(mime) {
335 Some(MediaType::AudioVideo)
336 } else if MimeClassifier::is_javascript(mime) {
337 Some(MediaType::JavaScript)
338 } else if MimeClassifier::is_font(mime) {
339 Some(MediaType::Font)
340 } else if MimeClassifier::is_json(mime) {
341 Some(MediaType::Json)
342 } else if MimeClassifier::is_text(mime) {
343 Some(MediaType::Text)
344 } else if MimeClassifier::is_css(mime) {
345 Some(MediaType::Css)
346 } else {
347 None
348 }
349 }
350
351 fn maybe_get_media_type(supplied_type: &Option<Mime>) -> Option<MediaType> {
352 supplied_type
353 .as_ref()
354 .and_then(MimeClassifier::get_media_type)
355 }
356}
357
/// Common interface of everything that can recognise a MIME type from bytes.
trait MIMEChecker {
    /// Returns the MIME type recognised in `data`, or `None` when this checker
    /// does not recognise it.
    fn classify(&self, data: &[u8]) -> Option<Mime>;
    /// Checks that the checker's own configuration is well-formed.
    ///
    /// Returns `Err` with a human-readable message describing the problem.
    fn validate(&self) -> Result<(), String>;
}
364
/// A masked byte signature associated with a MIME type.
struct ByteMatcher {
    /// Expected bytes, compared after the input has been ANDed with `mask`.
    pattern: &'static [u8],
    /// Per-byte mask applied to the input; `validate` requires it to have the
    /// same length as `pattern`.
    mask: &'static [u8],
    /// Bytes that may be skipped at the start of the input before the pattern.
    leading_ignore: &'static [u8],
    /// MIME type reported when the signature matches.
    content_type: Mime,
}
371
372impl ByteMatcher {
373 fn matches(&self, data: &[u8]) -> Option<usize> {
374 if data.len() < self.pattern.len() {
375 None
376 } else if data == self.pattern {
377 Some(self.pattern.len())
378 } else {
379 data[..data.len() - self.pattern.len() + 1]
380 .iter()
381 .position(|x| !self.leading_ignore.contains(x))
382 .and_then(|start| {
383 if data[start..]
384 .iter()
385 .zip(self.pattern.iter())
386 .zip(self.mask.iter())
387 .all(|((&data, &pattern), &mask)| (data & mask) == pattern)
388 {
389 Some(start + self.pattern.len())
390 } else {
391 None
392 }
393 })
394 }
395 }
396}
397
398impl MIMEChecker for ByteMatcher {
399 fn classify(&self, data: &[u8]) -> Option<Mime> {
400 self.matches(data).map(|_| self.content_type.clone())
401 }
402
403 fn validate(&self) -> Result<(), String> {
404 if self.pattern.is_empty() {
405 return Err(format!("Zero length pattern for {:?}", self.content_type));
406 }
407 if self.pattern.len() != self.mask.len() {
408 return Err(format!(
409 "Unequal pattern and mask length for {:?}",
410 self.content_type
411 ));
412 }
413 if self
414 .pattern
415 .iter()
416 .zip(self.mask.iter())
417 .any(|(&pattern, &mask)| pattern & mask != pattern)
418 {
419 return Err(format!(
420 "Pattern not pre-masked for {:?}",
421 self.content_type
422 ));
423 }
424 Ok(())
425 }
426}
427
/// A [`ByteMatcher`] whose match must be immediately followed by a space or
/// `>` byte (see its `MIMEChecker::classify` implementation).
struct TagTerminatedByteMatcher {
    /// The underlying signature.
    matcher: ByteMatcher,
}
431
432impl MIMEChecker for TagTerminatedByteMatcher {
433 fn classify(&self, data: &[u8]) -> Option<Mime> {
434 self.matcher.matches(data).and_then(|j| {
435 if j < data.len() && (data[j] == b' ' || data[j] == b'>') {
436 Some(self.matcher.content_type.clone())
437 } else {
438 None
439 }
440 })
441 }
442
443 fn validate(&self) -> Result<(), String> {
444 self.matcher.validate()
445 }
446}
447
/// Recognises MP4 content by inspecting its leading `ftyp` box.
pub struct Mp4Matcher;

impl Mp4Matcher {
    /// Returns `true` when `data` starts with an `ftyp` box that names an
    /// `mp4` brand.
    ///
    /// The first four bytes are read as the big-endian box size. The input is
    /// rejected when it is shorter than 12 bytes, shorter than the declared box
    /// size, when that size is not a multiple of four, or when bytes 4..8 are
    /// not `ftyp`. The major brand (bytes 8..11) is checked first, then every
    /// 4-byte entry from offset 16 up to the end of the box.
    ///
    /// Never panics: a declared box size below 16 simply means there are no
    /// further brand entries to inspect.
    pub fn matches(&self, data: &[u8]) -> bool {
        const FTYP: &[u8] = b"ftyp";
        const MP4: &[u8] = b"mp4";

        if data.len() < 12 {
            return false;
        }

        let box_size = u32::from_be_bytes([data[0], data[1], data[2], data[3]]) as usize;
        if data.len() < box_size || box_size % 4 != 0 {
            return false;
        }

        if !data[4..].starts_with(FTYP) {
            return false;
        }

        if data[8..].starts_with(MP4) {
            return true;
        }

        // `get` yields `None` when the box ends before offset 16, where plain
        // range indexing would panic because the start exceeds the end.
        data.get(16..box_size)
            .is_some_and(|brands| brands.chunks(4).any(|brand| brand.starts_with(MP4)))
    }
}
492impl MIMEChecker for Mp4Matcher {
493 fn classify(&self, data: &[u8]) -> Option<Mime> {
494 if self.matches(data) {
495 Some("video/mp4".parse().unwrap())
496 } else {
497 None
498 }
499 }
500
501 fn validate(&self) -> Result<(), String> {
502 Ok(())
503 }
504}
505
506struct BinaryOrPlaintextClassifier;
507
508impl BinaryOrPlaintextClassifier {
509 fn classify_impl(&self, data: &[u8]) -> Mime {
511 if data.starts_with(&[0xFFu8, 0xFEu8]) ||
519 data.starts_with(&[0xFEu8, 0xFFu8]) ||
520 data.starts_with(&[0xEFu8, 0xBBu8, 0xBFu8])
521 {
522 mime::TEXT_PLAIN
523 } else if data.iter().any(|&x| {
524 x <= 0x08u8 ||
525 x == 0x0Bu8 ||
526 (0x0Eu8..=0x1Au8).contains(&x) ||
527 (0x1Cu8..=0x1Fu8).contains(&x)
528 }) {
529 mime::APPLICATION_OCTET_STREAM
531 } else {
532 mime::TEXT_PLAIN
535 }
536 }
537}
538impl MIMEChecker for BinaryOrPlaintextClassifier {
539 fn classify(&self, data: &[u8]) -> Option<Mime> {
540 Some(self.classify_impl(data))
541 }
542
543 fn validate(&self) -> Result<(), String> {
544 Ok(())
545 }
546}
/// An ordered collection of checkers that are tried one after another.
struct GroupedClassifier {
    /// The checkers, in priority order: the first one that recognises the data
    /// determines the result.
    byte_matchers: Vec<Box<dyn MIMEChecker + Send + Sync>>,
}
550impl GroupedClassifier {
551 fn image_classifer() -> GroupedClassifier {
552 GroupedClassifier {
553 byte_matchers: vec![
554 Box::new(ByteMatcher::image_x_icon()),
557 Box::new(ByteMatcher::image_x_icon_cursor()),
558 Box::new(ByteMatcher::image_bmp()),
559 Box::new(ByteMatcher::image_gif89a()),
560 Box::new(ByteMatcher::image_gif87a()),
561 Box::new(ByteMatcher::image_webp()),
562 Box::new(ByteMatcher::image_png()),
563 Box::new(ByteMatcher::image_jpeg()),
564 ],
565 }
566 }
567 fn audio_video_classifier() -> GroupedClassifier {
568 GroupedClassifier {
569 byte_matchers: vec![
570 Box::new(ByteMatcher::video_webm()),
571 Box::new(ByteMatcher::audio_basic()),
572 Box::new(ByteMatcher::audio_aiff()),
573 Box::new(ByteMatcher::audio_mpeg()),
574 Box::new(ByteMatcher::application_ogg()),
575 Box::new(ByteMatcher::audio_midi()),
576 Box::new(ByteMatcher::video_avi()),
577 Box::new(ByteMatcher::audio_wave()),
578 Box::new(Mp4Matcher),
579 ],
580 }
581 }
582 fn scriptable_classifier() -> GroupedClassifier {
583 GroupedClassifier {
584 byte_matchers: vec![
585 Box::new(ByteMatcher::text_html_doctype()),
586 Box::new(ByteMatcher::text_html_page()),
587 Box::new(ByteMatcher::text_html_head()),
588 Box::new(ByteMatcher::text_html_script()),
589 Box::new(ByteMatcher::text_html_iframe()),
590 Box::new(ByteMatcher::text_html_h1()),
591 Box::new(ByteMatcher::text_html_div()),
592 Box::new(ByteMatcher::text_html_font()),
593 Box::new(ByteMatcher::text_html_table()),
594 Box::new(ByteMatcher::text_html_a()),
595 Box::new(ByteMatcher::text_html_style()),
596 Box::new(ByteMatcher::text_html_title()),
597 Box::new(ByteMatcher::text_html_b()),
598 Box::new(ByteMatcher::text_html_body()),
599 Box::new(ByteMatcher::text_html_br()),
600 Box::new(ByteMatcher::text_html_p()),
601 Box::new(ByteMatcher::text_html_comment()),
602 Box::new(ByteMatcher::text_xml()),
603 Box::new(ByteMatcher::application_pdf()),
604 ],
605 }
606 }
607 fn plaintext_classifier() -> GroupedClassifier {
608 GroupedClassifier {
609 byte_matchers: vec![
610 Box::new(ByteMatcher::text_plain_utf_8_bom()),
611 Box::new(ByteMatcher::text_plain_utf_16le_bom()),
612 Box::new(ByteMatcher::text_plain_utf_16be_bom()),
613 Box::new(ByteMatcher::application_postscript()),
614 ],
615 }
616 }
617 fn archive_classifier() -> GroupedClassifier {
618 GroupedClassifier {
619 byte_matchers: vec![
620 Box::new(ByteMatcher::application_x_gzip()),
621 Box::new(ByteMatcher::application_zip()),
622 Box::new(ByteMatcher::application_x_rar_compressed()),
623 ],
624 }
625 }
626
627 fn font_classifier() -> GroupedClassifier {
628 GroupedClassifier {
629 byte_matchers: vec![
630 Box::new(ByteMatcher::application_font_woff()),
631 Box::new(ByteMatcher::true_type_collection()),
632 Box::new(ByteMatcher::open_type()),
633 Box::new(ByteMatcher::true_type()),
634 Box::new(ByteMatcher::application_vnd_ms_font_object()),
635 ],
636 }
637 }
638}
639impl MIMEChecker for GroupedClassifier {
640 fn classify(&self, data: &[u8]) -> Option<Mime> {
641 self.byte_matchers
642 .iter()
643 .find_map(|matcher| matcher.classify(data))
644 }
645
646 fn validate(&self) -> Result<(), String> {
647 for byte_matcher in &self.byte_matchers {
648 byte_matcher.validate()?
649 }
650 Ok(())
651 }
652}
653
// Constructors for the built-in signatures.
//
// A mask byte of 0xFF requires an exact match and 0x00 accepts any byte. 0xDF
// clears bit 0x20, so ASCII letters match regardless of case. Because
// `matches` requires at least `pattern.len()` bytes of input, trailing 0x00
// mask bytes still demand that many bytes to be present.
impl ByteMatcher {
    /// `image/x-icon`: bytes `00 00 01 00`.
    fn image_x_icon() -> ByteMatcher {
        ByteMatcher {
            pattern: b"\x00\x00\x01\x00",
            mask: b"\xFF\xFF\xFF\xFF",
            content_type: "image/x-icon".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `image/x-icon`: bytes `00 00 02 00` (the cursor variant, going by the
    /// function name).
    fn image_x_icon_cursor() -> ByteMatcher {
        ByteMatcher {
            pattern: b"\x00\x00\x02\x00",
            mask: b"\xFF\xFF\xFF\xFF",
            content_type: "image/x-icon".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `image/bmp`: the string "BM".
    fn image_bmp() -> ByteMatcher {
        ByteMatcher {
            pattern: b"BM",
            mask: b"\xFF\xFF",
            content_type: mime::IMAGE_BMP,
            leading_ignore: &[],
        }
    }
    /// `image/gif`: the string "GIF89a".
    fn image_gif89a() -> ByteMatcher {
        ByteMatcher {
            pattern: b"GIF89a",
            mask: b"\xFF\xFF\xFF\xFF\xFF\xFF",
            content_type: mime::IMAGE_GIF,
            leading_ignore: &[],
        }
    }
    /// `image/gif`: the string "GIF87a".
    fn image_gif87a() -> ByteMatcher {
        ByteMatcher {
            pattern: b"GIF87a",
            mask: b"\xFF\xFF\xFF\xFF\xFF\xFF",
            content_type: mime::IMAGE_GIF,
            leading_ignore: &[],
        }
    }
    /// `image/webp`: "RIFF", four arbitrary bytes, then "WEBPVP".
    fn image_webp() -> ByteMatcher {
        ByteMatcher {
            pattern: b"RIFF\x00\x00\x00\x00WEBPVP",
            mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF",
            content_type: "image/webp".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `image/png`: byte `89`, "PNG", CR LF, byte `1A`, LF.
    fn image_png() -> ByteMatcher {
        ByteMatcher {
            pattern: b"\x89PNG\r\n\x1A\n",
            mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
            content_type: mime::IMAGE_PNG,
            leading_ignore: &[],
        }
    }
    /// `image/jpeg`: bytes `FF D8 FF`.
    fn image_jpeg() -> ByteMatcher {
        ByteMatcher {
            pattern: b"\xFF\xD8\xFF",
            mask: b"\xFF\xFF\xFF",
            content_type: mime::IMAGE_JPEG,
            leading_ignore: &[],
        }
    }
    /// `video/webm`: bytes `1A 45 DF A3`.
    fn video_webm() -> ByteMatcher {
        ByteMatcher {
            pattern: b"\x1A\x45\xDF\xA3",
            mask: b"\xFF\xFF\xFF\xFF",
            content_type: "video/webm".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `audio/basic`: the string ".snd".
    fn audio_basic() -> ByteMatcher {
        ByteMatcher {
            pattern: b".snd",
            mask: b"\xFF\xFF\xFF\xFF",
            content_type: "audio/basic".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `audio/aiff`: "FORM", four arbitrary bytes, then "AIFF".
    fn audio_aiff() -> ByteMatcher {
        ByteMatcher {
            pattern: b"FORM\x00\x00\x00\x00AIFF",
            mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
            content_type: "audio/aiff".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `audio/mpeg`: the string "ID3".
    fn audio_mpeg() -> ByteMatcher {
        ByteMatcher {
            pattern: b"ID3",
            mask: b"\xFF\xFF\xFF",
            content_type: "audio/mpeg".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `application/ogg`: "OggS" followed by a NUL byte.
    fn application_ogg() -> ByteMatcher {
        ByteMatcher {
            pattern: b"OggS\x00",
            mask: b"\xFF\xFF\xFF\xFF\xFF",
            content_type: "application/ogg".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `audio/midi`: "MThd" followed by bytes `00 00 00 06`.
    fn audio_midi() -> ByteMatcher {
        ByteMatcher {
            pattern: b"MThd\x00\x00\x00\x06",
            mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
            content_type: "audio/midi".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `video/avi`: "RIFF", four arbitrary bytes, then "AVI ".
    fn video_avi() -> ByteMatcher {
        ByteMatcher {
            pattern: b"RIFF\x00\x00\x00\x00AVI ",
            mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
            content_type: "video/avi".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `audio/wave`: "RIFF", four arbitrary bytes, then "WAVE".
    fn audio_wave() -> ByteMatcher {
        ByteMatcher {
            pattern: b"RIFF\x00\x00\x00\x00WAVE",
            mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
            content_type: "audio/wave".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `text/html`: "<!DOCTYPE HTML" (letters case-insensitive) after optional
    /// leading whitespace.
    fn text_html_doctype() -> TagTerminatedByteMatcher {
        TagTerminatedByteMatcher {
            matcher: ByteMatcher {
                pattern: b"<!DOCTYPE HTML",
                mask: b"\xFF\xFF\xDF\xDF\xDF\xDF\xDF\xDF\xDF\xFF\xDF\xDF\xDF\xDF",
                content_type: mime::TEXT_HTML,
                leading_ignore: b"\t\n\x0C\r ",
            },
        }
    }

    /// `text/html`: "<HTML" (case-insensitive) after optional leading whitespace.
    fn text_html_page() -> TagTerminatedByteMatcher {
        TagTerminatedByteMatcher {
            matcher: ByteMatcher {
                pattern: b"<HTML",
                mask: b"\xFF\xDF\xDF\xDF\xDF",
                content_type: mime::TEXT_HTML,
                leading_ignore: b"\t\n\x0C\r ",
            },
        }
    }

    /// `text/html`: "<HEAD" (case-insensitive) after optional leading whitespace.
    fn text_html_head() -> TagTerminatedByteMatcher {
        TagTerminatedByteMatcher {
            matcher: ByteMatcher {
                pattern: b"<HEAD",
                mask: b"\xFF\xDF\xDF\xDF\xDF",
                content_type: mime::TEXT_HTML,
                leading_ignore: b"\t\n\x0C\r ",
            },
        }
    }

    /// `text/html`: "<SCRIPT" (case-insensitive) after optional leading whitespace.
    fn text_html_script() -> TagTerminatedByteMatcher {
        TagTerminatedByteMatcher {
            matcher: ByteMatcher {
                pattern: b"<SCRIPT",
                mask: b"\xFF\xDF\xDF\xDF\xDF\xDF\xDF",
                content_type: mime::TEXT_HTML,
                leading_ignore: b"\t\n\x0C\r ",
            },
        }
    }

    /// `text/html`: "<IFRAME" (case-insensitive) after optional leading whitespace.
    fn text_html_iframe() -> TagTerminatedByteMatcher {
        TagTerminatedByteMatcher {
            matcher: ByteMatcher {
                pattern: b"<IFRAME",
                mask: b"\xFF\xDF\xDF\xDF\xDF\xDF\xDF",
                content_type: mime::TEXT_HTML,
                leading_ignore: b"\t\n\x0C\r ",
            },
        }
    }

    /// `text/html`: "<H1" (the letter is case-insensitive, the digit is exact)
    /// after optional leading whitespace.
    fn text_html_h1() -> TagTerminatedByteMatcher {
        TagTerminatedByteMatcher {
            matcher: ByteMatcher {
                pattern: b"<H1",
                mask: b"\xFF\xDF\xFF",
                content_type: mime::TEXT_HTML,
                leading_ignore: b"\t\n\x0C\r ",
            },
        }
    }

    /// `text/html`: "<DIV" (case-insensitive) after optional leading whitespace.
    fn text_html_div() -> TagTerminatedByteMatcher {
        TagTerminatedByteMatcher {
            matcher: ByteMatcher {
                pattern: b"<DIV",
                mask: b"\xFF\xDF\xDF\xDF",
                content_type: mime::TEXT_HTML,
                leading_ignore: b"\t\n\x0C\r ",
            },
        }
    }

    /// `text/html`: "<FONT" (case-insensitive) after optional leading whitespace.
    fn text_html_font() -> TagTerminatedByteMatcher {
        TagTerminatedByteMatcher {
            matcher: ByteMatcher {
                pattern: b"<FONT",
                mask: b"\xFF\xDF\xDF\xDF\xDF",
                content_type: mime::TEXT_HTML,
                leading_ignore: b"\t\n\x0C\r ",
            },
        }
    }

    /// `text/html`: "<TABLE" (case-insensitive) after optional leading whitespace.
    fn text_html_table() -> TagTerminatedByteMatcher {
        TagTerminatedByteMatcher {
            matcher: ByteMatcher {
                pattern: b"<TABLE",
                mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
                content_type: mime::TEXT_HTML,
                leading_ignore: b"\t\n\x0C\r ",
            },
        }
    }

    /// `text/html`: "<A" (case-insensitive) after optional leading whitespace.
    fn text_html_a() -> TagTerminatedByteMatcher {
        TagTerminatedByteMatcher {
            matcher: ByteMatcher {
                pattern: b"<A",
                mask: b"\xFF\xDF",
                content_type: mime::TEXT_HTML,
                leading_ignore: b"\t\n\x0C\r ",
            },
        }
    }

    /// `text/html`: "<STYLE" (case-insensitive) after optional leading whitespace.
    fn text_html_style() -> TagTerminatedByteMatcher {
        TagTerminatedByteMatcher {
            matcher: ByteMatcher {
                pattern: b"<STYLE",
                mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
                content_type: mime::TEXT_HTML,
                leading_ignore: b"\t\n\x0C\r ",
            },
        }
    }

    /// `text/html`: "<TITLE" (case-insensitive) after optional leading whitespace.
    fn text_html_title() -> TagTerminatedByteMatcher {
        TagTerminatedByteMatcher {
            matcher: ByteMatcher {
                pattern: b"<TITLE",
                mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
                content_type: mime::TEXT_HTML,
                leading_ignore: b"\t\n\x0C\r ",
            },
        }
    }

    /// `text/html`: "<B" (case-insensitive) after optional leading whitespace.
    fn text_html_b() -> TagTerminatedByteMatcher {
        TagTerminatedByteMatcher {
            matcher: ByteMatcher {
                pattern: b"<B",
                mask: b"\xFF\xDF",
                content_type: mime::TEXT_HTML,
                leading_ignore: b"\t\n\x0C\r ",
            },
        }
    }

    /// `text/html`: "<BODY" (case-insensitive) after optional leading whitespace.
    fn text_html_body() -> TagTerminatedByteMatcher {
        TagTerminatedByteMatcher {
            matcher: ByteMatcher {
                pattern: b"<BODY",
                mask: b"\xFF\xDF\xDF\xDF\xDF",
                content_type: mime::TEXT_HTML,
                leading_ignore: b"\t\n\x0C\r ",
            },
        }
    }

    /// `text/html`: "<BR" (case-insensitive) after optional leading whitespace.
    fn text_html_br() -> TagTerminatedByteMatcher {
        TagTerminatedByteMatcher {
            matcher: ByteMatcher {
                pattern: b"<BR",
                mask: b"\xFF\xDF\xDF",
                content_type: mime::TEXT_HTML,
                leading_ignore: b"\t\n\x0C\r ",
            },
        }
    }

    /// `text/html`: "<P" (case-insensitive) after optional leading whitespace.
    fn text_html_p() -> TagTerminatedByteMatcher {
        TagTerminatedByteMatcher {
            matcher: ByteMatcher {
                pattern: b"<P",
                mask: b"\xFF\xDF",
                content_type: mime::TEXT_HTML,
                leading_ignore: b"\t\n\x0C\r ",
            },
        }
    }

    /// `text/html`: the comment opener "<!--" after optional leading whitespace.
    fn text_html_comment() -> TagTerminatedByteMatcher {
        TagTerminatedByteMatcher {
            matcher: ByteMatcher {
                pattern: b"<!--",
                mask: b"\xFF\xFF\xFF\xFF",
                content_type: mime::TEXT_HTML,
                leading_ignore: b"\t\n\x0C\r ",
            },
        }
    }

    /// `text/xml`: "<?xml" (case-sensitive) after optional leading whitespace.
    fn text_xml() -> ByteMatcher {
        ByteMatcher {
            pattern: b"<?xml",
            mask: b"\xFF\xFF\xFF\xFF\xFF",
            content_type: mime::TEXT_XML,
            leading_ignore: b"\t\n\x0C\r ",
        }
    }
    /// `application/pdf`: the string "%PDF-".
    fn application_pdf() -> ByteMatcher {
        ByteMatcher {
            pattern: b"%PDF-",
            mask: b"\xFF\xFF\xFF\xFF\xFF",
            content_type: mime::APPLICATION_PDF,
            leading_ignore: &[],
        }
    }
    /// `application/vnd.ms-fontobject`: 34 arbitrary bytes followed by "LP".
    fn application_vnd_ms_font_object() -> ByteMatcher {
        ByteMatcher {
            pattern: b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
                       \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
                       \x00\x00LP",
            mask: b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
                    \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
                    \x00\x00\xFF\xFF",
            content_type: "application/vnd.ms-fontobject".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `application/font-sfnt`: bytes `00 01 00 00`.
    fn true_type() -> ByteMatcher {
        ByteMatcher {
            pattern: b"\x00\x01\x00\x00",
            mask: b"\xFF\xFF\xFF\xFF",
            content_type: "application/font-sfnt".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `application/font-sfnt`: the string "OTTO".
    fn open_type() -> ByteMatcher {
        ByteMatcher {
            pattern: b"OTTO",
            mask: b"\xFF\xFF\xFF\xFF",
            content_type: "application/font-sfnt".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `application/font-sfnt`: the string "ttcf".
    fn true_type_collection() -> ByteMatcher {
        ByteMatcher {
            pattern: b"ttcf",
            mask: b"\xFF\xFF\xFF\xFF",
            content_type: "application/font-sfnt".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `application/font-woff`: the string "wOFF".
    fn application_font_woff() -> ByteMatcher {
        ByteMatcher {
            pattern: b"wOFF",
            mask: b"\xFF\xFF\xFF\xFF",
            content_type: "application/font-woff".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `application/x-gzip`: bytes `1F 8B 08`.
    fn application_x_gzip() -> ByteMatcher {
        ByteMatcher {
            pattern: b"\x1F\x8B\x08",
            mask: b"\xFF\xFF\xFF",
            content_type: "application/x-gzip".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `application/zip`: "PK" followed by bytes `03 04`.
    fn application_zip() -> ByteMatcher {
        ByteMatcher {
            pattern: b"PK\x03\x04",
            mask: b"\xFF\xFF\xFF\xFF",
            content_type: "application/zip".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `application/x-rar-compressed`: "Rar " followed by bytes `1A 07 00`.
    fn application_x_rar_compressed() -> ByteMatcher {
        ByteMatcher {
            pattern: b"Rar \x1A\x07\x00",
            mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
            content_type: "application/x-rar-compressed".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `application/postscript`: the string "%!PS-Adobe-".
    fn application_postscript() -> ByteMatcher {
        ByteMatcher {
            pattern: b"%!PS-Adobe-",
            mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
            content_type: "application/postscript".parse().unwrap(),
            leading_ignore: &[],
        }
    }
    /// `text/plain`: bytes `FE FF` (UTF-16BE byte order mark) followed by any
    /// two bytes.
    fn text_plain_utf_16be_bom() -> ByteMatcher {
        ByteMatcher {
            pattern: b"\xFE\xFF\x00\x00",
            mask: b"\xFF\xFF\x00\x00",
            content_type: mime::TEXT_PLAIN,
            leading_ignore: &[],
        }
    }
    /// `text/plain`: bytes `FF FE` (UTF-16LE byte order mark) followed by any
    /// two bytes.
    fn text_plain_utf_16le_bom() -> ByteMatcher {
        ByteMatcher {
            pattern: b"\xFF\xFE\x00\x00",
            mask: b"\xFF\xFF\x00\x00",
            content_type: mime::TEXT_PLAIN,
            leading_ignore: &[],
        }
    }
    /// `text/plain`: bytes `EF BB BF` (UTF-8 byte order mark) followed by any
    /// one byte.
    fn text_plain_utf_8_bom() -> ByteMatcher {
        ByteMatcher {
            pattern: b"\xEF\xBB\xBF\x00",
            mask: b"\xFF\xFF\xFF\x00",
            content_type: mime::TEXT_PLAIN,
            leading_ignore: &[],
        }
    }
}