1use mime::{self, Mime};
6
7use crate::LoadContext;
8
9pub struct MimeClassifier {
10 image_classifier: GroupedClassifier,
11 audio_video_classifier: GroupedClassifier,
12 scriptable_classifier: GroupedClassifier,
13 plaintext_classifier: GroupedClassifier,
14 archive_classifier: GroupedClassifier,
15 binary_or_plaintext: BinaryOrPlaintextClassifier,
16 font_classifier: GroupedClassifier,
17}
18
/// Coarse category of a MIME type, as reported by `MimeClassifier::get_media_type`.
///
/// The categories are tested in a fixed order by `get_media_type`, so a type that
/// satisfies several predicates is reported under the first matching category.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MediaType {
    /// Non-image types with a `+xml` suffix, or `text/xml` / `application/xml`.
    Xml,
    /// `text/html`.
    Html,
    /// `audio/*`, `video/*` or `application/ogg`.
    AudioVideo,
    /// `image/*`.
    Image,
    /// One of the JavaScript types accepted by `MimeClassifier::is_javascript`.
    JavaScript,
    /// Types with a `+json` suffix, or `application/json` / `text/json`.
    Json,
    /// `font/*` or one of the `application/*` font subtypes.
    Font,
    /// `text/plain` or `text/vtt`.
    Text,
    /// `text/css`.
    Css,
}
31
/// Flag controlling the text-or-binary check in `MimeClassifier::classify`.
///
/// When the flag is `On` and the load context is `Browsing`, `classify` only decides
/// between plain text and binary data for a supplied type that is not "unknown".
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ApacheBugFlag {
    /// The text-or-binary check is enabled.
    On,
    /// The text-or-binary check is disabled.
    Off,
}
37
38impl ApacheBugFlag {
39 pub fn from_content_type(mime_type: Option<&Mime>) -> ApacheBugFlag {
41 if mime_type.is_some_and(|mime_type| {
43 *mime_type == mime::TEXT_PLAIN || *mime_type == mime::TEXT_PLAIN_UTF_8
44 }) {
45 ApacheBugFlag::On
46 } else {
47 ApacheBugFlag::Off
48 }
49 }
50}
51
/// Flag that restricts sniffing in `MimeClassifier::classify`.
///
/// When `On`, scriptable types are not sniffed for unknown content, and in the
/// `Browsing` context a supplied type that is not "unknown" is returned unchanged.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum NoSniffFlag {
    /// Sniffing is restricted.
    On,
    /// Sniffing is not restricted.
    Off,
}
57
58impl Default for MimeClassifier {
59 fn default() -> Self {
60 Self {
61 image_classifier: GroupedClassifier::image_classifer(),
62 audio_video_classifier: GroupedClassifier::audio_video_classifier(),
63 scriptable_classifier: GroupedClassifier::scriptable_classifier(),
64 plaintext_classifier: GroupedClassifier::plaintext_classifier(),
65 archive_classifier: GroupedClassifier::archive_classifier(),
66 binary_or_plaintext: BinaryOrPlaintextClassifier,
67 font_classifier: GroupedClassifier::font_classifier(),
68 }
69 }
70}
71
72impl MimeClassifier {
73 pub fn classify<'a>(
75 &'a self,
76 context: LoadContext,
77 no_sniff_flag: NoSniffFlag,
78 apache_bug_flag: ApacheBugFlag,
79 supplied_type: &Option<Mime>,
80 data: &'a [u8],
81 ) -> Mime {
82 let supplied_type_or_octet_stream = supplied_type
83 .clone()
84 .unwrap_or(mime::APPLICATION_OCTET_STREAM);
85 if Self::is_xml(&supplied_type_or_octet_stream) ||
88 Self::is_html(&supplied_type_or_octet_stream)
89 {
90 return supplied_type_or_octet_stream;
91 }
92 match context {
93 LoadContext::Browsing => match *supplied_type {
94 None => self.sniff_unknown_type(no_sniff_flag, data),
98 Some(ref supplied_type) => {
99 if MimeClassifier::is_explicit_unknown(supplied_type) {
100 return self.sniff_unknown_type(no_sniff_flag, data);
101 }
102 if no_sniff_flag == NoSniffFlag::On {
105 return supplied_type.clone();
106 }
107 if apache_bug_flag == ApacheBugFlag::On {
110 return self.sniff_text_or_data(data);
111 }
112 match MimeClassifier::get_media_type(supplied_type) {
113 Some(MediaType::Image) => {
117 self.image_classifier.classify(data)
119 },
120 Some(MediaType::AudioVideo) => {
124 self.audio_video_classifier.classify(data)
126 },
127 Some(MediaType::Html) | Some(MediaType::Xml) => unreachable!(),
128 _ => None,
129 }
130 .unwrap_or(supplied_type.clone())
132 },
133 },
134 LoadContext::Image => {
135 match MimeClassifier::maybe_get_media_type(supplied_type) {
137 Some(MediaType::Xml) => None,
138 _ => self.image_classifier.classify(data),
139 }
140 .unwrap_or(supplied_type_or_octet_stream)
141 },
142 LoadContext::AudioVideo => {
143 match MimeClassifier::maybe_get_media_type(supplied_type) {
145 Some(MediaType::Xml) => None,
146 _ => self.audio_video_classifier.classify(data),
147 }
148 .unwrap_or(supplied_type_or_octet_stream)
149 },
150 LoadContext::Plugin => {
151 match *supplied_type {
156 None => mime::APPLICATION_OCTET_STREAM,
157 _ => supplied_type_or_octet_stream,
158 }
159 },
160 LoadContext::Style => {
161 match *supplied_type {
166 None => mime::TEXT_CSS,
167 _ => supplied_type_or_octet_stream,
168 }
169 },
170 LoadContext::Script => {
171 match *supplied_type {
176 None => mime::TEXT_JAVASCRIPT,
177 _ => supplied_type_or_octet_stream,
178 }
179 },
180 LoadContext::Font => {
181 match MimeClassifier::maybe_get_media_type(supplied_type) {
183 Some(MediaType::Xml) => None,
184 _ => self.font_classifier.classify(data),
185 }
186 .unwrap_or(supplied_type_or_octet_stream)
187 },
188 LoadContext::TextTrack => {
189 "text/vtt".parse().unwrap()
194 },
195 LoadContext::CacheManifest => {
196 "text/cache-manifest".parse().unwrap()
201 },
202 }
203 }
204
205 pub fn validate(&self) -> Result<(), String> {
206 self.image_classifier.validate()?;
207 self.audio_video_classifier.validate()?;
208 self.scriptable_classifier.validate()?;
209 self.plaintext_classifier.validate()?;
210 self.archive_classifier.validate()?;
211 self.binary_or_plaintext.validate()?;
212 self.font_classifier.validate()?;
213 Ok(())
214 }
215
216 fn sniff_unknown_type(&self, no_sniff_flag: NoSniffFlag, data: &[u8]) -> Mime {
218 let should_sniff_scriptable = no_sniff_flag == NoSniffFlag::Off;
219 let sniffed = if should_sniff_scriptable {
220 self.scriptable_classifier.classify(data)
221 } else {
222 None
223 };
224
225 sniffed
226 .or_else(|| self.plaintext_classifier.classify(data))
227 .or_else(|| self.image_classifier.classify(data))
228 .or_else(|| self.audio_video_classifier.classify(data))
229 .or_else(|| self.archive_classifier.classify(data))
230 .or_else(|| self.binary_or_plaintext.classify(data))
231 .expect("BinaryOrPlaintextClassifier always succeeds")
232 }
233
234 fn sniff_text_or_data<'a>(&'a self, data: &'a [u8]) -> Mime {
235 self.binary_or_plaintext
236 .classify(data)
237 .expect("BinaryOrPlaintextClassifier always succeeds")
238 }
239
240 fn is_xml(mt: &Mime) -> bool {
244 !Self::is_image(mt) &&
245 (mt.suffix() == Some(mime::XML) ||
246 mt.essence_str() == "text/xml" ||
247 mt.essence_str() == "application/xml")
248 }
249
250 fn is_html(mt: &Mime) -> bool {
252 mt.essence_str() == "text/html"
253 }
254
255 fn is_image(mt: &Mime) -> bool {
257 mt.type_() == mime::IMAGE
258 }
259
260 fn is_audio_video(mt: &Mime) -> bool {
262 mt.type_() == mime::AUDIO ||
263 mt.type_() == mime::VIDEO ||
264 mt.essence_str() == "application/ogg"
265 }
266
267 fn is_explicit_unknown(mt: &Mime) -> bool {
268 mt.type_().as_str() == "unknown" && mt.subtype().as_str() == "unknown" ||
269 mt.type_() == mime::APPLICATION && mt.subtype().as_str() == "unknown" ||
270 mt.type_() == mime::STAR && mt.subtype() == mime::STAR
271 }
272
273 pub fn is_javascript(mt: &Mime) -> bool {
275 (mt.type_() == mime::APPLICATION &&
276 (["ecmascript", "javascript", "x-ecmascript", "x-javascript"]
277 .contains(&mt.subtype().as_str()))) ||
278 (mt.type_() == mime::TEXT &&
279 ([
280 "ecmascript",
281 "javascript",
282 "javascript1.0",
283 "javascript1.1",
284 "javascript1.2",
285 "javascript1.3",
286 "javascript1.4",
287 "javascript1.5",
288 "jscript",
289 "livescript",
290 "x-ecmascript",
291 "x-javascript",
292 ]
293 .contains(&mt.subtype().as_str())))
294 }
295
296 pub fn is_json(mt: &Mime) -> bool {
298 mt.suffix() == Some(mime::JSON) ||
299 (mt.subtype() == mime::JSON &&
300 (mt.type_() == mime::APPLICATION || mt.type_() == mime::TEXT))
301 }
302
303 fn is_font(mt: &Mime) -> bool {
305 mt.type_() == mime::FONT ||
306 (mt.type_() == mime::APPLICATION &&
307 ([
308 "font-cff",
309 "font-off",
310 "font-sfnt",
311 "font-ttf",
312 "font-woff",
313 "vnd.ms-fontobject",
314 "vnd.ms-opentype",
315 ]
316 .contains(&mt.subtype().as_str())))
317 }
318
319 fn is_text(mt: &Mime) -> bool {
320 *mt == mime::TEXT_PLAIN || mt.essence_str() == "text/vtt"
321 }
322
323 fn is_css(mt: &Mime) -> bool {
324 mt.essence_str() == "text/css"
325 }
326
327 pub fn get_media_type(mime: &Mime) -> Option<MediaType> {
328 if MimeClassifier::is_xml(mime) {
329 Some(MediaType::Xml)
330 } else if MimeClassifier::is_html(mime) {
331 Some(MediaType::Html)
332 } else if MimeClassifier::is_image(mime) {
333 Some(MediaType::Image)
334 } else if MimeClassifier::is_audio_video(mime) {
335 Some(MediaType::AudioVideo)
336 } else if MimeClassifier::is_javascript(mime) {
337 Some(MediaType::JavaScript)
338 } else if MimeClassifier::is_font(mime) {
339 Some(MediaType::Font)
340 } else if MimeClassifier::is_json(mime) {
341 Some(MediaType::Json)
342 } else if MimeClassifier::is_text(mime) {
343 Some(MediaType::Text)
344 } else if MimeClassifier::is_css(mime) {
345 Some(MediaType::Css)
346 } else {
347 None
348 }
349 }
350
351 fn maybe_get_media_type(supplied_type: &Option<Mime>) -> Option<MediaType> {
352 supplied_type
353 .as_ref()
354 .and_then(MimeClassifier::get_media_type)
355 }
356}
357
358trait MIMEChecker {
360 fn classify(&self, data: &[u8]) -> Option<Mime>;
361 fn validate(&self) -> Result<(), String>;
363}
364
365struct ByteMatcher {
366 pattern: &'static [u8],
367 mask: &'static [u8],
368 leading_ignore: &'static [u8],
369 content_type: Mime,
370}
371
372impl ByteMatcher {
373 fn matches(&self, data: &[u8]) -> Option<usize> {
374 if data.len() < self.pattern.len() {
375 None
376 } else if data == self.pattern {
377 Some(self.pattern.len())
378 } else {
379 data[..data.len() - self.pattern.len() + 1]
380 .iter()
381 .position(|x| !self.leading_ignore.contains(x))
382 .and_then(|start| {
383 if data[start..]
384 .iter()
385 .zip(self.pattern.iter())
386 .zip(self.mask.iter())
387 .all(|((&data, &pattern), &mask)| (data & mask) == pattern)
388 {
389 Some(start + self.pattern.len())
390 } else {
391 None
392 }
393 })
394 }
395 }
396}
397
398impl MIMEChecker for ByteMatcher {
399 fn classify(&self, data: &[u8]) -> Option<Mime> {
400 self.matches(data).map(|_| self.content_type.clone())
401 }
402
403 fn validate(&self) -> Result<(), String> {
404 if self.pattern.is_empty() {
405 return Err(format!("Zero length pattern for {:?}", self.content_type));
406 }
407 if self.pattern.len() != self.mask.len() {
408 return Err(format!(
409 "Unequal pattern and mask length for {:?}",
410 self.content_type
411 ));
412 }
413 if self
414 .pattern
415 .iter()
416 .zip(self.mask.iter())
417 .any(|(&pattern, &mask)| pattern & mask != pattern)
418 {
419 return Err(format!(
420 "Pattern not pre-masked for {:?}",
421 self.content_type
422 ));
423 }
424 Ok(())
425 }
426}
427
428struct TagTerminatedByteMatcher {
429 matcher: ByteMatcher,
430}
431
432impl MIMEChecker for TagTerminatedByteMatcher {
433 fn classify(&self, data: &[u8]) -> Option<Mime> {
434 self.matcher.matches(data).and_then(|j| {
435 if j < data.len() && (data[j] == b' ' || data[j] == b'>') {
436 Some(self.matcher.content_type.clone())
437 } else {
438 None
439 }
440 })
441 }
442
443 fn validate(&self) -> Result<(), String> {
444 self.matcher.validate()
445 }
446}
447
/// Sniffer for the MP4 signature: a leading `ftyp` box that names an `mp4` brand.
pub struct Mp4Matcher;

impl Mp4Matcher {
    /// Returns `true` when `data` starts with an `ftyp` box carrying an `mp4` brand.
    ///
    /// The first four bytes are read as a big-endian box size. The check fails when
    /// `data` is shorter than 12 bytes or shorter than the box, when the box size is
    /// not a multiple of four, or when bytes 4..8 are not `ftyp`. Otherwise the major
    /// brand at offset 8 and the four-byte brand entries from offset 16 up to the end
    /// of the box are searched for the prefix `mp4`.
    pub fn matches(&self, data: &[u8]) -> bool {
        const FTYP: &[u8] = b"ftyp";
        const MP4: &[u8] = b"mp4";

        if data.len() < 12 {
            return false;
        }

        let box_size = u32::from_be_bytes([data[0], data[1], data[2], data[3]]) as usize;
        if data.len() < box_size || box_size % 4 != 0 {
            return false;
        }

        if !data[4..].starts_with(FTYP) {
            return false;
        }

        if data[8..].starts_with(MP4) {
            return true;
        }

        // A box shorter than 16 bytes has no compatible-brand entries. `get` yields
        // `None` for that inverted range instead of panicking like `data[16..box_size]`.
        data.get(16..box_size)
            .is_some_and(|brands| brands.chunks(4).any(|brand| brand.starts_with(MP4)))
    }
}
492impl MIMEChecker for Mp4Matcher {
493 fn classify(&self, data: &[u8]) -> Option<Mime> {
494 if self.matches(data) {
495 Some("video/mp4".parse().unwrap())
496 } else {
497 None
498 }
499 }
500
501 fn validate(&self) -> Result<(), String> {
502 Ok(())
503 }
504}
505
506struct BinaryOrPlaintextClassifier;
507
508impl BinaryOrPlaintextClassifier {
509 fn classify_impl(&self, data: &[u8]) -> Mime {
511 if data.starts_with(&[0xFFu8, 0xFEu8]) ||
519 data.starts_with(&[0xFEu8, 0xFFu8]) ||
520 data.starts_with(&[0xEFu8, 0xBBu8, 0xBFu8])
521 {
522 mime::TEXT_PLAIN
523 } else if data.iter().any(|&x| {
524 x <= 0x08u8 ||
525 x == 0x0Bu8 ||
526 (0x0Eu8..=0x1Au8).contains(&x) ||
527 (0x1Cu8..=0x1Fu8).contains(&x)
528 }) {
529 mime::APPLICATION_OCTET_STREAM
531 } else {
532 mime::TEXT_PLAIN
535 }
536 }
537}
538impl MIMEChecker for BinaryOrPlaintextClassifier {
539 fn classify(&self, data: &[u8]) -> Option<Mime> {
540 Some(self.classify_impl(data))
541 }
542
543 fn validate(&self) -> Result<(), String> {
544 Ok(())
545 }
546}
547struct GroupedClassifier {
548 byte_matchers: Vec<Box<dyn MIMEChecker + Send + Sync>>,
549}
550impl GroupedClassifier {
551 fn image_classifer() -> GroupedClassifier {
552 GroupedClassifier {
553 byte_matchers: vec![
554 Box::new(ByteMatcher::image_x_icon()),
557 Box::new(ByteMatcher::image_x_icon_cursor()),
558 Box::new(ByteMatcher::image_bmp()),
559 Box::new(ByteMatcher::image_gif89a()),
560 Box::new(ByteMatcher::image_gif87a()),
561 Box::new(ByteMatcher::image_webp()),
562 Box::new(ByteMatcher::image_png()),
563 Box::new(ByteMatcher::image_jpeg()),
564 ],
565 }
566 }
567 fn audio_video_classifier() -> GroupedClassifier {
568 GroupedClassifier {
569 byte_matchers: vec![
570 Box::new(ByteMatcher::video_webm()),
571 Box::new(ByteMatcher::audio_basic()),
572 Box::new(ByteMatcher::audio_aiff()),
573 Box::new(ByteMatcher::audio_mpeg()),
574 Box::new(ByteMatcher::application_ogg()),
575 Box::new(ByteMatcher::audio_midi()),
576 Box::new(ByteMatcher::video_avi()),
577 Box::new(ByteMatcher::audio_wave()),
578 Box::new(Mp4Matcher),
579 ],
580 }
581 }
582 fn scriptable_classifier() -> GroupedClassifier {
583 GroupedClassifier {
584 byte_matchers: vec![
585 Box::new(ByteMatcher::text_html_doctype()),
586 Box::new(ByteMatcher::text_html_page()),
587 Box::new(ByteMatcher::text_html_head()),
588 Box::new(ByteMatcher::text_html_script()),
589 Box::new(ByteMatcher::text_html_iframe()),
590 Box::new(ByteMatcher::text_html_h1()),
591 Box::new(ByteMatcher::text_html_div()),
592 Box::new(ByteMatcher::text_html_font()),
593 Box::new(ByteMatcher::text_html_table()),
594 Box::new(ByteMatcher::text_html_a()),
595 Box::new(ByteMatcher::text_html_style()),
596 Box::new(ByteMatcher::text_html_title()),
597 Box::new(ByteMatcher::text_html_b()),
598 Box::new(ByteMatcher::text_html_body()),
599 Box::new(ByteMatcher::text_html_br()),
600 Box::new(ByteMatcher::text_html_p()),
601 Box::new(ByteMatcher::text_html_comment()),
602 Box::new(ByteMatcher::text_xml()),
603 Box::new(ByteMatcher::application_pdf()),
604 ],
605 }
606 }
607 fn plaintext_classifier() -> GroupedClassifier {
608 GroupedClassifier {
609 byte_matchers: vec![
610 Box::new(ByteMatcher::text_plain_utf_8_bom()),
611 Box::new(ByteMatcher::text_plain_utf_16le_bom()),
612 Box::new(ByteMatcher::text_plain_utf_16be_bom()),
613 Box::new(ByteMatcher::application_postscript()),
614 ],
615 }
616 }
617 fn archive_classifier() -> GroupedClassifier {
618 GroupedClassifier {
619 byte_matchers: vec![
620 Box::new(ByteMatcher::application_x_gzip()),
621 Box::new(ByteMatcher::application_zip()),
622 Box::new(ByteMatcher::application_x_rar_compressed()),
623 ],
624 }
625 }
626
627 fn font_classifier() -> GroupedClassifier {
628 GroupedClassifier {
629 byte_matchers: vec![
630 Box::new(ByteMatcher::application_font_woff()),
631 Box::new(ByteMatcher::true_type_collection()),
632 Box::new(ByteMatcher::open_type()),
633 Box::new(ByteMatcher::true_type()),
634 Box::new(ByteMatcher::application_vnd_ms_font_object()),
635 ],
636 }
637 }
638}
639impl MIMEChecker for GroupedClassifier {
640 fn classify(&self, data: &[u8]) -> Option<Mime> {
641 self.byte_matchers
642 .iter()
643 .filter_map(|matcher| matcher.classify(data))
644 .next()
645 }
646
647 fn validate(&self) -> Result<(), String> {
648 for byte_matcher in &self.byte_matchers {
649 byte_matcher.validate()?
650 }
651 Ok(())
652 }
653}
654
655impl ByteMatcher {
658 fn image_x_icon() -> ByteMatcher {
660 ByteMatcher {
661 pattern: b"\x00\x00\x01\x00",
662 mask: b"\xFF\xFF\xFF\xFF",
663 content_type: "image/x-icon".parse().unwrap(),
664 leading_ignore: &[],
665 }
666 }
667 fn image_x_icon_cursor() -> ByteMatcher {
669 ByteMatcher {
670 pattern: b"\x00\x00\x02\x00",
671 mask: b"\xFF\xFF\xFF\xFF",
672 content_type: "image/x-icon".parse().unwrap(),
673 leading_ignore: &[],
674 }
675 }
676 fn image_bmp() -> ByteMatcher {
678 ByteMatcher {
679 pattern: b"BM",
680 mask: b"\xFF\xFF",
681 content_type: mime::IMAGE_BMP,
682 leading_ignore: &[],
683 }
684 }
685 fn image_gif89a() -> ByteMatcher {
687 ByteMatcher {
688 pattern: b"GIF89a",
689 mask: b"\xFF\xFF\xFF\xFF\xFF\xFF",
690 content_type: mime::IMAGE_GIF,
691 leading_ignore: &[],
692 }
693 }
694 fn image_gif87a() -> ByteMatcher {
696 ByteMatcher {
697 pattern: b"GIF87a",
698 mask: b"\xFF\xFF\xFF\xFF\xFF\xFF",
699 content_type: mime::IMAGE_GIF,
700 leading_ignore: &[],
701 }
702 }
703 fn image_webp() -> ByteMatcher {
705 ByteMatcher {
706 pattern: b"RIFF\x00\x00\x00\x00WEBPVP",
707 mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF",
708 content_type: "image/webp".parse().unwrap(),
709 leading_ignore: &[],
710 }
711 }
712 fn image_png() -> ByteMatcher {
715 ByteMatcher {
716 pattern: b"\x89PNG\r\n\x1A\n",
717 mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
718 content_type: mime::IMAGE_PNG,
719 leading_ignore: &[],
720 }
721 }
722 fn image_jpeg() -> ByteMatcher {
724 ByteMatcher {
725 pattern: b"\xFF\xD8\xFF",
726 mask: b"\xFF\xFF\xFF",
727 content_type: mime::IMAGE_JPEG,
728 leading_ignore: &[],
729 }
730 }
731 fn video_webm() -> ByteMatcher {
733 ByteMatcher {
734 pattern: b"\x1A\x45\xDF\xA3",
735 mask: b"\xFF\xFF\xFF\xFF",
736 content_type: "video/webm".parse().unwrap(),
737 leading_ignore: &[],
738 }
739 }
740 fn audio_basic() -> ByteMatcher {
742 ByteMatcher {
743 pattern: b".snd",
744 mask: b"\xFF\xFF\xFF\xFF",
745 content_type: "audio/basic".parse().unwrap(),
746 leading_ignore: &[],
747 }
748 }
749 fn audio_aiff() -> ByteMatcher {
751 ByteMatcher {
752 pattern: b"FORM\x00\x00\x00\x00AIFF",
753 mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
754 content_type: "audio/aiff".parse().unwrap(),
755 leading_ignore: &[],
756 }
757 }
758 fn audio_mpeg() -> ByteMatcher {
760 ByteMatcher {
761 pattern: b"ID3",
762 mask: b"\xFF\xFF\xFF",
763 content_type: "audio/mpeg".parse().unwrap(),
764 leading_ignore: &[],
765 }
766 }
767 fn application_ogg() -> ByteMatcher {
769 ByteMatcher {
770 pattern: b"OggS\x00",
771 mask: b"\xFF\xFF\xFF\xFF\xFF",
772 content_type: "application/ogg".parse().unwrap(),
773 leading_ignore: &[],
774 }
775 }
776 fn audio_midi() -> ByteMatcher {
779 ByteMatcher {
780 pattern: b"MThd\x00\x00\x00\x06",
781 mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
782 content_type: "audio/midi".parse().unwrap(),
783 leading_ignore: &[],
784 }
785 }
786 fn video_avi() -> ByteMatcher {
788 ByteMatcher {
789 pattern: b"RIFF\x00\x00\x00\x00AVI ",
790 mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
791 content_type: "video/avi".parse().unwrap(),
792 leading_ignore: &[],
793 }
794 }
795 fn audio_wave() -> ByteMatcher {
797 ByteMatcher {
798 pattern: b"RIFF\x00\x00\x00\x00WAVE",
799 mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
800 content_type: "audio/wave".parse().unwrap(),
801 leading_ignore: &[],
802 }
803 }
804 fn text_html_doctype() -> TagTerminatedByteMatcher {
806 TagTerminatedByteMatcher {
807 matcher: ByteMatcher {
808 pattern: b"<!DOCTYPE HTML",
809 mask: b"\xFF\xFF\xDF\xDF\xDF\xDF\xDF\xDF\xDF\xFF\xDF\xDF\xDF\xDF",
810 content_type: mime::TEXT_HTML,
811 leading_ignore: b"\t\n\x0C\r ",
812 },
813 }
814 }
815
816 fn text_html_page() -> TagTerminatedByteMatcher {
818 TagTerminatedByteMatcher {
819 matcher: ByteMatcher {
820 pattern: b"<HTML",
821 mask: b"\xFF\xDF\xDF\xDF\xDF",
822 content_type: mime::TEXT_HTML,
823 leading_ignore: b"\t\n\x0C\r ",
824 },
825 }
826 }
827
828 fn text_html_head() -> TagTerminatedByteMatcher {
830 TagTerminatedByteMatcher {
831 matcher: ByteMatcher {
832 pattern: b"<HEAD",
833 mask: b"\xFF\xDF\xDF\xDF\xDF",
834 content_type: mime::TEXT_HTML,
835 leading_ignore: b"\t\n\x0C\r ",
836 },
837 }
838 }
839
840 fn text_html_script() -> TagTerminatedByteMatcher {
842 TagTerminatedByteMatcher {
843 matcher: ByteMatcher {
844 pattern: b"<SCRIPT",
845 mask: b"\xFF\xDF\xDF\xDF\xDF\xDF\xDF",
846 content_type: mime::TEXT_HTML,
847 leading_ignore: b"\t\n\x0C\r ",
848 },
849 }
850 }
851
852 fn text_html_iframe() -> TagTerminatedByteMatcher {
854 TagTerminatedByteMatcher {
855 matcher: ByteMatcher {
856 pattern: b"<IFRAME",
857 mask: b"\xFF\xDF\xDF\xDF\xDF\xDF\xDF",
858 content_type: mime::TEXT_HTML,
859 leading_ignore: b"\t\n\x0C\r ",
860 },
861 }
862 }
863
864 fn text_html_h1() -> TagTerminatedByteMatcher {
866 TagTerminatedByteMatcher {
867 matcher: ByteMatcher {
868 pattern: b"<H1",
869 mask: b"\xFF\xDF\xFF",
870 content_type: mime::TEXT_HTML,
871 leading_ignore: b"\t\n\x0C\r ",
872 },
873 }
874 }
875
876 fn text_html_div() -> TagTerminatedByteMatcher {
878 TagTerminatedByteMatcher {
879 matcher: ByteMatcher {
880 pattern: b"<DIV",
881 mask: b"\xFF\xDF\xDF\xDF",
882 content_type: mime::TEXT_HTML,
883 leading_ignore: b"\t\n\x0C\r ",
884 },
885 }
886 }
887
888 fn text_html_font() -> TagTerminatedByteMatcher {
890 TagTerminatedByteMatcher {
891 matcher: ByteMatcher {
892 pattern: b"<FONT",
893 mask: b"\xFF\xDF\xDF\xDF\xDF",
894 content_type: mime::TEXT_HTML,
895 leading_ignore: b"\t\n\x0C\r ",
896 },
897 }
898 }
899
900 fn text_html_table() -> TagTerminatedByteMatcher {
902 TagTerminatedByteMatcher {
903 matcher: ByteMatcher {
904 pattern: b"<TABLE",
905 mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
906 content_type: mime::TEXT_HTML,
907 leading_ignore: b"\t\n\x0C\r ",
908 },
909 }
910 }
911
912 fn text_html_a() -> TagTerminatedByteMatcher {
914 TagTerminatedByteMatcher {
915 matcher: ByteMatcher {
916 pattern: b"<A",
917 mask: b"\xFF\xDF",
918 content_type: mime::TEXT_HTML,
919 leading_ignore: b"\t\n\x0C\r ",
920 },
921 }
922 }
923
924 fn text_html_style() -> TagTerminatedByteMatcher {
926 TagTerminatedByteMatcher {
927 matcher: ByteMatcher {
928 pattern: b"<STYLE",
929 mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
930 content_type: mime::TEXT_HTML,
931 leading_ignore: b"\t\n\x0C\r ",
932 },
933 }
934 }
935
936 fn text_html_title() -> TagTerminatedByteMatcher {
938 TagTerminatedByteMatcher {
939 matcher: ByteMatcher {
940 pattern: b"<TITLE",
941 mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
942 content_type: mime::TEXT_HTML,
943 leading_ignore: b"\t\n\x0C\r ",
944 },
945 }
946 }
947
948 fn text_html_b() -> TagTerminatedByteMatcher {
950 TagTerminatedByteMatcher {
951 matcher: ByteMatcher {
952 pattern: b"<B",
953 mask: b"\xFF\xDF",
954 content_type: mime::TEXT_HTML,
955 leading_ignore: b"\t\n\x0C\r ",
956 },
957 }
958 }
959
960 fn text_html_body() -> TagTerminatedByteMatcher {
962 TagTerminatedByteMatcher {
963 matcher: ByteMatcher {
964 pattern: b"<BODY",
965 mask: b"\xFF\xDF\xDF\xDF\xDF",
966 content_type: mime::TEXT_HTML,
967 leading_ignore: b"\t\n\x0C\r ",
968 },
969 }
970 }
971
972 fn text_html_br() -> TagTerminatedByteMatcher {
974 TagTerminatedByteMatcher {
975 matcher: ByteMatcher {
976 pattern: b"<BR",
977 mask: b"\xFF\xDF\xDF",
978 content_type: mime::TEXT_HTML,
979 leading_ignore: b"\t\n\x0C\r ",
980 },
981 }
982 }
983
984 fn text_html_p() -> TagTerminatedByteMatcher {
986 TagTerminatedByteMatcher {
987 matcher: ByteMatcher {
988 pattern: b"<P",
989 mask: b"\xFF\xDF",
990 content_type: mime::TEXT_HTML,
991 leading_ignore: b"\t\n\x0C\r ",
992 },
993 }
994 }
995
996 fn text_html_comment() -> TagTerminatedByteMatcher {
998 TagTerminatedByteMatcher {
999 matcher: ByteMatcher {
1000 pattern: b"<!--",
1001 mask: b"\xFF\xFF\xFF\xFF",
1002 content_type: mime::TEXT_HTML,
1003 leading_ignore: b"\t\n\x0C\r ",
1004 },
1005 }
1006 }
1007
1008 fn text_xml() -> ByteMatcher {
1010 ByteMatcher {
1011 pattern: b"<?xml",
1012 mask: b"\xFF\xFF\xFF\xFF\xFF",
1013 content_type: mime::TEXT_XML,
1014 leading_ignore: b"\t\n\x0C\r ",
1015 }
1016 }
1017 fn application_pdf() -> ByteMatcher {
1019 ByteMatcher {
1020 pattern: b"%PDF-",
1021 mask: b"\xFF\xFF\xFF\xFF\xFF",
1022 content_type: mime::APPLICATION_PDF,
1023 leading_ignore: &[],
1024 }
1025 }
1026 fn application_vnd_ms_font_object() -> ByteMatcher {
1028 ByteMatcher {
1029 pattern: b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
1030 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
1031 \x00\x00LP",
1032 mask: b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
1033 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
1034 \x00\x00\xFF\xFF",
1035 content_type: "application/vnd.ms-fontobject".parse().unwrap(),
1036 leading_ignore: &[],
1037 }
1038 }
1039 fn true_type() -> ByteMatcher {
1041 ByteMatcher {
1042 pattern: b"\x00\x01\x00\x00",
1043 mask: b"\xFF\xFF\xFF\xFF",
1044 content_type: "application/font-sfnt".parse().unwrap(),
1045 leading_ignore: &[],
1046 }
1047 }
1048 fn open_type() -> ByteMatcher {
1050 ByteMatcher {
1051 pattern: b"OTTO",
1052 mask: b"\xFF\xFF\xFF\xFF",
1053 content_type: "application/font-sfnt".parse().unwrap(),
1054 leading_ignore: &[],
1055 }
1056 }
1057 fn true_type_collection() -> ByteMatcher {
1059 ByteMatcher {
1060 pattern: b"ttcf",
1061 mask: b"\xFF\xFF\xFF\xFF",
1062 content_type: "application/font-sfnt".parse().unwrap(),
1063 leading_ignore: &[],
1064 }
1065 }
1066 fn application_font_woff() -> ByteMatcher {
1068 ByteMatcher {
1069 pattern: b"wOFF",
1070 mask: b"\xFF\xFF\xFF\xFF",
1071 content_type: "application/font-woff".parse().unwrap(),
1072 leading_ignore: &[],
1073 }
1074 }
1075 fn application_x_gzip() -> ByteMatcher {
1077 ByteMatcher {
1078 pattern: b"\x1F\x8B\x08",
1079 mask: b"\xFF\xFF\xFF",
1080 content_type: "application/x-gzip".parse().unwrap(),
1081 leading_ignore: &[],
1082 }
1083 }
1084 fn application_zip() -> ByteMatcher {
1086 ByteMatcher {
1087 pattern: b"PK\x03\x04",
1088 mask: b"\xFF\xFF\xFF\xFF",
1089 content_type: "application/zip".parse().unwrap(),
1090 leading_ignore: &[],
1091 }
1092 }
1093 fn application_x_rar_compressed() -> ByteMatcher {
1095 ByteMatcher {
1096 pattern: b"Rar \x1A\x07\x00",
1097 mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
1098 content_type: "application/x-rar-compressed".parse().unwrap(),
1099 leading_ignore: &[],
1100 }
1101 }
1102 fn application_postscript() -> ByteMatcher {
1104 ByteMatcher {
1105 pattern: b"%!PS-Adobe-",
1106 mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
1107 content_type: "application/postscript".parse().unwrap(),
1108 leading_ignore: &[],
1109 }
1110 }
1111 fn text_plain_utf_16be_bom() -> ByteMatcher {
1113 ByteMatcher {
1114 pattern: b"\xFE\xFF\x00\x00",
1115 mask: b"\xFF\xFF\x00\x00",
1116 content_type: mime::TEXT_PLAIN,
1117 leading_ignore: &[],
1118 }
1119 }
1120 fn text_plain_utf_16le_bom() -> ByteMatcher {
1122 ByteMatcher {
1123 pattern: b"\xFF\xFE\x00\x00",
1124 mask: b"\xFF\xFF\x00\x00",
1125 content_type: mime::TEXT_PLAIN,
1126 leading_ignore: &[],
1127 }
1128 }
1129 fn text_plain_utf_8_bom() -> ByteMatcher {
1131 ByteMatcher {
1132 pattern: b"\xEF\xBB\xBF\x00",
1133 mask: b"\xFF\xFF\xFF\x00",
1134 content_type: mime::TEXT_PLAIN,
1135 leading_ignore: &[],
1136 }
1137 }
1138}