Skip to main content

script/
mime_multipart.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5// Copyright 2026      The Servo Developers
6// Copyright 2016-2025 mime-multipart Developers
7//
8// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
9// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
10// http://opensource.org/licenses/MIT>, at your option. This file may not be
11// copied, modified, or distributed except according to those terms.
12//
13// This file originates from github.com/mikedilger/mime-multipart and https://github.com/gw-de/mime-multipart-hyper1.
14// The file as is, is licensed under MPL-2.0. Any code that is originally from mime-multipart
15// or its fork mime-multipart-hyper1 are additionally licensed under Apache-2.0 and MIT, as
16// per the original license.
17
18mod error {
19    use std::error::Error as StdError;
20    use std::fmt::{self, Display};
21    use std::io;
22
23    use http::header::ToStrError;
24
25    /// An error type for the `mime-multipart` crate.
26    pub enum Error {
27        /// The Hyper request did not have a Content-Type header.
28        NoRequestContentType,
29        /// The Hyper request Content-Type top-level Mime was not `Multipart`.
30        NotMultipart,
31        /// The Content-Type header failed to specify boundary token.
32        BoundaryNotSpecified,
33        /// A multipart section contained only partial headers.
34        PartialHeaders,
35        EofBeforeFirstBoundary,
36        NoCrLfAfterBoundary,
37        EofInPartHeaders,
38        EofInFile,
39        EofInPart,
40        InvalidHeaderNameOrValue,
41        HeaderValueNotMime,
42        ToStr(ToStrError),
43        /// An HTTP parsing error from a multipart section.
44        Httparse(httparse::Error),
45        /// An I/O error.
46        Io(io::Error),
47    }
48
49    impl From<io::Error> for Error {
50        fn from(err: io::Error) -> Error {
51            Error::Io(err)
52        }
53    }
54
55    impl From<httparse::Error> for Error {
56        fn from(err: httparse::Error) -> Error {
57            Error::Httparse(err)
58        }
59    }
60
61    impl Display for Error {
62        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
63            match *self {
64                Error::Httparse(ref e) => format!("Httparse: {:?}", e).fmt(f),
65                Error::Io(ref e) => format!("Io: {}", e).fmt(f),
66                Error::ToStr(ref e) => format!("ToStr: {}", e).fmt(f),
67                Error::NoRequestContentType => "NoRequestContentType".to_string().fmt(f),
68                Error::NotMultipart => "NotMultipart".to_string().fmt(f),
69                Error::BoundaryNotSpecified => "BoundaryNotSpecified".to_string().fmt(f),
70                Error::PartialHeaders => "PartialHeaders".to_string().fmt(f),
71                Error::EofBeforeFirstBoundary => "EofBeforeFirstBoundary".to_string().fmt(f),
72                Error::NoCrLfAfterBoundary => "NoCrLfAfterBoundary".to_string().fmt(f),
73                Error::EofInPartHeaders => "EofInPartHeaders".to_string().fmt(f),
74                Error::EofInFile => "EofInFile".to_string().fmt(f),
75                Error::EofInPart => "EofInPart".to_string().fmt(f),
76                Error::InvalidHeaderNameOrValue => "InvalidHeaderNameOrValue".to_string().fmt(f),
77                Error::HeaderValueNotMime => "HeaderValueNotMime".to_string().fmt(f),
78            }
79        }
80    }
81
82    impl fmt::Debug for Error {
83        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
84            write!(f, "{}", self)?;
85            if self.source().is_some() {
86                write!(f, ": {:?}", self.source().unwrap())?; // recurse
87            }
88            Ok(())
89        }
90    }
91
92    impl StdError for Error {
93        fn description(&self) -> &str {
94            match *self {
95                Error::NoRequestContentType => {
96                    "The Hyper request did not have a Content-Type header."
97                },
98                Error::NotMultipart => {
99                    "The Hyper request Content-Type top-level Mime was not multipart."
100                },
101                Error::BoundaryNotSpecified => {
102                    "The Content-Type header failed to specify a boundary token."
103                },
104                Error::PartialHeaders => "A multipart section contained only partial headers.",
105                Error::EofBeforeFirstBoundary => {
106                    "The request body ended prior to reaching the expected starting boundary."
107                },
108                Error::NoCrLfAfterBoundary => "Missing CRLF after boundary.",
109                Error::EofInPartHeaders => {
110                    "The request body ended prematurely while parsing headers of a multipart part."
111                },
112                Error::EofInFile => {
113                    "The request body ended prematurely while streaming a file part."
114                },
115                Error::EofInPart => {
116                    "The request body ended prematurely while reading a multipart part."
117                },
118                Error::Httparse(_) => {
119                    "A parse error occurred while parsing the headers of a multipart section."
120                },
121                Error::Io(_) => "An I/O error occurred.",
122                Error::InvalidHeaderNameOrValue => "Parsing to HeaderName or HeaderValue failed",
123                Error::HeaderValueNotMime => "HeaderValue could not be parsed to Mime",
124                Error::ToStr(_) => "A ToStr error occurred.",
125            }
126        }
127    }
128}
129
130use std::fs::File;
131use std::io::{BufRead, BufReader, Read};
132use std::ops::Drop;
133use std::path::PathBuf;
134use std::str::FromStr;
135
136use buf_read_ext::BufReadExt;
137pub use error::Error;
138use http::header::{HeaderMap, HeaderName, HeaderValue};
139use mime::Mime;
140
141/// A multipart part which is not a file (stored in memory)
142#[derive(Clone, Debug, PartialEq)]
143pub struct Part {
144    pub headers: HeaderMap,
145    pub body: Vec<u8>,
146}
147
148/// A file that is to be inserted into a `multipart/*` or alternatively an uploaded file that
149/// was received as part of `multipart/*` parsing.
150#[derive(Debug, PartialEq)]
151pub struct FilePart {
152    /// The headers of the part
153    pub headers: HeaderMap,
154    /// A temporary file containing the file content
155    pub path: PathBuf,
156    /// Optionally, the size of the file.  This is filled when multiparts are parsed, but is
157    /// not necessary when they are generated.
158    pub size: Option<usize>,
159    // The temporary directory the upload was put into, saved for the Drop trait
160    tempdir: Option<PathBuf>,
161}
162impl FilePart {
163    /// Create a new temporary FilePart (when created this way, the file will be
164    /// deleted once the FilePart object goes out of scope).
165    pub fn create(headers: HeaderMap) -> Result<FilePart, Error> {
166        // TODO: Do we really need a dir with only one file in it?
167        // Perhaps we just just do a tempfile, then we also have
168        // one cleanup step less!
169        // Setup a file to capture the contents.
170        let mut path = tempfile::Builder::new()
171            .prefix("mime_multipart")
172            .tempdir()?
173            .keep();
174        let tempdir = Some(path.clone());
175        // The directory name is already guaranteed to be unique.
176        path.push("part");
177        Ok(FilePart {
178            headers,
179            path,
180            size: None,
181            tempdir,
182        })
183    }
184}
185impl Drop for FilePart {
186    fn drop(&mut self) {
187        if let Some(tempdir) = &self.tempdir {
188            let _ = std::fs::remove_file(&self.path);
189            let _ = std::fs::remove_dir(tempdir);
190        }
191    }
192}
193
194/// A multipart part which could be either a file, in memory, or another multipart
195/// container containing nested parts.
196#[derive(Debug)]
197pub enum Node {
198    /// A part in memory
199    Part(Part),
200    /// A part streamed to a file
201    File(FilePart),
202    /// A container of nested multipart parts
203    Multipart((HeaderMap, Vec<Node>)),
204}
205
206/// Parse a MIME `multipart/*` from a `Read`able stream into a `Vec` of `Node`s, streaming
207/// files to disk and keeping the rest in memory.  Recursive `multipart/*` parts will are
208/// parsed as well and returned within a `Node::Multipart` variant.
209///
210/// If `always_use_files` is true, all parts will be streamed to files.  If false, only parts
211/// with a `ContentDisposition` header set to `Attachment` or otherwise containing a `Filename`
212/// parameter will be streamed to files.
213///
214/// It is presumed that you have the `Headers` already and the stream starts at the body.
215/// If the headers are still in the stream, use `read_multipart()` instead.
216pub fn read_multipart_body<S: Read>(
217    stream: &mut S,
218    headers: &HeaderMap,
219    always_use_files: bool,
220) -> Result<Vec<Node>, Error> {
221    let mut reader = BufReader::with_capacity(4096, stream);
222    inner(&mut reader, headers, always_use_files)
223}
224
225fn inner<R: BufRead>(
226    reader: &mut R,
227    headers: &HeaderMap,
228    always_use_files: bool,
229) -> Result<Vec<Node>, Error> {
230    let mut nodes: Vec<Node> = Vec::new();
231    let mut buf: Vec<u8> = Vec::new();
232
233    let boundary = get_multipart_boundary(headers)?;
234
235    // Read past the initial boundary
236    let (_, found) = reader.stream_until_token(&boundary, &mut buf)?;
237    if !found {
238        return Err(Error::EofBeforeFirstBoundary);
239    }
240
241    // Define the boundary, including the line terminator preceding it.
242    // Use their first line terminator to determine whether to use CRLF or LF.
243    let (lt, ltlt, lt_boundary) = {
244        let peeker = reader.fill_buf()?;
245        if peeker.len() > 1 && &peeker[..2] == b"\r\n" {
246            let mut output = Vec::with_capacity(2 + boundary.len());
247            output.push(b'\r');
248            output.push(b'\n');
249            output.extend(boundary.clone());
250            (vec![b'\r', b'\n'], vec![b'\r', b'\n', b'\r', b'\n'], output)
251        } else if !peeker.is_empty() && peeker[0] == b'\n' {
252            let mut output = Vec::with_capacity(1 + boundary.len());
253            output.push(b'\n');
254            output.extend(boundary.clone());
255            (vec![b'\n'], vec![b'\n', b'\n'], output)
256        } else {
257            return Err(Error::NoCrLfAfterBoundary);
258        }
259    };
260
261    loop {
262        // If the next two lookahead characters are '--', parsing is finished.
263        {
264            let peeker = reader.fill_buf()?;
265            if peeker.len() >= 2 && &peeker[..2] == b"--" {
266                return Ok(nodes);
267            }
268        }
269
270        // Read the line terminator after the boundary
271        let (_, found) = reader.stream_until_token(&lt, &mut buf)?;
272        if !found {
273            return Err(Error::NoCrLfAfterBoundary);
274        }
275
276        // Read the headers (which end in 2 line terminators)
277        buf.truncate(0); // start fresh
278        let (_, found) = reader.stream_until_token(&ltlt, &mut buf)?;
279        if !found {
280            return Err(Error::EofInPartHeaders);
281        }
282
283        // Keep the 2 line terminators as httparse will expect it
284        buf.extend(ltlt.iter().cloned());
285
286        // Parse the headers
287        let part_headers = {
288            let mut header_memory = [httparse::EMPTY_HEADER; 4];
289            match httparse::parse_headers(&buf, &mut header_memory) {
290                Ok(httparse::Status::Complete((_, raw_headers))) => {
291                    let mut headers = HeaderMap::new();
292                    for header in raw_headers {
293                        if header.value.is_empty() {
294                            break;
295                        }
296                        let trim = header
297                            .value
298                            .iter()
299                            .rev()
300                            .take_while(|&&x| x == b' ')
301                            .count();
302                        let value = &header.value[..header.value.len() - trim];
303
304                        let header_value = match HeaderValue::from_bytes(value) {
305                            Ok(value) => value,
306                            Err(_) => return Err(Error::InvalidHeaderNameOrValue),
307                        };
308
309                        let header_name = header.name.to_owned();
310                        let header_name = match HeaderName::from_str(&header_name) {
311                            Ok(value) => value,
312                            Err(_) => return Err(Error::InvalidHeaderNameOrValue),
313                        };
314                        headers.append(header_name, header_value);
315                    }
316                    Ok(headers)
317                },
318                Ok(httparse::Status::Partial) => Err(Error::PartialHeaders),
319                Err(err) => Err(From::from(err)),
320            }?
321        };
322
323        // Check for a nested multipart
324        let nested = {
325            match part_headers.get("content-type") {
326                Some(ct) => match ct.to_str() {
327                    Ok(value) => match Mime::from_str(value) {
328                        Ok(mime) => mime.type_() == mime::MULTIPART,
329                        Err(_) => return Err(Error::HeaderValueNotMime),
330                    },
331                    Err(err) => return Err(Error::ToStr(err)),
332                },
333                None => false,
334            }
335        };
336        if nested {
337            // Recurse:
338            let inner_nodes = inner(reader, &part_headers, always_use_files)?;
339            nodes.push(Node::Multipart((part_headers, inner_nodes)));
340            continue;
341        }
342
343        let is_file = always_use_files || {
344            match part_headers.get("content-disposition") {
345                Some(content) => match content.to_str() {
346                    Ok(value) => value.contains("attachment") || value.contains("filename"),
347                    Err(err) => return Err(Error::ToStr(err)),
348                },
349                None => false,
350            }
351        };
352        if is_file {
353            // Setup a file to capture the contents.
354            let mut filepart = FilePart::create(part_headers)?;
355            let mut file = File::create(filepart.path.clone())?;
356
357            // Stream out the file.
358            let (read, found) = reader.stream_until_token(&lt_boundary, &mut file)?;
359            if !found {
360                return Err(Error::EofInFile);
361            }
362            filepart.size = Some(read);
363
364            // TODO: Handle Content-Transfer-Encoding.  RFC 7578 section 4.7 deprecated
365            // this, and the authors state "Currently, no deployed implementations that
366            // send such bodies have been discovered", so this is very low priority.
367
368            nodes.push(Node::File(filepart));
369        } else {
370            buf.truncate(0); // start fresh
371            let (_, found) = reader.stream_until_token(&lt_boundary, &mut buf)?;
372            if !found {
373                return Err(Error::EofInPart);
374            }
375
376            nodes.push(Node::Part(Part {
377                headers: part_headers,
378                body: buf.clone(),
379            }));
380        }
381    }
382}
383
384/// Get the `multipart/*` boundary string from `hyper::Headers`
385pub fn get_multipart_boundary(headers: &HeaderMap) -> Result<Vec<u8>, Error> {
386    // Verify that the request is 'Content-Type: multipart/*'.
387    let mime = match headers.get("content-type") {
388        Some(ct) => match ct.to_str() {
389            Ok(value) => match Mime::from_str(value) {
390                Ok(value) => value,
391                Err(_) => return Err(Error::HeaderValueNotMime),
392            },
393            Err(err) => return Err(Error::ToStr(err)),
394        },
395        None => return Err(Error::NoRequestContentType),
396    };
397    let top_level = mime.type_();
398
399    if top_level != mime::MULTIPART {
400        return Err(Error::NotMultipart);
401    }
402
403    match mime.get_param(mime::BOUNDARY) {
404        None => Err(Error::BoundaryNotSpecified),
405        Some(content) => {
406            let mut boundary = vec![];
407            boundary.extend(b"--".iter().cloned());
408            boundary.extend(content.to_string().as_bytes());
409            Ok(boundary)
410        },
411    }
412}