Skip to main content

memmap2/
unix.rs

1use std::fs::File;
2use std::io;
3use std::mem::ManuallyDrop;
4use std::os::unix::io::{FromRawFd, RawFd};
5use std::sync::atomic::{AtomicUsize, Ordering};
6
7#[cfg(any(
8    all(target_os = "linux", not(target_arch = "mips")),
9    target_os = "freebsd",
10    target_os = "android"
11))]
12const MAP_STACK: libc::c_int = libc::MAP_STACK;
13
14#[cfg(not(any(
15    all(target_os = "linux", not(target_arch = "mips")),
16    target_os = "freebsd",
17    target_os = "android"
18)))]
19const MAP_STACK: libc::c_int = 0;
20
21#[cfg(any(target_os = "linux", target_os = "android"))]
22const MAP_POPULATE: libc::c_int = libc::MAP_POPULATE;
23
24#[cfg(not(any(target_os = "linux", target_os = "android")))]
25const MAP_POPULATE: libc::c_int = 0;
26
27#[cfg(any(target_os = "linux", target_os = "android"))]
28const MAP_HUGETLB: libc::c_int = libc::MAP_HUGETLB;
29
30#[cfg(target_os = "linux")]
31const MAP_HUGE_MASK: libc::c_int = libc::MAP_HUGE_MASK;
32
33#[cfg(any(target_os = "linux", target_os = "android"))]
34const MAP_HUGE_SHIFT: libc::c_int = libc::MAP_HUGE_SHIFT;
35
36#[cfg(not(any(target_os = "linux", target_os = "android")))]
37const MAP_HUGETLB: libc::c_int = 0;
38
39#[cfg(not(target_os = "linux"))]
40const MAP_HUGE_MASK: libc::c_int = 0;
41
42#[cfg(not(any(target_os = "linux", target_os = "android")))]
43const MAP_HUGE_SHIFT: libc::c_int = 0;
44
45#[cfg(any(
46    target_os = "linux",
47    target_os = "android",
48    target_vendor = "apple",
49    target_os = "netbsd",
50    target_os = "solaris",
51    target_os = "illumos",
52))]
53const MAP_NORESERVE: libc::c_int = libc::MAP_NORESERVE;
54
55#[cfg(not(any(
56    target_os = "linux",
57    target_os = "android",
58    target_vendor = "apple",
59    target_os = "netbsd",
60    target_os = "solaris",
61    target_os = "illumos",
62)))]
63const MAP_NORESERVE: libc::c_int = 0;
64
65#[cfg(any(
66    target_os = "android",
67    all(target_os = "linux", not(target_env = "musl"))
68))]
69use libc::{mmap64 as mmap, off64_t as off_t};
70
71#[cfg(not(any(
72    target_os = "android",
73    all(target_os = "linux", not(target_env = "musl"))
74)))]
75use libc::{mmap, off_t};
76
77pub struct MmapInner {
78    ptr: *mut libc::c_void,
79    len: usize,
80}
81
82impl MmapInner {
83    /// Creates a new `MmapInner`.
84    ///
85    /// This is a thin wrapper around the `mmap` system call.
86    fn new(
87        len: usize,
88        prot: libc::c_int,
89        flags: libc::c_int,
90        file: RawFd,
91        offset: u64,
92    ) -> io::Result<MmapInner> {
93        let alignment = offset % page_size() as u64;
94        let aligned_offset = offset - alignment;
95
96        let (map_len, map_offset) = Self::adjust_mmap_params(len, alignment as usize)?;
97
98        // SAFETY: creating a new memory map with a nullptr as address hint is always sound:
99        // it does not modify any existing mapping or memory contents.
100        let ptr = unsafe {
101            mmap(
102                std::ptr::null_mut(),
103                map_len as libc::size_t,
104                prot,
105                flags,
106                file,
107                aligned_offset as off_t,
108            )
109        };
110
111        if ptr == libc::MAP_FAILED {
112            Err(io::Error::last_os_error())
113        } else {
114            // SAFETY: The ptr and len have been checked,
115            // and the offset has been calculated as required.
116            Ok(unsafe { Self::from_raw_parts(ptr, len, map_offset) })
117        }
118    }
119
120    fn adjust_mmap_params(len: usize, alignment: usize) -> io::Result<(usize, usize)> {
121        // Rust's slice cannot be larger than isize::MAX.
122        // See https://doc.rust-lang.org/std/slice/fn.from_raw_parts.html
123        //
124        // This is not a problem on 64-bit targets, but on 32-bit one
125        // having a file or an anonymous mapping larger than 2GB is quite normal
126        // and we have to prevent it.
127        //
128        // The code below is essentially the same as in Rust's std:
129        // https://github.com/rust-lang/rust/blob/db78ab70a88a0a5e89031d7ee4eccec835dcdbde/library/alloc/src/raw_vec.rs#L495
130        if std::mem::size_of::<usize>() < 8 && len > isize::MAX as usize {
131            return Err(io::Error::new(
132                io::ErrorKind::InvalidData,
133                "memory map length overflows isize",
134            ));
135        }
136
137        let map_len = len + alignment;
138        let map_offset = alignment;
139
140        // `libc::mmap` does not support zero-size mappings. POSIX defines:
141        //
142        // https://pubs.opengroup.org/onlinepubs/9699919799/functions/mmap.html
143        // > If `len` is zero, `mmap()` shall fail and no mapping shall be established.
144        //
145        // So if we would create such a mapping, crate a one-byte mapping instead:
146        let map_len = map_len.max(1);
147
148        // Note that in that case `MmapInner::len` is still set to zero,
149        // and `Mmap` will still dereferences to an empty slice.
150        //
151        // If this mapping is backed by an empty file, we create a mapping larger than the file.
152        // This is unusual but well-defined. On the same man page, POSIX further defines:
153        //
154        // > The `mmap()` function can be used to map a region of memory that is larger
155        // > than the current size of the object.
156        //
157        // (The object here is the file.)
158        //
159        // > Memory access within the mapping but beyond the current end of the underlying
160        // > objects may result in SIGBUS signals being sent to the process. The reason for this
161        // > is that the size of the object can be manipulated by other processes and can change
162        // > at any moment. The implementation should tell the application that a memory reference
163        // > is outside the object where this can be detected; otherwise, written data may be lost
164        // > and read data may not reflect actual data in the object.
165        //
166        // Because `MmapInner::len` is not incremented, this increment of `aligned_len`
167        // will not allow accesses past the end of the file and will not cause SIGBUS.
168        //
169        // (SIGBUS is still possible by mapping a non-empty file and then truncating it
170        // to a shorter size, but that is unrelated to this handling of empty files.)
171        Ok((map_len, map_offset))
172    }
173
174    /// Get the current memory mapping as a `(ptr, map_len, offset)` tuple.
175    ///
176    /// Note that `map_len` is the length of the memory mapping itself and
177    /// _not_ the one that would be passed to `from_raw_parts`.
178    fn as_mmap_params(&self) -> (*mut libc::c_void, usize, usize) {
179        let offset = self.ptr as usize % page_size();
180        let len = self.len + offset;
181
182        // There are two possible memory layouts we could have, depending on
183        // the length and offset passed when constructing this instance:
184        //
185        // 1. The "normal" memory layout looks like this:
186        //
187        //         |<------------------>|<---------------------->|
188        //     mmap ptr    offset      ptr     public slice
189        //
190        //    That is, we have
191        //    - The start of the page-aligned memory mapping returned by mmap,
192        //      followed by,
193        //    - Some number of bytes that are memory mapped but ignored since
194        //      they are before the byte offset requested by the user, followed
195        //      by,
196        //    - The actual memory mapped slice requested by the user.
197        //
198        //    This maps cleanly to a (ptr, len, offset) tuple.
199        //
200        // 2. Then, we have the case where the user requested a zero-length
201        //    memory mapping. mmap(2) does not support zero-length mappings so
202        //    this crate works around that by actually making a mapping of
203        //    length one. This means that we have
204        //    - A length zero slice, followed by,
205        //    - A single memory mapped byte
206        //
207        //    Note that this only happens if the offset within the page is also
208        //    zero. Otherwise, we have a memory map of offset bytes and not a
209        //    zero-length memory map.
210        //
211        //    This doesn't fit cleanly into a (ptr, len, offset) tuple. Instead,
212        //    we fudge it slightly: a zero-length memory map turns into a
213        //    mapping of length one and can't be told apart outside of this
214        //    method without knowing the original length.
215        if len == 0 {
216            (self.ptr, 1, 0)
217        } else {
218            let offset = self.ptr as usize % page_size();
219            // SAFETY: MmapInner guarantees that rounding `self.ptr` down to a page boundary gives the real address of the memory map.
220            // This means that it points into the same allocation as `self.ptr`.
221            let ptr = unsafe { self.ptr.sub(offset) };
222            (ptr, len, offset)
223        }
224    }
225
226    /// Construct this `MmapInner` from its raw components
227    ///
228    /// # Safety
229    ///
230    /// - `ptr` must point to the start of memory mapping that can be freed
231    ///   using `munmap(2)` (i.e. returned by `mmap(2)` or `mremap(2)`)
232    /// - The memory mapping at `ptr` must have a length of `len + offset`.
233    /// - If `len + offset == 0` then the memory mapping must be of length 1.
234    /// - `offset` must be less than the current page size.
235    unsafe fn from_raw_parts(ptr: *mut libc::c_void, len: usize, offset: usize) -> Self {
236        debug_assert_eq!(ptr as usize % page_size(), 0, "ptr not page-aligned");
237        debug_assert!(offset < page_size(), "offset larger than page size");
238
239        Self {
240            ptr: unsafe { ptr.add(offset) },
241            len,
242        }
243    }
244
245    pub fn map(
246        len: usize,
247        file: RawFd,
248        offset: u64,
249        populate: bool,
250        no_reserve: bool,
251    ) -> io::Result<MmapInner> {
252        let populate = if populate { MAP_POPULATE } else { 0 };
253        let no_reserve = if no_reserve { MAP_NORESERVE } else { 0 };
254        MmapInner::new(
255            len,
256            libc::PROT_READ,
257            libc::MAP_SHARED | populate | no_reserve,
258            file,
259            offset,
260        )
261    }
262
263    pub fn map_exec(
264        len: usize,
265        file: RawFd,
266        offset: u64,
267        populate: bool,
268        no_reserve: bool,
269    ) -> io::Result<MmapInner> {
270        let populate = if populate { MAP_POPULATE } else { 0 };
271        let no_reserve = if no_reserve { MAP_NORESERVE } else { 0 };
272        MmapInner::new(
273            len,
274            libc::PROT_READ | libc::PROT_EXEC,
275            libc::MAP_SHARED | populate | no_reserve,
276            file,
277            offset,
278        )
279    }
280
281    pub fn map_mut(
282        len: usize,
283        file: RawFd,
284        offset: u64,
285        populate: bool,
286        no_reserve: bool,
287    ) -> io::Result<MmapInner> {
288        let populate = if populate { MAP_POPULATE } else { 0 };
289        let no_reserve = if no_reserve { MAP_NORESERVE } else { 0 };
290        MmapInner::new(
291            len,
292            libc::PROT_READ | libc::PROT_WRITE,
293            libc::MAP_SHARED | populate | no_reserve,
294            file,
295            offset,
296        )
297    }
298
299    pub fn map_copy(
300        len: usize,
301        file: RawFd,
302        offset: u64,
303        populate: bool,
304        no_reserve: bool,
305    ) -> io::Result<MmapInner> {
306        let populate = if populate { MAP_POPULATE } else { 0 };
307        let no_reserve = if no_reserve { MAP_NORESERVE } else { 0 };
308        MmapInner::new(
309            len,
310            libc::PROT_READ | libc::PROT_WRITE,
311            libc::MAP_PRIVATE | populate | no_reserve,
312            file,
313            offset,
314        )
315    }
316
317    pub fn map_copy_read_only(
318        len: usize,
319        file: RawFd,
320        offset: u64,
321        populate: bool,
322        no_reserve: bool,
323    ) -> io::Result<MmapInner> {
324        let populate = if populate { MAP_POPULATE } else { 0 };
325        let no_reserve = if no_reserve { MAP_NORESERVE } else { 0 };
326        MmapInner::new(
327            len,
328            libc::PROT_READ,
329            libc::MAP_PRIVATE | populate | no_reserve,
330            file,
331            offset,
332        )
333    }
334
335    /// Open an anonymous memory map.
336    pub fn map_anon(
337        len: usize,
338        stack: bool,
339        populate: bool,
340        huge: Option<u8>,
341        no_reserve: bool,
342    ) -> io::Result<MmapInner> {
343        let stack = if stack { MAP_STACK } else { 0 };
344        let populate = if populate { MAP_POPULATE } else { 0 };
345        let hugetlb = if huge.is_some() { MAP_HUGETLB } else { 0 };
346        let hugetlb_size = huge.map_or(0, |mask| {
347            (u64::from(mask) & (MAP_HUGE_MASK as u64)) << MAP_HUGE_SHIFT
348        }) as i32;
349        let no_reserve = if no_reserve { MAP_NORESERVE } else { 0 };
350        MmapInner::new(
351            len,
352            libc::PROT_READ | libc::PROT_WRITE,
353            libc::MAP_PRIVATE
354                | libc::MAP_ANON
355                | stack
356                | populate
357                | hugetlb
358                | hugetlb_size
359                | no_reserve,
360            -1,
361            0,
362        )
363    }
364
365    pub fn flush(&self, offset: usize, len: usize) -> io::Result<()> {
366        if offset > self.len || len > self.len - offset {
367            return Err(io::ErrorKind::InvalidInput.into());
368        }
369        let alignment = (self.ptr as usize + offset) % page_size();
370        let offset = offset as isize - alignment as isize;
371        let len = len + alignment;
372        let result =
373            // SAFETY: We've checked that offset and len fall within the mapped region.
374            unsafe { libc::msync(self.ptr.offset(offset), len as libc::size_t, libc::MS_SYNC) };
375        if result == 0 {
376            Ok(())
377        } else {
378            Err(io::Error::last_os_error())
379        }
380    }
381
382    pub fn flush_async(&self, offset: usize, len: usize) -> io::Result<()> {
383        if offset > self.len || len > self.len - offset {
384            return Err(io::ErrorKind::InvalidInput.into());
385        }
386        let alignment = (self.ptr as usize + offset) % page_size();
387        let offset = offset as isize - alignment as isize;
388        let len = len + alignment;
389        let result =
390            // SAFETY: We've checked that offset and len fall within the mapped region.
391            unsafe { libc::msync(self.ptr.offset(offset), len as libc::size_t, libc::MS_ASYNC) };
392        if result == 0 {
393            Ok(())
394        } else {
395            Err(io::Error::last_os_error())
396        }
397    }
398
399    fn mprotect(&mut self, prot: libc::c_int) -> io::Result<()> {
400        let alignment = self.ptr as usize % page_size();
401        // SAFETY: rounding self.ptr down to the previous page boundary gives the pointer of the actual memory map.
402        let ptr = unsafe { self.ptr.sub(alignment) };
403        let len = self.len + alignment;
404        let len = len.max(1);
405
406        // SAFETY: the contract of MmapInner guarantees ptr and len are valid.
407        if unsafe { libc::mprotect(ptr, len, prot) } == 0 {
408            Ok(())
409        } else {
410            Err(io::Error::last_os_error())
411        }
412    }
413
414    pub fn make_read_only(&mut self) -> io::Result<()> {
415        self.mprotect(libc::PROT_READ)
416    }
417
418    pub fn make_exec(&mut self) -> io::Result<()> {
419        self.mprotect(libc::PROT_READ | libc::PROT_EXEC)
420    }
421
422    pub fn make_mut(&mut self) -> io::Result<()> {
423        self.mprotect(libc::PROT_READ | libc::PROT_WRITE)
424    }
425
426    #[inline]
427    pub fn ptr(&self) -> *const u8 {
428        self.ptr as *const u8
429    }
430
431    #[inline]
432    pub fn mut_ptr(&mut self) -> *mut u8 {
433        self.ptr.cast()
434    }
435
436    #[inline]
437    pub fn len(&self) -> usize {
438        self.len
439    }
440
441    /// Perform an `madvise()`.
442    ///
443    /// # Safety
444    ///
445    /// Some `advise` values can be unsound depending on the situation.
446    /// It is up to the caller to only perform sound madvise() calls on the memory range.
447    pub unsafe fn advise(&self, advice: libc::c_int, offset: usize, len: usize) -> io::Result<()> {
448        if offset > self.len || len > self.len {
449            return Err(std::io::ErrorKind::InvalidInput.into());
450        }
451        let alignment = (self.ptr as usize + offset) % page_size();
452        let offset = offset as isize - alignment as isize;
453        let len = len + alignment;
454
455        // SAFETY: We've checked that offset is within the mapped region.
456        let ptr = unsafe { self.ptr.offset(offset) };
457
458        // The AIX signature of 'madvise()' differs from the POSIX
459        // specification, which expects 'void *' as the type of the
460        // 'addr' argument, whereas AIX uses 'caddr_t' (i.e., 'char *').
461        #[cfg(target_os = "aix")]
462        let ptr = self.ptr.offset(offset).cast();
463
464        // SAFETY: ptr and len are valid. The burden of giving a safe `advice` value is on the caller.
465        if unsafe { libc::madvise(ptr, len, advice) } != 0 {
466            Err(io::Error::last_os_error())
467        } else {
468            Ok(())
469        }
470    }
471
472    #[cfg(target_os = "linux")]
473    pub fn remap(&mut self, new_len: usize, options: crate::RemapOptions) -> io::Result<()> {
474        let (old_ptr, old_len, offset) = self.as_mmap_params();
475        let (map_len, offset) = Self::adjust_mmap_params(new_len, offset)?;
476
477        // SAFETY: we hold a mutable reference to self, so we can adjust the location and size of the mapping.
478        let new_ptr = unsafe { libc::mremap(old_ptr, old_len, map_len, options.into_flags()) };
479
480        if new_ptr == libc::MAP_FAILED {
481            Err(io::Error::last_os_error())
482        } else {
483            // SAFETY: The pointer and length passed to `from_raw_parts` have just been obtained from a real map, so they must be valid.
484            let new_map = unsafe { Self::from_raw_parts(new_ptr, new_len, offset) };
485            // We explicitly don't drop self since the pointer within is no longer valid.
486            // Instead, swap the new map into `self` and forget the old one.
487            let old_map = std::mem::replace(self, new_map);
488            std::mem::forget(old_map);
489            Ok(())
490        }
491    }
492
493    pub fn lock(&self) -> io::Result<()> {
494        unsafe {
495            if libc::mlock(self.ptr, self.len) != 0 {
496                Err(io::Error::last_os_error())
497            } else {
498                Ok(())
499            }
500        }
501    }
502
503    pub fn unlock(&self) -> io::Result<()> {
504        unsafe {
505            if libc::munlock(self.ptr, self.len) != 0 {
506                Err(io::Error::last_os_error())
507            } else {
508                Ok(())
509            }
510        }
511    }
512}
513
514impl Drop for MmapInner {
515    fn drop(&mut self) {
516        let (ptr, len, _) = self.as_mmap_params();
517
518        // Any errors during unmapping/closing are ignored as the only way
519        // to report them would be through panicking which is highly discouraged
520        // in Drop impls, c.f. https://github.com/rust-lang/lang-team/issues/97
521        unsafe { libc::munmap(ptr, len as libc::size_t) };
522    }
523}
524
525unsafe impl Sync for MmapInner {}
526unsafe impl Send for MmapInner {}
527
528fn page_size() -> usize {
529    static PAGE_SIZE: AtomicUsize = AtomicUsize::new(0);
530
531    match PAGE_SIZE.load(Ordering::Relaxed) {
532        0 => {
533            let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize };
534
535            PAGE_SIZE.store(page_size, Ordering::Relaxed);
536
537            page_size
538        }
539        page_size => page_size,
540    }
541}
542
/// Returns the length in bytes of the file behind the raw descriptor `file`.
///
/// The descriptor is only borrowed: it is not closed by this function.
///
/// # Errors
///
/// Returns the error from the metadata query if it fails.
pub fn file_len(file: RawFd) -> io::Result<u64> {
    // SAFETY: We must not close the passed-in fd by dropping the File we create,
    // we ensure this by immediately wrapping it in a ManuallyDrop.
    let borrowed = ManuallyDrop::new(unsafe { File::from_raw_fd(file) });
    let metadata = borrowed.metadata()?;
    Ok(metadata.len())
}