gaol/platform/linux/
namespace.rs

1// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! Sandboxing on Linux via namespaces.
12
13use platform::linux::seccomp;
14use platform::unix::process::Process;
15use platform::unix;
16use profile::{Operation, PathPattern, Profile};
17use sandbox::Command;
18
19use libc::{self, c_char, c_int, c_ulong, c_void, gid_t, pid_t, size_t, ssize_t, uid_t};
20use std::env;
21use std::ffi::{CString, OsStr, OsString};
22use std::fs::{self, File};
23use std::io::{self, Write};
24use std::iter;
25use std::mem;
26use std::os::unix::prelude::OsStrExt;
27use std::path::{Path, PathBuf};
28use std::ptr;
29
30/// Creates a namespace and sets up a chroot jail.
31pub fn activate(profile: &Profile) -> Result<(),c_int> {
32    let jail = try!(ChrootJail::new(profile));
33    try!(jail.enter());
34    drop_capabilities()
35}
36
/// A `chroot` jail with a restricted view of the filesystem inside it.
///
/// The jail lives in a temporary directory created by `ChrootJail::new`, which mounts a `tmpfs`
/// on it and bind mounts the readable paths from the profile underneath it.
struct ChrootJail {
    /// Path of the temporary directory that serves as the jail's root; this is the directory
    /// passed to `chroot` by `enter`.
    directory: PathBuf,
}
41
42impl ChrootJail {
43    /// Creates a new `chroot` jail.
44    fn new(profile: &Profile) -> Result<ChrootJail,c_int> {
45        let prefix = CString::new("/tmp/gaol.XXXXXX").unwrap();
46        let mut prefix: Vec<u8> = prefix.as_bytes_with_nul().iter().map(|x| *x).collect();
47        unsafe {
48            if libc::mkdtemp(prefix.as_mut_ptr() as *mut c_char).is_null() {
49                return Err(-1)
50            }
51        }
52        let jail_dir = PathBuf::from(OsStr::from_bytes(&prefix[..prefix.len() - 1]));
53        let jail = ChrootJail {
54            directory: jail_dir,
55        };
56
57        let dest = CString::new(jail.directory
58                                    .as_os_str()
59                                    .to_str()
60                                    .unwrap()
61                                    .as_bytes()).unwrap();
62        let tmpfs = CString::new("tmpfs").unwrap();
63        let result = unsafe {
64            libc::mount(tmpfs.as_ptr(),
65                        dest.as_ptr(),
66                        tmpfs.as_ptr(),
67                        libc::MS_NOATIME | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID,
68                        ptr::null())
69        };
70        if result != 0 {
71            return Err(result)
72        }
73
74        for operation in profile.allowed_operations().iter() {
75            match *operation {
76                Operation::FileReadAll(PathPattern::Literal(ref path)) |
77                Operation::FileReadAll(PathPattern::Subpath(ref path)) => {
78                    try!(jail.bind_mount(path));
79                }
80                _ => {}
81            }
82        }
83
84        Ok(jail)
85    }
86
87    /// Enters the `chroot` jail.
88    fn enter(&self) -> Result<(),c_int> {
89        let directory = CString::new(self.directory
90                                         .as_os_str()
91                                         .to_str()
92                                         .unwrap()
93                                         .as_bytes()).unwrap();
94        let result = unsafe {
95            libc::chroot(directory.as_ptr())
96        };
97        if result != 0 {
98            return Err(result)
99        }
100
101        match env::set_current_dir(&Path::new(".")) {
102            Ok(_) => Ok(()),
103            Err(_) => Err(-1),
104        }
105    }
106
107    /// Bind mounts a path into our chroot jail.
108    fn bind_mount(&self, source_path: &Path) -> Result<(),c_int> {
109        // Create all intermediate directories.
110        let mut destination_path = self.directory.clone();
111        let mut components: Vec<OsString> =
112            source_path.components().skip(1)
113                                    .map(|component| component.as_os_str().to_os_string())
114                                    .collect();
115        let last_component = components.pop();
116        for component in components.into_iter() {
117            destination_path.push(component);
118            if fs::create_dir(&destination_path).is_err() {
119                return Err(-1)
120            }
121        }
122
123        // Create the mount file or directory.
124        if let Some(last_component) = last_component {
125            destination_path.push(last_component);
126            match fs::metadata(source_path) {
127                Ok(ref metadata) if metadata.is_dir() => {
128                    if fs::create_dir(&destination_path).is_err() {
129                        return Err(-1)
130                    }
131                }
132                Ok(_) => {
133                    if File::create(&destination_path).is_err() {
134                        return Err(-1)
135                    }
136                }
137                Err(_) => {
138                    // The source directory didn't exist. Just don't create the bind mount.
139                    return Ok(())
140                }
141            }
142        }
143
144        // Create the bind mount.
145        let source_path = CString::new(source_path.as_os_str()
146                                                  .to_str()
147                                                  .unwrap()
148                                                  .as_bytes()).unwrap();
149        let destination_path = CString::new(destination_path.as_os_str()
150                                                            .to_str()
151                                                            .unwrap()
152                                                            .as_bytes()).unwrap();
153        let bind = CString::new("bind").unwrap();
154        let result = unsafe {
155            libc::mount(source_path.as_ptr(),
156                  destination_path.as_ptr(),
157                  bind.as_ptr(),
158                  libc::MS_MGC_VAL | libc::MS_BIND | libc::MS_REC,
159                  ptr::null_mut())
160        };
161        if result == 0 {
162            Ok(())
163        } else {
164            Err(result)
165        }
166    }
167}
168
169/// Removes fake-superuser capabilities. This removes our ability to mess with the filesystem view
170/// we've set up.
171fn drop_capabilities() -> Result<(),c_int> {
172    let capability_data: Vec<_> = iter::repeat(__user_cap_data_struct {
173        effective: 0,
174        permitted: 0,
175        inheritable: 0,
176    }).take(_LINUX_CAPABILITY_U32S_3 as usize).collect();
177    let result = unsafe {
178        capset(&__user_cap_header_struct {
179            version: _LINUX_CAPABILITY_VERSION_3,
180            pid: 0,
181        }, capability_data.as_ptr())
182    };
183    if result == 0 {
184        Ok(())
185    } else {
186        Err(result)
187    }
188}
189
190/// Sets up the user and PID namespaces.
191unsafe fn prepare_user_and_pid_namespaces(parent_uid: uid_t, parent_gid: gid_t) -> io::Result<()> {
192    // Enter the main user and PID namespaces.
193    assert!(libc::unshare(libc::CLONE_NEWUSER | libc::CLONE_NEWPID) == 0);
194
195    // See http://crbug.com/457362 for more information on this.
196    try!(try!(File::create(&Path::new("/proc/self/setgroups"))).write_all(b"deny"));
197
198    let gid_contents = format!("0 {} 1", parent_gid);
199    try!(try!(File::create(&Path::new("/proc/self/gid_map"))).write_all(gid_contents.as_bytes()));
200    let uid_contents = format!("0 {} 1", parent_uid);
201    try!(try!(File::create(&Path::new("/proc/self/uid_map"))).write_all(uid_contents.as_bytes()));
202    Ok(())
203}
204
205/// Spawns a child process in a new namespace.
206///
207/// This function is quite tricky. Hic sunt dracones!
208pub fn start(profile: &Profile, command: &mut Command) -> io::Result<Process> {
209    // Store our root namespace UID and GID because they're going to change once we enter a user
210    // namespace.
211    let (parent_uid, parent_gid) = unsafe {
212        (libc::getuid(), libc::getgid())
213    };
214
215    // Always create an IPC namespace, a mount namespace, and a UTS namespace. Additionally, if we
216    // aren't allowing network operations, create a network namespace.
217    let mut unshare_flags = libc::CLONE_NEWIPC | libc::CLONE_NEWNS | libc::CLONE_NEWUTS;
218    if !profile.allowed_operations().iter().any(|operation| {
219        match *operation {
220            Operation::NetworkOutbound(_) => true,
221            _ => false,
222        }
223    }) {
224        unshare_flags |= libc::CLONE_NEWNET
225    }
226
227    unsafe {
228        // Create a pipe so we can communicate the PID of our grandchild back.
229        let mut pipe_fds = [0, 0];
230        assert!(libc::pipe(&mut pipe_fds[0]) == 0);
231
232        // Set this `prctl` flag so that we can wait on our grandchild. (Otherwise it'll be
233        // reparented to init.)
234        assert!(libc::prctl(libc::PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) == 0);
235
236        // Fork so that we can unshare without removing our ability to create threads.
237        if libc::fork() == 0 {
238            // Close the reading end of the pipe.
239            libc::close(pipe_fds[0]);
240
241            // Set up our user and PID namespaces. The PID namespace won't actually come into
242            // effect until the next fork(), because PIDs are immutable.
243            prepare_user_and_pid_namespaces(parent_uid, parent_gid).unwrap();
244
245            // Fork again, to enter the PID namespace.
246            match libc::fork() {
247                0 => {
248                    // Enter the auxiliary namespaces.
249                    assert!(libc::unshare(unshare_flags) == 0);
250
251                    // Go ahead and start the command.
252                    drop(unix::process::exec(command));
253                    libc::abort()
254                }
255                grandchild_pid => {
256                    // Send the PID of our child up to our parent and exit.
257                    assert!(libc::write(pipe_fds[1],
258                                        &grandchild_pid as *const pid_t as *const c_void,
259                                        mem::size_of::<pid_t>() as size_t) ==
260                                            mem::size_of::<pid_t>() as ssize_t);
261                    libc::exit(0);
262                }
263            }
264        }
265
266        // Grandparent execution continues here. First, close the writing end of the pipe.
267        libc::close(pipe_fds[1]);
268
269        // Retrieve our grandchild's PID.
270        let mut grandchild_pid: pid_t = 0;
271        assert!(libc::read(pipe_fds[0],
272                           &mut grandchild_pid as *mut i32 as *mut c_void,
273                           mem::size_of::<pid_t>() as size_t) ==
274                mem::size_of::<pid_t>() as ssize_t);
275        Ok(Process {
276            pid: grandchild_pid,
277        })
278    }
279}
/// Header argument of the raw `capset` call declared at the bottom of this file. It is
/// `#[repr(C)]` because a pointer to it is handed to foreign code.
///
/// NOTE(review): presumably mirrors the kernel's `struct __user_cap_header_struct`; confirm the
/// field layout against capset(2).
#[repr(C)]
#[allow(non_camel_case_types)]
struct __user_cap_header_struct {
    /// Capability ABI version tag; `drop_capabilities` sets it to `_LINUX_CAPABILITY_VERSION_3`.
    version: u32,
    /// Process the call applies to; `drop_capabilities` passes 0.
    pid: c_int,
}
286
/// One element of the capability data array passed to `capset`. It is `#[repr(C)]` because a
/// pointer to an array of these is handed to foreign code, and `Copy`/`Clone` so that
/// `drop_capabilities` can repeat a single all-zero value.
///
/// NOTE(review): presumably mirrors the kernel's `struct __user_cap_data_struct`; confirm the
/// field layout against capset(2).
#[repr(C)]
#[allow(non_camel_case_types)]
#[derive(Copy, Clone)]
struct __user_cap_data_struct {
    /// Bit mask for the effective capability set.
    effective: u32,
    /// Bit mask for the permitted capability set.
    permitted: u32,
    /// Bit mask for the inheritable capability set.
    inheritable: u32,
}
295
/// Pointer type of the header parameter of `capset`.
#[allow(non_camel_case_types)]
type cap_user_header_t = *const __user_cap_header_struct;

/// Pointer type of the data parameter of `capset`; points at the first element of an array of
/// `__user_cap_data_struct`.
#[allow(non_camel_case_types)]
type const_cap_user_data_t = *const __user_cap_data_struct;
301
/// Version tag that `drop_capabilities` places in `__user_cap_header_struct::version`.
const _LINUX_CAPABILITY_VERSION_3: u32 = 0x20080522;
/// Number of `__user_cap_data_struct` elements that `drop_capabilities` passes to `capset`
/// alongside a version 3 header.
const _LINUX_CAPABILITY_U32S_3: u32 = 2;
extern {
    /// Foreign `capset` function: sets capability sets from the given header and data array.
    /// `drop_capabilities` treats a return value of 0 as success and anything else as failure.
    fn capset(hdrp: cap_user_header_t, datap: const_cap_user_data_t) -> c_int;
}