1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Sandboxing on Linux via namespaces.
use platform::linux::seccomp;
use platform::unix::process::Process;
use platform::unix;
use profile::{Operation, PathPattern, Profile};
use sandbox::Command;
use libc::{self, c_char, c_int, c_ulong, c_void, gid_t, pid_t, size_t, ssize_t, uid_t};
use std::env;
use std::ffi::{CString, OsStr, OsString};
use std::fs::{self, File};
use std::io::{self, Write};
use std::iter;
use std::mem;
use std::os::unix::prelude::OsStrExt;
use std::path::{Path, PathBuf};
use std::ptr;
/// Creates a namespace and sets up a chroot jail.
///
/// The jail is built from `profile` via `ChrootJail::new`. Errors are reported as a raw
/// `c_int` rather than a structured error type.
pub fn activate(profile: &Profile) -> Result<(),c_int> {
// `try!` returns early with the error code if the jail could not be constructed.
let jail = try!(ChrootJail::new(profile));
// NOTE(review): the rest of this function body is not visible in this view.
/// A `chroot` jail with a restricted view of the filesystem inside it.
struct ChrootJail {
// Directory backing the jail; `ChrootJail::new` fills this with the temporary directory it
// creates with `mkdtemp`.
directory: PathBuf,
impl ChrootJail {
/// Creates a new `chroot` jail.
///
/// Makes a fresh temporary directory from the `/tmp/gaol.XXXXXX` template and records it as the
/// jail directory, then walks the profile's allowed operations looking for readable paths.
///
/// Errors are raw `c_int` values: `-1` when `mkdtemp` fails, or the non-zero `result` of the
/// call checked further down.
fn new(profile: &Profile) -> Result<ChrootJail,c_int> {
// `mkdtemp` rewrites its template in place, so copy the template (including the trailing NUL)
// into a mutable byte buffer.
let prefix = CString::new("/tmp/gaol.XXXXXX").unwrap();
let mut prefix: Vec<u8> = prefix.as_bytes_with_nul().iter().map(|x| *x).collect();
unsafe {
// A null return from `mkdtemp` means the directory could not be created.
if libc::mkdtemp(prefix.as_mut_ptr() as *mut c_char).is_null() {
return Err(-1)
// Strip the trailing NUL byte before turning the buffer into a path.
let jail_dir = PathBuf::from(OsStr::from_bytes(&prefix[..prefix.len() - 1]));
let jail = ChrootJail {
directory: jail_dir,
// NOTE(review): the call that consumes `dest` and `tmpfs` is not visible in this view;
// presumably a tmpfs is mounted over the jail directory — confirm.
let dest = CString::new(
let tmpfs = CString::new("tmpfs").unwrap();
let result = unsafe {
// Flags: no access-time updates, no device files, no executables, no set-uid binaries.
libc::MS_NOATIME | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID,
// Any non-zero result is handed back to the caller unchanged.
if result != 0 {
return Err(result)
// Only `FileReadAll` operations with a literal or subpath pattern are acted upon; every other
// operation is ignored here.
for operation in profile.allowed_operations().iter() {
match *operation {
// NOTE(review): the body of this arm is not visible in this view; presumably `path` is
// bind-mounted into the jail — confirm.
Operation::FileReadAll(PathPattern::Literal(ref path)) |
Operation::FileReadAll(PathPattern::Subpath(ref path)) => {
_ => {}
/// Enters the `chroot` jail.
///
/// Returns `Err(result)` when the call checked below yields a non-zero value, and `Err(-1)` when
/// the current directory cannot be set afterwards.
fn enter(&self) -> Result<(),c_int> {
// NOTE(review): the expression building `directory` and the call producing `result` are not
// visible in this view.
let directory = CString::new(
let result = unsafe {
if result != 0 {
return Err(result)
// Set the current directory to `.`; any I/O error is collapsed into `-1`.
match env::set_current_dir(&Path::new(".")) {
Ok(_) => Ok(()),
Err(_) => Err(-1),
/// Bind mounts a path into our chroot jail.
///
/// Creates the intermediate directories, then a placeholder at the destination (a directory if
/// `source_path` is a directory, an empty file otherwise), and finally issues the bind mount.
///
/// If the metadata of `source_path` cannot be read, nothing is mounted and `Ok(())` is returned.
/// A failure to create any directory or the placeholder file yields `Err(-1)`.
fn bind_mount(&self, source_path: &Path) -> Result<(),c_int> {
// Create all intermediate directories.
// NOTE(review): the initial value of `destination_path` and the source of the component
// iterator are not visible in this view.
let mut destination_path =;
// Components are copied into owned `OsString`s.
let mut components: Vec<OsString> =
.map(|component| component.as_os_str().to_os_string())
// The final component is split off because it is created separately below, as either a
// directory or a file.
let last_component = components.pop();
for component in components.into_iter() {
// NOTE(review): `fs::create_dir` also fails when the directory already exists, which would
// surface here as `Err(-1)` — confirm that this is intended.
if fs::create_dir(&destination_path).is_err() {
return Err(-1)
// Create the mount file or directory.
if let Some(last_component) = last_component {
match fs::metadata(source_path) {
// Source is a directory: the mount point must be a directory too.
Ok(ref metadata) if metadata.is_dir() => {
if fs::create_dir(&destination_path).is_err() {
return Err(-1)
// Source is anything else: an empty file serves as the mount point.
Ok(_) => {
if File::create(&destination_path).is_err() {
return Err(-1)
Err(_) => {
// The source directory didn't exist. Just don't create the bind mount.
return Ok(())
// Create the bind mount.
// The paths and the "bind" string are converted to C strings for the raw call.
let source_path = CString::new(source_path.as_os_str()
let destination_path = CString::new(destination_path.as_os_str()
let bind = CString::new("bind").unwrap();
let result = unsafe {
// `MS_BIND | MS_REC` requests a recursive bind mount.
libc::MS_MGC_VAL | libc::MS_BIND | libc::MS_REC,
// NOTE(review): the bodies of both branches are not visible in this view.
if result == 0 {
} else {
/// Removes fake-superuser capabilities. This removes our ability to mess with the filesystem view
/// we've set up.
///
/// Calls `capset` with every capability set (effective, permitted, inheritable) zeroed.
fn drop_capabilities() -> Result<(),c_int> {
// Build `_LINUX_CAPABILITY_U32S_3` all-zero data entries; zero bits mean no capabilities.
let capability_data: Vec<_> = iter::repeat(__user_cap_data_struct {
effective: 0,
permitted: 0,
inheritable: 0,
}).take(_LINUX_CAPABILITY_U32S_3 as usize).collect();
let result = unsafe {
// NOTE(review): the header's `version` field initializer is not visible in this view.
capset(&__user_cap_header_struct {
// A `pid` of 0 targets the calling thread (see capset(2)).
pid: 0,
}, capability_data.as_ptr())
// NOTE(review): the bodies of both branches are not visible in this view.
if result == 0 {
} else {
/// Sets up the user and PID namespaces.
///
/// `parent_uid` and `parent_gid` are the IDs used to build the ID-mapping strings below.
///
/// # Panics
///
/// Panics if `unshare` fails.
unsafe fn prepare_user_and_pid_namespaces(parent_uid: uid_t, parent_gid: gid_t) -> io::Result<()> {
// Enter the main user and PID namespaces.
assert!(libc::unshare(libc::CLONE_NEWUSER | libc::CLONE_NEWPID) == 0);
// See for more information on this.
// NOTE(review): the link that followed "See" above is missing from this copy, as is the
// statement it annotated.
// Each string is one mapping entry in `<inside> <outside> <count>` form: ID 0 paired with the
// parent's ID, one ID long.
// NOTE(review): presumably these are written to the process's gid/uid map files; the writes
// are not visible in this view.
let gid_contents = format!("0 {} 1", parent_gid);
let uid_contents = format!("0 {} 1", parent_uid);
/// Spawns a child process in a new namespace.
///
/// This function is quite tricky. Hic sunt dracones!
///
/// The visible flow forks twice. The first child sets up the user and PID namespaces and forks
/// again; the grandchild unshares the remaining namespaces and starts the command. The PID of
/// the grandchild is passed back to the original process over a pipe and returned inside the
/// `Process`.
///
/// # Panics
///
/// Panics if `pipe`, `prctl`, or `unshare` fail, or if `prepare_user_and_pid_namespaces`
/// returns an error (all are checked with `assert!`/`unwrap`).
pub fn start(profile: &Profile, command: &mut Command) -> io::Result<Process> {
// Store our root namespace UID and GID because they're going to change once we enter a user
// namespace.
let (parent_uid, parent_gid) = unsafe {
(libc::getuid(), libc::getgid())
// Always create an IPC namespace, a mount namespace, and a UTS namespace. Additionally, if we
// aren't allowing network operations, create a network namespace.
let mut unshare_flags = libc::CLONE_NEWIPC | libc::CLONE_NEWNS | libc::CLONE_NEWUTS;
// Only `NetworkOutbound` counts as a network operation for this check.
if !profile.allowed_operations().iter().any(|operation| {
match *operation {
Operation::NetworkOutbound(_) => true,
_ => false,
}) {
unshare_flags |= libc::CLONE_NEWNET
unsafe {
// Create a pipe so we can communicate the PID of our grandchild back.
let mut pipe_fds = [0, 0];
assert!(libc::pipe(&mut pipe_fds[0]) == 0);
// Set this `prctl` flag so that we can wait on our grandchild. (Otherwise it'll be
// reparented to init.)
assert!(libc::prctl(libc::PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) == 0);
// Fork so that we can unshare without removing our ability to create threads.
// NOTE(review): a `fork` failure (`-1`) is not distinguished here; being non-zero, it
// continues on the parent path.
if libc::fork() == 0 {
// Close the reading end of the pipe.
// Set up our user and PID namespaces. The PID namespace won't actually come into
// effect until the next fork(), because PIDs are immutable.
prepare_user_and_pid_namespaces(parent_uid, parent_gid).unwrap();
// Fork again, to enter the PID namespace.
match libc::fork() {
// Grandchild: this is the process that ends up running the command.
0 => {
// Enter the auxiliary namespaces.
assert!(libc::unshare(unshare_flags) == 0);
// Go ahead and start the command.
// Intermediate child. NOTE(review): this binding is a catch-all, so a failed `fork`
// (`-1`) is also handled by this arm.
grandchild_pid => {
// Send the PID of our child up to our parent and exit.
// The raw bytes of the `pid_t` are transferred; the assertion demands that exactly
// `size_of::<pid_t>()` bytes were handled.
&grandchild_pid as *const pid_t as *const c_void,
mem::size_of::<pid_t>() as size_t) ==
mem::size_of::<pid_t>() as ssize_t);
// Grandparent execution continues here. First, close the writing end of the pipe.
// Retrieve our grandchild's PID.
let mut grandchild_pid: pid_t = 0;
// Mirror of the transfer above: exactly `size_of::<pid_t>()` bytes must be handled.
&mut grandchild_pid as *mut i32 as *mut c_void,
mem::size_of::<pid_t>() as size_t) ==
mem::size_of::<pid_t>() as ssize_t);
// Hand the grandchild's PID back to the caller.
Ok(Process {
pid: grandchild_pid,
/// Header argument passed to `capset`.
struct __user_cap_header_struct {
// Capability ABI version. NOTE(review): presumably `_LINUX_CAPABILITY_VERSION_3` — confirm.
version: u32,
// Target of the call; `drop_capabilities` passes 0.
pid: c_int,
/// One entry of the capability data array passed to `capset`.
///
/// `Clone` is needed because `drop_capabilities` replicates a value with `iter::repeat`.
#[derive(Copy, Clone)]
struct __user_cap_data_struct {
// Bitmask of the effective capability set.
effective: u32,
// Bitmask of the permitted capability set.
permitted: u32,
// Bitmask of the inheritable capability set.
inheritable: u32,
// Pointer aliases matching the parameter types of the `capset` declaration.
type cap_user_header_t = *const __user_cap_header_struct;
type const_cap_user_data_t = *const __user_cap_data_struct;
// Capability ABI version constant. NOTE(review): its use is not visible in this view.
const _LINUX_CAPABILITY_VERSION_3: u32 = 0x20080522;
// Number of `__user_cap_data_struct` entries that `drop_capabilities` passes to `capset`.
const _LINUX_CAPABILITY_U32S_3: u32 = 2;
// Foreign declaration of `capset`; `drop_capabilities` treats a return of 0 as success.
extern {
fn capset(hdrp: cap_user_header_t, datap: const_cap_user_data_t) -> c_int;