1#![allow(non_upper_case_globals, unused_imports)]
18
19use profile::{Operation, Profile};
20
21use libc::{self, CLONE_CHILD_CLEARTID, CLONE_FILES, CLONE_FS,
22 CLONE_PARENT_SETTID, CLONE_SETTLS, CLONE_SIGHAND, CLONE_SYSVSEM,
23 CLONE_THREAD, CLONE_VM};
24use libc::{AF_INET, AF_INET6, AF_UNIX, AF_NETLINK};
25use libc::{c_char, c_int, c_ulong, c_ushort, c_void};
26use libc::{O_NONBLOCK, O_RDONLY, O_NOCTTY, O_CLOEXEC, FIONREAD, FIOCLEX};
27use libc::{MADV_NORMAL, MADV_RANDOM, MADV_SEQUENTIAL, MADV_WILLNEED, MADV_DONTNEED};
28use std::ffi::CString;
29use std::mem;
30
31#[cfg(target_arch="x86")]
33const ARCH_NR: u32 = AUDIT_ARCH_X86;
34#[cfg(target_arch="x86_64")]
36const ARCH_NR: u32 = AUDIT_ARCH_X86_64;
37#[cfg(target_arch="arm")]
39const ARCH_NR: u32 = AUDIT_ARCH_ARM;
40#[cfg(target_arch="aarch64")]
42const ARCH_NR: u32 = AUDIT_ARCH_AARCH64;
43#[cfg(target_arch="powerpc")]
44const ARCH_NR: u32 = AUDIT_ARCH_PPC;
45#[cfg(all(target_arch="powerpc64", target_endian="big"))]
46const ARCH_NR: u32 = AUDIT_ARCH_PPC64;
47#[cfg(all(target_arch="powerpc64", target_endian="little"))]
48const ARCH_NR: u32 = AUDIT_ARCH_PPC64LE;
49
// Values a seccomp filter program can return to the kernel.
const SECCOMP_RET_KILL: u32 = 0x0000_0000;
const SECCOMP_RET_ALLOW: u32 = 0x7fff_0000;

// Classic-BPF opcode components. An instruction's `code` is formed by combining one value from
// each relevant group below.

// Instruction classes.
const LD: u16 = 0x00;
const JMP: u16 = 0x05;
const RET: u16 = 0x06;

// Load width and addressing mode.
const W: u16 = 0x00;
const ABS: u16 = 0x20;

// Jump conditions.
const JEQ: u16 = 0x10;
const JSET: u16 = 0x40;

// Operand source: the instruction's immediate `k` field.
const K: u16 = 0x00;
64
// Byte offsets into the kernel's `struct seccomp_data`, which is what a seccomp filter's
// absolute loads read from:
//
//     int   nr;                   // offset 0
//     __u32 arch;                 // offset 4
//     __u64 instruction_pointer;  // offset 8
//     __u64 args[6];              // offset 16, 8 bytes each
const SYSCALL_NR_OFFSET: u32 = 0;
const ARCH_NR_OFFSET: u32 = 4;

// The filter only ever loads 32-bit words, and every value it compares syscall arguments against
// fits in 32 bits, so it must read the *low* half of each 64-bit argument slot. On little-endian
// targets the low half comes first; on big-endian targets (powerpc, big-endian powerpc64) it is
// the second word of the slot.
#[cfg(target_endian = "little")]
const ARG_LOW_WORD_SHIFT: u32 = 0;
#[cfg(target_endian = "big")]
const ARG_LOW_WORD_SHIFT: u32 = 4;

const ARG_0_OFFSET: u32 = 16 + ARG_LOW_WORD_SHIFT;
const ARG_1_OFFSET: u32 = 24 + ARG_LOW_WORD_SHIFT;
const ARG_2_OFFSET: u32 = 32 + ARG_LOW_WORD_SHIFT;
70
// Netlink protocol number that the socket rule accepts for `AF_NETLINK` sockets.
const NETLINK_ROUTE: c_int = 0;

// ELF machine numbers; these form the low bits of the `AUDIT_ARCH_*` identifiers below.
const EM_386: u32 = 3;
const EM_PPC: u32 = 20;
const EM_PPC64: u32 = 21;
const EM_ARM: u32 = 40;
const EM_X86_64: u32 = 62;
const EM_AARCH64: u32 = 183;

// Flag bits combined with a machine number to describe word size and byte order.
const __AUDIT_ARCH_64BIT: u32 = 0x8000_0000;
const __AUDIT_ARCH_LE: u32 = 0x4000_0000;

// Architecture identifiers: flag bits (if any) OR-ed with the ELF machine number.
const AUDIT_ARCH_X86: u32 = __AUDIT_ARCH_LE | EM_386;
const AUDIT_ARCH_X86_64: u32 = __AUDIT_ARCH_64BIT | __AUDIT_ARCH_LE | EM_X86_64;
const AUDIT_ARCH_ARM: u32 = __AUDIT_ARCH_LE | EM_ARM;
const AUDIT_ARCH_AARCH64: u32 = __AUDIT_ARCH_64BIT | __AUDIT_ARCH_LE | EM_AARCH64;
const AUDIT_ARCH_PPC: u32 = EM_PPC;
const AUDIT_ARCH_PPC64: u32 = __AUDIT_ARCH_64BIT | EM_PPC64;
const AUDIT_ARCH_PPC64LE: u32 = __AUDIT_ARCH_64BIT | __AUDIT_ARCH_LE | EM_PPC64;

// `prctl` options used to install the filter.
const PR_SET_SECCOMP: c_int = 22;
const PR_SET_NO_NEW_PRIVS: c_int = 38;

// Argument to `PR_SET_SECCOMP` selecting filter mode.
const SECCOMP_MODE_FILTER: c_ulong = 2;
103
104static FILTER_PROLOGUE: [sock_filter; 3] = [
105 VALIDATE_ARCHITECTURE_0,
106 VALIDATE_ARCHITECTURE_1,
107 VALIDATE_ARCHITECTURE_2,
108];
109
110static FILTER_EPILOGUE: [sock_filter; 1] = [
112 KILL_PROCESS,
113];
114
115pub static ALLOWED_SYSCALLS: [u32; 21] = [
117 libc::SYS_brk as u32,
118 libc::SYS_close as u32,
119 libc::SYS_exit as u32,
120 libc::SYS_exit_group as u32,
121 libc::SYS_futex as u32,
122 libc::SYS_getrandom as u32,
123 libc::SYS_getuid as u32,
124 libc::SYS_mmap as u32,
125 libc::SYS_mprotect as u32,
126 libc::SYS_munmap as u32,
127 libc::SYS_poll as u32,
128 libc::SYS_read as u32,
129 libc::SYS_recvfrom as u32,
130 libc::SYS_recvmsg as u32,
131 libc::SYS_rt_sigreturn as u32,
132 libc::SYS_sched_getaffinity as u32,
133 libc::SYS_sendmmsg as u32,
134 libc::SYS_sendto as u32,
135 libc::SYS_set_robust_list as u32,
136 libc::SYS_sigaltstack as u32,
137 libc::SYS_write as u32,
138];
139
140static ALLOWED_SYSCALLS_FOR_FILE_READ: [u32; 5] = [
141 libc::SYS_access as u32,
142 libc::SYS_fstat as u32,
143 libc::SYS_lseek as u32,
144 libc::SYS_readlink as u32,
145 libc::SYS_stat as u32,
146];
147
148static ALLOWED_SYSCALLS_FOR_NETWORK_OUTBOUND: [u32; 3] = [
149 libc::SYS_bind as u32,
150 libc::SYS_connect as u32,
151 libc::SYS_getsockname as u32,
152];
153
154const ALLOW_SYSCALL: sock_filter = sock_filter {
155 code: RET + K,
156 k: SECCOMP_RET_ALLOW,
157 jt: 0,
158 jf: 0,
159};
160
161const KILL_PROCESS: sock_filter = sock_filter {
162 code: RET + K,
163 k: SECCOMP_RET_KILL,
164 jt: 0,
165 jf: 0,
166};
167
168const EXAMINE_SYSCALL: sock_filter = sock_filter {
169 code: LD + W + ABS,
170 k: SYSCALL_NR_OFFSET,
171 jt: 0,
172 jf: 0,
173};
174
175const EXAMINE_ARG_0: sock_filter = sock_filter {
176 code: LD + W + ABS,
177 k: ARG_0_OFFSET,
178 jt: 0,
179 jf: 0,
180};
181
182const EXAMINE_ARG_1: sock_filter = sock_filter {
183 code: LD + W + ABS,
184 k: ARG_1_OFFSET,
185 jt: 0,
186 jf: 0,
187};
188
189const EXAMINE_ARG_2: sock_filter = sock_filter {
190 code: LD + W + ABS,
191 k: ARG_2_OFFSET,
192 jt: 0,
193 jf: 0,
194};
195
196const VALIDATE_ARCHITECTURE_0: sock_filter = sock_filter {
197 code: LD + W + ABS,
198 k: ARCH_NR_OFFSET,
199 jt: 0,
200 jf: 0,
201};
202
203const VALIDATE_ARCHITECTURE_1: sock_filter = sock_filter {
204 code: JMP + JEQ + K,
205 k: ARCH_NR,
206 jt: 1,
207 jf: 0,
208};
209
210const VALIDATE_ARCHITECTURE_2: sock_filter = KILL_PROCESS;
211
212pub struct Filter {
213 program: Vec<sock_filter>,
214}
215
216impl Filter {
217 pub fn new(profile: &Profile) -> Filter {
218 let mut filter = Filter {
219 program: FILTER_PROLOGUE.iter().map(|x| *x).collect(),
220 };
221 filter.allow_syscalls(&ALLOWED_SYSCALLS);
222
223 if profile.allowed_operations().iter().any(|operation| {
224 match *operation {
225 Operation::FileReadAll(_) | Operation::FileReadMetadata(_) => true,
226 _ => false,
227 }
228 }) {
229 filter.allow_syscalls(&ALLOWED_SYSCALLS_FOR_FILE_READ);
230
231 filter.if_syscall_is(libc::SYS_open as u32, |filter| {
233 filter.if_arg1_hasnt_set(!(O_RDONLY | O_CLOEXEC | O_NOCTTY | O_NONBLOCK) as u32,
234 |filter| filter.allow_this_syscall())
235 });
236
237 filter.if_syscall_is(libc::SYS_ioctl as u32, |filter| {
239 filter.if_arg1_is(FIONREAD as u32, |filter| filter.allow_this_syscall());
240 filter.if_arg1_is(FIOCLEX as u32, |filter| filter.allow_this_syscall())
241 })
242 }
243
244 if profile.allowed_operations().iter().any(|operation| {
245 match *operation {
246 Operation::NetworkOutbound(_) => true,
247 _ => false,
248 }
249 }) {
250 filter.allow_syscalls(&ALLOWED_SYSCALLS_FOR_NETWORK_OUTBOUND);
251
252 filter.if_syscall_is(libc::SYS_socket as u32, |filter| {
254 filter.if_arg0_is(AF_UNIX as u32, |filter| filter.allow_this_syscall());
255 filter.if_arg0_is(AF_INET as u32, |filter| filter.allow_this_syscall());
256 filter.if_arg0_is(AF_INET6 as u32, |filter| filter.allow_this_syscall());
257 filter.if_arg0_is(AF_NETLINK as u32, |filter| {
258 filter.if_arg2_is(NETLINK_ROUTE as u32, |filter| filter.allow_this_syscall())
259 })
260 })
261 }
262
263 filter.if_syscall_is(libc::SYS_clone as u32, |filter| {
265 filter.if_arg0_is((CLONE_VM |
266 CLONE_FS |
267 CLONE_FILES |
268 CLONE_SIGHAND |
269 CLONE_THREAD |
270 CLONE_SYSVSEM |
271 CLONE_SETTLS |
272 CLONE_PARENT_SETTID |
273 CLONE_CHILD_CLEARTID) as u32,
274 |filter| filter.allow_this_syscall())
275 });
276
277 filter.if_syscall_is(libc::SYS_madvise as u32, |filter| {
279 for mode in [
280 MADV_NORMAL,
281 MADV_RANDOM,
282 MADV_SEQUENTIAL,
283 MADV_WILLNEED,
284 MADV_DONTNEED
285 ].iter() {
286 filter.if_arg2_is(*mode as u32, |filter| filter.allow_this_syscall())
287 }
288 });
289
290 filter.program.extend_from_slice(&FILTER_EPILOGUE);
291 filter
292 }
293
294 #[cfg(dump_bpf_sockets)]
296 pub fn dump(&self) {
297 let path = CString::from_slice(b"/tmp/gaol-bpf.XXXXXX");
298 let mut path = path.as_bytes_with_nul().to_vec();
299 let fd = unsafe {
300 libc::mkstemp(path.as_mut_ptr() as *mut c_char)
301 };
302 let nbytes = self.program.len() * mem::size_of::<sock_filter>();
303 unsafe {
304 assert!(libc::write(fd, self.program.as_ptr() as *const c_void, nbytes as u64) ==
305 nbytes as i64);
306 libc::close(fd);
307 }
308 }
309
310 #[cfg(not(dump_bpf_sockets))]
311 pub fn dump(&self) {}
312
313 pub fn activate(&self) -> Result<(),c_int> {
316 unsafe {
317 let result = libc::prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
318 if result != 0 {
319 return Err(result)
320 }
321
322 let program = sock_fprog {
323 len: self.program.len() as c_ushort,
324 filter: self.program.as_ptr(),
325 };
326 let result = libc::prctl(PR_SET_SECCOMP,
327 SECCOMP_MODE_FILTER,
328 &program as *const sock_fprog as usize as c_ulong,
329 !0,
330 0);
331 if result == 0 {
332 Ok(())
333 } else {
334 Err(result)
335 }
336 }
337 }
338
339 fn allow_this_syscall(&mut self) {
340 self.program.push(ALLOW_SYSCALL)
341 }
342
343 fn allow_syscalls(&mut self, syscalls: &[u32]) {
344 for &syscall in syscalls.iter() {
345 self.if_syscall_is(syscall, |filter| filter.allow_this_syscall())
346 }
347 }
348
349 fn if_syscall_is<F>(&mut self, number: u32, then: F) where F: FnMut(&mut Filter) {
350 self.program.push(EXAMINE_SYSCALL);
351 self.if_k_is(number, then)
352 }
353
354 fn if_arg0_is<F>(&mut self, value: u32, then: F) where F: FnMut(&mut Filter) {
355 self.program.push(EXAMINE_ARG_0);
356 self.if_k_is(value, then)
357 }
358
359 fn if_arg1_is<F>(&mut self, value: u32, then: F) where F: FnMut(&mut Filter) {
360 self.program.push(EXAMINE_ARG_1);
361 self.if_k_is(value, then)
362 }
363
364 fn if_arg1_hasnt_set<F>(&mut self, value: u32, then: F) where F: FnMut(&mut Filter) {
365 self.program.push(EXAMINE_ARG_1);
366 self.if_k_hasnt_set(value, then)
367 }
368
369 fn if_arg2_is<F>(&mut self, value: u32, then: F) where F: FnMut(&mut Filter) {
370 self.program.push(EXAMINE_ARG_2);
371 self.if_k_is(value, then)
372 }
373
374 fn if_k_is<F>(&mut self, value: u32, mut then: F) where F: FnMut(&mut Filter) {
375 let index = self.program.len();
376 self.program.push(sock_filter {
377 code: JMP + JEQ + K,
378 k: value,
379 jt: 0,
380 jf: 0,
381 });
382 then(self);
383 self.program[index].jf = (self.program.len() - index - 1) as u8;
384 }
385
386 fn if_k_hasnt_set<F>(&mut self, value: u32, mut then: F) where F: FnMut(&mut Filter) {
387 let index = self.program.len();
388 self.program.push(sock_filter {
389 code: JMP + JSET + K,
390 k: value,
391 jt: 0,
392 jf: 0,
393 });
394 then(self);
395 self.program[index].jt = (self.program.len() - index - 1) as u8;
396 }
397}
398
/// One classic-BPF instruction. `repr(C)` keeps the field order and layout fixed because a
/// pointer to an array of these is handed to the kernel through `sock_fprog`.
#[repr(C)]
#[derive(Clone, Copy)]
struct sock_filter {
    /// Opcode, built from the `LD`/`JMP`/`RET`, `W`/`ABS`, `JEQ`/`JSET` and `K` constants.
    code: u16,
    /// Number of instructions to skip when a conditional jump's test is true.
    jt: u8,
    /// Number of instructions to skip when a conditional jump's test is false.
    jf: u8,
    /// Immediate operand: load offset, comparison value or return value, depending on `code`.
    k: u32,
}

/// Program descriptor passed to `prctl(PR_SET_SECCOMP, ...)`: an instruction count plus a pointer
/// to the first instruction.
#[repr(C)]
#[derive(Clone, Copy)]
struct sock_fprog {
    /// Number of instructions that `filter` points to.
    len: c_ushort,
    /// Pointer to the first instruction; the struct does not own the memory.
    filter: *const sock_filter,
}
413}