background_hang_monitor/
sampler_linux.rs

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5#![allow(unsafe_code)]
6
7use std::cell::UnsafeCell;
8use std::{io, mem, process, thread};
9
10use nix::sys::signal::{SaFlags, SigAction, SigHandler, SigSet, Signal, sigaction};
11
12use crate::sampler::{NativeStack, Sampler};
13
/// Thin newtype around [`UnsafeCell`] that can be placed in a `static`.
///
/// The wrapper adds no synchronization of its own; it only carries the
/// `Sync` marker implemented below.
struct UncheckedSyncUnsafeCell<T>(UnsafeCell<T>);

// SAFETY: sharing the wrapper across threads only hands out the raw pointer
// returned by `UnsafeCell::get`. Every dereference of that pointer must be
// protected by external synchronization provided by the code doing the access.
unsafe impl<T> Sync for UncheckedSyncUnsafeCell<T> {}
18
19static SHARED_STATE: UncheckedSyncUnsafeCell<SharedState> =
20    UncheckedSyncUnsafeCell(std::cell::UnsafeCell::new(SharedState {
21        msg2: None,
22        msg3: None,
23        msg4: None,
24    }));
25
/// Identifier of the thread being sampled. `LinuxSampler::new_boxed` fills it
/// with the result of the `gettid` syscall, and it is later handed to
/// `send_sigprof` as the target of the signal.
type MonitoredThreadId = libc::pid_t;
27
/// Semaphores used for the handshake between the sampling thread and the
/// signal handler running on the sampled thread.
///
/// The numbering continues from the signal itself, which plays the role of
/// "msg1". Each slot is `None` except between `reset_shared_state` and
/// `clear_shared_state`.
struct SharedState {
    // "msg1" is the signal.
    /// Posted by the signal handler once it is running; awaited by the sampler.
    msg2: Option<PosixSemaphore>,
    /// Posted by the sampler after it has walked the stack; awaited by the handler.
    msg3: Option<PosixSemaphore>,
    /// Posted by the handler right before it returns; awaited by the sampler.
    msg4: Option<PosixSemaphore>,
}
34
35fn clear_shared_state() {
36    // Safety: this is only called from the sampling thread (there’s only one)
37    // Sampled threads only access SHARED_STATE in their signal handler.
38    // This signal and the semaphores in SHARED_STATE provide the necessary synchronization.
39    unsafe {
40        let shared_state = &mut *SHARED_STATE.0.get();
41        shared_state.msg2 = None;
42        shared_state.msg3 = None;
43        shared_state.msg4 = None;
44    }
45}
46
47fn reset_shared_state() {
48    // Safety: same as clear_shared_state
49    unsafe {
50        let shared_state = &mut *SHARED_STATE.0.get();
51        shared_state.msg2 = Some(PosixSemaphore::new(0).expect("valid semaphore"));
52        shared_state.msg3 = Some(PosixSemaphore::new(0).expect("valid semaphore"));
53        shared_state.msg4 = Some(PosixSemaphore::new(0).expect("valid semaphore"));
54    }
55}
56
57struct PosixSemaphore {
58    sem: UnsafeCell<libc::sem_t>,
59}
60
61impl PosixSemaphore {
62    pub fn new(value: u32) -> io::Result<Self> {
63        let mut sem = mem::MaybeUninit::uninit();
64        let r = unsafe {
65            libc::sem_init(sem.as_mut_ptr(), 0 /* not shared */, value)
66        };
67        if r == -1 {
68            return Err(io::Error::last_os_error());
69        }
70        Ok(PosixSemaphore {
71            sem: UnsafeCell::new(unsafe { sem.assume_init() }),
72        })
73    }
74
75    pub fn post(&self) -> io::Result<()> {
76        if unsafe { libc::sem_post(self.sem.get()) } == 0 {
77            Ok(())
78        } else {
79            Err(io::Error::last_os_error())
80        }
81    }
82
83    pub fn wait(&self) -> io::Result<()> {
84        if unsafe { libc::sem_wait(self.sem.get()) } == 0 {
85            Ok(())
86        } else {
87            Err(io::Error::last_os_error())
88        }
89    }
90
91    /// Retries the wait if it returned due to EINTR.
92    /// Returns Ok on success and the error on any other return value.
93    pub fn wait_through_intr(&self) -> io::Result<()> {
94        loop {
95            match self.wait() {
96                Err(os_error) => {
97                    let err = os_error.raw_os_error().expect("no os error");
98                    if err == libc::EINTR {
99                        thread::yield_now();
100                        continue;
101                    }
102                    return Err(os_error);
103                },
104                _ => return Ok(()),
105            }
106        }
107    }
108}
109
110unsafe impl Sync for PosixSemaphore {}
111
112impl Drop for PosixSemaphore {
113    /// Destroys the semaphore.
114    fn drop(&mut self) {
115        unsafe { libc::sem_destroy(self.sem.get()) };
116    }
117}
118
119#[allow(dead_code)]
120pub struct LinuxSampler {
121    thread_id: MonitoredThreadId,
122    old_handler: SigAction,
123}
124
125impl LinuxSampler {
126    #[allow(unsafe_code, dead_code)]
127    pub fn new_boxed() -> Box<dyn Sampler> {
128        let thread_id = unsafe { libc::syscall(libc::SYS_gettid) as libc::pid_t };
129        let handler = SigHandler::SigAction(sigprof_handler);
130        let action = SigAction::new(
131            handler,
132            SaFlags::SA_RESTART | SaFlags::SA_SIGINFO,
133            SigSet::empty(),
134        );
135        let old_handler =
136            unsafe { sigaction(Signal::SIGPROF, &action).expect("signal handler set") };
137        Box::new(LinuxSampler {
138            thread_id,
139            old_handler,
140        })
141    }
142}
143
144impl Sampler for LinuxSampler {
145    #[allow(unsafe_code)]
146    fn suspend_and_sample_thread(&self) -> Result<NativeStack, ()> {
147        // Warning: The "critical section" begins here.
148        // In the critical section:
149        // we must not do any dynamic memory allocation,
150        // nor try to acquire any lock
151        // or any other unshareable resource.
152        // first we reinitialize the semaphores
153        reset_shared_state();
154
155        // signal the thread, wait for it to tell us state was copied.
156        send_sigprof(self.thread_id);
157
158        // Safety: non-exclusive reference only
159        // since sampled threads are accessing this concurrently
160        let result;
161        {
162            let shared_state = unsafe { &*SHARED_STATE.0.get() };
163            shared_state
164                .msg2
165                .as_ref()
166                .unwrap()
167                .wait_through_intr()
168                .expect("msg2 failed");
169
170            let mut native_stack = NativeStack::new();
171            unsafe {
172                backtrace::trace_unsynchronized(|frame| {
173                    let ip = frame.ip();
174                    let sp = frame.sp();
175
176                    // This return value here determines whether we proceed to the next stack frame or not.
177                    native_stack.process_register(ip, sp).is_ok()
178                })
179            };
180            result = Ok(native_stack);
181
182            // signal the thread to continue.
183            shared_state
184                .msg3
185                .as_ref()
186                .unwrap()
187                .post()
188                .expect("msg3 failed");
189
190            // wait for thread to continue.
191            shared_state
192                .msg4
193                .as_ref()
194                .unwrap()
195                .wait_through_intr()
196                .expect("msg4 failed");
197        }
198
199        clear_shared_state();
200
201        // NOTE: End of "critical section".
202        result
203    }
204}
205
206impl Drop for LinuxSampler {
207    fn drop(&mut self) {
208        unsafe {
209            sigaction(Signal::SIGPROF, &self.old_handler).expect("previous signal handler restored")
210        };
211    }
212}
213
214extern "C" fn sigprof_handler(
215    sig: libc::c_int,
216    _info: *mut libc::siginfo_t,
217    _ctx: *mut libc::c_void,
218) {
219    assert_eq!(sig, libc::SIGPROF);
220
221    // Safety: non-exclusive reference only
222    // since the sampling thread is accessing this concurrently
223    let shared_state = unsafe { &*SHARED_STATE.0.get() };
224
225    // Tell the sampler we copied the context.
226    shared_state.msg2.as_ref().unwrap().post().expect("posted");
227
228    // Wait for sampling to finish.
229    shared_state
230        .msg3
231        .as_ref()
232        .unwrap()
233        .wait_through_intr()
234        .expect("msg3 wait succeeded");
235
236    // OK we are done!
237    shared_state.msg4.as_ref().unwrap().post().expect("posted");
238    // DO NOT TOUCH shared state here onwards.
239}
240
241fn send_sigprof(to: libc::pid_t) {
242    unsafe {
243        libc::syscall(libc::SYS_tgkill, process::id(), to, libc::SIGPROF);
244    }
245}