background_hang_monitor/
sampler_linux.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5#![allow(unsafe_code)]
6
7use std::cell::UnsafeCell;
8use std::{io, mem, process, thread};
9
10use nix::sys::signal::{SaFlags, SigAction, SigHandler, SigSet, Signal, sigaction};
11
12use crate::sampler::{NativeStack, Sampler};
13
/// An `UnsafeCell` wrapper that can be placed in a `static`.
///
/// The wrapper itself performs no synchronization; it only carries the
/// `Sync` marker implemented below.
struct UncheckedSyncUnsafeCell<T>(UnsafeCell<T>);

// SAFETY: every dereference of the pointer returned by `UnsafeCell::get`
// must be guarded by synchronization provided by the caller.
unsafe impl<T> Sync for UncheckedSyncUnsafeCell<T> {}
18
19static SHARED_STATE: UncheckedSyncUnsafeCell<SharedState> =
20    UncheckedSyncUnsafeCell(std::cell::UnsafeCell::new(SharedState {
21        msg2: None,
22        msg3: None,
23        msg4: None,
24    }));
25
/// Identifier of the thread being sampled: the value returned by the `gettid`
/// syscall, later passed to `tgkill` as the signal target.
type MonitoredThreadId = libc::pid_t;
27
/// The semaphores used for the handshake between the sampling thread and the
/// signal handler running on the sampled thread.
struct SharedState {
    // "msg1" is the signal.
    /// Posted by the signal handler once it is running; awaited by the sampler.
    msg2: Option<PosixSemaphore>,
    /// Posted by the sampler once sampling is done; awaited by the signal handler.
    msg3: Option<PosixSemaphore>,
    /// Posted by the signal handler just before it returns; awaited by the sampler.
    msg4: Option<PosixSemaphore>,
}
34
35fn clear_shared_state() {
36    // Safety: this is only called from the sampling thread (there’s only one)
37    // Sampled threads only access SHARED_STATE in their signal handler.
38    // This signal and the semaphores in SHARED_STATE provide the necessary synchronization.
39    unsafe {
40        let shared_state = &mut *SHARED_STATE.0.get();
41        shared_state.msg2 = None;
42        shared_state.msg3 = None;
43        shared_state.msg4 = None;
44    }
45}
46
47fn reset_shared_state() {
48    // Safety: same as clear_shared_state
49    unsafe {
50        let shared_state = &mut *SHARED_STATE.0.get();
51        shared_state.msg2 = Some(PosixSemaphore::new(0).expect("valid semaphore"));
52        shared_state.msg3 = Some(PosixSemaphore::new(0).expect("valid semaphore"));
53        shared_state.msg4 = Some(PosixSemaphore::new(0).expect("valid semaphore"));
54    }
55}
56
/// Thin wrapper around a `libc::sem_t`, created with `sem_init` and destroyed
/// with `sem_destroy` when dropped.
struct PosixSemaphore {
    /// The raw semaphore; kept in an `UnsafeCell` because the libc calls need a
    /// `*mut sem_t` even when only `&self` is available.
    sem: UnsafeCell<libc::sem_t>,
}
60
61impl PosixSemaphore {
62    pub fn new(value: u32) -> io::Result<Self> {
63        let mut sem = mem::MaybeUninit::uninit();
64        let r = unsafe {
65            libc::sem_init(sem.as_mut_ptr(), 0 /* not shared */, value)
66        };
67        if r == -1 {
68            return Err(io::Error::last_os_error());
69        }
70        Ok(PosixSemaphore {
71            sem: UnsafeCell::new(unsafe { sem.assume_init() }),
72        })
73    }
74
75    pub fn post(&self) -> io::Result<()> {
76        if unsafe { libc::sem_post(self.sem.get()) } == 0 {
77            Ok(())
78        } else {
79            Err(io::Error::last_os_error())
80        }
81    }
82
83    pub fn wait(&self) -> io::Result<()> {
84        if unsafe { libc::sem_wait(self.sem.get()) } == 0 {
85            Ok(())
86        } else {
87            Err(io::Error::last_os_error())
88        }
89    }
90
91    /// Retries the wait if it returned due to EINTR.
92    /// Returns Ok on success and the error on any other return value.
93    pub fn wait_through_intr(&self) -> io::Result<()> {
94        loop {
95            match self.wait() {
96                Err(os_error) => {
97                    let err = os_error.raw_os_error().expect("no os error");
98                    if err == libc::EINTR {
99                        thread::yield_now();
100                        continue;
101                    }
102                    return Err(os_error);
103                },
104                _ => return Ok(()),
105            }
106        }
107    }
108}
109
// SAFETY: the wrapped `sem_t` is only ever accessed through the `libc::sem_*`
// calls in this file, which take a raw pointer to it. This relies on POSIX
// semaphores being usable from several threads at once.
unsafe impl Sync for PosixSemaphore {}
111
112impl Drop for PosixSemaphore {
113    /// Destroys the semaphore.
114    fn drop(&mut self) {
115        unsafe { libc::sem_destroy(self.sem.get()) };
116    }
117}
118
/// `Sampler` implementation that interrupts the monitored thread with SIGPROF.
pub struct LinuxSampler {
    /// Id of the thread that created the sampler; SIGPROF is sent to it.
    thread_id: MonitoredThreadId,
    /// SIGPROF disposition that was in effect before `new_boxed` replaced it;
    /// reinstated when the sampler is dropped.
    old_handler: SigAction,
}
123
124impl LinuxSampler {
125    #[allow(unsafe_code, dead_code)]
126    pub fn new_boxed() -> Box<dyn Sampler> {
127        let thread_id = unsafe { libc::syscall(libc::SYS_gettid) as libc::pid_t };
128        let handler = SigHandler::SigAction(sigprof_handler);
129        let action = SigAction::new(
130            handler,
131            SaFlags::SA_RESTART | SaFlags::SA_SIGINFO,
132            SigSet::empty(),
133        );
134        let old_handler =
135            unsafe { sigaction(Signal::SIGPROF, &action).expect("signal handler set") };
136        Box::new(LinuxSampler {
137            thread_id,
138            old_handler,
139        })
140    }
141}
142
143impl Sampler for LinuxSampler {
144    #[expect(unsafe_code)]
145    fn suspend_and_sample_thread(&self) -> Result<NativeStack, ()> {
146        // Warning: The "critical section" begins here.
147        // In the critical section:
148        // we must not do any dynamic memory allocation,
149        // nor try to acquire any lock
150        // or any other unshareable resource.
151        // first we reinitialize the semaphores
152        reset_shared_state();
153
154        // signal the thread, wait for it to tell us state was copied.
155        send_sigprof(self.thread_id);
156
157        // Safety: non-exclusive reference only
158        // since sampled threads are accessing this concurrently
159        let result;
160        {
161            let shared_state = unsafe { &*SHARED_STATE.0.get() };
162            shared_state
163                .msg2
164                .as_ref()
165                .unwrap()
166                .wait_through_intr()
167                .expect("msg2 failed");
168
169            let mut native_stack = NativeStack::default();
170            unsafe {
171                backtrace::trace_unsynchronized(|frame| {
172                    let ip = frame.ip();
173                    let sp = frame.sp();
174
175                    // This return value here determines whether we proceed to the next stack frame or not.
176                    native_stack.process_register(ip, sp).is_ok()
177                })
178            };
179            result = Ok(native_stack);
180
181            // signal the thread to continue.
182            shared_state
183                .msg3
184                .as_ref()
185                .unwrap()
186                .post()
187                .expect("msg3 failed");
188
189            // wait for thread to continue.
190            shared_state
191                .msg4
192                .as_ref()
193                .unwrap()
194                .wait_through_intr()
195                .expect("msg4 failed");
196        }
197
198        clear_shared_state();
199
200        // NOTE: End of "critical section".
201        result
202    }
203}
204
205impl Drop for LinuxSampler {
206    fn drop(&mut self) {
207        unsafe {
208            sigaction(Signal::SIGPROF, &self.old_handler).expect("previous signal handler restored")
209        };
210    }
211}
212
213extern "C" fn sigprof_handler(
214    sig: libc::c_int,
215    _info: *mut libc::siginfo_t,
216    _ctx: *mut libc::c_void,
217) {
218    assert_eq!(sig, libc::SIGPROF);
219
220    // Safety: non-exclusive reference only
221    // since the sampling thread is accessing this concurrently
222    let shared_state = unsafe { &*SHARED_STATE.0.get() };
223
224    // Tell the sampler we copied the context.
225    shared_state.msg2.as_ref().unwrap().post().expect("posted");
226
227    // Wait for sampling to finish.
228    shared_state
229        .msg3
230        .as_ref()
231        .unwrap()
232        .wait_through_intr()
233        .expect("msg3 wait succeeded");
234
235    // OK we are done!
236    shared_state.msg4.as_ref().unwrap().post().expect("posted");
237    // DO NOT TOUCH shared state here onwards.
238}
239
240fn send_sigprof(to: libc::pid_t) {
241    unsafe {
242        libc::syscall(libc::SYS_tgkill, process::id(), to, libc::SIGPROF);
243    }
244}