1use std::collections::VecDeque;
6use std::thread::{self, Builder, JoinHandle};
7use std::time::{Duration, Instant};
8
9use background_hang_monitor_api::{
10 BackgroundHangMonitor, BackgroundHangMonitorClone, BackgroundHangMonitorControlMsg,
11 BackgroundHangMonitorExitSignal, BackgroundHangMonitorRegister, HangAlert, HangAnnotation,
12 HangMonitorAlert, MonitoredComponentId,
13};
14use base::generic_channel::{GenericReceiver, GenericSender, RoutedReceiver};
15use crossbeam_channel::{Receiver, Sender, after, never, select, unbounded};
16use log::{error, warn};
17use rustc_hash::FxHashMap;
18
19use crate::SamplerImpl;
20use crate::sampler::{NativeStack, Sampler};
21
22#[derive(Clone)]
23pub struct HangMonitorRegister {
24 sender: MonitoredComponentSender,
25 monitoring_enabled: bool,
26}
27
28impl HangMonitorRegister {
29 pub fn init(
32 constellation_chan: GenericSender<HangMonitorAlert>,
33 control_port: GenericReceiver<BackgroundHangMonitorControlMsg>,
34 monitoring_enabled: bool,
35 ) -> (Box<dyn BackgroundHangMonitorRegister>, JoinHandle<()>) {
36 let (sender, port) = unbounded();
37
38 let join_handle = Builder::new()
39 .name("BackgroundHangMonitor".to_owned())
40 .spawn(move || {
41 let mut monitor = BackgroundHangMonitorWorker::new(
42 constellation_chan,
43 control_port,
44 port,
45 monitoring_enabled,
46 );
47 while monitor.run() {
48 }
50 })
51 .expect("Couldn't start BHM worker.");
52 (
53 Box::new(HangMonitorRegister {
54 sender,
55 monitoring_enabled,
56 }),
57 join_handle,
58 )
59 }
60}
61
62impl BackgroundHangMonitorRegister for HangMonitorRegister {
63 fn register_component(
67 &self,
68 component_id: MonitoredComponentId,
69 transient_hang_timeout: Duration,
70 permanent_hang_timeout: Duration,
71 exit_signal: Box<dyn BackgroundHangMonitorExitSignal>,
72 ) -> Box<dyn BackgroundHangMonitor> {
73 let bhm_chan = BackgroundHangMonitorChan::new(
74 self.sender.clone(),
75 component_id,
76 self.monitoring_enabled,
77 );
78
79 bhm_chan.send(MonitoredComponentMsg::Register(
80 SamplerImpl::default(),
81 thread::current().name().map(str::to_owned),
82 transient_hang_timeout,
83 permanent_hang_timeout,
84 exit_signal,
85 ));
86 Box::new(bhm_chan)
87 }
88}
89
90impl BackgroundHangMonitorClone for HangMonitorRegister {
91 fn clone_box(&self) -> Box<dyn BackgroundHangMonitorRegister> {
92 Box::new(self.clone())
93 }
94}
95
96enum MonitoredComponentMsg {
98 Register(
100 SamplerImpl,
101 Option<String>,
102 Duration,
103 Duration,
104 Box<dyn BackgroundHangMonitorExitSignal>,
105 ),
106 Unregister,
108 NotifyActivity(HangAnnotation),
110 NotifyWait,
112}
113
114struct BackgroundHangMonitorChan {
118 sender: MonitoredComponentSender,
119 component_id: MonitoredComponentId,
120 monitoring_enabled: bool,
121}
122
123impl BackgroundHangMonitorChan {
124 fn new(
125 sender: MonitoredComponentSender,
126 component_id: MonitoredComponentId,
127 monitoring_enabled: bool,
128 ) -> Self {
129 BackgroundHangMonitorChan {
130 sender,
131 component_id,
132 monitoring_enabled,
133 }
134 }
135
136 fn send(&self, msg: MonitoredComponentMsg) {
137 self.sender
138 .send((self.component_id.clone(), msg))
139 .expect("BHM is gone");
140 }
141}
142
143impl BackgroundHangMonitor for BackgroundHangMonitorChan {
144 fn notify_activity(&self, annotation: HangAnnotation) {
145 if self.monitoring_enabled {
146 let msg = MonitoredComponentMsg::NotifyActivity(annotation);
147 self.send(msg);
148 }
149 }
150 fn notify_wait(&self) {
151 if self.monitoring_enabled {
152 let msg = MonitoredComponentMsg::NotifyWait;
153 self.send(msg);
154 }
155 }
156 fn unregister(&self) {
157 let msg = MonitoredComponentMsg::Unregister;
158 self.send(msg);
159 }
160}
161
162struct MonitoredComponent {
163 sampler: SamplerImpl,
164 last_activity: Instant,
165 last_annotation: Option<HangAnnotation>,
166 transient_hang_timeout: Duration,
167 permanent_hang_timeout: Duration,
168 sent_transient_alert: bool,
169 sent_permanent_alert: bool,
170 is_waiting: bool,
171 exit_signal: Box<dyn BackgroundHangMonitorExitSignal>,
172}
173
174struct Sample(MonitoredComponentId, Instant, NativeStack);
175
176struct BackgroundHangMonitorWorker {
177 component_names: FxHashMap<MonitoredComponentId, String>,
178 monitored_components: FxHashMap<MonitoredComponentId, MonitoredComponent>,
179 constellation_chan: GenericSender<HangMonitorAlert>,
180 port: Receiver<(MonitoredComponentId, MonitoredComponentMsg)>,
181 control_port: Option<RoutedReceiver<BackgroundHangMonitorControlMsg>>,
182 sampling_duration: Option<Duration>,
183 sampling_max_duration: Option<Duration>,
184 last_sample: Instant,
185 creation: Instant,
186 sampling_baseline: Instant,
187 samples: VecDeque<Sample>,
188 monitoring_enabled: bool,
189 shutting_down: bool,
190}
191
192type MonitoredComponentSender = Sender<(MonitoredComponentId, MonitoredComponentMsg)>;
193type MonitoredComponentReceiver = Receiver<(MonitoredComponentId, MonitoredComponentMsg)>;
194
195impl BackgroundHangMonitorWorker {
196 fn new(
197 constellation_chan: GenericSender<HangMonitorAlert>,
198 control_port: GenericReceiver<BackgroundHangMonitorControlMsg>,
199 port: MonitoredComponentReceiver,
200 monitoring_enabled: bool,
201 ) -> Self {
202 let control_port = control_port.route_preserving_errors();
203 Self {
204 component_names: Default::default(),
205 monitored_components: Default::default(),
206 constellation_chan,
207 port,
208 control_port: Some(control_port),
209 sampling_duration: None,
210 sampling_max_duration: None,
211 last_sample: Instant::now(),
212 sampling_baseline: Instant::now(),
213 creation: Instant::now(),
214 samples: Default::default(),
215 monitoring_enabled,
216 shutting_down: Default::default(),
217 }
218 }
219
220 fn finish_sampled_profile(&mut self) {
221 let mut bytes = vec![];
222 bytes.extend(
223 format!(
224 "{{ \"rate\": {}, \"start\": {}, \"data\": [\n",
225 self.sampling_duration.unwrap().as_millis(),
226 (self.sampling_baseline - self.creation).as_millis(),
227 )
228 .as_bytes(),
229 );
230
231 let mut first = true;
232 let to_resolve = self.samples.len();
233 for (i, Sample(id, instant, stack)) in self.samples.drain(..).enumerate() {
234 println!("Resolving {}/{}", i + 1, to_resolve);
235 let profile = stack.to_hangprofile();
236 let name = match self.component_names.get(&id) {
237 Some(ref s) => format!("\"{}\"", s),
238 None => "null".to_string(),
239 };
240 let json = format!(
241 "{}{{ \"name\": {}, \"event loop id\": \"{:?}\", \
242 \"time\": {}, \"frames\": {} }}",
243 if !first { ",\n" } else { "" },
244 name,
245 id,
246 (instant - self.sampling_baseline).as_millis(),
247 serde_json::to_string(&profile.backtrace).unwrap(),
248 );
249 bytes.extend(json.as_bytes());
250 first = false;
251 }
252
253 bytes.extend(b"\n] }");
254 let _ = self
255 .constellation_chan
256 .send(HangMonitorAlert::Profile(bytes));
257 }
258
259 fn run(&mut self) -> bool {
260 let tick = if let Some(duration) = self.sampling_duration {
261 let duration = duration
262 .checked_sub(Instant::now() - self.last_sample)
263 .unwrap_or_else(|| Duration::from_millis(0));
264 after(duration)
265 } else if self.monitoring_enabled {
266 after(Duration::from_millis(100))
267 } else {
268 never()
269 };
270
271 enum BhmMessage {
274 ComponentMessage((MonitoredComponentId, MonitoredComponentMsg)),
275 ToggleSampler(Duration, Duration),
276 Exit,
277 ControlError(String),
278 ControlDisconnected,
279 Tick,
280 PortDisconnected,
281 }
282
283 let result = if let Some(ref control_port) = self.control_port {
284 select! {
285 recv(self.port) -> event => {
286 match event {
287 Ok(msg) => BhmMessage::ComponentMessage(msg),
288 Err(_) => BhmMessage::PortDisconnected,
289 }
290 },
291 recv(control_port) -> event => {
292 match event {
293 Ok(Ok(BackgroundHangMonitorControlMsg::ToggleSampler(rate, max_duration))) => {
294 BhmMessage::ToggleSampler(rate, max_duration)
295 },
296 Ok(Ok(BackgroundHangMonitorControlMsg::Exit)) => BhmMessage::Exit,
297 Ok(Err(e)) => BhmMessage::ControlError(format!("{e:?}")),
298 Err(_) => BhmMessage::ControlDisconnected,
299 }
300 }
301 recv(tick) -> _ => BhmMessage::Tick,
302 }
303 } else {
304 select! {
306 recv(self.port) -> event => {
307 match event {
308 Ok(msg) => BhmMessage::ComponentMessage(msg),
309 Err(_) => BhmMessage::PortDisconnected,
310 }
311 },
312 recv(tick) -> _ => BhmMessage::Tick,
313 }
314 };
315
316 match result {
317 BhmMessage::PortDisconnected => {
318 return false;
322 },
323 BhmMessage::ToggleSampler(rate, max_duration) => {
324 if self.sampling_duration.is_some() {
325 println!("Enabling profiler.");
326 self.finish_sampled_profile();
327 self.sampling_duration = None;
328 } else {
329 println!("Disabling profiler.");
330 self.sampling_duration = Some(rate);
331 self.sampling_max_duration = Some(max_duration);
332 self.sampling_baseline = Instant::now();
333 }
334 },
335 BhmMessage::Exit => {
336 for component in self.monitored_components.values_mut() {
337 component.exit_signal.signal_to_exit();
338 }
339
340 self.shutting_down = true;
344 },
345 BhmMessage::ControlError(e) => {
346 warn!("BackgroundHangMonitorWorker control message deserialization error: {e}");
347 },
348 BhmMessage::ControlDisconnected => {
349 if !self.shutting_down {
355 error!("BHM control disconnected before shutting down!");
356 for component in self.monitored_components.values_mut() {
357 component.exit_signal.signal_to_exit();
358 }
359 self.shutting_down = true;
360 }
361 self.control_port = None;
362 },
363 BhmMessage::ComponentMessage(msg) => {
364 self.handle_msg(msg);
365 while let Ok(another_msg) = self.port.try_recv() {
366 self.handle_msg(another_msg);
368 }
369 },
370 BhmMessage::Tick => {
371 },
373 }
374
375 if let Some(duration) = self.sampling_duration {
376 let now = Instant::now();
377 if now - self.last_sample > duration {
378 self.sample();
379 self.last_sample = now;
380 }
381 } else {
382 self.perform_a_hang_monitor_checkpoint();
383 }
384 true
385 }
386
387 fn handle_msg(&mut self, msg: (MonitoredComponentId, MonitoredComponentMsg)) {
388 match msg {
389 (
390 component_id,
391 MonitoredComponentMsg::Register(
392 sampler,
393 name,
394 transient_hang_timeout,
395 permanent_hang_timeout,
396 exit_signal,
397 ),
398 ) => {
399 if self.shutting_down {
404 exit_signal.signal_to_exit();
405 }
406
407 let component = MonitoredComponent {
408 sampler,
409 last_activity: Instant::now(),
410 last_annotation: None,
411 transient_hang_timeout,
412 permanent_hang_timeout,
413 sent_transient_alert: false,
414 sent_permanent_alert: false,
415 is_waiting: true,
416 exit_signal,
417 };
418 if let Some(name) = name {
419 self.component_names.insert(component_id.clone(), name);
420 }
421 assert!(
422 self.monitored_components
423 .insert(component_id, component)
424 .is_none(),
425 "This component was already registered for monitoring."
426 );
427 },
428 (component_id, MonitoredComponentMsg::Unregister) => {
429 self.monitored_components
430 .remove_entry(&component_id)
431 .expect("Received Unregister for an unknown component");
432 },
433 (component_id, MonitoredComponentMsg::NotifyActivity(annotation)) => {
434 let component = self
435 .monitored_components
436 .get_mut(&component_id)
437 .expect("Received NotifyActivity for an unknown component");
438 component.last_activity = Instant::now();
439 component.last_annotation = Some(annotation);
440 component.sent_transient_alert = false;
441 component.sent_permanent_alert = false;
442 component.is_waiting = false;
443 },
444 (component_id, MonitoredComponentMsg::NotifyWait) => {
445 let component = self
446 .monitored_components
447 .get_mut(&component_id)
448 .expect("Received NotifyWait for an unknown component");
449 component.last_activity = Instant::now();
450 component.sent_transient_alert = false;
451 component.sent_permanent_alert = false;
452 component.is_waiting = true;
453 },
454 }
455 }
456
457 fn perform_a_hang_monitor_checkpoint(&mut self) {
458 for (component_id, monitored) in self.monitored_components.iter_mut() {
459 if monitored.is_waiting {
460 continue;
461 }
462 let last_annotation = monitored.last_annotation.unwrap();
463 if monitored.last_activity.elapsed() > monitored.permanent_hang_timeout {
464 if monitored.sent_permanent_alert {
465 continue;
466 }
467 let profile = match monitored.sampler.suspend_and_sample_thread() {
468 Ok(native_stack) => Some(native_stack.to_hangprofile()),
469 Err(()) => None,
470 };
471 let _ = self
472 .constellation_chan
473 .send(HangMonitorAlert::Hang(HangAlert::Permanent(
474 component_id.clone(),
475 last_annotation,
476 profile,
477 )));
478 monitored.sent_permanent_alert = true;
479 continue;
480 }
481 if monitored.last_activity.elapsed() > monitored.transient_hang_timeout {
482 if monitored.sent_transient_alert {
483 continue;
484 }
485 let _ = self
486 .constellation_chan
487 .send(HangMonitorAlert::Hang(HangAlert::Transient(
488 component_id.clone(),
489 last_annotation,
490 )));
491 monitored.sent_transient_alert = true;
492 }
493 }
494 }
495
496 fn sample(&mut self) {
497 for (component_id, monitored) in self.monitored_components.iter_mut() {
498 let instant = Instant::now();
499 if let Ok(stack) = monitored.sampler.suspend_and_sample_thread() {
500 if self.sampling_baseline.elapsed() >
501 self.sampling_max_duration
502 .expect("Max duration has been set")
503 {
504 self.samples.pop_front();
506 }
507 self.samples
508 .push_back(Sample(component_id.clone(), instant, stack));
509 }
510 }
511 }
512}