1use gleam::gl;
6use std::mem;
7use std::rc::Rc;
8
9use crate::device::GpuFrameId;
10use crate::profiler::GpuProfileTag;
11
/// Selects which GL debug-annotation API, if any, GPU markers are emitted through.
#[derive(Clone, Copy, Debug)]
pub enum GpuDebugMethod {
    /// Emit no debug markers at all.
    None,
    /// Use the `*_marker_ext` calls (`push_group_marker_ext`,
    /// `insert_event_marker_ext`, `pop_group_marker_ext`).
    MarkerEXT,
    /// Use the `*_khr` debug calls (`push_debug_group_khr`,
    /// `debug_message_insert_khr`, `pop_debug_group_khr`).
    KHR,
}
18
19#[derive(Debug, Clone)]
20pub struct GpuTimer {
21 pub tag: GpuProfileTag,
22 pub time_ns: u64,
23}
24
25#[derive(Debug, Clone)]
26pub struct GpuSampler {
27 pub tag: GpuProfileTag,
28 pub count: u64,
29}
30
31pub struct QuerySet<T> {
32 set: Vec<gl::GLuint>,
33 data: Vec<T>,
34 pending: gl::GLuint,
35}
36
37impl<T> QuerySet<T> {
38 fn new() -> Self {
39 QuerySet {
40 set: Vec::new(),
41 data: Vec::new(),
42 pending: 0,
43 }
44 }
45
46 fn reset(&mut self) {
47 self.data.clear();
48 self.pending = 0;
49 }
50
51 fn add(&mut self, value: T) -> Option<gl::GLuint> {
52 assert_eq!(self.pending, 0);
53 self.set.get(self.data.len()).cloned().map(|query_id| {
54 self.data.push(value);
55 self.pending = query_id;
56 query_id
57 })
58 }
59
60 fn take<F: Fn(&mut T, gl::GLuint)>(&mut self, fun: F) -> Vec<T> {
61 let mut data = mem::replace(&mut self.data, Vec::new());
62 for (value, &query) in data.iter_mut().zip(self.set.iter()) {
63 fun(value, query)
64 }
65 data
66 }
67}
68
69pub struct GpuFrameProfile {
70 gl: Rc<dyn gl::Gl>,
71 timers: QuerySet<GpuTimer>,
72 samplers: QuerySet<GpuSampler>,
73 frame_id: GpuFrameId,
74 inside_frame: bool,
75 debug_method: GpuDebugMethod,
76}
77
78impl GpuFrameProfile {
79 fn new(gl: Rc<dyn gl::Gl>, debug_method: GpuDebugMethod) -> Self {
80 GpuFrameProfile {
81 gl,
82 timers: QuerySet::new(),
83 samplers: QuerySet::new(),
84 frame_id: GpuFrameId::new(0),
85 inside_frame: false,
86 debug_method
87 }
88 }
89
90 fn enable_timers(&mut self, count: i32) {
91 self.timers.set = self.gl.gen_queries(count);
92 }
93
94 fn disable_timers(&mut self) {
95 if !self.timers.set.is_empty() {
96 self.gl.delete_queries(&self.timers.set);
97 }
98 self.timers.set = Vec::new();
99 }
100
101 fn enable_samplers(&mut self, count: i32) {
102 self.samplers.set = self.gl.gen_queries(count);
103 }
104
105 fn disable_samplers(&mut self) {
106 if !self.samplers.set.is_empty() {
107 self.gl.delete_queries(&self.samplers.set);
108 }
109 self.samplers.set = Vec::new();
110 }
111
112 fn begin_frame(&mut self, frame_id: GpuFrameId) {
113 self.frame_id = frame_id;
114 self.timers.reset();
115 self.samplers.reset();
116 self.inside_frame = true;
117 }
118
119 fn end_frame(&mut self) {
120 self.finish_timer();
121 self.finish_sampler();
122 self.inside_frame = false;
123 }
124
125 fn finish_timer(&mut self) {
126 debug_assert!(self.inside_frame);
127 if self.timers.pending != 0 {
128 self.gl.end_query(gl::TIME_ELAPSED);
129 self.timers.pending = 0;
130 }
131 }
132
133 fn finish_sampler(&mut self) {
134 debug_assert!(self.inside_frame);
135 if self.samplers.pending != 0 {
136 self.gl.end_query(gl::SAMPLES_PASSED);
137 self.samplers.pending = 0;
138 }
139 }
140
141 fn start_timer(&mut self, tag: GpuProfileTag) -> GpuTimeQuery {
142 self.finish_timer();
143
144 let marker = GpuMarker::new(&self.gl, tag.label, self.debug_method);
145
146 if let Some(query) = self.timers.add(GpuTimer { tag, time_ns: 0 }) {
147 self.gl.begin_query(gl::TIME_ELAPSED, query);
148 }
149
150 GpuTimeQuery(marker)
151 }
152
153 fn start_sampler(&mut self, tag: GpuProfileTag) -> GpuSampleQuery {
154 self.finish_sampler();
155
156 if let Some(query) = self.samplers.add(GpuSampler { tag, count: 0 }) {
157 self.gl.begin_query(gl::SAMPLES_PASSED, query);
158 }
159
160 GpuSampleQuery
161 }
162
163 fn build_samples(&mut self) -> (GpuFrameId, Vec<GpuTimer>, Vec<GpuSampler>) {
164 debug_assert!(!self.inside_frame);
165 let gl = &self.gl;
166
167 (
168 self.frame_id,
169 self.timers.take(|timer, query| {
170 timer.time_ns = gl.get_query_object_ui64v(query, gl::QUERY_RESULT)
171 }),
172 self.samplers.take(|sampler, query| {
173 sampler.count = gl.get_query_object_ui64v(query, gl::QUERY_RESULT)
174 }),
175 )
176 }
177}
178
179impl Drop for GpuFrameProfile {
180 fn drop(&mut self) {
181 self.disable_timers();
182 self.disable_samplers();
183 }
184}
185
186const NUM_PROFILE_FRAMES: usize = 4;
187
188pub struct GpuProfiler {
189 gl: Rc<dyn gl::Gl>,
190 frames: [GpuFrameProfile; NUM_PROFILE_FRAMES],
191 next_frame: usize,
192 debug_method: GpuDebugMethod
193}
194
195impl GpuProfiler {
196 pub fn new(gl: Rc<dyn gl::Gl>, debug_method: GpuDebugMethod) -> Self {
197 let f = || GpuFrameProfile::new(Rc::clone(&gl), debug_method);
198
199 let frames = [f(), f(), f(), f()];
200 GpuProfiler {
201 gl,
202 next_frame: 0,
203 frames,
204 debug_method
205 }
206 }
207
208 pub fn enable_timers(&mut self) {
209 const MAX_TIMERS_PER_FRAME: i32 = 256;
210
211 for frame in &mut self.frames {
212 frame.enable_timers(MAX_TIMERS_PER_FRAME);
213 }
214 }
215
216 pub fn disable_timers(&mut self) {
217 for frame in &mut self.frames {
218 frame.disable_timers();
219 }
220 }
221
222 pub fn enable_samplers(&mut self) {
223 const MAX_SAMPLERS_PER_FRAME: i32 = 16;
224 if cfg!(target_os = "macos") {
225 warn!("Expect macOS driver bugs related to sample queries")
226 }
227
228 for frame in &mut self.frames {
229 frame.enable_samplers(MAX_SAMPLERS_PER_FRAME);
230 }
231 }
232
233 pub fn disable_samplers(&mut self) {
234 for frame in &mut self.frames {
235 frame.disable_samplers();
236 }
237 }
238
239 pub fn build_samples(&mut self) -> (GpuFrameId, Vec<GpuTimer>, Vec<GpuSampler>) {
240 self.frames[self.next_frame].build_samples()
241 }
242
243 pub fn begin_frame(&mut self, frame_id: GpuFrameId) {
244 self.frames[self.next_frame].begin_frame(frame_id);
245 }
246
247 pub fn end_frame(&mut self) {
248 self.frames[self.next_frame].end_frame();
249 self.next_frame = (self.next_frame + 1) % self.frames.len();
250 }
251
252 pub fn start_timer(&mut self, tag: GpuProfileTag) -> GpuTimeQuery {
253 self.frames[self.next_frame].start_timer(tag)
254 }
255
256 pub fn start_sampler(&mut self, tag: GpuProfileTag) -> GpuSampleQuery {
257 self.frames[self.next_frame].start_sampler(tag)
258 }
259
260 pub fn finish_sampler(&mut self, _sampler: GpuSampleQuery) {
261 self.frames[self.next_frame].finish_sampler()
262 }
263
264 pub fn start_marker(&mut self, label: &str) -> GpuMarker {
265 GpuMarker::new(&self.gl, label, self.debug_method)
266 }
267
268 pub fn place_marker(&mut self, label: &str) {
269 GpuMarker::fire(&self.gl, label, self.debug_method)
270 }
271}
272
273#[must_use]
274pub struct GpuMarker {
275 gl: Option<(Rc<dyn gl::Gl>, GpuDebugMethod)>,
276}
277
278impl GpuMarker {
279 fn new(gl: &Rc<dyn gl::Gl>, message: &str, debug_method: GpuDebugMethod) -> Self {
280 let gl = match debug_method {
281 GpuDebugMethod::KHR => {
282 gl.push_debug_group_khr(gl::DEBUG_SOURCE_APPLICATION, 0, message);
283 Some((Rc::clone(gl), debug_method))
284 },
285 GpuDebugMethod::MarkerEXT => {
286 gl.push_group_marker_ext(message);
287 Some((Rc::clone(gl), debug_method))
288 },
289 GpuDebugMethod::None => None,
290 };
291 GpuMarker { gl }
292 }
293
294 fn fire(gl: &Rc<dyn gl::Gl>, message: &str, debug_method: GpuDebugMethod) {
295 match debug_method {
296 GpuDebugMethod::KHR => gl.debug_message_insert_khr(gl::DEBUG_SOURCE_APPLICATION, gl::DEBUG_TYPE_MARKER, 0, gl::DEBUG_SEVERITY_NOTIFICATION, message),
297 GpuDebugMethod::MarkerEXT => gl.insert_event_marker_ext(message),
298 GpuDebugMethod::None => {}
299 };
300 }
301}
302
303impl Drop for GpuMarker {
304 fn drop(&mut self) {
305 if let Some((ref gl, debug_method)) = self.gl {
306 match debug_method {
307 GpuDebugMethod::KHR => gl.pop_debug_group_khr(),
308 GpuDebugMethod::MarkerEXT => gl.pop_group_marker_ext(),
309 GpuDebugMethod::None => {}
310 };
311 }
312 }
313}
314
315#[must_use]
316pub struct GpuTimeQuery(#[allow(dead_code)] GpuMarker);
/// Token returned when a GPU sampler is started; it carries no data and is
/// handed back to `GpuProfiler::finish_sampler` to end the sampler.
#[must_use]
pub struct GpuSampleQuery;