use std::{cmp, mem};
use api::units::*;
use malloc_size_of::MallocSizeOfOps;
use crate::{
    device::{CustomVAO, Device, DrawTarget, Program, ReadTarget, Texture, TextureFilter, UploadPBOPool, VBO},
    gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList},
    internal_types::{FrameId, RenderTargetInfo, Swizzle},
    prim_store::DeferredResolve,
    profiler,
    render_api::MemoryReport,
};

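/// When enabled, forces the GPU cache texture to be resized on every frame,
/// so the resize path can be exercised and inspected in a GPU debugger.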
const GPU_CACHE_RESIZE_TEST: bool = false;

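/// Tracks the state of a single row in the GPU cache texture, mirroring its
/// contents on the CPU and recording which span of blocks is dirty.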
struct CacheRow {
    /// CPU-side copy of the block data in this row, kept so that dirty spans
    /// can be re-uploaded in a single batch.
    cpu_blocks: Box<[GpuBlockData; super::MAX_VERTEX_TEXTURE_WIDTH]>,
    /// The first dirty block offset in this row.
    min_dirty: u16,
    /// One past the last dirty block offset in this row.
    max_dirty: u16,
}

impl CacheRow {
    fn new() -> Self {
        CacheRow {
            cpu_blocks: Box::new([GpuBlockData::EMPTY; super::MAX_VERTEX_TEXTURE_WIDTH]),
            min_dirty: super::MAX_VERTEX_TEXTURE_WIDTH as _,
            max_dirty: 0,
        }
    }

    fn is_dirty(&self) -> bool {
        return self.min_dirty < self.max_dirty;
    }

    fn clear_dirty(&mut self) {
        self.min_dirty = super::MAX_VERTEX_TEXTURE_WIDTH as _;
        self.max_dirty = 0;
    }

    fn add_dirty(&mut self, block_offset: usize, block_count: usize) {
        self.min_dirty = self.min_dirty.min(block_offset as _);
        self.max_dirty = self.max_dirty.max((block_offset + block_count) as _);
    }

    fn dirty_blocks(&self) -> &[GpuBlockData] {
        return &self.cpu_blocks[self.min_dirty as usize .. self.max_dirty as usize];
    }
}

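/// The bus over which CPU-side updates are carried to the GPU cache texture.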
enum GpuCacheBus {
    /// PBO-based updates. Uploads operate at row granularity via the CPU
    /// mirror stored in `rows`.
    PixelBuffer {
        rows: Vec<CacheRow>,
    },
    /// Shader-based scattering updates: each dirty block is drawn as a point
    /// carrying its target position and value into the cache texture.
    Scatter {
        /// Program that performs the scattered write.
        program: Program,
        /// VAO binding the position and value streams below.
        vao: CustomVAO,
        /// VBO of normalized `u16` target positions, one per block.
        buf_position: VBO<[u16; 2]>,
        /// VBO of the block data to write.
        buf_value: VBO<GpuBlockData>,
        /// Number of blocks currently staged in the VBOs.
        count: usize,
    },
}

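/// The device-side representation of the GPU cache: the texture itself plus
/// the bus used to push updates into it.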
pub struct GpuCacheTexture {
    texture: Option<Texture>,
    bus: GpuCacheBus,
}

impl GpuCacheTexture {
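    /// Ensures that the backing texture exists and is at least `height` rows
    /// tall, preserving the existing contents when it has to grow.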
    fn ensure_texture(&mut self, device: &mut Device, height: i32) {
        // If we already have a texture of sufficient height, we're done.
        if self.texture.as_ref().map_or(false, |t| t.get_dimensions().height >= height) {
            if GPU_CACHE_RESIZE_TEST {
                // Continue anyway so that the texture is recreated every frame.
            } else {
                return;
            }
        }

        // Take the old texture, if any, so it can be copied into the new one.
        let blit_source = self.texture.take();

        assert!(height >= 2, "Height is too small for ANGLE");
        let new_size = DeviceIntSize::new(super::MAX_VERTEX_TEXTURE_WIDTH as _, height);
        // The scatter bus always renders into the texture, so it needs a render
        // target. The pixel-buffer bus only needs one when the old contents must
        // be blitted across a resize and glCopyImageSubData is unavailable.
        let supports_copy_image_sub_data = device.get_capabilities().supports_copy_image_sub_data;
        let supports_color_buffer_float = device.get_capabilities().supports_color_buffer_float;
        let rt_info = if matches!(self.bus, GpuCacheBus::PixelBuffer { .. })
            && (supports_copy_image_sub_data || !supports_color_buffer_float)
        {
            None
        } else {
            Some(RenderTargetInfo { has_depth: false })
        };
        let mut texture = device.create_texture(
            api::ImageBufferKind::Texture2D,
            api::ImageFormat::RGBAF32,
            new_size.width,
            new_size.height,
            TextureFilter::Nearest,
            rt_info,
        );

        if let Some(blit_source) = blit_source {
            if !supports_copy_image_sub_data && !supports_color_buffer_float {
                // The device cannot copy the old texture, so mark every row dirty
                // and re-upload the entire contents instead.
                match self.bus {
                    GpuCacheBus::PixelBuffer { ref mut rows } => {
                        for row in rows {
                            row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH);
                        }
                    }
                    GpuCacheBus::Scatter { .. } => {
                        panic!("Texture must be copyable to use scatter GPU cache bus method");
                    }
                }
            } else {
                device.copy_entire_texture(&mut texture, &blit_source);
            }
            device.delete_texture(blit_source);
        }

        self.texture = Some(texture);
    }

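    /// Creates the cache texture wrapper. `use_scatter` selects the shader-based
    /// scatter bus (which requires float color buffer support); otherwise the
    /// PBO row-upload bus is used.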
    pub fn new(device: &mut Device, use_scatter: bool) -> Result<Self, super::RendererError> {
        use super::desc::GPU_CACHE_UPDATE;

        let bus = if use_scatter {
            assert!(
                device.get_capabilities().supports_color_buffer_float,
                "GpuCache scatter method requires EXT_color_buffer_float",
            );
            let program = device.create_program_linked(
                "gpu_cache_update",
                &[],
                &GPU_CACHE_UPDATE,
            )?;
            let buf_position = device.create_vbo();
            let buf_value = device.create_vbo();
            let vao = device.create_custom_vao(&[
                buf_position.stream_with(&GPU_CACHE_UPDATE.vertex_attributes[0..1]),
                buf_value   .stream_with(&GPU_CACHE_UPDATE.vertex_attributes[1..2]),
            ]);
            GpuCacheBus::Scatter {
                program,
                vao,
                buf_position,
                buf_value,
                count: 0,
            }
        } else {
            GpuCacheBus::PixelBuffer {
                rows: Vec::new(),
            }
        };

        Ok(GpuCacheTexture {
            texture: None,
            bus,
        })
    }

    pub fn deinit(mut self, device: &mut Device) {
        if let Some(t) = self.texture.take() {
            device.delete_texture(t);
        }
        if let GpuCacheBus::Scatter { program, vao, buf_position, buf_value, .. } = self.bus {
            device.delete_program(program);
            device.delete_custom_vao(vao);
            device.delete_vbo(buf_position);
            device.delete_vbo(buf_value);
        }
    }

    pub fn get_height(&self) -> i32 {
        self.texture.as_ref().map_or(0, |t| t.get_dimensions().height)
    }

    #[cfg(feature = "capture")]
    pub fn get_texture(&self) -> &Texture {
        self.texture.as_ref().unwrap()
    }

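    /// Grows the texture if needed and, for the scatter bus, makes sure the
    /// staging VBOs are large enough for the coming update lists.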
    fn prepare_for_updates(
        &mut self,
        device: &mut Device,
        total_block_count: usize,
        max_height: i32,
    ) {
        self.ensure_texture(device, max_height);
        match self.bus {
            GpuCacheBus::PixelBuffer { .. } => {},
            GpuCacheBus::Scatter {
                ref mut buf_position,
                ref mut buf_value,
                ref mut count,
                ..
            } => {
                *count = 0;
                if total_block_count > buf_value.allocated_count() {
                    device.allocate_vbo(buf_position, total_block_count, super::ONE_TIME_USAGE_HINT);
                    device.allocate_vbo(buf_value, total_block_count, super::ONE_TIME_USAGE_HINT);
                }
            }
        }
    }

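    /// Marks the entire CPU mirror dirty so that every row is re-uploaded on
    /// the next flush. Only supported by the pixel-buffer bus.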
    pub fn invalidate(&mut self) {
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                info!("Invalidating GPU caches");
                for row in rows {
                    row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH);
                }
            }
            GpuCacheBus::Scatter { .. } => {
                warn!("Unable to invalidate scattered GPU cache");
            }
        }
    }

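    /// Applies an update list to the CPU-side state: copies blocks into the row
    /// mirror (pixel-buffer bus) or stages positions and values into the VBOs
    /// (scatter bus). The cache texture itself is only written by `flush`.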
    fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                for update in &updates.updates {
                    match *update {
                        GpuCacheUpdate::Copy {
                            block_index,
                            block_count,
                            address,
                        } => {
                            let row = address.v as usize;

                            // Ensure that the CPU-side shadow copy of the row exists.
                            while rows.len() <= row {
                                rows.push(CacheRow::new());
                            }

                            // Copy the blocks into the CPU mirror and mark the span dirty.
                            let block_offset = address.u as usize;
                            let data = &mut rows[row].cpu_blocks;
                            for i in 0 .. block_count {
                                data[block_offset + i] = updates.blocks[block_index + i];
                            }

                            rows[row].add_dirty(block_offset, block_count);
                        }
                    }
                }
            }
            GpuCacheBus::Scatter {
                ref buf_position,
                ref buf_value,
                ref mut count,
                ..
            } => {
                let mut position_data = vec![[!0u16; 2]; updates.blocks.len()];
                let size = self.texture.as_ref().unwrap().get_dimensions().to_usize();

                for update in &updates.updates {
                    match *update {
                        GpuCacheUpdate::Copy {
                            block_index,
                            block_count,
                            address,
                        } => {
                            // Convert the texel center of each block into a normalized
                            // u16 position for the scatter vertex shader.
                            let y = ((2 * address.v as usize + 1) << 15) / size.height;
                            for i in 0 .. block_count {
                                let x = ((2 * address.u as usize + 2 * i + 1) << 15) / size.width;
                                position_data[block_index + i] = [x as _, y as _];
                            }
                        }
                    }
                }

                device.fill_vbo(buf_value, &updates.blocks, *count);
                device.fill_vbo(buf_position, &position_data, *count);
                *count += position_data.len();
            }
        }
    }

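    /// Pushes pending updates to the GPU: uploads all dirty rows through the
    /// PBO pool, or issues the scatter draw. Returns the number of rows that
    /// were uploaded (always 0 for the scatter bus).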
    fn flush(&mut self, device: &mut Device, pbo_pool: &mut UploadPBOPool) -> usize {
        let texture = self.texture.as_ref().unwrap();
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows } => {
                let rows_dirty = rows
                    .iter()
                    .filter(|row| row.is_dirty())
                    .count();
                if rows_dirty == 0 {
                    return 0
                }

                let mut uploader = device.upload_texture(pbo_pool);

                for (row_index, row) in rows.iter_mut().enumerate() {
                    if !row.is_dirty() {
                        continue;
                    }

                    // Upload only the dirty span of this row.
                    let blocks = row.dirty_blocks();
                    let rect = DeviceIntRect::from_origin_and_size(
                        DeviceIntPoint::new(row.min_dirty as i32, row_index as i32),
                        DeviceIntSize::new(blocks.len() as i32, 1),
                    );

                    uploader.upload(device, texture, rect, None, None, blocks.as_ptr(), blocks.len());

                    row.clear_dirty();
                }

                uploader.flush(device);

                rows_dirty
            }
            GpuCacheBus::Scatter { ref program, ref vao, count, .. } => {
                // Draw the staged blocks as points into the cache texture.
                device.disable_depth();
                device.set_blend(false);
                device.bind_program(program);
                device.bind_custom_vao(vao);
                device.bind_draw_target(
                    DrawTarget::from_texture(
                        texture,
                        false,
                    ),
                );
                device.draw_nonindexed_points(0, count as _);
                0
            }
        }
    }

    #[cfg(feature = "replay")]
    pub fn remove_texture(&mut self, device: &mut Device) {
        if let Some(t) = self.texture.take() {
            device.delete_texture(t);
        }
    }

    #[cfg(feature = "replay")]
    pub fn load_from_data(&mut self, texture: Texture, data: Vec<u8>) {
        assert!(self.texture.is_none());
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                let dim = texture.get_dimensions();
                let blocks = unsafe {
                    std::slice::from_raw_parts(
                        data.as_ptr() as *const GpuBlockData,
                        data.len() / mem::size_of::<GpuBlockData>(),
                    )
                };
                rows.clear();
                rows.extend((0 .. dim.height).map(|_| CacheRow::new()));
                let chunks = blocks.chunks(super::MAX_VERTEX_TEXTURE_WIDTH);
                debug_assert_eq!(chunks.len(), rows.len());
                for (row, chunk) in rows.iter_mut().zip(chunks) {
                    row.cpu_blocks.copy_from_slice(chunk);
                }
            }
            GpuCacheBus::Scatter { .. } => {}
        }
        self.texture = Some(texture);
    }

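    /// Accounts for the CPU mirror and the GPU texture in the memory report.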
    pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
        if let GpuCacheBus::PixelBuffer { ref rows, .. } = self.bus {
            for row in rows.iter() {
                report.gpu_cache_cpu_mirror += unsafe { (size_op_funs.size_of_op)(row.cpu_blocks.as_ptr() as *const _) };
            }
        }

        report.gpu_cache_textures +=
            self.texture.as_ref().map_or(0, |t| t.size_in_bytes());
    }

    pub fn gpu_size_in_bytes(&self) -> usize {
        match &self.texture {
            Some(tex) => tex.size_in_bytes(),
            None => 0,
        }
    }
}

impl super::Renderer {
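    /// Drains the pending GPU cache update lists, applies them to the cache
    /// texture, and uploads the result, recording profiler counters.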
    pub fn update_gpu_cache(&mut self) {
        let _gm = self.gpu_profiler.start_marker("gpu cache update");

        // For an artificial stress test of GPU cache resizing, always push an
        // extra update list with at least one block in it.
        let gpu_cache_height = self.gpu_cache_texture.get_height();
        if gpu_cache_height != 0 && GPU_CACHE_RESIZE_TEST {
            self.pending_gpu_cache_updates.push(GpuCacheUpdateList {
                frame_id: FrameId::INVALID,
                clear: false,
                height: gpu_cache_height,
                blocks: vec![[1f32; 4].into()],
                updates: Vec::new(),
                debug_commands: Vec::new(),
            });
        }

        let (updated_blocks, max_requested_height) = self
            .pending_gpu_cache_updates
            .iter()
            .fold((0, gpu_cache_height), |(count, height), list| {
                (count + list.blocks.len(), cmp::max(height, list.height))
            });

        if max_requested_height > self.get_max_texture_size() && !self.gpu_cache_overflow {
            self.gpu_cache_overflow = true;
            self.renderer_errors.push(super::RendererError::MaxTextureSize);
        }

        self.gpu_cache_texture.prepare_for_updates(
            &mut self.device,
            updated_blocks,
            max_requested_height,
        );

        for update_list in self.pending_gpu_cache_updates.drain(..) {
            assert!(update_list.height <= max_requested_height);
            if update_list.frame_id > self.gpu_cache_frame_id {
                self.gpu_cache_frame_id = update_list.frame_id
            }
            self.gpu_cache_texture
                .update(&mut self.device, &update_list);
        }

        self.profile.start_time(profiler::GPU_CACHE_UPLOAD_TIME);
        let updated_rows = self.gpu_cache_texture.flush(
            &mut self.device,
            &mut self.texture_upload_pbo_pool
        );
        self.gpu_cache_upload_time += self.profile.end_time(profiler::GPU_CACHE_UPLOAD_TIME);

        self.profile.set(profiler::GPU_CACHE_ROWS_UPDATED, updated_rows);
        self.profile.set(profiler::GPU_CACHE_BLOCKS_UPDATED, updated_blocks);
    }

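    /// Recreates the cache texture if a clear is pending, folds deferred
    /// resolves into the pending updates, runs the update, and binds the
    /// resulting texture to the GPU cache sampler.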
    pub fn prepare_gpu_cache(
        &mut self,
        deferred_resolves: &[DeferredResolve],
    ) -> Result<(), super::RendererError> {
        self.profile.start_time(profiler::GPU_CACHE_PREPARE_TIME);

        if self.pending_gpu_cache_clear {
            let use_scatter =
                matches!(self.gpu_cache_texture.bus, GpuCacheBus::Scatter { .. });
            let new_cache = match GpuCacheTexture::new(&mut self.device, use_scatter) {
                Ok(cache) => cache,
                Err(err) => {
                    self.profile.end_time(profiler::GPU_CACHE_PREPARE_TIME);
                    return Err(err);
                }
            };
            let old_cache = mem::replace(&mut self.gpu_cache_texture, new_cache);
            old_cache.deinit(&mut self.device);
            self.pending_gpu_cache_clear = false;
        }

        let deferred_update_list = self.update_deferred_resolves(deferred_resolves);
        self.pending_gpu_cache_updates.extend(deferred_update_list);

        self.update_gpu_cache();

        // The texture may have been recreated during the update above, so bind
        // the current one here.
        self.device.bind_texture(
            super::TextureSampler::GpuCache,
            self.gpu_cache_texture.texture.as_ref().unwrap(),
            Swizzle::default(),
        );

        self.profile.end_time(profiler::GPU_CACHE_PREPARE_TIME);

        Ok(())
    }

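    /// Reads back the full contents of the GPU cache texture as RGBAF32 texels,
    /// returning the texture dimensions and the raw bytes.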
    pub fn read_gpu_cache(&mut self) -> (DeviceIntSize, Vec<u8>) {
        let texture = self.gpu_cache_texture.texture.as_ref().unwrap();
        let size = device_size_as_framebuffer_size(texture.get_dimensions());
        // 16 bytes per texel (RGBAF32).
        let mut texels = vec![0; (size.width * size.height * 16) as usize];
        self.device.begin_frame();
        self.device.bind_read_target(ReadTarget::from_texture(texture));
        self.device.read_pixels_into(
            size.into(),
            api::ImageFormat::RGBAF32,
            &mut texels,
        );
        self.device.reset_read_target();
        self.device.end_frame();
        (texture.get_dimensions(), texels)
    }
}