From b86e97f67a07368877bd18501aebcbe80cf93118 Mon Sep 17 00:00:00 2001
From: tcmal
Date: Sun, 25 Aug 2024 17:44:24 +0100
Subject: feat(skeleton): add memory pools

added stock memory pools behind a feature gate

refactored buffers to use them and to have better APIs.
moved some util code out of builders::pipeline

updated stockton-render for the changes

deactivation is a WIP

breaks UI drawing, fix WIP
---
 stockton-skeleton/src/mem.rs | 368 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 368 insertions(+)
 create mode 100644 stockton-skeleton/src/mem.rs

diff --git a/stockton-skeleton/src/mem.rs b/stockton-skeleton/src/mem.rs
new file mode 100644
index 0000000..af0a42b
--- /dev/null
+++ b/stockton-skeleton/src/mem.rs
@@ -0,0 +1,368 @@
+//! Used to represent access to different memory 'pools'.
+//! Ideally, each pool is optimised for a specific use case.
+//! You can implement your own pools using whatever algorithm you'd like; you just need to
+//! implement [`MemoryPool`] and optionally [`Block`], then access it using
+//! [`RenderingContext.pool_allocator`].
+//! Alternatively, some default memory pools are available when the `rendy-pools` feature is
+//! enabled (on by default).
+
+use crate::{context::RenderingContext, types::*};
+
+use std::{
+    ops::Range,
+    sync::{Arc, RwLock},
+};
+
+use anyhow::Result;
+use hal::memory::Properties;
+
+/// An allocator whose memory and allocation pattern is optimised for a specific use case.
+pub trait MemoryPool: Send + Sync + 'static {
+    /// The block type returned by this pool.
+    type Block: Block + Send + Sync;
+
+    /// Create a new memory pool from the given context.
+    /// This is called to lazily initialise the memory pool when it is first requested,
+    /// and can do any sort of filtering on the memory types it requires.
+    fn from_context(context: &RenderingContext) -> Result<Arc<RwLock<Self>>>;
+
+    /// Allocate a block of memory.
+    /// On success, returns the allocated block and the amount of memory consumed from the device.
+    /// The returned block must not overlap any other allocated block, its start must be
+    /// `0 mod align`, and it must be at least `size` bytes.
+    fn alloc(&mut self, device: &DeviceT, size: u64, align: u64) -> Result<(Self::Block, u64)>;
+
+    /// Free a block of memory.
+    /// Returns the amount of memory returned to the device.
+    /// If the given block was not allocated from this pool, this should be a no-op and return 0.
+    fn free(&mut self, device: &DeviceT, block: Self::Block) -> u64;
+
+    /// Deactivate this memory pool, freeing any allocated memory objects.
+    fn deactivate(self, context: &mut RenderingContext);
+}
+
+/// A block that owns a `Range` of a `Memory` object.
+/// Provides access to safe memory range mapping.
+pub trait Block {
+    /// Get the memory properties of the block.
+    fn properties(&self) -> Properties;
+
+    /// Get the raw memory object.
+    fn memory(&self) -> &MemoryT;
+
+    /// Get the memory range owned by this block.
+    fn range(&self) -> Range<u64>;
+
+    /// Get the size of the block.
+    fn size(&self) -> u64 {
+        let range = self.range();
+        range.end - range.start
+    }
+}
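+
+// A minimal usage sketch of the interface above. `upload_example` and the
+// sizes here are illustrative only (not part of this crate); it assumes some
+// `MemoryPool` implementor `P` already obtained via `from_context`.
+//
+//     fn upload_example<P: MemoryPool>(pool: &Arc<RwLock<P>>, device: &DeviceT) -> Result<()> {
+//         let mut pool = pool.write().map_err(|_| anyhow::anyhow!("lock poisoned"))?;
+//         // Ask for 1024 bytes aligned to 256; the second value returned is
+//         // how much device memory the allocation actually consumed.
+//         let (block, _consumed) = pool.alloc(device, 1024, 256)?;
+//         debug_assert!(block.size() >= 1024);
+//         // Hand the block back; the return value is how much was released.
+//         let _released = pool.free(device, block);
+//         Ok(())
+//     }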
+
+/// An additional trait for [`Block`]s that can be mapped to CPU-visible memory.
+///
+/// This should only be implemented for blocks that are *guaranteed* to be visible to the CPU,
+/// and may panic if this is not the case.
+pub trait MappableBlock: Block {
+    /// Attempt to map this block to CPU-visible memory.
+    /// `inner_range` is counted from the start of this block only, not from the start of the
+    /// wider memory object this block is a part of.
+    fn map(&mut self, device: &mut DeviceT, inner_range: Range<u64>) -> Result<*mut u8>;
+
+    /// Unmap this block from CPU-visible memory.
+    /// If this block is not mapped, this should be a no-op.
+    /// Implementors should ensure that this does not accidentally unmap other blocks using the
+    /// same memory object.
+    fn unmap(&mut self, device: &mut DeviceT) -> Result<()>;
+}
+
+#[cfg(feature = "rendy-pools")]
+mod rendy {
+    use super::*;
+
+    use crate::{
+        error::{EnvironmentError, LockPoisoned, UsageError},
+        utils::find_memory_type_id,
+    };
+
+    use anyhow::{anyhow, Context, Result};
+    use hal::{
+        format::Format,
+        memory::{Properties as MemProps, SparseFlags},
+    };
+    use rendy_memory::{Allocator, Block as RBlock, DynamicAllocator, DynamicBlock, DynamicConfig};
+
+    /// So we can use rendy blocks as our blocks.
+    impl<T: RBlock<back::Backend>> Block for T {
+        fn properties(&self) -> Properties {
+            <T as RBlock<back::Backend>>::properties(&self)
+        }
+
+        fn memory(&self) -> &MemoryT {
+            <T as RBlock<back::Backend>>::memory(&self)
+        }
+
+        fn range(&self) -> Range<u64> {
+            <T as RBlock<back::Backend>>::range(&self)
+        }
+    }
+
+    /// Intended to be used for textures.
+    /// The allocated memory is guaranteed to be suitable for any colour image with optimal
+    /// tiling and no extra sparse flags or view capabilities.
+    pub struct TexturesPool(DynamicAllocator<back::Backend>);
+
+    impl MemoryPool for TexturesPool {
+        type Block = DynamicBlock<back::Backend>;
+
+        fn alloc(&mut self, device: &DeviceT, size: u64, align: u64) -> Result<(Self::Block, u64)> {
+            Ok(self.0.alloc(device, size, align)?)
+        }
+
+        fn free(&mut self, device: &DeviceT, block: Self::Block) -> u64 {
+            self.0.free(device, block)
+        }
+
+        fn from_context(context: &RenderingContext) -> Result<Arc<RwLock<Self>>> {
+            let type_mask = unsafe {
+                use hal::image::{Kind, Tiling, Usage, ViewCapabilities};
+
+                // We create an empty image with the same format as used for textures.
+                // This is to get the required type_mask, which will stay the same for
+                // all colour images with the same tiling (certain memory flags excluded).
+                //
+                // Size and alignment don't necessarily stay the same, so we're forced to
+                // guess at the alignment for our allocator.
+                let device = context.device().write().map_err(|_| LockPoisoned::Device)?;
+                let img = device
+                    .create_image(
+                        Kind::D2(16, 16, 1, 1),
+                        1,
+                        Format::Rgba8Srgb,
+                        Tiling::Optimal,
+                        Usage::SAMPLED,
+                        SparseFlags::empty(),
+                        ViewCapabilities::empty(),
+                    )
+                    .context("Error creating test image to get buffer settings")?;
+
+                let type_mask = device.get_image_requirements(&img).type_mask;
+
+                device.destroy_image(img);
+
+                type_mask
+            };
+
+            let allocator = {
+                let props = MemProps::DEVICE_LOCAL;
+
+                DynamicAllocator::new(
+                    find_memory_type_id(context.adapter(), type_mask, props)
+                        .ok_or(EnvironmentError::NoMemoryTypes)?,
+                    props,
+                    DynamicConfig {
+                        block_size_granularity: 4 * 32 * 32, // 32x32 image
+                        max_chunk_size: u64::pow(2, 63),
+                        min_device_allocation: 4 * 32 * 32,
+                    },
+                    context
+                        .physical_device_properties()
+                        .limits
+                        .non_coherent_atom_size as u64,
+                )
+            };
+
+            Ok(Arc::new(RwLock::new(Self(allocator))))
+        }
+
+        fn deactivate(self, _context: &mut RenderingContext) {
+            self.0.dispose();
+        }
+    }
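+
+    // A hedged sketch of filling a [`MappableBlock`] (such as one handed out
+    // by `StagingPool` below) from the CPU. `write_bytes`, `device` and
+    // `bytes` are illustrative names, not part of this module.
+    //
+    //     fn write_bytes<B: MappableBlock>(
+    //         block: &mut B,
+    //         device: &mut DeviceT,
+    //         bytes: &[u8],
+    //     ) -> Result<()> {
+    //         // Map only the range we need, counted from the start of this block.
+    //         let ptr = block.map(device, 0..bytes.len() as u64)?;
+    //         unsafe { std::ptr::copy_nonoverlapping(bytes.as_ptr(), ptr, bytes.len()) };
+    //         // StagingPool memory is CPU_VISIBLE | COHERENT, so no explicit
+    //         // flush is needed before unmapping.
+    //         block.unmap(device)
+    //     }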
+
+    /// Used for depth buffers.
+    /// Memory returned is guaranteed to be suitable for any image using
+    /// `context.target_chain().properties().depth_format` with optimal tiling, and no sparse
+    /// flags or view capabilities.
+    pub struct DepthBufferPool(DynamicAllocator<back::Backend>);
+
+    impl MemoryPool for DepthBufferPool {
+        type Block = DynamicBlock<back::Backend>;
+
+        fn alloc(&mut self, device: &DeviceT, size: u64, align: u64) -> Result<(Self::Block, u64)> {
+            Ok(self.0.alloc(device, size, align)?)
+        }
+
+        fn free(&mut self, device: &DeviceT, block: Self::Block) -> u64 {
+            self.0.free(device, block)
+        }
+
+        fn from_context(context: &RenderingContext) -> Result<Arc<RwLock<Self>>> {
+            let type_mask = unsafe {
+                use hal::image::{Kind, Tiling, Usage, ViewCapabilities};
+
+                let device = context.device().write().map_err(|_| LockPoisoned::Device)?;
+                let img = device
+                    .create_image(
+                        Kind::D2(16, 16, 1, 1),
+                        1,
+                        context.target_chain().properties().depth_format,
+                        Tiling::Optimal,
+                        Usage::SAMPLED,
+                        SparseFlags::empty(),
+                        ViewCapabilities::empty(),
+                    )
+                    .context("Error creating test image to get buffer settings")?;
+
+                let type_mask = device.get_image_requirements(&img).type_mask;
+
+                device.destroy_image(img);
+
+                type_mask
+            };
+
+            let allocator = {
+                let props = MemProps::DEVICE_LOCAL;
+
+                DynamicAllocator::new(
+                    find_memory_type_id(context.adapter(), type_mask, props)
+                        .ok_or(EnvironmentError::NoMemoryTypes)?,
+                    props,
+                    DynamicConfig {
+                        block_size_granularity: 4 * 32 * 32, // 32x32 image
+                        max_chunk_size: u64::pow(2, 63),
+                        min_device_allocation: 4 * 32 * 32,
+                    },
+                    context
+                        .physical_device_properties()
+                        .limits
+                        .non_coherent_atom_size as u64,
+                )
+            };
+
+            Ok(Arc::new(RwLock::new(Self(allocator))))
+        }
+
+        fn deactivate(self, _context: &mut RenderingContext) {
+            self.0.dispose()
+        }
+    }
+
+    /// Used for staging buffers.
+    pub struct StagingPool(DynamicAllocator<back::Backend>);
+
+    impl MemoryPool for StagingPool {
+        type Block = MappableRBlock<DynamicBlock<back::Backend>>;
+
+        fn alloc(&mut self, device: &DeviceT, size: u64, align: u64) -> Result<(Self::Block, u64)> {
+            let (b, size) = self.0.alloc(device, size, align)?;
+            Ok((MappableRBlock::new_unchecked(b), size))
+        }
+
+        fn free(&mut self, device: &DeviceT, block: Self::Block) -> u64 {
+            self.0.free(device, block.0)
+        }
+
+        fn from_context(context: &RenderingContext) -> Result<Arc<RwLock<Self>>> {
+            let allocator = {
+                let props = MemProps::CPU_VISIBLE | MemProps::COHERENT;
+                let t = find_memory_type_id(context.adapter(), u32::MAX, props)
+                    .ok_or(EnvironmentError::NoMemoryTypes)?;
+                DynamicAllocator::new(
+                    t,
+                    props,
+                    DynamicConfig {
+                        block_size_granularity: 4 * 32 * 32, // 32x32 image
+                        max_chunk_size: u64::pow(2, 63),
+                        min_device_allocation: 4 * 32 * 32,
+                    },
+                    context
+                        .physical_device_properties()
+                        .limits
+                        .non_coherent_atom_size as u64,
+                )
+            };
+
+            Ok(Arc::new(RwLock::new(StagingPool(allocator))))
+        }
+
+        fn deactivate(self, _context: &mut RenderingContext) {
+            self.0.dispose()
+        }
+    }
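+
+    // Sketch of the intended flow for combining these pools: stage data in
+    // CPU-visible memory, then copy it into a buffer backed by a device-side
+    // pool. The exact accessor on RenderingContext
+    // ([`RenderingContext.pool_allocator`] in the module docs) is elided
+    // here, so treat this as pseudocode rather than a verbatim call:
+    //
+    //     let staging = StagingPool::from_context(&context)?; // Arc<RwLock<StagingPool>>
+    //     let (block, _) = staging.write().unwrap().alloc(&device, size, align)?;
+    //     // ... write via MappableBlock::map, then record a copy into a
+    //     // buffer backed by DataPool (below) or TexturesPool ...
+    //     staging.write().unwrap().free(&device, block);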
+
+    /// Suitable for input data, such as vertices and indices.
+    pub struct DataPool(DynamicAllocator<back::Backend>);
+
+    impl MemoryPool for DataPool {
+        type Block = DynamicBlock<back::Backend>;
+
+        fn alloc(&mut self, device: &DeviceT, size: u64, align: u64) -> Result<(Self::Block, u64)> {
+            Ok(self.0.alloc(device, size, align)?)
+        }
+
+        fn free(&mut self, device: &DeviceT, block: Self::Block) -> u64 {
+            self.0.free(device, block)
+        }
+
+        fn from_context(context: &RenderingContext) -> Result<Arc<RwLock<Self>>> {
+            let allocator = {
+                let props = MemProps::CPU_VISIBLE | MemProps::COHERENT;
+                let t = find_memory_type_id(context.adapter(), u32::MAX, props)
+                    .ok_or(EnvironmentError::NoMemoryTypes)?;
+                DynamicAllocator::new(
+                    t,
+                    props,
+                    DynamicConfig {
+                        block_size_granularity: 4 * 4 * 128, // 128 f32 XYZ[?] vertices
+                        max_chunk_size: u64::pow(2, 63),
+                        min_device_allocation: 4 * 4 * 128,
+                    },
+                    context
+                        .physical_device_properties()
+                        .limits
+                        .non_coherent_atom_size as u64,
+                )
+            };
+
+            Ok(Arc::new(RwLock::new(DataPool(allocator))))
+        }
+
+        fn deactivate(self, _context: &mut RenderingContext) {
+            self.0.dispose()
+        }
+    }
+
+    /// A rendy memory block that is guaranteed to be CPU-visible.
+    pub struct MappableRBlock<B: RBlock<back::Backend>>(B);
+
+    impl<B: RBlock<back::Backend>> MappableRBlock<B> {
+        /// Create a new mappable memory block, returning an error if the block is not CPU-visible.
+        pub fn new(block: B) -> Result<Self> {
+            if !block.properties().contains(MemProps::CPU_VISIBLE) {
+                return Err(anyhow!(UsageError::NonMappableMemory));
+            }
+            Ok(Self::new_unchecked(block))
+        }
+
+        /// Create a new mappable memory block, without checking if the block is CPU-visible.
+        pub fn new_unchecked(block: B) -> Self {
+            Self(block)
+        }
+    }
+
+    impl<B: RBlock<back::Backend>> Block for MappableRBlock<B> {
+        fn properties(&self) -> MemProps {
+            self.0.properties()
+        }
+
+        fn memory(&self) -> &MemoryT {
+            self.0.memory()
+        }
+
+        fn range(&self) -> Range<u64> {
+            self.0.range()
+        }
+    }
+
+    impl<B: RBlock<back::Backend>> MappableBlock for MappableRBlock<B> {
+        fn map(&mut self, device: &mut DeviceT, inner_range: Range<u64>) -> Result<*mut u8> {
+            unsafe { Ok(self.0.map(device, inner_range)?.ptr().as_mut()) }
+        }
+
+        fn unmap(&mut self, device: &mut DeviceT) -> Result<()> {
+            Ok(self.0.unmap(device))
+        }
+    }
+}
+
+#[cfg(feature = "rendy-pools")]
+pub use rendy::*;
--
cgit v1.2.3
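
As a closing illustration of the [`MemoryPool`] contract above, here is a minimal sketch of a user-defined pool outside this crate. `PassthroughPool` and `PassthroughBlock` are hypothetical names: the pool makes one raw device allocation per request with no sub-allocation. It assumes the crate paths used by the stock pools (`context::RenderingContext`, `types::*`, `utils::find_memory_type_id`, `error::EnvironmentError`) are public, and note that when the `rendy-pools` feature is on, the blanket `impl<T: RBlock<back::Backend>> Block for T` means your block type must not also implement rendy's `Block`.

    use std::{
        ops::Range,
        sync::{Arc, RwLock},
    };

    use anyhow::Result;
    use hal::{device::Device, memory::Properties, MemoryTypeId};
    use stockton_skeleton::{
        context::RenderingContext,
        error::EnvironmentError,
        mem::{Block, MemoryPool},
        types::*,
        utils::find_memory_type_id,
    };

    /// Hypothetical: every request becomes its own device allocation.
    pub struct PassthroughPool {
        mem_type: MemoryTypeId,
    }

    pub struct PassthroughBlock {
        memory: MemoryT,
        size: u64,
    }

    impl Block for PassthroughBlock {
        fn properties(&self) -> Properties {
            Properties::CPU_VISIBLE | Properties::COHERENT
        }

        fn memory(&self) -> &MemoryT {
            &self.memory
        }

        fn range(&self) -> Range<u64> {
            // Each block owns a whole memory object, so it starts at 0.
            0..self.size
        }
    }

    impl MemoryPool for PassthroughPool {
        type Block = PassthroughBlock;

        fn from_context(context: &RenderingContext) -> Result<Arc<RwLock<Self>>> {
            // Pick the first CPU-visible, coherent memory type, as the stock
            // staging pool does.
            let props = Properties::CPU_VISIBLE | Properties::COHERENT;
            let mem_type = find_memory_type_id(context.adapter(), u32::MAX, props)
                .ok_or(EnvironmentError::NoMemoryTypes)?;
            Ok(Arc::new(RwLock::new(Self { mem_type })))
        }

        fn alloc(&mut self, device: &DeviceT, size: u64, _align: u64) -> Result<(Self::Block, u64)> {
            // A fresh allocation always starts at offset 0, so any alignment
            // is trivially satisfied.
            let memory = unsafe { device.allocate_memory(self.mem_type, size)? };
            Ok((PassthroughBlock { memory, size }, size))
        }

        fn free(&mut self, device: &DeviceT, block: Self::Block) -> u64 {
            unsafe { device.free_memory(block.memory) };
            block.size
        }

        fn deactivate(self, _context: &mut RenderingContext) {}
    }

A pool like this wastes the driver's per-allocation overhead and ignores allocation-count limits, which is exactly what the rendy-backed pools in the patch avoid; it exists only to show the minimum surface a custom pool has to provide.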