author     tcmal <me@aria.rip>    2024-08-25 17:44:22 +0100
committer  tcmal <me@aria.rip>    2024-08-25 17:44:22 +0100
commit     d7d0f0277c437004ed476393542da39c84c5cf9f
tree       65c369e00fc1416c74019fde4456b0bf90d3067c /rendy-memory/src
parent     10b3d4ac59e826b31d2114999e31893390acfb9c
chore(render): update hal and fix some errors
Diffstat (limited to 'rendy-memory/src')
-rw-r--r-- | rendy-memory/src/allocator/dedicated.rs | 218
-rw-r--r-- | rendy-memory/src/allocator/dynamic.rs | 716
-rw-r--r-- | rendy-memory/src/allocator/linear.rs | 363
-rw-r--r-- | rendy-memory/src/allocator/mod.rs | 50
-rw-r--r-- | rendy-memory/src/block.rs | 36
-rw-r--r-- | rendy-memory/src/heaps/heap.rs | 49
-rw-r--r-- | rendy-memory/src/heaps/memory_type.rs | 158
-rw-r--r-- | rendy-memory/src/heaps/mod.rs | 327
-rw-r--r-- | rendy-memory/src/lib.rs | 31
-rw-r--r-- | rendy-memory/src/mapping/mod.rs | 345
-rw-r--r-- | rendy-memory/src/mapping/range.rs | 85
-rw-r--r-- | rendy-memory/src/mapping/write.rs | 73
-rw-r--r-- | rendy-memory/src/memory.rs | 98
-rw-r--r-- | rendy-memory/src/usage.rs | 210
-rw-r--r-- | rendy-memory/src/util.rs | 157
-rw-r--r-- | rendy-memory/src/utilization.rs | 137
16 files changed, 3053 insertions(+), 0 deletions(-)
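The patch below vendors the full rendy-memory allocator stack: dedicated, dynamic and linear sub-allocators behind a `Heaps` front end, updated for the newer gfx-hal `Segment`-based mapping API. As orientation before the diff body, here is a minimal sketch of how the `Heaps` API added here is typically driven; it is not part of the commit. The helper name `upload_bytes`, the `rendy_memory` import path and the alignment of 1 are illustrative assumptions, and the usage value is left generic because `usage.rs` is not shown in this excerpt.

```rust
use rendy_memory::{Block, Heaps, HeapsError, MemoryBlock, MemoryUsage, Write};

/// Allocate a host-visible block from `heaps`, copy `data` into it, and free it again.
/// `mask` is the `type_mask` from the resource's memory requirements; `usage` must be a
/// usage value whose `properties_required()` includes `CPU_VISIBLE` so the block can be mapped.
fn upload_bytes<B, U>(
    device: &B::Device,
    heaps: &mut Heaps<B>,
    mask: u32,
    usage: U,
    data: &[u8],
) -> Result<(), HeapsError>
where
    B: gfx_hal::Backend,
    U: MemoryUsage,
{
    assert!(!data.is_empty(), "mapping ranges must be non-empty");

    // Alignment of 1 keeps the sketch simple; real code passes the alignment
    // reported by the buffer/image memory requirements.
    let mut block: MemoryBlock<B> = heaps.allocate(device, mask, usage, data.len() as u64, 1)?;

    {
        // Map the whole block; the range is relative to the block, not the memory object.
        let mut mapped = block
            .map(device, 0..data.len() as u64)
            .expect("block from a CPU_VISIBLE memory type should be mappable");
        // `write` returns a writer that flushes non-coherent memory when it is dropped.
        unsafe {
            mapped
                .write(device, 0..data.len() as u64)
                .expect("sub-range is in bounds")
                .write(data);
        }
    }

    heaps.free(device, block);
    Ok(())
}
```

Every block must be returned through `Heaps::free` before `Heaps::dispose` is called: the dynamic allocator asserts that its chunks are empty on disposal, and the dedicated allocator logs an error if any allocation is still outstanding when it is dropped.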
diff --git a/rendy-memory/src/allocator/dedicated.rs b/rendy-memory/src/allocator/dedicated.rs new file mode 100644 index 0000000..7806509 --- /dev/null +++ b/rendy-memory/src/allocator/dedicated.rs @@ -0,0 +1,218 @@ +use std::{ops::Range, ptr::NonNull}; + +use { + crate::{ + allocator::{Allocator, Kind}, + block::Block, + mapping::{mapped_sub_range, MappedRange}, + memory::*, + util::*, + }, + gfx_hal::{device::Device as _, Backend}, +}; + +/// Memory block allocated from `DedicatedAllocator` +#[derive(Debug)] +pub struct DedicatedBlock<B: Backend> { + memory: Memory<B>, + mapping: Option<(NonNull<u8>, Range<u64>)>, +} + +unsafe impl<B> Send for DedicatedBlock<B> where B: Backend {} +unsafe impl<B> Sync for DedicatedBlock<B> where B: Backend {} + +impl<B> DedicatedBlock<B> +where + B: Backend, +{ + /// Get inner memory. + /// Panics if mapped. + pub fn unwrap_memory(self) -> Memory<B> { + assert!(self.mapping.is_none()); + self.memory + } + + /// Make unmapped block. + pub fn from_memory(memory: Memory<B>) -> Self { + DedicatedBlock { + memory, + mapping: None, + } + } +} + +impl<B> Block<B> for DedicatedBlock<B> +where + B: Backend, +{ + #[inline] + fn properties(&self) -> gfx_hal::memory::Properties { + self.memory.properties() + } + + #[inline] + fn memory(&self) -> &B::Memory { + self.memory.raw() + } + + #[inline] + fn range(&self) -> Range<u64> { + 0..self.memory.size() + } + + fn map<'a>( + &'a mut self, + device: &B::Device, + range: Range<u64>, + ) -> Result<MappedRange<'a, B>, gfx_hal::device::MapError> { + assert!( + range.start < range.end, + "Memory mapping region must have valid size" + ); + + if !self.memory.host_visible() { + //TODO: invalid access error + return Err(gfx_hal::device::MapError::MappingFailed); + } + + let requested_range = range.clone(); + let mapping_range = if !self.memory.host_coherent() { + align_range(range, self.memory.non_coherent_atom_size()) + } else { + range + }; + + unsafe { + if let Some(ptr) = self + .mapping + .clone() + .and_then(|(ptr, range)| mapped_sub_range(ptr, range, mapping_range.clone())) + { + Ok(MappedRange::from_raw( + &self.memory, + ptr, + mapping_range, + requested_range, + )) + } else { + self.unmap(device); + let ptr = device.map_memory( + self.memory.raw_mut(), + gfx_hal::memory::Segment { + offset: mapping_range.start, + size: Some(mapping_range.end - mapping_range.start), + }, + )?; + let ptr = NonNull::new(ptr).expect("Memory mapping shouldn't return nullptr"); + let mapping = + MappedRange::from_raw(&self.memory, ptr, mapping_range, requested_range); + self.mapping = Some((mapping.ptr(), mapping.range())); + Ok(mapping) + } + } + } + + fn unmap(&mut self, device: &B::Device) { + if self.mapping.take().is_some() { + unsafe { + // trace!("Unmap memory: {:#?}", self.memory); + device.unmap_memory(self.memory.raw_mut()); + } + } + } +} + +/// Dedicated memory allocator that uses memory object per allocation requested. +/// +/// This allocator suites best huge allocations. +/// From 32 MiB when GPU has 4-8 GiB memory total. +/// +/// `Heaps` use this allocator when none of sub-allocators bound to the memory type +/// can handle size required. +/// TODO: Check if resource prefers dedicated memory. +#[derive(Debug)] +pub struct DedicatedAllocator { + memory_type: gfx_hal::MemoryTypeId, + memory_properties: gfx_hal::memory::Properties, + non_coherent_atom_size: u64, + used: u64, +} + +impl DedicatedAllocator { + /// Get properties required by the allocator. 
+ pub fn properties_required() -> gfx_hal::memory::Properties { + gfx_hal::memory::Properties::empty() + } + + /// Create new `LinearAllocator` + /// for `memory_type` with `memory_properties` specified + pub fn new( + memory_type: gfx_hal::MemoryTypeId, + memory_properties: gfx_hal::memory::Properties, + non_coherent_atom_size: u64, + ) -> Self { + DedicatedAllocator { + memory_type, + memory_properties, + non_coherent_atom_size, + used: 0, + } + } +} + +impl<B> Allocator<B> for DedicatedAllocator +where + B: Backend, +{ + type Block = DedicatedBlock<B>; + + fn kind() -> Kind { + Kind::Dedicated + } + + #[inline] + fn alloc( + &mut self, + device: &B::Device, + size: u64, + _align: u64, + ) -> Result<(DedicatedBlock<B>, u64), gfx_hal::device::AllocationError> { + let size = if is_non_coherent_visible(self.memory_properties) { + align_size(size, self.non_coherent_atom_size) + } else { + size + }; + + let memory = unsafe { + Memory::from_raw( + device.allocate_memory(self.memory_type, size)?, + size, + self.memory_properties, + self.non_coherent_atom_size, + ) + }; + + self.used += size; + + Ok((DedicatedBlock::from_memory(memory), size)) + } + + #[inline] + fn free(&mut self, device: &B::Device, mut block: DedicatedBlock<B>) -> u64 { + block.unmap(device); + let size = block.memory.size(); + self.used -= size; + unsafe { + device.free_memory(block.memory.into_raw()); + } + size + } +} + +impl Drop for DedicatedAllocator { + fn drop(&mut self) { + if self.used > 0 { + log::error!("Not all allocation from DedicatedAllocator was freed"); + } + } +} diff --git a/rendy-memory/src/allocator/dynamic.rs b/rendy-memory/src/allocator/dynamic.rs new file mode 100644 index 0000000..a41cc27 --- /dev/null +++ b/rendy-memory/src/allocator/dynamic.rs @@ -0,0 +1,716 @@ +use std::{ + collections::{BTreeSet, HashMap}, + ops::Range, + ptr::NonNull, + thread, +}; + +use { + crate::{ + allocator::{Allocator, Kind}, + block::Block, + mapping::*, + memory::*, + util::*, + }, + gfx_hal::{device::Device as _, Backend}, + hibitset::{BitSet, BitSetLike as _}, +}; + +/// Memory block allocated from `DynamicAllocator` +#[derive(Debug)] +pub struct DynamicBlock<B: Backend> { + block_index: u32, + chunk_index: u32, + count: u32, + memory: *const Memory<B>, + ptr: Option<NonNull<u8>>, + range: Range<u64>, + relevant: relevant::Relevant, +} + +unsafe impl<B> Send for DynamicBlock<B> where B: Backend {} +unsafe impl<B> Sync for DynamicBlock<B> where B: Backend {} + +impl<B> DynamicBlock<B> +where + B: Backend, +{ + fn shared_memory(&self) -> &Memory<B> { + // Memory won't be freed until last block created from it deallocated. 
+ unsafe { &*self.memory } + } + + fn size(&self) -> u64 { + self.range.end - self.range.start + } + + fn dispose(self) { + self.relevant.dispose(); + } +} + +impl<B> Block<B> for DynamicBlock<B> +where + B: Backend, +{ + #[inline] + fn properties(&self) -> gfx_hal::memory::Properties { + self.shared_memory().properties() + } + + #[inline] + fn memory(&self) -> &B::Memory { + self.shared_memory().raw() + } + + #[inline] + fn range(&self) -> Range<u64> { + self.range.clone() + } + + #[inline] + fn map<'a>( + &'a mut self, + _device: &B::Device, + range: Range<u64>, + ) -> Result<MappedRange<'a, B>, gfx_hal::device::MapError> { + debug_assert!( + range.start < range.end, + "Memory mapping region must have valid size" + ); + + if !self.shared_memory().host_visible() { + //TODO: invalid access error + return Err(gfx_hal::device::MapError::MappingFailed); + } + + let requested_range = relative_to_sub_range(self.range.clone(), range) + .ok_or(gfx_hal::device::MapError::OutOfBounds)?; + + let mapping_range = if !self.shared_memory().host_coherent() { + align_range( + requested_range.clone(), + self.shared_memory().non_coherent_atom_size(), + ) + } else { + requested_range.clone() + }; + + if let Some(ptr) = self.ptr { + let ptr = mapped_sub_range(ptr, self.range.clone(), mapping_range.clone()).unwrap(); + let mapping = unsafe { + MappedRange::from_raw(self.shared_memory(), ptr, mapping_range, requested_range) + }; + Ok(mapping) + } else { + Err(gfx_hal::device::MapError::MappingFailed) + } + } + + #[inline] + fn unmap(&mut self, _device: &B::Device) {} +} + +/// Config for `DynamicAllocator`. +#[derive(Clone, Copy, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct DynamicConfig { + /// All requests are rounded up to multiple of this value. + pub block_size_granularity: u64, + + /// Maximum chunk of blocks size. + /// Actual chunk size is `min(max_chunk_size, block_size * blocks_per_chunk)` + pub max_chunk_size: u64, + + /// Minimum size of device allocation. + pub min_device_allocation: u64, +} + +/// No-fragmentation allocator. +/// Suitable for any type of small allocations. +/// Every freed block can be reused. +#[derive(Debug)] +pub struct DynamicAllocator<B: Backend> { + /// Memory type that this allocator allocates. + memory_type: gfx_hal::MemoryTypeId, + + /// Memory properties of the memory type. + memory_properties: gfx_hal::memory::Properties, + + /// All requests are rounded up to multiple of this value. + block_size_granularity: u64, + + /// Maximum chunk of blocks size. + max_chunk_size: u64, + + /// Minimum size of device allocation. + min_device_allocation: u64, + + /// Chunk lists. + sizes: HashMap<u64, SizeEntry<B>>, + + /// Ordered set of sizes that have allocated chunks. + chunks: BTreeSet<u64>, + non_coherent_atom_size: u64, +} + +unsafe impl<B> Send for DynamicAllocator<B> where B: Backend {} +unsafe impl<B> Sync for DynamicAllocator<B> where B: Backend {} + +#[derive(Debug)] +struct SizeEntry<B: Backend> { + /// Total count of allocated blocks with size corresponding to this entry. + total_blocks: u64, + + /// Bits per ready (non-exhausted) chunks with free blocks. + ready_chunks: BitSet, + + /// List of chunks. 
+ chunks: slab::Slab<Chunk<B>>, +} + +impl<B> Default for SizeEntry<B> +where + B: Backend, +{ + fn default() -> Self { + SizeEntry { + chunks: Default::default(), + total_blocks: 0, + ready_chunks: Default::default(), + } + } +} + +const MAX_BLOCKS_PER_CHUNK: u32 = 64; +const MIN_BLOCKS_PER_CHUNK: u32 = 8; + +impl<B> DynamicAllocator<B> +where + B: Backend, +{ + /// Create new `DynamicAllocator` + /// for `memory_type` with `memory_properties` specified, + /// with `DynamicConfig` provided. + pub fn new( + memory_type: gfx_hal::MemoryTypeId, + memory_properties: gfx_hal::memory::Properties, + config: DynamicConfig, + non_coherent_atom_size: u64, + ) -> Self { + log::trace!( + "Create new allocator: type: '{:?}', properties: '{:#?}' config: '{:#?}'", + memory_type, + memory_properties, + config + ); + + assert!( + config.block_size_granularity.is_power_of_two(), + "Allocation granularity must be power of two" + ); + + let block_size_granularity = if is_non_coherent_visible(memory_properties) { + non_coherent_atom_size + .max(config.block_size_granularity) + .next_power_of_two() + } else { + config.block_size_granularity + }; + + assert!( + config.max_chunk_size.is_power_of_two(), + "Max chunk size must be power of two" + ); + + assert!( + config.min_device_allocation.is_power_of_two(), + "Min device allocation must be power of two" + ); + + assert!( + config.min_device_allocation <= config.max_chunk_size, + "Min device allocation must be less than or equalt to max chunk size" + ); + + if memory_properties.contains(gfx_hal::memory::Properties::CPU_VISIBLE) { + debug_assert!( + fits_usize(config.max_chunk_size), + "Max chunk size must fit usize for mapping" + ); + } + + DynamicAllocator { + memory_type, + memory_properties, + block_size_granularity, + max_chunk_size: config.max_chunk_size, + min_device_allocation: config.min_device_allocation, + sizes: HashMap::new(), + chunks: BTreeSet::new(), + non_coherent_atom_size, + } + } + + /// Maximum allocation size. + pub fn max_allocation(&self) -> u64 { + self.max_chunk_size / MIN_BLOCKS_PER_CHUNK as u64 + } + + /// Allocate memory chunk from device. + fn alloc_chunk_from_device( + &self, + device: &B::Device, + block_size: u64, + chunk_size: u64, + ) -> Result<Chunk<B>, gfx_hal::device::AllocationError> { + log::trace!( + "Allocate chunk of size: {} for blocks of size {} from device", + chunk_size, + block_size + ); + + // Allocate from device. + let (memory, mapping) = unsafe { + // Valid memory type specified. + let mut raw = device.allocate_memory(self.memory_type, chunk_size)?; + + let mapping = if self + .memory_properties + .contains(gfx_hal::memory::Properties::CPU_VISIBLE) + { + log::trace!("Map new memory object"); + match device.map_memory( + &mut raw, + gfx_hal::memory::Segment { + offset: 0, + size: Some(chunk_size), + }, + ) { + Ok(mapping) => Some(NonNull::new_unchecked(mapping)), + Err(gfx_hal::device::MapError::OutOfMemory(error)) => { + device.free_memory(raw); + return Err(error.into()); + } + Err(_) => panic!("Unexpected mapping failure"), + } + } else { + None + }; + let memory = Memory::from_raw( + raw, + chunk_size, + self.memory_properties, + self.non_coherent_atom_size, + ); + (memory, mapping) + }; + Ok(Chunk::from_memory(block_size, memory, mapping)) + } + + /// Allocate memory chunk for given block size. 
+ fn alloc_chunk( + &mut self, + device: &B::Device, + block_size: u64, + total_blocks: u64, + ) -> Result<(Chunk<B>, u64), gfx_hal::device::AllocationError> { + log::trace!( + "Allocate chunk for blocks of size {} ({} total blocks allocated)", + block_size, + total_blocks + ); + + let min_chunk_size = MIN_BLOCKS_PER_CHUNK as u64 * block_size; + let min_size = min_chunk_size.min(total_blocks * block_size); + let max_chunk_size = MAX_BLOCKS_PER_CHUNK as u64 * block_size; + + // If smallest possible chunk size is larger then this allocator max allocation + if min_size > self.max_allocation() + || (total_blocks < MIN_BLOCKS_PER_CHUNK as u64 + && min_size >= self.min_device_allocation) + { + // Allocate memory block from device. + let chunk = self.alloc_chunk_from_device(device, block_size, min_size)?; + return Ok((chunk, min_size)); + } + + if let Some(&chunk_size) = self + .chunks + .range(min_chunk_size..=max_chunk_size) + .next_back() + { + // Allocate block for the chunk. + let (block, allocated) = self.alloc_from_entry(device, chunk_size, 1, block_size)?; + Ok((Chunk::from_block(block_size, block), allocated)) + } else { + let total_blocks = self.sizes[&block_size].total_blocks; + let chunk_size = + (max_chunk_size.min(min_chunk_size.max(total_blocks * block_size)) / 2 + 1) + .next_power_of_two(); + let (block, allocated) = self.alloc_block(device, chunk_size, block_size)?; + Ok((Chunk::from_block(block_size, block), allocated)) + } + } + + /// Allocate blocks from particular chunk. + fn alloc_from_chunk( + chunks: &mut slab::Slab<Chunk<B>>, + chunk_index: u32, + block_size: u64, + count: u32, + align: u64, + ) -> Option<DynamicBlock<B>> { + log::trace!( + "Allocate {} consecutive blocks of size {} from chunk {}", + count, + block_size, + chunk_index + ); + + let chunk = &mut chunks[chunk_index as usize]; + let block_index = chunk.acquire_blocks(count, block_size, align)?; + let block_range = chunk.blocks_range(block_size, block_index, count); + + debug_assert_eq!((block_range.end - block_range.start) % count as u64, 0); + + Some(DynamicBlock { + range: block_range.clone(), + memory: chunk.shared_memory(), + block_index, + chunk_index, + count, + ptr: chunk.mapping_ptr().map(|ptr| { + mapped_sub_range(ptr, chunk.range(), block_range) + .expect("Block must be sub-range of chunk") + }), + relevant: relevant::Relevant, + }) + } + + /// Allocate blocks from size entry. 
+ fn alloc_from_entry( + &mut self, + device: &B::Device, + block_size: u64, + count: u32, + align: u64, + ) -> Result<(DynamicBlock<B>, u64), gfx_hal::device::AllocationError> { + log::trace!( + "Allocate {} consecutive blocks for size {} from the entry", + count, + block_size + ); + + debug_assert!(count < MIN_BLOCKS_PER_CHUNK); + let size_entry = self.sizes.entry(block_size).or_default(); + + for chunk_index in (&size_entry.ready_chunks).iter() { + if let Some(block) = Self::alloc_from_chunk( + &mut size_entry.chunks, + chunk_index, + block_size, + count, + align, + ) { + return Ok((block, 0)); + } + } + + if size_entry.chunks.vacant_entry().key() > max_chunks_per_size() { + return Err(gfx_hal::device::OutOfMemory::Host.into()); + } + + let total_blocks = size_entry.total_blocks; + let (chunk, allocated) = self.alloc_chunk(device, block_size, total_blocks)?; + let size_entry = self.sizes.entry(block_size).or_default(); + let chunk_index = size_entry.chunks.insert(chunk) as u32; + + let block = Self::alloc_from_chunk( + &mut size_entry.chunks, + chunk_index, + block_size, + count, + align, + ) + .expect("New chunk should yield blocks"); + + if !size_entry.chunks[chunk_index as usize].is_exhausted() { + size_entry.ready_chunks.add(chunk_index); + } + + Ok((block, allocated)) + } + + /// Allocate block. + fn alloc_block( + &mut self, + device: &B::Device, + block_size: u64, + align: u64, + ) -> Result<(DynamicBlock<B>, u64), gfx_hal::device::AllocationError> { + log::trace!("Allocate block of size {}", block_size); + + debug_assert_eq!(block_size % self.block_size_granularity, 0); + let size_entry = self.sizes.entry(block_size).or_default(); + size_entry.total_blocks += 1; + + let overhead = (MIN_BLOCKS_PER_CHUNK as u64 - 1) / size_entry.total_blocks; + + if overhead >= 1 { + if let Some(&size) = self + .chunks + .range(block_size / 4..block_size * overhead) + .next() + { + return self.alloc_from_entry( + device, + size, + ((block_size - 1) / size + 1) as u32, + align, + ); + } + } + + if size_entry.total_blocks == MIN_BLOCKS_PER_CHUNK as u64 { + self.chunks.insert(block_size); + } + + self.alloc_from_entry(device, block_size, 1, align) + } + + fn free_chunk(&mut self, device: &B::Device, chunk: Chunk<B>, block_size: u64) -> u64 { + log::trace!("Free chunk: {:#?}", chunk); + assert!(chunk.is_unused(block_size)); + match chunk.flavor { + ChunkFlavor::Dedicated(mut boxed, _) => { + let size = boxed.size(); + unsafe { + if self + .memory_properties + .contains(gfx_hal::memory::Properties::CPU_VISIBLE) + { + log::trace!("Unmap memory: {:#?}", boxed); + device.unmap_memory(boxed.raw_mut()); + } + device.free_memory(boxed.into_raw()); + } + size + } + ChunkFlavor::Dynamic(dynamic_block) => self.free(device, dynamic_block), + } + } + + fn free_block(&mut self, device: &B::Device, block: DynamicBlock<B>) -> u64 { + log::trace!("Free block: {:#?}", block); + + let block_size = block.size() / block.count as u64; + let size_entry = &mut self + .sizes + .get_mut(&block_size) + .expect("Unable to get size entry from which block was allocated"); + let chunk_index = block.chunk_index; + let chunk = &mut size_entry.chunks[chunk_index as usize]; + let block_index = block.block_index; + let count = block.count; + block.dispose(); + chunk.release_blocks(block_index, count); + if chunk.is_unused(block_size) { + size_entry.ready_chunks.remove(chunk_index); + let chunk = size_entry.chunks.remove(chunk_index as usize); + self.free_chunk(device, chunk, block_size) + } else { + 
size_entry.ready_chunks.add(chunk_index); + 0 + } + } + + /// Perform full cleanup of the memory allocated. + pub fn dispose(self) { + if !thread::panicking() { + for (index, size) in self.sizes { + assert_eq!(size.chunks.len(), 0, "SizeEntry({}) is still used", index); + } + } else { + for (index, size) in self.sizes { + if !size.chunks.is_empty() { + log::error!("Memory leak: SizeEntry({}) is still used", index); + } + } + } + } +} + +impl<B> Allocator<B> for DynamicAllocator<B> +where + B: Backend, +{ + type Block = DynamicBlock<B>; + + fn kind() -> Kind { + Kind::Dynamic + } + + fn alloc( + &mut self, + device: &B::Device, + size: u64, + align: u64, + ) -> Result<(DynamicBlock<B>, u64), gfx_hal::device::AllocationError> { + debug_assert!(size <= self.max_allocation()); + debug_assert!(align.is_power_of_two()); + let aligned_size = ((size - 1) | (align - 1) | (self.block_size_granularity - 1)) + 1; + + // This will change nothing if `self.non_coherent_atom_size` is power of two. + // But just in case... + let aligned_size = if is_non_coherent_visible(self.memory_properties) { + align_size(aligned_size, self.non_coherent_atom_size) + } else { + aligned_size + }; + + log::trace!( + "Allocate dynamic block: size: {}, align: {}, aligned size: {}, type: {}", + size, + align, + aligned_size, + self.memory_type.0 + ); + + self.alloc_block(device, aligned_size, align) + } + + fn free(&mut self, device: &B::Device, block: DynamicBlock<B>) -> u64 { + self.free_block(device, block) + } +} + +/// Block allocated for chunk. +#[derive(Debug)] +enum ChunkFlavor<B: Backend> { + /// Allocated from device. + Dedicated(Box<Memory<B>>, Option<NonNull<u8>>), + + /// Allocated from chunk of bigger blocks. + Dynamic(DynamicBlock<B>), +} + +#[derive(Debug)] +struct Chunk<B: Backend> { + flavor: ChunkFlavor<B>, + blocks: u64, +} + +impl<B> Chunk<B> +where + B: Backend, +{ + fn from_memory(block_size: u64, memory: Memory<B>, mapping: Option<NonNull<u8>>) -> Self { + let blocks = memory.size() / block_size; + debug_assert!(blocks <= MAX_BLOCKS_PER_CHUNK as u64); + + let high_bit = 1 << (blocks - 1); + + Chunk { + flavor: ChunkFlavor::Dedicated(Box::new(memory), mapping), + blocks: (high_bit - 1) | high_bit, + } + } + + fn from_block(block_size: u64, chunk_block: DynamicBlock<B>) -> Self { + let blocks = (chunk_block.size() / block_size).min(MAX_BLOCKS_PER_CHUNK as u64); + + let high_bit = 1 << (blocks - 1); + + Chunk { + flavor: ChunkFlavor::Dynamic(chunk_block), + blocks: (high_bit - 1) | high_bit, + } + } + + fn shared_memory(&self) -> &Memory<B> { + match &self.flavor { + ChunkFlavor::Dedicated(boxed, _) => &*boxed, + ChunkFlavor::Dynamic(chunk_block) => chunk_block.shared_memory(), + } + } + + fn range(&self) -> Range<u64> { + match &self.flavor { + ChunkFlavor::Dedicated(boxed, _) => 0..boxed.size(), + ChunkFlavor::Dynamic(chunk_block) => chunk_block.range(), + } + } + + fn size(&self) -> u64 { + let range = self.range(); + range.end - range.start + } + + // Get block bytes range + fn blocks_range(&self, block_size: u64, block_index: u32, count: u32) -> Range<u64> { + let range = self.range(); + let start = range.start + block_size * block_index as u64; + let end = start + block_size * count as u64; + debug_assert!(end <= range.end); + start..end + } + + /// Check if there are free blocks. 
+ fn is_unused(&self, block_size: u64) -> bool { + let blocks = (self.size() / block_size).min(MAX_BLOCKS_PER_CHUNK as u64); + + let high_bit = 1 << (blocks - 1); + let mask = (high_bit - 1) | high_bit; + + debug_assert!(self.blocks <= mask); + self.blocks == mask + } + + /// Check if there are free blocks. + fn is_exhausted(&self) -> bool { + self.blocks == 0 + } + + fn acquire_blocks(&mut self, count: u32, block_size: u64, align: u64) -> Option<u32> { + debug_assert!(count > 0 && count <= MAX_BLOCKS_PER_CHUNK); + + // Holds a bit-array of all positions with `count` free blocks. + let mut blocks = !0; + for i in 0..count { + blocks &= self.blocks >> i; + } + // Find a position in `blocks` that is aligned. + while blocks != 0 { + let index = blocks.trailing_zeros(); + blocks &= !(1 << index); + + if (index as u64 * block_size) & (align - 1) == 0 { + let mask = ((1 << count) - 1) << index; + self.blocks &= !mask; + return Some(index); + } + } + None + } + + fn release_blocks(&mut self, index: u32, count: u32) { + let mask = ((1 << count) - 1) << index; + debug_assert_eq!(self.blocks & mask, 0); + self.blocks |= mask; + } + + fn mapping_ptr(&self) -> Option<NonNull<u8>> { + match &self.flavor { + ChunkFlavor::Dedicated(_, ptr) => *ptr, + ChunkFlavor::Dynamic(chunk_block) => chunk_block.ptr, + } + } +} + +fn max_chunks_per_size() -> usize { + let value = (std::mem::size_of::<usize>() * 8).pow(4); + debug_assert!(fits_u32(value)); + value +} diff --git a/rendy-memory/src/allocator/linear.rs b/rendy-memory/src/allocator/linear.rs new file mode 100644 index 0000000..6687d4a --- /dev/null +++ b/rendy-memory/src/allocator/linear.rs @@ -0,0 +1,363 @@ +use std::{collections::VecDeque, ops::Range, ptr::NonNull}; + +use { + crate::{ + allocator::{Allocator, Kind}, + block::Block, + mapping::*, + memory::*, + util::*, + }, + gfx_hal::{device::Device as _, Backend}, + std::sync::Arc, +}; + +/// Memory block allocated from `LinearAllocator` +pub struct LinearBlock<B: Backend> { + memory: Arc<Memory<B>>, + linear_index: u64, + ptr: NonNull<u8>, + range: Range<u64>, + relevant: relevant::Relevant, +} + +impl<B> std::fmt::Debug for LinearBlock<B> +where + B: Backend, +{ + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fmt.debug_struct("LinearBlock") + .field("memory", &*self.memory) + .field("linear_index", &self.linear_index) + .field("ptr", &self.ptr) + .field("range", &self.range) + .finish() + } +} + +unsafe impl<B> Send for LinearBlock<B> where B: Backend {} +unsafe impl<B> Sync for LinearBlock<B> where B: Backend {} + +impl<B> LinearBlock<B> +where + B: Backend, +{ + fn size(&self) -> u64 { + self.range.end - self.range.start + } + + fn dispose(self) { + self.relevant.dispose(); + } +} + +impl<B> Block<B> for LinearBlock<B> +where + B: Backend, +{ + #[inline] + fn properties(&self) -> gfx_hal::memory::Properties { + self.memory.properties() + } + + #[inline] + fn memory(&self) -> &B::Memory { + self.memory.raw() + } + + #[inline] + fn range(&self) -> Range<u64> { + self.range.clone() + } + + #[inline] + fn map<'a>( + &'a mut self, + _device: &B::Device, + range: Range<u64>, + ) -> Result<MappedRange<'a, B>, gfx_hal::device::MapError> { + assert!( + range.start < range.end, + "Memory mapping region must have valid size" + ); + + if !self.memory.host_visible() { + //TODO: invalid access error + return Err(gfx_hal::device::MapError::MappingFailed); + } + + let requested_range = relative_to_sub_range(self.range.clone(), range) + .ok_or(gfx_hal::device::MapError::OutOfBounds)?; + 
+ let mapping_range = if !self.memory.host_coherent() { + align_range( + requested_range.clone(), + self.memory.non_coherent_atom_size(), + ) + } else { + requested_range.clone() + }; + + let ptr = mapped_sub_range(self.ptr, self.range.clone(), mapping_range.clone()).unwrap(); + let mapping = + unsafe { MappedRange::from_raw(&*self.memory, ptr, mapping_range, requested_range) }; + Ok(mapping) + } + + #[inline] + fn unmap(&mut self, _device: &B::Device) { + debug_assert!(self.memory.host_visible()); + } +} + +/// Config for `LinearAllocator`. +#[derive(Clone, Copy, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct LinearConfig { + /// Size of the linear chunk. + /// Keep it big. + pub linear_size: u64, +} + +/// Linear allocator that return memory from chunk sequentially. +/// It keeps only number of bytes allocated from each chunk. +/// Once chunk is exhausted it is placed into list. +/// When all blocks allocated from head of that list are freed, +/// head is freed as well. +/// +/// This allocator suites best short-lived types of allocations. +/// Allocation strategy requires minimal overhead and implementation is fast. +/// But holding single block will completely stop memory recycling. +#[derive(Debug)] +pub struct LinearAllocator<B: Backend> { + memory_type: gfx_hal::MemoryTypeId, + memory_properties: gfx_hal::memory::Properties, + linear_size: u64, + offset: u64, + lines: VecDeque<Line<B>>, + non_coherent_atom_size: u64, +} + +#[derive(Debug)] +struct Line<B: Backend> { + used: u64, + free: u64, + memory: Arc<Memory<B>>, + ptr: NonNull<u8>, +} + +unsafe impl<B> Send for Line<B> where B: Backend {} +unsafe impl<B> Sync for Line<B> where B: Backend {} + +impl<B> LinearAllocator<B> +where + B: Backend, +{ + /// Get properties required by the `LinearAllocator`. + pub fn properties_required() -> gfx_hal::memory::Properties { + gfx_hal::memory::Properties::CPU_VISIBLE + } + + /// Maximum allocation size. + pub fn max_allocation(&self) -> u64 { + self.linear_size / 2 + } + + /// Create new `LinearAllocator` + /// for `memory_type` with `memory_properties` specified, + /// with `LinearConfig` provided. + pub fn new( + memory_type: gfx_hal::MemoryTypeId, + memory_properties: gfx_hal::memory::Properties, + config: LinearConfig, + non_coherent_atom_size: u64, + ) -> Self { + log::trace!( + "Create new 'linear' allocator: type: '{:?}', properties: '{:#?}' config: '{:#?}'", + memory_type, + memory_properties, + config + ); + let linear_size = if is_non_coherent_visible(memory_properties) { + align_size(config.linear_size, non_coherent_atom_size) + } else { + config.linear_size + }; + assert!(memory_properties.contains(Self::properties_required())); + assert!( + fits_usize(linear_size), + "Linear size must fit in both usize and u64" + ); + LinearAllocator { + memory_type, + memory_properties, + linear_size, + offset: 0, + lines: VecDeque::new(), + non_coherent_atom_size, + } + } + + /// Perform full cleanup of the memory allocated. + pub fn dispose(mut self, device: &B::Device) { + let _ = self.cleanup(device, 0); + if !self.lines.is_empty() { + log::error!( + "Lines are not empty during allocator disposal. 
Lines: {:#?}", + self.lines + ); + } + } + + fn cleanup(&mut self, device: &B::Device, off: usize) -> u64 { + let mut freed = 0; + while self.lines.len() > off { + if self.lines[0].used > self.lines[0].free { + break; + } + + let line = self.lines.pop_front().unwrap(); + self.offset += 1; + + unsafe { + match Arc::try_unwrap(line.memory) { + Ok(mut memory) => { + // trace!("Unmap memory: {:#?}", line.memory); + device.unmap_memory(memory.raw_mut()); + + freed += memory.size(); + device.free_memory(memory.into_raw()); + } + Err(_) => log::error!("Allocated `Line` was freed, but memory is still shared and never will be destroyed"), + } + } + } + freed + } +} + +impl<B> Allocator<B> for LinearAllocator<B> +where + B: Backend, +{ + type Block = LinearBlock<B>; + + fn kind() -> Kind { + Kind::Linear + } + + fn alloc( + &mut self, + device: &B::Device, + size: u64, + align: u64, + ) -> Result<(LinearBlock<B>, u64), gfx_hal::device::AllocationError> { + debug_assert!(self + .memory_properties + .contains(gfx_hal::memory::Properties::CPU_VISIBLE)); + + let (size, align) = if is_non_coherent_visible(self.memory_properties) { + ( + align_size(size, self.non_coherent_atom_size), + align_size(align, self.non_coherent_atom_size), + ) + } else { + (size, align) + }; + + assert!(size <= self.linear_size); + assert!(align <= self.linear_size); + + let count = self.lines.len() as u64; + if let Some(line) = self.lines.back_mut() { + let aligned_offset = aligned(line.used, align); + let overhead = aligned_offset - line.used; + if self.linear_size - size > aligned_offset { + line.used = aligned_offset + size; + line.free += overhead; + + let range = aligned_offset..aligned_offset + size; + + let ptr = mapped_sub_range(line.ptr, 0..self.linear_size, range.clone()) + .expect("This sub-range must fit in line mapping"); + + return Ok(( + LinearBlock { + linear_index: self.offset + count - 1, + memory: line.memory.clone(), + ptr, + range, + relevant: relevant::Relevant, + }, + 0, + )); + } + } + + let (memory, ptr) = unsafe { + let mut raw = device.allocate_memory(self.memory_type, self.linear_size)?; + + let ptr = match device.map_memory( + &mut raw, + gfx_hal::memory::Segment { + offset: 0, + size: Some(self.linear_size), + }, + ) { + Ok(ptr) => NonNull::new_unchecked(ptr), + Err(gfx_hal::device::MapError::OutOfMemory(error)) => { + device.free_memory(raw); + return Err(error.into()); + } + Err(_) => panic!("Unexpected mapping failure"), + }; + + let memory = Memory::from_raw( + raw, + self.linear_size, + self.memory_properties, + self.non_coherent_atom_size, + ); + + (memory, ptr) + }; + + let line = Line { + used: size, + free: 0, + ptr, + memory: Arc::new(memory), + }; + + let block = LinearBlock { + linear_index: self.offset + count, + memory: line.memory.clone(), + ptr, + range: 0..size, + relevant: relevant::Relevant, + }; + + self.lines.push_back(line); + Ok((block, self.linear_size)) + } + + fn free(&mut self, device: &B::Device, block: Self::Block) -> u64 { + let index = block.linear_index - self.offset; + assert!( + fits_usize(index), + "This can't exceed lines list length which fits into usize by definition" + ); + let index = index as usize; + assert!( + index < self.lines.len(), + "Can't be allocated from not yet created line" + ); + { + let line = &mut self.lines[index]; + line.free += block.size(); + } + block.dispose(); + + self.cleanup(device, 1) + } +} diff --git a/rendy-memory/src/allocator/mod.rs b/rendy-memory/src/allocator/mod.rs new file mode 100644 index 0000000..770a8a8 --- /dev/null +++ 
b/rendy-memory/src/allocator/mod.rs @@ -0,0 +1,50 @@ +//! This module provides `Allocator` trait and few allocators that implements the trait. + +mod dedicated; +mod dynamic; +mod linear; + +use crate::block::Block; + +pub use self::{ + dedicated::{DedicatedAllocator, DedicatedBlock}, + dynamic::{DynamicAllocator, DynamicBlock, DynamicConfig}, + linear::{LinearAllocator, LinearBlock, LinearConfig}, +}; + +/// Allocator kind. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Kind { + /// Memory object per allocation. + Dedicated, + + /// General purpose allocator. + Dynamic, + + /// Allocates linearly. + /// Fast and low overhead. + /// Suitable for one-time-use allocations. + Linear, +} + +/// Allocator trait implemented for various allocators. +pub trait Allocator<B: gfx_hal::Backend> { + /// Block type returned by allocator. + type Block: Block<B>; + + /// Get allocator kind. + fn kind() -> Kind; + + /// Allocate block of memory. + /// On success returns allocated block and amount of memory consumed from device. + fn alloc( + &mut self, + device: &B::Device, + size: u64, + align: u64, + ) -> Result<(Self::Block, u64), gfx_hal::device::AllocationError>; + + /// Free block of memory. + /// Returns amount of memory returned to the device. + fn free(&mut self, device: &B::Device, block: Self::Block) -> u64; +} diff --git a/rendy-memory/src/block.rs b/rendy-memory/src/block.rs new file mode 100644 index 0000000..23ff4f4 --- /dev/null +++ b/rendy-memory/src/block.rs @@ -0,0 +1,36 @@ +use std::ops::Range; + +use crate::mapping::MappedRange; + +/// Block that owns a `Range` of the `Memory`. +/// Implementor must ensure that there can't be any other blocks +/// with overlapping range (either through type system or safety notes for unsafe functions). +/// Provides access to safe memory range mapping. +pub trait Block<B: gfx_hal::Backend> { + /// Get memory properties of the block. + fn properties(&self) -> gfx_hal::memory::Properties; + + /// Get raw memory object. + fn memory(&self) -> &B::Memory; + + /// Get memory range owned by this block. + fn range(&self) -> Range<u64>; + + /// Get size of the block. + fn size(&self) -> u64 { + let range = self.range(); + range.end - range.start + } + + /// Get mapping for the buffer range. + /// Memory writes to the region performed by device become available for the host. + fn map<'a>( + &'a mut self, + device: &B::Device, + range: Range<u64>, + ) -> Result<MappedRange<'a, B>, gfx_hal::device::MapError>; + + /// Release memory mapping. Must be called after successful `map` call. + /// No-op if block is not mapped. 
+ fn unmap(&mut self, device: &B::Device); +} diff --git a/rendy-memory/src/heaps/heap.rs b/rendy-memory/src/heaps/heap.rs new file mode 100644 index 0000000..6595cbc --- /dev/null +++ b/rendy-memory/src/heaps/heap.rs @@ -0,0 +1,49 @@ +use crate::utilization::*; + +#[derive(Debug)] +pub(super) struct MemoryHeap { + size: u64, + used: u64, + effective: u64, +} + +impl MemoryHeap { + pub(super) fn new(size: u64) -> Self { + MemoryHeap { + size, + used: 0, + effective: 0, + } + } + + pub(super) fn available(&self) -> u64 { + if self.used > self.size { + log::warn!("Heap size exceeded"); + 0 + } else { + self.size - self.used + } + } + + pub(super) fn allocated(&mut self, used: u64, effective: u64) { + self.used += used; + self.effective += effective; + debug_assert!(self.used >= self.effective); + } + + pub(super) fn freed(&mut self, used: u64, effective: u64) { + self.used -= used; + self.effective -= effective; + debug_assert!(self.used >= self.effective); + } + + pub(super) fn utilization(&self) -> MemoryHeapUtilization { + MemoryHeapUtilization { + utilization: MemoryUtilization { + used: self.used, + effective: self.effective, + }, + size: self.size, + } + } +} diff --git a/rendy-memory/src/heaps/memory_type.rs b/rendy-memory/src/heaps/memory_type.rs new file mode 100644 index 0000000..e3c2e72 --- /dev/null +++ b/rendy-memory/src/heaps/memory_type.rs @@ -0,0 +1,158 @@ +use { + super::{BlockFlavor, HeapsConfig}, + crate::{allocator::*, usage::MemoryUsage, utilization::*}, + gfx_hal::memory::Properties, +}; + +#[derive(Debug)] +pub(super) struct MemoryType<B: gfx_hal::Backend> { + heap_index: usize, + properties: Properties, + dedicated: DedicatedAllocator, + linear: Option<LinearAllocator<B>>, + dynamic: Option<DynamicAllocator<B>>, + // chunk: Option<ChunkAllocator>, + used: u64, + effective: u64, +} + +impl<B> MemoryType<B> +where + B: gfx_hal::Backend, +{ + pub(super) fn new( + memory_type: gfx_hal::MemoryTypeId, + heap_index: usize, + properties: Properties, + config: HeapsConfig, + non_coherent_atom_size: u64, + ) -> Self { + MemoryType { + properties, + heap_index, + dedicated: DedicatedAllocator::new(memory_type, properties, non_coherent_atom_size), + linear: if properties.contains(Properties::CPU_VISIBLE) { + config.linear.map(|config| { + LinearAllocator::new(memory_type, properties, config, non_coherent_atom_size) + }) + } else { + None + }, + dynamic: config.dynamic.map(|config| { + DynamicAllocator::new(memory_type, properties, config, non_coherent_atom_size) + }), + used: 0, + effective: 0, + } + } + + pub(super) fn properties(&self) -> Properties { + self.properties + } + + pub(super) fn heap_index(&self) -> usize { + self.heap_index + } + + pub(super) fn alloc( + &mut self, + device: &B::Device, + usage: impl MemoryUsage, + size: u64, + align: u64, + ) -> Result<(BlockFlavor<B>, u64), gfx_hal::device::AllocationError> { + let (block, allocated) = self.alloc_impl(device, usage, size, align)?; + self.effective += block.size(); + self.used += allocated; + Ok((block, allocated)) + } + + fn alloc_impl( + &mut self, + device: &B::Device, + usage: impl MemoryUsage, + size: u64, + align: u64, + ) -> Result<(BlockFlavor<B>, u64), gfx_hal::device::AllocationError> { + match (self.dynamic.as_mut(), self.linear.as_mut()) { + (Some(dynamic), Some(linear)) => { + if dynamic.max_allocation() >= size + && usage.allocator_fitness(Kind::Dynamic) + > usage.allocator_fitness(Kind::Linear) + { + dynamic + .alloc(device, size, align) + .map(|(block, size)| (BlockFlavor::Dynamic(block), size)) + 
} else if linear.max_allocation() >= size + && usage.allocator_fitness(Kind::Linear) > 0 + { + linear + .alloc(device, size, align) + .map(|(block, size)| (BlockFlavor::Linear(block), size)) + } else { + self.dedicated + .alloc(device, size, align) + .map(|(block, size)| (BlockFlavor::Dedicated(block), size)) + } + } + (Some(dynamic), None) => { + if dynamic.max_allocation() >= size && usage.allocator_fitness(Kind::Dynamic) > 0 { + dynamic + .alloc(device, size, align) + .map(|(block, size)| (BlockFlavor::Dynamic(block), size)) + } else { + self.dedicated + .alloc(device, size, align) + .map(|(block, size)| (BlockFlavor::Dedicated(block), size)) + } + } + (None, Some(linear)) => { + if linear.max_allocation() >= size && usage.allocator_fitness(Kind::Linear) > 0 { + linear + .alloc(device, size, align) + .map(|(block, size)| (BlockFlavor::Linear(block), size)) + } else { + self.dedicated + .alloc(device, size, align) + .map(|(block, size)| (BlockFlavor::Dedicated(block), size)) + } + } + (None, None) => self + .dedicated + .alloc(device, size, align) + .map(|(block, size)| (BlockFlavor::Dedicated(block), size)), + } + } + + pub(super) fn free(&mut self, device: &B::Device, block: BlockFlavor<B>) -> u64 { + match block { + BlockFlavor::Dedicated(block) => self.dedicated.free(device, block), + BlockFlavor::Linear(block) => self.linear.as_mut().unwrap().free(device, block), + BlockFlavor::Dynamic(block) => self.dynamic.as_mut().unwrap().free(device, block), + } + } + + pub(super) fn dispose(self, device: &B::Device) { + log::trace!("Dispose memory allocators"); + + if let Some(linear) = self.linear { + linear.dispose(device); + log::trace!("Linear allocator disposed"); + } + if let Some(dynamic) = self.dynamic { + dynamic.dispose(); + log::trace!("Dynamic allocator disposed"); + } + } + + pub(super) fn utilization(&self) -> MemoryTypeUtilization { + MemoryTypeUtilization { + utilization: MemoryUtilization { + used: self.used, + effective: self.effective, + }, + properties: self.properties, + heap_index: self.heap_index, + } + } +} diff --git a/rendy-memory/src/heaps/mod.rs b/rendy-memory/src/heaps/mod.rs new file mode 100644 index 0000000..0dd7983 --- /dev/null +++ b/rendy-memory/src/heaps/mod.rs @@ -0,0 +1,327 @@ +mod heap; +mod memory_type; + +use { + self::{heap::MemoryHeap, memory_type::MemoryType}, + crate::{allocator::*, block::Block, mapping::*, usage::MemoryUsage, util::*, utilization::*}, + std::ops::Range, +}; + +/// Possible errors returned by `Heaps`. +#[allow(missing_copy_implementations)] +#[derive(Clone, Debug, PartialEq)] +pub enum HeapsError { + /// Memory allocation failure. + AllocationError(gfx_hal::device::AllocationError), + /// No memory types among required for resource with requested properties was found. 
+ NoSuitableMemory(u32, gfx_hal::memory::Properties), +} + +impl std::fmt::Display for HeapsError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + HeapsError::AllocationError(e) => write!(f, "{:?}", e), + HeapsError::NoSuitableMemory(e, e2) => write!( + f, + "Memory type among ({}) with properties ({:?}) not found", + e, e2 + ), + } + } +} +impl std::error::Error for HeapsError {} + +impl From<gfx_hal::device::AllocationError> for HeapsError { + fn from(error: gfx_hal::device::AllocationError) -> Self { + HeapsError::AllocationError(error) + } +} + +impl From<gfx_hal::device::OutOfMemory> for HeapsError { + fn from(error: gfx_hal::device::OutOfMemory) -> Self { + HeapsError::AllocationError(error.into()) + } +} + +/// Config for `Heaps` allocator. +#[derive(Clone, Copy, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct HeapsConfig { + /// Config for linear sub-allocator. + pub linear: Option<LinearConfig>, + + /// Config for dynamic sub-allocator. + pub dynamic: Option<DynamicConfig>, +} + +/// Heaps available on particular physical device. +#[derive(Debug)] +pub struct Heaps<B: gfx_hal::Backend> { + types: Vec<MemoryType<B>>, + heaps: Vec<MemoryHeap>, +} + +impl<B> Heaps<B> +where + B: gfx_hal::Backend, +{ + /// This must be called with `gfx_hal::memory::Properties` fetched from physical device. + pub unsafe fn new<P, H>(types: P, heaps: H, non_coherent_atom_size: u64) -> Self + where + P: IntoIterator<Item = (gfx_hal::memory::Properties, u32, HeapsConfig)>, + H: IntoIterator<Item = u64>, + { + let heaps = heaps.into_iter().map(MemoryHeap::new).collect::<Vec<_>>(); + Heaps { + types: types + .into_iter() + .enumerate() + .map(|(index, (properties, heap_index, config))| { + assert!( + fits_u32(index), + "Number of memory types must fit in u32 limit" + ); + assert!( + fits_usize(heap_index), + "Number of memory types must fit in u32 limit" + ); + let memory_type = gfx_hal::MemoryTypeId(index); + let heap_index = heap_index as usize; + assert!(heap_index < heaps.len()); + MemoryType::new( + memory_type, + heap_index, + properties, + config, + non_coherent_atom_size, + ) + }) + .collect(), + heaps, + } + } + + /// Allocate memory block + /// from one of memory types specified by `mask`, + /// for intended `usage`, + /// with `size` + /// and `align` requirements. + pub fn allocate( + &mut self, + device: &B::Device, + mask: u32, + usage: impl MemoryUsage, + size: u64, + align: u64, + ) -> Result<MemoryBlock<B>, HeapsError> { + debug_assert!(fits_u32(self.types.len())); + + let (memory_index, _, _) = { + let suitable_types = self + .types + .iter() + .enumerate() + .filter(|(index, _)| (mask & (1u32 << index)) != 0) + .filter_map(|(index, mt)| { + if mt.properties().contains(usage.properties_required()) { + let fitness = usage.memory_fitness(mt.properties()); + Some((index, mt, fitness)) + } else { + None + } + }) + .collect::<smallvec::SmallVec<[_; 64]>>(); + + if suitable_types.is_empty() { + return Err(HeapsError::NoSuitableMemory( + mask, + usage.properties_required(), + )); + } + + suitable_types + .into_iter() + .filter(|(_, mt, _)| self.heaps[mt.heap_index()].available() > size + align) + .max_by_key(|&(_, _, fitness)| fitness) + .ok_or_else(|| { + log::error!("All suitable heaps are exhausted. {:#?}", self); + gfx_hal::device::OutOfMemory::Device + })? 
+ }; + + self.allocate_from(device, memory_index as u32, usage, size, align) + } + + /// Allocate memory block + /// from `memory_index` specified, + /// for intended `usage`, + /// with `size` + /// and `align` requirements. + fn allocate_from( + &mut self, + device: &B::Device, + memory_index: u32, + usage: impl MemoryUsage, + size: u64, + align: u64, + ) -> Result<MemoryBlock<B>, HeapsError> { + log::trace!( + "Allocate memory block: type '{}', usage '{:#?}', size: '{}', align: '{}'", + memory_index, + usage, + size, + align + ); + assert!(fits_usize(memory_index)); + + let memory_type = &mut self.types[memory_index as usize]; + let memory_heap = &mut self.heaps[memory_type.heap_index()]; + + if memory_heap.available() < size { + return Err(gfx_hal::device::OutOfMemory::Device.into()); + } + + let (block, allocated) = memory_type.alloc(device, usage, size, align)?; + memory_heap.allocated(allocated, block.size()); + + Ok(MemoryBlock { + block, + memory_index, + }) + } + + /// Free memory block. + /// + /// Memory block must be allocated from this heap. + pub fn free(&mut self, device: &B::Device, block: MemoryBlock<B>) { + // trace!("Free block '{:#?}'", block); + let memory_index = block.memory_index; + debug_assert!(fits_usize(memory_index)); + let size = block.size(); + + let memory_type = &mut self.types[memory_index as usize]; + let memory_heap = &mut self.heaps[memory_type.heap_index()]; + let freed = memory_type.free(device, block.block); + memory_heap.freed(freed, size); + } + + /// Dispose of allocator. + /// Cleanup allocators before dropping. + /// Will panic if memory instances are left allocated. + pub fn dispose(self, device: &B::Device) { + for mt in self.types { + mt.dispose(device) + } + } + + /// Get memory utilization. + pub fn utilization(&self) -> TotalMemoryUtilization { + TotalMemoryUtilization { + heaps: self.heaps.iter().map(MemoryHeap::utilization).collect(), + types: self.types.iter().map(MemoryType::utilization).collect(), + } + } +} + +/// Memory block allocated from `Heaps`. +#[derive(Debug)] +pub struct MemoryBlock<B: gfx_hal::Backend> { + block: BlockFlavor<B>, + memory_index: u32, +} + +impl<B> MemoryBlock<B> +where + B: gfx_hal::Backend, +{ + /// Get memory type id. + pub fn memory_type(&self) -> u32 { + self.memory_index + } +} + +#[derive(Debug)] +enum BlockFlavor<B: gfx_hal::Backend> { + Dedicated(DedicatedBlock<B>), + Linear(LinearBlock<B>), + Dynamic(DynamicBlock<B>), + // Chunk(ChunkBlock<B>), +} + +macro_rules! any_block { + ($self:ident. $block:ident => $expr:expr) => {{ + use self::BlockFlavor::*; + match $self.$block { + Dedicated($block) => $expr, + Linear($block) => $expr, + Dynamic($block) => $expr, + // Chunk($block) => $expr, + } + }}; + (& $self:ident. $block:ident => $expr:expr) => {{ + use self::BlockFlavor::*; + match &$self.$block { + Dedicated($block) => $expr, + Linear($block) => $expr, + Dynamic($block) => $expr, + // Chunk($block) => $expr, + } + }}; + (&mut $self:ident. 
$block:ident => $expr:expr) => {{ + use self::BlockFlavor::*; + match &mut $self.$block { + Dedicated($block) => $expr, + Linear($block) => $expr, + Dynamic($block) => $expr, + // Chunk($block) => $expr, + } + }}; +} + +impl<B> BlockFlavor<B> +where + B: gfx_hal::Backend, +{ + #[inline] + fn size(&self) -> u64 { + use self::BlockFlavor::*; + match self { + Dedicated(block) => block.size(), + Linear(block) => block.size(), + Dynamic(block) => block.size(), + // Chunk(block) => block.size(), + } + } +} + +impl<B> Block<B> for MemoryBlock<B> +where + B: gfx_hal::Backend, +{ + #[inline] + fn properties(&self) -> gfx_hal::memory::Properties { + any_block!(&self.block => block.properties()) + } + + #[inline] + fn memory(&self) -> &B::Memory { + any_block!(&self.block => block.memory()) + } + + #[inline] + fn range(&self) -> Range<u64> { + any_block!(&self.block => block.range()) + } + + fn map<'a>( + &'a mut self, + device: &B::Device, + range: Range<u64>, + ) -> Result<MappedRange<'a, B>, gfx_hal::device::MapError> { + any_block!(&mut self.block => block.map(device, range)) + } + + fn unmap(&mut self, device: &B::Device) { + any_block!(&mut self.block => block.unmap(device)) + } +} diff --git a/rendy-memory/src/lib.rs b/rendy-memory/src/lib.rs new file mode 100644 index 0000000..a0653ee --- /dev/null +++ b/rendy-memory/src/lib.rs @@ -0,0 +1,31 @@ +//! GPU memory management +//! + +#![warn( + missing_debug_implementations, + missing_copy_implementations, + missing_docs, + trivial_casts, + trivial_numeric_casts, + unused_extern_crates, + unused_import_braces, + unused_qualifications +)] +mod allocator; +mod block; +mod heaps; +mod mapping; +mod memory; +mod usage; +mod util; +mod utilization; + +pub use crate::{ + allocator::*, + block::Block, + heaps::{Heaps, HeapsConfig, HeapsError, MemoryBlock}, + mapping::{write::Write, Coherent, MappedRange, MaybeCoherent, NonCoherent}, + memory::Memory, + usage::*, + utilization::*, +}; diff --git a/rendy-memory/src/mapping/mod.rs b/rendy-memory/src/mapping/mod.rs new file mode 100644 index 0000000..63b2f34 --- /dev/null +++ b/rendy-memory/src/mapping/mod.rs @@ -0,0 +1,345 @@ +mod range; +pub(crate) mod write; + +use { + crate::{memory::Memory, util::*}, + gfx_hal::{device::Device as _, Backend}, + std::{ops::Range, ptr::NonNull}, +}; + +pub(crate) use self::range::*; +use self::write::{Write, WriteCoherent, WriteFlush}; + +/// Non-coherent marker. +#[derive(Clone, Copy, Debug)] +pub struct NonCoherent; + +/// Coherent marker. +#[derive(Clone, Copy, Debug)] +pub struct Coherent; + +/// Value that contains either coherent marker or non-coherent marker. +#[derive(Clone, Copy, Debug)] +pub struct MaybeCoherent(bool); + +/// Represents range of the memory mapped to the host. +/// Provides methods for safer host access to the memory. +#[derive(Debug)] +pub struct MappedRange<'a, B: Backend, C = MaybeCoherent> { + /// Memory object that is mapped. + memory: &'a Memory<B>, + + /// Pointer to range mapped memory. + ptr: NonNull<u8>, + + /// Range of mapped memory. + mapping_range: Range<u64>, + + /// Mapping range requested by caller. + /// Must be subrange of `mapping_range`. + requested_range: Range<u64>, + + /// Coherency marker + coherent: C, +} + +impl<'a, B> MappedRange<'a, B> +where + B: Backend, +{ + // /// Map range of memory. + // /// `range` is in memory object space. + // /// + // /// # Safety + // /// + // /// * Only one range for the given memory object can be mapped. + // /// * Memory object must be not mapped. 
+ // /// * Memory object must be created with device specified. + // pub unsafe fn new( + // memory: &'a Memory<B>, + // device: &B::Device, + // range: Range<u64>, + // ) -> Result<Self, gfx_hal::device::MapError> { + // assert!( + // range.start < range.end, + // "Memory mapping region must have valid size" + // ); + // assert!( + // fits_usize(range.end - range.start), + // "Range length must fit in usize" + // ); + // assert!(memory.host_visible()); + + // let ptr = device.map_memory(memory.raw(), range.clone())?; + // assert!( + // (ptr as usize).wrapping_neg() >= (range.end - range.start) as usize, + // "Resulting pointer value + range length must fit in usize. Pointer: {:p}, range {:?}", + // ptr, + // range, + // ); + + // Ok(Self::from_raw(memory, NonNull::new_unchecked(ptr), range)) + // } + + /// Construct mapped range from raw mapping + /// + /// # Safety + /// + /// `memory` `range` must be mapped to host memory region pointer by `ptr`. + /// `range` is in memory object space. + /// `ptr` points to the `range.start` offset from memory origin. + pub(crate) unsafe fn from_raw( + memory: &'a Memory<B>, + ptr: NonNull<u8>, + mapping_range: Range<u64>, + requested_range: Range<u64>, + ) -> Self { + debug_assert!( + mapping_range.start < mapping_range.end, + "Memory mapping region must have valid size" + ); + + debug_assert!( + requested_range.start < requested_range.end, + "Memory mapping region must have valid size" + ); + + if !memory.host_coherent() { + debug_assert_eq!(mapping_range.start % memory.non_coherent_atom_size(), 0, "Bounds of non-coherent memory mapping ranges must be multiple of `Limits::non_coherent_atom_size`"); + debug_assert_eq!(mapping_range.end % memory.non_coherent_atom_size(), 0, "Bounds of non-coherent memory mapping ranges must be multiple of `Limits::non_coherent_atom_size`"); + debug_assert!( + is_sub_range(mapping_range.clone(), requested_range.clone()), + "`requested_range` must be sub-range of `mapping_range`", + ); + } else { + debug_assert_eq!(mapping_range, requested_range); + } + + MappedRange { + ptr, + mapping_range, + requested_range, + memory, + coherent: MaybeCoherent(memory.host_coherent()), + } + } + + /// Get pointer to beginning of memory region. + /// i.e. to `range().start` offset from memory origin. + pub fn ptr(&self) -> NonNull<u8> { + mapped_sub_range( + self.ptr, + self.mapping_range.clone(), + self.requested_range.clone(), + ) + .unwrap() + } + + /// Get mapped range. + pub fn range(&self) -> Range<u64> { + self.requested_range.clone() + } + + /// Fetch readable slice of sub-range to be read. + /// Invalidating range if memory is not coherent. + /// `range.end - range.start` must be multiple of `size_of::()`. + /// `mapping offset + range.start` must be multiple of `align_of::()`. + /// + /// # Safety + /// + /// * Caller must ensure that device won't write to the memory region until the borrowing ends. + /// * `T` Must be plain-old-data type compatible with data in mapped region. 
+ pub unsafe fn read<'b, T>( + &'b mut self, + device: &B::Device, + range: Range<u64>, + ) -> Result<&'b [T], gfx_hal::device::MapError> + where + 'a: 'b, + T: Copy, + { + debug_assert!( + range.start < range.end, + "Memory mapping region must have valid size" + ); + debug_assert!( + fits_usize(range.end - range.start), + "Range length must fit in usize" + ); + + let sub_range = relative_to_sub_range(self.requested_range.clone(), range) + .ok_or(gfx_hal::device::MapError::OutOfBounds)?; + + let ptr = + mapped_sub_range(self.ptr, self.mapping_range.clone(), sub_range.clone()).unwrap(); + + let size = (sub_range.end - sub_range.start) as usize; + + if !self.coherent.0 { + let aligned_sub_range = align_range(sub_range, self.memory.non_coherent_atom_size()); + debug_assert!(is_sub_range( + self.mapping_range.clone(), + aligned_sub_range.clone() + )); + device.invalidate_mapped_memory_ranges(std::iter::once(( + self.memory.raw(), + gfx_hal::memory::Segment { + offset: aligned_sub_range.start, + size: Some(aligned_sub_range.end - aligned_sub_range.start), + }, + )))?; + } + + let slice = mapped_slice::<T>(ptr, size); + Ok(slice) + } + + /// Fetch writer to the sub-region. + /// This writer will flush data on drop if written at least once. + /// + /// # Safety + /// + /// * Caller must ensure that device won't write to or read from the memory region. + pub unsafe fn write<'b, T: 'b>( + &'b mut self, + device: &'b B::Device, + range: Range<u64>, + ) -> Result<impl Write<T> + 'b, gfx_hal::device::MapError> + where + 'a: 'b, + T: Copy, + { + assert!( + range.start < range.end, + "Memory mapping region must have valid size" + ); + assert!( + fits_usize(range.end - range.start), + "Range length must fit in usize" + ); + + let sub_range = relative_to_sub_range(self.requested_range.clone(), range) + .ok_or(gfx_hal::device::MapError::OutOfBounds)?; + + let ptr = + mapped_sub_range(self.ptr, self.mapping_range.clone(), sub_range.clone()).unwrap(); + + let size = (sub_range.end - sub_range.start) as usize; + + let slice = mapped_slice_mut::<T>(ptr, size); + + let memory = &self.memory; + let flush = if !self.coherent.0 { + let aligned_sub_range = align_range(sub_range, self.memory.non_coherent_atom_size()); + debug_assert!(is_sub_range( + self.mapping_range.clone(), + aligned_sub_range.clone() + )); + Some(move || { + device + .flush_mapped_memory_ranges(std::iter::once(( + memory.raw(), + gfx_hal::memory::Segment { + offset: aligned_sub_range.start, + size: Some(aligned_sub_range.end - aligned_sub_range.start), + }, + ))) + .expect("Should flush successfully"); + }) + } else { + None + }; + + Ok(WriteFlush { slice, flush }) + } + + /// Convert into mapped range with statically known coherency. 
+ pub fn coherent(self) -> Result<MappedRange<'a, B, Coherent>, MappedRange<'a, B, NonCoherent>> { + if self.coherent.0 { + Ok(MappedRange { + memory: self.memory, + ptr: self.ptr, + mapping_range: self.mapping_range, + requested_range: self.requested_range, + coherent: Coherent, + }) + } else { + Err(MappedRange { + memory: self.memory, + ptr: self.ptr, + mapping_range: self.mapping_range, + requested_range: self.requested_range, + coherent: NonCoherent, + }) + } + } +} + +impl<'a, B> From<MappedRange<'a, B, Coherent>> for MappedRange<'a, B> +where + B: Backend, +{ + fn from(range: MappedRange<'a, B, Coherent>) -> Self { + MappedRange { + memory: range.memory, + ptr: range.ptr, + mapping_range: range.mapping_range, + requested_range: range.requested_range, + coherent: MaybeCoherent(true), + } + } +} + +impl<'a, B> From<MappedRange<'a, B, NonCoherent>> for MappedRange<'a, B> +where + B: Backend, +{ + fn from(range: MappedRange<'a, B, NonCoherent>) -> Self { + MappedRange { + memory: range.memory, + ptr: range.ptr, + mapping_range: range.mapping_range, + requested_range: range.requested_range, + coherent: MaybeCoherent(false), + } + } +} + +impl<'a, B> MappedRange<'a, B, Coherent> +where + B: Backend, +{ + /// Fetch writer to the sub-region. + /// + /// # Safety + /// + /// * Caller must ensure that device won't write to or read from the memory region. + pub unsafe fn write<'b, U: 'b>( + &'b mut self, + range: Range<u64>, + ) -> Result<impl Write<U> + 'b, gfx_hal::device::MapError> + where + U: Copy, + { + assert!( + range.start < range.end, + "Memory mapping region must have valid size" + ); + assert!( + fits_usize(range.end - range.start), + "Range length must fit in usize" + ); + + let sub_range = relative_to_sub_range(self.requested_range.clone(), range) + .ok_or(gfx_hal::device::MapError::OutOfBounds)?; + + let ptr = + mapped_sub_range(self.ptr, self.mapping_range.clone(), sub_range.clone()).unwrap(); + + let size = (sub_range.end - sub_range.start) as usize; + + let slice = mapped_slice_mut::<U>(ptr, size); + + Ok(WriteCoherent { slice }) + } +} diff --git a/rendy-memory/src/mapping/range.rs b/rendy-memory/src/mapping/range.rs new file mode 100644 index 0000000..f4c49be --- /dev/null +++ b/rendy-memory/src/mapping/range.rs @@ -0,0 +1,85 @@ +use { + crate::util::fits_usize, + std::{ + mem::{align_of, size_of}, + ops::Range, + ptr::NonNull, + slice::{from_raw_parts, from_raw_parts_mut}, + }, +}; + +/// Get sub-range of memory mapping. +/// `range` and `fitting` are in memory object space. +/// `ptr` points to the `range.start` offset from memory origin. +/// returns pointer to `fitting.start` offset from memory origin +/// if `fitting` is contained in `range`. +pub(crate) fn mapped_sub_range( + ptr: NonNull<u8>, + range: Range<u64>, + fitting: Range<u64>, +) -> Option<NonNull<u8>> { + assert!( + range.start < range.end, + "Memory mapping region must have valid size" + ); + assert!( + fitting.start < fitting.end, + "Memory mapping region must have valid size" + ); + assert!(fits_usize(range.end - range.start)); + assert!(usize::max_value() - (range.end - range.start) as usize >= ptr.as_ptr() as usize); + + if fitting.start < range.start || fitting.end > range.end { + None + } else { + Some(unsafe { + // for x > 0 and y >= 0: x + y > 0. No overflow due to checks above. + NonNull::new_unchecked( + (ptr.as_ptr() as usize + (fitting.start - range.start) as usize) as *mut u8, + ) + }) + } +} + +/// # Safety +/// +/// User must ensure that: +/// * this function won't create aliasing slices. 
+/// * returned slice doesn't outlive mapping. +/// * `T` Must be plain-old-data type compatible with data in mapped region. +pub(crate) unsafe fn mapped_slice_mut<'a, T>(ptr: NonNull<u8>, size: usize) -> &'a mut [T] { + assert_eq!( + size % size_of::<T>(), + 0, + "Range length must be multiple of element size" + ); + let offset = ptr.as_ptr() as usize; + assert_eq!( + offset % align_of::<T>(), + 0, + "Range offset must be multiple of element alignment" + ); + assert!(usize::max_value() - size >= ptr.as_ptr() as usize); + from_raw_parts_mut(ptr.as_ptr() as *mut T, size) +} + +/// # Safety +/// +/// User must ensure that: +/// * returned slice doesn't outlive mapping. +/// * `T` Must be plain-old-data type compatible with data in mapped region. +pub(crate) unsafe fn mapped_slice<'a, T>(ptr: NonNull<u8>, size: usize) -> &'a [T] { + assert_eq!( + size % size_of::<T>(), + 0, + "Range length must be multiple of element size" + ); + let offset = ptr.as_ptr() as usize; + assert_eq!( + offset % align_of::<T>(), + 0, + "Range offset must be multiple of element alignment" + ); + assert!(usize::max_value() - size >= ptr.as_ptr() as usize); + from_raw_parts(ptr.as_ptr() as *const T, size) +} diff --git a/rendy-memory/src/mapping/write.rs b/rendy-memory/src/mapping/write.rs new file mode 100644 index 0000000..d067a61 --- /dev/null +++ b/rendy-memory/src/mapping/write.rs @@ -0,0 +1,73 @@ +use std::ptr::copy_nonoverlapping; + +/// Trait for memory region suitable for host writes. +pub trait Write<T: Copy> { + /// Get mutable slice of `T` bound to mapped range. + /// + /// # Safety + /// + /// * Returned slice should not be read. + unsafe fn slice(&mut self) -> &mut [T]; + + /// Write data into mapped memory sub-region. + /// + /// # Panic + /// + /// Panics if `data.len()` is greater than this sub-region len. + fn write(&mut self, data: &[T]) { + unsafe { + let slice = self.slice(); + assert!(data.len() <= slice.len()); + copy_nonoverlapping(data.as_ptr(), slice.as_mut_ptr(), data.len()); + } + } +} + +#[derive(Debug)] +pub(super) struct WriteFlush<'a, T, F: FnOnce() + 'a> { + pub(super) slice: &'a mut [T], + pub(super) flush: Option<F>, +} + +impl<'a, T, F> Drop for WriteFlush<'a, T, F> +where + T: 'a, + F: FnOnce() + 'a, +{ + fn drop(&mut self) { + if let Some(f) = self.flush.take() { + f(); + } + } +} + +impl<'a, T, F> Write<T> for WriteFlush<'a, T, F> +where + T: Copy + 'a, + F: FnOnce() + 'a, +{ + /// # Safety + /// + /// [See doc comment for trait method](trait.Write#method.slice) + unsafe fn slice(&mut self) -> &mut [T] { + self.slice + } +} + +#[warn(dead_code)] +#[derive(Debug)] +pub(super) struct WriteCoherent<'a, T> { + pub(super) slice: &'a mut [T], +} + +impl<'a, T> Write<T> for WriteCoherent<'a, T> +where + T: Copy + 'a, +{ + /// # Safety + /// + /// [See doc comment for trait method](trait.Write#method.slice) + unsafe fn slice(&mut self) -> &mut [T] { + self.slice + } +} diff --git a/rendy-memory/src/memory.rs b/rendy-memory/src/memory.rs new file mode 100644 index 0000000..a529efe --- /dev/null +++ b/rendy-memory/src/memory.rs @@ -0,0 +1,98 @@ +// use std::fmt; + +/// Memory object wrapper. +/// Contains size and properties of the memory. +#[derive(Debug)] +pub struct Memory<B: gfx_hal::Backend> { + raw: B::Memory, + size: u64, + properties: gfx_hal::memory::Properties, + non_coherent_atom_size: u64, + relevant: relevant::Relevant, +} + +impl<B> Memory<B> +where + B: gfx_hal::Backend, +{ + /// Get memory properties. 
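(Annotation, not part of the patch.) Both slice helpers above reduce to the same two layout checks before reinterpreting raw bytes; this standalone snippet runs those checks against a stand-in buffer, using only `std`.

use std::mem::{align_of, size_of};

fn main() {
    // Stand-in for a mapped region: 8 words = 32 bytes, guaranteed 4-byte aligned.
    let storage = vec![0u32; 8];
    let ptr = storage.as_ptr() as *const u8;
    let len_bytes = storage.len() * size_of::<u32>();

    // The same preconditions `mapped_slice::<u32>` asserts:
    assert_eq!(len_bytes % size_of::<u32>(), 0, "length must be a multiple of the element size");
    assert_eq!(ptr as usize % align_of::<u32>(), 0, "pointer must be aligned for the element type");

    // With both holding, viewing the bytes as `u32`s is well-defined for POD data.
    let words: &[u32] =
        unsafe { std::slice::from_raw_parts(ptr as *const u32, len_bytes / size_of::<u32>()) };
    assert_eq!(words.len(), 8);
}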
+ pub fn properties(&self) -> gfx_hal::memory::Properties { + self.properties + } + + /// Get memory size. + pub fn size(&self) -> u64 { + self.size + } + + /// Get raw memory. + pub fn raw(&self) -> &B::Memory { + &self.raw + } + + /// Get raw memory mutably. + pub fn raw_mut(&mut self) -> &mut B::Memory { + &mut self.raw + } + + /// Unwrap raw memory. + pub fn into_raw(self) -> B::Memory { + self.relevant.dispose(); + self.raw + } + + pub(crate) fn non_coherent_atom_size(&self) -> u64 { + debug_assert!( + self.host_visible() && !self.host_coherent(), + "Irrelevent and shouldn't be called", + ); + self.non_coherent_atom_size + } + + /// Create memory from raw object. + /// + /// # Safety + /// + /// TODO: + pub unsafe fn from_raw( + raw: B::Memory, + size: u64, + properties: gfx_hal::memory::Properties, + non_coherent_atom_size: u64, + ) -> Self { + Memory { + properties, + raw, + size, + non_coherent_atom_size, + relevant: relevant::Relevant, + } + } + + /// Check if this memory is host-visible and can be mapped. + /// `memory.host_visible()` is equivalent to `memory.properties().contains(Properties::CPU_VISIBLE)` + pub fn host_visible(&self) -> bool { + self.properties + .contains(gfx_hal::memory::Properties::CPU_VISIBLE) + } + + /// Check if this memory is host-coherent and doesn't require invalidating or flushing. + /// `memory.host_coherent()` is equivalent to `memory.properties().contains(Properties::COHERENT)` + pub fn host_coherent(&self) -> bool { + self.properties + .contains(gfx_hal::memory::Properties::COHERENT) + } +} + +// pub(crate) fn memory_ptr_fmt( +// memory: &*const Memory, +// fmt: &mut fmt::Formatter<'_>, +// ) -> Result<(), fmt::Error> { +// unsafe { +// if fmt.alternate() { +// write!(fmt, "*const {:#?}", **memory) +// } else { +// write!(fmt, "*const {:?}", **memory) +// } +// } +// } diff --git a/rendy-memory/src/usage.rs b/rendy-memory/src/usage.rs new file mode 100644 index 0000000..a9a4012 --- /dev/null +++ b/rendy-memory/src/usage.rs @@ -0,0 +1,210 @@ +//! Defines usage types for memory bocks. +//! See `Usage` and implementations for details. + +use crate::allocator::Kind; + +/// Memory usage trait. +pub trait MemoryUsage: std::fmt::Debug { + /// Get set of properties required for the usage. + fn properties_required(&self) -> gfx_hal::memory::Properties; + + /// Get comparable fitness value for memory properties. + /// + /// # Panics + /// + /// This function will panic if properties set doesn't contain required properties. + fn memory_fitness(&self, properties: gfx_hal::memory::Properties) -> u32; + + /// Get comparable fitness value for memory allocator. + fn allocator_fitness(&self, kind: Kind) -> u32; +} + +impl<T> MemoryUsage for T +where + T: std::ops::Deref + std::fmt::Debug, + T::Target: MemoryUsage, +{ + fn properties_required(&self) -> gfx_hal::memory::Properties { + (&**self).properties_required() + } + fn memory_fitness(&self, properties: gfx_hal::memory::Properties) -> u32 { + (&**self).memory_fitness(properties) + } + fn allocator_fitness(&self, kind: Kind) -> u32 { + (&**self).allocator_fitness(kind) + } +} + +/// Full speed GPU access. +/// Optimal for render targets and persistent resources. +/// Avoid memory with host access. 
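(Annotation, not part of the patch.) The `host_visible`/`host_coherent` pair above is what the rest of the crate branches on when deciding whether flush/invalidate calls are needed; below is a minimal check of that classification using `gfx_hal::memory::Properties` directly (gfx-hal as a dependency is assumed).

use gfx_hal::memory::Properties;

fn main() {
    // A memory type that is mappable but not coherent, as an adapter might report it.
    let props = Properties::CPU_VISIBLE | Properties::CPU_CACHED;

    let host_visible = props.contains(Properties::CPU_VISIBLE);
    let host_coherent = props.contains(Properties::COHERENT);

    // Such memory can be mapped, but reads need `invalidate_mapped_memory_ranges`
    // and writes need `flush_mapped_memory_ranges` on atom-aligned ranges.
    assert!(host_visible && !host_coherent);
}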
+#[derive(Clone, Copy, Debug)] +pub struct Data; + +impl MemoryUsage for Data { + fn properties_required(&self) -> gfx_hal::memory::Properties { + gfx_hal::memory::Properties::DEVICE_LOCAL + } + + #[inline] + fn memory_fitness(&self, properties: gfx_hal::memory::Properties) -> u32 { + assert!(properties.contains(gfx_hal::memory::Properties::DEVICE_LOCAL)); + 0 | ((!properties.contains(gfx_hal::memory::Properties::CPU_VISIBLE)) as u32) << 3 + | ((!properties.contains(gfx_hal::memory::Properties::LAZILY_ALLOCATED)) as u32) << 2 + | ((!properties.contains(gfx_hal::memory::Properties::CPU_CACHED)) as u32) << 1 + | ((!properties.contains(gfx_hal::memory::Properties::COHERENT)) as u32) << 0 + } + + fn allocator_fitness(&self, kind: Kind) -> u32 { + match kind { + Kind::Dedicated => 1, + Kind::Dynamic => 2, + Kind::Linear => 0, + } + } +} + +/// CPU to GPU data flow with update commands. +/// Used for dynamic buffer data, typically constant buffers. +/// Host access is guaranteed. +/// Prefers memory with fast GPU access. +#[derive(Clone, Copy, Debug)] +pub struct Dynamic; + +impl MemoryUsage for Dynamic { + fn properties_required(&self) -> gfx_hal::memory::Properties { + gfx_hal::memory::Properties::CPU_VISIBLE + } + + #[inline] + fn memory_fitness(&self, properties: gfx_hal::memory::Properties) -> u32 { + assert!(properties.contains(gfx_hal::memory::Properties::CPU_VISIBLE)); + assert!(!properties.contains(gfx_hal::memory::Properties::LAZILY_ALLOCATED)); + + 0 | (properties.contains(gfx_hal::memory::Properties::DEVICE_LOCAL) as u32) << 2 + | (properties.contains(gfx_hal::memory::Properties::COHERENT) as u32) << 1 + | ((!properties.contains(gfx_hal::memory::Properties::CPU_CACHED)) as u32) << 0 + } + + fn allocator_fitness(&self, kind: Kind) -> u32 { + match kind { + Kind::Dedicated => 1, + Kind::Dynamic => 2, + Kind::Linear => 0, + } + } +} + +/// CPU to GPU data flow with mapping. +/// Used for staging data before copying to the `Data` memory. +/// Host access is guaranteed. +#[derive(Clone, Copy, Debug)] +pub struct Upload; + +impl MemoryUsage for Upload { + fn properties_required(&self) -> gfx_hal::memory::Properties { + gfx_hal::memory::Properties::CPU_VISIBLE + } + + #[inline] + fn memory_fitness(&self, properties: gfx_hal::memory::Properties) -> u32 { + assert!(properties.contains(gfx_hal::memory::Properties::CPU_VISIBLE)); + assert!(!properties.contains(gfx_hal::memory::Properties::LAZILY_ALLOCATED)); + + 0 | ((!properties.contains(gfx_hal::memory::Properties::DEVICE_LOCAL)) as u32) << 2 + | (properties.contains(gfx_hal::memory::Properties::COHERENT) as u32) << 1 + | ((!properties.contains(gfx_hal::memory::Properties::CPU_CACHED)) as u32) << 0 + } + + fn allocator_fitness(&self, kind: Kind) -> u32 { + match kind { + Kind::Dedicated => 0, + Kind::Dynamic => 1, + Kind::Linear => 2, + } + } +} + +/// GPU to CPU data flow with mapping. +/// Used for copying data from `Data` memory to be read by the host. +/// Host access is guaranteed. 
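(Annotation, not part of the patch.) The fitness functions above pack "preferred property present/absent" bits from most to least significant, so a plain integer comparison picks the better memory type. A worked example for `Upload`, assuming `MemoryUsage` and `Upload` are re-exported at the crate root.

use gfx_hal::memory::Properties;
use rendy_memory::{MemoryUsage, Upload}; // assumed re-exports

fn main() {
    // Host-visible system memory, coherent and uncached: the ideal staging type.
    let staging = Properties::CPU_VISIBLE | Properties::COHERENT;
    // Host-visible but device-local (e.g. a small BAR window): usable, less ideal.
    let bar = Properties::CPU_VISIBLE | Properties::DEVICE_LOCAL | Properties::COHERENT;

    // Upload weights !DEVICE_LOCAL (bit 2) above COHERENT (bit 1) above !CPU_CACHED (bit 0),
    // so plain system memory scores 0b111 = 7 while the BAR window scores 0b011 = 3.
    assert!(Upload.memory_fitness(staging) > Upload.memory_fitness(bar));
}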
+#[derive(Clone, Copy, Debug)] +pub struct Download; + +impl MemoryUsage for Download { + fn properties_required(&self) -> gfx_hal::memory::Properties { + gfx_hal::memory::Properties::CPU_VISIBLE + } + + #[inline] + fn memory_fitness(&self, properties: gfx_hal::memory::Properties) -> u32 { + assert!(properties.contains(gfx_hal::memory::Properties::CPU_VISIBLE)); + assert!(!properties.contains(gfx_hal::memory::Properties::LAZILY_ALLOCATED)); + + 0 | ((!properties.contains(gfx_hal::memory::Properties::DEVICE_LOCAL)) as u32) << 2 + | (properties.contains(gfx_hal::memory::Properties::CPU_CACHED) as u32) << 1 + | (properties.contains(gfx_hal::memory::Properties::COHERENT) as u32) << 0 + } + + fn allocator_fitness(&self, kind: Kind) -> u32 { + match kind { + Kind::Dedicated => 0, + Kind::Dynamic => 1, + Kind::Linear => 2, + } + } +} + +/// Well-known memory usage types. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum MemoryUsageValue { + /// See [`Data`] + /// + /// [`Data`]: struct.Data.html + Data, + + /// See [`Dynamic`] + /// + /// [`Dynamic`]: struct.Dynamic.html + Dynamic, + + /// See [`Upload`] + /// + /// [`Upload`]: struct.Upload.html + Upload, + + /// See [`Download`] + /// + /// [`Download`]: struct.Download.html + Download, +} + +/// Memory usage trait. +impl MemoryUsage for MemoryUsageValue { + fn properties_required(&self) -> gfx_hal::memory::Properties { + match self { + MemoryUsageValue::Data => Data.properties_required(), + MemoryUsageValue::Dynamic => Dynamic.properties_required(), + MemoryUsageValue::Upload => Upload.properties_required(), + MemoryUsageValue::Download => Download.properties_required(), + } + } + + fn memory_fitness(&self, properties: gfx_hal::memory::Properties) -> u32 { + match self { + MemoryUsageValue::Data => Data.memory_fitness(properties), + MemoryUsageValue::Dynamic => Dynamic.memory_fitness(properties), + MemoryUsageValue::Upload => Upload.memory_fitness(properties), + MemoryUsageValue::Download => Download.memory_fitness(properties), + } + } + + fn allocator_fitness(&self, kind: Kind) -> u32 { + match self { + MemoryUsageValue::Data => Data.allocator_fitness(kind), + MemoryUsageValue::Dynamic => Dynamic.allocator_fitness(kind), + MemoryUsageValue::Upload => Upload.allocator_fitness(kind), + MemoryUsageValue::Download => Download.allocator_fitness(kind), + } + } +} diff --git a/rendy-memory/src/util.rs b/rendy-memory/src/util.rs new file mode 100644 index 0000000..07b9b7c --- /dev/null +++ b/rendy-memory/src/util.rs @@ -0,0 +1,157 @@ +pub(crate) fn aligned(value: u64, align: u64) -> u64 { + debug_assert_ne!(align, 0); + debug_assert_eq!(align.count_ones(), 1); + if value == 0 { + 0 + } else { + 1u64 + ((value - 1u64) | (align - 1u64)) + } +} + +pub(crate) trait IntegerFitting { + fn fits_usize(self) -> bool; + fn fits_isize(self) -> bool; + + fn usize_fits(value: usize) -> bool; + fn isize_fits(value: isize) -> bool; +} + +#[cfg(any(target_pointer_width = "16", target_pointer_width = "32"))] +impl IntegerFitting for u64 { + fn fits_usize(self) -> bool { + self <= usize::max_value() as u64 + } + fn fits_isize(self) -> bool { + self <= isize::max_value() as u64 + } + fn usize_fits(_value: usize) -> bool { + true + } + fn isize_fits(value: isize) -> bool { + value >= 0 + } +} + +#[cfg(target_pointer_width = "64")] +impl IntegerFitting for u64 { + fn fits_usize(self) -> bool { + true + } + fn fits_isize(self) -> bool { + self <= isize::max_value() as u64 + } + fn usize_fits(_value: usize) -> bool { + true + } + fn isize_fits(value: isize) -> bool { 
+ value >= 0 + } +} + +#[cfg(not(any( + target_pointer_width = "16", + target_pointer_width = "32", + target_pointer_width = "64" +)))] +impl IntegerFitting for u64 { + fn fits_usize(self) -> bool { + true + } + fn fits_isize(self) -> bool { + true + } + fn usize_fits(value: usize) -> bool { + value <= u64::max_value() as usize + } + fn isize_fits(value: isize) -> bool { + value >= 0 && value <= u64::max_value() as isize + } +} + +#[cfg(target_pointer_width = "16")] +impl IntegerFitting for u32 { + fn fits_usize(self) -> bool { + self <= usize::max_value() as u32 + } + fn fits_isize(self) -> bool { + self <= isize::max_value() as u32 + } + fn usize_fits(_value: usize) -> bool { + true + } + fn isize_fits(value: isize) -> bool { + value >= 0 + } +} + +#[cfg(target_pointer_width = "32")] +impl IntegerFitting for u32 { + fn fits_usize(self) -> bool { + true + } + fn fits_isize(self) -> bool { + self <= isize::max_value() as u32 + } + fn usize_fits(_value: usize) -> bool { + true + } + fn isize_fits(value: isize) -> bool { + value >= 0 + } +} + +#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))] +impl IntegerFitting for u32 { + fn fits_usize(self) -> bool { + true + } + fn fits_isize(self) -> bool { + true + } + fn usize_fits(value: usize) -> bool { + value <= u32::max_value() as usize + } + fn isize_fits(value: isize) -> bool { + value >= 0 && value <= u32::max_value() as isize + } +} + +pub(crate) fn fits_usize<T: IntegerFitting>(value: T) -> bool { + value.fits_usize() +} + +pub(crate) fn fits_u32(value: usize) -> bool { + u32::usize_fits(value) +} + +pub(crate) fn align_range(range: std::ops::Range<u64>, align: u64) -> std::ops::Range<u64> { + let start = range.start - range.start % align; + let end = ((range.end - 1) / align + 1) * align; + start..end +} + +pub(crate) fn align_size(size: u64, align: u64) -> u64 { + ((size - 1) / align + 1) * align +} + +pub(crate) fn is_non_coherent_visible(properties: gfx_hal::memory::Properties) -> bool { + properties & (gfx_hal::memory::Properties::CPU_VISIBLE | gfx_hal::memory::Properties::COHERENT) + == gfx_hal::memory::Properties::CPU_VISIBLE +} + +pub(crate) fn relative_to_sub_range( + range: std::ops::Range<u64>, + relative: std::ops::Range<u64>, +) -> Option<std::ops::Range<u64>> { + let start = relative.start + range.start; + let end = relative.end + range.start; + if end <= range.end { + Some(start..end) + } else { + None + } +} + +pub(crate) fn is_sub_range(range: std::ops::Range<u64>, sub: std::ops::Range<u64>) -> bool { + sub.start >= range.start && sub.end <= range.end +} diff --git a/rendy-memory/src/utilization.rs b/rendy-memory/src/utilization.rs new file mode 100644 index 0000000..723c429 --- /dev/null +++ b/rendy-memory/src/utilization.rs @@ -0,0 +1,137 @@ +use { + colorful::{core::color_string::CString, Color, Colorful as _}, + gfx_hal::memory::Properties, +}; + +/// Memory utilization stats. +#[derive(Clone, Copy, Debug)] +pub struct MemoryUtilization { + /// Total number of bytes allocated. + pub used: u64, + /// Effective number bytes allocated. + pub effective: u64, +} + +/// Memory utilization of one heap. +#[derive(Clone, Copy, Debug)] +pub struct MemoryHeapUtilization { + /// Utilization. + pub utilization: MemoryUtilization, + + /// Memory heap size. + pub size: u64, +} + +/// Memory utilization of one type. +#[derive(Clone, Copy, Debug)] +pub struct MemoryTypeUtilization { + /// Utilization. + pub utilization: MemoryUtilization, + + /// Memory type info. 
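(Annotation, not part of the patch.) The helpers above are small integer manipulations that are easy to get subtly wrong; this standalone check re-derives the two trickiest formulas, since the originals are `pub(crate)` and not callable from outside the crate.

fn main() {
    // `aligned`: round up to a power-of-two alignment without overflowing at 0.
    let aligned = |value: u64, align: u64| -> u64 {
        if value == 0 { 0 } else { 1 + ((value - 1) | (align - 1)) }
    };
    assert_eq!(aligned(0, 256), 0);
    assert_eq!(aligned(1, 256), 256);
    assert_eq!(aligned(256, 256), 256);
    assert_eq!(aligned(257, 256), 512);

    // `relative_to_sub_range`: a range given relative to `range.start`,
    // translated back into memory-object space and bounds-checked.
    let range = 1024u64..2048;
    let relative = 16u64..32;
    let sub = (relative.start + range.start)..(relative.end + range.start);
    assert!(sub.end <= range.end);
    assert_eq!(sub, 1040..1056);
}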
+ pub properties: Properties, + + /// Index of heap this memory type uses. + pub heap_index: usize, +} + +/// Total memory utilization. +#[derive(Clone, Debug)] +pub struct TotalMemoryUtilization { + /// Utilization by types. + pub types: Vec<MemoryTypeUtilization>, + + /// Utilization by heaps. + pub heaps: Vec<MemoryHeapUtilization>, +} + +impl std::fmt::Display for TotalMemoryUtilization { + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + const MB: u64 = 1024 * 1024; + + writeln!(fmt, "!!! Memory utilization !!!")?; + for (index, heap) in self.heaps.iter().enumerate() { + let size = heap.size; + let MemoryUtilization { used, effective } = heap.utilization; + let usage_basis_points = used * 10000 / size; + let fill = if usage_basis_points > 10000 { + // Shouldn't happen, but just in case. + 50 + } else { + (usage_basis_points / 200) as usize + }; + let effective_basis_points = if used > 0 { + effective * 10000 / used + } else { + 10000 + }; + + let line = ("|".repeat(fill) + &(" ".repeat(50 - fill))) + .gradient_with_color(Color::Green, Color::Red); + writeln!( + fmt, + "Heap {}:\n{:6} / {:<6} or{} {{ effective:{} }} [{}]", + format!("{}", index).magenta(), + format!("{}MB", used / MB), + format!("{}MB", size / MB), + format_basis_points(usage_basis_points), + format_basis_points_inverted(effective_basis_points), + line + )?; + + for ty in self.types.iter().filter(|ty| ty.heap_index == index) { + let properties = ty.properties; + let MemoryUtilization { used, effective } = ty.utilization; + let usage_basis_points = used * 10000 / size; + let effective_basis_points = if used > 0 { + effective * 10000 / used + } else { + 0 + }; + + writeln!( + fmt, + " {:>6} or{} {{ effective:{} }} | {:?}", + format!("{}MB", used / MB), + format_basis_points(usage_basis_points), + format_basis_points_inverted(effective_basis_points), + properties, + )?; + } + } + + Ok(()) + } +} + +fn format_basis_points(basis_points: u64) -> CString { + debug_assert!(basis_points <= 10000); + let s = format!("{:>3}.{:02}%", basis_points / 100, basis_points % 100); + if basis_points > 7500 { + s.red() + } else if basis_points > 5000 { + s.yellow() + } else if basis_points > 2500 { + s.green() + } else if basis_points > 100 { + s.blue() + } else { + s.white() + } +} + +fn format_basis_points_inverted(basis_points: u64) -> CString { + debug_assert!(basis_points <= 10000); + let s = format!("{:>3}.{:02}%", basis_points / 100, basis_points % 100); + if basis_points > 9900 { + s.white() + } else if basis_points > 7500 { + s.blue() + } else if basis_points > 5000 { + s.green() + } else if basis_points > 2500 { + s.yellow() + } else { + s.red() + } +} |
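(Annotation, not part of the patch.) The `Display` implementation above reports usage in basis points (hundredths of a percent) so it can print two decimal places with integer arithmetic only; the numbers below check that math and the 50-character gauge for a hypothetical 256 MiB heap with 96 MiB used.

fn main() {
    let size: u64 = 256 * 1024 * 1024; // heap size
    let used: u64 = 96 * 1024 * 1024; // bytes allocated from it

    let usage_basis_points = used * 10000 / size;
    assert_eq!(usage_basis_points, 3750); // rendered as " 37.50%"

    // One '|' per 200 basis points fills a 50-character gauge at 100% usage.
    let fill = (usage_basis_points / 200) as usize;
    assert_eq!(fill, 18);
    let line = "|".repeat(fill) + &" ".repeat(50 - fill);
    assert_eq!(line.len(), 50);
}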