diff --git a/openvmm/membacking/src/mapping_manager/manager.rs b/openvmm/membacking/src/mapping_manager/manager.rs index 495740fcb3..1650f582ca 100644 --- a/openvmm/membacking/src/mapping_manager/manager.rs +++ b/openvmm/membacking/src/mapping_manager/manager.rs @@ -35,7 +35,11 @@ pub struct MappingManager { impl MappingManager { /// Returns a new mapping manager that can map addresses up to `max_addr`. - pub fn new(spawn: impl Spawn, max_addr: u64) -> Self { + /// + /// If `private_ram` is true, mappers created from this manager will use + /// anonymous private memory for guest RAM instead of shared file-backed + /// memory. + pub fn new(spawn: impl Spawn, max_addr: u64, private_ram: bool) -> Self { let (req_send, mut req_recv) = mesh::mpsc_channel(); spawn .spawn("mapping_manager", { @@ -50,6 +54,7 @@ impl MappingManager { id: ObjectId::new(), req_send, max_addr, + private_ram, }, } } @@ -67,6 +72,7 @@ pub struct MappingManagerClient { req_send: mesh::Sender, id: ObjectId, max_addr: u64, + private_ram: bool, } static MAPPER_CACHE: ObjectCache = ObjectCache::new(); @@ -74,13 +80,16 @@ static MAPPER_CACHE: ObjectCache = ObjectCache::new(); impl MappingManagerClient { /// Returns a VA mapper for this guest memory. /// - /// This will single instance the mapper, so this is safe to call multiple times. + /// This will single instance the mapper, so this is safe to call multiple + /// times. If `private_ram` was set when creating the [`MappingManager`], + /// the mapper will use anonymous private memory for guest RAM. pub async fn new_mapper(&self) -> Result, VaMapperError> { // Get the VA mapper from the mapper cache if possible to avoid keeping // multiple VA ranges for this memory per process. + let private_ram = self.private_ram; MAPPER_CACHE .get_or_insert_with(&self.id, async { - VaMapper::new(self.req_send.clone(), self.max_addr, None).await + VaMapper::new(self.req_send.clone(), self.max_addr, None, private_ram).await }) .await } @@ -89,12 +98,19 @@ impl MappingManagerClient { /// address space of `process`. /// /// Each call will allocate a new unique mapper. + /// + /// Returns an error if private memory mode is enabled, since private + /// anonymous pages would be committed in the remote process and not + /// accessible locally. pub async fn new_remote_mapper( &self, process: RemoteProcess, ) -> Result, VaMapperError> { + if self.private_ram { + return Err(VaMapperError::RemoteWithPrivateMemory); + } Ok(Arc::new( - VaMapper::new(self.req_send.clone(), self.max_addr, Some(process)).await?, + VaMapper::new(self.req_send.clone(), self.max_addr, Some(process), false).await?, )) } diff --git a/openvmm/membacking/src/mapping_manager/va_mapper.rs b/openvmm/membacking/src/mapping_manager/va_mapper.rs index 4330448d71..24c0725446 100644 --- a/openvmm/membacking/src/mapping_manager/va_mapper.rs +++ b/openvmm/membacking/src/mapping_manager/va_mapper.rs @@ -45,6 +45,7 @@ use thiserror::Error; pub struct VaMapper { inner: Arc, process: Option, + private_ram: bool, _thread: JoinHandle<()>, } @@ -161,6 +162,8 @@ pub enum VaMapperError { MemoryManagerGone(#[source] RpcError), #[error("failed to reserve address space")] Reserve(#[source] std::io::Error), + #[error("remote mappers are not supported in private memory mode")] + RemoteWithPrivateMemory, } #[derive(Debug, Error)] @@ -195,6 +198,7 @@ impl VaMapper { req_send: mesh::Sender, len: u64, remote_process: Option, + private_ram: bool, ) -> Result { let mapping = match &remote_process { None => SparseMapping::new(len as usize), @@ -239,6 +243,7 @@ impl VaMapper { Ok(VaMapper { inner, process: remote_process, + private_ram, _thread: thread, }) } @@ -259,6 +264,25 @@ impl VaMapper { pub fn process(&self) -> Option<&RemoteProcess> { self.process.as_ref() } + + /// Allocates private anonymous memory for a range within the mapping. + /// + /// This replaces the placeholder at the given offset with committed + /// anonymous memory. Only valid when private_ram mode is enabled. + pub fn alloc_range(&self, offset: usize, len: usize) -> Result<(), std::io::Error> { + assert!(self.private_ram, "alloc_range requires private RAM mode"); + self.inner.mapping.alloc(offset, len) + } + + /// Decommits a range of private RAM, releasing physical pages back to the + /// host. + /// + /// Only valid when private_ram mode is enabled. + #[allow(dead_code)] // Will be used by ballooning / memory hot-remove. + pub fn decommit(&self, offset: usize, len: usize) -> Result<(), std::io::Error> { + assert!(self.private_ram, "decommit requires private RAM mode"); + self.inner.mapping.decommit(offset, len) + } } /// SAFETY: the underlying VA mapping is guaranteed to be valid for the lifetime @@ -286,6 +310,37 @@ unsafe impl GuestMemoryAccess for VaMapper { bitmap_failure: bool, ) -> PageFaultAction { assert!(!bitmap_failure, "bitmaps are not used"); + + if self.private_ram { + // Private RAM mode: commit the page(s) directly. + #[cfg(windows)] + { + // Commit in 64KB-aligned chunks to amortize overhead. + let commit_start = address & !0xFFFF; // round down to 64KB + let commit_end = ((address + len as u64) + 0xFFFF) & !0xFFFF; // round up + let commit_end = commit_end.min(self.inner.mapping.len() as u64); + let commit_len = (commit_end - commit_start) as usize; + + if let Err(err) = self.inner.mapping.commit(commit_start as usize, commit_len) { + return PageFaultAction::Fail(PageFaultError::new( + guestmem::GuestMemoryErrorKind::Other, + err, + )); + } + return PageFaultAction::Retry; + } + #[cfg(unix)] + { + // On Linux, the kernel handles page faults transparently. + // If we get here, something is wrong. + return PageFaultAction::Fail(PageFaultError::new( + guestmem::GuestMemoryErrorKind::Other, + std::io::Error::other("unexpected page fault in private RAM mode on Linux"), + )); + } + } + + // File-backed path: request mapping from MappingManager. // `block_on` is OK to call here (will not deadlock) because this is // never called from the page fault handler thread or any threads it // depends on. @@ -304,3 +359,116 @@ unsafe impl GuestMemoryAccess for VaMapper { PageFaultAction::Retry } } + +#[cfg(test)] +mod tests { + use sparse_mmap::SparseMapping; + + /// Tests that private RAM pages can be allocated, written to, and read from. + #[test] + fn test_private_ram_alloc_write_read() { + let page_size = SparseMapping::page_size(); + let mapping = SparseMapping::new(4 * page_size).unwrap(); + + // Allocate (commit) the first two pages. + mapping.alloc(0, 2 * page_size).unwrap(); + + // Write and read through SparseMapping methods. + let data = [0xABu8; 128]; + mapping.write_at(0, &data).unwrap(); + + let mut buf = [0u8; 128]; + mapping.read_at(0, &mut buf).unwrap(); + assert_eq!(buf, data); + + // Verify zeros at an untouched offset within committed range. + let mut zero_buf = [0xFFu8; 64]; + mapping.read_at(page_size, &mut zero_buf).unwrap(); + assert!( + zero_buf.iter().all(|&b| b == 0), + "untouched committed memory should be zeros" + ); + } + + /// Tests that decommitting pages releases their contents (zeros on re-read on Linux). + #[test] + fn test_private_ram_decommit_zeros() { + let page_size = SparseMapping::page_size(); + let mapping = SparseMapping::new(4 * page_size).unwrap(); + + // Commit and write data. + mapping.alloc(0, 2 * page_size).unwrap(); + let pattern = vec![0xABu8; 64]; + mapping.write_at(0, &pattern).unwrap(); + mapping.write_at(page_size, &pattern).unwrap(); + + // Decommit first page. + mapping.decommit(0, page_size).unwrap(); + + // On Linux, decommitted pages read as zeros. + #[cfg(unix)] + { + let mut buf = vec![0xFFu8; 64]; + mapping.read_at(0, &mut buf).unwrap(); + assert!( + buf.iter().all(|&b| b == 0), + "decommitted page should be zeros on Linux" + ); + } + + // Second page should still have its data. + let mut buf2 = vec![0u8; 64]; + mapping.read_at(page_size, &mut buf2).unwrap(); + assert_eq!(buf2, pattern); + } + + /// Tests that recommitting pages after decommit provides zeroed memory. + #[test] + fn test_private_ram_recommit_after_decommit() { + let page_size = SparseMapping::page_size(); + let mapping = SparseMapping::new(4 * page_size).unwrap(); + + // Commit, write, decommit, recommit. + mapping.alloc(0, page_size).unwrap(); + let pattern = vec![0xCDu8; 64]; + mapping.write_at(0, &pattern).unwrap(); + + mapping.decommit(0, page_size).unwrap(); + mapping.commit(0, page_size).unwrap(); + + // After recommit, the page should be zeros (old data is gone). + let mut buf = vec![0xFFu8; 64]; + mapping.read_at(0, &mut buf).unwrap(); + assert!( + buf.iter().all(|&b| b == 0), + "recommitted page should be zeros" + ); + + // Can write and read new data. + let new_data = vec![0xEFu8; 64]; + mapping.write_at(0, &new_data).unwrap(); + let mut buf2 = vec![0u8; 64]; + mapping.read_at(0, &mut buf2).unwrap(); + assert_eq!(buf2, new_data); + } + + /// Tests that commit is idempotent (committing already-committed pages is + /// a no-op). + #[test] + fn test_private_ram_commit_idempotent() { + let page_size = SparseMapping::page_size(); + let mapping = SparseMapping::new(4 * page_size).unwrap(); + + // Alloc then commit the same range again. + mapping.alloc(0, 2 * page_size).unwrap(); + mapping.commit(0, 2 * page_size).unwrap(); + mapping.commit(0, page_size).unwrap(); + + // Write and read should work. + let pattern = vec![0xEFu8; 64]; + mapping.write_at(0, &pattern).unwrap(); + let mut buf = vec![0u8; 64]; + mapping.read_at(0, &mut buf).unwrap(); + assert_eq!(buf, pattern); + } +} diff --git a/openvmm/membacking/src/memory_manager/mod.rs b/openvmm/membacking/src/memory_manager/mod.rs index 2a5574b379..47672f7a44 100644 --- a/openvmm/membacking/src/memory_manager/mod.rs +++ b/openvmm/membacking/src/memory_manager/mod.rs @@ -31,9 +31,9 @@ use vm_topology::memory::MemoryLayout; /// The OpenVMM memory manager. #[derive(Debug, Inspect)] pub struct GuestMemoryManager { - /// Guest RAM allocation. + /// Guest RAM allocation. None in private memory mode. #[inspect(skip)] - guest_ram: Mappable, + guest_ram: Option, #[inspect(skip)] ram_regions: Arc>, @@ -89,6 +89,15 @@ pub enum MemoryBuildError { /// Memory layout incompatible with x86 legacy support. #[error("x86 support requires RAM to start at 0 and contain at least 1MB")] InvalidRamForX86, + /// Private memory is incompatible with x86 legacy support. + #[error("private memory is incompatible with x86 legacy support")] + PrivateMemoryWithLegacy, + /// Private memory is incompatible with existing memory backing. + #[error("private memory is incompatible with existing memory backing")] + PrivateMemoryWithExistingBacking, + /// Failed to allocate private RAM range. + #[error("failed to allocate private RAM range {1}")] + PrivateRamAlloc(#[source] std::io::Error, MemoryRange), } /// A builder for [`GuestMemoryManager`]. @@ -98,6 +107,7 @@ pub struct GuestMemoryBuilder { prefetch_ram: bool, pin_mappings: bool, x86_legacy_support: bool, + private_memory: bool, } impl GuestMemoryBuilder { @@ -109,6 +119,7 @@ impl GuestMemoryBuilder { pin_mappings: false, prefetch_ram: false, x86_legacy_support: false, + private_memory: false, } } @@ -158,24 +169,54 @@ impl GuestMemoryBuilder { self } + /// Enables private anonymous memory for guest RAM. + /// + /// When set, guest RAM is backed by anonymous pages (`mmap + /// MAP_ANONYMOUS` on Linux, `VirtualAlloc` on Windows) rather than + /// shared file-backed sections. This supports decommit to release + /// physical pages back to the host. + /// + /// This is incompatible with [`x86_legacy_support`](Self::x86_legacy_support) + /// and [`existing_backing`](Self::existing_backing). + pub fn private_memory(mut self, enable: bool) -> Self { + self.private_memory = enable; + self + } + /// Builds the memory backing, allocating memory if existing memory was not /// provided by [`existing_backing`](Self::existing_backing). pub async fn build( self, mem_layout: &MemoryLayout, ) -> Result { + // Validate private memory constraints. + if self.private_memory { + if self.x86_legacy_support { + return Err(MemoryBuildError::PrivateMemoryWithLegacy); + } + if self.existing_mapping.is_some() { + return Err(MemoryBuildError::PrivateMemoryWithExistingBacking); + } + } + let ram_size = mem_layout.ram_size() + mem_layout.vtl2_range().map_or(0, |r| r.len()); - let memory = if let Some(memory) = self.existing_mapping { - memory.guest_ram + let memory: Option = if self.private_memory { + // Private memory mode: no shared file-backed allocation. + // RAM will be backed by anonymous pages in the VaMapper's SparseMapping. + None + } else if let Some(memory) = self.existing_mapping { + Some(memory.guest_ram) } else { - sparse_mmap::alloc_shared_memory( - ram_size - .try_into() - .map_err(|_| MemoryBuildError::RamTooLarge(ram_size))?, + Some( + sparse_mmap::alloc_shared_memory( + ram_size + .try_into() + .map_err(|_| MemoryBuildError::RamTooLarge(ram_size))?, + ) + .map_err(MemoryBuildError::AllocationFailed)? + .into(), ) - .map_err(MemoryBuildError::AllocationFailed)? - .into() }; // Spawn a thread to handle memory requests. @@ -195,7 +236,7 @@ impl GuestMemoryBuilder { None }; - let mapping_manager = MappingManager::new(&spawner, max_addr); + let mapping_manager = MappingManager::new(&spawner, max_addr, self.private_memory); let va_mapper = mapping_manager .client() .new_mapper() @@ -246,6 +287,17 @@ impl GuestMemoryBuilder { ); } + // In private memory mode, eagerly commit all RAM ranges with + // anonymous memory. alloc_range() handles both Linux (mmap MAP_FIXED) + // and Windows (MEM_REPLACE_PLACEHOLDER). + if self.private_memory { + for range in &ram_ranges { + va_mapper + .alloc_range(range.start() as usize, range.len() as usize) + .map_err(|e| MemoryBuildError::PrivateRamAlloc(e, *range))?; + } + } + let mut ram_regions = Vec::new(); let mut start = 0; for range in &ram_ranges { @@ -255,20 +307,25 @@ impl GuestMemoryBuilder { .await .expect("regions cannot overlap yet"); - region - .add_mapping( - MemoryRange::new(0..range.len()), - memory.clone(), - start, - true, - ) - .await; + if let Some(ref memory) = memory { + // File-backed mode: add mapping for this RAM range. + region + .add_mapping( + MemoryRange::new(0..range.len()), + memory.clone(), + start, + true, + ) + .await; + } + // In private_memory mode, skip add_mapping — no file-backed RAM. + // The SparseMapping VA is already committed via alloc_range() above. region .map(MapParams { writable: true, executable: true, - prefetch: self.prefetch_ram, + prefetch: self.prefetch_ram && !self.private_memory, }) .await; @@ -354,9 +411,12 @@ impl GuestMemoryManager { /// new memory manager with the same memory state. Only one instance of this /// type should be managing a given memory backing at a time, though, or the /// guest may see unpredictable results. - pub fn shared_memory_backing(&self) -> SharedMemoryBacking { - let guest_ram = self.guest_ram.clone(); - SharedMemoryBacking { guest_ram } + /// + /// Returns `None` in private memory mode, where there is no shared + /// file-backed allocation. + pub fn shared_memory_backing(&self) -> Option { + let guest_ram = self.guest_ram.clone()?; + Some(SharedMemoryBacking { guest_ram }) } /// Attaches the guest memory to a partition, mapping it to the guest diff --git a/openvmm/membacking/src/region_manager.rs b/openvmm/membacking/src/region_manager.rs index c397dc4fe3..875f95d49f 100644 --- a/openvmm/membacking/src/region_manager.rs +++ b/openvmm/membacking/src/region_manager.rs @@ -681,7 +681,7 @@ mod tests { } } - let mm = MappingManager::new(spawn, 0x200000); + let mm = MappingManager::new(spawn, 0x200000, false); let mut task = TestTask(RegionManagerTask::new(mm.client().clone())); let high = task.add(1, 0x1000..0x3000).await.unwrap(); diff --git a/openvmm/openvmm_core/src/worker/dispatch.rs b/openvmm/openvmm_core/src/worker/dispatch.rs index ee3358e80a..394c8ec6ed 100644 --- a/openvmm/openvmm_core/src/worker/dispatch.rs +++ b/openvmm/openvmm_core/src/worker/dispatch.rs @@ -276,7 +276,7 @@ pub struct RestartState { manifest: Manifest, running: bool, saved_state: SavedState, - shared_memory: SharedMemoryBacking, + shared_memory: Option, rpc: mesh::Receiver, notify: mesh::Sender, } @@ -348,7 +348,7 @@ impl Worker for VmWorker { VmTaskDriverSource::new(ThreadDriverBackend::new(device_driver)), hypervisor, manifest, - Some(shared_memory), + shared_memory, ))?; pal_async::local::block_on(async { let mut vm = vm.load(Some(saved_state), notify).await?; @@ -905,6 +905,7 @@ impl InitializedVm { .existing_backing(shared_memory) .vtl0_alias_map(vtl0_alias_map) .prefetch_ram(cfg.memory.prefetch_memory) + .private_memory(cfg.memory.private_memory) .x86_legacy_support( matches!(cfg.load_mode, LoadMode::Pcat { .. }) || cfg.chipset.with_hyperv_vga, ); @@ -2620,6 +2621,9 @@ impl LoadedVm { // First run the non-destructive operations. let r = async { let shared_memory = self.inner.memory_manager.shared_memory_backing(); + if shared_memory.is_none() { + anyhow::bail!("restart is not supported with --private-memory"); + } if self.running { self.state_units.stop().await; stopped = true; @@ -2899,7 +2903,7 @@ impl LoadedVm { async fn serialize( mut self, rpc: mesh::Receiver, - shared_memory: SharedMemoryBacking, + shared_memory: Option, saved_state: SavedState, ) -> RestartState { let notify = self.inner.partition_unit.teardown().await; diff --git a/openvmm/openvmm_defs/src/config.rs b/openvmm/openvmm_defs/src/config.rs index 251616edb7..923ebebf8d 100644 --- a/openvmm/openvmm_defs/src/config.rs +++ b/openvmm/openvmm_defs/src/config.rs @@ -268,10 +268,11 @@ pub enum ArchTopologyConfig { #[derive(Debug, MeshPayload)] pub struct MemoryConfig { pub mem_size: u64, + pub prefetch_memory: bool, + pub private_memory: bool, pub mmio_gaps: Vec, pub pci_ecam_gaps: Vec, pub pci_mmio_gaps: Vec, - pub prefetch_memory: bool, } #[derive(Debug, MeshPayload, Default)] diff --git a/openvmm/openvmm_entry/src/cli_args.rs b/openvmm/openvmm_entry/src/cli_args.rs index 550a9b0653..4e295d0e41 100644 --- a/openvmm/openvmm_entry/src/cli_args.rs +++ b/openvmm/openvmm_entry/src/cli_args.rs @@ -61,6 +61,10 @@ pub struct Options { #[clap(long)] pub prefetch: bool, + /// use private anonymous memory for guest RAM + #[clap(long)] + pub private_memory: bool, + /// start in paused state #[clap(short = 'P', long)] pub paused: bool, diff --git a/openvmm/openvmm_entry/src/lib.rs b/openvmm/openvmm_entry/src/lib.rs index f0215b0f05..97eb881d85 100644 --- a/openvmm/openvmm_entry/src/lib.rs +++ b/openvmm/openvmm_entry/src/lib.rs @@ -1525,9 +1525,10 @@ async fn vm_config_from_command_line( memory: MemoryConfig { mem_size: opt.memory, mmio_gaps, + prefetch_memory: opt.prefetch, + private_memory: opt.private_memory, pci_ecam_gaps, pci_mmio_gaps, - prefetch_memory: opt.prefetch, }, processor_topology: ProcessorTopologyConfig { proc_count: opt.processors, diff --git a/openvmm/openvmm_entry/src/ttrpc/mod.rs b/openvmm/openvmm_entry/src/ttrpc/mod.rs index b5088646aa..f0a96d3067 100644 --- a/openvmm/openvmm_entry/src/ttrpc/mod.rs +++ b/openvmm/openvmm_entry/src/ttrpc/mod.rs @@ -472,6 +472,7 @@ impl VmService { pci_ecam_gaps: vec![], pci_mmio_gaps: vec![], prefetch_memory: false, + private_memory: false, }, chipset: chipset.chipset, processor_topology: ProcessorTopologyConfig { diff --git a/petri/src/vm/openvmm/construct.rs b/petri/src/vm/openvmm/construct.rs index 9f874a45ae..e734e51045 100644 --- a/petri/src/vm/openvmm/construct.rs +++ b/petri/src/vm/openvmm/construct.rs @@ -305,6 +305,7 @@ impl PetriVmConfigOpenVmm { pci_ecam_gaps: vec![], pci_mmio_gaps: vec![], prefetch_memory: false, + private_memory: false, } }; diff --git a/support/sparse_mmap/src/lib.rs b/support/sparse_mmap/src/lib.rs index fb93a89246..b1d8840175 100644 --- a/support/sparse_mmap/src/lib.rs +++ b/support/sparse_mmap/src/lib.rs @@ -233,4 +233,85 @@ mod tests { drop(mapping); } + + #[test] + fn test_decommit_zeros_pages() { + let page_size = SparseMapping::page_size(); + let mapping = SparseMapping::new(4 * page_size).unwrap(); + + // Allocate and write a pattern. + mapping.alloc(0, 4 * page_size).unwrap(); + let pattern = vec![0xABu8; page_size]; + mapping.write_at(0, &pattern).unwrap(); + mapping.write_at(page_size, &pattern).unwrap(); + + // Verify data is present. + let mut buf = vec![0u8; page_size]; + mapping.read_at(0, &mut buf).unwrap(); + assert_eq!(buf, pattern); + + // Decommit the first page. + mapping.decommit(0, page_size).unwrap(); + + // Read it back — should be zeros (on Linux, kernel gives zero pages; + // on Windows, the page is decommitted so we skip this read there). + #[cfg(unix)] + { + let mut buf = vec![0xFFu8; page_size]; + mapping.read_at(0, &mut buf).unwrap(); + assert!( + buf.iter().all(|&b| b == 0), + "decommitted page should be zeros" + ); + } + + // Second page should still have its data. + let mut buf2 = vec![0u8; page_size]; + mapping.read_at(page_size, &mut buf2).unwrap(); + assert_eq!(buf2, pattern); + } + + #[test] + fn test_commit_after_decommit() { + let page_size = SparseMapping::page_size(); + let mapping = SparseMapping::new(4 * page_size).unwrap(); + + // Allocate and write data. + mapping.alloc(0, 4 * page_size).unwrap(); + let pattern = vec![0xCDu8; page_size]; + mapping.write_at(0, &pattern).unwrap(); + + // Decommit then recommit. + mapping.decommit(0, page_size).unwrap(); + mapping.commit(0, page_size).unwrap(); + + // After recommit, the page should be accessible and zeroed. + let mut buf = vec![0xFFu8; page_size]; + mapping.read_at(0, &mut buf).unwrap(); + assert!( + buf.iter().all(|&b| b == 0), + "recommitted page should be zeros" + ); + } + + #[test] + fn test_commit_idempotent() { + let page_size = SparseMapping::page_size(); + let mapping = SparseMapping::new(4 * page_size).unwrap(); + + // Allocate (commit) pages. + mapping.alloc(0, 4 * page_size).unwrap(); + + // Commit the same range again — should be a no-op, no error. + mapping.commit(0, 4 * page_size).unwrap(); + mapping.commit(0, page_size).unwrap(); + mapping.commit(page_size, page_size).unwrap(); + + // Write and read to verify pages still work. + let pattern = vec![0xEFu8; page_size]; + mapping.write_at(0, &pattern).unwrap(); + let mut buf = vec![0u8; page_size]; + mapping.read_at(0, &mut buf).unwrap(); + assert_eq!(buf, pattern); + } } diff --git a/support/sparse_mmap/src/unix.rs b/support/sparse_mmap/src/unix.rs index 556359b599..6b2ae9592d 100644 --- a/support/sparse_mmap/src/unix.rs +++ b/support/sparse_mmap/src/unix.rs @@ -336,6 +336,34 @@ impl SparseMapping { Ok(()) } + /// Decommits a range of memory, releasing physical pages back to the host. + /// + /// The virtual address range remains accessible; the next access will get + /// fresh zero pages from the kernel. + pub fn decommit(&self, offset: usize, len: usize) -> Result<(), Error> { + let _ = self.validate_offset_len(offset, len)?; + if len == 0 { + return Ok(()); + } + // SAFETY: the address and length have been validated above. + unsafe { + let addr = self.address.add(offset); + if libc::madvise(addr, len, libc::MADV_DONTNEED) < 0 { + return Err(Error::last_os_error()); + } + } + Ok(()) + } + + /// Commits a range of memory, making it accessible. + /// + /// On Linux, this is a no-op because the kernel handles page faults + /// transparently for anonymous memory. + pub fn commit(&self, offset: usize, len: usize) -> Result<(), Error> { + let _ = self.validate_offset_len(offset, len)?; + Ok(()) + } + /// Unmaps memory from the mapping. pub fn unmap(&self, offset: usize, len: usize) -> io::Result<()> { let _ = self.validate_offset_len(offset, len)?; diff --git a/support/sparse_mmap/src/windows.rs b/support/sparse_mmap/src/windows.rs index 702b212e5a..8146807990 100644 --- a/support/sparse_mmap/src/windows.rs +++ b/support/sparse_mmap/src/windows.rs @@ -7,6 +7,7 @@ use Memory::CreateFileMappingW; use Memory::MEM_COMMIT; +use Memory::MEM_DECOMMIT; use Memory::MEM_RELEASE; use Memory::MEM_RESERVE; use Memory::MEMORY_MAPPED_VIEW_ADDRESS; @@ -644,6 +645,55 @@ impl SparseMapping { start_index } + /// Decommits a range of memory, releasing physical pages back to the host. + /// + /// The virtual address range remains reserved; accessing decommitted + /// pages will cause an access violation until they are recommitted + /// with [`commit()`](Self::commit). + /// + /// This is only valid for ranges that were previously committed with + /// [`alloc()`](Self::alloc) or [`commit()`](Self::commit). + pub fn decommit(&self, offset: usize, len: usize) -> Result<(), Error> { + let _ = self.validate_offset_len(offset, len)?; + if len == 0 { + return Ok(()); + } + unsafe { + virtual_free( + self.process.as_ref(), + self.address.wrapping_add(offset), + len, + MEM_DECOMMIT, + ) + } + } + + /// Commits a range of previously reserved or decommitted memory. + /// + /// This is used to recommit pages after [`decommit()`](Self::decommit). + /// For the initial commit of anonymous pages (replacing placeholders), + /// use [`alloc()`](Self::alloc) instead. + /// + /// Committing already-committed pages is a no-op. + pub fn commit(&self, offset: usize, len: usize) -> Result<(), Error> { + let _ = self.validate_offset_len(offset, len)?; + if len == 0 { + return Ok(()); + } + unsafe { + virtual_alloc( + self.process.as_ref(), + self.address.wrapping_add(offset), + len, + MEM_COMMIT, + PAGE_READWRITE, + null_mut(), + 0, + )?; + } + Ok(()) + } + /// Unmaps a range of mappings. pub fn unmap(&self, offset: usize, len: usize) -> io::Result<()> { let end = self.validate_offset_len(offset, len)?; diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch.rs b/vmm_tests/vmm_tests/tests/tests/multiarch.rs index 7527a5934e..09e26bedb5 100644 --- a/vmm_tests/vmm_tests/tests/tests/multiarch.rs +++ b/vmm_tests/vmm_tests/tests/tests/multiarch.rs @@ -16,6 +16,7 @@ use petri::openvmm::OpenVmmPetriBackend; use petri::pipette::cmd; use petri_artifacts_common::tags::MachineArch; use petri_artifacts_common::tags::OsFlavor; +use vmm_test_macros::openvmm_test; use vmm_test_macros::openvmm_test_no_agent; use vmm_test_macros::vmm_test; use vmm_test_macros::vmm_test_no_agent; @@ -87,6 +88,28 @@ async fn boot(config: PetriVmBuilder) -> anyhow::Result<( Ok(()) } +/// Boot with private anonymous memory instead of shared memory sections. +#[openvmm_test( + linux_direct_x64, + // TODO: add linux_direct_aarch64 (GH #1798) +)] +async fn boot_private_memory(config: PetriVmBuilder) -> anyhow::Result<()> { + let (vm, agent) = config + .modify_backend(|b| { + b.with_custom_config(|c| { + c.memory.private_memory = true; + }) + }) + .run() + .await?; + + agent.ping().await?; + agent.power_off().await?; + vm.wait_for_clean_teardown().await?; + + Ok(()) +} + /// Basic boot test for images that require small amounts of ram, like alpine. #[vmm_test( openvmm_uefi_x64(vhd(alpine_3_23_x64)),