From 1d1b190de48b59749a2b1c41f83f4897fbeff6f7 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 20 Jan 2026 19:33:47 +0100 Subject: [PATCH 1/2] Add proper multithreading support to Arena --- crates/edit/src/bin/edit/apperr.rs | 35 +++++++++++++++++++ crates/stdext/Cargo.toml | 3 ++ crates/stdext/src/arena/debug.rs | 5 +-- crates/stdext/src/arena/fs.rs | 55 ++++++++++++++++++++++++++++++ crates/stdext/src/arena/mod.rs | 10 +++--- crates/stdext/src/arena/release.rs | 36 ++++++++++--------- crates/stdext/src/arena/scratch.rs | 30 +++++++++------- crates/stdext/src/arena/string.rs | 7 ++-- crates/stdext/src/sys/windows.rs | 14 +++++--- 9 files changed, 153 insertions(+), 42 deletions(-) create mode 100644 crates/edit/src/bin/edit/apperr.rs create mode 100644 crates/stdext/src/arena/fs.rs diff --git a/crates/edit/src/bin/edit/apperr.rs b/crates/edit/src/bin/edit/apperr.rs new file mode 100644 index 000000000000..baa8d071a8e5 --- /dev/null +++ b/crates/edit/src/bin/edit/apperr.rs @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use std::io; + +use edit::{buffer, icu}; + +#[derive(Debug)] +pub enum Error { + Io(io::Error), + Icu(icu::Error), +} + +pub type Result = std::result::Result; + +impl From for Error { + fn from(err: io::Error) -> Self { + Self::Io(err) + } +} + +impl From for Error { + fn from(err: icu::Error) -> Self { + Self::Icu(err) + } +} + +impl From for Error { + fn from(err: buffer::IoError) -> Self { + match err { + buffer::IoError::Io(e) => Self::Io(e), + buffer::IoError::Icu(e) => Self::Icu(e), + } + } +} diff --git a/crates/stdext/Cargo.toml b/crates/stdext/Cargo.toml index 6291567feb7c..8feec1485fdc 100644 --- a/crates/stdext/Cargo.toml +++ b/crates/stdext/Cargo.toml @@ -7,5 +7,8 @@ license.workspace = true repository.workspace = true rust-version.workspace = true +[features] +single-threaded = [] + [target.'cfg(unix)'.dependencies] libc = "0.2" diff --git a/crates/stdext/src/arena/debug.rs b/crates/stdext/src/arena/debug.rs index 3c9144b39af7..851474e546f6 100644 --- a/crates/stdext/src/arena/debug.rs +++ b/crates/stdext/src/arena/debug.rs @@ -4,6 +4,7 @@ #![allow(clippy::missing_safety_doc, clippy::mut_from_ref)] use std::alloc::{AllocError, Allocator, Layout}; +use std::io; use std::mem::MaybeUninit; use std::ptr::NonNull; @@ -62,7 +63,7 @@ impl Arena { Self::Owned { arena: release::Arena::empty() } } - pub fn new(capacity: usize) -> Result { + pub fn new(capacity: usize) -> io::Result { Ok(Self::Owned { arena: release::Arena::new(capacity)? }) } @@ -113,7 +114,7 @@ impl Arena { unsafe impl Allocator for Arena { fn allocate(&self, layout: Layout) -> Result, AllocError> { - self.delegate_target().alloc_raw(layout.size(), layout.align()) + Ok(self.delegate_target().alloc_raw(layout.size(), layout.align())) } fn allocate_zeroed(&self, layout: Layout) -> Result, AllocError> { diff --git a/crates/stdext/src/arena/fs.rs b/crates/stdext/src/arena/fs.rs new file mode 100644 index 000000000000..58396233c804 --- /dev/null +++ b/crates/stdext/src/arena/fs.rs @@ -0,0 +1,55 @@ +use std::fs::File; +use std::io::{self, Read}; +use std::mem::MaybeUninit; +use std::path::Path; +use std::slice::from_raw_parts_mut; + +use super::{Arena, ArenaString}; + +pub fn read_to_vec>(arena: &Arena, path: P) -> io::Result> { + fn inner<'a>(arena: &'a Arena, path: &Path) -> io::Result> { + let mut file = File::open(path)?; + let mut vec = Vec::new_in(arena); + + const MIN_SIZE: usize = 1024; + const MAX_SIZE: usize = 128 * 1024; + let mut buf_size = MIN_SIZE; + + loop { + vec.reserve(buf_size); + let spare = vec.spare_capacity_mut(); + let to_read = spare.len().min(buf_size); + + match file_read_uninit(&mut file, &mut spare[..to_read]) { + Ok(0) => break, + Ok(n) => { + unsafe { vec.set_len(vec.len() + n) }; + buf_size = (buf_size * 2).min(MAX_SIZE); + } + Err(e) if e.kind() == io::ErrorKind::Interrupted => {} + Err(e) => return Err(e), + } + } + + Ok(vec) + } + inner(arena, path.as_ref()) +} + +pub fn read_to_string>(arena: &Arena, path: P) -> io::Result> { + fn inner<'a>(arena: &'a Arena, path: &Path) -> io::Result> { + let vec = read_to_vec(arena, path)?; + ArenaString::from_utf8(vec).map_err(|_| { + io::Error::new(io::ErrorKind::InvalidData, "stream did not contain valid UTF-8") + }) + } + inner(arena, path.as_ref()) +} + +fn file_read_uninit(file: &mut T, buf: &mut [MaybeUninit]) -> io::Result { + unsafe { + let buf_slice = from_raw_parts_mut(buf.as_mut_ptr() as *mut u8, buf.len()); + let n = file.read(buf_slice)?; + Ok(n) + } +} diff --git a/crates/stdext/src/arena/mod.rs b/crates/stdext/src/arena/mod.rs index a7099c7b8aca..2a76e210b4e4 100644 --- a/crates/stdext/src/arena/mod.rs +++ b/crates/stdext/src/arena/mod.rs @@ -5,13 +5,15 @@ #[cfg(debug_assertions)] mod debug; +mod fs; mod release; mod scratch; mod string; #[cfg(all(not(doc), debug_assertions))] -pub use self::debug::Arena; +pub use self::debug::*; +pub use self::fs::*; #[cfg(any(doc, not(debug_assertions)))] -pub use self::release::Arena; -pub use self::scratch::{ScratchArena, init, scratch_arena}; -pub use self::string::ArenaString; +pub use self::release::*; +pub use self::scratch::*; +pub use self::string::*; diff --git a/crates/stdext/src/arena/release.rs b/crates/stdext/src/arena/release.rs index c1441c2e8c6d..53fea9505adb 100644 --- a/crates/stdext/src/arena/release.rs +++ b/crates/stdext/src/arena/release.rs @@ -7,10 +7,13 @@ use std::alloc::{AllocError, Allocator, Layout}; use std::cell::Cell; use std::mem::MaybeUninit; use std::ptr::{self, NonNull}; -use std::{mem, slice}; +use std::{io, mem, slice}; use crate::{cold_path, sys}; +#[cfg(target_pointer_width = "32")] +const ALLOC_CHUNK_SIZE: usize = 32 * 1024; +#[cfg(target_pointer_width = "64")] const ALLOC_CHUNK_SIZE: usize = 64 * 1024; /// An arena allocator. @@ -62,7 +65,7 @@ impl Arena { } } - pub fn new(capacity: usize) -> Result { + pub fn new(capacity: usize) -> io::Result { let capacity = (capacity.max(1) + ALLOC_CHUNK_SIZE - 1) & !(ALLOC_CHUNK_SIZE - 1); let base = unsafe { sys::virtual_reserve(capacity)? }; @@ -103,11 +106,7 @@ impl Arena { } #[inline] - pub(super) fn alloc_raw( - &self, - bytes: usize, - alignment: usize, - ) -> Result, AllocError> { + pub(super) fn alloc_raw(&self, bytes: usize, alignment: usize) -> NonNull<[u8]> { let commit = self.commit.get(); let offset = self.offset.get(); @@ -125,12 +124,12 @@ impl Arena { } self.offset.replace(end); - Ok(unsafe { NonNull::slice_from_raw_parts(self.base.add(beg), bytes) }) + unsafe { NonNull::slice_from_raw_parts(self.base.add(beg), bytes) } } // With the code in `alloc_raw_bump()` out of the way, `alloc_raw()` compiles down to some super tight assembly. #[cold] - fn alloc_raw_bump(&self, beg: usize, end: usize) -> Result, AllocError> { + fn alloc_raw_bump(&self, beg: usize, end: usize) -> NonNull<[u8]> { let offset = self.offset.get(); let commit_old = self.commit.get(); let commit_new = (end + ALLOC_CHUNK_SIZE - 1) & !(ALLOC_CHUNK_SIZE - 1); @@ -140,7 +139,10 @@ impl Arena { sys::virtual_commit(self.base.add(commit_old), commit_new - commit_old).is_err() } { - return Err(AllocError); + // Panicking inside this [cold] function has the benefit of removing duplicated panic code from any + // inlined alloc() function. If we ever add fallible allocations, we should probably duplicate alloc_raw() + // and alloc_raw_bump() instead of returning a Result here and calling unwrap() in the common path. + panic!("out of memory"); } if cfg!(debug_assertions) { @@ -151,22 +153,24 @@ impl Arena { self.commit.replace(commit_new); self.offset.replace(end); - Ok(unsafe { NonNull::slice_from_raw_parts(self.base.add(beg), end - beg) }) + unsafe { NonNull::slice_from_raw_parts(self.base.add(beg), end - beg) } } + #[inline] #[allow(clippy::mut_from_ref)] pub fn alloc_uninit(&self) -> &mut MaybeUninit { let bytes = mem::size_of::(); let alignment = mem::align_of::(); - let ptr = self.alloc_raw(bytes, alignment).unwrap(); + let ptr = self.alloc_raw(bytes, alignment); unsafe { ptr.cast().as_mut() } } + #[inline] #[allow(clippy::mut_from_ref)] pub fn alloc_uninit_slice(&self, count: usize) -> &mut [MaybeUninit] { let bytes = mem::size_of::() * count; let alignment = mem::align_of::(); - let ptr = self.alloc_raw(bytes, alignment).unwrap(); + let ptr = self.alloc_raw(bytes, alignment); unsafe { slice::from_raw_parts_mut(ptr.cast().as_ptr(), count) } } } @@ -187,11 +191,11 @@ impl Default for Arena { unsafe impl Allocator for Arena { fn allocate(&self, layout: Layout) -> Result, AllocError> { - self.alloc_raw(layout.size(), layout.align()) + Ok(self.alloc_raw(layout.size(), layout.align())) } fn allocate_zeroed(&self, layout: Layout) -> Result, AllocError> { - let p = self.alloc_raw(layout.size(), layout.align())?; + let p = self.alloc_raw(layout.size(), layout.align()); unsafe { p.cast::().as_ptr().write_bytes(0, p.len()) } Ok(p) } @@ -217,7 +221,7 @@ unsafe impl Allocator for Arena { let delta = new_layout.size() - old_layout.size(); // Assuming that the given ptr/length area is at the end of the arena, // we can just push more memory to the end of the arena to grow it. - self.alloc_raw(delta, 1)?; + self.alloc_raw(delta, 1); } else { cold_path(); diff --git a/crates/stdext/src/arena/scratch.rs b/crates/stdext/src/arena/scratch.rs index 7c519dfad108..036a8ce2b34f 100644 --- a/crates/stdext/src/arena/scratch.rs +++ b/crates/stdext/src/arena/scratch.rs @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -use std::alloc::AllocError; +use std::io; use std::ops::Deref; #[cfg(debug_assertions)] @@ -74,7 +74,7 @@ mod single_threaded { /// Initialize the scratch arenas with a given capacity. /// Call this before using [`scratch_arena`]. #[allow(dead_code)] - pub fn init(capacity: usize) -> Result<(), AllocError> { + pub fn init(capacity: usize) -> io::Result<()> { unsafe { for s in &mut S_SCRATCH[..] { *s = release::Arena::new(capacity)?; @@ -120,6 +120,7 @@ mod single_threaded { mod multi_threaded { use std::cell::Cell; use std::ptr; + use std::sync::atomic::{AtomicUsize, Ordering}; use super::*; @@ -128,9 +129,13 @@ mod multi_threaded { const { [Cell::new(release::Arena::empty()), Cell::new(release::Arena::empty())] }; } - /// Does nothing. - #[allow(dead_code)] - pub fn init(_: usize) -> Result<(), AllocError> { + static INIT_SIZE: AtomicUsize = AtomicUsize::new(128 * MEBI); + + /// Sets the default scratch arena size. + pub fn init(capacity: usize) -> io::Result<()> { + if capacity != 0 { + INIT_SIZE.store(capacity, Ordering::Relaxed); + } Ok(()) } @@ -142,23 +147,24 @@ mod multi_threaded { #[cold] fn init(s: &[Cell; 2]) { + let capacity = INIT_SIZE.load(Ordering::Relaxed); for s in s { - s.set(release::Arena::new(128 * 1024 * 1024).unwrap()); + s.set(release::Arena::new(capacity).unwrap()); } } - S_SCRATCH.with(|s| { - let index = ptr::eq(opt_ptr(conflict), s[0].as_ptr()) as usize; - let arena = unsafe { &*s[index].as_ptr() }; + S_SCRATCH.with(|arenas| { + let index = ptr::eq(opt_ptr(conflict), arenas[0].as_ptr()) as usize; + let arena = unsafe { &*arenas[index].as_ptr() }; if arena.is_empty() { - init(s); + init(arenas); } ScratchArena::new(arena) }) } } -#[cfg(test)] +#[cfg(not(feature = "single-threaded"))] pub use multi_threaded::*; -#[cfg(not(test))] +#[cfg(feature = "single-threaded")] pub use single_threaded::*; diff --git a/crates/stdext/src/arena/string.rs b/crates/stdext/src/arena/string.rs index 9c24b2b7e600..1322fb154175 100644 --- a/crates/stdext/src/arena/string.rs +++ b/crates/stdext/src/arena/string.rs @@ -36,8 +36,9 @@ impl<'a> ArenaString<'a> { res } - pub fn from_utf8(arena: &'a Arena, vec: &[u8]) -> Result { - Ok(Self::from_str(arena, str::from_utf8(vec)?)) + pub fn from_utf8(vec: Vec) -> Result { + str::from_utf8(&vec)?; + Ok(Self { vec }) } /// It says right here that you checked if `bytes` is valid UTF-8 @@ -273,7 +274,7 @@ impl DerefMut for ArenaString<'_> { impl fmt::Display for ArenaString<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(self.as_str()) + fmt::Display::fmt(&**self, f) } } diff --git a/crates/stdext/src/sys/windows.rs b/crates/stdext/src/sys/windows.rs index e008814efcbd..59282bec9ff9 100644 --- a/crates/stdext/src/sys/windows.rs +++ b/crates/stdext/src/sys/windows.rs @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -use std::alloc::AllocError; +use std::io; use std::ptr::{NonNull, null_mut}; const MEM_COMMIT: u32 = 0x00001000; @@ -27,10 +27,14 @@ unsafe extern "system" { /// /// This function is unsafe because it uses raw pointers. /// Don't forget to release the memory when you're done with it or you'll leak it. -pub unsafe fn virtual_reserve(size: usize) -> Result, AllocError> { +pub unsafe fn virtual_reserve(size: usize) -> io::Result> { unsafe { let res = VirtualAlloc(null_mut(), size, MEM_RESERVE, PAGE_READWRITE); - if res.is_null() { Err(AllocError) } else { Ok(NonNull::new_unchecked(res)) } + if res.is_null() { + Err(io::Error::last_os_error()) + } else { + Ok(NonNull::new_unchecked(res)) + } } } @@ -55,9 +59,9 @@ pub unsafe fn virtual_release(base: NonNull, _size: usize) { /// This function is unsafe because it uses raw pointers. /// Make sure to only pass pointers acquired from [`virtual_reserve`] /// and to pass a size less than or equal to the size passed to [`virtual_reserve`]. -pub unsafe fn virtual_commit(base: NonNull, size: usize) -> Result<(), AllocError> { +pub unsafe fn virtual_commit(base: NonNull, size: usize) -> io::Result<()> { unsafe { let res = VirtualAlloc(base.as_ptr() as *mut _, size, MEM_COMMIT, PAGE_READWRITE); - if res.is_null() { Err(AllocError) } else { Ok(()) } + if res.is_null() { Err(io::Error::last_os_error()) } else { Ok(()) } } } From f462d08a6866a5ccbc44ad9d97ecd8fd3d14fa16 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 20 Jan 2026 20:37:49 +0100 Subject: [PATCH 2/2] Fix unix build --- crates/stdext/src/sys/unix.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/stdext/src/sys/unix.rs b/crates/stdext/src/sys/unix.rs index dd634ca8c723..2d0575b1dc53 100644 --- a/crates/stdext/src/sys/unix.rs +++ b/crates/stdext/src/sys/unix.rs @@ -1,8 +1,8 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -use std::alloc::AllocError; use std::ffi::c_int; +use std::io; use std::ptr::{self, NonNull, null_mut}; /// Reserves a virtual memory region of the given size. @@ -13,7 +13,7 @@ use std::ptr::{self, NonNull, null_mut}; /// /// This function is unsafe because it uses raw pointers. /// Don't forget to release the memory when you're done with it or you'll leak it. -pub unsafe fn virtual_reserve(size: usize) -> Result, AllocError> { +pub unsafe fn virtual_reserve(size: usize) -> io::Result> { unsafe { let ptr = libc::mmap( null_mut(), @@ -24,7 +24,7 @@ pub unsafe fn virtual_reserve(size: usize) -> Result, AllocError> { 0, ); if ptr.is_null() || ptr::eq(ptr, libc::MAP_FAILED) { - Err(AllocError) + Err(io::Error::last_os_error()) } else { Ok(NonNull::new_unchecked(ptr as *mut u8)) } @@ -65,9 +65,9 @@ pub unsafe fn virtual_release(base: NonNull, size: usize) { /// This function is unsafe because it uses raw pointers. /// Make sure to only pass pointers acquired from `virtual_reserve` /// and to pass a size less than or equal to the size passed to `virtual_reserve`. -pub unsafe fn virtual_commit(base: NonNull, size: usize) -> Result<(), AllocError> { +pub unsafe fn virtual_commit(base: NonNull, size: usize) -> io::Result<()> { unsafe { let status = libc::mprotect(base.cast().as_ptr(), size, libc::PROT_READ | libc::PROT_WRITE); - if status != 0 { Err(AllocError) } else { Ok(()) } + if status != 0 { Err(io::Error::last_os_error()) } else { Ok(()) } } }