From 52922b4d6976d12b0353b2fea2580c0590b28ac1 Mon Sep 17 00:00:00 2001 From: Joshua Liebow-Feeser Date: Sun, 3 Sep 2023 17:24:38 +0000 Subject: [PATCH] Add MaybeUninit type The standard library's `MaybeUninit` type does not currently support wrapping unsized types. This commit introduces a polyfill with the same behavior as `MaybeUninit` which does support wrapping unsized types. In this commit, the only supported types are sized types and slice types. Later (as part of #29), we will add the ability to derive the `AsMaybeUninit` trait, which will extend support to custom DSTs. Makes progress on #29 --- src/lib.rs | 293 ++++++++++++++++++++++++++++++++++++++++++++++++-- src/macros.rs | 8 +- 2 files changed, 288 insertions(+), 13 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c462c15d7e0..97f13732845 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -173,7 +173,7 @@ use core::{ fmt::{self, Debug, Display, Formatter}, hash::{Hash, Hasher}, marker::PhantomData, - mem::{self, ManuallyDrop, MaybeUninit}, + mem::{self, ManuallyDrop}, num::{ NonZeroI128, NonZeroI16, NonZeroI32, NonZeroI64, NonZeroI8, NonZeroIsize, NonZeroU128, NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, NonZeroUsize, Wrapping, @@ -1004,17 +1004,16 @@ safety_comment! { /// - `Unaligned`: `MaybeUninit` is guaranteed by its documentation [1] /// to have the same alignment as `T`. /// - /// [1] - /// https://doc.rust-lang.org/nightly/core/mem/union.MaybeUninit.html#layout-1 + /// [1] https://doc.rust-lang.org/nightly/core/mem/union.MaybeUninit.html#layout-1 /// /// TODO(https://github.com/google/zerocopy/issues/251): If we split /// `FromBytes` and `RefFromBytes`, or if we introduce a separate /// `NoCell`/`Freeze` trait, we can relax the trait bounds for `FromZeroes` /// and `FromBytes`. 
- unsafe_impl!(T: FromZeroes => FromZeroes for MaybeUninit); - unsafe_impl!(T: FromBytes => FromBytes for MaybeUninit); - unsafe_impl!(T: Unaligned => Unaligned for MaybeUninit); - assert_unaligned!(MaybeUninit<()>, MaybeUninit); + unsafe_impl!(T: FromZeroes => FromZeroes for mem::MaybeUninit); + unsafe_impl!(T: FromBytes => FromBytes for mem::MaybeUninit); + unsafe_impl!(T: Unaligned => Unaligned for mem::MaybeUninit); + assert_unaligned!(mem::MaybeUninit<()>, mem::MaybeUninit); } safety_comment! { /// SAFETY: @@ -1201,6 +1200,279 @@ mod simd { simd_arch_mod!(arm, int8x4_t, uint8x4_t); } +/// An alternative to the standard library's [`MaybeUninit`] that supports +/// unsized types. +/// +/// `MaybeUninit` is identical to the standard library's `MaybeUninit` type +/// with the exception that it supports wrapping unsized types. Namely, +/// `MaybeUninit` has the same layout as `T`, but it has no bit validity +/// constraints - any byte of a `MaybeUninit` may have any value, including +/// uninitialized. +/// +/// [`MaybeUninit`]: core::mem::MaybeUninit +#[derive(Copy, Clone)] +#[repr(transparent)] +pub struct MaybeUninit { + inner: T::MaybeUninit, +} + +impl Debug for MaybeUninit { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.pad(core::any::type_name::()) + } +} + +impl MaybeUninit { + /// Gets a shared reference to the contained value. + /// + /// # Safety + /// + /// Calling this when the content is not yet fully initialized causes + /// undefined behavior. It is up to the caller to guarantee that `self` is + /// really in an initialized state. + pub unsafe fn assume_init_ref(&self) -> &T { + let ptr = T::raw_from_maybe_uninit(&self.inner); + // SAFETY: The caller has promised that `self` contains an initialized + // `T`. Since `Self` is `repr(transparent)`, it has the same layout as + // `T::MaybeUninit`, which in turn is guaranteed (by safety invariant) + // to have the same layout as `T`. 
Thus, it is sound to treat `ptr` as + // pointing to a valid `T` of the correct size and alignment. + unsafe { &*ptr } + } + + /// Gets a mutable reference to the contained value. + /// + /// # Safety + /// + /// Calling this when the content is not yet fully initialized causes + /// undefined behavior. It is up to the caller to guarantee that `self` is + /// really in an initialized state. + pub unsafe fn assume_init_mut(&mut self) -> &mut T { + let ptr = T::raw_mut_from_maybe_uninit(&mut self.inner); + // SAFETY: The caller has promised that `self` contains an initialized + // `T`. Since `Self` is `repr(transparent)`, it has the same layout as + // `T::MaybeUninit`, which in turn is guaranteed (by safety invariant) + // to have the same layout as `T`. Thus, it is sound to treat `ptr` as + // pointing to a valid `T` of the correct size and alignment. + unsafe { &mut *ptr } + } +} + +impl MaybeUninit { + /// Creates a new `MaybeUninit` in an uninitialized state. + pub const fn uninit() -> MaybeUninit { + MaybeUninit { inner: mem::MaybeUninit::uninit() } + } + + /// Extracts the value from the `MaybeUninit` container. + /// + /// # Safety + /// + /// `assume_init` has the same safety requirements and guarantees as the + /// standard library's [`MaybeUninit::assume_init`] method. + /// + /// [`MaybeUninit::assume_init`]: mem::MaybeUninit::assume_init + pub const unsafe fn assume_init(self) -> T { + // SAFETY: The caller has promised to uphold the safety invariants of + // the exact function we're calling here. Since, for `T: Sized`, + // `MaybeUninit` is a `repr(transparent)` wrapper around + // `mem::MaybeUninit`, it is sound to treat `Self` as equivalent to a + // `mem::MaybeUninit` for the purposes of + // `mem::MaybeUninit::assume_init`'s safety invariants. + unsafe { self.inner.assume_init() } + } +} + +/// A type which can be wrapped in [`MaybeUninit`]. 
+/// +/// # Safety +/// +/// The safety invariants on the associated `MaybeUninit` type and on all +/// methods must be upheld. +pub unsafe trait AsMaybeUninit { + /// A type which has the same layout as `Self`, but which has no validity + /// constraints. + /// + /// Roughly speaking, this type is equivalent to what the standard library's + /// [`MaybeUninit`] would be if it supported unsized types. + /// + /// # Safety + /// + /// For `T: AsMaybeUninit`, the following must hold: + /// - Given `m: T::MaybeUninit`, it is sound to write any byte value, + /// including an uninitialized byte, at any byte offset in `m` + /// - `T` and `T::MaybeUninit` have the same alignment requirement + /// - It is valid to use an `as` cast to convert a `t: *const T` to a `m: + /// *const T::MaybeUninit` and vice-versa (and likewise for `*mut T`/`*mut + /// T::MaybeUninit`). Regardless of which direction the conversion was + /// performed, the sizes of the pointers' referents are always equal (in + /// terms of an API which is not yet stable, `size_of_val_raw(t) == + /// size_of_val_raw(m)`). + /// - `T::MaybeUninit` contains [`UnsafeCell`]s at exactly the same byte + /// ranges that `T` does. + /// + /// [`MaybeUninit`]: core::mem::MaybeUninit + /// [`UnsafeCell`]: core::cell::UnsafeCell + type MaybeUninit: ?Sized; + + /// Converts a const pointer at the type level. + /// + /// # Safety + /// + /// Callers may assume that the memory region addressed by the return value + /// is the same as that addressed by the argument, and that both the return + /// value and the argument have the same provenance. + fn raw_from_maybe_uninit(maybe_uninit: *const Self::MaybeUninit) -> *const Self; + + /// Converts a mut pointer at the type level. + /// + /// # Safety + /// + /// Callers may assume that the memory region addressed by the return value + /// is the same as that addressed by the argument, and that both the return + /// value and the argument have the same provenance. 
+ fn raw_mut_from_maybe_uninit(maybe_uninit: *mut Self::MaybeUninit) -> *mut Self; +} + +// SAFETY: See inline safety comments. +unsafe impl AsMaybeUninit for T { + // SAFETY: + // - `MaybeUninit` has no validity requirements, so it is sound to write any + // byte value, including an uninitialized byte, at any offset. + // - `MaybeUninit` has the same layout as `T`, so they have the same + // alignment requirement. For the same reason, their sizes are equal. + // - Since their sizes are equal, raw pointers to both types are thin + // pointers, and thus can be converted using as casts. For the same + // reason, the sizes of these pointers' referents are always equal. + // - `MaybeUninit` has the same field offsets as `T`, and so it contains + // `UnsafeCell`s at exactly the same byte ranges as `T`. + type MaybeUninit = mem::MaybeUninit; + + // SAFETY: `.cast` preserves pointer address and provenance. + fn raw_from_maybe_uninit(maybe_uninit: *const mem::MaybeUninit) -> *const T { + maybe_uninit.cast::() + } + + // SAFETY: `.cast` preserves pointer address and provenance. + fn raw_mut_from_maybe_uninit(maybe_uninit: *mut mem::MaybeUninit) -> *mut T { + maybe_uninit.cast::() + } +} + +// SAFETY: See inline safety comments. +unsafe impl AsMaybeUninit for [T] { + // SAFETY: + // - `MaybeUninit` has no bit validity requirements and `[U]` has the same + // bit validity requirements as `U`, so `[MaybeUninit]` has no bit + // validity requirements. Thus, it is sound to write any byte value, + // including an uninitialized byte, at any byte offset. + // - Since `MaybeUninit` has the same layout as `T`, and `[U]` has the + // same alignment as `U`, `[MaybeUninit]` has the same alignment as + // `[T]`. + // - `[T]` and `[MaybeUninit]` are both slice types, and so pointers can + // be converted using an `as` cast. 
Since `T` and `MaybeUninit` have + // the same size, and since such a cast preserves the number of elements + // in the slice, the referent slices themselves will have the same size. + // - `MaybeUninit` has the same field offsets as `[T]`, and so it + // contains `UnsafeCell`s at exactly the same byte ranges as `[T]`. + type MaybeUninit = [mem::MaybeUninit]; + + // SAFETY: `as` preserves pointer address and provenance. + #[allow(clippy::as_conversions)] + fn raw_from_maybe_uninit(maybe_uninit: *const [mem::MaybeUninit]) -> *const [T] { + maybe_uninit as *const [T] + } + + // SAFETY: `as` preserves pointer address and provenance. + #[allow(clippy::as_conversions)] + fn raw_mut_from_maybe_uninit(maybe_uninit: *mut [mem::MaybeUninit]) -> *mut [T] { + maybe_uninit as *mut [T] + } +} + +// SAFETY: See inline safety comments. +unsafe impl AsMaybeUninit for str { + // SAFETY: `str` has the same layout as `[u8]`. Thus, the same safety + // argument for `<[u8] as AsMaybeUninit>::MaybeUninit` applies here. + type MaybeUninit = <[u8] as AsMaybeUninit>::MaybeUninit; + + // SAFETY: `as` preserves pointer address and provenance. + #[allow(clippy::as_conversions)] + fn raw_from_maybe_uninit( + maybe_uninit: *const <[u8] as AsMaybeUninit>::MaybeUninit, + ) -> *const str { + maybe_uninit as *const str + } + + // SAFETY: `as` preserves pointer address and provenance. + #[allow(clippy::as_conversions)] + fn raw_mut_from_maybe_uninit( + maybe_uninit: *mut <[u8] as AsMaybeUninit>::MaybeUninit, + ) -> *mut str { + maybe_uninit as *mut str + } +} + +// SAFETY: See inline safety comments. +unsafe impl AsMaybeUninit for MaybeUninit<[T]> { + // SAFETY: `MaybeUninit<[T]>` is a `repr(transparent)` wrapper around + // `[T::MaybeUninit]`. 
Thus: + // - Given `m: Self::MaybeUninit = [T::MaybeUninit]`, it is sound to write + // any byte value, including an uninitialized byte, at any byte offset in + // `m` because that is already required of `T::MaybeUninit`, and thus of + // `[T::MaybeUninit]` + // - `Self` and `[T::MaybeUninit]` have the same representation, and so: + // - Alignments are equal + // - Pointer casts are valid, and sizes of referents of both pointer types + // are equal. + // - `Self::MaybeUninit = [T::MaybeUninit]` contains `UnsafeCell`s at + // exactly the same byte ranges that `Self` does because `Self` has the + // same bit validity as `[T::MaybeUninit]`. + type MaybeUninit = [::MaybeUninit]; + + // SAFETY: `as` preserves pointer address and provenance. + #[allow(clippy::as_conversions)] + fn raw_from_maybe_uninit( + maybe_uninit: *const [::MaybeUninit], + ) -> *const MaybeUninit<[T]> { + maybe_uninit as *const MaybeUninit<[T]> + } + + // SAFETY: `as` preserves pointer address and provenance. + #[allow(clippy::as_conversions)] + fn raw_mut_from_maybe_uninit( + maybe_uninit: *mut [::MaybeUninit], + ) -> *mut MaybeUninit<[T]> { + maybe_uninit as *mut MaybeUninit<[T]> + } +} + +safety_comment! { + // `MaybeUninit` is `FromZeroes` and `FromBytes`, but never `AsBytes` + // since it may contain uninitialized bytes. + // + /// SAFETY: + /// - `FromZeroes`, `FromBytes`: `MaybeUninit` has no restrictions on its + /// contents. Unfortunately, in addition to bit validity, `FromZeroes` and + /// `FromBytes` also require that implementers contain no `UnsafeCell`s. + /// Thus, we require `T: FromZeroes` and `T: FromBytes` in order to ensure + /// that `T` - and thus `MaybeUninit` - contains no `UnsafeCell`s. + /// Thus, requiring that `T` implement each of these traits is sufficient. + /// - `Unaligned`: `MaybeUninit` is guaranteed by its documentation [1] + /// to have the same alignment as `T`. 
+ /// + /// [1] https://doc.rust-lang.org/nightly/core/mem/union.MaybeUninit.html#layout-1 + /// + /// TODO(https://github.com/google/zerocopy/issues/251): If we split + /// `FromBytes` and `RefFromBytes`, or if we introduce a separate + /// `NoCell`/`Freeze` trait, we can relax the trait bounds for `FromZeroes` + /// and `FromBytes`. + unsafe_impl!(T: ?Sized + AsMaybeUninit + FromZeroes => FromZeroes for MaybeUninit); + unsafe_impl!(T: ?Sized + AsMaybeUninit + FromBytes => FromBytes for MaybeUninit); + unsafe_impl!(T: ?Sized + AsMaybeUninit + Unaligned => Unaligned for MaybeUninit); + assert_unaligned!(mem::MaybeUninit<()>, MaybeUninit); +} + /// A type with no alignment requirement. /// /// An `Unalign` wraps a `T`, removing any alignment requirement. `Unalign` @@ -4068,8 +4340,15 @@ mod tests { assert_impls!(ManuallyDrop: !FromZeroes, !FromBytes, !AsBytes, !Unaligned); assert_impls!(ManuallyDrop<[NotZerocopy]>: !FromZeroes, !FromBytes, !AsBytes, !Unaligned); + assert_impls!(mem::MaybeUninit: FromZeroes, FromBytes, Unaligned, !AsBytes); + assert_impls!(mem::MaybeUninit: !FromZeroes, !FromBytes, !AsBytes, !Unaligned); + assert_impls!(MaybeUninit: FromZeroes, FromBytes, Unaligned, !AsBytes); + assert_impls!(MaybeUninit>: FromZeroes, FromBytes, Unaligned, !AsBytes); + assert_impls!(MaybeUninit<[u8]>: FromZeroes, FromBytes, Unaligned, !AsBytes); + assert_impls!(MaybeUninit>: FromZeroes, FromBytes, Unaligned, !AsBytes); assert_impls!(MaybeUninit: !FromZeroes, !FromBytes, !AsBytes, !Unaligned); + assert_impls!(MaybeUninit>: !FromZeroes, !FromBytes, !AsBytes, !Unaligned); assert_impls!(Wrapping: FromZeroes, FromBytes, AsBytes, Unaligned); assert_impls!(Wrapping: !FromZeroes, !FromBytes, !AsBytes, !Unaligned); diff --git a/src/macros.rs b/src/macros.rs index 02ec6112543..5ffdbd16e8a 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -39,17 +39,13 @@ macro_rules! 
unsafe_impl { ($tyvar:ident => $trait:ident for $ty:ty) => { unsafe impl<$tyvar> $trait for $ty { fn only_derive_is_allowed_to_implement_this_trait() {} } }; - // For all `$tyvar: ?Sized` with no bounds, implement `$trait` for `$ty`. - ($tyvar:ident: ?Sized => $trait:ident for $ty:ty) => { - unsafe impl<$tyvar: ?Sized> $trait for $ty { fn only_derive_is_allowed_to_implement_this_trait() {} } - }; // For all `$tyvar: $bound`, implement `$trait` for `$ty`. ($tyvar:ident: $bound:path => $trait:ident for $ty:ty) => { unsafe impl<$tyvar: $bound> $trait for $ty { fn only_derive_is_allowed_to_implement_this_trait() {} } }; // For all `$tyvar: $bound + ?Sized`, implement `$trait` for `$ty`. - ($tyvar:ident: ?Sized + $bound:path => $trait:ident for $ty:ty) => { - unsafe impl<$tyvar: ?Sized + $bound> $trait for $ty { fn only_derive_is_allowed_to_implement_this_trait() {} } + ($tyvar:ident: ?Sized $(+ $bounds:path)* => $trait:ident for $ty:ty) => { + unsafe impl<$tyvar: ?Sized $(+ $bounds)*> $trait for $ty { fn only_derive_is_allowed_to_implement_this_trait() {} } }; // For all `$tyvar: $bound` and for all `const $constvar: $constty`, // implement `$trait` for `$ty`.