Skip to content

Commit

Permalink
Help the compiler avoid inlining lazy init functions (#443)
Browse files Browse the repository at this point in the history
Before this change, the compiler generates code that looks like this:

```
  if not initialized:
     goto init
do_work:
  do the actual work
  goto exit
init:
  inilned init()
  goto do_work
exit:
  ret
```

If the initialization code is small, this works fine. But, for (bad)
reasons, is_rdrand_good is particularly huge. Thus, jumping over its
inined code is wasteful because it puts bad pressure on the instruction
cache.

With this change, the generated code looks like this:

```
  if not initialized:
     goto init
do_work:
  do the actual work
  goto exit
init:
  call init()
  goto do_work
exit:
  ret
```

I verified these claims by running:
```
$ cargo asm --rust getrandom_inner --lib --target=x86_64-fortanix-unknown-sgx
```

This is also what other implementations (e.g. OnceCell) do.

While here, I made the analogous change to LazyPtr, and rewrote LazyPtr
to the same form as LazyUsize. I didn't check the generated code for
LazyPtr though.

(Why is `is_rdrand_good` huge? The compiler unrolls the 10 iteration
retry loop, and then it unrolls the 8 iteration self-test loop, so the
result is `rdrand()` is inlined 80 times inside is_rdrand_good. This is
something to address separately as it also affects `getrandom_inner`
itself.)
  • Loading branch information
briansmith authored Jun 4, 2024
1 parent a4b1f2f commit 8933c05
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 32 deletions.
36 changes: 24 additions & 12 deletions src/lazy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,20 @@ impl LazyUsize {
// init(). Multiple callers can run their init() functions in parallel.
// init() should always return the same value, if it succeeds.
pub fn unsync_init(&self, init: impl FnOnce() -> usize) -> usize {
#[cold]
fn do_init(this: &LazyUsize, init: impl FnOnce() -> usize) -> usize {
let val = init();
this.0.store(val, Ordering::Relaxed);
val
}

// Relaxed ordering is fine, as we only have a single atomic variable.
let mut val = self.0.load(Ordering::Relaxed);
if val == Self::UNINIT {
val = init();
self.0.store(val, Ordering::Relaxed);
let val = self.0.load(Ordering::Relaxed);
if val != Self::UNINIT {
val
} else {
do_init(self, init)
}
val
}
}

Expand Down Expand Up @@ -92,19 +99,24 @@ impl LazyPtr {
// init(). Multiple callers can run their init() functions in parallel.
// init() should always return the same value, if it succeeds.
pub fn unsync_init(&self, init: impl Fn() -> *mut c_void) -> *mut c_void {
#[cold]
fn do_init(this: &LazyPtr, init: impl Fn() -> *mut c_void) -> *mut c_void {
let addr = init();
this.addr.store(addr, Ordering::Release);
addr
}

// Despite having only a single atomic variable (self.addr), we still
// cannot always use Ordering::Relaxed, as we need to make sure a
// successful call to `init` is "ordered before" any data read through
// the returned pointer (which occurs when the function is called).
// Our implementation mirrors that of the one in libstd, meaning that
// the use of non-Relaxed operations is probably unnecessary.
match self.addr.load(Ordering::Acquire) {
Self::UNINIT => {
let addr = init();
self.addr.store(addr, Ordering::Release);
addr
}
addr => addr,
let val = self.addr.load(Ordering::Acquire);
if val != Self::UNINIT {
val
} else {
do_init(self, init)
}
}
}
49 changes: 29 additions & 20 deletions src/use_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ use core::{
const FILE_PATH: &[u8] = b"/dev/urandom\0";
const FD_UNINIT: usize = usize::max_value();

// Do not inline this when it is the fallback implementation, but don't mark it
// `#[cold]` because it is hot when it is actually used.
#[cfg_attr(any(target_os = "android", target_os = "linux"), inline(never))]
pub fn getrandom_inner(dest: &mut [MaybeUninit<u8>]) -> Result<(), Error> {
let fd = get_rng_fd()?;
sys_fill_exact(dest, |buf| unsafe {
Expand All @@ -31,38 +34,44 @@ pub fn getrandom_inner(dest: &mut [MaybeUninit<u8>]) -> Result<(), Error> {
// return the same file descriptor. This file descriptor is never closed.
fn get_rng_fd() -> Result<libc::c_int, Error> {
static FD: AtomicUsize = AtomicUsize::new(FD_UNINIT);

fn get_fd() -> Option<libc::c_int> {
match FD.load(Relaxed) {
FD_UNINIT => None,
val => Some(val as libc::c_int),
}
}

// Use double-checked locking to avoid acquiring the lock if possible.
if let Some(fd) = get_fd() {
return Ok(fd);
}
#[cold]
fn get_fd_locked() -> Result<libc::c_int, Error> {
// SAFETY: We use the mutex only in this method, and we always unlock it
// before returning, making sure we don't violate the pthread_mutex_t API.
static MUTEX: Mutex = Mutex::new();
unsafe { MUTEX.lock() };
let _guard = DropGuard(|| unsafe { MUTEX.unlock() });

// SAFETY: We use the mutex only in this method, and we always unlock it
// before returning, making sure we don't violate the pthread_mutex_t API.
static MUTEX: Mutex = Mutex::new();
unsafe { MUTEX.lock() };
let _guard = DropGuard(|| unsafe { MUTEX.unlock() });
if let Some(fd) = get_fd() {
return Ok(fd);
}

if let Some(fd) = get_fd() {
return Ok(fd);
}
// On Linux, /dev/urandom might return insecure values.
#[cfg(any(target_os = "android", target_os = "linux"))]
wait_until_rng_ready()?;

// On Linux, /dev/urandom might return insecure values.
#[cfg(any(target_os = "android", target_os = "linux"))]
wait_until_rng_ready()?;
let fd = open_readonly(FILE_PATH)?;
// The fd always fits in a usize without conflicting with FD_UNINIT.
debug_assert!(fd >= 0 && (fd as usize) < FD_UNINIT);
FD.store(fd as usize, Relaxed);

let fd = open_readonly(FILE_PATH)?;
// The fd always fits in a usize without conflicting with FD_UNINIT.
debug_assert!(fd >= 0 && (fd as usize) < FD_UNINIT);
FD.store(fd as usize, Relaxed);
Ok(fd)
}

Ok(fd)
// Use double-checked locking to avoid acquiring the lock if possible.
if let Some(fd) = get_fd() {
Ok(fd)
} else {
get_fd_locked()
}
}

// Polls /dev/random to make sure it is ok to read from /dev/urandom.
Expand Down

0 comments on commit 8933c05

Please sign in to comment.