Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: Preallocate records #1185

Merged
merged 2 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion crates/vm/src/arch/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,13 @@ pub struct MemoryConfig {
pub decomp: usize,
/// Maximum N AccessAdapter AIR to support.
pub max_access_adapter_n: usize,
/// An expected upper bound on the number of memory accesses.
pub access_capacity: usize,
}

impl Default for MemoryConfig {
fn default() -> Self {
Self::new(29, 1, 29, 29, 17, 64)
Self::new(29, 1, 29, 29, 17, 64, 1 << 24)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

224 seems pretty high, maybe just 220?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for memory. If a chip has up to 2^20 records and we have dozens of chips and each record does several accesses, then 2^24 might still be small?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you lmk the execution performance diff (via criterion bench) between 2^20 and 2^24, just for a sense?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from 1 << 20 to 1 << 24 (regex_execute bench):

regex/execute           time:   [377.90 ms 391.66 ms 405.92 ms]
                        change: [-11.309% -6.3323% -0.8434%] (p = 0.05 < 0.05)
                        Change within noise threshold.

on the border of statistical signficance

}
}

Expand Down
1 change: 0 additions & 1 deletion crates/vm/src/arch/extensions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,6 @@ impl<F: PrimeField32> SystemComplex<F> {
range_checker.clone(),
MemoryMerkleBus(bus_idx_max - 2),
DirectCompressionBus(bus_idx_max - 1),
MemoryImage::<F>::default(),
)
} else {
MemoryController::with_volatile_memory(
Expand Down
5 changes: 4 additions & 1 deletion crates/vm/src/arch/integration_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,9 @@ pub struct VmChipWrapper<F, A: VmAdapterChip<F>, C: VmCoreChip<F, A::Interface>>
offline_memory: Arc<Mutex<OfflineMemory<F>>>,
}

// TODO: Make this configurable.
const DEFAULT_RECORDS_CAPACITY: usize = 1 << 20;

impl<F, A, C> VmChipWrapper<F, A, C>
where
A: VmAdapterChip<F>,
Expand All @@ -201,7 +204,7 @@ where
Self {
adapter,
core,
records: vec![],
records: Vec::with_capacity(DEFAULT_RECORDS_CAPACITY),
offline_memory,
}
}
Expand Down
2 changes: 1 addition & 1 deletion crates/vm/src/arch/testing/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ impl VmChipTestBuilder<BabyBear> {

impl<F: PrimeField32> Default for VmChipTestBuilder<F> {
fn default() -> Self {
let mem_config = MemoryConfig::new(2, 1, 29, 29, 17, 64);
let mem_config = MemoryConfig::new(2, 1, 29, 29, 17, 64, 1 << 22);
let range_checker = Arc::new(VariableRangeCheckerChip::new(VariableRangeCheckerBus::new(
RANGE_CHECKER_BUS,
mem_config.decomp,
Expand Down
27 changes: 16 additions & 11 deletions crates/vm/src/system/memory/controller/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ impl<F: PrimeField32> MemoryController<F> {
range_checker.clone(),
),
},
memory: Memory::new(),
memory: Memory::new(mem_config.access_capacity),
offline_memory: Arc::new(Mutex::new(OfflineMemory::new(
initial_memory,
1,
Expand All @@ -261,20 +261,21 @@ impl<F: PrimeField32> MemoryController<F> {
}
}

/// Creates a new memory controller for persistent memory.
///
/// Call `set_initial_memory` to set the initial memory state after construction.
pub fn with_persistent_memory(
memory_bus: MemoryBus,
mem_config: MemoryConfig,
range_checker: Arc<VariableRangeCheckerChip>,
merkle_bus: MemoryMerkleBus,
compression_bus: DirectCompressionBus,
initial_memory: MemoryImage<F>,
) -> Self {
let memory_dims = MemoryDimensions {
as_height: mem_config.as_height,
address_height: mem_config.pointer_max_bits - log2_strict_usize(CHUNK),
as_offset: 1,
};
let memory = Memory::from_image(initial_memory.clone());
let range_checker_bus = range_checker.bus();
let interface_chip = MemoryInterface::Persistent {
boundary_chip: PersistentBoundaryChip::new(
Expand All @@ -284,15 +285,15 @@ impl<F: PrimeField32> MemoryController<F> {
compression_bus,
),
merkle_chip: MemoryMerkleChip::new(memory_dims, merkle_bus, compression_bus),
initial_memory: initial_memory.clone(),
initial_memory: MemoryImage::default(),
};
Self {
memory_bus,
mem_config,
interface_chip,
memory,
memory: Memory::new(0), // it is expected that the memory will be set later
offline_memory: Arc::new(Mutex::new(OfflineMemory::new(
initial_memory,
MemoryImage::default(),
CHUNK,
memory_bus,
range_checker.clone(),
Expand Down Expand Up @@ -347,15 +348,17 @@ impl<F: PrimeField32> MemoryController<F> {
let mut offline_memory = self.offline_memory.lock().unwrap();
offline_memory.set_initial_memory(memory.clone());

self.memory = Memory::from_image(memory.clone(), self.mem_config.access_capacity);

match &mut self.interface_chip {
MemoryInterface::Volatile { .. } => {
if !memory.is_empty() {
panic!("Cannot set initial memory for volatile memory");
}
assert!(
memory.is_empty(),
"Cannot set initial memory for volatile memory"
);
}
MemoryInterface::Persistent { initial_memory, .. } => {
*initial_memory = memory;
self.memory = Memory::from_image(initial_memory.clone());
}
}
}
Expand Down Expand Up @@ -449,9 +452,11 @@ impl<F: PrimeField32> MemoryController<F> {
}

fn replay_access_log(&mut self) {
let mut offline_memory = self.offline_memory.lock().unwrap();
let log = mem::take(&mut self.memory.log);

let mut offline_memory = self.offline_memory.lock().unwrap();
offline_memory.set_log_capacity(log.len());

for entry in log {
Self::replay_access(
entry,
Expand Down
5 changes: 5 additions & 0 deletions crates/vm/src/system/memory/offline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ impl<F: PrimeField32> OfflineMemory<F> {
self.data = initial_memory;
}

pub(super) fn set_log_capacity(&mut self, access_capacity: usize) {
assert!(self.log.is_empty());
self.log = Vec::with_capacity(access_capacity);
}

pub fn memory_bridge(&self) -> MemoryBridge {
MemoryBridge::new(
self.memory_bus,
Expand Down
18 changes: 6 additions & 12 deletions crates/vm/src/system/memory/online.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,34 +26,28 @@ pub(crate) type Address = (u32, u32);
/// A simple data structure to read to/write from memory.
///
/// Stores a log of memory accesses to reconstruct aspects of memory state for trace generation.
#[derive(Debug, Clone)]
#[derive(Debug)]
pub struct Memory<F> {
pub(super) data: FxHashMap<Address, F>,
pub(super) log: Vec<MemoryLogEntry<F>>,
timestamp: u32,
}

impl<F: PrimeField32> Default for Memory<F> {
fn default() -> Self {
Self::new()
}
}

impl<F: PrimeField32> Memory<F> {
pub fn new() -> Self {
pub fn new(access_capacity: usize) -> Self {
Self {
data: MemoryImage::default(),
timestamp: INITIAL_TIMESTAMP + 1,
log: vec![],
log: Vec::with_capacity(access_capacity),
}
}

/// Instantiates a new `Memory` data structure from an image.
pub fn from_image(image: MemoryImage<F>) -> Self {
pub fn from_image(image: MemoryImage<F>, access_capacity: usize) -> Self {
Self {
data: image,
timestamp: INITIAL_TIMESTAMP + 1,
log: vec![],
log: Vec::with_capacity(access_capacity),
}
}

Expand Down Expand Up @@ -141,7 +135,7 @@ mod tests {

#[test]
fn test_write_read() {
let mut memory = Memory::new();
let mut memory = Memory::new(0);
let address_space = 1;

memory.write(address_space, 0, bba![1, 2, 3, 4]);
Expand Down
3 changes: 1 addition & 2 deletions crates/vm/src/system/memory/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use crate::{
memory::{
merkle::MemoryMerkleBus,
offline_checker::{MemoryBridge, MemoryBus, MemoryReadAuxCols, MemoryWriteAuxCols},
MemoryAddress, MemoryImage, OfflineMemory, RecordId,
MemoryAddress, OfflineMemory, RecordId,
},
poseidon2::Poseidon2PeripheryChip,
},
Expand Down Expand Up @@ -249,7 +249,6 @@ fn test_memory_controller_persistent() {
range_checker.clone(),
merkle_bus,
compression_bus,
MemoryImage::default(),
);

let mut rng = create_seeded_rng();
Expand Down
4 changes: 2 additions & 2 deletions crates/vm/tests/integration_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ fn test_vm_initial_memory() {
fn test_vm_1_persistent() {
let engine = BabyBearPoseidon2Engine::new(FriParameters::standard_fast());
let config = NativeConfig {
system: SystemConfig::new(3, MemoryConfig::new(1, 1, 16, 10, 6, 64), 0),
system: SystemConfig::new(3, MemoryConfig::new(1, 1, 16, 10, 6, 64, 1024), 0),
native: Default::default(),
}
.with_continuations();
Expand Down Expand Up @@ -725,7 +725,7 @@ fn test_vm_field_extension_arithmetic_persistent() {

let program = Program::from_instructions(&instructions);
let config = NativeConfig {
system: SystemConfig::new(3, MemoryConfig::new(1, 1, 16, 10, 6, 64), 0)
system: SystemConfig::new(3, MemoryConfig::new(1, 1, 16, 10, 6, 64, 1024), 0)
.with_continuations(),
native: Default::default(),
};
Expand Down