From 47d72bd462cbf62d762e089cb83ed8c19f60d535 Mon Sep 17 00:00:00 2001
From: cryscan
Date: Sun, 22 Sep 2024 15:23:59 +0800
Subject: [PATCH] Do not load LoRA for embed.

---
 Cargo.toml            |  3 +--
 src/model/loader.rs   | 25 +++----------------------
 src/runtime/loader.rs | 24 +++---------------------
 3 files changed, 7 insertions(+), 45 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 28117ab..c4b88e4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -38,7 +38,6 @@ trait-variant = "0.1"
 uid = "0.1"
 wasm-bindgen = "0.2"
 wgpu = "22.1.0"
-pollster = "0.3"
 
 [dependencies.web-rwkv-derive]
 path = "crates/web-rwkv-derive"
@@ -47,7 +46,7 @@ version = "0.2.5"
 [dependencies.tokio]
 default-features = false
 features = ["macros", "rt", "sync", "time"]
-version = "1.37"
+version = "1.40"
 
 [dev-dependencies]
 cbor4ii = { version = "0.3.2", features = ["half-f16", "serde1"] }
diff --git a/src/model/loader.rs b/src/model/loader.rs
index e61b31b..ab4eb1a 100644
--- a/src/model/loader.rs
+++ b/src/model/loader.rs
@@ -608,28 +608,9 @@ impl Loader {
     }
 
     pub fn load_embed(&self) -> Result<TensorCpu<f16>> {
-        let context = &self.context;
-        let name = "emb.weight";
-
-        let (dt, shape, tensor) = self.model.tensor(name)?;
-        let lora = self.lora_vectors(name)?;
-
-        if lora.is_empty() {
-            let tensor = TensorCpu::from_reader((dt, shape, tensor))?;
-            Ok(tensor)
-        } else {
-            let tensor = TensorCpu::from_reader((dt, shape, tensor))?.transfer_into(context);
-            let mut ops = vec![];
-            for lora in lora {
-                let factor = vec![lora.alpha, 1.0, 0.0, 0.0];
-                let factor = context.tensor_from_data([4, 1, 1, 1], factor)?;
-                let op = TensorOp::blend(&factor, &lora.tensor, &tensor)?;
-                ops.push(op);
-            }
-
-            context.queue.submit(context.encode(&TensorOp::List(ops)));
-            Ok(pollster::block_on(tensor.back()))
-        }
+        let (dt, shape, tensor) = self.model.tensor("emb.weight")?;
+        let tensor = TensorCpu::from_reader((dt, shape, tensor))?;
+        Ok(tensor)
     }
 
     pub fn load_head(&self, chunk_size: usize) -> Result<Vec<TensorGpu<f16, ReadWrite>>> {
diff --git a/src/runtime/loader.rs b/src/runtime/loader.rs
index 16235ca..b7d0a97 100644
--- a/src/runtime/loader.rs
+++ b/src/runtime/loader.rs
@@ -607,27 +607,9 @@ impl Loader {
     }
 
     pub fn load_embed(&self) -> Result<TensorCpu<f16>> {
-        let context = &self.context;
-        let name = "emb.weight";
-
-        let (dt, shape, tensor) = self.model.tensor(name)?;
-        let lora = self.lora_vectors(name)?;
-
-        if lora.is_empty() {
-            let tensor = TensorCpu::from_reader((dt, shape, tensor))?;
-            Ok(tensor)
-        } else {
-            let tensor = TensorCpu::from_reader((dt, shape, tensor))?.transfer_into(context);
-            let mut ops = vec![];
-            for lora in lora {
-                let factor = vec![lora.alpha, 1.0, 0.0, 0.0];
-                let factor = context.tensor_from_data([4, 1, 1, 1], factor)?;
-                let op = TensorOp::blend(&factor, &lora.tensor, &tensor)?;
-                ops.push(op);
-            }
-            context.queue.submit(context.encode(&TensorOp::List(ops)));
-            Ok(pollster::block_on(tensor.back()))
-        }
+        let (dt, shape, tensor) = self.model.tensor("emb.weight")?;
+        let tensor = TensorCpu::from_reader((dt, shape, tensor))?;
+        Ok(tensor)
    }
 
     pub fn load_head(&self, chunk_size: usize) -> Result<Vec<TensorGpu<f16, ReadWrite>>> {
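
Note: a minimal caller sketch, not part of the patch. After this change,
`load_embed` always returns the base `emb.weight` tensor on the CPU; any
LoRA vectors registered for the embedding are deliberately ignored, and the
GPU blend plus blocking read-back is gone, which is why the `pollster`
dependency could be dropped. The type annotation and the idea that a
`loader` has been constructed elsewhere are assumptions for illustration:

    // Sketch only: `loader` is assumed to be a Loader already built from a
    // model file; the `?` operator assumes a Result-returning context.
    // With this patch, `load_embed` just reads `emb.weight` into a CPU
    // tensor, with no LoRA blending and no GPU round-trip.
    let embed: TensorCpu<f16> = loader.load_embed()?;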