Skip to content

Commit

Permalink
fix(scheduler): add absolute path as part of cache key (#2063)
Browse files Browse the repository at this point in the history
  • Loading branch information
wsxiaoys committed May 8, 2024
1 parent df6f27c commit 566e4c1
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 22 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions crates/tabby-scheduler/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ npm-package-json = "0.1.3"
yarn-lock-parser = "0.7.0"
text-splitter = "0.10.0"
kv = { version = "0.24.0", features = ["json-value"] }
serde.workspace = true
serde_json.workspace = true

[dev-dependencies]
temp_testdir = { workspace = true }
Expand Down
71 changes: 49 additions & 22 deletions crates/tabby-scheduler/src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ use std::{
};

use anyhow::{bail, Context, Result};
use kv::{Batch, Bucket, Config, Item, Json, Store};
use kv::{Batch, Bucket, Config, Json, Store};
use serde::{Deserialize, Serialize};
use tabby_common::{config::RepositoryConfig, languages::get_language_by_ext, SourceFile};
use tracing::{info, warn};

Expand All @@ -19,17 +20,46 @@ fn get_git_hash(path: &Path) -> Result<String> {
Ok(String::from_utf8(output.stdout)?.trim().to_string())
}

fn compute_source_file_key(path: &Path) -> Result<String> {
if !path.is_file() {
bail!("Path is not a file");
/// Cache key identifying one indexed source file.
///
/// The key is persisted as the JSON text returned by `to_string()`, and
/// `is_item_key_matched` compares a freshly computed key with the stored one
/// as strings. NOTE(review): this presumably means the set and order of the
/// fields below must stay stable — confirm before renaming or reordering them.
#[derive(Deserialize, Serialize)]
struct SourceFileKey {
/// Path of the source file, exactly as it was passed to `TryFrom<&Path>`.
path: PathBuf,
/// Language name resolved from the file's extension.
language: String,
/// Value returned by `get_git_hash` for the file.
git_hash: String,
}

/// Rebuilds a key from the JSON text it was stored as.
impl TryFrom<&str> for SourceFileKey {
    type Error = serde_json::Error;

    /// Deserializes `serialized` as JSON into a `SourceFileKey`.
    ///
    /// Any `serde_json` failure (malformed JSON, missing or mistyped fields)
    /// is handed back to the caller unchanged.
    fn try_from(serialized: &str) -> Result<Self, Self::Error> {
        let parsed: Self = serde_json::from_str(serialized)?;
        Ok(parsed)
    }
}

let git_hash = get_git_hash(path)?;
let ext = path.extension().context("Failed to get extension")?;
let Some(lang) = get_language_by_ext(ext) else {
bail!("Unknown language for extension {:?}", ext);
};
Ok(format!("{}-{}", lang.language(), git_hash))
impl TryFrom<&Path> for SourceFileKey {
type Error = anyhow::Error;

fn try_from(path: &Path) -> Result<Self> {
if !path.is_file() {
bail!("Path is not a file");
}

let git_hash = get_git_hash(path)?;
let ext = path.extension().context("Failed to get extension")?;
let Some(lang) = get_language_by_ext(ext) else {
bail!("Unknown language for extension {:?}", ext);
};
Ok(Self {
path: path.to_owned(),
language: lang.language().to_string(),
git_hash: git_hash.to_string(),
})
}
}

/// Renders the key as its JSON serialization, the text used as the cache key.
///
/// Implemented as `Display` rather than as a direct `ToString` impl;
/// `to_string()` remains available through the standard library's blanket
/// `impl<T: Display> ToString for T` and yields the same JSON text.
impl std::fmt::Display for SourceFileKey {
    /// Writes the JSON form of the key to `f`.
    ///
    /// # Panics
    ///
    /// Panics if the key cannot be serialized to JSON.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let json = serde_json::to_string(self).expect("Failed to serialize SourceFileKey");
        f.write_str(&json)
    }
}

pub struct CacheStore {
Expand All @@ -50,7 +80,7 @@ impl CacheStore {
config: &RepositoryConfig,
path: &Path,
) -> Option<SourceFile> {
let key = compute_source_file_key(path).ok()?;
let key: String = SourceFileKey::try_from(path).ok()?.to_string();

let dataset_bucket: Bucket<String, Json<Option<SourceFile>>> = self
.store
Expand All @@ -75,7 +105,7 @@ impl CacheStore {

pub fn garbage_collection(&self) {
info!("Running garbage collection");
let bucket = self
let bucket: Bucket<String, Json<SourceFile>> = self
.store
.bucket(Some(SOURCE_FILE_BUCKET_KEY))
.expect("Could not access dataset bucket");
Expand All @@ -88,7 +118,8 @@ impl CacheStore {
.iter()
.filter_map(|item| {
let item = item.expect("Failed to read item");
if is_item_key_matched(&item) {
let item_key: String = item.key().expect("Failed to get key");
if is_item_key_matched(&item_key) {
num_keep += 1;
None
} else {
Expand All @@ -106,21 +137,17 @@ impl CacheStore {
}
}

fn is_item_key_matched(item: &Item<String, Json<SourceFile>>) -> bool {
let Ok(item_key) = item.key::<String>() else {
return false;
};

let Ok(Json(file)) = item.value() else {
fn is_item_key_matched(item_key: &str) -> bool {
let Ok(key) = SourceFileKey::try_from(item_key) else {
return false;
};

let filepath = PathBuf::from(file.basedir).join(file.filepath);
let Ok(file_key) = compute_source_file_key(&filepath) else {
let Ok(file_key) = SourceFileKey::try_from(key.path.as_path()) else {
return false;
};

file_key == item_key
// If the keys don't match, the file has been removed or modified.
file_key.to_string() == item_key
}

fn create_source_file(
Expand Down

0 comments on commit 566e4c1

Please sign in to comment.