Skip to content

Commit

Permalink
Apply suggestions
Browse files Browse the repository at this point in the history
  • Loading branch information
boxbeam committed May 7, 2024
1 parent fe348f6 commit 3630455
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 38 deletions.
6 changes: 3 additions & 3 deletions crates/tabby-common/src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub struct CodeSearchSchema {
pub field_git_url: Field,
pub field_filepath: Field,
/// Indexed field uniquely identifying a file in a repository, stringified SourceFileKey
pub field_file_id: Field,
pub field_source_file_key: Field,
pub field_language: Field,
pub field_body: Field,
}
Expand All @@ -38,7 +38,7 @@ impl CodeSearchSchema {

let field_git_url = builder.add_text_field("git_url", STRING | STORED);
let field_filepath = builder.add_text_field("filepath", STRING | STORED);
let field_file_id = builder.add_text_field("file_id", STRING | STORED);
let field_source_file_key = builder.add_text_field("file_id", STRING | STORED);
let field_language = builder.add_text_field("language", STRING | STORED);
let field_body = builder.add_text_field("body", code_options);
let schema = builder.build();
Expand All @@ -47,7 +47,7 @@ impl CodeSearchSchema {
schema,
field_git_url,
field_filepath,
field_file_id,
field_source_file_key,
field_language,
field_body,
}
Expand Down
39 changes: 20 additions & 19 deletions crates/tabby-scheduler/src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ fn get_git_hash(path: &Path) -> Result<String> {
}

#[derive(Deserialize, Serialize, Debug)]

Check warning on line 31 in crates/tabby-scheduler/src/cache.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby-scheduler/src/cache.rs#L31

Added line #L31 was not covered by tests
pub(crate) struct SourceFileKey {
struct SourceFileKey {
path: PathBuf,
language: String,
git_hash: String,
Expand Down Expand Up @@ -89,20 +89,25 @@ impl CacheStore {
.expect("Failed to access indexed files bucket")
}

Check warning on line 90 in crates/tabby-scheduler/src/cache.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby-scheduler/src/cache.rs#L86-L90

Added lines #L86 - L90 were not covered by tests

pub fn is_indexed(&self, key: &SourceFileKey) -> bool {
self.index_bucket()
.contains(&key.to_string())
.expect("Failed to read index bucket")
/// Computes the index-cache key for `path` and reports whether it is already indexed.
///
/// The key is the stringified `SourceFileKey` built from `path`. The returned tuple is
/// `(key, indexed)`, where `indexed` is `true` when the index bucket already contains
/// `key`. The key is returned so the caller can reuse it without rebuilding it.
///
/// # Panics
///
/// Panics if a `SourceFileKey` cannot be created from `path`, or if the index bucket
/// cannot be read.
pub fn check_indexed(&self, path: &Path) -> (String, bool) {
let key = SourceFileKey::try_from(path)
.expect("Failed to create source file key")
.to_string();
let indexed = self
.index_bucket()
.contains(&key)
.expect("Failed to read index bucket");
(key, indexed)
}

Check warning on line 101 in crates/tabby-scheduler/src/cache.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby-scheduler/src/cache.rs#L92-L101

Added lines #L92 - L101 were not covered by tests

pub fn set_indexed(&self, key: &SourceFileKey) {
pub fn set_indexed(&self, batch: Batch<String, String>) {
self.index_bucket()
.set(&key.to_string(), &String::new())
.expect("Failed to write to index bucket");
.batch(batch)
.expect("Failed to commit batched index update")
}

Check warning on line 107 in crates/tabby-scheduler/src/cache.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby-scheduler/src/cache.rs#L103-L107

Added lines #L103 - L107 were not covered by tests

pub fn cleanup_old_indexed_files(&self, key_remover: impl Fn(&String)) {
info!("Cleaning up indexed file cache");
pub fn garbage_collection_for_indexed_files(&self, key_remover: impl Fn(&String)) {
info!("Started cleaning up 'indexed_files' bucket");
let bucket = self.index_bucket();
let mut batch = Batch::new();

Expand All @@ -123,13 +128,9 @@ impl CacheStore {
}
})
.inspect(key_remover)
.for_each(|key| {
batch
.remove(&key)
.expect("Failed to remove indexed source file")
});
.for_each(|key| batch.remove(&key).expect("Failed to remove key"));

info!("Finished cleaning up indexed files: {num_keep} items kept, {num_removed} items removed");
info!("Finished garbage collection for 'indexed_files': {num_keep} items kept, {num_removed} items removed");
bucket
.batch(batch)
.expect("Failed to execute batched delete");
Expand Down Expand Up @@ -163,8 +164,8 @@ impl CacheStore {
}
}

pub fn cleanup_old_source_files(&self) {
info!("Cleaning up synced file cache");
pub fn garbage_collection_for_source_files(&self) {
info!("Started cleaning up 'source_files' bucket");

Check warning on line 168 in crates/tabby-scheduler/src/cache.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby-scheduler/src/cache.rs#L167-L168

Added lines #L167 - L168 were not covered by tests
let bucket: Bucket<String, Json<SourceFile>> = self
.store
.bucket(Some(SOURCE_FILE_BUCKET_KEY))
Expand All @@ -190,7 +191,7 @@ impl CacheStore {
.for_each(|key| batch.remove(&key).expect("Failed to remove key"));

info!(
"Finished garbage collection: {} items kept, {} items removed",
"Finished garbage collection for 'source_files': {} items kept, {} items removed",

Check warning on line 194 in crates/tabby-scheduler/src/cache.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby-scheduler/src/cache.rs#L194

Added line #L194 was not covered by tests
num_keep, num_removed
);
bucket.batch(batch).expect("to batch remove staled files");
Expand Down
45 changes: 31 additions & 14 deletions crates/tabby-scheduler/src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::{fs, io::IsTerminal, path::Path};

use ignore::Walk;
use kdam::BarExt;
use kv::Batch;
use tabby_common::{
config::RepositoryConfig,
index::{register_tokenizers, CodeSearchSchema},
Expand All @@ -10,11 +11,7 @@ use tabby_common::{
use tantivy::{directory::MmapDirectory, doc, Index, Term};
use tracing::{debug, warn};

use crate::{
cache::{CacheStore, SourceFileKey},
code::CodeIntelligence,
utils::tqdm,
};
use crate::{cache::CacheStore, code::CodeIntelligence, utils::tqdm};

// Magic numbers
static MAX_LINE_LENGTH_THRESHOLD: usize = 300;
Expand All @@ -41,6 +38,7 @@ pub fn index_repositories(cache: &mut CacheStore, config: &[RepositoryConfig]) {
.then(|| tqdm(total_file_size));

let intelligence = CodeIntelligence::default();
let mut indexed_files_batch = Batch::new();
for repository in config {
for file in Walk::new(repository.dir()) {
let file = match file {
Expand All @@ -56,10 +54,9 @@ pub fn index_repositories(cache: &mut CacheStore, config: &[RepositoryConfig]) {
if !is_valid_file(&source_file) {
continue;
}
let file_id =
SourceFileKey::try_from(file.path()).expect("Failed to create source file key");
let (file_id, indexed) = cache.check_indexed(file.path());

if cache.is_indexed(&file_id) {
if indexed {

Check warning on line 59 in crates/tabby-scheduler/src/index.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby-scheduler/src/index.rs#L54-L59

Added lines #L54 - L59 were not covered by tests
continue;
}
let text = match source_file.read_content() {
Expand All @@ -82,25 +79,45 @@ pub fn index_repositories(cache: &mut CacheStore, config: &[RepositoryConfig]) {
writer
.add_document(doc! {
code.field_git_url => source_file.git_url.clone(),
code.field_file_id => file_id.to_string(),
code.field_source_file_key => file_id.to_string(),
code.field_filepath => source_file.filepath.clone(),
code.field_language => source_file.language.clone(),
code.field_body => body,
})
.expect("Failed to add document");
}
cache.set_indexed(&file_id);
indexed_files_batch
.set(&file_id, &String::new())
.expect("Failed to mark file as indexed");

Check warning on line 91 in crates/tabby-scheduler/src/index.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby-scheduler/src/index.rs#L73-L91

Added lines #L73 - L91 were not covered by tests
}
}

cache.cleanup_old_indexed_files(|key| {
writer.delete_term(Term::from_field_text(code.field_file_id, key));
});

// Commit updating indexed documents
writer.commit().expect("Failed to commit index");
writer
.wait_merging_threads()
.expect("Failed to wait for merging threads");

// Mark all indexed documents as indexed
cache.set_indexed(indexed_files_batch);

// Create a new writer to commit deletion of removed indexed files
let mut writer = index
.writer(150_000_000)
.expect("Failed to create index writer");

cache.garbage_collection_for_indexed_files(|key| {
writer.delete_term(Term::from_field_text(code.field_source_file_key, key));
});

// Commit garbage collection
writer
.commit()
.expect("Failed to commit garbage collection");

writer
.wait_merging_threads()
.expect("Failed to wait for merging threads on garbage collection");

Check warning on line 120 in crates/tabby-scheduler/src/index.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby-scheduler/src/index.rs#L100-L120

Added lines #L100 - L120 were not covered by tests
}

fn is_valid_file(file: &SourceFile) -> bool {
Expand Down
4 changes: 2 additions & 2 deletions crates/tabby-scheduler/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub async fn scheduler<T: RepositoryAccess + 'static>(now: bool, access: T) {
job_sync(&mut cache, &repositories);
job_index(&mut cache, &repositories);

Check warning on line 25 in crates/tabby-scheduler/src/lib.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby-scheduler/src/lib.rs#L25

Added line #L25 was not covered by tests

cache.cleanup_old_source_files();
cache.garbage_collection_for_source_files();

Check warning on line 27 in crates/tabby-scheduler/src/lib.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby-scheduler/src/lib.rs#L27

Added line #L27 was not covered by tests
} else {
let access = Arc::new(access);
let scheduler = JobScheduler::new()
Expand Down Expand Up @@ -53,7 +53,7 @@ pub async fn scheduler<T: RepositoryAccess + 'static>(now: bool, access: T) {

job_sync(&mut cache, &repositories);
job_index(&mut cache, &repositories);
cache.cleanup_old_source_files();
cache.garbage_collection_for_source_files();

Check warning on line 56 in crates/tabby-scheduler/src/lib.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby-scheduler/src/lib.rs#L55-L56

Added lines #L55 - L56 were not covered by tests
})
})
.expect("Failed to create job"),
Expand Down

0 comments on commit 3630455

Please sign in to comment.