Skip to content

Commit

Permalink
Introduce GitPackfileBaseItem as interim packfile state
Browse files Browse the repository at this point in the history
Summary:
Currently, when importing Git commits, trees, tags or blobs, we just store the raw Git object in our data stores. These objects are then used when generating packfiles/bundles for the Git protocol or for landing through the Git workflow. When Mark was profiling the bundle generation code, he realized that a major bottleneck is the ZLib encoding that we need to perform on demand during bundle generation. One way to mitigate this problem is to store the encoded data during the initial object upload, which adds a minimal overhead during writes (gitimport, remote-gitimport) but provides a considerable benefit during reads (which is the most prominent workflow in Git).

This diff in particular introduces the GitPackfileBaseItem type, which represents the encoded data that we can use directly in packfiles.

Reviewed By: markbt

Differential Revision: D52073664

fbshipit-source-id: 0cbaf1e52ce29b49bc91269a0b8b1dc2dfd24260
  • Loading branch information
RajivTS authored and facebook-github-bot committed Dec 13, 2023
1 parent ce59485 commit d07a437
Show file tree
Hide file tree
Showing 14 changed files with 416 additions and 14 deletions.
2 changes: 2 additions & 0 deletions eden/mononoke/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,8 @@ members = [
"git/import_direct",
"git/import_tools",
"git/packfile",
"git/packfile/if",
"git/packfile/if/types",
"git/protocol",
"git_symbolic_refs",
"gotham_ext",
Expand Down
5 changes: 5 additions & 0 deletions eden/mononoke/git/packfile/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,19 @@ path = "test/packfile_test.rs"

[dependencies]
anyhow = "1.0.75"
blobstore = { version = "0.1.0", path = "../../blobstore" }
bytes = { version = "1.1", features = ["serde"] }
fbthrift = { version = "0.0.1+unstable", git = "https://github.com/facebook/fbthrift.git", branch = "main" }
flate2 = { version = "1.0.26", features = ["rust_backend"], default-features = false }
futures = { version = "0.3.28", features = ["async-await", "compat"] }
gix-features = { version = "0.32", features = ["parallel", "rustsha1"] }
gix-hash = "0.11"
gix-object = "0.33"
gix-pack = "0.40"
mononoke_types = { version = "0.1.0", path = "../../mononoke_types" }
packfile_thrift = { version = "0.1.0", path = "if" }
pin-project = "0.4.30"
quickcheck = "1.0"
sha1 = "0.10.5"
thiserror = "1.0.49"
tokio = { version = "1.29.1", features = ["full", "test-util", "tracing"] }
Expand Down
6 changes: 6 additions & 0 deletions eden/mononoke/git/packfile/TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,14 @@ rust_library(
"fbsource//third-party/rust:gix-object",
"fbsource//third-party/rust:gix-pack",
"fbsource//third-party/rust:pin-project",
"fbsource//third-party/rust:quickcheck",
"fbsource//third-party/rust:sha1",
"fbsource//third-party/rust:thiserror",
"fbsource//third-party/rust:tokio",
"//eden/mononoke/blobstore:blobstore",
"//eden/mononoke/git/packfile/if:packfile-thrift-rust",
"//eden/mononoke/mononoke_types:mononoke_types",
"//thrift/lib/rust:fbthrift",
],
)

Expand All @@ -36,6 +41,7 @@ rust_unittest(
"fbsource//third-party/rust:gix-hash",
"fbsource//third-party/rust:gix-object",
"fbsource//third-party/rust:gix-pack",
"fbsource//third-party/rust:quickcheck",
"fbsource//third-party/rust:tempfile",
":packfile",
"//common/rust/shed/fbinit:fbinit",
Expand Down
34 changes: 34 additions & 0 deletions eden/mononoke/git/packfile/if/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# @generated by autocargo from //eden/mononoke/git/packfile/if:packfile-thrift-rust

[package]
name = "packfile_thrift"
version = "0.1.0"
authors = ["Facebook"]
edition = "2021"
license = "GPLv2+"
build = "thrift_build.rs"

[lib]
path = "thrift_lib.rs"
test = false
doctest = false

[dependencies]
anyhow = "1.0.75"
async-trait = "0.1.71"
codegen_includer_proc_macro = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
const-cstr = "0.3.0"
fbthrift = { version = "0.0.1+unstable", git = "https://github.com/facebook/fbthrift.git", branch = "main" }
futures = { version = "0.3.28", features = ["async-await", "compat"] }
packfile_thrift__types = { package = "packfile_thrift_types", version = "0.1.0", path = "types" }
ref-cast = "1.0.18"
thiserror = "1.0.49"
tracing = "0.1.40"
tracing-futures = { version = "0.2.5", features = ["futures-03"] }

[build-dependencies]
thrift_compiler = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }

[features]
default = ["thrift_library_unittests_disabled"]
thrift_library_unittests_disabled = []
13 changes: 13 additions & 0 deletions eden/mononoke/git/packfile/if/TARGETS
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
load("@fbcode_macros//build_defs:thrift_library.bzl", "thrift_library")

oncall("mononoke")

# Thrift library target for the definitions in packfile_thrift.thrift.
# Bindings are requested for the two languages listed below (Rust and cpp2).
thrift_library(
name = "packfile-thrift",
languages = [
"rust",
"cpp2",
],
# Rust-specific Thrift option; the same string is passed as `options` in the
# autocargo-generated thrift_build.rs scripts for this library.
thrift_rust_options = ["deprecated_default_enum_min_i32"],
# Map of Thrift source file -> list of services; the empty list means this
# file declares no services here (NOTE(review): confirm against the
# thrift_library macro documentation).
thrift_srcs = {"packfile_thrift.thrift": []},
)
22 changes: 22 additions & 0 deletions eden/mononoke/git/packfile/if/packfile_thrift.thrift
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/

/// Struct representing the raw packfile item for base objects in Git.
/// It carries an object's already-encoded payload together with the metadata
/// stored alongside it (id, decompressed size, and object kind).
struct GitPackfileBaseItem {
// Identifier of the object as raw bytes.
// NOTE(review): presumably the Git object hash - confirm the expected length/encoding.
1: binary id;
// Size of the object data before compression.
// NOTE(review): presumably in bytes - confirm.
2: i64 decompressed_size;
// The object data in its compressed (encoded) form.
// NOTE(review): the commit summary refers to ZLib encoding - confirm the exact framing.
3: binary compressed_data;
// Which kind of Git base object this item holds (see GitObjectKind).
4: GitObjectKind kind;
} (rust.exhaustive)

/// Enum determining the type of Git base object.
/// The numeric values are explicit, so they are part of the serialized
/// representation and must not be renumbered.
enum GitObjectKind {
// Git tree object.
Tree = 0,
// Git blob object.
Blob = 1,
// Git commit object.
Commit = 2,
// Git tag object.
Tag = 3,
} (rust.exhaustive)
69 changes: 69 additions & 0 deletions eden/mononoke/git/packfile/if/thrift_build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// @generated by autocargo
use std::env;
use std::fs;
use std::path::Path;

use thrift_compiler::Config;
use thrift_compiler::GenContext;

#[rustfmt::skip]
fn main() {
    // Tell Cargo to re-run this build script whenever the script itself changes.
    println!("cargo:rerun-if-changed=thrift_build.rs");

    // Write the `cratemap` file into Cargo's OUT_DIR.
    let out_dir = env::var_os("OUT_DIR").expect("OUT_DIR env not provided");
    let cratemap_path = Path::new(&out_dir).join("cratemap");
    fs::write(cratemap_path, "packfile_thrift crate").expect("Failed to write cratemap");

    // Configure the Thrift compiler for generating the library crate.
    let mut conf = Config::from_env(GenContext::Lib)
        .expect("Failed to instantiate thrift_compiler::Config");

    // Resolve the base path: five levels up from this crate's manifest directory.
    let manifest_dir =
        env::var_os("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not provided");
    let base_path = Path::new(&manifest_dir)
        .join("../../../../..")
        .canonicalize()
        .expect("Failed to canonicalize base_path");
    // TODO: replace canonicalize() with std::path::absolute() when
    // https://github.com/rust-lang/rust/pull/91673 is available (~Rust 1.60)
    // and remove this block.
    // On Windows, strip the leading `\\?\` from the canonicalized path.
    #[cfg(windows)]
    let base_path = Path::new(
        base_path
            .as_path()
            .to_string_lossy()
            .trim_start_matches(r"\\?\"),
    )
    .to_path_buf();

    conf.base_path(base_path);
    conf.types_crate("packfile-thrift__types");

    // Only forward generator options when there are any to forward.
    let generator_options = "deprecated_default_enum_min_i32";
    if !generator_options.is_empty() {
        conf.options(generator_options);
    }

    // This library has no extra include sources for either crate.
    conf.lib_include_srcs(Vec::new());
    conf.types_include_srcs(Vec::new());

    // Run code generation over the Thrift sources of this library.
    let thrift_sources: &[&str] = &["packfile_thrift.thrift"];
    conf.run(thrift_sources)
        .expect("Failed while running thrift compilation");
}
2 changes: 2 additions & 0 deletions eden/mononoke/git/packfile/if/thrift_lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// @generated by autocargo
::codegen_includer_proc_macro::include!();
32 changes: 32 additions & 0 deletions eden/mononoke/git/packfile/if/types/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# @generated by autocargo from //eden/mononoke/git/packfile/if:packfile-thrift-rust-types

[package]
name = "packfile_thrift_types"
version = "0.1.0"
authors = ["Facebook"]
edition = "2021"
license = "GPLv2+"
build = "thrift_build.rs"

[lib]
path = "thrift_lib.rs"
test = false
doctest = false

[dependencies]
anyhow = "1.0.75"
codegen_includer_proc_macro = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
fbthrift = { version = "0.0.1+unstable", git = "https://github.com/facebook/fbthrift.git", branch = "main" }
futures = { version = "0.3.28", features = ["async-await", "compat"] }
once_cell = "1.12"
ref-cast = "1.0.18"
serde = { version = "1.0.185", features = ["derive", "rc"] }
serde_derive = "1.0.185"
thiserror = "1.0.49"

[build-dependencies]
thrift_compiler = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }

[features]
default = ["thrift_library_unittests_disabled"]
thrift_library_unittests_disabled = []
69 changes: 69 additions & 0 deletions eden/mononoke/git/packfile/if/types/thrift_build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// @generated by autocargo
use std::env;
use std::fs;
use std::path::Path;

use thrift_compiler::Config;
use thrift_compiler::GenContext;

#[rustfmt::skip]
fn main() {
    // Tell Cargo to re-run this build script whenever the script itself changes.
    println!("cargo:rerun-if-changed=thrift_build.rs");

    // Write the `cratemap` file into Cargo's OUT_DIR.
    let out_dir = env::var_os("OUT_DIR").expect("OUT_DIR env not provided");
    let cratemap_path = Path::new(&out_dir).join("cratemap");
    fs::write(cratemap_path, "packfile_thrift crate").expect("Failed to write cratemap");

    // Configure the Thrift compiler for generating the types crate.
    let mut conf = Config::from_env(GenContext::Types)
        .expect("Failed to instantiate thrift_compiler::Config");

    // Resolve the base path: six levels up from this crate's manifest directory
    // (one more than the parent `if` crate, since this crate lives in `if/types`).
    let manifest_dir =
        env::var_os("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not provided");
    let base_path = Path::new(&manifest_dir)
        .join("../../../../../..")
        .canonicalize()
        .expect("Failed to canonicalize base_path");
    // TODO: replace canonicalize() with std::path::absolute() when
    // https://github.com/rust-lang/rust/pull/91673 is available (~Rust 1.60)
    // and remove this block.
    // On Windows, strip the leading `\\?\` from the canonicalized path.
    #[cfg(windows)]
    let base_path = Path::new(
        base_path
            .as_path()
            .to_string_lossy()
            .trim_start_matches(r"\\?\"),
    )
    .to_path_buf();

    conf.base_path(base_path);
    conf.types_crate("packfile-thrift__types");

    // Only forward generator options when there are any to forward.
    let generator_options = "deprecated_default_enum_min_i32";
    if !generator_options.is_empty() {
        conf.options(generator_options);
    }

    // This library has no extra include sources for either crate.
    conf.lib_include_srcs(Vec::new());
    conf.types_include_srcs(Vec::new());

    // The Thrift source lives one directory up, next to the parent crate.
    let thrift_sources: &[&str] = &["../packfile_thrift.thrift"];
    conf.run(thrift_sources)
        .expect("Failed while running thrift compilation");
}
2 changes: 2 additions & 0 deletions eden/mononoke/git/packfile/if/types/thrift_lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// @generated by autocargo
::codegen_includer_proc_macro::include!();
2 changes: 2 additions & 0 deletions eden/mononoke/git/packfile/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ pub mod bundle;
mod hash_writer;
pub mod pack;
pub mod types;

pub use packfile_thrift as thrift;
Loading

0 comments on commit d07a437

Please sign in to comment.