From 0d6025112e3ead51196c0013572c5200b1f7f352 Mon Sep 17 00:00:00 2001 From: Luisa Vasquez Gomez Date: Wed, 11 Dec 2024 04:57:17 -0800 Subject: [PATCH] modern sync: traverse hg manifest Summary: This uses `compare_manifest_tree` to find all new elements between a commit and its parents, helping us find all the information needed to send an identical commit through edenapi. Reviewed By: markbt Differential Revision: D66945660 fbshipit-source-id: d4d7c95f024eba395fc46151a32458a5aa2f7b71 --- eden/mononoke/modern_sync/Cargo.toml | 3 + eden/mononoke/modern_sync/src/sync.rs | 83 +++++++++++++++++++ .../modern_sync/test-modern-sync.t | 28 +++++++ .../integration/modern_sync/test-sync-one.t | 4 + 4 files changed, 118 insertions(+) diff --git a/eden/mononoke/modern_sync/Cargo.toml b/eden/mononoke/modern_sync/Cargo.toml index 79d44df631370..7f4a57771477e 100644 --- a/eden/mononoke/modern_sync/Cargo.toml +++ b/eden/mononoke/modern_sync/Cargo.toml @@ -23,6 +23,9 @@ executor_lib = { version = "0.1.0", path = "../cmdlib/sharding" } facet = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" } fbinit = { version = "0.2.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" } futures = { version = "0.3.30", features = ["async-await", "compat"] } +manifest = { version = "0.1.0", path = "../manifest" } +mercurial_derivation = { version = "0.1.0", path = "../derived_data/mercurial_derivation" } +mercurial_types = { version = "0.1.0", path = "../mercurial/types" } metaconfig_types = { version = "0.1.0", path = "../metaconfig/types" } metadata = { version = "0.1.0", path = "../server/metadata" } mononoke_app = { version = "0.1.0", path = "../cmdlib/mononoke_app" } diff --git a/eden/mononoke/modern_sync/src/sync.rs b/eden/mononoke/modern_sync/src/sync.rs index 6f21aeeb04c5f..cfe24f2826fac 100644 --- a/eden/mononoke/modern_sync/src/sync.rs +++ b/eden/mononoke/modern_sync/src/sync.rs @@ -23,12 +23,20 @@ use 
context::CoreContext; use context::SessionContainer; use futures::StreamExt; use futures::TryStreamExt; +use manifest::compare_manifest_tree; +use manifest::Comparison; +use manifest::Entry; +use manifest::ManifestOps; +use mercurial_derivation::derive_hg_changeset::DeriveHgChangeset; +use mercurial_types::blobs::HgBlobManifest; +use mercurial_types::HgManifestId; use metadata::Metadata; use mononoke_app::args::RepoArg; use mononoke_app::MononokeApp; use mononoke_types::ChangesetId; use mononoke_types::FileChange; use mutable_counters::MutableCountersRef; +use repo_blobstore::RepoBlobstore; use repo_blobstore::RepoBlobstoreRef; use repo_derived_data::RepoDerivedDataRef; use repo_identity::RepoIdentityRef; @@ -158,6 +166,8 @@ pub async fn sync( Err(e) } Ok(entries) => { + // TODO: We probably want to get these in inverse order so once we derive the top parent + // the children will already be derived. bul_util::get_commit_stream(entries, repo.commit_graph_arc(), ctx) .await .fuse() @@ -214,9 +224,82 @@ pub async fn process_one_changeset( } } + let mut mf_ids_p = vec![]; + + // TODO: Parallelize + for parent in cs_info.parents() { + let hg_cs_id = repo.derive_hg_changeset(ctx, parent).await?; + let hg_cs = hg_cs_id.load(ctx, repo.repo_blobstore()).await?; + let hg_mf_id = hg_cs.manifestid(); + mf_ids_p.push(hg_mf_id); + } + + let hg_cs_id = repo.derive_hg_changeset(ctx, *cs_id).await?; + let hg_cs = hg_cs_id.load(ctx, repo.repo_blobstore()).await?; + let hg_mf_id = hg_cs.manifestid(); + + sync_manifest_changes(logger, ctx, repo.repo_blobstore(), hg_mf_id, mf_ids_p).await?; + if log_completion { STATS::synced_commits.add_value(1, (repo.repo_identity().name().to_string(),)); } Ok(()) } + +async fn sync_manifest_changes( + logger: &Logger, + ctx: &CoreContext, + repo_blobstore: &RepoBlobstore, + mf_id: HgManifestId, + mf_ids_p: Vec, +) -> Result<()> { + let comparison_stream = + compare_manifest_tree::(ctx, repo_blobstore, mf_id, mf_ids_p); + 
futures::pin_mut!(comparison_stream); + while let Some(mf) = comparison_stream.try_next().await? { + match mf { + Comparison::New(_elem, entry) => { + info!(logger, "New manifest"); + match entry { + Entry::Tree(mf_id) => { + info!(logger, "Tree {:?}", mf_id); + let entries = mf_id + .list_all_entries(ctx.clone(), repo_blobstore.clone()) + .try_collect::>() + .await?; + info!(logger, "Tree entries {:?}", entries); + } + Entry::Leaf((_ftype, nodeid)) => { + info!(logger, "Leaf {:?}", nodeid); + } + } + } + Comparison::Changed(_path, _mf_id, _changes) => { + info!(logger, "Changed manifest"); + } + Comparison::ManyNew(_path, _prefix, map) => { + info!(logger, "Many new or changed manifests"); + for (_path, entry) in map { + match entry { + Entry::Tree(mf_id) => { + info!(logger, "Tree {:?}", mf_id); + let entries = mf_id + .list_all_entries(ctx.clone(), repo_blobstore.clone()) + .try_collect::>() + .await?; + + info!(logger, "Tree entries {:?}", entries); + } + Entry::Leaf((_ftype, nodeid)) => { + info!(logger, "Found filenode id {:?}", nodeid); + } + } + } + } + _ => (), + } + } + + Ok(()) +} diff --git a/eden/mononoke/tests/integration/modern_sync/test-modern-sync.t b/eden/mononoke/tests/integration/modern_sync/test-modern-sync.t index 5f0ecc2cf2e66..b818f00e2dd4a 100644 --- a/eden/mononoke/tests/integration/modern_sync/test-modern-sync.t +++ b/eden/mononoke/tests/integration/modern_sync/test-modern-sync.t @@ -70,6 +70,10 @@ Sync all bookmarks moves Uploading content with id: ContentId(Blake2(be87911855af0fc33a75f2c1cba2269dd90faa7f5c5358eb640d9d65f55fced3)) Uploading bytes: b"abc\n" Upload response: [UploadToken { data: UploadTokenData { id: AnyFileContentId(ContentId(ContentId("be87911855af0fc33a75f2c1cba2269dd90faa7f5c5358eb640d9d65f55fced3"))), bubble_id: None, metadata: Some(FileContentTokenMetadata(FileContentTokenMetadata { content_size: 4 })) }, signature: UploadTokenSignature { signature: [102, 97, 107, 101, 116, 111, 107, 101, 110, 115, 105, 103, 110, 97, 
116, 117, 114, 101] } }] + Many new or changed manifests + Found filenode id HgFileNodeId(HgNodeHash(Sha1(005d992c5dcf32993668f7cede29d296c494a5d9))) + Tree HgManifestId(HgNodeHash(Sha1(c1afe800646ee45232ab5e70c57247b78dbf3899))) + Tree entries [(MPath(""), Tree(HgManifestId(HgNodeHash(Sha1(c1afe800646ee45232ab5e70c57247b78dbf3899))))), (MPath("dir2"), Tree(HgManifestId(HgNodeHash(Sha1(53b19c5f23977836390e5880ec30fd252a311384))))), (MPath("dir2/first"), Leaf((Regular, HgFileNodeId(HgNodeHash(Sha1(f9304d84edb8a8ee2d3ce3f9de3ea944c82eba8f))))))] Found commit ChangesetId(Blake2(5b1c7130dde8e54b4285b9153d8e56d69fbf4ae685eaf9e9766cc409861995f8)) Commit info ChangesetInfo { changeset_id: ChangesetId(Blake2(5b1c7130dde8e54b4285b9153d8e56d69fbf4ae685eaf9e9766cc409861995f8)), parents: [ChangesetId(Blake2(ba1a2b3ca64cead35117cb2b707da1211cf43639ade917aee655f3875f4922c3))], author: "test", author_date: DateTime(1970-01-01T00:00:00+00:00), committer: None, committer_date: None, message: Message("E"), hg_extra: {}, git_extra_headers: None } File change Change(TrackedFileChange { inner: BasicFileChange { content_id: ContentId(Blake2(1b1e26f01a806e123b37492672d2756e1c25bb31f1e15cfda410c149c317e130)), file_type: Regular, size: 1, git_lfs: FullContent }, copy_from: None }) @@ -82,6 +86,12 @@ Sync all bookmarks moves Uploading content with id: ContentId(Blake2(96475ef07b63bf02679e9964ff65f0f96883f53d0718671bd44cce830bbf2ebd)) Uploading bytes: b"abcdefg\n" Upload response: [UploadToken { data: UploadTokenData { id: AnyFileContentId(ContentId(ContentId("96475ef07b63bf02679e9964ff65f0f96883f53d0718671bd44cce830bbf2ebd"))), bubble_id: None, metadata: Some(FileContentTokenMetadata(FileContentTokenMetadata { content_size: 8 })) }, signature: UploadTokenSignature { signature: [102, 97, 107, 101, 116, 111, 107, 101, 110, 115, 105, 103, 110, 97, 116, 117, 114, 101] } }] + Many new or changed manifests + Found filenode id 
HgFileNodeId(HgNodeHash(Sha1(dba92ad67dc1f3732ab73a5f51b77129275a1724))) + Changed manifest + Changed manifest + Many new or changed manifests + Found filenode id HgFileNodeId(HgNodeHash(Sha1(b31c6c30a54b89020d5ac28a67917349512d75eb))) Found commit ChangesetId(Blake2(ba1a2b3ca64cead35117cb2b707da1211cf43639ade917aee655f3875f4922c3)) Commit info ChangesetInfo { changeset_id: ChangesetId(Blake2(ba1a2b3ca64cead35117cb2b707da1211cf43639ade917aee655f3875f4922c3)), parents: [ChangesetId(Blake2(41deea4804cd27d1f4efbec135d839338804a5dfcaf364863bd0289067644db5))], author: "test", author_date: DateTime(1970-01-01T00:00:00+00:00), committer: None, committer_date: None, message: Message("D"), hg_extra: {}, git_extra_headers: None } File change Change(TrackedFileChange { inner: BasicFileChange { content_id: ContentId(Blake2(90c8e211c758a9bbcd33e463c174f1693692677cb76c7aaf4ce41aa0a29334c0)), file_type: Regular, size: 1, git_lfs: FullContent }, copy_from: None }) @@ -94,6 +104,12 @@ Sync all bookmarks moves Uploading content with id: ContentId(Blake2(5d3bfab620332130430c7f540f9fe0b3b0079d0b9b632e0dae96a1424a7a4242)) Uploading bytes: b"abcdef\n" Upload response: [UploadToken { data: UploadTokenData { id: AnyFileContentId(ContentId(ContentId("5d3bfab620332130430c7f540f9fe0b3b0079d0b9b632e0dae96a1424a7a4242"))), bubble_id: None, metadata: Some(FileContentTokenMetadata(FileContentTokenMetadata { content_size: 7 })) }, signature: UploadTokenSignature { signature: [102, 97, 107, 101, 116, 111, 107, 101, 110, 115, 105, 103, 110, 97, 116, 117, 114, 101] } }] + Many new or changed manifests + Found filenode id HgFileNodeId(HgNodeHash(Sha1(4eec8cfdabce9565739489483b6ad93ef7657ea9))) + Changed manifest + Changed manifest + Many new or changed manifests + Found filenode id HgFileNodeId(HgNodeHash(Sha1(aae2838d921bcc14ccbb9212f4175f300fd9f2f8))) Found commit ChangesetId(Blake2(41deea4804cd27d1f4efbec135d839338804a5dfcaf364863bd0289067644db5)) Commit info ChangesetInfo { changeset_id: 
ChangesetId(Blake2(41deea4804cd27d1f4efbec135d839338804a5dfcaf364863bd0289067644db5)), parents: [ChangesetId(Blake2(8a9d572a899acdef764b88671c24b94a8b0780c1591a5a9bca97184c2ef0f304))], author: "test", author_date: DateTime(1970-01-01T00:00:00+00:00), committer: None, committer_date: None, message: Message("C"), hg_extra: {}, git_extra_headers: None } File change Change(TrackedFileChange { inner: BasicFileChange { content_id: ContentId(Blake2(896ad5879a5df0403bfc93fc96507ad9c93b31b11f3d0fa05445da7918241e5d)), file_type: Regular, size: 1, git_lfs: FullContent }, copy_from: None }) @@ -106,6 +122,12 @@ Sync all bookmarks moves Uploading content with id: ContentId(Blake2(c86e7a7ee4c102efc1e5166dd95c1c73fcbff59dc3b04dc79fbbf3d1d10350ed)) Uploading bytes: b"abcde\n" Upload response: [UploadToken { data: UploadTokenData { id: AnyFileContentId(ContentId(ContentId("c86e7a7ee4c102efc1e5166dd95c1c73fcbff59dc3b04dc79fbbf3d1d10350ed"))), bubble_id: None, metadata: Some(FileContentTokenMetadata(FileContentTokenMetadata { content_size: 6 })) }, signature: UploadTokenSignature { signature: [102, 97, 107, 101, 116, 111, 107, 101, 110, 115, 105, 103, 110, 97, 116, 117, 114, 101] } }] + Many new or changed manifests + Found filenode id HgFileNodeId(HgNodeHash(Sha1(a2e456504a5e61f763f1a0b36a6c247c7541b2b3))) + Changed manifest + Changed manifest + Many new or changed manifests + Found filenode id HgFileNodeId(HgNodeHash(Sha1(9bad1c227e9133a5bbae1652c889406d35e6dac1))) Found commit ChangesetId(Blake2(8a9d572a899acdef764b88671c24b94a8b0780c1591a5a9bca97184c2ef0f304)) Commit info ChangesetInfo { changeset_id: ChangesetId(Blake2(8a9d572a899acdef764b88671c24b94a8b0780c1591a5a9bca97184c2ef0f304)), parents: [ChangesetId(Blake2(53b034a90fe3002a707a7da9cdf6eac3dea460ad72f7c6969dfb88fd0e69f856))], author: "test", author_date: DateTime(1970-01-01T00:00:00+00:00), committer: None, committer_date: None, message: Message("B"), hg_extra: {}, git_extra_headers: None } File change 
Change(TrackedFileChange { inner: BasicFileChange { content_id: ContentId(Blake2(55662471e2a28db8257939b2f9a2d24e65b46a758bac12914a58f17dcde6905f)), file_type: Regular, size: 1, git_lfs: FullContent }, copy_from: None }) @@ -118,6 +140,12 @@ Sync all bookmarks moves Uploading content with id: ContentId(Blake2(fbc4b9b407225e86008840c4095edb4f66a62bad80529b6e120bfa7d605f9423)) Uploading bytes: b"abcd\n" Upload response: [UploadToken { data: UploadTokenData { id: AnyFileContentId(ContentId(ContentId("fbc4b9b407225e86008840c4095edb4f66a62bad80529b6e120bfa7d605f9423"))), bubble_id: None, metadata: Some(FileContentTokenMetadata(FileContentTokenMetadata { content_size: 5 })) }, signature: UploadTokenSignature { signature: [102, 97, 107, 101, 116, 111, 107, 101, 110, 115, 105, 103, 110, 97, 116, 117, 114, 101] } }] + Many new or changed manifests + Found filenode id HgFileNodeId(HgNodeHash(Sha1(35e7525ce3a48913275d7061dd9a867ffef1e34d))) + Changed manifest + Changed manifest + Many new or changed manifests + Found filenode id HgFileNodeId(HgNodeHash(Sha1(778675f9ec8d35ff2fce23a34f68edd15d783853))) $ cat $TESTTMP/modern_sync_scuba_logs | jq | rg "start_id|dry_run|repo" "start_id": 0, diff --git a/eden/mononoke/tests/integration/modern_sync/test-sync-one.t b/eden/mononoke/tests/integration/modern_sync/test-sync-one.t index ee42ff74f42ad..20716f89c3ba7 100644 --- a/eden/mononoke/tests/integration/modern_sync/test-sync-one.t +++ b/eden/mononoke/tests/integration/modern_sync/test-sync-one.t @@ -69,6 +69,10 @@ Uploading content with id: ContentId(Blake2(be87911855af0fc33a75f2c1cba2269dd90faa7f5c5358eb640d9d65f55fced3)) Uploading bytes: b"abc\n" Upload response: [UploadToken { data: UploadTokenData { id: AnyFileContentId(ContentId(ContentId("be87911855af0fc33a75f2c1cba2269dd90faa7f5c5358eb640d9d65f55fced3"))), bubble_id: None, metadata: Some(FileContentTokenMetadata(FileContentTokenMetadata { content_size: 4 })) }, signature: UploadTokenSignature { signature: [102, 97, 107, 101, 
116, 111, 107, 101, 110, 115, 105, 103, 110, 97, 116, 117, 114, 101] } }] + Many new or changed manifests + Found filenode id HgFileNodeId(HgNodeHash(Sha1(005d992c5dcf32993668f7cede29d296c494a5d9))) + Tree HgManifestId(HgNodeHash(Sha1(c1afe800646ee45232ab5e70c57247b78dbf3899))) + Tree entries [(MPath(""), Tree(HgManifestId(HgNodeHash(Sha1(c1afe800646ee45232ab5e70c57247b78dbf3899))))), (MPath("dir2"), Tree(HgManifestId(HgNodeHash(Sha1(53b19c5f23977836390e5880ec30fd252a311384))))), (MPath("dir2/first"), Leaf((Regular, HgFileNodeId(HgNodeHash(Sha1(f9304d84edb8a8ee2d3ce3f9de3ea944c82eba8f))))))] $ mononoke_admin filestore -R orig fetch --content-id eb56488e97bb4cf5eb17f05357b80108a4a71f6c3bab52dfcaec07161d105ec9 A (no-eol)