Merge pull request #591 from forcedotcom/sm/file-moves-memory-usage

fix: memory usage on giant repos

iowillhoit authored May 29, 2024
2 parents 9be6d8b + 8e5b569 commit 4a33284

Showing 19 changed files with 612 additions and 1,059 deletions.
906 changes: 242 additions & 664 deletions CHANGELOG.md

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions package.json
@@ -51,8 +51,8 @@
"dependencies": {
"@oclif/core": "^3.26.6",
"@salesforce/core": "^7.3.9",
"@salesforce/kit": "^3.1.1",
"@salesforce/source-deploy-retrieve": "^11.6.2",
"@salesforce/kit": "^3.1.2",
"@salesforce/source-deploy-retrieve": "^11.6.3",
"@salesforce/ts-types": "^2.0.9",
"fast-xml-parser": "^4.3.6",
"graceful-fs": "^4.2.11",
18 changes: 18 additions & 0 deletions src/shared/local/functions.ts
@@ -0,0 +1,18 @@
/*
* Copyright (c) 2023, salesforce.com, inc.
* All rights reserved.
* Licensed under the BSD 3-Clause license.
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
*/
import path from 'node:path';
import { WORKDIR, HEAD, FILE } from './localShadowRepo';
import { StatusRow } from './types';

// filenames were normalized when read from isogit

export const toFilenames = (rows: StatusRow[]): string[] => rows.map((row) => row[FILE]);
export const isDeleted = (status: StatusRow): boolean => status[WORKDIR] === 0;
export const isAdded = (status: StatusRow): boolean => status[HEAD] === 0 && status[WORKDIR] === 2;
export const ensureWindows = (filepath: string): string => path.win32.normalize(filepath);
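
For context, a minimal sketch (not part of this commit; the sample rows are assumed) of how these predicates classify isomorphic-git statusMatrix rows, whose shape is [file, head, workdir, stage], where head 0 = absent from HEAD and workdir 0 = absent / 2 = different:

import { isAdded, isDeleted, toFilenames } from './functions';
import { StatusRow } from './types';

const rows: StatusRow[] = [
  ['force-app/new/Foo.cls', 0, 2, 0], // not in HEAD, present in workdir: added
  ['force-app/old/Foo.cls', 1, 0, 1], // in HEAD, missing from workdir: deleted
  ['force-app/Bar.cls', 1, 1, 1], // unchanged
];

console.log(toFilenames(rows.filter(isAdded))); // ['force-app/new/Foo.cls']
console.log(toFilenames(rows.filter(isDeleted))); // ['force-app/old/Foo.cls']
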
273 changes: 49 additions & 224 deletions src/shared/localShadowRepo.ts → src/shared/local/localShadowRepo.ts

Large diffs are not rendered by default.

220 changes: 220 additions & 0 deletions src/shared/local/moveDetection.ts
@@ -0,0 +1,220 @@
/*
* Copyright (c) 2023, salesforce.com, inc.
* All rights reserved.
* Licensed under the BSD 3-Clause license.
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
*/
import path from 'node:path';
import { Logger, Lifecycle } from '@salesforce/core';
import {
MetadataResolver,
SourceComponent,
RegistryAccess,
VirtualTreeContainer,
} from '@salesforce/source-deploy-retrieve';
// @ts-expect-error isogit has both ESM and CJS exports but node16 module/resolution identifies it as ESM
import git from 'isomorphic-git';
import * as fs from 'graceful-fs';
import { Performance } from '@oclif/core';
import { sourceComponentGuard } from '../guards';
import { isDeleted, isAdded, ensureWindows, toFilenames } from './functions';
import { AddAndDeleteMaps, FilenameBasenameHash, StatusRow, StringMap } from './types';

type AddAndDeleteFileInfos = { addedInfo: FilenameBasenameHash[]; deletedInfo: FilenameBasenameHash[] };
type AddedAndDeletedFilenames = { added: Set<string>; deleted: Set<string> };

/** composed functions to simplify use by the ShadowRepo class */
export const filenameMatchesToMap =
  (isWindows: boolean) =>
  (registry: RegistryAccess) =>
  (projectPath: string) =>
  (gitDir: string) =>
  async ({ added, deleted }: AddedAndDeletedFilenames): Promise<StringMap> =>
    removeNonMatches(isWindows)(registry)(
      compareHashes(
        await buildMaps(
          await toFileInfo({
            projectPath,
            gitDir,
            added,
            deleted,
          })
        )
      )
    );

/** compare deletes and adds from git.status, matching basenames of the files. returns early when there's nothing to match */
export const getMatches = (status: StatusRow[]): AddedAndDeletedFilenames => {
  // We check for moved files in incremental steps and exit as early as we can to avoid any performance degradation
  // Deleted files will be rarer than added files, so we check them first and exit early if there are none
  const emptyResult = { added: new Set<string>(), deleted: new Set<string>() };
  const deletedFiles = status.filter(isDeleted);
  if (!deletedFiles.length) return emptyResult;

  const addedFiles = status.filter(isAdded);
  if (!addedFiles.length) return emptyResult;

  // Both arrays have contents, look for matching basenames
  const addedFilenames = toFilenames(addedFiles);
  const deletedFilenames = toFilenames(deletedFiles);

  // Build Sets of basenames for added and deleted files for quick lookups
  const addedBasenames = new Set(addedFilenames.map((filename) => path.basename(filename)));
  const deletedBasenames = new Set(deletedFilenames.map((filename) => path.basename(filename)));

  // TODO: when node 22 is everywhere, we can use Set.prototype.intersection
  // Again, we filter over the deleted files first and exit early if there are no filename matches
  const deletedFilenamesWithMatches = new Set(deletedFilenames.filter((f) => addedBasenames.has(path.basename(f))));
  if (!deletedFilenamesWithMatches.size) return emptyResult;

  const addedFilenamesWithMatches = new Set(addedFilenames.filter((f) => deletedBasenames.has(path.basename(f))));
  if (!addedFilenamesWithMatches.size) return emptyResult;

  return { added: addedFilenamesWithMatches, deleted: deletedFilenamesWithMatches };
};

/** build maps of the adds/deletes keyed by hash#basename, logging and emitting telemetry for any collisions that had to be ignored */
const buildMaps = async ({ addedInfo, deletedInfo }: AddAndDeleteFileInfos): Promise<AddAndDeleteMaps> => {
  const [addedMap, addedIgnoredMap] = buildMap(addedInfo);
  const [deletedMap, deletedIgnoredMap] = buildMap(deletedInfo);

  // If we detected any files that have the same basename and hash, emit a warning and send telemetry
  // These files will still show up as expected in the `sf project deploy preview` output
  // We could add more logic to determine and display filepaths that we ignored...
  // but this is likely rare enough to not warrant the added complexity
  // Telemetry will help us determine how often this occurs
  if (addedIgnoredMap.size || deletedIgnoredMap.size) {
    const message = 'Files were found that have the same basename and hash. Skipping the commit of these files';
    const logger = Logger.childFromRoot('ShadowRepo.compareHashes');
    logger.warn(message);
    const lifecycle = Lifecycle.getInstance();
    await Promise.all([
      lifecycle.emitWarning(message),
      lifecycle.emitTelemetry({ eventName: 'moveFileHashBasenameCollisionsDetected' }),
    ]);
  }
  return { addedMap, deletedMap };
};

/** returns a map of addedFilename => deletedFilename for hash#basename keys present in both maps */
const compareHashes = ({ addedMap, deletedMap }: AddAndDeleteMaps): StringMap => {
  const matches: StringMap = new Map();

  for (const [addedKey, addedValue] of addedMap) {
    const deletedValue = deletedMap.get(addedKey);
    if (deletedValue) {
      matches.set(addedValue, deletedValue);
    }
  }

  return matches;
};

/** given a StringMap of added => deleted matches, resolve the metadata types and keep only the pairs whose type and parent match */
const removeNonMatches =
  (isWindows: boolean) =>
  (registry: RegistryAccess) =>
  (matches: StringMap): StringMap => {
    if (!matches.size) return matches;
    const addedFiles = isWindows ? [...matches.keys()].map(ensureWindows) : [...matches.keys()];
    const deletedFiles = isWindows ? [...matches.values()].map(ensureWindows) : [...matches.values()];
    const resolverAdded = new MetadataResolver(registry, VirtualTreeContainer.fromFilePaths(addedFiles));
    const resolverDeleted = new MetadataResolver(registry, VirtualTreeContainer.fromFilePaths(deletedFiles));

    return new Map(
      [...matches.entries()].filter(([addedFile, deletedFile]) => {
        // we're only ever using the first element of the arrays
        const [resolvedAdded] = resolveType(resolverAdded, isWindows ? [ensureWindows(addedFile)] : [addedFile]);
        const [resolvedDeleted] = resolveType(
          resolverDeleted,
          isWindows ? [ensureWindows(deletedFile)] : [deletedFile]
        );
        return (
          // they could match, or could both be undefined (because unresolved by SDR)
          resolvedAdded?.type.name === resolvedDeleted?.type.name &&
          // parent names match, if resolved and there are parents
          resolvedAdded?.parent?.name === resolvedDeleted?.parent?.name &&
          // parent types match, if resolved and there are parents
          resolvedAdded?.parent?.type.name === resolvedDeleted?.parent?.type.name
        );
      })
    );
  };

/** enrich the filenames with basename and oid (hash) */
const toFileInfo = async ({
  projectPath,
  gitDir,
  added,
  deleted,
}: {
  projectPath: string;
  gitDir: string;
  added: Set<string>;
  deleted: Set<string>;
}): Promise<AddAndDeleteFileInfos> => {
  // Track how long it takes to gather the oid information from the git trees
  const getInfoMarker = Performance.mark('@salesforce/source-tracking', 'localShadowRepo.detectMovedFiles#toFileInfo', {
    addedFiles: added.size,
    deletedFiles: deleted.size,
  });

  const headRef = await git.resolveRef({ fs, dir: projectPath, gitdir: gitDir, ref: 'HEAD' });
  const [addedInfo, deletedInfo] = await Promise.all([
    Promise.all(Array.from(added).map(getHashForAddedFile(projectPath))),
    Promise.all(Array.from(deleted).map(getHashFromActualFileContents(gitDir)(projectPath)(headRef))),
  ]);

  getInfoMarker?.stop();

  return { addedInfo, deletedInfo };
};

/** returns a tuple of [matches, ignored] maps keyed by hash#basename */
const buildMap = (info: FilenameBasenameHash[]): [StringMap, StringMap] => {
  const map: StringMap = new Map();
  const ignore: StringMap = new Map();
  info.forEach((i) => {
    const key = `${i.hash}#${i.basename}`;
    // If we find a duplicate key, we need to remove it and ignore it in the future.
    // Finding duplicate hash#basename means that we cannot accurately determine where it was moved to or from
    if (map.has(key) || ignore.has(key)) {
      map.delete(key);
      ignore.set(key, i.filename);
    } else {
      map.set(key, i.filename);
    }
  });
  return [map, ignore];
};

const getHashForAddedFile =
  (projectPath: string) =>
  async (filepath: string): Promise<FilenameBasenameHash> => ({
    filename: filepath,
    basename: path.basename(filepath),
    hash: (await git.hashBlob({ object: await fs.promises.readFile(path.join(projectPath, filepath)) })).oid,
  });

const resolveType = (resolver: MetadataResolver, filenames: string[]): SourceComponent[] =>
  filenames
    .flatMap((filename) => {
      try {
        return resolver.getComponentsFromPath(filename);
      } catch (e) {
        const logger = Logger.childFromRoot('ShadowRepo.compareTypes');
        logger.warn(`unable to resolve ${filename}`);
        return undefined;
      }
    })
    .filter(sourceComponentGuard);

/** deleted files no longer exist in the workdir, so read the blob (and its hash) from the HEAD tree instead */
const getHashFromActualFileContents =
  (gitdir: string) =>
  (projectPath: string) =>
  (oid: string) =>
  async (filepath: string): Promise<FilenameBasenameHash> => ({
    filename: filepath,
    basename: path.basename(filepath),
    hash: (await git.readBlob({ fs, dir: projectPath, gitdir, filepath, oid })).oid,
  });
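
For orientation, a hedged usage sketch (not part of this commit; assumes a caller shaped like the ShadowRepo class) of how the exported pieces compose, from git status to a map of added => deleted filenames:

import os from 'node:os';
// @ts-expect-error isogit has both ESM and CJS exports but node16 module/resolution identifies it as ESM
import git from 'isomorphic-git';
import * as fs from 'graceful-fs';
import { RegistryAccess } from '@salesforce/source-deploy-retrieve';
import { getMatches, filenameMatchesToMap } from './moveDetection';

const detectMoves = async (projectPath: string, gitDir: string): Promise<Map<string, string>> => {
  const status = await git.statusMatrix({ fs, dir: projectPath, gitdir: gitDir });
  // cheap basename intersection first; bail out before any hashing when nothing matches
  const candidates = getMatches(status);
  if (!candidates.added.size) return new Map();
  // full pipeline: hash the candidates, pair identical hash#basename keys, then drop pairs whose SDR types/parents differ
  return filenameMatchesToMap(os.type() === 'Windows_NT')(new RegistryAccess())(projectPath)(gitDir)(candidates);
};
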
11 changes: 11 additions & 0 deletions src/shared/local/types.ts
@@ -0,0 +1,11 @@
/*
* Copyright (c) 2023, salesforce.com, inc.
* All rights reserved.
* Licensed under the BSD 3-Clause license.
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
*/
export type FilenameBasenameHash = { filename: string; hash: string; basename: string };
export type StringMap = Map<string, string>;
export type AddAndDeleteMaps = { addedMap: StringMap; deletedMap: StringMap };

// a row from isomorphic-git's statusMatrix: https://isomorphic-git.org/docs/en/statusMatrix
export type StatusRow = [file: string, head: number, workdir: number, stage: number];
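
As a rough illustration (example values assumed, not from this commit), move detection keys entries by hash#basename so an add and a delete with identical contents and filename can be paired:

import { AddAndDeleteMaps, StringMap } from './types';

// the same blob hash + basename appearing as both an add and a delete implies a move
const addedMap: StringMap = new Map([['abc123#Foo.cls', 'force-app/new/Foo.cls']]);
const deletedMap: StringMap = new Map([['abc123#Foo.cls', 'force-app/old/Foo.cls']]);
const maps: AddAndDeleteMaps = { addedMap, deletedMap };
// compareHashes(maps) from moveDetection.ts would yield:
// Map { 'force-app/new/Foo.cls' => 'force-app/old/Foo.cls' }
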
2 changes: 1 addition & 1 deletion src/sourceTracking.ts
Expand Up @@ -30,7 +30,7 @@ import {
import { filePathsFromMetadataComponent } from '@salesforce/source-deploy-retrieve/lib/src/utils/filePathGenerator';
import { Performance } from '@oclif/core';
import { RemoteSourceTrackingService, remoteChangeElementToChangeResult } from './shared/remoteSourceTrackingService';
-import { ShadowRepo } from './shared/localShadowRepo';
+import { ShadowRepo } from './shared/local/localShadowRepo';
import { throwIfConflicts, findConflictsInComponentSet, getDedupedConflictsFromChanges } from './shared/conflicts';
import {
  RemoteSyncInput,
Expand Down
2 changes: 1 addition & 1 deletion test/nuts/local/commitPerf.nut.ts
Expand Up @@ -8,7 +8,7 @@ import path from 'node:path';
import { TestSession } from '@salesforce/cli-plugins-testkit';
import { expect } from 'chai';
import { RegistryAccess } from '@salesforce/source-deploy-retrieve';
-import { ShadowRepo } from '../../../src/shared/localShadowRepo';
+import { ShadowRepo } from '../../../src/shared/local/localShadowRepo';

describe('perf testing for big commits', () => {
  let session: TestSession;
Expand Down
63 changes: 35 additions & 28 deletions test/nuts/local/localTrackingFileMovesDecomposedChild.nut.ts
@@ -10,14 +10,15 @@ import { TestSession } from '@salesforce/cli-plugins-testkit';
import { expect } from 'chai';
import * as fs from 'graceful-fs';
import { RegistryAccess } from '@salesforce/source-deploy-retrieve';
-import { ShadowRepo } from '../../../src/shared/localShadowRepo';
+import { ShadowRepo } from '../../../src/shared/local/localShadowRepo';

/* eslint-disable no-unused-expressions */

describe('ignores moved files that are children of a decomposed metadata type', () => {
+  const FIELD = path.join('fields', 'Account__c.field-meta.xml');
  let session: TestSession;
  let repo: ShadowRepo;
-  let filesToSync: string[];
+  let objectsDir: string;

  before(async () => {
    session = await TestSession.create({
@@ -26,12 +27,17 @@ describe('ignores moved files that are children of a decomposed metadata type',
      },
      devhubAuthStrategy: 'NONE',
    });
+    objectsDir = path.join(session.project.dir, 'force-app', 'main', 'default', 'objects');
  });

  after(async () => {
    await session?.clean();
  });

+  afterEach(() => {
+    delete process.env.SF_BETA_TRACK_FILE_MOVES;
+  });

  it('initialize the local tracking', async () => {
    repo = await ShadowRepo.getInstance({
      orgId: 'fakeOrgId',
@@ -44,32 +50,12 @@
  it('should ignore moved child metadata', async () => {
    expect(process.env.SF_BETA_TRACK_FILE_MOVES).to.be.undefined;
    process.env.SF_BETA_TRACK_FILE_MOVES = 'true';
-    // Commit the existing class files
-    filesToSync = await repo.getChangedFilenames();
+    // Commit the existing files
+    const filesToSync = await repo.getChangedFilenames();
    await repo.commitChanges({ deployedFiles: filesToSync });

-    // move all the classes to the new folder
-    const objectFieldOld = path.join(
-      session.project.dir,
-      'force-app',
-      'main',
-      'default',
-      'objects',
-      'Order__c',
-      'fields',
-      'Account__c.field-meta.xml'
-    );
-    const objectFieldNew = path.join(
-      session.project.dir,
-      'force-app',
-      'main',
-      'default',
-      'objects',
-      'Product__c',
-      'fields',
-      'Account__c.field-meta.xml'
-    );
-    // fs.mkdirSync(path.join(session.project.dir, 'force-app', 'main', 'foo'), { recursive: true });
+    // move the field from one object to another
+    const objectFieldOld = path.join(objectsDir, 'Order__c', FIELD);
+    const objectFieldNew = path.join(objectsDir, 'Product__c', FIELD);
    fs.renameSync(objectFieldOld, objectFieldNew);

    await repo.getStatus(true);
@@ -78,6 +64,27 @@
.to.be.an('array')
.with.lengthOf(2);

delete process.env.SF_BETA_TRACK_FILE_MOVES;
// put it back how it was and verify the tracking
fs.renameSync(objectFieldNew, objectFieldOld);
await repo.getStatus(true);

expect(await repo.getChangedFilenames())
.to.be.an('array')
.with.lengthOf(0);
});

it('should clear tracking when the field is moved to another dir', async () => {
const newDir = path.join(session.project.dir, 'force-app', 'other', 'objects', 'Order__c', 'fields');
await fs.promises.mkdir(newDir, {
recursive: true,
});
const objectFieldOld = path.join(objectsDir, 'Order__c', FIELD);
const objectFieldNew = path.join(objectsDir, 'Order__c', FIELD);
fs.renameSync(objectFieldOld, objectFieldNew);
await repo.getStatus(true);

expect(await repo.getChangedFilenames())
.to.be.an('array')
.with.lengthOf(0);
});
});