Skip to content
This repository has been archived by the owner on Aug 15, 2023. It is now read-only.

Commit

Permalink
fix(shared): replace 'jsdom' with 'linkedom' to prevent the parser from crashing for memory allocation failure (#859)
Browse files Browse the repository at this point in the history
  • Loading branch information
ascariandrea authored Jan 20, 2023
1 parent 027d3af commit 71f2501
Show file tree
Hide file tree
Showing 64 changed files with 249 additions and 138 deletions.
4 changes: 3 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ __mocks__
node_modules
docker
!docker/guardoni/guardoni.config.json
#.yarn/cache
#.yarn/cache
**/__tests__
**/fixtures
27 changes: 27 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,33 @@
},
"args": ["."],
"outputCapture": "std"
},
{
"type": "node",
"request": "attach",
"name": "Attach to yt:server:watch (yt:watch)",
"protocol": "inspector",
"port": 4320,
"restart": true,
"cwd": "${workspaceRoot}/platforms/yttrex/backend"
},
{
"type": "node",
"request": "attach",
"name": "Attach to yt:parserv:watch",
"protocol": "inspector",
"port": 4321,
"restart": true,
"cwd": "${workspaceRoot}/platforms/yttrex/backend"
},
{
"type": "node",
"request": "attach",
"name": "Attach to yt:leaveserv:watch",
"protocol": "inspector",
"port": 4322,
"restart": true,
"cwd": "${workspaceRoot}/platforms/yttrex/backend"
}
]
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file removed .yarn/cache/ws-npm-8.9.0-d620434feb-23aa0f021b.zip
Binary file not shown.
1 change: 0 additions & 1 deletion Dockerfile.ytbackend
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,5 @@ WORKDIR /app/platforms/yttrex/backend
RUN yarn workspaces focus --production

ENV key=fuffa
ENV DEBUG=@trex*

CMD ["yarn", "start"]
33 changes: 31 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,18 +65,47 @@ services:
tty: true # docker run -t

yt-backend:
image: trex:yt-backend
build:
dockerfile: Dockerfile.ytbackend
context: .
container_name: yt-backend

yt-server:
image: trex:yt-backend
container_name: yt-server
command: yarn start
ports:
- '9000:9000'
env_file:
- ./platforms/yttrex/backend/.env
environment:
- DEBUG=*
- BACKEND_DEBUG=yttrex*,@trex*
- mongoHost=mongodb
depends_on:
- yt-backend
- mongodb
- mongo-yt-indexes
deploy:
resources:
limits:
memory: 1000M

yt-parser:
image: trex:yt-backend
container_name: yt-parser
command: yarn parserv
env_file:
- ./platforms/yttrex/backend/.env
environment:
- PARSER_DEBUG=@trex:htmls:*,-@trex:htmls:debug
- mongoHost=mongodb
depends_on:
- mongodb
- mongo-yt-indexes
deploy:
resources:
limits:
memory: 2048M

tk-backend:
build:
Expand Down
4 changes: 1 addition & 3 deletions packages/shared/src/models/MetadataBase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@ export const MetadataBase = t.type(
href: t.string,
/**
* The supporter publicKey
*
* TODO: it may be replaced by the supporter id
*/
blang: t.union([t.string, t.null, t.undefined]),
supporter: t.string,
blang: t.union([t.string, t.null, t.undefined]),
researchTag: t.union([t.string, t.undefined]),
experimentId: t.union([t.string, t.undefined]),
/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import {
GetParserProvider,
ParserProviderContext,
wrapDissector,
} from '../parser.provider';
} from '../parser/parser.provider';

const logger = GetLogger('parser-spec');

Expand Down Expand Up @@ -112,7 +112,7 @@ describe('Parser Provider', () => {

const output = getSuccessfulOutput<
Contribution,
Metadata,
any,
any,
{ [key: string]: any }
>(
Expand All @@ -134,9 +134,8 @@ describe('Parser Provider', () => {

expect(output).toMatchObject({
[source.id.substring(0, 6)]: {
...metadata,
failures: '[]',
nature: { type: metadata.type },
nature: metadata.type,
count: JSON.stringify({ metadata: 1, source: 1 }),
},
});
Expand Down
1 change: 1 addition & 0 deletions packages/shared/src/providers/parser/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from './parser.provider';
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ import subMinutes from 'date-fns/subMinutes';
import * as t from 'io-ts';
import _ from 'lodash';
import { MongoClient } from 'mongodb';
import { Logger, trexLogger } from '../logger';
import { sleep } from '../utils/promise.utils';
import type * as mongo3 from './mongo.provider';
import { Logger, trexLogger } from '../../logger';
import { sleep } from '../../utils/promise.utils';
import type * as mongo3 from '../mongo.provider';

/**
* The parser configuration
Expand Down Expand Up @@ -491,7 +491,8 @@ export const parseContributions =
ctx.log.debug('Parsed %O', result);
const oldMetadata = await ctx.getMetadata(entry);
const metadata = ctx.buildMetadata(result, oldMetadata);
ctx.log.debug('Metadata %O', metadata);

// ctx.log.debug('Metadata %O', metadata);

if (metadata) {
const m = await ctx.saveResults(result.source, metadata);
Expand All @@ -513,6 +514,15 @@ export const parseContributions =
return results;
};

/**
 * Take a snapshot of the current process memory usage expressed in megabytes.
 *
 * Every counter reported by `process.memoryUsage()` is divided by 1024 twice
 * and rounded to two decimal places; the keys are kept as reported.
 *
 * @returns an object with the same keys as `process.memoryUsage()`, whose
 * values are megabytes instead of bytes
 */
const getMemoryUsed = (): NodeJS.MemoryUsage => {
  // bytes -> megabytes, keeping two decimal digits
  const toMegabytes = (bytes: number): number =>
    Math.round((bytes / 1024 / 1024) * 100) / 100;

  const rounded: Array<[string, number]> = Object.entries(
    process.memoryUsage()
  ).map(([field, bytes]) => [field, toMegabytes(bytes)]);

  // the rebuilt object is keyed dynamically, so it is handed back untyped
  const memoryLog: any = Object.fromEntries(rounded);
  return memoryLog;
};

/* yes a better way might exist */
let previousFrequency = 0;

Expand Down Expand Up @@ -552,6 +562,8 @@ export const executionLoop =
stop,
processedCounter
);
ctx.log.info('Memory usage %O (MB)', getMemoryUsed());

let htmlFilter: Record<string, any> = {
savingTime: {
$gt: lastExecution,
Expand Down Expand Up @@ -663,14 +675,14 @@ export const getSuccessfulOutput = <
): any => {
return output.reduce((acc, { source, metadata, failures, log, count }) => {
const index = getEntryId(source).substring(0, 6);
const { id, nature } = (metadata as any) ?? {};
const n: any = nature;
return {
...acc,
[index]: {
...log,
// log: JSON.stringify(log),
// findings: markOutputField(findings),
// metadata: (metadata as any)?.id ?? null,
...metadata,
id,
nature: n?.nature?.type ?? n?.type,
failures: JSON.stringify(
Object.entries(failures).map(([key, value]) => ({
[key]: value.message,
Expand Down
2 changes: 1 addition & 1 deletion packages/shared/src/test/utils/parser.utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import {
ParserFn,
ParserProviderContext,
printResultOutput,
} from '../../providers/parser.provider';
} from '../../providers/parser/parser.provider';

/**
* Read fixtures file from path
Expand Down
2 changes: 1 addition & 1 deletion platforms/tktrex/backend/bin/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ nconf.argv().env().file({ file: 'config/settings.json' });

import { FixtureReporter } from '@shared/parser/reporters/FixtureReporter';
import * as mongo3 from '@shared/providers/mongo.provider';
import { GetParserProvider } from '@shared/providers/parser.provider';
import { GetParserProvider } from '@shared/providers/parser';
import { TKMetadata } from '@tktrex/shared/models/metadata';
import { parsers } from '@tktrex/shared/parser/parsers';
import { HTMLSource } from '@tktrex/shared/parser/source';
Expand Down
6 changes: 3 additions & 3 deletions platforms/tktrex/backend/lib/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ import {
GetMetadataFn,
ParserProviderContextDB,
SaveResults,
} from '@shared/providers/parser.provider';
} from '@shared/providers/parser';
import { sanitizeHTML } from '@shared/utils/html.utils';
import { TKMetadata } from '@tktrex/shared/models/metadata';
import { TKParserConfig } from '@tktrex/shared/parser/config';
import { HTMLSource } from '@tktrex/shared/parser/source';
import D from 'debug';
import { JSDOM } from 'jsdom';
import _ from 'lodash';
import nconf from 'nconf';
import { parseHTML } from 'linkedom';

const debug = D('lib:parserchain');

Expand All @@ -35,7 +35,7 @@ export const parserConfig: TKParserConfig = {

export const addDom: ContributionAndDOMFn<HTMLSource> = (e) => ({
...e,
jsdom: new JSDOM(sanitizeHTML(e.html.html)).window.document,
jsdom: parseHTML(sanitizeHTML(e.html.html)).window.document,
});

export const getLastHTMLs =
Expand Down
3 changes: 1 addition & 2 deletions platforms/tktrex/backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"geoip-lite": "^1.4.6",
"io-ts": "^2.2.16",
"io-ts-types": "^0.5.19",
"jsdom": "^16.7.0",
"linkedom": "^0.14.21",
"lodash": "^4.17.21",
"module-alias": "^2.2.2",
"moment": "^2.29.4",
Expand All @@ -50,7 +50,6 @@
"@types/eslint": "^8",
"@types/express": "^4.17.13",
"@types/geoip-lite": "^1.4.1",
"@types/jsdom": "^16",
"@types/lodash": "^4.14.186",
"@types/module-alias": "^2",
"@types/node": "^16.11.68",
Expand Down
2 changes: 1 addition & 1 deletion platforms/tktrex/backend/routes/__tests__/personal.e2e.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import bs58 from '@shared/providers/bs58.provider';
import {
GetParserProvider,
ParserProvider,
} from '@shared/providers/parser.provider';
} from '@shared/providers/parser';
import { fc } from '@shared/test';
import {
readFixtureJSON,
Expand Down
2 changes: 1 addition & 1 deletion platforms/tktrex/shared/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"dependencies": {
"fp-ts": "^2.11.9",
"io-ts": "^2.2.16",
"linkedom": "^0.14.4",
"linkedom": "^0.14.21",
"mongodb": "^4.12.1",
"ts-endpoint": "^2.0.0"
},
Expand Down
2 changes: 1 addition & 1 deletion platforms/tktrex/shared/src/parser/config.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { ParserConfiguration } from '@shared/providers/parser.provider';
import { ParserConfiguration } from '@shared/providers/parser';

/**
* The TK Parser configuration interface
Expand Down
2 changes: 1 addition & 1 deletion platforms/tktrex/shared/src/parser/metadata.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { BuildMetadataFn } from '@shared/providers/parser.provider';
import { BuildMetadataFn } from '@shared/providers/parser';
import _ from 'lodash';
import { TKMetadata } from '../models/metadata';
import { TKParsers } from './parsers';
Expand Down
2 changes: 1 addition & 1 deletion platforms/tktrex/shared/src/parser/parsers/author.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { ParserFn } from '@shared/providers/parser.provider';
import { ParserFn } from '@shared/providers/parser';
import { TKParserConfig } from '../config';
import { HTMLSource } from '../source';

Expand Down
2 changes: 1 addition & 1 deletion platforms/tktrex/shared/src/parser/parsers/description.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import {
ContributionWithDOM,
ParserFn,
} from '@shared/providers/parser.provider';
} from '@shared/providers/parser';
import { TKParserConfig } from '../config';
import { HTMLSource } from '../source';
import _ from 'lodash';
Expand Down
2 changes: 1 addition & 1 deletion platforms/tktrex/shared/src/parser/parsers/downloader.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { ParserFn } from '@shared/providers/parser.provider';
import { ParserFn } from '@shared/providers/parser';
import _ from 'lodash';
import D from 'debug';
import { HTMLSource } from '../source';
Expand Down
2 changes: 1 addition & 1 deletion platforms/tktrex/shared/src/parser/parsers/foryou.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { ParserFn } from '@shared/providers/parser.provider';
import { ParserFn } from '@shared/providers/parser';
import D from 'debug';
import { MediaFile } from '../../models/metadata/MediaFile';
import { TKParserConfig } from '../config';
Expand Down
2 changes: 1 addition & 1 deletion platforms/tktrex/shared/src/parser/parsers/hashtags.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { map } from 'lodash';
import { ParserFn } from '@shared/providers/parser.provider';
import { ParserFn } from '@shared/providers/parser';
import type { HTMLSource } from '../source';
import type { TKParserConfig } from '../config';
import D from 'debug';
Expand Down
2 changes: 1 addition & 1 deletion platforms/tktrex/shared/src/parser/parsers/metrics.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { ParserFn } from '@shared/providers/parser.provider';
import { ParserFn } from '@shared/providers/parser';
import { HTMLSource } from '../source';
import { Metrics } from '../../models/metadata/Metrics';
import { TKParserConfig } from '../config';
Expand Down
2 changes: 1 addition & 1 deletion platforms/tktrex/shared/src/parser/parsers/music.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { ParserFn } from '@shared/providers/parser.provider';
import { ParserFn } from '@shared/providers/parser';
import { TKParserConfig } from '../config';
import { HTMLSource } from '../source';
import D from 'debug';
Expand Down
2 changes: 1 addition & 1 deletion platforms/tktrex/shared/src/parser/parsers/native.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import {
ContributionWithDOM,
ParserFn,
} from '@shared/providers/parser.provider';
} from '@shared/providers/parser';
import D from 'debug';
import { HTMLSource } from '../source';
import { TKParserConfig } from '../config';
Expand Down
2 changes: 1 addition & 1 deletion platforms/tktrex/shared/src/parser/parsers/nature.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { ParserFn } from '@shared/providers/parser.provider';
import { ParserFn } from '@shared/providers/parser';
import { throwEitherError } from '@shared/utils/fp.utils';
import { Nature } from '../../models/Nature';
import { TKParserConfig } from '../config';
Expand Down
2 changes: 1 addition & 1 deletion platforms/tktrex/shared/src/parser/parsers/profile.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { trexLogger } from '@shared/logger';
import { ParserFn } from '@shared/providers/parser.provider';
import { ParserFn } from '@shared/providers/parser';
import { throwEitherError } from '@shared/utils/fp.utils';
import _ from 'lodash';
import { NativeVideoN } from '../../models/Nature';
Expand Down
2 changes: 1 addition & 1 deletion platforms/tktrex/shared/src/parser/parsers/search.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { ParserFn } from '@shared/providers/parser.provider';
import { ParserFn } from '@shared/providers/parser';
import { throwEitherError } from '@shared/utils/fp.utils';
import { TKParserConfig } from '../config';
import { HTMLSource } from '../source';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ describe('Parser: home', () => {
thumbnailHref,
recommendedRelativeSeconds,
publicationTime,
elems,
...s
}) => ({
...s,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { GetTest, Test } from '../../../tests/Test';

import { ParserProviderContextDB } from '@shared/providers/parser.provider';
import { ParserProviderContextDB } from '@shared/providers/parser';
import {
readFixtureJSON,
readFixtureJSONPaths,
Expand Down
Loading

0 comments on commit 71f2501

Please sign in to comment.