Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add validation for exported ActivityPub tarballs #7

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
3 changes: 2 additions & 1 deletion build-dist.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
mkdir ./dist/esm
mkdir -p ./dist/esm
cat >dist/esm/index.js <<!EOF
import cjsModule from "../index.js";
export const exportActorProfile = cjsModule.exportActorProfile;
export const importActorProfile = cjsModule.importActorProfile;
export const validateExportStream = cjsModule.validateExportStream;
!EOF

cat >dist/esm/package.json <<!EOF
Expand Down
Binary file modified out/test-export-2024-01-01.tar
Binary file not shown.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"./package.json": "./package.json"
},
"dependencies": {
"stream": "^0.0.3",
"tar-stream": "^3.1.7",
"yaml": "^2.5.1"
},
Expand Down
49 changes: 25 additions & 24 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import * as tar from 'tar-stream'
import { type Pack } from 'tar-stream'
import YAML from 'yaml'
import { Readable } from 'stream'
import { type Readable } from 'stream'

export interface ActorProfileOptions {
actorProfile?: any
Expand Down Expand Up @@ -178,57 +178,56 @@ export async function exportActorProfile({
}
}

/**
 * Imports an ActivityPub profile from a .tar archive.
 *
 * Entries are handled according to their extension: `.json` is parsed with
 * JSON.parse, `.yaml` / `.yml` with YAML.parse, and `.csv` is kept as raw
 * text. Entries with any other extension are drained without being read into
 * memory, so large media files in the archive cost nothing.
 *
 * @param tarStream - A Readable stream containing the .tar archive. A Buffer
 *   holding the whole archive is accepted as well, so callers written against
 *   the earlier buffer-based signature keep working.
 * @returns A promise that resolves to the parsed entries, keyed by their path
 *   inside the archive.
 * @throws Rejects with an Error when an entry cannot be parsed, when the
 *   archive itself is malformed, or when the source stream fails.
 */
export async function importActorProfile(
  tarStream: Readable | Buffer
): Promise<Record<string, any>> {
  const extract = tar.extract()
  const result: Record<string, any> = {}

  return await new Promise((resolve, reject) => {
    extract.on('entry', (header, stream, next) => {
      const fileName = header.name
      const isJson = fileName.endsWith('.json')
      const isYaml = fileName.endsWith('.yaml') || fileName.endsWith('.yml')
      const isCsv = fileName.endsWith('.csv')

      stream.on('error', (error: unknown) => {
        reject(new Error(`Stream error on file ${fileName}: ${String(error)}`))
      })

      if (!isJson && !isYaml && !isCsv) {
        // Not a file we import: the entry still has to be consumed before
        // tar-stream moves on, but there is no reason to keep its bytes.
        stream.on('end', () => {
          next()
        })
        stream.resume()
        return
      }

      // Collect raw chunks and decode once at the end. Decoding chunk by chunk
      // would corrupt any multi-byte UTF-8 character that straddles a chunk
      // boundary.
      const chunks: Buffer[] = []
      stream.on('data', (chunk: Buffer | string) => {
        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk))
      })

      stream.on('end', () => {
        try {
          const content = Buffer.concat(chunks).toString('utf8')
          if (isJson) {
            result[fileName] = JSON.parse(content)
          } else if (isYaml) {
            result[fileName] = YAML.parse(content)
          } else {
            result[fileName] = content
          }
        } catch (error: unknown) {
          reject(
            new Error(`Error processing file ${fileName}: ${String(error)}`)
          )
        }
        next()
      })
    })

    extract.on('finish', () => {
      resolve(result)
    })

    extract.on('error', (error: unknown) => {
      reject(new Error(`Error during extraction: ${String(error)}`))
    })

    if (Buffer.isBuffer(tarStream)) {
      // Whole archive already in memory: write it and close the extractor.
      extract.end(tarStream)
      return
    }

    // pipe() does not forward errors from the source to the destination, so
    // without this handler a failing source would leave the promise pending.
    tarStream.on('error', (error: unknown) => {
      reject(new Error(`Error reading tar stream: ${String(error)}`))
      extract.destroy()
    })

    // Pipe the Readable stream into the extractor
    tarStream.pipe(extract)
  })
}

Expand All @@ -254,3 +253,5 @@ function addMediaFile(
lastModified: new Date().toISOString()
}
}

export * from './verify'
89 changes: 89 additions & 0 deletions src/verify.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import * as tar from 'tar-stream'
import { type Readable } from 'stream'
import YAML from 'yaml'

/**
 * Validates the structure and content of an exported ActivityPub tarball.
 *
 * The archive is consumed as a stream, entry by entry, so a large export never
 * has to be held in memory as a whole. Only the entries that are actually
 * inspected (`*.json` files and the manifest) are read, one at a time; every
 * other entry is drained without being buffered.
 *
 * Checks performed:
 * - every `*.json` entry must contain valid JSON;
 * - the manifest (`manifest.yaml` or `manifest.yml`) must be valid YAML and
 *   define `ubc-version` and `contents.activitypub`;
 * - a manifest, `activitypub/actor.json` and `activitypub/outbox.json` must
 *   all be present. Entry names are compared case-insensitively.
 *
 * The returned promise never rejects: every problem, including a malformed
 * archive or a failing source stream, is reported through `errors`.
 *
 * @param tarStream - A Readable stream containing the .tar archive.
 * @returns A promise that resolves to an object with `valid` (boolean) and `errors` (string[]).
 */
export async function validateExportStream(
  tarStream: Readable
): Promise<{ valid: boolean; errors: string[] }> {
  const extract = tar.extract()
  const errors: string[] = []
  // Either spelling of the manifest name satisfies the "manifest present" rule
  const manifestNames = ['manifest.yaml', 'manifest.yml']
  const requiredFiles = ['activitypub/actor.json', 'activitypub/outbox.json']
  const foundFiles = new Set<string>()

  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error)

  return await new Promise((resolve) => {
    let settled = false
    // Several terminal events can fire for one archive (for example a source
    // error followed by an extractor error); only the first one may resolve.
    const settle = (): void => {
      if (settled) return
      settled = true
      resolve({
        valid: errors.length === 0,
        errors
      })
    }

    extract.on('entry', (header, stream, next) => {
      const fileName = header.name.toLowerCase() // Normalize file name
      foundFiles.add(fileName)

      const isJson = fileName.endsWith('.json')
      const isManifest = manifestNames.includes(fileName)

      // tar-stream waits for next() before emitting the following entry; make
      // sure it is called exactly once per entry, whichever event ends it.
      let advanced = false
      const advance = (): void => {
        if (advanced) return
        advanced = true
        next()
      }

      stream.on('error', (error: unknown) => {
        errors.push(
          `Stream error on file ${fileName}: ${describeError(error)}`
        )
        advance()
      })

      if (!isJson && !isManifest) {
        // Nothing to check in this entry: consume it without buffering.
        stream.on('end', advance)
        stream.resume()
        return
      }

      // Collect raw chunks and decode once; decoding each chunk separately
      // would break multi-byte UTF-8 characters split across chunks.
      const chunks: Buffer[] = []
      stream.on('data', (chunk: Buffer | string) => {
        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk))
      })

      stream.on('end', () => {
        try {
          const content = Buffer.concat(chunks).toString('utf8')

          // Validate JSON files
          if (isJson) {
            JSON.parse(content) // Throws an error if content is not valid JSON
          }

          // Validate manifest file
          if (isManifest) {
            // An empty document parses to null; treat it as a manifest with no
            // fields so the missing-field errors below are reported.
            const manifest = YAML.parse(content) ?? {}
            if (!manifest['ubc-version']) {
              errors.push('Manifest is missing required field: ubc-version')
            }
            if (!manifest.contents?.activitypub) {
              errors.push(
                'Manifest is missing required field: contents.activitypub'
              )
            }
          }
        } catch (error: unknown) {
          errors.push(
            `Error processing file ${fileName}: ${describeError(error)}`
          )
        }
        advance()
      })
    })

    extract.on('finish', () => {
      // Check if all required files are present
      if (!manifestNames.some((name) => foundFiles.has(name))) {
        errors.push('Missing required file: manifest.yaml')
      }
      for (const file of requiredFiles) {
        if (!foundFiles.has(file)) {
          errors.push(`Missing required file: ${file}`)
        }
      }
      settle()
    })

    extract.on('error', (error: unknown) => {
      if (settled) return
      errors.push(`Error during extraction: ${describeError(error)}`)
      settle()
    })

    // pipe() does not forward errors from the source to the destination, so
    // without this handler a failing source would leave the promise pending.
    tarStream.on('error', (error: unknown) => {
      if (settled) return
      errors.push(`Error reading tar stream: ${describeError(error)}`)
      settle()
      extract.destroy()
    })

    // Pipe the Readable stream into the extractor
    tarStream.pipe(extract)
  })
}
Binary file removed test/fixtures/account2.tar
Binary file not shown.
Binary file added test/fixtures/tarball-samples/invalid-actor.tar
Binary file not shown.
Binary file not shown.
Binary file added test/fixtures/tarball-samples/missing-actor.tar
Binary file not shown.
Binary file not shown.
Binary file added test/fixtures/tarball-samples/missing-outbox.tar
Binary file not shown.
Binary file added test/fixtures/tarball-samples/valid-export.tar
Binary file not shown.
10 changes: 7 additions & 3 deletions test/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { exportActorProfile, importActorProfile } from '../src'
import { outbox } from './fixtures/outbox'
import { actorProfile } from './fixtures/actorProfile'
import { expect } from 'chai'
import { Readable } from 'node:stream'

describe('exportActorProfile', () => {
it('calls function', async () => {
Expand Down Expand Up @@ -35,13 +36,16 @@ describe('exportActorProfile', () => {
describe('importActorProfile', () => {
it('extracts and verifies contents from account2.tar', async () => {
// Load the tar file as a buffer
const tarBuffer = fs.readFileSync('test/fixtures/account2.tar')
const tarBuffer = fs.readFileSync(
'test/fixtures/tarball-samples/valid-export.tar'
)

// Use the importActorProfile function to parse the tar contents
const importedData = await importActorProfile(tarBuffer)
const tarStream = Readable.from(tarBuffer)
const importedData = await importActorProfile(tarStream)

// Log or inspect the imported data structure
console.log('Imported Data:', importedData)
// console.log('Imported Data:', importedData)

// Example assertions to check specific files and content
expect(importedData).to.have.property('activitypub/actor.json')
Expand Down
76 changes: 76 additions & 0 deletions test/verify.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import { expect } from 'chai'
import { readFileSync } from 'fs'
import { validateExportStream } from '../dist'
import { Readable } from 'stream'

describe('validateExportStream', () => {
  /**
   * Reads a fixture from test/fixtures/tarball-samples and feeds it to
   * validateExportStream as a stream.
   */
  async function validateFixture(
    fixtureName: string
  ): Promise<{ valid: boolean; errors: string[] }> {
    const tarBuffer = readFileSync(
      `test/fixtures/tarball-samples/${fixtureName}`
    )
    return await validateExportStream(Readable.from(tarBuffer))
  }

  it('should validate a valid tarball', async () => {
    const result = await validateFixture('valid-export.tar')

    // Comparing against [] makes a failure print the offending messages,
    // which removes the need for debug logging.
    expect(result.errors).to.deep.equal([])
    expect(result.valid).to.be.true
  })

  it('should fail if manifest.yaml is missing', async () => {
    const result = await validateFixture('missing-manifest.tar')

    expect(result.valid).to.be.false
    expect(result.errors).to.include('Missing required file: manifest.yaml')
  })

  it('should fail if actor.json is missing', async () => {
    const result = await validateFixture('missing-actor.tar')

    expect(result.valid).to.be.false
    expect(result.errors).to.include(
      'Missing required file: activitypub/actor.json'
    )
  })

  it('should fail if outbox.json is missing', async () => {
    const result = await validateFixture('missing-outbox.tar')

    expect(result.valid).to.be.false
    expect(result.errors).to.include(
      'Missing required file: activitypub/outbox.json'
    )
  })

  it('should fail if actor.json contains invalid JSON', async () => {
    const result = await validateFixture('invalid-actor.tar')

    expect(result.valid).to.be.false
    expect(result.errors).to.not.be.empty
  })

  it('should fail if manifest.yaml is invalid', async () => {
    const result = await validateFixture('invalid-manifest.tar')

    expect(result.valid).to.be.false
    expect(result.errors).to.not.be.empty
  })
})
Loading