Commit

Take out full transload to AZ.
The architecture may come back as part of S3 transload, but not to AZ; there's a stupid byte limit.
nelsonjchen committed Mar 6, 2023
1 parent 6ae2225 commit ce4b157
Showing 3 changed files with 1 addition and 324 deletions.
26 changes: 0 additions & 26 deletions src/azb.ts
@@ -23,32 +23,6 @@ export function azBlobSASUrlToProxyPathname(azb_url: URL, base: string): URL {
return proxified_path
}

export function azBlobSASUrlToTransloadProxyPathname(azb_url: URL, base: string): URL {
const hostname_parts = azb_url.hostname.split('.')
const url_parts = azb_url.pathname.split('/')
const account_name = hostname_parts[0]
if (!account_name) {
throw new Error('invalid azblob url')
}
const container_name = url_parts[1]
if (!container_name) {
throw new Error('invalid azblob url')
}
const blob_name = url_parts.slice(2).join('/')
if (!blob_name) {
throw new Error('invalid azblob url')
}

const query_params = azb_url.searchParams.toString()

const proxified_path = new URL(
`/t-azb/${account_name}/${container_name}/${blob_name}?${query_params}`,
base,
)
return proxified_path
}


export function proxyPathnameToAzBlobSASUrl(proxy_path: URL): URL {
const url_parts = proxy_path.pathname.split('/')
const account_name = url_parts[2]
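
For reference, a minimal sketch of the mapping the removed azBlobSASUrlToTransloadProxyPathname performed. The sample SAS URL is reconstructed from the removed url-parser test later in this commit; treat the values as illustrative only:

// Illustrative only: the account, container, blob, and SAS token are the
// sample values from the removed test in test/handler.test.ts.
const sas_url = new URL(
  'https://urlcopytest.blob.core.windows.net/some-container/some_file.dat?sp=racwd&st=2022-04-03T02%3A09%3A13Z&se=2022-04-03T02%3A20%3A13Z&spr=https&sv=2020-08-04&sr=c&sig=u72iEGi5SLkPg8B7QVI5HXfHSnr3MOse%2FzWzhaYdbbU%3D',
)
const proxy_path = azBlobSASUrlToTransloadProxyPathname(sas_url, 'https://example.com')
// proxy_path.pathname === '/t-azb/urlcopytest/some-container/some_file.dat'
// proxyPathnameToAzBlobSASUrl(proxy_path) recovers the original SAS URL.
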
141 changes: 0 additions & 141 deletions src/handler.ts
@@ -13,10 +13,6 @@ export async function handleRequest(request: Request): Promise<Response> {
return handleProxyToAzStorageRequest(request)
}

if (url.pathname.startsWith('/t-azb/')) {
return handleFullTransloadFromGoogleTakeoutToAzBlobRequest(request)
}

if (url.pathname.startsWith('/version/')) {
return new Response(
JSON.stringify(
@@ -117,143 +113,6 @@ export async function handleProxyToAzStorageRequest(request: Request): Promise<Response> {
}
}

export async function handleFullTransloadFromGoogleTakeoutToAzBlobRequest(request: Request): Promise<Response> {
// These headers go to Azure
const toAzureHeaders = new Headers()
// These headers go to the source
const copySourceHeaders = new Headers()

// Copy all headers from the request that start with x-ms- to the Azure headers
for (const [key, value] of request.headers) {
if (key.startsWith('x-ms-')) {
toAzureHeaders.set(key, value)
}
}

// Check for a 'x-gtr-copy-source' header
const copySource = request.headers.get('x-gtr-copy-source')
if (!copySource) {
return new Response('missing x-gtr-copy-source header', {
status: 400,
})
}


// If a x-gtr-copy-source-range exists, process it
// x-gtr-copy-source-range format is like "bytes=start-end"
const copySourceRange = request.headers.get('x-gtr-copy-source-range')
if (copySourceRange) {
// toAzureHeaders.delete('x-gtr-copy-source-range')
// Set the length header to the length of the range
const rangeParts = copySourceRange.split('=')
if (rangeParts.length !== 2) {
return new Response('invalid x-gtr-copy-source-range header', {
status: 400,
})
}
const range = rangeParts[1]
const rangeBounds = range.split('-')
if (rangeBounds.length !== 2) {
return new Response('invalid x-gtr-copy-source-range header', {
status: 400,
})
}
const start = parseInt(rangeBounds[0])
const end = parseInt(rangeBounds[1])
if (isNaN(start) || isNaN(end)) {
return new Response('invalid x-gtr-copy-source-range header', {
status: 400,
})
}
const length = end - start + 1
toAzureHeaders.set('Content-Length', length.toString())
}

// Get a readable stream of the request body from the url of x-gtr-copy-source
const copySourceUrl = new URL(copySource)
// Make sure hostname is a valid test server or google URL
if (!validGoogleTakeoutUrl(copySourceUrl) && !validTestServerURL(copySourceUrl)) {
return new Response('invalid x-gtr-copy-source header: not takeout url or test server url', {
status: 403,
})
}
console.log('fetching original file from', copySourceUrl.href)
const sourceRange = request.headers.get('x-gtr-copy-source-range')
if (sourceRange) {
copySourceHeaders.set('Range', sourceRange)
console.log('setting range header', sourceRange)
}

const copySourceResponse = await fetch(copySourceUrl.toString(), {
method: 'GET',
headers: copySourceHeaders,
})

console.log('original file response status', copySourceResponse.status)
// If the original request has some sort of error, return that error
if (!copySourceResponse.ok) {
return new Response(copySourceResponse.body, {
status: copySourceResponse.status,
headers: copySourceResponse.headers,
})
}
// Get a readable stream of the original
const copySourceBody = copySourceResponse.body
// Return an error if body isn't a ReadableStream
if (!(copySourceBody instanceof ReadableStream)) {
return new Response('copySourceBody is not a ReadableStream', {
status: 500,
})
}
// Set Content-Length on toAzureHeaders to the source response's Content-Length
// (for a ranged 206 response this equals the requested range length)
toAzureHeaders.set('Content-Length', copySourceResponse.headers.get('Content-Length') || '0')

// Defensively remove any Cloudflare-injected headers (cf-*) before forwarding to Azure
for (const [key, _] of toAzureHeaders.entries()) {
if (key.startsWith('cf-')) {
toAzureHeaders.delete(key)
}
}

const url = new URL(request.url)
try {
const azUrl = proxyPathnameToAzBlobSASUrl(url)
console.log('proxying to', azUrl)

console.log('toAzureHeaders', JSON.stringify(Object.fromEntries(toAzureHeaders.entries())))
const originalResponse = await fetch(
azUrl.toString(), {
method: request.method,
headers: toAzureHeaders,
body: copySourceBody
})

const body2 = await originalResponse.text()

console.log('az response status', originalResponse.status)
console.log('az response body', body2)

const response = new Response(body2, {
status: originalResponse.status,
headers: originalResponse.headers,
})


return response
} catch (e) {
if (e instanceof Error) {
const error = serializeError(e)
return new Response(JSON.stringify(error),
{
status: 500,
})
}
return new Response('unknown error', {
status: 500,
})
}
}

export function validTestServerURL(url: URL): boolean {
return (
// Cloudflare Bucket test server with unlimited download bandwidth
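
For context before the test changes below, a sketch of how a client drove the removed /t-azb/ endpoint, based on the removed tests in test/handler.test.ts. All values here are hypothetical placeholders; in particular, the real x-gtr-copy-source had to pass the validGoogleTakeoutUrl or validTestServerURL check:

// Sketch only, with placeholder values.
async function transloadOneBlock(): Promise<Response> {
  const transload_url = new URL(
    // account/container/blob plus its SAS query, behind the worker's /t-azb/ route
    'https://example.com/t-azb/myaccount/my-container/target.dat?sv=2020-08-04&sig=REDACTED',
  )
  // A 201 Created response means Azure accepted the block/blob
  return fetch(transload_url, {
    method: 'PUT',
    headers: {
      'x-ms-blob-type': 'BlockBlob', // x-ms-* headers are forwarded to Azure
      // The worker fetches this URL and streams its body into the Azure PUT
      'x-gtr-copy-source': 'https://takeout-download.example/archive.zip',
      // Optional: transload only a slice, e.g. one 100 MB block
      'x-gtr-copy-source-range': 'bytes=0-104857599',
    },
  })
}
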
158 changes: 1 addition & 157 deletions test/handler.test.ts
@@ -4,7 +4,7 @@ import {
validTestServerURL,
} from '../src/handler'
import {
azBlobSASUrlToProxyPathname, azBlobSASUrlToTransloadProxyPathname,
azBlobSASUrlToProxyPathname,
proxyPathnameToAzBlobSASUrl,
} from '../src/azb'

@@ -36,147 +36,6 @@ describe('handler', () => {
})
})

describe('transload handler', () => {

test('can facilitate transload of a large file in a single block', async () => {
const AZ_STORAGE_TEST_URL_SEGMENT = process.env.AZ_STORAGE_TEST_URL_SEGMENT
if (!AZ_STORAGE_TEST_URL_SEGMENT) {
throw new Error(
'AZ_STORAGE_TEST_URL_SEGMENT environment variable is not set',
)
}

const file_source_url = file_test_large_url

const requestUrl = new URL(
`https://example.com/t-azb/${AZ_STORAGE_TEST_URL_SEGMENT}`,
)
// Change filename of request URL
requestUrl.pathname = requestUrl.pathname.replace(
'test.dat',
't-azb-small.dat',
)

const request = new Request(requestUrl, {
method: 'PUT',
headers: {
'x-ms-blob-type': 'BlockBlob',
'x-gtr-copy-source': file_source_url.toString(),
},
})

const result = await handleRequest(request)
const ok = await result.text()
expect(ok).toEqual('')

expect(result.status).toEqual(201)
}, 60000)


test('can facilitate transload of a large file with multiple blocks', async () => {
const AZ_STORAGE_TEST_URL_SEGMENT = process.env.AZ_STORAGE_TEST_URL_SEGMENT
if (!AZ_STORAGE_TEST_URL_SEGMENT) {
throw new Error(
'AZ_STORAGE_TEST_URL_SEGMENT environment variable is not set',
)
}

const file_source_url = file_test_large_url

const base_request_url = new URL(
`https://example.com/t-azb/${AZ_STORAGE_TEST_URL_SEGMENT}`,
)
// Change filename of request URL
base_request_url.pathname = base_request_url.pathname.replace(
'test.dat',
't-azb-large.dat',
)

// Transfer size: one 100 MB chunk
const transfer_size = 1024 * 1024 * 100

// First block

// Construct the first block request url
const first_block_request_url = new URL(base_request_url)
first_block_request_url.searchParams.append('comp', 'block')
// Generate a block id, must be base64 encoded
const first_block_id = Buffer.from('1').toString('base64')
first_block_request_url.searchParams.append('blockid', first_block_id)

const first_block_request = new Request(first_block_request_url, {
method: 'PUT',
headers: {
'x-ms-blob-type': 'BlockBlob',
'x-gtr-copy-source': file_source_url.toString(),
'x-gtr-copy-source-range': `bytes=0-${transfer_size - 1}`,
}
})

const first_block_result = await handleRequest(first_block_request)
const first_block_ok = await first_block_result.text()
expect(first_block_ok).toEqual('')
expect(first_block_result.status).toEqual(201)

// Second block

// Construct the second block request url
const second_block_request_url = new URL(base_request_url)
second_block_request_url.searchParams.append('comp', 'block')
// Generate a block id, must be base64 encoded
const second_block_id = Buffer.from('2').toString('base64')
second_block_request_url.searchParams.append('blockid', second_block_id)

const second_block_request = new Request(second_block_request_url, {
method: 'PUT',
headers: {
'x-ms-blob-type': 'BlockBlob',
'x-gtr-copy-source': file_source_url.toString(),
'x-gtr-copy-source-range': `bytes=${transfer_size}-${transfer_size * 2 - 1}`,
}
})

const second_block_result = await handleRequest(second_block_request)
const second_block_ok = await second_block_result.text()
expect(second_block_ok).toEqual('')
expect(second_block_result.status).toEqual(201)

// Commit the blocks

// Construct the commit request url to contact Azure directly,
// derived from AZ_STORAGE_TEST_URL_SEGMENT

// The first segment of AZ_STORAGE_TEST_URL_SEGMENT is the storage account
// name (despite the variable name below)
const test_url_split = AZ_STORAGE_TEST_URL_SEGMENT.split('/')
const container = test_url_split[0]
// The rest is the container/blob path, including the SAS query string
const rest_of_url = test_url_split.slice(1).join('/')

const commit_request_url = new URL(`https://${container}.blob.core.windows.net/${rest_of_url}`)
commit_request_url.searchParams.append('comp', 'blocklist')
// Change pathname of request URL
commit_request_url.pathname = commit_request_url.pathname.replace(
'test.dat',
't-azb-large.dat',
)

const commit_request = new Request(commit_request_url, {
method: 'PUT',
headers: {},
body: `<?xml version="1.0" encoding="utf-8"?>
<BlockList><Latest>${first_block_id}</Latest><Latest>${second_block_id}</Latest></BlockList>
`
})

// Send request with fetch
const commit_result = await fetch(commit_request)
const commit_ok = await commit_result.text()
expect(commit_ok).toEqual('')
expect(commit_result.status).toEqual(201)

}, 120000)
})

describe('azure proxy handler', () => {

test('handles proxying to azure', async () => {
@@ -362,21 +221,6 @@ describe('takeout proxy handler', () => {
})

describe('url-parser', () => {
test('can transload proxify the azure blob SAS URL', async () => {
const path = azBlobSASUrlToTransloadProxyPathname(
real_azb_url,
'https://example.com',
)
expect(path).toEqual(
new URL(
'/t-azb/urlcopytest/some-container/some_file.dat?sp=racwd&st=2022-04-03T02%3A09%3A13Z&se=2022-04-03T02%3A20%3A13Z&spr=https&sv=2020-08-04&sr=c&sig=u72iEGi5SLkPg8B7QVI5HXfHSnr3MOse%2FzWzhaYdbbU%3D',
'https://example.com',
),
)
const url = proxyPathnameToAzBlobSASUrl(path)
expect(url).toEqual(real_azb_url)
})


test('can proxify the azure blob SAS URL', async () => {
const path = azBlobSASUrlToProxyPathname(
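
A footnote on the removed multi-block test above: with transfer_size = 1024 * 1024 * 100 (104,857,600 bytes), the two Range headers it sent were bytes=0-104857599 and bytes=104857600-209715199. A sketch of that arithmetic, not code from this repository:

// Sketch: inclusive byte ranges for fixed-size blocks, matching the
// removed test's x-gtr-copy-source-range headers.
function blockRange(block_index: number, chunk_size: number): string {
  const start = block_index * chunk_size
  const end = start + chunk_size - 1 // HTTP Range ends are inclusive
  return `bytes=${start}-${end}`
}

const chunk_size = 1024 * 1024 * 100
console.log(blockRange(0, chunk_size)) // bytes=0-104857599
console.log(blockRange(1, chunk_size)) // bytes=104857600-209715199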
