From ce4b1578b31b59d1e0e03f510c8109be73ac00dd Mon Sep 17 00:00:00 2001
From: Nelson Chen
Date: Sun, 5 Mar 2023 19:45:06 -0800
Subject: [PATCH] Take out full transload to AZ. Architecture may come back as
 part of S3 transload but not to AZ; there's a stupid byte limit.

---
 src/azb.ts           |  26 -------
 src/handler.ts       | 141 --------------------------------
 test/handler.test.ts | 158 +------------------------------------------
 3 files changed, 1 insertion(+), 324 deletions(-)
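The "byte limit" above is why the removed path uploaded in pieces: a single PUT
cannot carry an arbitrarily large body, so the handler staged byte ranges with
Azure's Put Block operation and committed them with Put Block List. For
reference, a minimal standalone sketch of that flow in TypeScript; the names
(transloadInBlocks, sourceUrl, sasUrl) are illustrative assumptions, not part
of this codebase, and each block is buffered here for simplicity where the
removed handler streamed it through:

// Sketch: copy a Range-capable source into an Azure Block Blob in chunks.
// `sasUrl` is assumed to be a writable Block Blob SAS URL (hypothetical).
async function transloadInBlocks(
  sourceUrl: string,
  sasUrl: string,
  totalSize: number,
  blockSize = 100 * 1024 * 1024, // 100 MB per block, matching the removed tests
): Promise<void> {
  const blockIds: string[] = []
  for (let start = 0; start < totalSize; start += blockSize) {
    const end = Math.min(start + blockSize, totalSize) - 1
    // Block ids must be base64-encoded and the same length within one blob.
    const blockId = Buffer.from(String(blockIds.length).padStart(6, '0')).toString('base64')
    blockIds.push(blockId)
    // Pull one inclusive byte range from the source.
    const source = await fetch(sourceUrl, {
      headers: { Range: `bytes=${start}-${end}` },
    })
    if (!source.ok) throw new Error(`source fetch failed: ${source.status}`)
    // Stage the range as an uncommitted block (PUT ...&comp=block&blockid=...).
    const putBlockUrl = new URL(sasUrl)
    putBlockUrl.searchParams.append('comp', 'block')
    putBlockUrl.searchParams.append('blockid', blockId)
    const put = await fetch(putBlockUrl, { method: 'PUT', body: await source.arrayBuffer() })
    if (put.status !== 201) throw new Error(`Put Block failed: ${put.status}`)
  }
  // Commit the staged blocks in order (PUT ...&comp=blocklist).
  const commitUrl = new URL(sasUrl)
  commitUrl.searchParams.append('comp', 'blocklist')
  const blockList =
    `<?xml version="1.0" encoding="utf-8"?>\n<BlockList>\n` +
    blockIds.map((id) => `  <Latest>${id}</Latest>`).join('\n') +
    `\n</BlockList>`
  const commit = await fetch(commitUrl, { method: 'PUT', body: blockList })
  if (commit.status !== 201) throw new Error(`Put Block List failed: ${commit.status}`)
}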
diff --git a/src/azb.ts b/src/azb.ts
index d98c875..983936e 100644
--- a/src/azb.ts
+++ b/src/azb.ts
@@ -23,32 +23,6 @@ export function azBlobSASUrlToProxyPathname(azb_url: URL, base: string): URL {
   return proxified_path
 }
 
-export function azBlobSASUrlToTransloadProxyPathname(azb_url: URL, base: string): URL {
-  const hostname_parts = azb_url.hostname.split('.')
-  const url_parts = azb_url.pathname.split('/')
-  const account_name = hostname_parts[0]
-  if (!account_name) {
-    throw new Error('invalid azblob url')
-  }
-  const container_name = url_parts[1]
-  if (!container_name) {
-    throw new Error('invalid azblob url')
-  }
-  const blob_name = url_parts.slice(2).join('/')
-  if (!blob_name) {
-    throw new Error('invalid azblob url')
-  }
-
-  const query_params = azb_url.searchParams.toString()
-
-  const proxified_path = new URL(
-    `/t-azb/${account_name}/${container_name}/${blob_name}?${query_params}`,
-    base,
-  )
-  return proxified_path
-}
-
-
 export function proxyPathnameToAzBlobSASUrl(proxy_path: URL): URL {
   const url_parts = proxy_path.pathname.split('/')
   const account_name = url_parts[2]
diff --git a/src/handler.ts b/src/handler.ts
index 8664e34..c24d8c4 100644
--- a/src/handler.ts
+++ b/src/handler.ts
@@ -13,10 +13,6 @@ export async function handleRequest(request: Request): Promise<Response> {
     return handleProxyToAzStorageRequest(request)
   }
 
-  if (url.pathname.startsWith('/t-azb/')) {
-    return handleFullTransloadFromGoogleTakeoutToAzBlobRequest(request)
-  }
-
   if (url.pathname.startsWith('/version/')) {
     return new Response(
       JSON.stringify(
@@ -117,143 +113,6 @@ export async function handleProxyToAzStorageRequest(request: Request): Promise<Response> {
-export async function handleFullTransloadFromGoogleTakeoutToAzBlobRequest(
-  request: Request,
-): Promise<Response> {
-  // These headers go to Azure
-  const toAzureHeaders = new Headers()
-  // These headers go to the source
-  const copySourceHeaders = new Headers()
-
-  // Copy all headers from the request that start with x-ms- to the Azure headers
-  for (const [key, value] of request.headers) {
-    if (key.startsWith('x-ms-')) {
-      toAzureHeaders.set(key, value)
-    }
-  }
-
-  // Check for an 'x-gtr-copy-source' header
-  const copySource = request.headers.get('x-gtr-copy-source')
-  if (!copySource) {
-    return new Response('missing x-gtr-copy-source header', {
-      status: 400,
-    })
-  }
-
-
-  // If an x-gtr-copy-source-range header exists, process it.
-  // The x-gtr-copy-source-range format is like "bytes=start-end"
-  const copySourceRange = request.headers.get('x-gtr-copy-source-range')
-  if (copySourceRange) {
-    // toAzureHeaders.delete('x-gtr-copy-source-range')
-    // Set the length header to the length of the range
-    const rangeParts = copySourceRange.split('=')
-    if (rangeParts.length !== 2) {
-      return new Response('invalid x-gtr-copy-source-range header', {
-        status: 400,
-      })
-    }
-    const range = rangeParts[1]
-    const rangeBounds = range.split('-')
-    if (rangeBounds.length !== 2) {
-      return new Response('invalid x-gtr-copy-source-range header', {
-        status: 400,
-      })
-    }
-    const start = parseInt(rangeBounds[0])
-    const end = parseInt(rangeBounds[1])
-    if (isNaN(start) || isNaN(end)) {
-      return new Response('invalid x-gtr-copy-source-range header', {
-        status: 400,
-      })
-    }
-    const length = end - start + 1
-    toAzureHeaders.set('Content-Length', length.toString())
-  }
-
-  // Get a readable stream for the request body from the x-gtr-copy-source url
-  const copySourceUrl = new URL(copySource)
-  // Make sure the hostname is a valid test server or Google URL
-  if (!validGoogleTakeoutUrl(copySourceUrl) && !validTestServerURL(copySourceUrl)) {
-    return new Response('invalid x-gtr-copy-source header: not takeout url or test server url', {
-      status: 403,
-    })
-  }
-  console.log('fetching original file from', copySourceUrl.href)
-  const sourceRange = request.headers.get('x-gtr-copy-source-range')
-  if (sourceRange) {
-    copySourceHeaders.set('Range', sourceRange)
-    console.log('setting range header', sourceRange)
-  }
-
-  const copySourceResponse = await fetch(copySourceUrl.toString(), {
-    method: 'GET',
-    headers: copySourceHeaders,
-  })
-
-  console.log('original file response status', copySourceResponse.status)
-  // If the original request has some sort of error, return that error
-  if (!copySourceResponse.ok) {
-    return new Response(copySourceResponse.body, {
-      status: copySourceResponse.status,
-      headers: copySourceResponse.headers,
-    })
-  }
-  // Get a readable stream of the original
-  const copySourceBody = copySourceResponse.body
-  // Return an error if the body isn't a ReadableStream
-  if (!(copySourceBody instanceof ReadableStream)) {
-    return new Response('copySourceBody is not a ReadableStream', {
-      status: 500,
-    })
-  }
-  // Set the Content-Length of toAzureHeaders to the content length of the source
-  toAzureHeaders.set('Content-Length', copySourceResponse.headers.get('Content-Length') || '0')
-
-  // Remove any Cloudflare-added headers (cf-*) before forwarding upstream
-  for (const [key, _] of toAzureHeaders.entries()) {
-    if (key.startsWith('cf-')) {
-      toAzureHeaders.delete(key)
-    }
-  }
-
-  const url = new URL(request.url)
-  try {
-    const azUrl = proxyPathnameToAzBlobSASUrl(url)
-    console.log('proxying to', azUrl)
-
-    console.log('toAzureHeaders', JSON.stringify(Object.fromEntries(toAzureHeaders.entries())))
-    const originalResponse = await fetch(
-      azUrl.toString(), {
-        method: request.method,
-        headers: toAzureHeaders,
-        body: copySourceBody
-      })
-
-    const body2 = await originalResponse.text()
-
-    console.log('az response status', originalResponse.status)
-    console.log('az response body', body2)
-
-    const response = new Response(body2, {
-      status: originalResponse.status,
-      headers: originalResponse.headers,
-    })
-
-
-    return response
-  } catch (e) {
-    if (e instanceof Error) {
-      const error = serializeError(e)
-      return new Response(JSON.stringify(error),
-        {
-          status: 500,
-        })
-    }
-    return new Response('unknown error', {
-      status: 500,
-    })
-  }
-}
-
 export function validTestServerURL(url: URL): boolean {
   return (
     // Cloudflare Bucket test server with unlimited download bandwidth
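One detail of the removed handler worth keeping in mind if this comes back for
S3: x-gtr-copy-source-range uses the inclusive HTTP form "bytes=start-end", so
the forwarded Content-Length is end - start + 1. A tiny sketch of that
arithmetic (hypothetical helper, not part of this codebase):

// Inclusive HTTP ranges: "bytes=0-99" covers 100 bytes (end - start + 1).
function contentLengthForRange(rangeHeader: string): number {
  const [unit, range] = rangeHeader.split('=')
  if (unit !== 'bytes' || !range) throw new Error('invalid range header')
  const [start, end] = range.split('-').map(Number)
  if (!Number.isInteger(start) || !Number.isInteger(end) || end < start) {
    throw new Error('invalid range bounds')
  }
  return end - start + 1
}

// contentLengthForRange('bytes=0-104857599') === 104857600, the 100 MB block
// size used in the tests below.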
diff --git a/test/handler.test.ts b/test/handler.test.ts
index 3235f2d..0e5e26a 100644
--- a/test/handler.test.ts
+++ b/test/handler.test.ts
@@ -4,7 +4,7 @@ import {
   validTestServerURL,
 } from '../src/handler'
 import {
-  azBlobSASUrlToProxyPathname, azBlobSASUrlToTransloadProxyPathname,
+  azBlobSASUrlToProxyPathname,
   proxyPathnameToAzBlobSASUrl,
 } from '../src/azb'
 
@@ -36,147 +36,6 @@ describe('handler', () => {
   })
 })
 
-describe('transload handler', () => {
-
-  test('can facilitate transload of a large file in a single block', async () => {
-    const AZ_STORAGE_TEST_URL_SEGMENT = process.env.AZ_STORAGE_TEST_URL_SEGMENT
-    if (!AZ_STORAGE_TEST_URL_SEGMENT) {
-      throw new Error(
-        'AZ_STORAGE_TEST_URL_SEGMENT environment variable is not set',
-      )
-    }
-
-    const file_source_url = file_test_large_url
-
-    const requestUrl = new URL(
-      `https://example.com/t-azb/${AZ_STORAGE_TEST_URL_SEGMENT}`,
-    )
-    // Change the filename of the request URL
-    requestUrl.pathname = requestUrl.pathname.replace(
-      'test.dat',
-      't-azb-small.dat',
-    )
-
-    const request = new Request(requestUrl, {
-      method: 'PUT',
-      headers: {
-        'x-ms-blob-type': 'BlockBlob',
-        'x-gtr-copy-source': file_source_url.toString(),
-      },
-    })
-
-    const result = await handleRequest(request)
-    const ok = await result.text()
-    expect(ok).toEqual('')
-
-    expect(result.status).toEqual(201)
-  }, 60000)
-
-
-  test('can facilitate transload of a large file with multiple blocks', async () => {
-    const AZ_STORAGE_TEST_URL_SEGMENT = process.env.AZ_STORAGE_TEST_URL_SEGMENT
-    if (!AZ_STORAGE_TEST_URL_SEGMENT) {
-      throw new Error(
-        'AZ_STORAGE_TEST_URL_SEGMENT environment variable is not set',
-      )
-    }
-
-    const file_source_url = file_test_large_url
-
-    const base_request_url = new URL(
-      `https://example.com/t-azb/${AZ_STORAGE_TEST_URL_SEGMENT}`,
-    )
-    // Change the filename of the request URL
-    base_request_url.pathname = base_request_url.pathname.replace(
-      'test.dat',
-      't-azb-large.dat',
-    )
-
-    // Chunk size per block: 100 MB
-    const transfer_size = 1024 * 1024 * 100
-
-    // First block
-
-    // Construct the first block request url
-    const first_block_request_url = new URL(base_request_url)
-    first_block_request_url.searchParams.append('comp', 'block')
-    // Generate a block id; it must be base64 encoded
-    const first_block_id = Buffer.from('1').toString('base64')
-    first_block_request_url.searchParams.append('blockid', first_block_id)
-
-    const first_block_request = new Request(first_block_request_url, {
-      method: 'PUT',
-      headers: {
-        'x-ms-blob-type': 'BlockBlob',
-        'x-gtr-copy-source': file_source_url.toString(),
-        'x-gtr-copy-source-range': `bytes=0-${transfer_size - 1}`,
-      }
-    })
-
-    const first_block_result = await handleRequest(first_block_request)
-    const first_block_ok = await first_block_result.text()
-    expect(first_block_ok).toEqual('')
-    expect(first_block_result.status).toEqual(201)
-
-    // Second block
-
-    // Construct the second block request url
-    const second_block_request_url = new URL(base_request_url)
-    second_block_request_url.searchParams.append('comp', 'block')
-    // Generate a block id; it must be base64 encoded
-    const second_block_id = Buffer.from('2').toString('base64')
-    second_block_request_url.searchParams.append('blockid', second_block_id)
-
-    const second_block_request = new Request(second_block_request_url, {
-      method: 'PUT',
-      headers: {
-        'x-ms-blob-type': 'BlockBlob',
-        'x-gtr-copy-source': file_source_url.toString(),
-        'x-gtr-copy-source-range': `bytes=${transfer_size}-${transfer_size * 2 - 1}`,
-      }
-    })
-
-    const second_block_result = await handleRequest(second_block_request)
-    const second_block_ok = await second_block_result.text()
-    expect(second_block_ok).toEqual('')
-    expect(second_block_result.status).toEqual(201)
-
-    // Commit the blocks
-
-    // Construct the commit request url to contact Azure directly
-
-    // The first segment of AZ_STORAGE_TEST_URL_SEGMENT is the storage account name
-    const test_url_split = AZ_STORAGE_TEST_URL_SEGMENT.split('/')
-    const container = test_url_split[0]
-    // Get the rest of the URL from AZ_STORAGE_TEST_URL_SEGMENT
-    const rest_of_url = test_url_split.slice(1).join('/')
-
-    const commit_request_url = new URL(`https://${container}.blob.core.windows.net/${rest_of_url}`)
-    commit_request_url.searchParams.append('comp', 'blocklist')
-    // Change the pathname of the request URL
-    commit_request_url.pathname = commit_request_url.pathname.replace(
-      'test.dat',
-      't-azb-large.dat',
-    )
-
-    const commit_request = new Request(commit_request_url, {
-      method: 'PUT',
-      headers: {},
-      body: `<?xml version="1.0" encoding="utf-8"?>
-<BlockList>
-  <Latest>${first_block_id}</Latest>
-  <Latest>${second_block_id}</Latest>
-</BlockList>`
-    })
-
-    // Send the request with fetch
-    const commit_result = await fetch(commit_request)
-    const commit_ok = await commit_result.text()
-    expect(commit_ok).toEqual('')
-    expect(commit_result.status).toEqual(201)
-
-  }, 120000)
-})
-
 describe('azure proxy handler', () => {
 
   test('handles proxying to azure', async () => {
@@ -362,21 +221,6 @@ describe('takeout proxy handler', () => {
 })
 
 describe('url-parser', () => {
-  test('can transload proxify the azure blob SAS URL', async () => {
-    const path = azBlobSASUrlToTransloadProxyPathname(
-      real_azb_url,
-      'https://example.com',
-    )
-    expect(path).toEqual(
-      new URL(
-        '/t-azb/urlcopytest/some-container/some_file.dat?sp=racwd&st=2022-04-03T02%3A09%3A13Z&se=2022-04-03T02%3A20%3A13Z&spr=https&sv=2020-08-04&sr=c&sig=u72iEGi5SLkPg8B7QVI5HXfHSnr3MOse%2FzWzhaYdbbU%3D',
-        'https://example.com',
-      ),
-    )
-    const url = proxyPathnameToAzBlobSASUrl(path)
-    expect(url).toEqual(real_azb_url)
-  })
-
   test('can proxify the azure blob SAS URL', async () => {
     const path = azBlobSASUrlToProxyPathname(