From a8553c5f5f0c914d7b770f2c39e5504ded119bd0 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Thu, 22 Aug 2024 15:47:08 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Adds=20consistent=20redirects=20han?= =?UTF-8?q?dling=20for=20cf=20workers.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This means that both request for the html pages at /grapher/[slug] and requests for /grapher/[slug].config.json will be redirected according to the _grapherRedirects.json file. This is done as a catch handler that checks for 404 pages so that the common, happy path does not have to fetch the redirects file. --- functions/_common/grapherRenderer.ts | 33 ++++--- functions/grapher/[slug].ts | 138 ++++++++++++--------------- 2 files changed, 84 insertions(+), 87 deletions(-) diff --git a/functions/_common/grapherRenderer.ts b/functions/_common/grapherRenderer.ts index e98d1d26b58..8e4e42b9f89 100644 --- a/functions/_common/grapherRenderer.ts +++ b/functions/_common/grapherRenderer.ts @@ -7,7 +7,7 @@ import { } from "@ourworldindata/utils" import { svg2png, initialize as initializeSvg2Png } from "svg2png-wasm" import { TimeLogger } from "./timeLogger" -import { png } from "itty-router" +import { png, StatusError } from "itty-router" import svg2png_wasm from "../../node_modules/svg2png-wasm/svg2png_wasm_bg.wasm" @@ -152,6 +152,8 @@ async function fetchFromR2( headers, } const primaryResponse = await fetch(url.toString(), init) + // The fallback URL here is used so that on staging or dev we can fallback + // to the production bucket if the file is not found in the branch bucket if (primaryResponse.status === 404 && fallbackUrl) { return fetch(fallbackUrl.toString(), init) } @@ -215,6 +217,13 @@ export async function fetchGrapherConfig( ): Promise { const fetchResponse = await fetchUnparsedGrapherConfig(slug, env, etag) + if (fetchResponse.status === 404) { + // we throw 404 errors instead of returning a 404 response so that the router + // catch handler can do a lookup in the redirects file and maybe send + // a 302 redirect response + throw new StatusError(404) + } + if (fetchResponse.status !== 200) { console.log( "Status code is not 200, returning empty response with status code", @@ -241,13 +250,16 @@ async function fetchAndRenderGrapherToSvg( options: ImageOptions, searchParams: URLSearchParams, env: Env -) { +): Promise { const grapherLogger = new TimeLogger("grapher") const grapherConfigResponse = await fetchGrapherConfig(slug, env) - if (grapherConfigResponse.status !== 200) { - return null + if (grapherConfigResponse.status === 404) { + // we throw 404 errors instad of returning a 404 response so that the router + // catch handler can do a lookup in the redirects file and maybe send + // a 302 redirect response + throw new StatusError(grapherConfigResponse.status) } const bounds = new Bounds(0, 0, options.svgWidth, options.svgHeight) @@ -300,10 +312,6 @@ export const fetchAndRenderGrapher = async ( ) console.log("fetched svg") - if (!svg) { - return new Response("Not found", { status: 404 }) - } - switch (outType) { case "png": return png(await renderSvgToPng(svg, options)) @@ -341,7 +349,7 @@ export async function getOptionalRedirectForSlug( slug: string, baseUrl: URL, env: Env -) { +): Promise { const redirects: Record = await env.ASSETS.fetch( new URL("/grapher/_grapherRedirects.json", baseUrl), { cf: { cacheTtl: 2 * 60 } } @@ -354,8 +362,11 @@ export async function getOptionalRedirectForSlug( return redirects[slug] } -export function createRedirectResponse(redirSlug: string, currentUrl: URL) { - new Response(null, { +export function createRedirectResponse( + redirSlug: string, + currentUrl: URL +): Response { + return new Response(null, { status: 302, headers: { Location: `/grapher/${redirSlug}${currentUrl.search}` }, }) diff --git a/functions/grapher/[slug].ts b/functions/grapher/[slug].ts index 435adf87082..dcc54b9b846 100644 --- a/functions/grapher/[slug].ts +++ b/functions/grapher/[slug].ts @@ -4,11 +4,14 @@ import { createRedirectResponse, fetchUnparsedGrapherConfig, } from "../_common/grapherRenderer.js" -import { IRequestStrict, Router, error, cors } from "itty-router" +import { IRequestStrict, Router, StatusError, error, cors } from "itty-router" const { preflight, corsify } = cors({ allowMethods: ["GET", "OPTIONS", "HEAD"], }) +const extensions = { + configJson: ".config.json", +} const router = Router({ before: [preflight], @@ -16,7 +19,7 @@ const router = Router({ }) router .get( - "/grapher/:slug.config.json", + `/grapher/:slug${extensions.configJson}`, async ({ params: { slug } }, { searchParams }, env, etag) => handleConfigRequest(slug, searchParams, env, etag) ) @@ -41,39 +44,64 @@ export const onRequest: PagesFunction = async (context) => { { ...env, url }, request.headers.get("if-none-match") ) - .catch((e) => error(500, e)) + .catch(async (e) => { + // Here we do a unified after the fact handling of 404s to check + // if we have a redirect in the _grapherRedirects.json file. + // This is done as a catch handler that checks for 404 pages + // so that the common, happy path does not have to fetch the redirects file. + if (e instanceof StatusError && e.status === 404) { + console.log("Handling 404 for ", url.pathname) + + const fullslug = url.pathname.split("/").pop() as string + + const allExtensions = Object.values(extensions) + .map((ext) => ext.replace(".", "\\.")) // for the regex make sure we match only a single dot, not any character + .join("|") + const regexForKnownExtensions = new RegExp( + `^(?.*?)(?${allExtensions})?$` + ) + + const matchResult = fullslug.match(regexForKnownExtensions) + const slug = matchResult?.groups?.slug ?? fullslug + const extension = matchResult?.groups?.extension ?? "" + + if (slug.toLowerCase() !== slug) + return createRedirectResponse( + `${slug.toLowerCase()}${extension}`, + url + ) + + console.log("Looking up slug and extension", { + slug, + extension, + }) + + const redirectSlug = await getOptionalRedirectForSlug( + slug, + url, + { + ...env, + url, + } as Env + ) + console.log("Redirect slug", redirectSlug) + if (redirectSlug && redirectSlug !== slug) { + return createRedirectResponse( + `${redirectSlug}${extension}`, + url + ) + } else return error(404, "Not found") + } + return error(500, e) + }) } async function handleHtmlPageRequest( slug: string, - searchParams: URLSearchParams, + _searchParams: URLSearchParams, env: Env ) { const url = env.url - // Redirects handling is performed by the worker, and is done by fetching the (baked) _grapherRedirects.json file. - // That file is a mapping from old slug to new slug. - - /** - * REDIRECTS HANDLING: - * We want to optimize for the case where the user visits a page using the correct slug, i.e. there's no redirect. - * That's why: - * 1. We first check if the slug is lowercase. If it's not, we convert it to lowercase _and check for any redirects already_, and send a redirect already. - * 2. If the slug is lowercase, we check if we can find the page at the requested slug. If we can find it, we return it already. - * 3. If we can't find it, we _then_ check if there's a redirect for it. If there is, we redirect to the new page. - */ - - // All our grapher slugs are lowercase by convention. - // To allow incoming links that may contain uppercase characters to work, we redirect to the lowercase version. - const lowerCaseSlug = slug.toLowerCase() - if (lowerCaseSlug !== slug) { - const redirectSlug = await getOptionalRedirectForSlug( - lowerCaseSlug, - url, - env - ) - - return createRedirectResponse(redirectSlug ?? lowerCaseSlug, url) - } // For local testing // const grapherPageResp = await fetch( @@ -84,25 +112,17 @@ async function handleHtmlPageRequest( const grapherPageResp = await env.ASSETS.fetch(url, { redirect: "manual" }) if (grapherPageResp.status === 404) { - // If the request is a 404, we check if there's a redirect for it. - // If there is, we redirect to the new page. - const redirectSlug = await getOptionalRedirectForSlug(slug, url, env) - if (redirectSlug && redirectSlug !== slug) { - return createRedirectResponse(redirectSlug, url) - } else { - // Otherwise we just return the 404 page. - return grapherPageResp - } + throw new StatusError(404) } - // A non-200 status code is most likely a redirect (301 or 302) or a 404, all of which we want to pass through as-is. + // A non-200 status code is most likely a redirect (301 or 302), all of which we want to pass through as-is. // In the case of the redirect, the browser will then request the new URL which will again be handled by this worker. if (grapherPageResp.status !== 200) return grapherPageResp - const openGraphThumbnailUrl = `/grapher/thumbnail/${lowerCaseSlug}.png?imType=og${ + const openGraphThumbnailUrl = `/grapher/thumbnail/${slug}.png?imType=og${ url.search ? "&" + url.search.slice(1) : "" }` - const twitterThumbnailUrl = `/grapher/thumbnail/${lowerCaseSlug}.png?imType=twitter${ + const twitterThumbnailUrl = `/grapher/thumbnail/${slug}.png?imType=twitter${ url.search ? "&" + url.search.slice(1) : "" }` @@ -146,21 +166,6 @@ async function handleConfigRequest( ) { const shouldCache = searchParams.get("nocache") === null console.log("Preparing json response for ", slug) - // All our grapher slugs are lowercase by convention. - // To allow incoming links that may contain uppercase characters to work, we redirect to the lowercase version. - const lowerCaseSlug = slug.toLowerCase() - if (lowerCaseSlug !== slug) { - const redirectSlug = await getOptionalRedirectForSlug( - lowerCaseSlug, - env.url, - env - ) - - return createRedirectResponse( - `${redirectSlug ?? lowerCaseSlug}.config.json`, - env.url - ) - } const grapherPageResp = await fetchUnparsedGrapherConfig(slug, env, etag) @@ -169,29 +174,10 @@ async function handleConfigRequest( return new Response(null, { status: 304 }) } - if (grapherPageResp.status !== 200) { - // If the request is a 404, we check if there's a redirect for it. - // If there is, we redirect to the new page. - const redirectSlug = await getOptionalRedirectForSlug( - slug, - env.url, - env - ) - if (redirectSlug && redirectSlug !== slug) { - console.log("Redirecting to ", redirectSlug) - return createRedirectResponse( - `${redirectSlug}.config.json`, - env.url - ) - } else { - console.log("Returning 404 for ", slug) - // Otherwise we just return the status code. - return new Response(null, { status: grapherPageResp.status }) - } + if (grapherPageResp.status === 404) { + throw new StatusError(404) } - console.log("Returning 200 for ", slug) - const cacheControl = shouldCache ? "public, s-maxage=3600, max-age=0, must-revalidate" : "public, s-maxage=0, max-age=0, must-revalidate"