From 184c192c7585a668e3ff97aaa5948f94597d8dbc Mon Sep 17 00:00:00 2001 From: Ricky Date: Thu, 5 Sep 2024 16:54:29 +0100 Subject: [PATCH] Support Git references in subset merger (#987) * Support Git references in subset merger In YAML, you can now do: repo: slug: notofonts/latin-greek-cyrillic ref: e7f1736c5ad0dc2abfc4dcd49ebca50abf612b29 Where before repo could only take a string (being the repo slug) This is still supported with the old behaviour of assuming you want the latest of the branch * Simplify reference specifying approach Keep the repo key as a string, interpret anything after @ as the git ref * Type hinting fairy visits * Support '@latest' to get latest GitHub release for a subset * Check if sources are already downloaded after resolving 'latest' * Return resolved ref to fix designspace discovery * Resolve 'latest' to tag early and don't use official zipball URL Add GitHubClient.get_latest_tag_name() This massively simplifies code paths and doesn't require SubsetMerger.download_for_subsetting to have to do more than it should This also allows us to know the name of the top level folder within the zip file in all scenarios download_for_subsetting code is now a fair bit simpler and far more similar to before the ref feature was added --- Lib/gftools/gfgithub.py | 4 ++ Lib/gftools/scripts/add_ds_subsets.py | 7 ++- Lib/gftools/subsetmerger.py | 76 ++++++++++++++++++++------- 3 files changed, 68 insertions(+), 19 deletions(-) diff --git a/Lib/gftools/gfgithub.py b/Lib/gftools/gfgithub.py index 06fe741b..80eba2fe 100644 --- a/Lib/gftools/gfgithub.py +++ b/Lib/gftools/gfgithub.py @@ -85,8 +85,12 @@ def get_blob(self, file_sha): return response def get_latest_release(self): + """https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#get-the-latest-release""" return self._get(self.rest_url("releases/latest")) + def get_latest_release_tag(self) -> str: + return self.get_latest_release()["tag_name"] + def open_prs(self, pr_head: str, pr_base_branch: str) -> typing.List: return self._get( self.rest_url("pulls", state="open", head=pr_head, base=pr_base_branch) diff --git a/Lib/gftools/scripts/add_ds_subsets.py b/Lib/gftools/scripts/add_ds_subsets.py index 7ed517dd..b888987d 100644 --- a/Lib/gftools/scripts/add_ds_subsets.py +++ b/Lib/gftools/scripts/add_ds_subsets.py @@ -74,7 +74,11 @@ def main(args=None): parser.add_argument("--yaml", "-y", help="YAML file describing subsets") - parser.add_argument("--repo", "-r", help="GitHub repository to use for subsetting") + parser.add_argument( + "--repo", + "-r", + help="GitHub repository slug to use for subsetting. Use @ after slug to specify branch/tag/commit, e.g. org/repo@v0.1.0; 'latest' is supported for latest release", + ) parser.add_argument("--file", "-f", help="Source file within GitHub repository") parser.add_argument("--name", "-n", help="Name of subset to use from glyphset") parser.add_argument( @@ -107,6 +111,7 @@ def main(args=None): print("Must specify --name or --codepoints") sys.exit(1) # And then construct the YAML-like object ourselves + # See subsets_schema in ..subsetmerger subsets = [ { "from": { diff --git a/Lib/gftools/subsetmerger.py b/Lib/gftools/subsetmerger.py index 5ccf82aa..42f20f5d 100644 --- a/Lib/gftools/subsetmerger.py +++ b/Lib/gftools/subsetmerger.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import logging import os import re @@ -6,12 +8,14 @@ from collections import defaultdict from pathlib import Path from tempfile import TemporaryDirectory +from typing import Any from zipfile import ZipFile import ufoLib2 import yaml from fontmake.font_project import FontProject from fontTools.designspaceLib import DesignSpaceDocument +from gftools.gfgithub import GitHubClient from glyphsets import unicodes_per_glyphset from strictyaml import HexInt, Int, Map, Optional, Seq, Str, Enum from ufomerge import merge_ufos @@ -22,7 +26,9 @@ logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) -SUBSET_SOURCES = { +FALLBACK_BRANCH_NAME = "main" + +SUBSET_SOURCES: dict[str, tuple[str, str]] = { "Noto Sans": ("notofonts/latin-greek-cyrillic", "sources/NotoSans.glyphspackage"), "Noto Serif": ("notofonts/latin-greek-cyrillic", "sources/NotoSerif.glyphspackage"), "Noto Sans Devanagari": ( @@ -143,7 +149,7 @@ def add_subsets(self): ds.write(self.output) - def add_subset(self, target_ufo, ds, ds_source, subset): + def add_subset(self, target_ufo, ds, ds_source, subset) -> bool: # First, we find a donor UFO that matches the location of the # UFO to merge. location = dict(ds_source.location) @@ -171,7 +177,9 @@ def add_subset(self, target_ufo, ds, ds_source, subset): ) return True - def obtain_upstream(self, upstream, location): + def obtain_upstream( + self, upstream: str | dict[str, Any], location + ) -> ufoLib2.Font | None: # Either the upstream is a string, in which case we try looking # it up in the SUBSET_SOURCES table, or it's a dict, in which # case it's a repository / path pair. @@ -179,14 +187,27 @@ def obtain_upstream(self, upstream, location): if upstream not in SUBSET_SOURCES: raise ValueError("Unknown subsetting font %s" % upstream) repo, path = SUBSET_SOURCES[upstream] - font_name = upstream + ref = FALLBACK_BRANCH_NAME + font_name = f"{upstream}/{ref}" else: - repo = upstream["repo"] + repo: str = upstream["repo"] + parts = repo.split("@", 1) + if len(parts) == 1: + # Repo was already just the slug, use fallback ref + ref = FALLBACK_BRANCH_NAME + else: + # Guaranteed to be 2 parts + repo, ref = parts + if ref == "latest": + # Resolve latest release's tag name + ref = GitHubClient.from_url( + f"https://github.com/{repo}" + ).get_latest_release_tag() path = upstream["path"] - font_name = "%s/%s" % (repo, path) - path = os.path.join(self.cache_dir, repo, path) + font_name = f"{repo}/{ref}/{path}" + path = os.path.join(self.cache_dir, repo, ref, path) - self.download_for_subsetting(repo) + self.download_for_subsetting(repo, ref) # We're doing a UFO-UFO merge, so Glyphs files will need to be converted if path.endswith((".glyphs", ".glyphspackage")): @@ -206,7 +227,7 @@ def obtain_upstream(self, upstream, location): return open_ufo(source_ufo.path) return None - def glyphs_to_ufo(self, source, directory=None): + def glyphs_to_ufo(self, source: str, directory: Path | None = None) -> str: source = Path(source) if directory is None: directory = source.resolve().parent @@ -310,15 +331,34 @@ def generate_subset_instances(self, source_ds, font_name, instance): os.path.dirname(source_ds.path), instance.filename ) - def download_for_subsetting(self, fullrepo): - dest = os.path.join(self.cache_dir, fullrepo) + def download_for_subsetting(self, fullrepo: str, ref: str) -> None: + """Downloads a GitHub repository at a given reference""" + dest = os.path.join(self.cache_dir, f"{fullrepo}/{ref}") if os.path.exists(dest): + # Assume sources exist & are up-to-date (we have no good way of + # checking this); do nothing + logger.info("Subset files present on disk, skipping download") return - user, repo = fullrepo.split("/") - os.makedirs(os.path.join(self.cache_dir, user), exist_ok=True) - repo_zipball = f"https://github.com/{fullrepo}/archive/refs/heads/main.zip" - logger.info(f"Downloading {fullrepo}") + # Make the parent folder to dest but not dest itself. This means that + # the shutil.move at the end of this function won't create + # dest/repo-ref, instead having dest contain the contents of repo-ref + os.makedirs(os.path.join(self.cache_dir, fullrepo), exist_ok=True) + + # This URL scheme doesn't appear to be 100% official for tags & + # branches, but it seems to accept any valid git reference + # See https://stackoverflow.com/a/13636954 and + # https://docs.github.com/en/repositories/working-with-files/using-files/downloading-source-code-archives#source-code-archive-urls + repo_zipball = f"https://github.com/{fullrepo}/archive/{ref}.zip" + logger.info(f"Downloading {fullrepo} {ref}") + repo_zip = ZipFile(download_file(repo_zipball)) - with TemporaryDirectory() as d: - repo_zip.extractall(d) - shutil.move(os.path.join(d, repo + "-main"), dest) + _user, repo = fullrepo.split("/", 1) + # If the tag name began with a "v" and looked like a version (i.e. has a + # digit immediately afterwards), the "v" is stripped by GitHub. We have + # to match this behaviour to get the correct name of the top-level + # directory within the zip file + if re.match(r"^v\d", ref): + ref = ref[1:] + with TemporaryDirectory() as temp_dir: + repo_zip.extractall(temp_dir) + shutil.move(os.path.join(temp_dir, f"{repo}-{ref}"), dest)