Skip to content

Commit

Permalink
Merge pull request #771 from googlefonts/issue-770
Browse files: browse the repository at this point in the history
[glyphdata] handle production names for ligatures with script suffixes
  • Loading branch information
khaledhosny authored Aug 20, 2023
2 parents 9066b26 + 7a3271b commit 273496b
Show file tree
Hide file tree
Showing 3 changed files with 270 additions and 13 deletions.
62 changes: 50 additions & 12 deletions Lib/glyphsLib/glyphdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,22 +196,16 @@ def _construct_category(glyph_name, data):
# Glyph variants (e.g. "fi.alt") don't have their own entry, so we strip e.g. the
# ".alt" and try a second lookup with just the base name. A variant is hopefully in
# the same category as its base glyph.
base_name = glyph_name.split(".", 1)[0]
base_attribute = data.names.get(base_name) or {}
base_name = _split_glyph_name(glyph_name, data)[0]
base_attribute = _lookup_attributes(base_name, data) or {}
if base_attribute:
category = base_attribute.get("category")
sub_category = base_attribute.get("subCategory")
return category, sub_category

# Detect ligatures.
if "_" in base_name:
base_names = base_name.split("_")
# The last name has a suffix, add it to all the names.
if "-" in base_names[-1]:
_, s = base_names[-1].rsplit("-", 1)
base_names = [
(n if n.endswith(f"-{s}") else f"{n}-{s}") for n in base_names
]
base_names = _split_ligature_glyph_name(base_name, data)
base_names_attributes = [_lookup_attributes(name, data) for name in base_names]
first_attribute = base_names_attributes[0]

Expand Down Expand Up @@ -300,6 +294,50 @@ def _translate_category(glyph_name, unicode_category):
return glyphs_category


def _split_ligature_glyph_name(name, data):
    """Break a ligature glyph name into the names of its components.

    The name is split on "_". If the final component carries a script
    suffix (a trailing "-script", as in "a_b-script"), the suffix is treated
    as belonging to the whole ligature and is appended to every component,
    except when:

    * the component already contains a "-" of its own, or
    * the bare component is a known glyph name and the suffixed one is not.

    A name counts as "known" when ``_lookup_attributes(candidate, data)``
    returns a truthy value.

    Args:
        name: The glyph name to split.
        data: Glyph data object, only ever forwarded to
            ``_lookup_attributes``.

    Returns:
        A list of component glyph names. A name with a script suffix but no
        "_" separator comes back as a one-element list holding the name
        unchanged.
    """
    components = name.split("_")

    # Without a script suffix on the last component the plain split is final.
    if "-" not in components[-1]:
        return components

    # Peel the script suffix off the whole name, then split what is left.
    stem, script = name.rsplit("-", 1)
    stems = stem.split("_")

    # A single component is not a ligature: keep its suffix attached.
    if len(stems) <= 1:
        return name.split("_")

    for index, component in enumerate(stems):
        # Components that bring their own suffix are left alone.
        if "-" in component:
            continue
        suffixed = f"{component}-{script}"
        # Prefer the bare name only when it is known and the suffixed
        # spelling is not; in every other case the suffixed spelling wins.
        bare_known = _lookup_attributes(component, data)
        if bare_known and not _lookup_attributes(suffixed, data):
            continue
        stems[index] = suffixed
    return stems


def _split_glyph_name(name, data):
    """Split a glyph name into ``(base, separator, suffix)``.

    With at most one "."-separated suffix this is exactly
    ``name.partition(".")``. With several suffixes (e.g. ".below.ro") the
    suffixes are appended to the base one at a time, and the first
    combination for which ``_lookup_attributes`` returns a truthy value
    becomes the base; the suffixes not yet consumed form the returned suffix.

    Note that the full name is the last combination tried; if only that one
    matches, the result is ``(name, ".", "")``: an empty suffix, but still
    with the "." separator.

    Args:
        name: The glyph name to split.
        data: Glyph data object, only ever forwarded to
            ``_lookup_attributes``.

    Returns:
        A 3-tuple ``(base, separator, suffix)`` in the same layout as
        ``str.partition``. If no combination matches, it is the plain
        ``name.partition(".")`` result.
    """
    base, dot, suffix = name.partition(".")

    # Zero or one suffix: nothing to probe, the plain partition is the answer.
    if not dot or dot not in suffix:
        return base, dot, suffix

    pieces = suffix.split(dot)
    candidate = base
    for consumed, piece in enumerate(pieces, start=1):
        # Grow the candidate base by one more suffix and see if it is known.
        candidate = candidate + dot + piece
        if _lookup_attributes(candidate, data):
            return candidate, dot, dot.join(pieces[consumed:])

    # No extended base is known; fall back to splitting at the first dot.
    return base, dot, suffix


def _construct_production_name(glyph_name, data=None):
"""Return the production name for a glyph name from the GlyphData.xml
database according to the AGL specification.
Expand All @@ -321,7 +359,7 @@ def _construct_production_name(glyph_name, data=None):

# At this point, we have already checked the data for the full glyph name, so
# directly go to the base name here (e.g. when looking at "fi.alt").
base_name, dot, suffix = glyph_name.partition(".")
base_name, dot, suffix = _split_glyph_name(glyph_name, data)
glyphinfo = _lookup_attributes(base_name, data)
if glyphinfo and glyphinfo.get("production"):
# Found the base glyph.
Expand All @@ -339,7 +377,7 @@ def _construct_production_name(glyph_name, data=None):

# So we have a ligature that is not mapped in the data. Split it up and
# look up the individual parts.
base_name_parts = base_name.split("_")
base_name_parts = _split_ligature_glyph_name(base_name, data)

# If all parts are in the AGLFN list, the glyph name is our production
# name already.
Expand All @@ -354,7 +392,7 @@ def _construct_production_name(glyph_name, data=None):
# A name present in the AGLFN is a production name already.
production_names.append(part)
else:
part_entry = data.names.get(part) or {}
part_entry = _lookup_attributes(part, data) or {}
part_production_name = part_entry.get("production")
if part_production_name:
production_names.append(part_production_name)
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ fs==2.4.16
# via fonttools
iniconfig==1.1.1
# via pytest
lxml==4.9.1
lxml==4.9.3
# via xmldiff
mccabe==0.7.0
# via flake8
Expand Down
219 changes: 219 additions & 0 deletions tests/glyphdata_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import unittest
import xml.etree.ElementTree

import pytest

from glyphsLib.glyphdata import get_glyph


Expand Down Expand Up @@ -81,6 +83,7 @@ def prod(n):
self.assertEqual(prod("vaphalaa-malayalam"), "uni0D030D35.1")
self.assertEqual(prod("onethird"), "uni2153")
self.assertEqual(prod("Jacute"), "uni004A0301")
self.assertEqual(prod("Ech_Vew-arm.liga"), "uni0535054E.liga")

def test_unicode(self):
def uni(n):
Expand Down Expand Up @@ -190,5 +193,221 @@ def test_glyphdata_no_duplicates(self):
production_names.add(glyph_name_production)


# Additional production-name expectations, kept in a table (rather than as
# individual assertions) because parametrizing over it is easier.
# Each key is a glyph name handed to get_glyph() by test_prod_names, and each
# value is the production_name that test expects back.
# NOTE(review): the trailing comments on a few entries look like an alternative
# spelling of the production name (presumably what another tool emits) -- confirm.
PRODUCTION_NAMES = {
"Ech_Vew-arm.liga": "uni0535054E.liga",
"Men_Ech-arm.liga": "uni05440535.liga",
"Men_Ini-arm.liga": "uni0544053B.liga",
"Men_Now-arm.liga": "uni05440546.liga",
"Men_Xeh-arm.liga": "uni0544053D.liga",
"Vew_Now-arm.liga": "uni054E0546.liga",
"aiMatra_anusvara-deva": "uni09480902",
"aiMatra_candraBindu-deva": "uni09480901",
"aiMatra_reph-deva": "uni09480930094D",
"aiMatra_reph_anusvara-deva": "uni09480930094D0902",
"ca_iMatra-tamil": "uni0B9A0BBF",
"ca_uMatra-tamil": "uni0B9A0BC1",
"ca_uuMatra-tamil": "uni0B9A0BC2",
"ch_ya-deva": "uni091B094D092F",
"d_ba-deva": "uni0926094D092C",
"d_bha-deva": "uni0926094D092D",
"d_da-deva": "uni0926094D0926",
"d_dh_ya-deva": "uni0926094D0927094D092F",
"d_dha-deva": "uni0926094D0927",
"d_ga-deva": "uni0926094D0917",
"d_gha-deva": "uni0926094D0918",
"d_ma-deva": "uni0926094D092E",
"d_ra-deva": "uni0926094D0930",
"d_va-deva": "uni0926094D0935",
"d_ya-deva": "uni0926094D092F",
"da-khmer.below.ro": "uni17D2178A.ro",
"da_rVocalicMatra-deva": "uni09260943",
"da_uMatra-deva": "uni09260941",
"da_uuMatra-deva": "uni09260942",
"dd_dda-deva": "uni0921094D0921",
"dd_ddha-deva": "uni0921094D0922",
"dd_ya-deva": "uni0921094D092F",
"ddh_ddha-deva": "uni0922094D0922",
"ddh_ya-deva": "uni0922094D092F",
"eCandraMatra_anusvara-deva": "uni09450902",
"eCandraMatra_reph-deva": "uni09450930094D",
"eMatra_anusvara-deva": "uni09470902",
"eMatra_candraBindu-deva": "uni09470901",
"eMatra_reph-deva": "uni09470930094D",
"eMatra_reph_anusvara-deva": "uni09470930094D0902",
"eShortMatra_anusvara-deva": "uni09460902",
"eShortMatra_candraBindu-deva": "uni09460901",
"eShortMatra_reph-deva": "uni09460930094D",
"eShortMatra_reph_anusvara-deva": "uni09460930094D0902",
"ech_vew-arm.liga.sc": "uni0565057E.liga.sc",
"finalkaf_qamats-hb": "uni05DA05B8",
"finalkaf_sheva-hb": "uni05DA05B0",
"finalkafdagesh_qamats-hb": "uniFB3A05B8",
"finalkafdagesh_sheva-hb": "uniFB3A05B0",
"h_la-deva": "uni0939094D0932",
"h_ma-deva": "uni0939094D092E",
"h_na-deva": "uni0939094D0928",
"h_nna-deva": "uni0939094D0923",
"h_ra-deva": "uni0939094D0930",
"h_ra_uMatra-deva": "uni0939094D09300941",
"h_ra_uuMatra-deva": "uni0939094D09300942",
"h_va-deva": "uni0939094D0935",
"h_ya-deva": "uni0939094D092F",
"ha_iMatra-tamil": "uni0BB90BBF",
"ha_iiMatra-tamil": "uni0BB90BC0",
"ha_rVocalicMatra-deva": "uni09390943",
"ha_rrVocalicMatra-deva": "uni09390944",
"ha_uMatra-deva": "uni09390941",
"ha_uMatra-tamil": "uni0BB90BC1",
"ha_uuMatra-deva": "uni09390942",
"ha_uuMatra-tamil": "uni0BB90BC2",
"hatafpatah_siluqleft-hb": "uni05B205BD",
"hatafqamats_siluqleft-hb": "uni05B305BD",
"hatafsegol_siluqleft-hb": "uni05B105BD",
"iMark_toandakhiat-khmer": "uni17B717CD",
"iMark_toandakhiat-khmer.narrow": "uni17B717CD.narrow",
"idotaccent.sc": "i.loclTRK.sc", # i.sc.loclTRK
"iiMatra_reph-deva": "uni09400930094D",
"iiMatra_reph-deva.alt2": "uni09400930094D.alt2",
"iiMatra_reph_anusvara-deva": "uni09400930094D0902",
"iiMatra_reph_anusvara-deva.alt2": "uni09400930094D0902.alt2",
"j_ny-deva": "uni091C094D091E094D",
"j_ny-deva.alt2": "uni091C094D091E094D.alt2",
"j_ny-deva.alt3": "uni091C094D091E094D.alt3",
"j_ny-deva.alt4": "uni091C094D091E094D.alt4",
"j_ny-deva.alt5": "uni091C094D091E094D.alt5",
"j_ny-deva.alt6": "uni091C094D091E094D.alt6",
"j_ny-deva.alt7": "uni091C094D091E094D.alt7",
"j_ny-deva.alt8": "uni091C094D091E094D.alt8",
"j_nya-deva": "uni091C094D091E",
"ja_iMatra-tamil": "uni0B9C0BBF",
"ja_iiMatra-tamil": "uni0B9C0BC0",
"k_ss-deva": "uni0915094D0937094D",
"k_ss-deva.alt2": "uni0915094D0937094D.alt2",
"k_ss-deva.alt3": "uni0915094D0937094D.alt3",
"k_ss-deva.alt4": "uni0915094D0937094D.alt4",
"k_ss-deva.alt5": "uni0915094D0937094D.alt5",
"k_ss-deva.alt6": "uni0915094D0937094D.alt6",
"k_ss-deva.alt7": "uni0915094D0937094D.alt7",
"k_ssa-deva": "uni0915094D0937",
"k_ssa-tamil": "uni0B950BCD0BB7",
"k_ssa_iMatra-tamil": "uni0B950BCD0BB70BBF",
"k_ssa_iiMatra-tamil": "uni0B950BCD0BB70BC0",
"k_ssa_uMatra-tamil": "uni0B950BCD0BB70BC1",
"k_ssa_uuMatra-tamil": "uni0B950BCD0BB70BC2",
"ka_iMatra-tamil": "uni0B950BBF",
"ka_uMatra-tamil": "uni0B950BC1",
"ka_uuMatra-tamil": "uni0B950BC2",
"la_iMatra-tamil": "uni0BB20BBF",
"la_iiMatra-tamil": "uni0BB20BC0",
"la_uMatra-tamil": "uni0BB20BC1",
"la_uuMatra-tamil": "uni0BB20BC2",
"lamed_dagesh_holam-hb": "uni05DC05BC05B9",
"lamed_holam-hb": "uni05DC05B9",
"lla_uMatra-tamil": "uni0BB30BC1",
"lla_uuMatra-tamil": "uni0BB30BC2",
"llla_iMatra-tamil": "uni0BB40BBF",
"llla_iiMatra-tamil": "uni0BB40BC0",
"llla_uMatra-tamil": "uni0BB40BC1",
"llla_uuMatra-tamil": "uni0BB40BC2",
"ma_iMatra-tamil": "uni0BAE0BBF",
"ma_iiMatra-tamil": "uni0BAE0BC0",
"ma_uMatra-tamil": "uni0BAE0BC1",
"ma_uuMatra-tamil": "uni0BAE0BC2",
"mo-khmer.below.ro": "uni17D21798.ro",
"moMa_underscore-thai": "uni0E21005F", # uni0E21_uni005F
"na_iMatra-tamil": "uni0BA80BBF",
"na_uMatra-tamil": "uni0BA80BC1",
"na_uuMatra-tamil": "uni0BA80BC2",
"ng_ya-deva": "uni0919094D092F",
"nga_uMatra-tamil": "uni0B990BC1",
"nga_uuMatra-tamil": "uni0B990BC2",
"ngoNgu_underscore-thai": "uni0E07005F", # uni0E07_uni005F
"niggahita_maiCatawa-lao": "uni0ECD0ECB",
"niggahita_maiCatawa-lao.right": "uni0ECD0ECB.right",
"niggahita_maiEk-lao": "uni0ECD0EC8",
"niggahita_maiEk-lao.right": "uni0ECD0EC8.right",
"niggahita_maiTho-lao": "uni0ECD0EC9",
"niggahita_maiTho-lao.right": "uni0ECD0EC9.right",
"niggahita_maiTi-lao": "uni0ECD0ECA",
"niggahita_maiTi-lao.right": "uni0ECD0ECA.right",
"nikhahit_maiChattawa-thai": "uni0E4D0E4B",
"nikhahit_maiChattawa-thai.narrow": "uni0E4D0E4B.narrow",
"nikhahit_maiEk-thai": "uni0E4D0E48",
"nikhahit_maiEk-thai.narrow": "uni0E4D0E48.narrow",
"nikhahit_maiTho-thai": "uni0E4D0E49",
"nikhahit_maiTho-thai.narrow": "uni0E4D0E49.narrow",
"nikhahit_maiTri-thai": "uni0E4D0E4A",
"nikhahit_maiTri-thai.narrow": "uni0E4D0E4A.narrow",
"nna_uMatra-tamil": "uni0BA30BC1",
"nna_uuMatra-tamil": "uni0BA30BC2",
"nnna_uMatra-tamil": "uni0BA90BC1",
"nnna_uuMatra-tamil": "uni0BA90BC2",
"nno-khmer.below.narrow1": "uni17D2178E.narrow1",
"nno-khmer.below.narrow2": "uni17D2178E.narrow2",
"noNu_underscore-thai": "uni0E19005F", # uni0E19_uni005F
"nya_iMatra-tamil": "uni0B9E0BBF",
"nya_uMatra-tamil": "uni0B9E0BC1",
"nya_uuMatra-tamil": "uni0B9E0BC2",
"nyo-khmer.full.below.narrow": "uni17D21789.full.below.narrow",
"p_ta-deva": "uni092A094D0924",
"pa_uMatra-tamil": "uni0BAA0BC1",
"pa_uuMatra-tamil": "uni0BAA0BC2",
"pho-khmer.below.ro": "uni17D21797.ro",
"po-khmer.below.ro": "uni17D21796.ro",
"ra_uMatra-deva": "uni09300941",
"ra_uMatra-tamil": "uni0BB00BC1",
"ra_uuMatra-deva": "uni09300942",
"ra_uuMatra-tamil": "uni0BB00BC2",
"reph_anusvara-deva": "uni0930094D0902",
"ro-khmer.pre.narrow": "uni17D2179A.narrow",
"rra_iMatra-tamil": "uni0BB10BBF",
"rra_iiMatra-tamil": "uni0BB10BC0",
"rra_uMatra-tamil": "uni0BB10BC1",
"rra_uuMatra-tamil": "uni0BB10BC2",
"sa_iMatra-tamil": "uni0BB80BBF",
"sa_iiMatra-tamil": "uni0BB80BC0",
"sa_uMatra-tamil": "uni0BB80BC1",
"sa_uuMatra-tamil": "uni0BB80BC2",
"sh_r-deva": "uni0936094D094D0930", # uni0936094D0930094D
"sh_ra-deva": "uni0936094D0930",
"sh_ra_iiMatra-tamil": "uni0BB60BCD0BB00BC0",
"ss_tta-deva": "uni0937094D091F",
"ss_ttha-deva": "uni0937094D0920",
"ssa_iMatra-tamil": "uni0BB70BBF",
"ssa_iiMatra-tamil": "uni0BB70BC0",
"ssa_uMatra-tamil": "uni0BB70BC1",
"ssa_uuMatra-tamil": "uni0BB70BC2",
"t_r-deva": "uni0924094D094D0930", # uni0924094D0930094D
"t_ra-deva": "uni0924094D0930",
"t_ta-deva": "uni0924094D0924",
"ta-khmer.below.ro": "uni17D2178F.ro",
"ta_iMatra-tamil": "uni0BA40BBF",
"ta_uMatra-tamil": "uni0BA40BC1",
"ta_uuMatra-tamil": "uni0BA40BC2",
"tt_tta-deva": "uni091F094D091F",
"tt_ttha-deva": "uni091F094D0920",
"tt_ya-deva": "uni091F094D092F",
"tta_iMatra-tamil": "uni0B9F0BBF",
"tta_uMatra-tamil": "uni0B9F0BC1",
"tta_uuMatra-tamil": "uni0B9F0BC2",
"tth_ttha-deva": "uni0920094D0920",
"tth_ya-deva": "uni0920094D092F",
"va_uMatra-tamil": "uni0BB50BC1",
"va_uuMatra-tamil": "uni0BB50BC2",
"ya_uMatra-tamil": "uni0BAF0BC1",
"ya_uuMatra-tamil": "uni0BAF0BC2",
"yoYing_underscore-thai": "uni0E0D005F", # uni0E0D_uni005F
}


@pytest.mark.parametrize("test_input,expected", PRODUCTION_NAMES.items())
def test_prod_names(test_input, expected):
    """Check that the glyph named *test_input* gets production name *expected*."""
    glyph = get_glyph(test_input)
    assert glyph.production_name == expected


# Script entry point: run this module's tests through unittest when executed
# directly.
# NOTE(review): unittest.main() only collects unittest.TestCase-based tests; the
# pytest-parametrized test_prod_names function needs pytest to be run.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 273496b

Please sign in to comment.