Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLDR-17014 No code fallbacks for language/script paths #4290

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion common/testData/localeIdentifiers/localeDisplayName.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2366,7 +2366,7 @@ zh-Hans-fonipa; zh (Hans, FONIPA)

en-MM; en (MM)
es; es
es-419; es_419
es-419; es (419)
es-Cyrl-MX; es (Cyrl, MX)
hi-Latn; hi (Latn)
nl-BE; nl (BE)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,10 @@ public String getStringValue(String xpath) {
result = dataSource.getValueAtPath(fallbackPath);
}
}
// Note: the following can occur even when result != null at this point, and it can
// improve the result. For example, the code above may give "zh_Hans (FONIPA)", while the
// constructed value gotten below is "xitoy [soddalashgan] (FONIPA)" (in locale uz),
// which is expected by TestLocaleDisplay.
if (isResolved()
&& GlossonymConstructor.valueIsBogus(result)
&& GlossonymConstructor.pathIsEligible(xpath)) {
Expand Down Expand Up @@ -3148,6 +3152,10 @@ public Set<String> getRawExtraPaths() {
private List<String> getRawExtraPathsPrivate() {
Set<String> toAddTo = new HashSet<>();
SupplementalDataInfo supplementalData = CLDRConfig.getInstance().getSupplementalDataInfo();

ExtraPaths.getInstance(NameType.LANGUAGE).append(toAddTo);
ExtraPaths.getInstance(NameType.SCRIPT).append(toAddTo);

// units
PluralInfo plurals = supplementalData.getPlurals(PluralType.cardinal, getLocaleID());
if (plurals == null && DEBUG) {
Expand Down Expand Up @@ -3527,7 +3535,7 @@ public String getFillInValue(String distinguishedPath) {
/**
 * Reports whether the value for the given path comes from a real (non-root, non-constructed)
 * source locale.
 *
 * @param distinguishedPath the path whose source locale is looked up via {@code
 *     getSourceLocaleID(distinguishedPath, null)}
 * @return true only if a source locale ID was found and it is neither root nor {@link
 *     XMLSource#CODE_FALLBACK_ID}; false if no source locale ID was found
 */
public boolean isNotRoot(String distinguishedPath) {
String source = getSourceLocaleID(distinguishedPath, null);
return source != null
// NOTE(review): the literal "root" test and the LocaleNames.ROOT test below look redundant
// (presumably LocaleNames.ROOT is "root" -- confirm, then keep only the constant).
&& !source.equals("root")
&& !source.equals(LocaleNames.ROOT)
&& !source.equals(XMLSource.CODE_FALLBACK_ID);
}

Expand Down
100 changes: 100 additions & 0 deletions tools/cldr-code/src/main/java/org/unicode/cldr/util/ExtraPaths.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package org.unicode.cldr.util;

import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/**
 * Supplies "extra" paths for a given {@link NameType}: one key path for each good available code
 * of the matching {@link StandardCodes.CodeType}, plus a fixed list of additional codes and alt
 * variants.
 *
 * <p>One instance exists per name type (see {@link #getInstance(NameType)}). The path set is built
 * on the first call to {@link #append(Collection)} and reused afterwards. Only {@code LANGUAGE}
 * and {@code SCRIPT} are currently supported; for any other name type {@link
 * #append(Collection)} throws {@link IllegalArgumentException}.
 */
public class ExtraPaths {

    /** Per-name-type instances; only accessed inside the synchronized {@link #getInstance}. */
    private static final Map<NameType, ExtraPaths> instances = new HashMap<>();

    /**
     * Returns the shared instance for the given name type, creating it on first use.
     *
     * <p>Synchronized because the backing map is a plain {@link HashMap} shared by all callers.
     *
     * @param nameType the kind of name whose extra paths are wanted
     * @return the (possibly newly created) instance for {@code nameType}
     */
    public static synchronized ExtraPaths getInstance(NameType nameType) {
        return instances.computeIfAbsent(nameType, ExtraPaths::new);
    }

    private final NameType nameType;

    /** The cached paths; empty until the first successful {@link #append(Collection)}. */
    private final Collection<String> paths;

    private ExtraPaths(NameType nameType) {
        this.nameType = nameType;
        paths = new HashSet<>();
    }

    /**
     * Adds all extra paths for this instance's name type to the given collection.
     *
     * <p>The paths are built on first use. They are assembled in a temporary set and only then
     * copied into the cache, so a failure while building never leaves a partial set behind that
     * later calls would mistake for the complete result.
     *
     * @param toAddTo the collection that receives the paths
     * @throws IllegalArgumentException if this instance's name type is not supported
     */
    synchronized void append(Collection<String> toAddTo) {
        if (paths.isEmpty()) {
            paths.addAll(buildPaths());
        }
        toAddTo.addAll(paths);
    }

    /** Builds the complete path set (code paths plus alt paths) for this name type. */
    private Set<String> buildPaths() {
        StandardCodes.CodeType codeType = codeTypeFor(nameType);
        StandardCodes sc = StandardCodes.make();
        // Copy into a TreeSet so the code set can be adjusted without touching the original.
        Set<String> codes = new TreeSet<>(sc.getGoodAvailableCodes(codeType));
        adjustCodeSet(codes);
        Set<String> built = new HashSet<>();
        for (String code : codes) {
            built.add(nameType.getKeyPath(code));
        }
        addAltPaths(built);
        return built;
    }

    /**
     * Maps a name type to the corresponding code type.
     *
     * @throws IllegalArgumentException if there is no mapping for {@code nameType}
     */
    private static StandardCodes.CodeType codeTypeFor(NameType nameType) {
        // TODO: https://unicode-org.atlassian.net/browse/CLDR-17014
        // Replace with StandardCodes.CodeType.fromNameType(nameType) once available;
        // see https://github.com/unicode-org/cldr/pull/4287
        switch (nameType) {
            case LANGUAGE:
                return StandardCodes.CodeType.language;
            case SCRIPT:
                return StandardCodes.CodeType.script;
            default:
                throw new IllegalArgumentException(
                        "Unsupported name type "
                                + nameType
                                + " (TODO: CodeType.fromNameType)");
        }
    }

    /** For languages: drops the root code and adds the fixed list of extra locale-like codes. */
    private void adjustCodeSet(Set<String> codes) {
        if (nameType == NameType.LANGUAGE) {
            codes.remove(LocaleNames.ROOT);
            codes.addAll(
                    List.of(
                            "ar_001", "de_AT", "de_CH", "en_AU", "en_CA", "en_GB", "en_US",
                            "es_419", "es_ES", "es_MX", "fa_AF", "fr_CA", "fr_CH", "frc", "hi_Latn",
                            "lou", "nds_NL", "nl_BE", "pt_BR", "pt_PT", "ro_MD", "sw_CD", "zh_Hans",
                            "zh_Hant"));
        }
    }

    /** Adds the fixed set of alt-variant paths for this name type to {@code target}. */
    private void addAltPaths(Set<String> target) {
        switch (nameType) {
            case LANGUAGE:
                addAltPath(target, "en_GB", "short");
                addAltPath(target, "en_US", "short");
                addAltPath(target, "az", "short");
                addAltPath(target, "ckb", "menu");
                addAltPath(target, "ckb", "variant");
                addAltPath(target, "hi_Latn", "variant");
                addAltPath(target, "yue", "menu");
                addAltPath(target, "zh", "menu");
                addAltPath(target, "zh_Hans", "long");
                addAltPath(target, "zh_Hant", "long");
                break;
            case SCRIPT:
                addAltPath(target, "Hans", "stand-alone");
                addAltPath(target, "Hant", "stand-alone");
                break;
            default:
                // No alt paths for other name types.
                break;
        }
    }

    /**
     * Adds the key path for {@code code} with an {@code [@alt="..."]} attribute inserted directly
     * after the last {@code ]} of the path.
     *
     * @throws IllegalArgumentException if the key path contains no {@code ]}, since the alt
     *     attribute would otherwise be prepended to the path and yield a malformed result
     */
    private void addAltPath(Set<String> target, String code, String alt) {
        String fullpath = nameType.getKeyPath(code);
        int lastBracket = fullpath.lastIndexOf(']');
        if (lastBracket < 0) {
            throw new IllegalArgumentException(
                    "Cannot add alt=" + alt + " to path without attributes: " + fullpath);
        }
        int insertAt = lastBracket + 1;
        String altPath =
                fullpath.substring(0, insertAt)
                        + "[@alt=\""
                        + alt
                        + "\"]"
                        + fullpath.substring(insertAt);
        target.add(altPath);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -1605,8 +1605,6 @@ public String getLocaleID() {
Map<String, String> zone_countries = sc.getZoneToCountry();
List<NameType> nameTypeList =
List.of(
NameType.LANGUAGE,
NameType.SCRIPT,
NameType.TERRITORY,
NameType.VARIANT,
NameType.CURRENCY,
Expand All @@ -1629,39 +1627,10 @@ public String getLocaleID() {
if (s != null && s.size() == 1) continue;
}
value = TimezoneFormatter.getFallbackName(value);
} else if (nameType == NameType.LANGUAGE) {
if (ROOT_ID.equals(value)) {
continue;
}
}
addFallbackCode(nameType, code, value);
}
}

String[] extraCodes = {
"ar_001", "de_AT", "de_CH", "en_AU", "en_CA", "en_GB", "en_US", "es_419", "es_ES",
"es_MX", "fa_AF", "fr_CA", "fr_CH", "frc", "hi_Latn", "lou", "nds_NL", "nl_BE",
"pt_BR", "pt_PT", "ro_MD", "sw_CD", "zh_Hans", "zh_Hant"
};
for (String extraCode : extraCodes) {
addFallbackCode(NameType.LANGUAGE, extraCode, extraCode);
}

addFallbackCode(NameType.LANGUAGE, "en_GB", "en_GB", "short");
addFallbackCode(NameType.LANGUAGE, "en_US", "en_US", "short");
addFallbackCode(NameType.LANGUAGE, "az", "az", "short");

addFallbackCode(NameType.LANGUAGE, "ckb", "ckb", "menu");
addFallbackCode(NameType.LANGUAGE, "ckb", "ckb", "variant");
addFallbackCode(NameType.LANGUAGE, "hi_Latn", "hi_Latn", "variant");
addFallbackCode(NameType.LANGUAGE, "yue", "yue", "menu");
addFallbackCode(NameType.LANGUAGE, "zh", "zh", "menu");
addFallbackCode(NameType.LANGUAGE, "zh_Hans", "zh", "long");
addFallbackCode(NameType.LANGUAGE, "zh_Hant", "zh", "long");

addFallbackCode(NameType.SCRIPT, "Hans", "Hans", "stand-alone");
addFallbackCode(NameType.SCRIPT, "Hant", "Hant", "stand-alone");

addFallbackCode(NameType.TERRITORY, "GB", "GB", "short");
addFallbackCode(NameType.TERRITORY, "HK", "HK", "short");
addFallbackCode(NameType.TERRITORY, "MO", "MO", "short");
Expand Down Expand Up @@ -1743,9 +1712,7 @@ private static void addFallbackCode(
NameType nameType, String code, String value, String alt) {
String fullpath = nameType.getKeyPath(code);
String distinguishingPath = addFallbackCodeToConstructedItems(fullpath, value, alt);
if (nameType == NameType.LANGUAGE
|| nameType == NameType.SCRIPT
|| nameType == NameType.TERRITORY) {
if (nameType == NameType.SCRIPT || nameType == NameType.TERRITORY) {
allowDuplicates.put(distinguishingPath, code);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,9 @@
path.contains("/metazone")
|| path.contains("/timeZoneNames")
|| path.contains("/gender")
|| path.startsWith(
"//ldml/localeDisplayNames/languages/language")
|| path.startsWith("//ldml/localeDisplayNames/scripts/script")
|| path.startsWith("//ldml/numbers/currencies/currency")
|| path.startsWith("//ldml/personNames/sampleName")
|| path.contains("/availableFormats")
Expand Down Expand Up @@ -914,7 +917,7 @@
}
String value = swissHighGerman.getStringValue(xpath);
if (value != null && value.indexOf('ß') >= 0) {
warnln("«" + value + "» contains ß at " + xpath);

Check warning on line 920 in tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCLDRFile.java

View workflow job for this annotation

GitHub Actions / build

(TestCLDRFile.java:920) Warning: «Deko | Emotion | Gefühl | Herz | Herzdekoration | Liebe | lila | violett | weiß» contains ß at //ldml/annotations/annotation[@cp="💟"]

Check warning on line 920 in tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCLDRFile.java

View workflow job for this annotation

GitHub Actions / build

(TestCLDRFile.java:920) Warning: «Emotion | Gefühl | Herz | Herzen | Hochzeitstag | Jahrestag | kreisen | Liebe | süß» contains ß at //ldml/annotations/annotation[@cp="💞"]

Check warning on line 920 in tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCLDRFile.java

View workflow job for this annotation

GitHub Actions / build

(TestCLDRFile.java:920) Warning: «alt | älter | erwachsen | Großeltern | Mensch | Person | weise» contains ß at //ldml/annotations/annotation[@cp="🧓"]

Check warning on line 920 in tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCLDRFile.java

View workflow job for this annotation

GitHub Actions / build

(TestCLDRFile.java:920) Warning: «aquamarin | Emotion | gefallen | gefällt | Gefühl | hdl | hellblau | Herz | Liebe | mag | mögen | niedlich | süß | türkis | zyan» contains ß at //ldml/annotations/annotation[@cp="🩵"]

Check warning on line 920 in tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCLDRFile.java

View workflow job for this annotation

GitHub Actions / build

(TestCLDRFile.java:920) Warning: «Gesicht | keine Ahnung | Smiley | traurig | verwirrt | verwundert | weiß nicht» contains ß at //ldml/annotations/annotation[@cp="😕"]
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import org.unicode.cldr.util.LocaleNames;
import org.unicode.cldr.util.LogicalGrouping;
import org.unicode.cldr.util.LogicalGrouping.PathType;
import org.unicode.cldr.util.NameGetter;
import org.unicode.cldr.util.NameType;
import org.unicode.cldr.util.Organization;
import org.unicode.cldr.util.PathHeader;
Expand Down Expand Up @@ -1060,8 +1061,9 @@ private String stringForm(Level level2) {
}
}

public void testLSR() {
public void testLSR() { // LSR = Language/Script/Region
SupplementalDataInfo supplementalData = testInfo.getSupplementalDataInfo();

org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory();
CLDRFile root = factory.make(LocaleNames.ROOT, true);
CoverageLevel2 coverageLevel =
Expand All @@ -1073,7 +1075,7 @@ public void testLSR() {

// Get root LSR codes

for (String path : root) {
for (String path : root.fullIterable()) {
if (!path.startsWith("//ldml/localeDisplayNames/")) {
continue;
}
Expand Down Expand Up @@ -1141,6 +1143,7 @@ public void testLSR() {
NameType.TERRITORY,
Row.of("region", regions, regionsRoot, Level.MODERATE));

NameGetter englishNameGetter = testInfo.getEnglish().nameGetter();
for (Entry<NameType, R4<String, Map<String, Level>, Set<String>, Level>> typeAndInfo :
typeToInfo.entrySet()) {
NameType type = typeAndInfo.getKey();
Expand All @@ -1152,8 +1155,7 @@ public void testLSR() {
typeAndInfo.getValue().get3(); // it looks like the targetLevel is ignored

for (String code : Sets.union(idPartMap.keySet(), setRoot)) {
String displayName =
testInfo.getEnglish().nameGetter().getNameFromTypeEnumCode(type, code);
String displayName = englishNameGetter.getNameFromTypeEnumCode(type, code);
String path = type.getKeyPath(code);
Level level = coverageLevel.getLevel(path);
data.put(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,6 @@ public void testGetPaths() {
List<LocaleInheritanceInfo> pwf = f.getPathsWhereFound(p);
assertEquals(
List.of(
new LocaleInheritanceInfo(
XMLSource.CODE_FALLBACK_ID, GERMAN, Reason.constructed),
new LocaleInheritanceInfo(
XMLSource.ROOT_ID,
"//ldml/localeDisplayNames/localeDisplayPattern/localePattern",
Expand Down Expand Up @@ -321,8 +319,6 @@ public void testGetPaths() {
List<LocaleInheritanceInfo> pwf = f.getPathsWhereFound(p);
assertEquals(
List.of(
new LocaleInheritanceInfo(
XMLSource.CODE_FALLBACK_ID, GERMAN, Reason.constructed),
new LocaleInheritanceInfo(
XMLSource
.CODE_FALLBACK_ID /* test data does not have this in root */,
Expand Down
Loading