Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLDR-17014 No code fallbacks for language paths #4254

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion common/testData/localeIdentifiers/localeDisplayName.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2368,7 +2368,7 @@ zh-Hans-fonipa; zh (Hans, FONIPA)

en-MM; en (MM)
es; es
es-419; es_419
es-419; es (419)
es-Cyrl-MX; es (Cyrl, MX)
hi-Latn; hi (Latn)
nl-BE; nl (BE)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,10 @@ public String getStringValue(String xpath) {
result = dataSource.getValueAtPath(fallbackPath);
}
}
// Note: the following can occur even when result != null at this point, and it can
// improve the result.For example, the code above may give "zh_Hans (FONIPA)", while the
// constructed value gotten below is "xitoy [soddalashgan] (FONIPA)" (in locale uz),
// which is expected by TestLocaleDisplay.
if (isResolved()
&& GlossonymConstructor.valueIsBogus(result)
&& GlossonymConstructor.pathIsEligible(xpath)) {
Expand Down Expand Up @@ -3148,6 +3152,10 @@ public Set<String> getRawExtraPaths() {
private List<String> getRawExtraPathsPrivate() {
Set<String> toAddTo = new HashSet<>();
SupplementalDataInfo supplementalData = CLDRConfig.getInstance().getSupplementalDataInfo();

// languages
ExtraPaths.appendLanguages(toAddTo);

// units
PluralInfo plurals = supplementalData.getPlurals(PluralType.cardinal, getLocaleID());
if (plurals == null && DEBUG) {
Expand Down Expand Up @@ -3527,7 +3535,7 @@ public String getFillInValue(String distinguishedPath) {
public boolean isNotRoot(String distinguishedPath) {
String source = getSourceLocaleID(distinguishedPath, null);
return source != null
&& !source.equals("root")
&& !source.equals(LocaleNames.ROOT)
&& !source.equals(XMLSource.CODE_FALLBACK_ID);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package org.unicode.cldr.util;

import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

public class ExtraPaths {

private static final Collection<String> languagePaths = new HashSet<>();

static void appendLanguages(Collection<String> toAddTo) {
if (languagePaths.isEmpty()) {
populateLanguagePaths();
}
toAddTo.addAll(languagePaths);
}

private static void populateLanguagePaths() {
Set<String> codes = new TreeSet<>();
StandardCodes sc = StandardCodes.make();
codes.addAll(sc.getGoodAvailableCodes(StandardCodes.CodeType.language));
codes.remove(LocaleNames.ROOT);
codes.addAll(
List.of(
"ar_001", "de_AT", "de_CH", "en_AU", "en_CA", "en_GB", "en_US", "es_419",
"es_ES", "es_MX", "fa_AF", "fr_CA", "fr_CH", "frc", "hi_Latn", "lou",
"nds_NL", "nl_BE", "pt_BR", "pt_PT", "ro_MD", "sw_CD", "zh_Hans",
"zh_Hant"));
for (String code : codes) {
languagePaths.add(NameType.LANGUAGE.getKeyPath(code));
}
languagePaths.add(languageAltPath("en_GB", "short"));
languagePaths.add(languageAltPath("en_US", "short"));
languagePaths.add(languageAltPath("az", "short"));
languagePaths.add(languageAltPath("ckb", "menu"));
languagePaths.add(languageAltPath("ckb", "variant"));
languagePaths.add(languageAltPath("hi_Latn", "variant"));
languagePaths.add(languageAltPath("yue", "menu"));
languagePaths.add(languageAltPath("zh", "menu"));
languagePaths.add(languageAltPath("zh_Hans", "long"));
languagePaths.add(languageAltPath("zh_Hant", "long"));
}

private static String languageAltPath(String code, String alt) {
String fullpath = NameType.LANGUAGE.getKeyPath(code);
// Insert the @alt= string after the last occurrence of "]"
StringBuilder fullpathBuf = new StringBuilder(fullpath);
return fullpathBuf
.insert(fullpathBuf.lastIndexOf("]") + 1, "[@alt=\"" + alt + "\"]")
.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1604,7 +1604,6 @@ public String getLocaleID() {
Map<String, String> zone_countries = sc.getZoneToCountry();
List<NameType> nameTypeList =
List.of(
NameType.LANGUAGE,
NameType.SCRIPT,
NameType.TERRITORY,
NameType.VARIANT,
Expand All @@ -1628,36 +1627,10 @@ public String getLocaleID() {
if (s != null && s.size() == 1) continue;
}
value = TimezoneFormatter.getFallbackName(value);
} else if (nameType == NameType.LANGUAGE) {
if (ROOT_ID.equals(value)) {
continue;
}
}
addFallbackCode(nameType, code, value);
}
}

String[] extraCodes = {
"ar_001", "de_AT", "de_CH", "en_AU", "en_CA", "en_GB", "en_US", "es_419", "es_ES",
"es_MX", "fa_AF", "fr_CA", "fr_CH", "frc", "hi_Latn", "lou", "nds_NL", "nl_BE",
"pt_BR", "pt_PT", "ro_MD", "sw_CD", "zh_Hans", "zh_Hant"
};
for (String extraCode : extraCodes) {
addFallbackCode(NameType.LANGUAGE, extraCode, extraCode);
}

addFallbackCode(NameType.LANGUAGE, "en_GB", "en_GB", "short");
addFallbackCode(NameType.LANGUAGE, "en_US", "en_US", "short");
addFallbackCode(NameType.LANGUAGE, "az", "az", "short");

addFallbackCode(NameType.LANGUAGE, "ckb", "ckb", "menu");
addFallbackCode(NameType.LANGUAGE, "ckb", "ckb", "variant");
addFallbackCode(NameType.LANGUAGE, "hi_Latn", "hi_Latn", "variant");
addFallbackCode(NameType.LANGUAGE, "yue", "yue", "menu");
addFallbackCode(NameType.LANGUAGE, "zh", "zh", "menu");
addFallbackCode(NameType.LANGUAGE, "zh_Hans", "zh", "long");
addFallbackCode(NameType.LANGUAGE, "zh_Hant", "zh", "long");

addFallbackCode(NameType.SCRIPT, "Hans", "Hans", "stand-alone");
addFallbackCode(NameType.SCRIPT, "Hant", "Hant", "stand-alone");

Expand Down Expand Up @@ -1742,9 +1715,7 @@ private static void addFallbackCode(
NameType nameType, String code, String value, String alt) {
String fullpath = nameType.getKeyPath(code);
String distinguishingPath = addFallbackCodeToConstructedItems(fullpath, value, alt);
if (nameType == NameType.LANGUAGE
|| nameType == NameType.SCRIPT
|| nameType == NameType.TERRITORY) {
if (nameType == NameType.SCRIPT || nameType == NameType.TERRITORY) {
allowDuplicates.put(distinguishingPath, code);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@
path.contains("/metazone")
|| path.contains("/timeZoneNames")
|| path.contains("/gender")
|| path.startsWith(
"//ldml/localeDisplayNames/languages/language")
|| path.startsWith("//ldml/numbers/currencies/currency")
|| path.startsWith("//ldml/personNames/sampleName")
|| path.contains("/availableFormats")
Expand Down Expand Up @@ -914,7 +916,7 @@
}
String value = swissHighGerman.getStringValue(xpath);
if (value != null && value.indexOf('ß') >= 0) {
warnln("«" + value + "» contains ß at " + xpath);

Check warning on line 919 in tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCLDRFile.java

View workflow job for this annotation

GitHub Actions / build

(TestCLDRFile.java:919) Warning: «Deko | Emotion | Gefühl | Herz | Herzdekoration | Liebe | lila | violett | weiß» contains ß at //ldml/annotations/annotation[@cp="💟"]

Check warning on line 919 in tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCLDRFile.java

View workflow job for this annotation

GitHub Actions / build

(TestCLDRFile.java:919) Warning: «Emotion | Gefühl | Herz | Herzen | Hochzeitstag | Jahrestag | kreisen | Liebe | süß» contains ß at //ldml/annotations/annotation[@cp="💞"]

Check warning on line 919 in tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCLDRFile.java

View workflow job for this annotation

GitHub Actions / build

(TestCLDRFile.java:919) Warning: «alt | älter | erwachsen | Großeltern | Mensch | Person | weise» contains ß at //ldml/annotations/annotation[@cp="🧓"]

Check warning on line 919 in tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCLDRFile.java

View workflow job for this annotation

GitHub Actions / build

(TestCLDRFile.java:919) Warning: «aquamarin | Emotion | gefallen | gefällt | Gefühl | hdl | hellblau | Herz | Liebe | mag | mögen | niedlich | süß | türkis | zyan» contains ß at //ldml/annotations/annotation[@cp="🩵"]

Check warning on line 919 in tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCLDRFile.java

View workflow job for this annotation

GitHub Actions / build

(TestCLDRFile.java:919) Warning: «Gesicht | keine Ahnung | Smiley | traurig | verwirrt | verwundert | weiß nicht» contains ß at //ldml/annotations/annotation[@cp="😕"]
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import org.unicode.cldr.util.LocaleNames;
import org.unicode.cldr.util.LogicalGrouping;
import org.unicode.cldr.util.LogicalGrouping.PathType;
import org.unicode.cldr.util.NameGetter;
import org.unicode.cldr.util.NameType;
import org.unicode.cldr.util.Organization;
import org.unicode.cldr.util.PathHeader;
Expand Down Expand Up @@ -1060,8 +1061,9 @@ private String stringForm(Level level2) {
}
}

public void testLSR() {
public void testLSR() { // LSR = Language/Script/Region
SupplementalDataInfo supplementalData = testInfo.getSupplementalDataInfo();

org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory();
CLDRFile root = factory.make(LocaleNames.ROOT, true);
CoverageLevel2 coverageLevel =
Expand All @@ -1073,7 +1075,7 @@ public void testLSR() {

// Get root LSR codes

for (String path : root) {
for (String path : root.fullIterable()) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

without fullIterable, the loop skipped the extra paths

this may be a concern in general: how often do we loop through a CLDRFile the simple way, which skips extra paths, and what are the consequences and rationale for that?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point.

I think in most cases we want to iterate through the extra paths. So we might want make the iteration choice explicit, so that we can check to make sure. Maybe refactor (in separate ticket/PR) as follows.

for (String path : cldrFile) { // make this fail

for (String path : cldrFile.withoutExtras()) { // make this do what the line above did

for (String path : cldrFile.withExtras()) { // new name for fullIterable

Then we can search for the withoutExtras() calls and make sure that they are right.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The TestVettingDataDriven failure may be related to this. Part of that test involves writing the CLDRFile to a temporary XML file and then reading it back to verify the newly voted-on value, and that's where it's failing. It looks like the language paths, which are now extra paths, don't get written to the file. To test that, I'm starting a new branch from main that will just have a few lines added to TestSTFactory.xml...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See #4256

If the same failure happens there that I see locally, it seems to indicate a long-existing bug where CldrXmlWriter doesn't write extra paths

if (!path.startsWith("//ldml/localeDisplayNames/")) {
continue;
}
Expand Down Expand Up @@ -1141,6 +1143,7 @@ public void testLSR() {
NameType.TERRITORY,
Row.of("region", regions, regionsRoot, Level.MODERATE));

NameGetter englishNameGetter = testInfo.getEnglish().nameGetter();
for (Entry<NameType, R4<String, Map<String, Level>, Set<String>, Level>> typeAndInfo :
typeToInfo.entrySet()) {
NameType type = typeAndInfo.getKey();
Expand All @@ -1152,8 +1155,7 @@ public void testLSR() {
typeAndInfo.getValue().get3(); // it looks like the targetLevel is ignored

for (String code : Sets.union(idPartMap.keySet(), setRoot)) {
String displayName =
testInfo.getEnglish().nameGetter().getNameFromTypeEnumCode(type, code);
String displayName = englishNameGetter.getNameFromTypeEnumCode(type, code);
String path = type.getKeyPath(code);
Level level = coverageLevel.getLevel(path);
data.put(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,6 @@ public void testGetPaths() {
List<LocaleInheritanceInfo> pwf = f.getPathsWhereFound(p);
assertEquals(
List.of(
new LocaleInheritanceInfo(
XMLSource.CODE_FALLBACK_ID, GERMAN, Reason.constructed),
new LocaleInheritanceInfo(
XMLSource.ROOT_ID,
"//ldml/localeDisplayNames/localeDisplayPattern/localePattern",
Expand Down Expand Up @@ -321,8 +319,6 @@ public void testGetPaths() {
List<LocaleInheritanceInfo> pwf = f.getPathsWhereFound(p);
assertEquals(
List.of(
new LocaleInheritanceInfo(
XMLSource.CODE_FALLBACK_ID, GERMAN, Reason.constructed),
new LocaleInheritanceInfo(
XMLSource
.CODE_FALLBACK_ID /* test data does not have this in root */,
Expand Down
Loading