From 642ec270ce41559c84dbe2f4ab465ead679cdef8 Mon Sep 17 00:00:00 2001
From: Sebastian Thomschke
Date: Mon, 13 Jan 2025 14:43:29 +0100
Subject: [PATCH] fix: use human-friendly sorting in Outline

---
Notes (ignored by git-am):
  * Adds HumanFriendlyComparator: strings are split into runs of ASCII digits
    and runs of everything else; digit runs are ordered by numeric value
    (fewer leading zeros first on a tie), other runs via a java.text.Collator.
  * OutlineSorter now delegates to HumanFriendlyComparator.DEFAULT instead of
    String.compareTo.

 .../internal/HumanFriendlyComparatorTest.java |  79 ++++++++
 .../internal/HumanFriendlyComparator.java     | 182 ++++++++++++++++++
 .../eclipse/lsp4e/outline/OutlineSorter.java  |   3 +-
 3 files changed, 263 insertions(+), 1 deletion(-)
 create mode 100644 org.eclipse.lsp4e.test/src/org/eclipse/lsp4e/test/internal/HumanFriendlyComparatorTest.java
 create mode 100644 org.eclipse.lsp4e/src/org/eclipse/lsp4e/internal/HumanFriendlyComparator.java

diff --git a/org.eclipse.lsp4e.test/src/org/eclipse/lsp4e/test/internal/HumanFriendlyComparatorTest.java b/org.eclipse.lsp4e.test/src/org/eclipse/lsp4e/test/internal/HumanFriendlyComparatorTest.java
new file mode 100644
index 000000000..78b45a5e7
--- /dev/null
+++ b/org.eclipse.lsp4e.test/src/org/eclipse/lsp4e/test/internal/HumanFriendlyComparatorTest.java
@@ -0,0 +1,79 @@
+/*******************************************************************************
+ * Copyright (c) 2025 Sebastian Thomschke and others.
+ * This program and the accompanying materials are made
+ * available under the terms of the Eclipse Public License 2.0
+ * which is available at https://www.eclipse.org/legal/epl-2.0/
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ * Contributors:
+ * Sebastian Thomschke - initial implementation
+ *******************************************************************************/
+package org.eclipse.lsp4e.test.internal;
+
+import static org.junit.Assert.assertEquals;
+
+import java.text.Collator;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+
+import org.eclipse.lsp4e.internal.HumanFriendlyComparator;
+import org.junit.Test;
+
+public class HumanFriendlyComparatorTest {
+
+	@Test
+	public void testNumericSorting() {
+		List<String> input = Arrays.asList("file10.txt", "file2.txt", "file1.txt", "file100.txt");
+		List<String> expected = Arrays.asList("file1.txt", "file2.txt", "file10.txt", "file100.txt");
+
+		input.sort(HumanFriendlyComparator.DEFAULT);
+
+		assertEquals(expected, input);
+	}
+
+	@Test
+	public void testMixedTextAndNumbers() {
+		List<String> input = Arrays.asList("a10b", "a2b", "a10a", "a2a");
+		List<String> expected = Arrays.asList("a2a", "a2b", "a10a", "a10b");
+
+		input.sort(HumanFriendlyComparator.DEFAULT);
+
+		assertEquals(expected, input);
+	}
+
+	@Test
+	public void testLocaleSpecificSorting() {
+		List<String> input = Arrays.asList("ä2", "a10", "a2", "ä10", "á100", "A2");
+		List<String> expected = Arrays.asList("a2", "a10", "A2", "á100", "ä2", "ä10");
+
+		Collator collator = Collator.getInstance(Locale.GERMAN);
+		collator.setStrength(Collator.TERTIARY);
+
+		var comparator = new HumanFriendlyComparator(collator);
+		input.sort(comparator);
+
+		assertEquals(expected, input);
+	}
+
+	@Test
+	public void testLeadingZeros() {
+		List<String> input = Arrays.asList("file002.txt", "file2.txt", "file0002.txt", "file01.txt");
+		List<String> expected = Arrays.asList("file01.txt", "file2.txt", "file002.txt", "file0002.txt");
+
+		input.sort(HumanFriendlyComparator.DEFAULT);
+
+		assertEquals(expected, input);
+	}
+
+	@Test
+	public void testEmptyStringsAndSpecialCases() {
+		List<String> input = Arrays.asList("", "file", "file10", "file2", "file 10", "file 2", "file2", " ");
+		List<String> expected = Arrays.asList("", " ", "file", "file2", "file2", "file10", "file 2", "file 10");
+
+		input.sort(HumanFriendlyComparator.DEFAULT);
+
+		assertEquals(expected, input);
+	}
+}
diff --git a/org.eclipse.lsp4e/src/org/eclipse/lsp4e/internal/HumanFriendlyComparator.java b/org.eclipse.lsp4e/src/org/eclipse/lsp4e/internal/HumanFriendlyComparator.java
new file mode 100644
index 000000000..3e4e71bec
--- /dev/null
+++ b/org.eclipse.lsp4e/src/org/eclipse/lsp4e/internal/HumanFriendlyComparator.java
@@ -0,0 +1,182 @@
+/*******************************************************************************
+ * Copyright (c) 2025 Sebastian Thomschke and others.
+ * This program and the accompanying materials are made
+ * available under the terms of the Eclipse Public License 2.0
+ * which is available at https://www.eclipse.org/legal/epl-2.0/
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ * Contributors:
+ * Sebastian Thomschke - initial implementation
+ *******************************************************************************/
+package org.eclipse.lsp4e.internal;
+
+import java.text.Collator;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Locale;
+
+/**
+ * A comparator for strings that compares alphanumeric strings in a
+ * human-friendly order.
+ *
+ * <p>
+ * This comparator handles strings containing both alphabetic and numeric
+ * sequences. Numeric parts of the strings are compared numerically, while
+ * alphabetic parts are compared lexicographically.
+ * </p>
+ *
+ * <p>
+ * Example: Given strings "file2.txt", "file10.txt", and "file1.txt", this
+ * comparator will sort them as "file1.txt", "file2.txt", "file10.txt".
+ * </p>
+ */
+public final class HumanFriendlyComparator implements Comparator<String> {
+
+	public static final HumanFriendlyComparator DEFAULT = new HumanFriendlyComparator();
+
+	private final Collator collator;
+
+	/**
+	 * Convenience constructor that uses the default Locale's Collator.
+	 */
+	public HumanFriendlyComparator() {
+		this(Collator.getInstance(Locale.getDefault()));
+	}
+
+	/**
+	 * Creates a HumanFriendlyComparator that uses the given Collator for comparing
+	 * non-numeric substrings (Unicode-aware, locale-specific).
+	 */
+	public HumanFriendlyComparator(final Collator collator) {
+		this.collator = collator;
+	}
+
+	@Override
+	public int compare(final String str1, final String str2) {
+		final List<Token> tokens1 = tokenize(str1);
+		final List<Token> tokens2 = tokenize(str2);
+
+		final int tokenCount1 = tokens1.size();
+		final int tokenCount2 = tokens2.size();
+		int i = 0;
+
+		while (i < tokenCount1 && i < tokenCount2) {
+			final Token t1 = tokens1.get(i);
+			final Token t2 = tokens2.get(i);
+
+			if (t1.isNumeric && t2.isNumeric) {
+				// Compare numeric tokens
+				final int cmp = compareNumeric(str1, t1, str2, t2);
+				if (cmp != 0) {
+					return cmp;
+				}
+			} else if (!t1.isNumeric && !t2.isNumeric) {
+				// Compare text tokens using Collator
+				final String sub1 = str1.substring(t1.start, t1.end);
+				final String sub2 = str2.substring(t2.start, t2.end);
+
+				final int cmp = collator.compare(sub1, sub2);
+				if (cmp != 0) {
+					return cmp;
+				}
+			} else {
+				// One is numeric, the other is text
+				return t1.isNumeric ? -1 : 1;
+			}
+			i++;
+		}
+
+		return tokenCount1 - tokenCount2;
+	}
+
+	/**
+	 * Compares numeric tokens from the original strings.
+	 */
+	private int compareNumeric(final String str1, final Token tok1, final String str2, final Token tok2) {
+		// Skip leading zeros
+		final int start1 = skipLeadingZeros(str1, tok1.start, tok1.end);
+		final int start2 = skipLeadingZeros(str2, tok2.start, tok2.end);
+
+		final int len1 = tok1.end - start1;
+		final int len2 = tok2.end - start2;
+
+		// Compare lengths of numeric parts after leading zeros
+		if (len1 != len2) {
+			return len1 - len2;
+		}
+
+		// Same length => compare digit by digit
+		for (int i = 0; i < len1; i++) {
+			final char ch1 = str1.charAt(start1 + i);
+			final char ch2 = str2.charAt(start2 + i);
+			if (ch1 != ch2) {
+				return ch1 - ch2;
+			}
+		}
+
+		// If numeric values are identical, compare number of leading zeros
+		final int leadingZeros1 = start1 - tok1.start;
+		final int leadingZeros2 = start2 - tok2.start;
+		return leadingZeros1 - leadingZeros2;
+	}
+
+	/**
+	 * Skips leading zeros within the specified range of a string.
+	 */
+	private int skipLeadingZeros(final String str, int start, final int end) {
+		while (start < end && str.charAt(start) == '0') {
+			start++;
+		}
+		return start;
+	}
+
+	/**
+	 * Tokenizes a string into numeric and non-numeric segments.
+	 */
+	private List<Token> tokenize(final String str) {
+		final List<Token> tokens = new ArrayList<>();
+		final int len = str.length();
+		int i = 0;
+
+		while (i < len) {
+			final int start = i;
+			final char ch = str.charAt(i++);
+			if (isDigit(ch)) {
+				while (i < len && isDigit(str.charAt(i))) {
+					i++;
+				}
+				tokens.add(new Token(true, start, i));
+			} else {
+				while (i < len && !isDigit(str.charAt(i))) {
+					i++;
+				}
+				tokens.add(new Token(false, start, i));
+			}
+		}
+		return tokens;
+	}
+
+	/**
+	 * Faster alternative to Character.isDigit(c) for ASCII digits.
+	 */
+	private boolean isDigit(char ch) {
+		return ch >= '0' && ch <= '9';
+	}
+
+	/**
+	 * Represents a token within a string, identified by start and end indices.
+	 */
+	private static final class Token {
+		final boolean isNumeric;
+		final int start;
+		final int end;
+
+		Token(final boolean isNumeric, final int start, final int end) {
+			this.isNumeric = isNumeric;
+			this.start = start;
+			this.end = end;
+		}
+	}
+}
diff --git a/org.eclipse.lsp4e/src/org/eclipse/lsp4e/outline/OutlineSorter.java b/org.eclipse.lsp4e/src/org/eclipse/lsp4e/outline/OutlineSorter.java
index f8229203b..1c2113bb0 100644
--- a/org.eclipse.lsp4e/src/org/eclipse/lsp4e/outline/OutlineSorter.java
+++ b/org.eclipse.lsp4e/src/org/eclipse/lsp4e/outline/OutlineSorter.java
@@ -17,6 +17,7 @@
 import org.eclipse.jface.viewers.Viewer;
 import org.eclipse.jface.viewers.ViewerComparator;
 import org.eclipse.lsp4e.LanguageServerPlugin;
+import org.eclipse.lsp4e.internal.HumanFriendlyComparator;
 import org.eclipse.lsp4e.outline.SymbolsModel.DocumentSymbolWithURI;
 import org.eclipse.lsp4j.DocumentSymbol;
 import org.eclipse.lsp4j.SymbolInformation;
@@ -40,7 +41,7 @@ public int compare(final @Nullable Viewer viewer, final @Nullable Object o1, fin
 		if (name2 == null)
 			return 1;
 
-		return name1.compareTo(name2);
+		return HumanFriendlyComparator.DEFAULT.compare(name1, name2);
 	}
 
 	private @Nullable String getName(@Nullable Object element) {