fix: use human-friendly sorting in Outline

eclipse-lsp4e · Jan 13, 2025 · 642ec27 · 642ec27
1 parent bbb919d
commit 642ec27
Show file tree

Hide file tree

Showing 3 changed files with 263 additions and 1 deletion.
diff --git a/org.eclipse.lsp4e.test/src/org/eclipse/lsp4e/test/internal/HumanFriendlyComparatorTest.java b/org.eclipse.lsp4e.test/src/org/eclipse/lsp4e/test/internal/HumanFriendlyComparatorTest.java
@@ -0,0 +1,79 @@
+/*******************************************************************************
+ * Copyright (c) 2025 Sebastian Thomschke and others.
+ * This program and the accompanying materials are made
+ * available under the terms of the Eclipse Public License 2.0
+ * which is available at https://www.eclipse.org/legal/epl-2.0/
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ * Contributors:
+ * Sebastian Thomschke - initial implementation
+ *******************************************************************************/
+package org.eclipse.lsp4e.test.internal;
+
+import static org.junit.Assert.assertEquals;
+
+import java.text.Collator;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+
+import org.eclipse.lsp4e.internal.HumanFriendlyComparator;
+import org.junit.Test;
+
+public class HumanFriendlyComparatorTest { // each case sorts a list in place and compares it with a hand-written expected order
+
+	@Test
+	public void testNumericSorting() { // digit runs are ordered by numeric value: 1 < 2 < 10 < 100
+		List<String> input = Arrays.asList("file10.txt", "file2.txt", "file1.txt", "file100.txt");
+		List<String> expected = Arrays.asList("file1.txt", "file2.txt", "file10.txt", "file100.txt");
+
+		input.sort(HumanFriendlyComparator.DEFAULT);
+
+		assertEquals(expected, input);
+	}
+
+	@Test
+	public void testMixedTextAndNumbers() { // the number decides first, the text following it breaks ties
+		List<String> input = Arrays.asList("a10b", "a2b", "a10a", "a2a");
+		List<String> expected = Arrays.asList("a2a", "a2b", "a10a", "a10b");
+
+		input.sort(HumanFriendlyComparator.DEFAULT);
+
+		assertEquals(expected, input);
+	}
+
+	@Test
+	public void testLocaleSpecificSorting() { // text tokens follow the ordering of an explicitly supplied Collator
+		List<String> input = Arrays.asList("ä2", "a10", "a2", "ä10", "á100", "A2");
+		List<String> expected = Arrays.asList("a2", "a10", "A2", "á100", "ä2", "ä10");
+
+		Collator collator = Collator.getInstance(Locale.GERMAN); // fixed locale instead of the platform default
+		collator.setStrength(Collator.TERTIARY); // the expected order distinguishes "a", "A", "á" and "ä"
+
+		var comparator = new HumanFriendlyComparator(collator);
+		input.sort(comparator);
+
+		assertEquals(expected, input);
+	}
+
+	@Test
+	public void testLeadingZeros() { // value first (01 < 2), then equal values by fewer leading zeros
+		List<String> input = Arrays.asList("file002.txt", "file2.txt", "file0002.txt", "file01.txt");
+		List<String> expected = Arrays.asList("file01.txt", "file2.txt", "file002.txt", "file0002.txt");
+
+		input.sort(HumanFriendlyComparator.DEFAULT);
+
+		assertEquals(expected, input);
+	}
+
+	@Test
+	public void testEmptyStringsAndSpecialCases() { // covers "", a lone space, a duplicate entry and a space before the digits
+		List<String> input = Arrays.asList("", "file", "file10", "file2", "file 10", "file 2", "file2", " ");
+		List<String> expected = Arrays.asList("", " ", "file", "file2", "file2", "file10", "file 2", "file 10");
+
+		input.sort(HumanFriendlyComparator.DEFAULT);
+
+		assertEquals(expected, input);
+	}
+}
diff --git a/org.eclipse.lsp4e/src/org/eclipse/lsp4e/internal/HumanFriendlyComparator.java b/org.eclipse.lsp4e/src/org/eclipse/lsp4e/internal/HumanFriendlyComparator.java
@@ -0,0 +1,182 @@
+/*******************************************************************************
+ * Copyright (c) 2025 Sebastian Thomschke and others.
+ * This program and the accompanying materials are made
+ * available under the terms of the Eclipse Public License 2.0
+ * which is available at https://www.eclipse.org/legal/epl-2.0/
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ * Contributors:
+ * Sebastian Thomschke - initial implementation
+ *******************************************************************************/
+package org.eclipse.lsp4e.internal;
+
+import java.text.Collator;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Locale;
+
+/**
+ * A comparator for strings that compares alphanumeric strings in a
+ * human-friendly order.
+ *
+ * <p>
+ * This comparator handles strings containing both alphabetic and numeric
+ * sequences. Runs of ASCII digits are compared by numeric value, while all
+ * other text is compared using the configured {@link Collator}.
+ * </p>
+ *
+ * <p>
+ * Example: Given strings "file2.txt", "file10.txt", and "file1.txt", this
+ * comparator will sort them as "file1.txt", "file2.txt", "file10.txt".
+ * </p>
+ */
+public final class HumanFriendlyComparator implements Comparator<String> {
+
+	public static final HumanFriendlyComparator DEFAULT = new HumanFriendlyComparator(); // shared instance using the default locale's Collator
+
+	private final Collator collator; // compares the non-numeric tokens
+
+	/**
+	 * Convenience constructor that uses the Collator of {@link Locale#getDefault()}.
+	 */
+	public HumanFriendlyComparator() {
+		this(Collator.getInstance(Locale.getDefault()));
+	}
+
+	/**
+	 * Creates a HumanFriendlyComparator that compares non-numeric substrings with
+	 * the given Collator; the instance is stored as-is, without copy or null check.
+	 */
+	public HumanFriendlyComparator(final Collator collator) {
+		this.collator = collator;
+	}
+
+	@Override
+	public int compare(final String str1, final String str2) {
+		final List<Token> tokens1 = tokenize(str1); // alternating digit / non-digit runs of str1
+		final List<Token> tokens2 = tokenize(str2);
+
+		final int tokenCount1 = tokens1.size();
+		final int tokenCount2 = tokens2.size();
+		int i = 0;
+
+		while (i < tokenCount1 && i < tokenCount2) { // walk the tokens pairwise until one side runs out
+			final Token t1 = tokens1.get(i);
+			final Token t2 = tokens2.get(i);
+
+			if (t1.isNumeric && t2.isNumeric) {
+				// Both numeric: order by value, then by number of leading zeros
+				final int cmp = compareNumeric(str1, t1, str2, t2);
+				if (cmp != 0) {
+					return cmp;
+				}
+			} else if (!t1.isNumeric && !t2.isNumeric) {
+				// Both text: delegate to the Collator for locale-specific ordering
+				final String sub1 = str1.substring(t1.start, t1.end);
+				final String sub2 = str2.substring(t2.start, t2.end);
+
+				final int cmp = collator.compare(sub1, sub2);
+				if (cmp != 0) {
+					return cmp;
+				}
+			} else {
+				// One is numeric, the other is text: the numeric token sorts first
+				return t1.isNumeric ? -1 : 1;
+			}
+			i++;
+		}
+
+		return tokenCount1 - tokenCount2; // all shared tokens are equal: the string with fewer tokens sorts first
+	}
+
+	/**
+	 * Compares two numeric tokens by value, working directly on the characters of the original strings (nothing is parsed into a number type).
+	 */
+	private int compareNumeric(final String str1, final Token tok1, final String str2, final Token tok2) {
+		// Skip leading zeros so that only the significant digits are compared
+		final int start1 = skipLeadingZeros(str1, tok1.start, tok1.end);
+		final int start2 = skipLeadingZeros(str2, tok2.start, tok2.end);
+
+		final int len1 = tok1.end - start1;
+		final int len2 = tok2.end - start2;
+
+		// More significant digits means a larger value
+		if (len1 != len2) {
+			return len1 - len2;
+		}
+
+		// Same length => compare digit by digit, most significant digit first
+		for (int i = 0; i < len1; i++) {
+			final char ch1 = str1.charAt(start1 + i);
+			final char ch2 = str2.charAt(start2 + i);
+			if (ch1 != ch2) {
+				return ch1 - ch2;
+			}
+		}
+
+		// If numeric values are identical, the token with fewer leading zeros sorts first
+		final int leadingZeros1 = start1 - tok1.start;
+		final int leadingZeros2 = start2 - tok2.start;
+		return leadingZeros1 - leadingZeros2;
+	}
+
+	/**
+	 * Returns the index of the first character in [start, end) of str that is not '0', or end if the whole range consists of zeros.
+	 */
+	private int skipLeadingZeros(final String str, int start, final int end) {
+		while (start < end && str.charAt(start) == '0') {
+			start++;
+		}
+		return start;
+	}
+
+	/**
+	 * Splits a string into alternating runs of ASCII digits and non-digits, in order of appearance; an empty string yields no tokens.
+	 */
+	private List<Token> tokenize(final String str) {
+		final List<Token> tokens = new ArrayList<>();
+		final int len = str.length();
+		int i = 0;
+
+		while (i < len) {
+			final int start = i;
+			final char ch = str.charAt(i++); // the first character decides the kind of the token
+			if (isDigit(ch)) {
+				while (i < len && isDigit(str.charAt(i))) {
+					i++;
+				}
+				tokens.add(new Token(true, start, i));
+			} else {
+				while (i < len && !isDigit(str.charAt(i))) {
+					i++;
+				}
+				tokens.add(new Token(false, start, i));
+			}
+		}
+		return tokens;
+	}
+
+	/**
+	 * ASCII-only alternative to Character.isDigit(c): matches just the characters '0' through '9'.
+	 */
+	private boolean isDigit(char ch) {
+		return ch >= '0' && ch <= '9';
+	}
+
+	/**
+	 * A run of digits or non-digits within a string; start is the inclusive and end the exclusive index of the run.
+	 */
+	private static final class Token {
+		final boolean isNumeric; // true for a run of ASCII digits
+		final int start;
+		final int end;
+
+		Token(final boolean isNumeric, final int start, final int end) {
+			this.isNumeric = isNumeric;
+			this.start = start;
+			this.end = end;
+		}
+	}
+}
diff --git a/org.eclipse.lsp4e/src/org/eclipse/lsp4e/outline/OutlineSorter.java b/org.eclipse.lsp4e/src/org/eclipse/lsp4e/outline/OutlineSorter.java
@@ -17,6 +17,7 @@
 import org.eclipse.jface.viewers.Viewer;
 import org.eclipse.jface.viewers.ViewerComparator;
 import org.eclipse.lsp4e.LanguageServerPlugin;
+import org.eclipse.lsp4e.internal.HumanFriendlyComparator;
 import org.eclipse.lsp4e.outline.SymbolsModel.DocumentSymbolWithURI;
 import org.eclipse.lsp4j.DocumentSymbol;
 import org.eclipse.lsp4j.SymbolInformation;
@@ -40,7 +41,7 @@ public int compare(final @Nullable Viewer viewer, final @Nullable Object o1, fin
 		if (name2 == null)
 			return 1;
 
-		return name1.compareTo(name2);
+		return HumanFriendlyComparator.DEFAULT.compare(name1, name2);
 	}
 
 	private @Nullable String getName(@Nullable Object element) {