diff --git a/app/src/main/java/helium314/keyboard/latin/common/StringUtils.java b/app/src/main/java/helium314/keyboard/latin/common/StringUtils.java index 36da83ac2..e2b5af42d 100644 --- a/app/src/main/java/helium314/keyboard/latin/common/StringUtils.java +++ b/app/src/main/java/helium314/keyboard/latin/common/StringUtils.java @@ -15,6 +15,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Locale; +import java.util.regex.Matcher; public final class StringUtils { @@ -418,6 +419,15 @@ public static String capitalizeEachWord(@NonNull final String text, return builder.toString(); } + // Use regex pattern matching to test if a CharSequence contains a URL. + // Returns the offset just past the end of the first URL match (Matcher.end()), or -1 if not found. + public static int findURLEndIndex(@NonNull final CharSequence sequence) { + Matcher matcher = android.util.Patterns.WEB_URL.matcher(sequence); + if (matcher.find()) + return matcher.end(); + return -1; + } + /** * Approximates whether the text before the cursor looks like a URL. *
diff --git a/app/src/main/java/helium314/keyboard/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java b/app/src/main/java/helium314/keyboard/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java
index cd36a0764..96e8a2cfd 100644
--- a/app/src/main/java/helium314/keyboard/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java
+++ b/app/src/main/java/helium314/keyboard/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java
@@ -13,6 +13,7 @@
import android.provider.UserDictionary.Words;
import android.service.textservice.SpellCheckerService.Session;
import android.text.TextUtils;
+import helium314.keyboard.latin.settings.Settings;
import helium314.keyboard.latin.utils.Log;
import android.util.LruCache;
import android.view.inputmethod.InputMethodManager;
@@ -47,6 +48,8 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session {
public final static String[] EMPTY_STRING_ARRAY = new String[0];
+ public final static int FLAG_UNCHECKABLE = 0; // Suggestions-cache flag: text was judged uncheckable, so it is answered without reporting a typo.
+
// Immutable, but not available in the constructor.
private Locale mLocale;
// Cache this for performance
@@ -59,6 +62,7 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session {
"(\\u0022|\\u0027|\\u0060|\\u00B4|\\u2018|\\u2018|\\u201C|\\u201D)";
private static final Map
- * This will loosely match URLs, numbers, symbols. To avoid always underlining words that
- * we know we will never recognize, this accepts a script identifier that should be one
- * of the SCRIPT_* constants defined above, to rule out quickly characters from very
- * different languages.
- *
+ * This will match URLs if URL detection is enabled,
+ * as well as text that is too short or starts with a special symbol.
* @param text the string to evaluate.
- * @param script the identifier for the script this spell checker recognizes
- * @return one of the FILTER_OUT_* constants above.
+ * @return one of the CHECKABILITY_* constants.
*/
- private static int getCheckabilityInScript(final String text, final String script) {
+ private static int getCheckability(final String text) {
if (TextUtils.isEmpty(text) || text.length() <= 1) return CHECKABILITY_TOO_SHORT;
-
- // TODO: check if an equivalent processing can't be done more quickly with a
- // compiled regexp.
// Filter by first letter
final int firstCodePoint = text.codePointAt(0);
- // Filter out words that don't start with a letter or an apostrophe
- if (!ScriptUtils.isLetterPartOfScript(firstCodePoint, script)
- && '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE;
-
- // Filter contents
- final int length = text.length();
- int letterCount = 0;
- for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
- final int codePoint = text.codePointAt(i);
- // Any word containing a COMMERCIAL_AT is probably an e-mail address
- // Any word containing a SLASH is probably either an ad-hoc combination of two
- // words or a URI - in either case we don't want to spell check that
- if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) {
- return CHECKABILITY_EMAIL_OR_URL;
- }
- // If the string contains a period, native returns strange suggestions (it seems
- // to return suggestions for everything up to the period only and to ignore the
- // rest), so we suppress lookup if there is a period.
- // TODO: investigate why native returns these suggestions and remove this code.
- if (Constants.CODE_PERIOD == codePoint) {
- return CHECKABILITY_CONTAINS_PERIOD;
- }
- if (ScriptUtils.isLetterPartOfScript(codePoint, script)) ++letterCount;
+ // Filter out words that start with '@' (at sign),
+ // which usually indicates the word is a username.
+ if (firstCodePoint == Constants.CODE_COMMERCIAL_AT) {
+ return CHECKABILITY_FIRST_LETTER_UNCHECKABLE;
+ }
+ // Filter out e-mail address and URL
+ if (Settings.getInstance().getCurrent().mUrlDetectionEnabled && StringUtils.findURLEndIndex(text) != -1) {
+ return CHECKABILITY_EMAIL_OR_URL;
}
- // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
- // in this word are letters
- return (letterCount * 4 < length * 3)
- ? CHECKABILITY_TOO_MANY_NON_LETTERS : CHECKABILITY_CHECKABLE;
+ return CHECKABILITY_CHECKABLE;
}
/**
@@ -267,49 +274,58 @@ private SuggestionsInfo onGetSuggestionsInternal(final TextInfo textInfo,
protected SuggestionsInfo onGetSuggestionsInternal(
final TextInfo textInfo, final NgramContext ngramContext, final int suggestionsLimit) {
try {
- updateLocale();
// It's good to keep this not local specific since the standard
// ones may show up in other languages also.
- String text = textInfo.getText()
+ final String textWithLocalePunctuations = textInfo.getText()
.replaceAll(AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE)
.replaceAll("^" + quotesRegexp, "")
.replaceAll(quotesRegexp + "$", "");
- final String localeRegex = scriptToPunctuationRegexMap.get(ScriptUtils.script(mLocale));
+ final SuggestionsParams cachedSuggestions = mSuggestionsCache.getSuggestionsFromCache(textWithLocalePunctuations);
+ // Return quickly when the text is cached as uncheckable or as a word in dictionary
+ if (cachedSuggestions != null) {
+ final int flag = cachedSuggestions.mFlags;
+ if (flag == FLAG_UNCHECKABLE) {
+ return AndroidSpellCheckerService.getNotInDictEmptySuggestions(false);
+ } else if (flag == SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY) {
+ return AndroidSpellCheckerService.getInDictEmptySuggestions();
+ }
+ }
- if (localeRegex != null) {
- text = text.replaceAll(localeRegex, "");
+ // Find out which locale should be used
+ updateLocale();
+ final Locale likelyLocale = findLikelyLocaleOfText(textWithLocalePunctuations);
+
+ // If no locale was found, then the text probably contains numbers
+ // or special characters only, so it should not be spell checked.
+ if (likelyLocale == null || !mService.hasMainDictionaryForLocale(mLocale)) {
+ mSuggestionsCache.putSuggestionsToCache(textWithLocalePunctuations, EMPTY_STRING_ARRAY, FLAG_UNCHECKABLE, mLocale);
+ return AndroidSpellCheckerService.getNotInDictEmptySuggestions(false);
+ } else if (!likelyLocale.equals(mLocale)) {
+ Log.d(TAG, "Updating locale from " + mLocale + " to " + likelyLocale);
+ mLocale = likelyLocale;
+ mScript = ScriptUtils.script(likelyLocale);
}
- if (!mService.hasMainDictionaryForLocale(mLocale)) {
- return AndroidSpellCheckerService.getNotInDictEmptySuggestions(false /* reportAsTypo */);
+ // Return cached suggestions for a word not in dictionary
+ // only if the current locale matches the cached locale.
+ if (cachedSuggestions != null && cachedSuggestions.mLocale.equals(mLocale)) {
+ return new SuggestionsInfo(cachedSuggestions.mFlags, cachedSuggestions.mSuggestions);
}
- // Handle special patterns like email, URI, telephone number.
- final int checkability = getCheckabilityInScript(text, mScript);
+ final String localeRegex = scriptToPunctuationRegexMap.get(ScriptUtils.script(mLocale));
+ final String text;
+ if (localeRegex != null) {
+ text = textWithLocalePunctuations.replaceAll(localeRegex, "");
+ } else {
+ text = textWithLocalePunctuations;
+ }
+
+ // Check if the text is too short and handle special patterns like email, URI.
+ final int checkability = getCheckability(text);
if (CHECKABILITY_CHECKABLE != checkability) {
- // CHECKABILITY_CONTAINS_PERIOD Typo should not be reported when text is a valid word followed by a single period (end of sentence).
- boolean periodOnlyAtLastIndex = text.indexOf(Constants.CODE_PERIOD) == (text.length() - 1);
- if (CHECKABILITY_CONTAINS_PERIOD == checkability) {
- final String[] splitText = text.split(Constants.REGEXP_PERIOD);
- boolean allWordsAreValid = true;
- // Validate all words on both sides of periods, skip empty tokens due to periods at first/last index
- for (final String word : splitText) {
- if (!word.isEmpty() && !mService.isValidWord(mLocale, word) && !mService.isValidWord(mLocale, word.toLowerCase(mLocale))) {
- allWordsAreValid = false;
- break;
- }
- }
- if (allWordsAreValid && !periodOnlyAtLastIndex) {
- return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO
- | SuggestionsInfo.RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS,
- new String[] {
- TextUtils.join(Constants.STRING_SPACE, splitText) });
- }
- }
- return mService.isValidWord(mLocale, text) ?
- AndroidSpellCheckerService.getInDictEmptySuggestions() :
- AndroidSpellCheckerService.getNotInDictEmptySuggestions(!periodOnlyAtLastIndex);
+ mSuggestionsCache.putSuggestionsToCache(textWithLocalePunctuations, EMPTY_STRING_ARRAY, FLAG_UNCHECKABLE, mLocale);
+ return AndroidSpellCheckerService.getNotInDictEmptySuggestions(false); // Typo should not be reported when text is uncheckable
}
// Handle normal words.
@@ -319,6 +335,8 @@ protected SuggestionsInfo onGetSuggestionsInternal(
if (DebugFlags.DEBUG_ENABLED) {
Log.i(TAG, "onGetSuggestionsInternal() : [" + text + "] is a valid word");
}
+ mSuggestionsCache.putSuggestionsToCache(textWithLocalePunctuations, EMPTY_STRING_ARRAY,
+ SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY, mLocale);
return AndroidSpellCheckerService.getInDictEmptySuggestions();
}
if (DebugFlags.DEBUG_ENABLED) {
@@ -326,12 +344,6 @@ protected SuggestionsInfo onGetSuggestionsInternal(
}
final Keyboard keyboard = mService.getKeyboardForLocale(mLocale);
- if (null == keyboard) {
- Log.w(TAG, "onGetSuggestionsInternal() : No keyboard for locale: " + mLocale);
- // If there is no keyboard for this locale, don't do any spell-checking.
- return AndroidSpellCheckerService.getNotInDictEmptySuggestions(false);
- }
-
final WordComposer composer = new WordComposer();
final int[] codePoints = StringUtils.toCodePointArray(text);
final int[] coordinates;
@@ -367,7 +379,7 @@ protected SuggestionsInfo onGetSuggestionsInternal(
? SuggestionsInfo.RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS
: 0);
final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions);
- mSuggestionsCache.putSuggestionsToCache(text, result.mSuggestions, flags);
+ mSuggestionsCache.putSuggestionsToCache(textWithLocalePunctuations, result.mSuggestions, flags, mLocale);
return retval;
} catch (RuntimeException e) {
// Don't kill the keyboard if there is a bug in the spell checker
diff --git a/app/src/main/java/helium314/keyboard/latin/spellcheck/SentenceLevelAdapter.java b/app/src/main/java/helium314/keyboard/latin/spellcheck/SentenceLevelAdapter.java
index 7694d1804..57fb7785b 100644
--- a/app/src/main/java/helium314/keyboard/latin/spellcheck/SentenceLevelAdapter.java
+++ b/app/src/main/java/helium314/keyboard/latin/spellcheck/SentenceLevelAdapter.java
@@ -12,6 +12,8 @@
import android.view.textservice.TextInfo;
import helium314.keyboard.latin.common.Constants;
+import helium314.keyboard.latin.common.StringUtils;
+import helium314.keyboard.latin.settings.Settings;
import helium314.keyboard.latin.settings.SpacingAndPunctuations;
import helium314.keyboard.latin.utils.RunInLocaleKt;
@@ -71,22 +73,15 @@ public WordIterator(final Resources res, final Locale locale) {
public int getEndOfWord(final CharSequence sequence, final int fromIndex) {
final int length = sequence.length();
int index = fromIndex < 0 ? 0 : Character.offsetByCodePoints(sequence, fromIndex, 1);
+ if (Settings.getInstance().getCurrent().mUrlDetectionEnabled) {
+ // findURLEndIndex() searches the sub-sequence starting at index, so its result is relative to index and already points just past the URL.
+ final int urlEndOffset = StringUtils.findURLEndIndex(sequence.subSequence(index, length));
+ if (urlEndOffset != -1) return index + urlEndOffset;
+ }
while (index < length) {
final int codePoint = Character.codePointAt(sequence, index);
- if (mSpacingAndPunctuations.isWordSeparator(codePoint)) {
- // If it's a period, we want to stop here only if it's followed by another
- // word separator. In all other cases we stop here.
- if (Constants.CODE_PERIOD == codePoint) {
- final int indexOfNextCodePoint =
- index + Character.charCount(Constants.CODE_PERIOD);
- if (indexOfNextCodePoint < length
- && mSpacingAndPunctuations.isWordSeparator(
- Character.codePointAt(sequence, indexOfNextCodePoint))) {
- return index;
- }
- } else {
- return index;
- }
+ if (mSpacingAndPunctuations.isWordSeparator(codePoint) || codePoint == Constants.CODE_DASH) {
+ return index;
}
index += Character.charCount(codePoint);
}
diff --git a/app/src/main/java/helium314/keyboard/latin/utils/SubtypeSettings.kt b/app/src/main/java/helium314/keyboard/latin/utils/SubtypeSettings.kt
index 7ed43ac0d..a2ec44af6 100644
--- a/app/src/main/java/helium314/keyboard/latin/utils/SubtypeSettings.kt
+++ b/app/src/main/java/helium314/keyboard/latin/utils/SubtypeSettings.kt
@@ -33,6 +33,20 @@ fun getEnabledSubtypes(prefs: SharedPreferences, fallback: Boolean = false): Lis
return enabledSubtypes
}
+fun getUniqueScriptLocalesFromEnabledSubtypes(prefs: SharedPreferences): List