Skip to content

Commit

Permalink
Revert "feat: pangu in EN[,.!?]CJK / EN[,.!?]EN / CJK[,.!?]EN"
Browse files — browse the repository at this point in the history
This reverts commit fdc8726.
  • Loading branch information
NextAlone committed Oct 13, 2024
1 parent fdc8726 commit fa21309
Showing 1 changed file with 1 addition and 34 deletions.
35 changes: 1 addition & 34 deletions libs/pangu/src/main/java/ws/vinta/pangu/Pangu.kt
Original file line number Diff line number Diff line change
Expand Up @@ -71,16 +71,6 @@ class Pangu {
"((\\S+)#)" +
"([\\p{InHiragana}\\p{InKatakana}\\p{InBopomofo}\\p{InCJKCompatibilityIdeographs}\\p{InCJKUnifiedIdeographs}])"
)

// Three capture groups: an ASCII letter, one of the punctuation marks . , ! ?,
// and another ASCII letter (digits are deliberately not in either letter class).
private val EN_PUNCTUATION_EN = Pattern.compile(
"([a-zA-Z])([.,!?])([a-zA-Z])"
)
// Three capture groups: an ASCII letter or digit, one of . , ! ?, and a single
// character from the Hiragana, Katakana, Bopomofo, CJK Compatibility Ideographs
// or CJK Unified Ideographs blocks.
private val EN_PUNCTUATION_CJK = Pattern.compile(
"([a-zA-Z0-9])([.,!?])([\\p{InHiragana}\\p{InKatakana}\\p{InBopomofo}\\p{InCJKCompatibilityIdeographs}\\p{InCJKUnifiedIdeographs}])"
)
// Reverse order of EN_PUNCTUATION_CJK: a character from the same CJK blocks,
// one of . , ! ?, and then an ASCII letter or digit.
private val CJK_PUNCTUATION_EN = Pattern.compile(
"([\\p{InHiragana}\\p{InKatakana}\\p{InBopomofo}\\p{InCJKCompatibilityIdeographs}\\p{InCJKUnifiedIdeographs}])([.,!?])([a-zA-Z0-9])"
)
}

private fun processUrl(text: String) = Pattern.compile("://").matcher(text).let { matcher ->
Expand Down Expand Up @@ -180,14 +170,6 @@ class Pangu {
val acMatcher = ANS_CJK.matcher(text)
text = acMatcher.replaceAll("$1 $2")

// CJK or ANS with EN Punctuation
val enPunctuationEnMatcher = EN_PUNCTUATION_EN.matcher(text)
text = enPunctuationEnMatcher.replaceAll("$1$2 $3")
val enPunctuationCjkMatcher = EN_PUNCTUATION_CJK.matcher(text)
text = enPunctuationCjkMatcher.replaceAll("$1$2 $3")
val cjkPunctuationEnMatcher = CJK_PUNCTUATION_EN.matcher(text)
text = cjkPunctuationEnMatcher.replaceAll("$1$2 $3")

return text
}

Expand All @@ -202,21 +184,6 @@ internal object Test {
// Manual smoke-test entry point: feeds sample strings to Pangu.spacingText and
// prints the results to standard output. `args` is not read.
@JvmStatic
fun main(args: Array<String>) {
val pangu = Pangu()
// Sample inputs mixing CJK text, ASCII letters/digits, ASCII and full-width
// punctuation, URLs, quotes and brackets.
val testCases = listOf(
"當你凝視著 https://telegra.ph/八尋ぽち-ひみチュッ-中国翻訳-無修正-DL版-06-17-3 ,bug也凝視著 https://telegra.ph/ASDF-DL版-06-17-3",
"Hello,world!This is a test.",
"你好,世界!这是一个测试。",
"你好,世界!这是一个测试.",
"123,456.789!0",
"1,1 1.1 1!1 1?1 1,a 1.a 1!a 1?a a,a a.a a!a a?a a,1 a.1 a!1 a?1",
"CJK和ANS混合测试:你好world123,你好!world123",
"URL测试:https://example.com/path?query=测试",
"带引号的测试:\"你好\" '世界'",
"带括号的测试:(你a好) [世b,b界] {测c试}"
)

// For each sample, print the raw input, then the spaced output, then a blank line.
testCases.forEach { testCase ->
println("$testCase\n${pangu.spacingText(testCase)}\n")
}
// Print only the spaced output for a single sample containing CJK text and two URLs.
println(pangu.spacingText("當你凝視著 https://telegra.ph/八尋ぽち-ひみチュッ-中国翻訳-無修正-DL版-06-17-3 ,bug也凝視著 https://telegra.ph/ASDF-DL版-06-17-3"))
}
}

0 comments on commit fa21309

Please sign in to comment.