Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DUCK] 修复“一九五六年的昨天”抽取错误 #209

Merged
merged 1 commit into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,12 @@ trait Rules extends DimRules {

// Rule that turns a year digit sequence followed by a year suffix
// ("年", "年版" or "版", per the regex below) into a Date year token.
// NOTE(review): this hunk seems to show removed and added diff lines together
// (two `pattern =` lines and two `case token` arms); the hunk header reports
// 9 old lines becoming 12 new ones. Confirm against the real file.
val ruleYearNumericWithYearSymbol = Rule(
name = "date - year (numeric with year symbol)",
// Presumably the pre-change pattern: predicate named for 1000..9999 sequences only.
pattern = List(seqYearOf1000to9999.predicate, "(年?版|年)".regex),
// Presumably the post-change pattern: predicate that also admits two-character sequences.
pattern = List(seqYearOf1000to9999or00to99.predicate, "(年?版|年)".regex),
prod = tokens {
// Presumably the pre-change arm: the parsed value is used as the year unchanged.
case token :: _ => getIntValue(token).map(i => Token(Date, year(i.toInt)))
case token :: _ => getIntValue(token).map(i => {
// Two-digit pivot: values below 30 become 20xx, values from 30 up to 99
// become 19xx, and anything from 100 upwards is kept as-is.
val y = if (i < 30) i + 2000 else if (i < 100) i + 1900 else i
Token(Date, year(y.toInt))
})
}
)

Expand All @@ -174,34 +177,6 @@ trait Rules extends DimRules {

// Alias for `singleNumeber.predicate`, so rule patterns can refer to it by a short name.
val singleNumberPredicate = singleNumeber.predicate

// Rule for a year written as two single-number tokens plus a year suffix
// (e.g. "九八年"). The first token is the tens digit and the second the ones
// digit; a tens digit of 3 or more lands in the 1900s, otherwise the 2000s.
val ruleTwoDigitYear = Rule(
  name = "date - year (like 九八年)",
  pattern = List(singleNumberPredicate, singleNumberPredicate, "(年?版|年)".regex),
  prod = tokens {
    case tensToken :: onesToken :: _ =>
      // Both digits must resolve to a value; otherwise no token is produced.
      getIntValue(tensToken).flatMap { tens =>
        getIntValue(onesToken).map { ones =>
          val century = if (tens >= 3) 1900 else 2000
          val fullYear = century + (tens * 10 + ones)
          Token(Date, year(fullYear.toInt))
        }
      }
  }
)

// Rule for a two-digit Arabic-numeral year followed by a year suffix (e.g. "06年").
// The matched digits are parsed and handed to `adjustYear` together with the
// raw text; presumably `adjustYear` expands them to a full year (TODO confirm).
val ruleTowDigitYear06 = Rule(
  name = "date - year (like 06年)",
  pattern = List(raw"\d{2}".regex, "(年?版|年)".regex),
  prod = singleRegexMatch {
    case matched =>
      // NOTE(review): `.get` throws if adjustYear yields no value; kept to preserve behaviour.
      val resolvedYear = adjustYear(Integer.parseInt(matched), matched).get
      Token(Date, year(resolvedYear))
  }
)

val ruleMonthNumericWithMonthSymbol = Rule(
name = "date: month (numeric with month symbol)",
pattern = List(isIntegerBetween(1, 12).predicate, "月(份)?".regex),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,15 @@ object predicates {

// Predicate built from isIntegerBetween(0, 9), i.e. a single-digit integer check
// (whether the bounds are inclusive is decided by isIntegerBetween, not visible here).
// NOTE(review): the name is misspelled ("Numeber"); left as-is because other code references it.
val singleNumeber: Predicate = isIntegerBetween(0, 9)

val seqYearOf1000to9999: Predicate = {
case Token(DigitSequence, DigitSequenceData(seq, zh, raw)) if seq.length == 4 =>
val v = seq.toDouble
v > 1000 && v < 9999
// Accepts a DigitSequence token whose sequence is either
//   - four characters long with a numeric value strictly between 1000 and 9999, or
//   - two characters long with a numeric value in [-0.1, 99.1).
// Sequences of any other length are rejected.
// NOTE(review): the four-character bounds are exclusive, so 1000 and 9999
// themselves are rejected despite the name — confirm this is intended.
val seqYearOf1000to9999or00to99: Predicate = {
  case Token(DigitSequence, DigitSequenceData(seq, _, _)) =>
    seq.length match {
      case 4 =>
        val numeric = seq.toDouble
        1000 < numeric && numeric < 9999
      case 2 =>
        val numeric = seq.toDouble
        -0.1 <= numeric && numeric < 99.1
      case _ => false
    }
}

val arabicSeqOf1950to2050: Predicate = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ object Examples extends DimExamples {
(ymd(2013, 2, 14), List("后天", "后日")),
(ymd(2013, 2, 10), List("前天", "前日")),
(ymd(2012, 2, 12), List("去年的今天")),
(ymd(1956, 2, 11), List("一九五六年的昨天")),
(ymd(2010, 2, 12), List("去年的前年的今天")),
(ymd(2013, 2, 15), List("明天的后天")),
(ymd(2013, 2, 28), List("这个月月底")),
Expand Down
Loading