Skip to content

Commit

Permalink
- 修复“一九五六年的昨天”抽取错误的问题 (#209)
Browse files Browse the repository at this point in the history
- 精简相关规则
  • Loading branch information
du00cs authored Jan 26, 2024
1 parent 8e91e92 commit 05ec5a5
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,12 @@ trait Rules extends DimRules {

// Rule for a year given as a digit-sequence token followed by text matching "(年?版|年)".
// NOTE(review): this span looks like a rendered diff without +/- markers. The two
// `pattern = ...` lines and the two `case token :: _` arms appear to be the old and the
// new version of the same line shown back to back, not one compilable definition.
val ruleYearNumericWithYearSymbol = Rule(
name = "date - year (numeric with year symbol)",
pattern = List(seqYearOf1000to9999.predicate, "(年?版|年)".regex),
pattern = List(seqYearOf1000to9999or00to99.predicate, "(年?版|年)".regex),
prod = tokens {
case token :: _ => getIntValue(token).map(i => Token(Date, year(i.toInt)))
case token :: _ => getIntValue(token).map(i => {
// Values below 30 get 2000 added, values from 30 up to 99 get 1900 added,
// and anything from 100 upwards is used unchanged.
val y = if (i < 30) i + 2000 else if (i < 100) i + 1900 else i
Token(Date, year(y.toInt))
})
}
)

Expand All @@ -174,34 +177,6 @@ trait Rules extends DimRules {

// The `.predicate` of `singleNumeber`, bound to a val so rule patterns can reuse it.
val singleNumberPredicate = singleNumeber.predicate

/**
 * Year rule for two consecutive single-number tokens followed by text matching
 * "(年?版|年)", e.g. 九八年.
 *
 * The first token supplies the tens digit and the second the ones digit. A tens digit
 * of 3 or more places the year in the 1900s, otherwise in the 2000s. No token is
 * produced when either value cannot be read by `getIntValue`.
 */
val ruleTwoDigitYear = Rule(
  name = "date - year (like 九八年)",
  pattern = List(singleNumberPredicate, singleNumberPredicate, "(年?版|年)".regex),
  prod = tokens {
    case tensToken :: onesToken :: _ =>
      getIntValue(tensToken).flatMap { tens =>
        getIntValue(onesToken).map { ones =>
          val twoDigits = tens * 10 + ones
          // The century is chosen from the tens digit alone.
          val century = if (tens >= 3) 1900 else 2000
          val fullYear = century + twoDigits
          Token(Date, year(fullYear.toInt))
        }
      }
  }
)

/**
 * Year rule for a two-digit regex match (`\d{2}`) followed by text matching
 * "(年?版|年)", e.g. 06年.
 *
 * The string handed to the handler is parsed as an Int and passed, together with the
 * raw string, to `adjustYear`; the result becomes the year of the produced Date token.
 */
val ruleTowDigitYear06 = Rule(
  name = "date - year (like 06年)",
  pattern = List(raw"\d{2}".regex, "(年?版|年)".regex),
  prod = singleRegexMatch { case matched =>
    // NOTE(review): `.get` throws if adjustYear returns an empty/failed value;
    // presumably that cannot happen for a two-digit input. Confirm against adjustYear.
    val fullYear = adjustYear(Integer.parseInt(matched), matched).get
    Token(Date, year(fullYear))
  }
)

val ruleMonthNumericWithMonthSymbol = Rule(
name = "date: month (numeric with month symbol)",
pattern = List(isIntegerBetween(1, 12).predicate, "月(份)?".regex),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,15 @@ object predicates {

// Predicate built by isIntegerBetween(0, 9); presumably accepts single-digit integers
// (whether the bounds are inclusive is not visible here — confirm).
// NOTE(review): the spelling `singleNumeber` is left untouched because other
// definitions refer to this name.
val singleNumeber: Predicate = isIntegerBetween(0, 9)

// NOTE(review): this span looks like a rendered diff without +/- markers. The first
// definition (seqYearOf1000to9999) appears to be the removed version and is shown
// without its own closing brace; seqYearOf1000to9999or00to99 appears to replace it.
val seqYearOf1000to9999: Predicate = {
case Token(DigitSequence, DigitSequenceData(seq, zh, raw)) if seq.length == 4 =>
val v = seq.toDouble
v > 1000 && v < 9999
// Accepts a DigitSequence token whose `seq` either has length 4 and a numeric value
// strictly between 1000 and 9999, or has length 2 and a value in [-0.1, 99.1).
// Any other length yields false.
// NOTE(review): both 4-digit bounds are strict, so 1000 and 9999 themselves are
// rejected despite the name — confirm this is intended.
val seqYearOf1000to9999or00to99: Predicate = {
case Token(DigitSequence, DigitSequenceData(seq, zh, raw)) =>
if (seq.length == 4) {
val v = seq.toDouble
v > 1000 && v < 9999
} else if (seq.length == 2) {
val v = seq.toDouble
v >= -0.1 && v < 99.1
} else false
}

val arabicSeqOf1950to2050: Predicate = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ object Examples extends DimExamples {
(ymd(2013, 2, 14), List("后天", "后日")),
(ymd(2013, 2, 10), List("前天", "前日")),
(ymd(2012, 2, 12), List("去年的今天")),
(ymd(1956, 2, 11), List("一九五六年的昨天")),
(ymd(2010, 2, 12), List("去年的前年的今天")),
(ymd(2013, 2, 15), List("明天的后天")),
(ymd(2013, 2, 28), List("这个月月底")),
Expand Down

0 comments on commit 05ec5a5

Please sign in to comment.