diff --git a/src/main/java/com/xkzhangsan/time/enums/RegexEnum.java b/src/main/java/com/xkzhangsan/time/enums/RegexEnum.java index a6c8416..66ac5b8 100644 --- a/src/main/java/com/xkzhangsan/time/enums/RegexEnum.java +++ b/src/main/java/com/xkzhangsan/time/enums/RegexEnum.java @@ -1,643 +1,617 @@ package com.xkzhangsan.time.enums; -import java.util.regex.Pattern; - import com.xkzhangsan.time.utils.RegexCache; +import java.util.regex.Pattern; /** * 正则枚举 - * + * * @author xkzhangsan + * @author buhuaqi */ public enum RegexEnum { - - //================================nlp normStandardTime================================ - /** - * 标准时间 - *
- * yyyy-MM-dd HH:mm:ss - * yyyy-MM-dd HH:mm - * yyyy-MM-dd - *- *
- * 正则: "\\d{4}-\\d{1,2}-\\d{1,2}( \\d{1,2}:\\d{1,2}(:\\d{1,2})?)?" - */ - NormStandard("\\d{4}-\\d{1,2}-\\d{1,2}( \\d{1,2}:\\d{1,2}(:\\d{1,2})?)?", 0, "标准时间"), - - /** - * 标准时间中文 - *
- * yyyy年MM月dd日 HH:mm:ss - * yyyy年MM月dd日 HH:mm - * yyyy年MM月dd日 - *- *
- * 正则: "\\d{4}(年)\\d{1,2}(月)\\d{1,2}(日)( \\d{1,2}:\\d{1,2}(:\\d{1,2})?)?" - */ - NormStandardCn("\\d{4}(年)\\d{1,2}(月)\\d{1,2}(日)( \\d{1,2}:\\d{1,2}(:\\d{1,2})?)?", 0, "标准时间中文"), - - - - //================================nlp normYear================================ - /** - * 两位数来表示年份 - *
- * 正则: "[0-9]{2}(?=年)" - */ - NormYearTwo("[0-9]{2}(?=年)", 0, "两位数来表示年份"), - /** - * 三位数和四位数表示的年份 - *
- * 正则: "[0-9]?[0-9]{3}(?=年)" - */ - NormYearFour("[0-9]?[0-9]{3}(?=年)", 0, "三位数和四位数表示的年份"), - - //================================nlp normMonth================================ - /** - * 月字段 - *
- * 正则: "((10)|(11)|(12)|([1-9]))(?=月)" - */ - NormMonth("((10)|(11)|(12)|([1-9]))(?=月)", 0, "月字段"), - - - //================================nlp normDay================================ - /** - * 日字段 - *
- * 正则:{@code "((? - * 正则: "((10)|(11)|(12)|([1-9]))(月|\\.|\\-)([0-3][0-9]|[1-9])" - */ - NormMonthFuzzyDay("((10)|(11)|(12)|([1-9]))(月|\\.|\\-)([0-3][0-9]|[1-9])", 0, "月-日 兼容模糊写法"), - - /** - * 月-日 兼容模糊写法分隔符 - *
- * 正则: "(月|\\.|\\-)" - */ - NormMonthFuzzyDaySeparator("(月|\\.|\\-)", 0, "月-日 兼容模糊写法分隔符"), - - - //================================nlp normBaseRelated================================ - /** - * 几天前 - *
- * 正则:{@code "\\d+(?=天[以之]?前)" } - */ - NormBaseRelatedDayBefore("\\d+(?=天[以之]?前)", 0, "几天前"), - - - /** - * 几天后 - *
- * 正则:{@code "\\d+(?=天[以之]?后)" } - */ - NormBaseRelatedDayAfter("\\d+(?=天[以之]?后)", 0, "几天后"), - - - /** - * 几月前 - *
- * 正则:{@code "\\d+(?=(个)?月[以之]?前)" } - */ - NormBaseRelatedMonthBefore("\\d+(?=(个)?月[以之]?前)", 0, "几月前"), - - - /** - * 几月后 - *
- * 正则:{@code "\\d+(?=(个)?月[以之]?后)" } - */ - NormBaseRelatedMonthAfter("\\d+(?=(个)?月[以之]?后)", 0, "几月后"), - - - /** - * 几年前 - *
- * 正则:{@code "\\d+(?=年[以之]?前)" } - */ - NormBaseRelatedYearBefore("\\d+(?=年[以之]?前)", 0, "几年前"), - - - /** - * 几年后 - *
- * 正则:{@code "\\d+(?=年[以之]?后)" } - */ - NormBaseRelatedYearAfter("\\d+(?=年[以之]?后)", 0, "几年后"), - - - //================================nlp normBaseTimeRelated================================ - - /** - * 3小时前 - *
- * 正则:{@code "\\d+(?=个?半?(小时|钟头|h|H)[以之]?前)" } - */ - NormBaseTimeRelatedHourBefore("\\d+(?=个?半?(小时|钟头|h|H)[以之]?前)", 0, "3小时前"), - - - /** - * 3小时后 - *
- * 正则:{@code "\\d+(?=个?半?(小时|钟头|h|H)[以之]?后)" } - */ - NormBaseTimeRelatedHourAfter("\\d+(?=个?半?(小时|钟头|h|H)[以之]?后)", 0, "3小时后"), - - - /** - * 半个小时前 - *
- * 正则:{@code "半个?(小时|钟头)[以之]?前" } - */ - NormBaseTimeRelatedHalfHourBefore("半个?(小时|钟头)[以之]?前", 0, "半个小时前"), - - - /** - * 半个小时后 - *
- * 正则:{@code "半个?(小时|钟头)[以之]?后" } - */ - NormBaseTimeRelatedHalfHourAfter("半个?(小时|钟头)[以之]?后", 0, "半个小时后"), - - - /** - * 1个小时 - *
- * 正则:{@code "\\d+(?=个?半?(小时|钟头|h|H))" } - */ - NormBaseTimeRelatedHour("\\d+(?=个?半?(小时|钟头|h|H))", 0, "1个小时"), - - /** - * 3分钟前 - *
- * 正则:{@code "\\d+(?=(分钟|分|min)[以之]?前)" } - */ - NormBaseTimeRelatedMinuteBefore("\\d+(?=(分钟|分|min)[以之]?前)", 0, "3分钟前"), - - - /** - * 3分钟后 - *
- * 正则:{@code "\\d+(?=(分钟|分|min)[以之]?后)" } - */ - NormBaseTimeRelatedMinuteAfter("\\d+(?=(分钟|分|min)[以之]?后)", 0, "3分钟后"), - - /** - * 1分钟 - *
- * 正则:{@code "\\d+(?=(分钟|min))" } - */ - NormBaseTimeRelatedMinute("\\d+(?=(分钟|min))", 0, "1分钟"), - - - /** - * 3秒钟前 - *
- * 正则:{@code "\\d+(?=(秒钟|秒|sec)[以之]?前)" } - */ - NormBaseTimeRelatedSecondBefore("\\d+(?=(秒钟|秒|sec)[以之]?前)", 0, "3秒钟前"), - - - /** - * 3秒钟后 - *
- * 正则:{@code "\\d+(?=(秒钟|秒|sec)[以之]?后)" } - */ - NormBaseTimeRelatedSecondAfter("\\d+(?=(秒钟|秒|sec)[以之]?后)", 0, "3秒钟后"), - - - //================================nlp normCurRelated================================ - - /** - * 前年 - *
- * 正则:{@code "前年" } - */ - NormCurRelatedYearBeforeLast("前年", 0, "前年"), - - /** - * 去年 - *
- * 正则:{@code "去年" } - */ - NormCurRelatedYearBefore("去年", 0, "去年"), - - /** - * 今年 - *
- * 正则:{@code "今年" } - */ - NormCurRelatedYear("今年", 0, "今年"), - - /** - * 明年 - *
- * 正则:{@code "明年" } - */ - NormCurRelatedYearAfter("明年", 0, "明年"), - - /** - * 后年 - *
- * 正则:{@code "后年" } - */ - NormCurRelatedYearAfterNext("后年", 0, "后年"), - - /** - * 上个月 - *
- * 正则:{@code "上(个)?月" } - */ - NormCurRelatedMonthBefore("上(个)?月", 0, "上个月"), - - /** - * 这个月 - *
- * 正则:{@code "(本|这个)月" } - */ - NormCurRelatedMonth("(本|这个)月", 0, "这个月"), - - /** - * 下个月 - *
- * 正则:{@code "下(个)?月" } - */ - NormCurRelatedMonthAfter("下(个)?月", 0, "下个月"), - - /** - * 大前天 - *
- * 正则:{@code "大前天" } - */ - NormCurRelatedDayBeforeThree("大前天", 0, "大前天"), - - /** - * 前天 - *
- * 正则:{@code "(? - * 正则:{@code "昨" } - */ - NormCurRelatedDayYesterday("昨", 0, "昨天"), - /** - * 今天 - *
- * 正则:{@code "今(?!年)" } - */ - NormCurRelatedDayToday("今(?!年)", 0, "今天"), - - /** - * 明天 - *
- * 正则:{@code "明(?!年)" } - */ - NormCurRelatedDayTomorrow("明(?!年)", 0, "明天"), - - /** - * 后天 - *
- * 正则:{@code "(? - * 正则:{@code "大后天" } - */ - NormCurRelatedDayAfterThree("大后天", 0, "大后天"), - - - /** - * 上上周 - *
- * 正则:{@code "(?<=(上上(周|星期)))[1-7]?" } - */ - NormCurRelatedWeekBeforeLast("(?<=(上上(周|星期)))[1-7]?", 0, "上上周"), - - /** - * 上周 - *
- * 正则:{@code "(?<=((? - * 正则:{@code "(?<=((? - * 正则:{@code "(?<=((? - * 正则:{@code "(?<=(下下(周|星期)))[1-7]?" } - */ - NormCurRelatedWeekAfterNext("(?<=(下下(周|星期)))[1-7]?", 0, "下下周"), - - //================================nlp normHour================================ - /** - * 时字段 - *
- * 正则:{@code "(? - * 正则:{@code "凌晨" } - */ - NormHourDayBreak("凌晨", 0, "凌晨"), - - /** - * 早上 - *
- * 正则:{@code "早上|早晨|早间|晨间|今早|明早" } - */ - NormHourEarlyMorning("早上|早晨|早间|晨间|今早|明早", 0, "早上"), - - - /** - * 上午 - *
- * 正则:{@code "上午" } - */ - NormHourMorning("上午", 0, "上午"), - - /** - * 中午 - *
- * 正则:{@code "(中午)|(午间)" } - */ - NormHourNoon("(中午)|(午间)", 0, "中午"), - - /** - * 下午 - *
- * 正则:{@code "(下午)|(午后)|(pm)|(PM)" } - */ - NormHourAfternoon("(下午)|(午后)|(pm)|(PM)", 0, "下午"), - - /** - * 晚上 - *
- * 正则:{@code "晚上|夜间|夜里|今晚|明晚" } - */ - NormHourNight("晚上|夜间|夜里|今晚|明晚", 0, "晚上"), - - - //================================nlp normMinute================================ - /** - * 分字段 - *
- * 正则:{@code "([0-5]?[0-9](?=分(?!钟)))|((?<=((? - * 正则:{@code "(\\d+(分钟|分|min)[以之]?[前后])" } - */ - NormMinuteSpec("(\\d+(分钟|分|min)[以之]?[前后])", 0, "排除30分后"), - - /** - * 一刻 - *
- * 正则:{@code "(?<=[点时])[1一]刻(?!钟)" } - */ - NormMinuteOneQuarter("(?<=[点时])[1一]刻(?!钟)", 0, "一刻"), - - /** - * 半 - *
- * 正则:{@code "(?<=[点时])半" } - */ - NormMinuteHalf("(?<=[点时])半", 0, "点半"), - - /** - * 3刻 - *
- * 正则:{@code "(?<=[点时])[3三]刻(?!钟)" } - */ - NormMinuteThreeQuarter("(?<=[点时])[3三]刻(?!钟)", 0, "3刻"), - - - //================================nlp normSecond================================ - - /** - * 排除30秒后 - *
- * 正则:{@code "(\\d+(秒钟|秒|sec)[以之]?[前后])" } - */ - NormSecondSpec("(\\d+(秒钟|秒|sec)[以之]?[前后])", 0, "排除30秒后"), - - /** - * 秒字段 - *
- * 正则:{@code "([0-5]?[0-9](?=秒))|((?<=分)[0-5]?[0-9])" } - */ - NormSecond("([0-5]?[0-9](?=秒))|((?<=分)[0-5]?[0-9])", 0, "秒字段"), - - - //================================nlp normTotal================================ - - - /** - * 时分秒 - *
- * 正则:{@code "(? - * 正则:{@code "(? - * 正则:{@code "晚" } - */ - NormTotalNight("晚", 0, "晚上"), - - /** - * 年月日 - - *
- * 正则:{@code "[0-9]?[0-9]?[0-9]{2}-((10)|(11)|(12)|([1-9]))-((? - * 正则:{@code "((10)|(11)|(12)|([1-9]))/((? - * 正则:{@code "[0-9]?[0-9]?[0-9]{2}\\.((10)|(11)|(12)|([1-9]))\\.((? - * 正则:{@code "[的]+" } - */ - TextPreprocessSeparator("[的]+", 0, "凌晨"), - - /** - * 数字正则1 - *
- * 正则:{@code "[一二两三四五六七八九123456789]万[一二两三四五六七八九123456789](?!(千|百|十))" } - */ - TextPreprocessNumberTranslatorOne("[一二两三四五六七八九123456789]万[一二两三四五六七八九123456789](?!(千|百|十))", 0, "数字正则1"), - - /** - * 数字正则2 - *
- * 正则:{@code "[一二两三四五六七八九123456789]千[一二两三四五六七八九123456789](?!(百|十))" } - */ - TextPreprocessNumberTranslatorTwo("[一二两三四五六七八九123456789]千[一二两三四五六七八九123456789](?!(百|十))", 0, "数字正则2"), - - /** - * 数字正则3 - *
- * 正则:{@code "[一二两三四五六七八九123456789]百[一二两三四五六七八九123456789](?!十)" } - */ - TextPreprocessNumberTranslatorThree("[一二两三四五六七八九123456789]百[一二两三四五六七八九123456789](?!十)", 0, "数字正则3"), - - /** - * 数字正则4 - *
- * 正则:{@code "[零一二两三四五六七八九]" } - */ - TextPreprocessNumberTranslatorFour("[零一二两三四五六七八九]", 0, "数字正则4"), - - /** - * 数字正则5 - *
- * 正则:{@code "(?<=(周|星期))[末天日]" } - */ - TextPreprocessNumberTranslatorFive("(?<=(周|星期))[末天日]", 0, "数字正则5"), - - /** - * 数字正则6 - *
- * 正则:{@code "(? - * 正则:{@code "0?[1-9]百[0-9]?[0-9]?" } - */ - TextPreprocessNumberTranslatorSeven("0?[1-9]百[0-9]?[0-9]?", 0, "数字正则7"), - - /** - * 数字正则8 - *
- * 正则:{@code "0?[1-9]千[0-9]?[0-9]?[0-9]?" } - */ - TextPreprocessNumberTranslatorEight("0?[1-9]千[0-9]?[0-9]?[0-9]?", 0, "数字正则8"), - - /** - * 数字正则9 - *
- * 正则:{@code "[0-9]+万[0-9]?[0-9]?[0-9]?[0-9]?" } - */ - TextPreprocessNumberTranslatorNine("[0-9]+万[0-9]?[0-9]?[0-9]?[0-9]?", 0, "数字正则9"), - - /** - * 日号 - *
- * 正则:{@code "[日号]" } - */ - TextPreprocessDelDecimalStrSeparator("[日号]", 0, "日号"), - - /** - * 小数 - *
- * 正则:{@code "{0,1}\\d+\\.\\d*|{0,1}\\d*\\.\\d+" } - */ - TextPreprocessDelDecimalStr("{0,1}\\d+\\.\\d*|{0,1}\\d*\\.\\d+", 0, "小数"), - ; - - - private String rule; - - private int flags; - - private String desc; - - private RegexEnum(String rule, int flags, String desc) { - this.rule = rule; - this.flags = flags; - this.desc = desc; - } - - /** - * 先从缓存中查找正则,没有编译后放入缓存 - * - * @return Pattern - */ - public Pattern getPattern(){ - return RegexCache.get(this.rule, this.flags); - } - - public String getName() { - return this.name(); - } - - public String getRule() { - return rule; - } - - public int getFlags() { - return flags; - } - - public String getDesc() { - return desc; - } + + //================================nlp normStandardTime================================ + /** + * 标准时间 + *
+ * yyyy-MM-dd HH:mm:ss + * yyyy-MM-dd HH:mm + * yyyy-MM-dd + *+ *
+ * 正则: "\\d{4}-\\d{1,2}-\\d{1,2}( \\d{1,2}:\\d{1,2}(:\\d{1,2})?)?" + */ + NormStandard("\\d{4}-\\d{1,2}-\\d{1,2}( \\d{1,2}:\\d{1,2}(:\\d{1,2})?)?", 0, "标准时间"), + + /** + * 标准时间中文 + *
+ * yyyy年MM月dd日 HH:mm:ss + * yyyy年MM月dd日 HH:mm + * yyyy年MM月dd日 + *+ *
+ * 正则: "\\d{4}(年)\\d{1,2}(月)\\d{1,2}(日)( \\d{1,2}:\\d{1,2}(:\\d{1,2})?)?" + */ + NormStandardCn("\\d{4}(年)\\d{1,2}(月)\\d{1,2}(日)( \\d{1,2}:\\d{1,2}(:\\d{1,2})?)?", 0, "标准时间中文"), + + //================================nlp normYear================================ + /** + * 两位数来表示年份 + *
+ * 正则: "[0-9]{2}(?=年)" + */ + NormYearTwo("[0-9]{2}(?=年)", 0, "两位数来表示年份"), + /** + * 三位数和四位数表示的年份 + *
+ * 正则: "[0-9]?[0-9]{3}(?=年)" + */ + NormYearFour("[0-9]?[0-9]{3}(?=年)", 0, "三位数和四位数表示的年份"), + + //================================nlp normMonth================================ + /** + * 月字段 + *
+ * 正则: "((10)|(11)|(12)|([1-9]))(?=月)" + */ + NormMonth("((10)|(11)|(12)|([1-9]))(?=月)", 0, "月字段"), + + //================================nlp normDay================================ + /** + * 日字段 + *
+ * 正则:{@code "((? + * 正则: "((10)|(11)|(12)|([1-9]))(月|\\.|\\-)([0-3][0-9]|[1-9])" + */ + NormMonthFuzzyDay("((10)|(11)|(12)|([1-9]))(月|\\.|\\-)([0-3][0-9]|[1-9])", 0, "月-日 兼容模糊写法"), + + /** + * 月-日 兼容模糊写法分隔符 + *
+ * 正则: "(月|\\.|\\-)" + */ + NormMonthFuzzyDaySeparator("(月|\\.|\\-)", 0, "月-日 兼容模糊写法分隔符"), + + //================================nlp normBaseRelated================================ + /** + * 几天前 + *
+ * 正则:{@code "\\d+(?=天[以之]?前)" } + */ + NormBaseRelatedDayBefore("\\d+(?=天[以之]?前)", 0, "几天前"), + + /** + * 几天后 + *
+ * 正则:{@code "\\d+(?=天[以之]?后)" } + */ + NormBaseRelatedDayAfter("\\d+(?=天[以之]?后)", 0, "几天后"), + + /** + * 几月前 + *
+ * 正则:{@code "\\d+(?=(个)?月[以之]?前)" } + */ + NormBaseRelatedMonthBefore("\\d+(?=(个)?月[以之]?前)", 0, "几月前"), + + /** + * 几月后 + *
+ * 正则:{@code "\\d+(?=(个)?月[以之]?后)" } + */ + NormBaseRelatedMonthAfter("\\d+(?=(个)?月[以之]?后)", 0, "几月后"), + + /** + * 几年前 + *
+ * 正则:{@code "\\d+(?=年[以之]?前)" } + */ + NormBaseRelatedYearBefore("\\d+(?=年[以之]?前)", 0, "几年前"), + + /** + * 几年后 + *
+ * 正则:{@code "\\d+(?=年[以之]?后)" } + */ + NormBaseRelatedYearAfter("\\d+(?=年[以之]?后)", 0, "几年后"), + + //================================nlp normBaseTimeRelated================================ + + /** + * 3小时前 + *
+ * 正则:{@code "\\d+(?=个?半?(小时|钟头|h|H)[以之]?前)" } + */ + NormBaseTimeRelatedHourBefore("\\d+(?=个?半?(小时|钟头|h|H)[以之]?前)", 0, "3小时前"), + + /** + * 3小时后 + *
+ * 正则:{@code "\\d+(?=个?半?(小时|钟头|h|H)[以之]?后)" } + */ + NormBaseTimeRelatedHourAfter("\\d+(?=个?半?(小时|钟头|h|H)[以之]?后)", 0, "3小时后"), + + /** + * 半个小时前 + *
+ * 正则:{@code "半个?(小时|钟头)[以之]?前" } + */ + NormBaseTimeRelatedHalfHourBefore("半个?(小时|钟头)[以之]?前", 0, "半个小时前"), + + /** + * 半个小时后 + *
+ * 正则:{@code "半个?(小时|钟头)[以之]?后" } + */ + NormBaseTimeRelatedHalfHourAfter("半个?(小时|钟头)[以之]?后", 0, "半个小时后"), + + /** + * 1个小时 + *
+ * 正则:{@code "\\d+(?=个?半?(小时|钟头|h|H))" } + */ + NormBaseTimeRelatedHour("\\d+(?=个?半?(小时|钟头|h|H))", 0, "1个小时"), + + /** + * 3分钟前 + *
+ * 正则:{@code "\\d+(?=(分钟|分|min)[以之]?前)" } + */ + NormBaseTimeRelatedMinuteBefore("\\d+(?=(分钟|分|min)[以之]?前)", 0, "3分钟前"), + + /** + * 3分钟后 + *
+ * 正则:{@code "\\d+(?=(分钟|分|min)[以之]?后)" } + */ + NormBaseTimeRelatedMinuteAfter("\\d+(?=(分钟|分|min)[以之]?后)", 0, "3分钟后"), + + /** + * 1分钟 + *
+ * 正则:{@code "\\d+(?=(分钟|min))" } + */ + NormBaseTimeRelatedMinute("\\d+(?=(分钟|min))", 0, "1分钟"), + + /** + * 3秒钟前 + *
+ * 正则:{@code "\\d+(?=(秒钟|秒|sec)[以之]?前)" } + */ + NormBaseTimeRelatedSecondBefore("\\d+(?=(秒钟|秒|sec)[以之]?前)", 0, "3秒钟前"), + + /** + * 3秒钟后 + *
+ * 正则:{@code "\\d+(?=(秒钟|秒|sec)[以之]?后)" } + */ + NormBaseTimeRelatedSecondAfter("\\d+(?=(秒钟|秒|sec)[以之]?后)", 0, "3秒钟后"), + + //================================nlp normCurRelated================================ + + /** + * 前年 + *
+ * 正则:{@code "前年" } + */ + NormCurRelatedYearBeforeLast("前年", 0, "前年"), + + /** + * 去年 + *
+ * 正则:{@code "去年" } + */ + NormCurRelatedYearBefore("去年", 0, "去年"), + + /** + * 今年 + *
+ * 正则:{@code "今年" } + */ + NormCurRelatedYear("今年", 0, "今年"), + + /** + * 明年 + *
+ * 正则:{@code "明年" } + */ + NormCurRelatedYearAfter("明年", 0, "明年"), + + /** + * 后年 + *
+ * 正则:{@code "后年" } + */ + NormCurRelatedYearAfterNext("后年", 0, "后年"), + + /** + * 上个月 + *
+ * 正则:{@code "上(个)?月" } + */ + NormCurRelatedMonthBefore("上(个)?月", 0, "上个月"), + + /** + * 这个月 + *
+ * 正则:{@code "(本|这个)月" } + */ + NormCurRelatedMonth("(本|这个)月", 0, "这个月"), + + /** + * 下个月 + *
+ * 正则:{@code "下(个)?月" } + */ + NormCurRelatedMonthAfter("下(个)?月", 0, "下个月"), + + /** + * 大前天 + *
+ * 正则:{@code "大前天" } + */ + NormCurRelatedDayBeforeThree("大前天", 0, "大前天"), + + /** + * 前天 + *
+ * 正则:{@code "(? + * 正则:{@code "昨" } + */ + NormCurRelatedDayYesterday("昨", 0, "昨天"), + /** + * 今天 + *
+ * 正则:{@code "今(?!年)" } + */ + NormCurRelatedDayToday("今(?!年)", 0, "今天"), + + /** + * 明天 + *
+ * 正则:{@code "明(?!年)" } + */ + NormCurRelatedDayTomorrow("明(?!年)", 0, "明天"), + + /** + * 后天 + *
+ * 正则:{@code "(? + * 正则:{@code "大后天" } + */ + NormCurRelatedDayAfterThree("大后天", 0, "大后天"), + + /** + * 上上周 + *
+ * 正则:{@code "(?<=(上上(周|星期)))[1-7]?" } + */ + NormCurRelatedWeekBeforeLast("(?<=(上上(周|星期)))[1-7]?", 0, "上上周"), + + /** + * 上周 + *
+ * 正则:{@code "(?<=((? + * 正则:{@code "\\d+(?=周[以之]?前)" } + */ + NormBaseRelatedWeekBefore("\\d+(?=周[以之]?前)", 0, "几周前"), + + /** + * 周一 二等 + *
+ * 正则:{@code "(?<=((? + * 正则:{@code "(?<=((? + * 正则:{@code "(?<=(下下(周|星期)))[1-7]?" } + */ + NormCurRelatedWeekAfterNext("(?<=(下下(周|星期)))[1-7]?", 0, "下下周"), + + //================================nlp normHour================================ + /** + * 时字段 + *
+ * 正则:{@code "(? + * 正则:{@code "凌晨" } + */ + NormHourDayBreak("凌晨", 0, "凌晨"), + + /** + * 早上 + *
+ * 正则:{@code "早上|早晨|早间|晨间|今早|明早" } + */ + NormHourEarlyMorning("早上|早晨|早间|晨间|今早|明早", 0, "早上"), + + /** + * 上午 + *
+ * 正则:{@code "上午" } + */ + NormHourMorning("上午", 0, "上午"), + + /** + * 中午 + *
+ * 正则:{@code "(中午)|(午间)" } + */ + NormHourNoon("(中午)|(午间)", 0, "中午"), + + /** + * 下午 + *
+ * 正则:{@code "(下午)|(午后)|(pm)|(PM)" } + */ + NormHourAfternoon("(下午)|(午后)|(pm)|(PM)", 0, "下午"), + + /** + * 晚上 + *
+ * 正则:{@code "晚上|夜间|夜里|今晚|明晚" } + */ + NormHourNight("晚上|夜间|夜里|今晚|明晚", 0, "晚上"), + + //================================nlp normMinute================================ + /** + * 分字段 + *
+ * 正则:{@code "([0-5]?[0-9](?=分(?!钟)))|((?<=((? + * 正则:{@code "(\\d+(分钟|分|min)[以之]?[前后])" } + */ + NormMinuteSpec("(\\d+(分钟|分|min)[以之]?[前后])", 0, "排除30分后"), + + /** + * 一刻 + *
+ * 正则:{@code "(?<=[点时])[1一]刻(?!钟)" } + */ + NormMinuteOneQuarter("(?<=[点时])[1一]刻(?!钟)", 0, "一刻"), + + /** + * 半 + *
+ * 正则:{@code "(?<=[点时])半" } + */ + NormMinuteHalf("(?<=[点时])半", 0, "点半"), + + /** + * 3刻 + *
+ * 正则:{@code "(?<=[点时])[3三]刻(?!钟)" } + */ + NormMinuteThreeQuarter("(?<=[点时])[3三]刻(?!钟)", 0, "3刻"), + + //================================nlp normSecond================================ + + /** + * 排除30秒后 + *
+ * 正则:{@code "(\\d+(秒钟|秒|sec)[以之]?[前后])" } + */ + NormSecondSpec("(\\d+(秒钟|秒|sec)[以之]?[前后])", 0, "排除30秒后"), + + /** + * 秒字段 + *
+ * 正则:{@code "([0-5]?[0-9](?=秒))|((?<=分)[0-5]?[0-9])" } + */ + NormSecond("([0-5]?[0-9](?=秒))|((?<=分)[0-5]?[0-9])", 0, "秒字段"), + + //================================nlp normTotal================================ + + /** + * 时分秒 + *
+ * 正则:{@code "(? + * 正则:{@code "(? + * 正则:{@code "晚" } + */ + NormTotalNight("晚", 0, "晚上"), + + /** + * 年月日 - + *
+ * 正则:{@code "[0-9]?[0-9]?[0-9]{2}-((10)|(11)|(12)|([1-9]))-((? + * 正则:{@code "((10)|(11)|(12)|([1-9]))/((? + * 正则:{@code "[0-9]?[0-9]?[0-9]{2}\\.((10)|(11)|(12)|([1-9]))\\.((? + * 正则:{@code "[的]+" } + */ + TextPreprocessSeparator("[的]+", 0, "凌晨"), + + /** + * 数字正则1 + *
+ * 正则:{@code "[一二两三四五六七八九123456789]万[一二两三四五六七八九123456789](?!(千|百|十))" } + */ + TextPreprocessNumberTranslatorOne("[一二两三四五六七八九123456789]万[一二两三四五六七八九123456789](?!(千|百|十))", 0, "数字正则1"), + + /** + * 数字正则2 + *
+ * 正则:{@code "[一二两三四五六七八九123456789]千[一二两三四五六七八九123456789](?!(百|十))" } + */ + TextPreprocessNumberTranslatorTwo("[一二两三四五六七八九123456789]千[一二两三四五六七八九123456789](?!(百|十))", 0, "数字正则2"), + + /** + * 数字正则3 + *
+ * 正则:{@code "[一二两三四五六七八九123456789]百[一二两三四五六七八九123456789](?!十)" } + */ + TextPreprocessNumberTranslatorThree("[一二两三四五六七八九123456789]百[一二两三四五六七八九123456789](?!十)", 0, "数字正则3"), + + /** + * 数字正则4 + *
+ * 正则:{@code "[零一二两三四五六七八九]" } + */ + TextPreprocessNumberTranslatorFour("[零一二两三四五六七八九]", 0, "数字正则4"), + + /** + * 数字正则5 + *
+ * 正则:{@code "(?<=(周|星期))[末天日]" } + */ + TextPreprocessNumberTranslatorFive("(?<=(周|星期))[末天日]", 0, "数字正则5"), + + /** + * 数字正则6 + *
+ * 正则:{@code "(? + * 正则:{@code "0?[1-9]百[0-9]?[0-9]?" } + */ + TextPreprocessNumberTranslatorSeven("0?[1-9]百[0-9]?[0-9]?", 0, "数字正则7"), + + /** + * 数字正则8 + *
+ * 正则:{@code "0?[1-9]千[0-9]?[0-9]?[0-9]?" } + */ + TextPreprocessNumberTranslatorEight("0?[1-9]千[0-9]?[0-9]?[0-9]?", 0, "数字正则8"), + + /** + * 数字正则9 + *
+ * 正则:{@code "[0-9]+万[0-9]?[0-9]?[0-9]?[0-9]?" } + */ + TextPreprocessNumberTranslatorNine("[0-9]+万[0-9]?[0-9]?[0-9]?[0-9]?", 0, "数字正则9"), + + /** + * 日号 + *
+ * 正则:{@code "[日号]" } + */ + TextPreprocessDelDecimalStrSeparator("[日号]", 0, "日号"), + + /** + * 小数 + *
+ * 正则:{@code "{0,1}\\d+\\.\\d*|{0,1}\\d*\\.\\d+" }
+ */
+ TextPreprocessDelDecimalStr("{0,1}\\d+\\.\\d*|{0,1}\\d*\\.\\d+", 0, "小数"),
+ ;
+
+ private String rule;
+
+ private int flags;
+
+ private String desc;
+
+ private RegexEnum(String rule, int flags, String desc) {
+ this.rule = rule;
+ this.flags = flags;
+ this.desc = desc;
+ }
+
+ /**
+ * Returns the compiled {@link Pattern} for this constant, obtained from
+ * {@code RegexCache.get(rule, flags)}.
+ * NOTE(review): per the original comment the cache is consulted first and, on a miss,
+ * the pattern is compiled and stored; RegexCache is not visible here, so confirm.
+ *
+ * @return Pattern
+ */
+ public Pattern getPattern() {
+ return RegexCache.get(this.rule, this.flags);
+ }
+
+ public String getName() {
+ return this.name();
+ }
+
+ public String getRule() {
+ return rule;
+ }
+
+ public int getFlags() {
+ return flags;
+ }
+
+ public String getDesc() {
+ return desc;
+ }
}
diff --git a/src/main/java/com/xkzhangsan/time/nlp/TimeNLP.java b/src/main/java/com/xkzhangsan/time/nlp/TimeNLP.java
index 0314332..e7ed034 100644
--- a/src/main/java/com/xkzhangsan/time/nlp/TimeNLP.java
+++ b/src/main/java/com/xkzhangsan/time/nlp/TimeNLP.java
@@ -22,6 +22,7 @@
* 修改自 https://github.com/shinyke/Time-NLP
*
* @author xkzhangsan
+ * @author buhuaqi
*/
public class TimeNLP {
@@ -531,6 +532,8 @@ else if (timeContext.getTunit()[3] == 12)
/**
* 设置以上文时间为基准的时间偏移计算,日期部分
+ *
+ * 2022-01-28: added the "几周前" (N weeks ago) offset calculation.
*/
private void normBaseRelated() {
String[] timeGrid = new String[6];
@@ -543,8 +546,16 @@ private void normBaseRelated() {
boolean flag = false;//观察时间表达式是否因当前相关时间表达式而改变时间
- Pattern pattern = RegexEnum.NormBaseRelatedDayBefore.getPattern();
+ Pattern pattern = RegexEnum.NormBaseRelatedWeekBefore.getPattern();
Matcher match = pattern.matcher(timeExpression);
+ if (match.find()) {
+ flag = true;
+ int week = Integer.parseInt(match.group());
+ localDateTime = localDateTime.minusWeeks(week);
+ }
+
+ pattern = RegexEnum.NormBaseRelatedDayBefore.getPattern();
+ match = pattern.matcher(timeExpression);
if (match.find()) {
flag = true;
int day = Integer.parseInt(match.group());
@@ -919,18 +930,18 @@ private void normCurRelated() {
pattern = RegexEnum.NormCurRelatedWeek.getPattern();
match = pattern.matcher(timeExpression);
- if (match.find()) {
- flag[2] = true;
- int week;
- try {
- week = Integer.parseInt(match.group());
- } catch (NumberFormatException e) {
- week = 1;
+
+ try {
+ if (match.find()) {
+ int week = Integer.parseInt(match.group());
+ localDateTime = localDateTime.plusWeeks(0);
+ localDateTime = DateTimeCalculatorUtil.withDayOfWeek(localDateTime, week);
+ /**处理未来时间倾向 @author kexm*/
+ localDateTime = preferFutureWeek(week, localDateTime);
+ flag[2] = true;
}
- localDateTime = localDateTime.plusWeeks(0);
- localDateTime = DateTimeCalculatorUtil.withDayOfWeek(localDateTime, week);
- /**处理未来时间倾向 @author kexm*/
- localDateTime = preferFutureWeek(week, localDateTime);
+ } catch (Exception e) {
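+ // Ignored on purpose: if match.group() is not a parsable weekday number (the removed code caught NumberFormatException here), nothing is applied and flag[2] stays false.
+ // NOTE(review): catch (Exception) also hides failures from withDayOfWeek/preferFutureWeek; consider narrowing to NumberFormatException.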
}
String s = DateTimeFormatterUtil.format(localDateTime, "yyyy-MM-dd-HH-mm-ss");
diff --git a/src/test/java/com/xkzhangsan/time/test/TimeNLPUtilTest.java b/src/test/java/com/xkzhangsan/time/test/TimeNLPUtilTest.java
index 7dc2c29..0fa3ae6 100644
--- a/src/test/java/com/xkzhangsan/time/test/TimeNLPUtilTest.java
+++ b/src/test/java/com/xkzhangsan/time/test/TimeNLPUtilTest.java
@@ -14,6 +14,7 @@
* 时间自然语言解析工具类测试
*
* @author xkzhangsan
+ * @author buhuaqi
*/
public class TimeNLPUtilTest {
@@ -23,11 +24,26 @@ public class TimeNLPUtilTest {
@Test
public void timeNLPTest() {
- List