diff --git a/src/main/java/com/xkzhangsan/time/enums/RegexEnum.java b/src/main/java/com/xkzhangsan/time/enums/RegexEnum.java index a6c8416..66ac5b8 100644 --- a/src/main/java/com/xkzhangsan/time/enums/RegexEnum.java +++ b/src/main/java/com/xkzhangsan/time/enums/RegexEnum.java @@ -1,643 +1,617 @@ package com.xkzhangsan.time.enums; -import java.util.regex.Pattern; - import com.xkzhangsan.time.utils.RegexCache; +import java.util.regex.Pattern; /** * 正则枚举 - * + * * @author xkzhangsan + * @author buhuaqi */ public enum RegexEnum { - - //================================nlp normStandardTime================================ - /** - * 标准时间 - *
-	 *     yyyy-MM-dd HH:mm:ss
-	 *     yyyy-MM-dd HH:mm
-	 *     yyyy-MM-dd
-	 * 
- *

- * 正则: "\\d{4}-\\d{1,2}-\\d{1,2}( \\d{1,2}:\\d{1,2}(:\\d{1,2})?)?" - */ - NormStandard("\\d{4}-\\d{1,2}-\\d{1,2}( \\d{1,2}:\\d{1,2}(:\\d{1,2})?)?", 0, "标准时间"), - - /** - * 标准时间中文 - *

-	 *     yyyy年MM月dd日 HH:mm:ss
-	 *     yyyy年MM月dd日 HH:mm
-	 *     yyyy年MM月dd日
-	 * 
- *

- * 正则: "\\d{4}(年)\\d{1,2}(月)\\d{1,2}(日)( \\d{1,2}:\\d{1,2}(:\\d{1,2})?)?" - */ - NormStandardCn("\\d{4}(年)\\d{1,2}(月)\\d{1,2}(日)( \\d{1,2}:\\d{1,2}(:\\d{1,2})?)?", 0, "标准时间中文"), - - - - //================================nlp normYear================================ - /** - * 两位数来表示年份 - *

- * 正则: "[0-9]{2}(?=年)" - */ - NormYearTwo("[0-9]{2}(?=年)", 0, "两位数来表示年份"), - /** - * 三位数和四位数表示的年份 - *

- * 正则: "[0-9]?[0-9]{3}(?=年)" - */ - NormYearFour("[0-9]?[0-9]{3}(?=年)", 0, "三位数和四位数表示的年份"), - - //================================nlp normMonth================================ - /** - * 月字段 - *

- * 正则: "((10)|(11)|(12)|([1-9]))(?=月)" - */ - NormMonth("((10)|(11)|(12)|([1-9]))(?=月)", 0, "月字段"), - - - //================================nlp normDay================================ - /** - * 日字段 - *

- * 正则:{@code "((? - * 正则: "((10)|(11)|(12)|([1-9]))(月|\\.|\\-)([0-3][0-9]|[1-9])" - */ - NormMonthFuzzyDay("((10)|(11)|(12)|([1-9]))(月|\\.|\\-)([0-3][0-9]|[1-9])", 0, "月-日 兼容模糊写法"), - - /** - * 月-日 兼容模糊写法分隔符 - *

- * 正则: "(月|\\.|\\-)" - */ - NormMonthFuzzyDaySeparator("(月|\\.|\\-)", 0, "月-日 兼容模糊写法分隔符"), - - - //================================nlp normBaseRelated================================ - /** - * 几天前 - *

- * 正则:{@code "\\d+(?=天[以之]?前)" } - */ - NormBaseRelatedDayBefore("\\d+(?=天[以之]?前)", 0, "几天前"), - - - /** - * 几天后 - *

- * 正则:{@code "\\d+(?=天[以之]?后)" } - */ - NormBaseRelatedDayAfter("\\d+(?=天[以之]?后)", 0, "几天后"), - - - /** - * 几月前 - *

- * 正则:{@code "\\d+(?=(个)?月[以之]?前)" } - */ - NormBaseRelatedMonthBefore("\\d+(?=(个)?月[以之]?前)", 0, "几月前"), - - - /** - * 几月后 - *

- * 正则:{@code "\\d+(?=(个)?月[以之]?后)" } - */ - NormBaseRelatedMonthAfter("\\d+(?=(个)?月[以之]?后)", 0, "几月后"), - - - /** - * 几年前 - *

- * 正则:{@code "\\d+(?=年[以之]?前)" } - */ - NormBaseRelatedYearBefore("\\d+(?=年[以之]?前)", 0, "几年前"), - - - /** - * 几年后 - *

- * 正则:{@code "\\d+(?=年[以之]?后)" } - */ - NormBaseRelatedYearAfter("\\d+(?=年[以之]?后)", 0, "几年后"), - - - //================================nlp normBaseTimeRelated================================ - - /** - * 3小时前 - *

- * 正则:{@code "\\d+(?=个?半?(小时|钟头|h|H)[以之]?前)" } - */ - NormBaseTimeRelatedHourBefore("\\d+(?=个?半?(小时|钟头|h|H)[以之]?前)", 0, "3小时前"), - - - /** - * 3小时后 - *

- * 正则:{@code "\\d+(?=个?半?(小时|钟头|h|H)[以之]?后)" } - */ - NormBaseTimeRelatedHourAfter("\\d+(?=个?半?(小时|钟头|h|H)[以之]?后)", 0, "3小时后"), - - - /** - * 半个小时前 - *

- * 正则:{@code "半个?(小时|钟头)[以之]?前" } - */ - NormBaseTimeRelatedHalfHourBefore("半个?(小时|钟头)[以之]?前", 0, "半个小时前"), - - - /** - * 半个小时后 - *

- * 正则:{@code "半个?(小时|钟头)[以之]?后" } - */ - NormBaseTimeRelatedHalfHourAfter("半个?(小时|钟头)[以之]?后", 0, "半个小时后"), - - - /** - * 1个小时 - *

- * 正则:{@code "\\d+(?=个?半?(小时|钟头|h|H))" } - */ - NormBaseTimeRelatedHour("\\d+(?=个?半?(小时|钟头|h|H))", 0, "1个小时"), - - /** - * 3分钟前 - *

- * 正则:{@code "\\d+(?=(分钟|分|min)[以之]?前)" } - */ - NormBaseTimeRelatedMinuteBefore("\\d+(?=(分钟|分|min)[以之]?前)", 0, "3分钟前"), - - - /** - * 3分钟后 - *

- * 正则:{@code "\\d+(?=(分钟|分|min)[以之]?后)" } - */ - NormBaseTimeRelatedMinuteAfter("\\d+(?=(分钟|分|min)[以之]?后)", 0, "3分钟后"), - - /** - * 1分钟 - *

- * 正则:{@code "\\d+(?=(分钟|min))" } - */ - NormBaseTimeRelatedMinute("\\d+(?=(分钟|min))", 0, "1分钟"), - - - /** - * 3秒钟前 - *

- * 正则:{@code "\\d+(?=(秒钟|秒|sec)[以之]?前)" } - */ - NormBaseTimeRelatedSecondBefore("\\d+(?=(秒钟|秒|sec)[以之]?前)", 0, "3秒钟前"), - - - /** - * 3秒钟后 - *

- * 正则:{@code "\\d+(?=(秒钟|秒|sec)[以之]?后)" } - */ - NormBaseTimeRelatedSecondAfter("\\d+(?=(秒钟|秒|sec)[以之]?后)", 0, "3秒钟后"), - - - //================================nlp normCurRelated================================ - - /** - * 前年 - *

- * 正则:{@code "前年" } - */ - NormCurRelatedYearBeforeLast("前年", 0, "前年"), - - /** - * 去年 - *

- * 正则:{@code "去年" } - */ - NormCurRelatedYearBefore("去年", 0, "去年"), - - /** - * 今年 - *

- * 正则:{@code "今年" } - */ - NormCurRelatedYear("今年", 0, "今年"), - - /** - * 明年 - *

- * 正则:{@code "明年" } - */ - NormCurRelatedYearAfter("明年", 0, "明年"), - - /** - * 后年 - *

- * 正则:{@code "后年" } - */ - NormCurRelatedYearAfterNext("后年", 0, "后年"), - - /** - * 上个月 - *

- * 正则:{@code "上(个)?月" } - */ - NormCurRelatedMonthBefore("上(个)?月", 0, "上个月"), - - /** - * 这个月 - *

- * 正则:{@code "(本|这个)月" } - */ - NormCurRelatedMonth("(本|这个)月", 0, "这个月"), - - /** - * 下个月 - *

- * 正则:{@code "下(个)?月" } - */ - NormCurRelatedMonthAfter("下(个)?月", 0, "下个月"), - - /** - * 大前天 - *

- * 正则:{@code "大前天" } - */ - NormCurRelatedDayBeforeThree("大前天", 0, "大前天"), - - /** - * 前天 - *

- * 正则:{@code "(? - * 正则:{@code "昨" } - */ - NormCurRelatedDayYesterday("昨", 0, "昨天"), - /** - * 今天 - *

- * 正则:{@code "今(?!年)" } - */ - NormCurRelatedDayToday("今(?!年)", 0, "今天"), - - /** - * 明天 - *

- * 正则:{@code "明(?!年)" } - */ - NormCurRelatedDayTomorrow("明(?!年)", 0, "明天"), - - /** - * 后天 - *

- * 正则:{@code "(? - * 正则:{@code "大后天" } - */ - NormCurRelatedDayAfterThree("大后天", 0, "大后天"), - - - /** - * 上上周 - *

- * 正则:{@code "(?<=(上上(周|星期)))[1-7]?" } - */ - NormCurRelatedWeekBeforeLast("(?<=(上上(周|星期)))[1-7]?", 0, "上上周"), - - /** - * 上周 - *

- * 正则:{@code "(?<=((? - * 正则:{@code "(?<=((? - * 正则:{@code "(?<=((? - * 正则:{@code "(?<=(下下(周|星期)))[1-7]?" } - */ - NormCurRelatedWeekAfterNext("(?<=(下下(周|星期)))[1-7]?", 0, "下下周"), - - //================================nlp normHour================================ - /** - * 时字段 - *

- * 正则:{@code "(? - * 正则:{@code "凌晨" } - */ - NormHourDayBreak("凌晨", 0, "凌晨"), - - /** - * 早上 - *

- * 正则:{@code "早上|早晨|早间|晨间|今早|明早" } - */ - NormHourEarlyMorning("早上|早晨|早间|晨间|今早|明早", 0, "早上"), - - - /** - * 上午 - *

- * 正则:{@code "上午" } - */ - NormHourMorning("上午", 0, "上午"), - - /** - * 中午 - *

- * 正则:{@code "(中午)|(午间)" } - */ - NormHourNoon("(中午)|(午间)", 0, "中午"), - - /** - * 下午 - *

- * 正则:{@code "(下午)|(午后)|(pm)|(PM)" } - */ - NormHourAfternoon("(下午)|(午后)|(pm)|(PM)", 0, "下午"), - - /** - * 晚上 - *

- * 正则:{@code "晚上|夜间|夜里|今晚|明晚" } - */ - NormHourNight("晚上|夜间|夜里|今晚|明晚", 0, "晚上"), - - - //================================nlp normMinute================================ - /** - * 分字段 - *

- * 正则:{@code "([0-5]?[0-9](?=分(?!钟)))|((?<=((? - * 正则:{@code "(\\d+(分钟|分|min)[以之]?[前后])" } - */ - NormMinuteSpec("(\\d+(分钟|分|min)[以之]?[前后])", 0, "排除30分后"), - - /** - * 一刻 - *

- * 正则:{@code "(?<=[点时])[1一]刻(?!钟)" } - */ - NormMinuteOneQuarter("(?<=[点时])[1一]刻(?!钟)", 0, "一刻"), - - /** - * 半 - *

- * 正则:{@code "(?<=[点时])半" } - */ - NormMinuteHalf("(?<=[点时])半", 0, "点半"), - - /** - * 3刻 - *

- * 正则:{@code "(?<=[点时])[3三]刻(?!钟)" } - */ - NormMinuteThreeQuarter("(?<=[点时])[3三]刻(?!钟)", 0, "3刻"), - - - //================================nlp normSecond================================ - - /** - * 排除30秒后 - *

- * 正则:{@code "(\\d+(秒钟|秒|sec)[以之]?[前后])" } - */ - NormSecondSpec("(\\d+(秒钟|秒|sec)[以之]?[前后])", 0, "排除30秒后"), - - /** - * 秒字段 - *

- * 正则:{@code "([0-5]?[0-9](?=秒))|((?<=分)[0-5]?[0-9])" } - */ - NormSecond("([0-5]?[0-9](?=秒))|((?<=分)[0-5]?[0-9])", 0, "秒字段"), - - - //================================nlp normTotal================================ - - - /** - * 时分秒 - *

- * 正则:{@code "(? - * 正则:{@code "(? - * 正则:{@code "晚" } - */ - NormTotalNight("晚", 0, "晚上"), - - /** - * 年月日 - - *

- * 正则:{@code "[0-9]?[0-9]?[0-9]{2}-((10)|(11)|(12)|([1-9]))-((? - * 正则:{@code "((10)|(11)|(12)|([1-9]))/((? - * 正则:{@code "[0-9]?[0-9]?[0-9]{2}\\.((10)|(11)|(12)|([1-9]))\\.((? - * 正则:{@code "[的]+" } - */ - TextPreprocessSeparator("[的]+", 0, "凌晨"), - - /** - * 数字正则1 - *

- * 正则:{@code "[一二两三四五六七八九123456789]万[一二两三四五六七八九123456789](?!(千|百|十))" } - */ - TextPreprocessNumberTranslatorOne("[一二两三四五六七八九123456789]万[一二两三四五六七八九123456789](?!(千|百|十))", 0, "数字正则1"), - - /** - * 数字正则2 - *

- * 正则:{@code "[一二两三四五六七八九123456789]千[一二两三四五六七八九123456789](?!(百|十))" } - */ - TextPreprocessNumberTranslatorTwo("[一二两三四五六七八九123456789]千[一二两三四五六七八九123456789](?!(百|十))", 0, "数字正则2"), - - /** - * 数字正则3 - *

- * 正则:{@code "[一二两三四五六七八九123456789]百[一二两三四五六七八九123456789](?!十)" } - */ - TextPreprocessNumberTranslatorThree("[一二两三四五六七八九123456789]百[一二两三四五六七八九123456789](?!十)", 0, "数字正则3"), - - /** - * 数字正则4 - *

- * 正则:{@code "[零一二两三四五六七八九]" } - */ - TextPreprocessNumberTranslatorFour("[零一二两三四五六七八九]", 0, "数字正则4"), - - /** - * 数字正则5 - *

- * 正则:{@code "(?<=(周|星期))[末天日]" } - */ - TextPreprocessNumberTranslatorFive("(?<=(周|星期))[末天日]", 0, "数字正则5"), - - /** - * 数字正则6 - *

- * 正则:{@code "(? - * 正则:{@code "0?[1-9]百[0-9]?[0-9]?" } - */ - TextPreprocessNumberTranslatorSeven("0?[1-9]百[0-9]?[0-9]?", 0, "数字正则7"), - - /** - * 数字正则8 - *

- * 正则:{@code "0?[1-9]千[0-9]?[0-9]?[0-9]?" } - */ - TextPreprocessNumberTranslatorEight("0?[1-9]千[0-9]?[0-9]?[0-9]?", 0, "数字正则8"), - - /** - * 数字正则9 - *

- * 正则:{@code "[0-9]+万[0-9]?[0-9]?[0-9]?[0-9]?" } - */ - TextPreprocessNumberTranslatorNine("[0-9]+万[0-9]?[0-9]?[0-9]?[0-9]?", 0, "数字正则9"), - - /** - * 日号 - *

- * 正则:{@code "[日号]" } - */ - TextPreprocessDelDecimalStrSeparator("[日号]", 0, "日号"), - - /** - * 小数 - *

- * 正则:{@code "{0,1}\\d+\\.\\d*|{0,1}\\d*\\.\\d+" } - */ - TextPreprocessDelDecimalStr("{0,1}\\d+\\.\\d*|{0,1}\\d*\\.\\d+", 0, "小数"), - ; - - - private String rule; - - private int flags; - - private String desc; - - private RegexEnum(String rule, int flags, String desc) { - this.rule = rule; - this.flags = flags; - this.desc = desc; - } - - /** - * 先从缓存中查找正则,没有编译后放入缓存 - * - * @return Pattern - */ - public Pattern getPattern(){ - return RegexCache.get(this.rule, this.flags); - } - - public String getName() { - return this.name(); - } - - public String getRule() { - return rule; - } - - public int getFlags() { - return flags; - } - - public String getDesc() { - return desc; - } + + //================================nlp normStandardTime================================ + /** + * 标准时间 + *

+     *     yyyy-MM-dd HH:mm:ss
+     *     yyyy-MM-dd HH:mm
+     *     yyyy-MM-dd
+     * 
+ *

+ * 正则: "\\d{4}-\\d{1,2}-\\d{1,2}( \\d{1,2}:\\d{1,2}(:\\d{1,2})?)?" + */ + NormStandard("\\d{4}-\\d{1,2}-\\d{1,2}( \\d{1,2}:\\d{1,2}(:\\d{1,2})?)?", 0, "标准时间"), + + /** + * 标准时间中文 + *

+     *     yyyy年MM月dd日 HH:mm:ss
+     *     yyyy年MM月dd日 HH:mm
+     *     yyyy年MM月dd日
+     * 
+ *

+ * 正则: "\\d{4}(年)\\d{1,2}(月)\\d{1,2}(日)( \\d{1,2}:\\d{1,2}(:\\d{1,2})?)?" + */ + NormStandardCn("\\d{4}(年)\\d{1,2}(月)\\d{1,2}(日)( \\d{1,2}:\\d{1,2}(:\\d{1,2})?)?", 0, "标准时间中文"), + + //================================nlp normYear================================ + /** + * 两位数来表示年份 + *

+ * 正则: "[0-9]{2}(?=年)" + */ + NormYearTwo("[0-9]{2}(?=年)", 0, "两位数来表示年份"), + /** + * 三位数和四位数表示的年份 + *

+ * 正则: "[0-9]?[0-9]{3}(?=年)" + */ + NormYearFour("[0-9]?[0-9]{3}(?=年)", 0, "三位数和四位数表示的年份"), + + //================================nlp normMonth================================ + /** + * 月字段 + *

+ * 正则: "((10)|(11)|(12)|([1-9]))(?=月)" + */ + NormMonth("((10)|(11)|(12)|([1-9]))(?=月)", 0, "月字段"), + + //================================nlp normDay================================ + /** + * 日字段 + *

+ * 正则:{@code "((? + * 正则: "((10)|(11)|(12)|([1-9]))(月|\\.|\\-)([0-3][0-9]|[1-9])" + */ + NormMonthFuzzyDay("((10)|(11)|(12)|([1-9]))(月|\\.|\\-)([0-3][0-9]|[1-9])", 0, "月-日 兼容模糊写法"), + + /** + * 月-日 兼容模糊写法分隔符 + *

+ * 正则: "(月|\\.|\\-)" + */ + NormMonthFuzzyDaySeparator("(月|\\.|\\-)", 0, "月-日 兼容模糊写法分隔符"), + + //================================nlp normBaseRelated================================ + /** + * 几天前 + *

+ * 正则:{@code "\\d+(?=天[以之]?前)" } + */ + NormBaseRelatedDayBefore("\\d+(?=天[以之]?前)", 0, "几天前"), + + /** + * 几天后 + *

+ * 正则:{@code "\\d+(?=天[以之]?后)" } + */ + NormBaseRelatedDayAfter("\\d+(?=天[以之]?后)", 0, "几天后"), + + /** + * 几月前 + *

+ * 正则:{@code "\\d+(?=(个)?月[以之]?前)" } + */ + NormBaseRelatedMonthBefore("\\d+(?=(个)?月[以之]?前)", 0, "几月前"), + + /** + * 几月后 + *

+ * 正则:{@code "\\d+(?=(个)?月[以之]?后)" } + */ + NormBaseRelatedMonthAfter("\\d+(?=(个)?月[以之]?后)", 0, "几月后"), + + /** + * 几年前 + *

+ * 正则:{@code "\\d+(?=年[以之]?前)" } + */ + NormBaseRelatedYearBefore("\\d+(?=年[以之]?前)", 0, "几年前"), + + /** + * 几年后 + *

+ * 正则:{@code "\\d+(?=年[以之]?后)" } + */ + NormBaseRelatedYearAfter("\\d+(?=年[以之]?后)", 0, "几年后"), + + //================================nlp normBaseTimeRelated================================ + + /** + * 3小时前 + *

+ * 正则:{@code "\\d+(?=个?半?(小时|钟头|h|H)[以之]?前)" } + */ + NormBaseTimeRelatedHourBefore("\\d+(?=个?半?(小时|钟头|h|H)[以之]?前)", 0, "3小时前"), + + /** + * 3小时后 + *

+ * 正则:{@code "\\d+(?=个?半?(小时|钟头|h|H)[以之]?后)" } + */ + NormBaseTimeRelatedHourAfter("\\d+(?=个?半?(小时|钟头|h|H)[以之]?后)", 0, "3小时后"), + + /** + * 半个小时前 + *

+ * 正则:{@code "半个?(小时|钟头)[以之]?前" } + */ + NormBaseTimeRelatedHalfHourBefore("半个?(小时|钟头)[以之]?前", 0, "半个小时前"), + + /** + * 半个小时后 + *

+ * 正则:{@code "半个?(小时|钟头)[以之]?后" } + */ + NormBaseTimeRelatedHalfHourAfter("半个?(小时|钟头)[以之]?后", 0, "半个小时后"), + + /** + * 1个小时 + *

+ * 正则:{@code "\\d+(?=个?半?(小时|钟头|h|H))" } + */ + NormBaseTimeRelatedHour("\\d+(?=个?半?(小时|钟头|h|H))", 0, "1个小时"), + + /** + * 3分钟前 + *

+ * 正则:{@code "\\d+(?=(分钟|分|min)[以之]?前)" } + */ + NormBaseTimeRelatedMinuteBefore("\\d+(?=(分钟|分|min)[以之]?前)", 0, "3分钟前"), + + /** + * 3分钟后 + *

+ * 正则:{@code "\\d+(?=(分钟|分|min)[以之]?后)" } + */ + NormBaseTimeRelatedMinuteAfter("\\d+(?=(分钟|分|min)[以之]?后)", 0, "3分钟后"), + + /** + * 1分钟 + *

+ * 正则:{@code "\\d+(?=(分钟|min))" } + */ + NormBaseTimeRelatedMinute("\\d+(?=(分钟|min))", 0, "1分钟"), + + /** + * 3秒钟前 + *

+ * 正则:{@code "\\d+(?=(秒钟|秒|sec)[以之]?前)" } + */ + NormBaseTimeRelatedSecondBefore("\\d+(?=(秒钟|秒|sec)[以之]?前)", 0, "3秒钟前"), + + /** + * 3秒钟后 + *

+ * 正则:{@code "\\d+(?=(秒钟|秒|sec)[以之]?后)" } + */ + NormBaseTimeRelatedSecondAfter("\\d+(?=(秒钟|秒|sec)[以之]?后)", 0, "3秒钟后"), + + //================================nlp normCurRelated================================ + + /** + * 前年 + *

+ * 正则:{@code "前年" } + */ + NormCurRelatedYearBeforeLast("前年", 0, "前年"), + + /** + * 去年 + *

+ * 正则:{@code "去年" } + */ + NormCurRelatedYearBefore("去年", 0, "去年"), + + /** + * 今年 + *

+ * 正则:{@code "今年" } + */ + NormCurRelatedYear("今年", 0, "今年"), + + /** + * 明年 + *

+ * 正则:{@code "明年" } + */ + NormCurRelatedYearAfter("明年", 0, "明年"), + + /** + * 后年 + *

+ * 正则:{@code "后年" } + */ + NormCurRelatedYearAfterNext("后年", 0, "后年"), + + /** + * 上个月 + *

+ * 正则:{@code "上(个)?月" } + */ + NormCurRelatedMonthBefore("上(个)?月", 0, "上个月"), + + /** + * 这个月 + *

+ * 正则:{@code "(本|这个)月" } + */ + NormCurRelatedMonth("(本|这个)月", 0, "这个月"), + + /** + * 下个月 + *

+ * 正则:{@code "下(个)?月" } + */ + NormCurRelatedMonthAfter("下(个)?月", 0, "下个月"), + + /** + * 大前天 + *

+ * 正则:{@code "大前天" } + */ + NormCurRelatedDayBeforeThree("大前天", 0, "大前天"), + + /** + * 前天 + *

+ * 正则:{@code "(? + * 正则:{@code "昨" } + */ + NormCurRelatedDayYesterday("昨", 0, "昨天"), + /** + * 今天 + *

+ * 正则:{@code "今(?!年)" } + */ + NormCurRelatedDayToday("今(?!年)", 0, "今天"), + + /** + * 明天 + *

+ * 正则:{@code "明(?!年)" } + */ + NormCurRelatedDayTomorrow("明(?!年)", 0, "明天"), + + /** + * 后天 + *

+ * 正则:{@code "(? + * 正则:{@code "大后天" } + */ + NormCurRelatedDayAfterThree("大后天", 0, "大后天"), + + /** + * 上上周 + *

+ * 正则:{@code "(?<=(上上(周|星期)))[1-7]?" } + */ + NormCurRelatedWeekBeforeLast("(?<=(上上(周|星期)))[1-7]?", 0, "上上周"), + + /** + * 上周 + *

+ * 正则:{@code "(?<=((? + * 正则:{@code "\\d+(?=周[以之]?前)" } + */ + NormBaseRelatedWeekBefore("\\d+(?=周[以之]?前)", 0, "几周前"), + + /** + * 周一 二等 + *

+ * 正则:{@code "(?<=((? + * 正则:{@code "(?<=((? + * 正则:{@code "(?<=(下下(周|星期)))[1-7]?" } + */ + NormCurRelatedWeekAfterNext("(?<=(下下(周|星期)))[1-7]?", 0, "下下周"), + + //================================nlp normHour================================ + /** + * 时字段 + *

+ * 正则:{@code "(? + * 正则:{@code "凌晨" } + */ + NormHourDayBreak("凌晨", 0, "凌晨"), + + /** + * 早上 + *

+ * 正则:{@code "早上|早晨|早间|晨间|今早|明早" } + */ + NormHourEarlyMorning("早上|早晨|早间|晨间|今早|明早", 0, "早上"), + + /** + * 上午 + *

+ * 正则:{@code "上午" } + */ + NormHourMorning("上午", 0, "上午"), + + /** + * 中午 + *

+ * 正则:{@code "(中午)|(午间)" } + */ + NormHourNoon("(中午)|(午间)", 0, "中午"), + + /** + * 下午 + *

+ * 正则:{@code "(下午)|(午后)|(pm)|(PM)" } + */ + NormHourAfternoon("(下午)|(午后)|(pm)|(PM)", 0, "下午"), + + /** + * 晚上 + *

+ * 正则:{@code "晚上|夜间|夜里|今晚|明晚" } + */ + NormHourNight("晚上|夜间|夜里|今晚|明晚", 0, "晚上"), + + //================================nlp normMinute================================ + /** + * 分字段 + *

+ * 正则:{@code "([0-5]?[0-9](?=分(?!钟)))|((?<=((? + * 正则:{@code "(\\d+(分钟|分|min)[以之]?[前后])" } + */ + NormMinuteSpec("(\\d+(分钟|分|min)[以之]?[前后])", 0, "排除30分后"), + + /** + * 一刻 + *

+ * 正则:{@code "(?<=[点时])[1一]刻(?!钟)" } + */ + NormMinuteOneQuarter("(?<=[点时])[1一]刻(?!钟)", 0, "一刻"), + + /** + * 半 + *

+ * 正则:{@code "(?<=[点时])半" } + */ + NormMinuteHalf("(?<=[点时])半", 0, "点半"), + + /** + * 3刻 + *

+ * 正则:{@code "(?<=[点时])[3三]刻(?!钟)" } + */ + NormMinuteThreeQuarter("(?<=[点时])[3三]刻(?!钟)", 0, "3刻"), + + //================================nlp normSecond================================ + + /** + * 排除30秒后 + *

+ * 正则:{@code "(\\d+(秒钟|秒|sec)[以之]?[前后])" } + */ + NormSecondSpec("(\\d+(秒钟|秒|sec)[以之]?[前后])", 0, "排除30秒后"), + + /** + * 秒字段 + *

+ * 正则:{@code "([0-5]?[0-9](?=秒))|((?<=分)[0-5]?[0-9])" } + */ + NormSecond("([0-5]?[0-9](?=秒))|((?<=分)[0-5]?[0-9])", 0, "秒字段"), + + //================================nlp normTotal================================ + + /** + * 时分秒 + *

+ * 正则:{@code "(? + * 正则:{@code "(? + * 正则:{@code "晚" } + */ + NormTotalNight("晚", 0, "晚上"), + + /** + * 年月日 - + *

+ * 正则:{@code "[0-9]?[0-9]?[0-9]{2}-((10)|(11)|(12)|([1-9]))-((? + * 正则:{@code "((10)|(11)|(12)|([1-9]))/((? + * 正则:{@code "[0-9]?[0-9]?[0-9]{2}\\.((10)|(11)|(12)|([1-9]))\\.((? + * 正则:{@code "[的]+" } + */ + TextPreprocessSeparator("[的]+", 0, "凌晨"), + + /** + * 数字正则1 + *

+ * 正则:{@code "[一二两三四五六七八九123456789]万[一二两三四五六七八九123456789](?!(千|百|十))" } + */ + TextPreprocessNumberTranslatorOne("[一二两三四五六七八九123456789]万[一二两三四五六七八九123456789](?!(千|百|十))", 0, "数字正则1"), + + /** + * 数字正则2 + *

+ * 正则:{@code "[一二两三四五六七八九123456789]千[一二两三四五六七八九123456789](?!(百|十))" } + */ + TextPreprocessNumberTranslatorTwo("[一二两三四五六七八九123456789]千[一二两三四五六七八九123456789](?!(百|十))", 0, "数字正则2"), + + /** + * 数字正则3 + *

+ * 正则:{@code "[一二两三四五六七八九123456789]百[一二两三四五六七八九123456789](?!十)" } + */ + TextPreprocessNumberTranslatorThree("[一二两三四五六七八九123456789]百[一二两三四五六七八九123456789](?!十)", 0, "数字正则3"), + + /** + * 数字正则4 + *

+ * 正则:{@code "[零一二两三四五六七八九]" } + */ + TextPreprocessNumberTranslatorFour("[零一二两三四五六七八九]", 0, "数字正则4"), + + /** + * 数字正则5 + *

+ * 正则:{@code "(?<=(周|星期))[末天日]" } + */ + TextPreprocessNumberTranslatorFive("(?<=(周|星期))[末天日]", 0, "数字正则5"), + + /** + * 数字正则6 + *

+ * 正则:{@code "(? + * 正则:{@code "0?[1-9]百[0-9]?[0-9]?" } + */ + TextPreprocessNumberTranslatorSeven("0?[1-9]百[0-9]?[0-9]?", 0, "数字正则7"), + + /** + * 数字正则8 + *

+ * 正则:{@code "0?[1-9]千[0-9]?[0-9]?[0-9]?" } + */ + TextPreprocessNumberTranslatorEight("0?[1-9]千[0-9]?[0-9]?[0-9]?", 0, "数字正则8"), + + /** + * 数字正则9 + *

+ * 正则:{@code "[0-9]+万[0-9]?[0-9]?[0-9]?[0-9]?" } + */ + TextPreprocessNumberTranslatorNine("[0-9]+万[0-9]?[0-9]?[0-9]?[0-9]?", 0, "数字正则9"), + + /** + * 日号 + *

+ * 正则:{@code "[日号]" } + */ + TextPreprocessDelDecimalStrSeparator("[日号]", 0, "日号"), + + /** + * 小数 + *

+ * 正则:{@code "{0,1}\\d+\\.\\d*|{0,1}\\d*\\.\\d+" } + */ + TextPreprocessDelDecimalStr("{0,1}\\d+\\.\\d*|{0,1}\\d*\\.\\d+", 0, "小数"), + ; + + private String rule; + + private int flags; + + private String desc; + + private RegexEnum(String rule, int flags, String desc) { + this.rule = rule; + this.flags = flags; + this.desc = desc; + } + + /** + * 先从缓存中查找正则,没有编译后放入缓存 + * + * @return Pattern + */ + public Pattern getPattern() { + return RegexCache.get(this.rule, this.flags); + } + + public String getName() { + return this.name(); + } + + public String getRule() { + return rule; + } + + public int getFlags() { + return flags; + } + + public String getDesc() { + return desc; + } } diff --git a/src/main/java/com/xkzhangsan/time/nlp/TimeNLP.java b/src/main/java/com/xkzhangsan/time/nlp/TimeNLP.java index 0314332..e7ed034 100644 --- a/src/main/java/com/xkzhangsan/time/nlp/TimeNLP.java +++ b/src/main/java/com/xkzhangsan/time/nlp/TimeNLP.java @@ -22,6 +22,7 @@ * 修改自 https://github.com/shinyke/Time-NLP * * @author xkzhangsan + * @author buhuaqi */ public class TimeNLP { @@ -531,6 +532,8 @@ else if (timeContext.getTunit()[3] == 12) /** * 设置以上文时间为基准的时间偏移计算,日期部分 + * + * <p>2022-01-28 新增 “几周前” 偏移计算 */ private void normBaseRelated() { String[] timeGrid = new String[6]; @@ -543,8 +546,16 @@ private void normBaseRelated() { boolean flag = false;//观察时间表达式是否因当前相关时间表达式而改变时间 - Pattern pattern = RegexEnum.NormBaseRelatedDayBefore.getPattern(); + Pattern pattern = RegexEnum.NormBaseRelatedWeekBefore.getPattern(); Matcher match = pattern.matcher(timeExpression); + if (match.find()) { + flag = true; + int week = Integer.parseInt(match.group()); + localDateTime = localDateTime.minusWeeks(week); + } + + pattern = RegexEnum.NormBaseRelatedDayBefore.getPattern(); + match = pattern.matcher(timeExpression); if (match.find()) { flag = true; int day = Integer.parseInt(match.group()); @@ -919,18 +930,18 @@ private void normCurRelated() { pattern = RegexEnum.NormCurRelatedWeek.getPattern(); match 
= pattern.matcher(timeExpression); - if (match.find()) { - flag[2] = true; - int week; - try { - week = Integer.parseInt(match.group()); - } catch (NumberFormatException e) { - week = 1; + + try { + if (match.find()) { + int week = Integer.parseInt(match.group()); + localDateTime = localDateTime.plusWeeks(0); + localDateTime = DateTimeCalculatorUtil.withDayOfWeek(localDateTime, week); + /**处理未来时间倾向 @author kexm*/ + localDateTime = preferFutureWeek(week, localDateTime); + flag[2] = true; } - localDateTime = localDateTime.plusWeeks(0); - localDateTime = DateTimeCalculatorUtil.withDayOfWeek(localDateTime, week); - /**处理未来时间倾向 @author kexm*/ - localDateTime = preferFutureWeek(week, localDateTime); + } catch (Exception e) { + // } String s = DateTimeFormatterUtil.format(localDateTime, "yyyy-MM-dd-HH-mm-ss"); diff --git a/src/test/java/com/xkzhangsan/time/test/TimeNLPUtilTest.java b/src/test/java/com/xkzhangsan/time/test/TimeNLPUtilTest.java index 7dc2c29..0fa3ae6 100644 --- a/src/test/java/com/xkzhangsan/time/test/TimeNLPUtilTest.java +++ b/src/test/java/com/xkzhangsan/time/test/TimeNLPUtilTest.java @@ -14,6 +14,7 @@ * 时间自然语言解析工具类测试 * * @author xkzhangsan + * @author buhuaqi */ public class TimeNLPUtilTest { @@ -23,11 +24,26 @@ public class TimeNLPUtilTest { @Test public void timeNLPTest() { - List timeNLPList = TimeNLPUtil.parse("去年5月"); + List timeNLPList = TimeNLPUtil.parse("记得3周前的下午5点我们一起去郊游"); + System.out.println("记得3周前的下午5点我们一起去郊游"); + System.out.println(DateTimeFormatterUtil.formatToDateTimeStr(timeNLPList.get(0).getTime()) + "-" + + timeNLPList.get(0).getIsAllDayTime()); + + timeNLPList = TimeNLPUtil.parse("记得四周前我们一起去郊游"); + System.out.println("记得四周前我们一起去郊游"); + System.out.println(DateTimeFormatterUtil.formatToDateTimeStr(timeNLPList.get(0).getTime()) + "-" + + timeNLPList.get(0).getIsAllDayTime()); + + timeNLPList = TimeNLPUtil.parse("记得上周三下午3点我们一起去郊游"); + System.out.println("记得上周三下午3点我们一起去郊游"); + 
System.out.println(DateTimeFormatterUtil.formatToDateTimeStr(timeNLPList.get(0).getTime()) + "-" + + timeNLPList.get(0).getIsAllDayTime()); + + timeNLPList = TimeNLPUtil.parse("去年5月"); System.out.println("去年5月"); System.out.println(DateTimeFormatterUtil.formatToDateTimeStr(timeNLPList.get(0).getTime()) + "-" + timeNLPList.get(0).getIsAllDayTime()); - + timeNLPList = TimeNLPUtil.parse("上月21日"); System.out.println("上月21日"); System.out.println(DateTimeFormatterUtil.formatToDateTimeStr(timeNLPList.get(0).getTime()) + "-" @@ -135,7 +151,7 @@ public void normStandardTimeTest() { System.out.println("2016-07-19 15:30对应时间"); System.out.println(DateTimeFormatterUtil.formatToDateTimeStr(timeNLPList.get(0).getTime()) + "-" + timeNLPList.get(0).getIsAllDayTime()); - + timeNLPList = TimeNLPUtil.parse("2016年07月19日对应时间"); System.out.println("2016年07月19日对应时间"); System.out.println(DateTimeFormatterUtil.formatToDateTimeStr(timeNLPList.get(0).getTime()) + "-" @@ -208,11 +224,11 @@ public void normHourTest() { System.out.println("1个半小时前对应时间"); System.out.println(DateTimeFormatterUtil.formatToDateTimeStr(timeNLPList.get(0).getTime()) + "-" + timeNLPList.get(0).getIsAllDayTime()); - + timeNLPList = TimeNLPUtil.parse("1小时50分钟10秒后对应时间"); System.out.println("1小时50分钟10秒后对应时间"); System.out.println(DateTimeFormatterUtil.formatToDateTimeStr(timeNLPList.get(0).getTime()) + "-" - + timeNLPList.get(0).getIsAllDayTime()); + + timeNLPList.get(0).getIsAllDayTime()); } /**