From d61372510ac7161a46534d18c772a5ba6d466ca5 Mon Sep 17 00:00:00 2001 From: Simon Tzanakis Date: Tue, 3 Oct 2023 12:54:58 +0200 Subject: [PATCH] MET-5132: Update code after answers from rnd --- .../dates/edtf/AbstractEdtfDate.java | 8 ++- .../dates/edtf/DateQualification.java | 58 +++++++++++++------ .../dates/edtf/InstantEdtfDate.java | 23 ++++---- .../dates/edtf/InstantEdtfDateBuilder.java | 18 +++--- .../dates/edtf/IntervalEdtfDate.java | 25 ++++++-- .../extractors/AbstractDateExtractor.java | 11 +++- .../extractors/BriefRangeDateExtractor.java | 15 ++--- .../CenturyNumericDateExtractor.java | 5 +- .../extractors/CenturyRomanDateExtractor.java | 4 +- .../extractors/DecadeDateExtractor.java | 8 +-- .../extractors/EdtfDateExtractor.java | 16 ++--- .../LongNegativeYearDateExtractor.java | 7 +-- .../extractors/NumericPartsDateExtractor.java | 15 ++--- .../normalizers/DatesNormalizer.java | 6 +- .../BriefRangeDateExtractorTest.java | 20 +++---- .../extractors/DateExtractorTest.java | 13 +++-- ...ongNegativeYearRangeDateExtractorTest.java | 5 -- 17 files changed, 146 insertions(+), 111 deletions(-) diff --git a/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/AbstractEdtfDate.java b/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/AbstractEdtfDate.java index d79390326..9c79e906e 100644 --- a/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/AbstractEdtfDate.java +++ b/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/AbstractEdtfDate.java @@ -1,5 +1,7 @@ package eu.europeana.normalization.dates.edtf; +import java.util.Set; + /** * An abstract class that contains the template that an EDTF date with compliance level 1 should implement. *

See more in the specification of EDTF

@@ -18,17 +20,17 @@ protected AbstractEdtfDate(String label) { } /** - * Overwrite the date qualification, mainly used for pre-sanitized values. + * Add the date qualification, mainly used for pre-sanitized values. * * @param dateQualification the date qualification */ - public abstract void overwriteQualification(DateQualification dateQualification); + public abstract void addQualification(DateQualification dateQualification); public String getLabel() { return label; } - public abstract DateQualification getDateQualification(); + public abstract Set getDateQualifications(); public abstract boolean isOpen(); diff --git a/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/DateQualification.java b/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/DateQualification.java index d49007735..1202eb6e7 100644 --- a/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/DateQualification.java +++ b/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/DateQualification.java @@ -1,6 +1,7 @@ package eu.europeana.normalization.dates.edtf; -import java.util.Arrays; +import java.util.EnumSet; +import java.util.Set; import java.util.regex.Pattern; /** @@ -8,32 +9,53 @@ * Specification */ public enum DateQualification { + UNCERTAIN, APPROXIMATE; - NO_QUALIFICATION(""), - UNCERTAIN("?"), - APPROXIMATE("~"), - UNCERTAIN_APPROXIMATE("%"); - - public static final Pattern CHECK_QUALIFICATION_PATTERN = Pattern.compile("^[^?~%]*([?~%])$"); - private final String character; - - DateQualification(String character) { - this.character = character; - } + private static final String CHARACTER_UNCERTAIN = "?"; + private static final String CHARACTER_APPROXIMATE = "~"; + private static final String CHARACTER_UNCERTAIN_APPROXIMATE = "%"; + private static final String QUALIFICATION_CHARACTER_REGEX = + CHARACTER_UNCERTAIN + CHARACTER_APPROXIMATE + CHARACTER_UNCERTAIN_APPROXIMATE; + public static final Pattern 
CHECK_QUALIFICATION_PATTERN = Pattern.compile( + "^[^" + QUALIFICATION_CHARACTER_REGEX + "]*([" + QUALIFICATION_CHARACTER_REGEX + "])$"); /** - * Get the enum value based on the character provided. - *

It will return a matched enum value or {@link #NO_QUALIFICATION}.

+ * Get the enum values based on the character provided. + *

It will return an empty set or the set with the applicable qualifications.

* * @param character the provided character * @return the enum value */ - public static DateQualification fromCharacter(String character) { - return Arrays.stream(DateQualification.values()).filter(value -> value.character.equals(character)).findFirst().orElse( - NO_QUALIFICATION); + public static Set fromCharacter(String character) { + final Set dateQualifications = EnumSet.noneOf(DateQualification.class); + if (CHARACTER_UNCERTAIN_APPROXIMATE.equals(character)) { + dateQualifications.add(DateQualification.UNCERTAIN); + dateQualifications.add(DateQualification.APPROXIMATE); + } else if (CHARACTER_UNCERTAIN.equals(character)) { + dateQualifications.add(DateQualification.UNCERTAIN); + } else if (CHARACTER_APPROXIMATE.equals(character)) { + dateQualifications.add(DateQualification.APPROXIMATE); + } + return dateQualifications; } - public String getCharacter() { + /** + * Get the string representation based on the provided date qualifications. + * + * @param dateQualifications the date qualifications + * @return the string representation + */ + public static String getCharacterFromQualifications(Set dateQualifications) { + final String character; + if (dateQualifications.contains(UNCERTAIN) && dateQualifications.contains(APPROXIMATE)) { + character = CHARACTER_UNCERTAIN_APPROXIMATE; + } else if (dateQualifications.contains(UNCERTAIN)) { + character = CHARACTER_UNCERTAIN; + } else if (dateQualifications.contains(APPROXIMATE)) { + character = CHARACTER_APPROXIMATE; + } else { + character = ""; + } return character; } } diff --git a/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/InstantEdtfDate.java b/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/InstantEdtfDate.java index 3a1818c80..883515d19 100644 --- a/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/InstantEdtfDate.java +++ b/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/InstantEdtfDate.java @@ -3,7 +3,6 @@ import 
static eu.europeana.normalization.dates.edtf.DateBoundaryType.DECLARED; import static eu.europeana.normalization.dates.edtf.DateBoundaryType.OPEN; import static eu.europeana.normalization.dates.edtf.DateBoundaryType.UNKNOWN; -import static eu.europeana.normalization.dates.edtf.DateQualification.NO_QUALIFICATION; import static eu.europeana.normalization.dates.edtf.InstantEdtfDateBuilder.THRESHOLD_4_DIGITS_YEAR; import static eu.europeana.normalization.dates.edtf.Iso8601Parser.ISO_8601_MINIMUM_YEAR_DIGITS; import static java.lang.Math.abs; @@ -19,7 +18,9 @@ import java.time.Year; import java.time.YearMonth; import java.time.temporal.TemporalAccessor; +import java.util.EnumSet; import java.util.Objects; +import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -38,11 +39,13 @@ public final class InstantEdtfDate extends AbstractEdtfDate implements Comparabl private Month month; private LocalDate yearMonthDay; private YearPrecision yearPrecision; - private DateQualification dateQualification = NO_QUALIFICATION; + private Set dateQualifications = EnumSet.noneOf(DateQualification.class); private DateBoundaryType dateBoundaryType = DECLARED; /** * Restricted constructor by provided {@link InstantEdtfDateBuilder}. + *

All fields apart from {@link #dateQualifications} are set in the constructor. The date qualifications can + * be extended afterwards, for example to add an approximate qualification for a date that was sanitized.

+ * * @param instantEdtfDateBuilder the builder with all content verified */ @@ -51,7 +54,7 @@ public final class InstantEdtfDate extends AbstractEdtfDate implements Comparabl year = instantEdtfDateBuilder.getYearObj(); month = instantEdtfDateBuilder.getMonthObj(); yearMonthDay = instantEdtfDateBuilder.getYearMonthDayObj(); - dateQualification = instantEdtfDateBuilder.getDateQualification(); + dateQualifications = instantEdtfDateBuilder.getDateQualifications(); } private InstantEdtfDate(DateBoundaryType dateBoundaryType) { @@ -59,8 +62,8 @@ private InstantEdtfDate(DateBoundaryType dateBoundaryType) { } @Override - public void overwriteQualification(DateQualification dateQualification) { - this.dateQualification = dateQualification; + public void addQualification(DateQualification dateQualification) { + this.dateQualifications.add(dateQualification); } /** @@ -235,7 +238,7 @@ public String toString() { stringBuilder.append( ofNullable(yearMonthDay).map(LocalDate::getDayOfMonth).map(decimalFormat::format).map(d -> "-" + d).orElse("")); } - stringBuilder.append(dateQualification.getCharacter()); + stringBuilder.append(DateQualification.getCharacterFromQualifications(dateQualifications)); return stringBuilder.toString(); } @@ -261,13 +264,13 @@ public boolean equals(Object o) { } InstantEdtfDate that = (InstantEdtfDate) o; return yearPrecision == that.yearPrecision && Objects.equals(year, that.year) && Objects.equals(month, - that.month) && Objects.equals(yearMonthDay, that.yearMonthDay) && dateQualification == that.dateQualification + that.month) && Objects.equals(yearMonthDay, that.yearMonthDay) && Objects.equals(dateQualifications, that.dateQualifications) && dateBoundaryType == that.dateBoundaryType; } @Override public int hashCode() { - return Objects.hash(yearPrecision, year, month, yearMonthDay, dateQualification, dateBoundaryType); + return Objects.hash(yearPrecision, year, month, yearMonthDay, dateQualifications, dateBoundaryType); } public Year getYear() { @@ -286,8 +289,8
@@ public YearPrecision getYearPrecision() { return yearPrecision; } - public DateQualification getDateQualification() { - return dateQualification; + public Set getDateQualifications() { + return EnumSet.copyOf(dateQualifications); } public DateBoundaryType getDateBoundaryType() { diff --git a/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/InstantEdtfDateBuilder.java b/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/InstantEdtfDateBuilder.java index b99bc30d4..efaee3d8c 100644 --- a/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/InstantEdtfDateBuilder.java +++ b/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/InstantEdtfDateBuilder.java @@ -1,6 +1,5 @@ package eu.europeana.normalization.dates.edtf; -import static eu.europeana.normalization.dates.edtf.DateQualification.NO_QUALIFICATION; import static java.lang.String.format; import eu.europeana.normalization.dates.YearPrecision; @@ -13,7 +12,9 @@ import java.time.YearMonth; import java.time.temporal.ChronoField; import java.time.temporal.TemporalAccessor; +import java.util.EnumSet; import java.util.Objects; +import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,7 +42,7 @@ public class InstantEdtfDateBuilder { private Integer month; private Integer day; private YearPrecision yearPrecision = YearPrecision.YEAR; - private DateQualification dateQualification = NO_QUALIFICATION; + private final Set dateQualifications = EnumSet.noneOf(DateQualification.class); private boolean flexibleDateBuild = true; private boolean isLongYear = false; @@ -155,7 +156,8 @@ private void validateStrict() throws DateExtractionException { //If it is not a long year, and we want to be strict we further validate boolean notLongYearAndStrictBuild = !isLongYear && !flexibleDateBuild; boolean isDateNonPrecise = - dateQualification == DateQualification.UNCERTAIN || (yearPrecision != null && yearPrecision != 
YearPrecision.YEAR); + dateQualifications.contains(DateQualification.UNCERTAIN) || (yearPrecision != null + && yearPrecision != YearPrecision.YEAR); boolean notCompleteDate = monthObj == null || yearMonthDayObj == null; if (notLongYearAndStrictBuild && (isDateNonPrecise || notCompleteDate)) { throw new DateExtractionException("Date is invalid according to our strict profile!"); @@ -204,11 +206,11 @@ public InstantEdtfDateBuilder withYearPrecision(YearPrecision yearPrecision) { /** * Add date qualification. * - * @param dateQualification the date qualification + * @param dateQualifications the date qualifications * @return the extended builder */ - public InstantEdtfDateBuilder withDateQualification(DateQualification dateQualification) { - this.dateQualification = dateQualification; + public InstantEdtfDateBuilder withDateQualification(Set dateQualifications) { + this.dateQualifications.addAll(dateQualifications); return this; } @@ -249,7 +251,7 @@ public YearPrecision getYearPrecision() { return yearPrecision; } - public DateQualification getDateQualification() { - return dateQualification; + public Set getDateQualifications() { + return EnumSet.copyOf(dateQualifications); } } diff --git a/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/IntervalEdtfDate.java b/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/IntervalEdtfDate.java index 385701ae3..7ccae9802 100644 --- a/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/IntervalEdtfDate.java +++ b/metis-normalization/src/main/java/eu/europeana/normalization/dates/edtf/IntervalEdtfDate.java @@ -3,6 +3,9 @@ import static eu.europeana.normalization.dates.extraction.DefaultDatesSeparator.SLASH_DELIMITER; import static java.lang.String.format; +import java.util.EnumSet; +import java.util.Set; + /** * An EDTF date that represents a period of time specified by a start and end date with various degrees of precision */ @@ -11,6 +14,15 @@ public class 
IntervalEdtfDate extends AbstractEdtfDate { private InstantEdtfDate start; private InstantEdtfDate end; + + /** + * Restricted constructor by provided {@link IntervalEdtfDateBuilder}. + *

Apart from the qualifications that {@link IntervalEdtfDate#addQualification(DateQualification)} adds to each boundary, all state is + * set in the constructor. The date qualifications can be extended afterwards, for example to add an approximate + * qualification for a date that was sanitized.

+ * + * @param intervalEdtfDateBuilder the builder with all content verified + */ IntervalEdtfDate(IntervalEdtfDateBuilder intervalEdtfDateBuilder) { super(intervalEdtfDateBuilder.getLabel()); this.start = intervalEdtfDateBuilder.getStart(); @@ -18,15 +30,16 @@ public class IntervalEdtfDate extends AbstractEdtfDate { } @Override - public void overwriteQualification(DateQualification dateQualification) { - start.overwriteQualification(dateQualification); - end.overwriteQualification(dateQualification); + public void addQualification(DateQualification dateQualification) { + start.addQualification(dateQualification); + end.addQualification(dateQualification); } @Override - public DateQualification getDateQualification() { - return start.getDateQualification().compareTo(end.getDateQualification()) >= 0 - ? start.getDateQualification() : end.getDateQualification(); + public Set getDateQualifications() { + Set dateQualifications = EnumSet.copyOf(start.getDateQualifications()); + dateQualifications.addAll(end.getDateQualifications()); + return dateQualifications; } @Override diff --git a/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/AbstractDateExtractor.java b/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/AbstractDateExtractor.java index 20c3fdfa5..72dd8242e 100644 --- a/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/AbstractDateExtractor.java +++ b/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/AbstractDateExtractor.java @@ -1,7 +1,6 @@ package eu.europeana.normalization.dates.extraction.extractors; import static eu.europeana.normalization.dates.DateNormalizationResult.getNoMatchResult; -import static eu.europeana.normalization.dates.edtf.DateQualification.NO_QUALIFICATION; import static eu.europeana.normalization.dates.edtf.DateQualification.UNCERTAIN; import static java.lang.String.format; @@ -10,6 
+9,8 @@ import eu.europeana.normalization.dates.extraction.DateExtractionException; import eu.europeana.normalization.dates.sanitize.DateFieldSanitizer; import java.lang.invoke.MethodHandles; +import java.util.EnumSet; +import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -27,8 +28,12 @@ public abstract class AbstractDateExtractor implements DateExtractor { * @param inputValue the input value * @return the date qualification */ - public DateQualification checkDateQualification(String inputValue) { - return (inputValue.startsWith("?") || inputValue.endsWith("?")) ? UNCERTAIN : NO_QUALIFICATION; + public Set getQualification(String inputValue) { + final Set dateQualifications = EnumSet.noneOf(DateQualification.class); + if (inputValue.startsWith("?") || inputValue.endsWith("?")) { + dateQualifications.add(UNCERTAIN); + } + return dateQualifications; } /** diff --git a/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/BriefRangeDateExtractor.java b/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/BriefRangeDateExtractor.java index c780c4994..bf301e439 100644 --- a/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/BriefRangeDateExtractor.java +++ b/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/BriefRangeDateExtractor.java @@ -12,9 +12,8 @@ import eu.europeana.normalization.dates.extraction.DateExtractionException; import eu.europeana.normalization.dates.extraction.DefaultDatesSeparator; import java.time.Month; -import java.util.ArrayList; -import java.util.EnumSet; import java.util.List; +import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -34,7 +33,7 @@ */ public class BriefRangeDateExtractor extends AbstractRangeDateExtractor { - private static final Pattern YEAR_PATTERN = Pattern.compile(OPTIONAL_QUESTION_MARK + "(-?\\d{2,4})" + 
OPTIONAL_QUESTION_MARK); + private static final Pattern YEAR_PATTERN = Pattern.compile(OPTIONAL_QUESTION_MARK + "(\\d{2,4})" + OPTIONAL_QUESTION_MARK); @Override public DateNormalizationResultRangePair extractDateNormalizationResult(String startString, @@ -70,7 +69,7 @@ private DateNormalizationResult extractEndDateNormalizationResult(DateNormalizat final DateNormalizationResult endDateNormalizationResult = extractYear(dateString, flexibleDateBuild); if (endDateNormalizationResult.getDateNormalizationResultStatus() == DateNormalizationResultStatus.MATCHED) { - final DateQualification endDateQualification = endDateNormalizationResult.getEdtfDate().getDateQualification(); + final Set endDateQualifications = endDateNormalizationResult.getEdtfDate().getDateQualifications(); final int startYearFourDigits = ((InstantEdtfDate) startDateNormalizationResult.getEdtfDate()).getYear().getValue(); final int startYearLastTwoDigits = startYearFourDigits % CENTURY.getDuration(); @@ -81,7 +80,7 @@ private DateNormalizationResult extractEndDateNormalizationResult(DateNormalizat if (endYearDigitsLength == 2 && Math.abs(endYear) > Month.DECEMBER.getValue() && startYearLastTwoDigits < endYear) { final int endYearFourDigits = (startYearFourDigits / CENTURY.getDuration()) * CENTURY.getDuration() + endYear; final InstantEdtfDate endInstantEdtfDate = new InstantEdtfDateBuilder(endYearFourDigits).withDateQualification( - endDateQualification).withFlexibleDateBuild(flexibleDateBuild).build(); + endDateQualifications).withFlexibleDateBuild(flexibleDateBuild).build(); dateNormalizationResult = new DateNormalizationResult(DateNormalizationExtractorMatchId.BRIEF_DATE_RANGE, dateString, endInstantEdtfDate); } @@ -92,13 +91,11 @@ private DateNormalizationResult extractEndDateNormalizationResult(DateNormalizat } private DateNormalizationResult extractYear(String inputValue, boolean flexibleDateBuild) throws DateExtractionException { - final DateQualification dateQualification = 
checkDateQualification(inputValue); - DateNormalizationResult dateNormalizationResult = DateNormalizationResult.getNoMatchResult(inputValue); final Matcher matcher = YEAR_PATTERN.matcher(inputValue); if (matcher.matches()) { final int year = Integer.parseInt(matcher.group(1)); - final InstantEdtfDate instantEdtfDate = new InstantEdtfDateBuilder(year).withDateQualification(dateQualification) + final InstantEdtfDate instantEdtfDate = new InstantEdtfDateBuilder(year).withDateQualification(getQualification(inputValue)) .withFlexibleDateBuild(flexibleDateBuild).build(); dateNormalizationResult = new DateNormalizationResult(DateNormalizationExtractorMatchId.BRIEF_DATE_RANGE, inputValue, instantEdtfDate); @@ -108,7 +105,7 @@ private DateNormalizationResult extractYear(String inputValue, boolean flexibleD @Override public List getRangeDateQualifiers() { - return new ArrayList<>(EnumSet.of(DefaultDatesSeparator.SLASH_DELIMITER)); + return List.of(DefaultDatesSeparator.values()); } @Override diff --git a/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/CenturyNumericDateExtractor.java b/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/CenturyNumericDateExtractor.java index 1c0bb18c6..7d83a995e 100644 --- a/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/CenturyNumericDateExtractor.java +++ b/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/CenturyNumericDateExtractor.java @@ -6,7 +6,6 @@ import eu.europeana.normalization.dates.DateNormalizationExtractorMatchId; import eu.europeana.normalization.dates.DateNormalizationResult; -import eu.europeana.normalization.dates.edtf.DateQualification; import eu.europeana.normalization.dates.edtf.InstantEdtfDate; import eu.europeana.normalization.dates.edtf.InstantEdtfDateBuilder; import eu.europeana.normalization.dates.extraction.DateExtractionException; @@ -69,15 +68,13 @@ 
public DateNormalizationResult extract(String inputValue, boolean flexibleDateBuild) throws DateExtractionException { DateNormalizationResult dateNormalizationResult = DateNormalizationResult.getNoMatchResult(inputValue); for (CenturyNumericDatePattern centerNumericDatePattern : CenturyNumericDatePattern.values()) { - final DateQualification dateQualification = checkDateQualification(inputValue); - final Matcher matcher = centerNumericDatePattern.getPattern().matcher(inputValue); if (matcher.matches()) { final String century = matcher.group(1); InstantEdtfDateBuilder instantEdtfDateBuilder = new InstantEdtfDateBuilder( centerNumericDatePattern.getCenturyExtractorFunction().applyAsInt(century)) .withYearPrecision(CENTURY); - InstantEdtfDate instantEdtfDate = instantEdtfDateBuilder.withDateQualification(dateQualification) + InstantEdtfDate instantEdtfDate = instantEdtfDateBuilder.withDateQualification(getQualification(inputValue)) .withFlexibleDateBuild(flexibleDateBuild).build(); dateNormalizationResult = new DateNormalizationResult(centerNumericDatePattern.getDateNormalizationExtractorMatchId(), inputValue, diff --git a/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/CenturyRomanDateExtractor.java b/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/CenturyRomanDateExtractor.java index 0a3ed0b19..d6b59c852 100644 --- a/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/CenturyRomanDateExtractor.java +++ b/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/CenturyRomanDateExtractor.java @@ -6,7 +6,6 @@ import eu.europeana.normalization.dates.DateNormalizationExtractorMatchId; import eu.europeana.normalization.dates.DateNormalizationResult; -import eu.europeana.normalization.dates.edtf.DateQualification; import eu.europeana.normalization.dates.edtf.InstantEdtfDateBuilder; import 
eu.europeana.normalization.dates.extraction.DateExtractionException; import eu.europeana.normalization.dates.extraction.RomanToNumber; @@ -37,12 +36,11 @@ public class CenturyRomanDateExtractor extends AbstractDateExtractor { public DateNormalizationResult extract(String inputValue, boolean flexibleDateBuild) throws DateExtractionException { DateNormalizationResult dateNormalizationResult = DateNormalizationResult.getNoMatchResult(inputValue); - final DateQualification dateQualification = checkDateQualification(inputValue); final Matcher matcher = ROMAN_2_TO_21_PATTERN.matcher(inputValue); if (matcher.matches()) { final int century = RomanToNumber.romanToDecimal(matcher.group(1)) - 1; final InstantEdtfDateBuilder instantEdtfDateBuilder = - new InstantEdtfDateBuilder(century).withYearPrecision(CENTURY).withDateQualification(dateQualification); + new InstantEdtfDateBuilder(century).withYearPrecision(CENTURY).withDateQualification(getQualification(inputValue)); dateNormalizationResult = new DateNormalizationResult(DateNormalizationExtractorMatchId.CENTURY_ROMAN, inputValue, instantEdtfDateBuilder.build()); } diff --git a/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/DecadeDateExtractor.java b/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/DecadeDateExtractor.java index 19cc53740..015822baa 100644 --- a/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/DecadeDateExtractor.java +++ b/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/DecadeDateExtractor.java @@ -4,7 +4,6 @@ import eu.europeana.normalization.dates.DateNormalizationExtractorMatchId; import eu.europeana.normalization.dates.DateNormalizationResult; -import eu.europeana.normalization.dates.edtf.DateQualification; import eu.europeana.normalization.dates.edtf.InstantEdtfDate; import eu.europeana.normalization.dates.edtf.InstantEdtfDateBuilder; import 
eu.europeana.normalization.dates.extraction.DateExtractionException; @@ -36,16 +35,13 @@ public class DecadeDateExtractor extends AbstractDateExtractor { OPTIONAL_QUESTION_MARK + "(\\d{3})(?:[XU]" + OPTIONAL_QUESTION_MARK + "|\\?\\?)", Pattern.CASE_INSENSITIVE); @Override - public DateNormalizationResult extract(String inputValue, - boolean flexibleDateBuild) throws DateExtractionException { - final DateQualification dateQualification = checkDateQualification(inputValue); - + public DateNormalizationResult extract(String inputValue, boolean flexibleDateBuild) throws DateExtractionException { DateNormalizationResult dateNormalizationResult = DateNormalizationResult.getNoMatchResult(inputValue); final Matcher matcher = decadePattern.matcher(inputValue); if (matcher.matches()) { final InstantEdtfDate datePart = new InstantEdtfDateBuilder(Integer.parseInt(matcher.group(1))) .withYearPrecision(DECADE) - .withDateQualification(dateQualification) + .withDateQualification(getQualification(inputValue)) .withFlexibleDateBuild(flexibleDateBuild) .build(); dateNormalizationResult = new DateNormalizationResult(DateNormalizationExtractorMatchId.DECADE, inputValue, datePart); diff --git a/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/EdtfDateExtractor.java b/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/EdtfDateExtractor.java index 1cb97b456..682aed57e 100644 --- a/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/EdtfDateExtractor.java +++ b/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/EdtfDateExtractor.java @@ -13,6 +13,8 @@ import eu.europeana.normalization.dates.extraction.DateExtractionException; import java.lang.invoke.MethodHandles; import java.time.temporal.TemporalAccessor; +import java.util.EnumSet; +import java.util.Set; import java.util.regex.Matcher; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; @@ -67,27 +69,27 @@ private static Integer getLongYear(String dateInput) { } @Override - public DateQualification checkDateQualification(String inputValue) { + public Set getQualification(String inputValue) { final Matcher qualificationMatcher = CHECK_QUALIFICATION_PATTERN.matcher(inputValue); - DateQualification dateQualification = DateQualification.NO_QUALIFICATION; + Set dateQualifications = EnumSet.noneOf(DateQualification.class); if (qualificationMatcher.matches()) { final String modifier = qualificationMatcher.group(1); - dateQualification = DateQualification.fromCharacter(String.valueOf(modifier.charAt(0))); + dateQualifications = DateQualification.fromCharacter(String.valueOf(modifier.charAt(0))); } - return dateQualification; + return dateQualifications; } private InstantEdtfDate extractInstantEdtfDate(String inputValue, boolean flexibleDateBuild) throws DateExtractionException { - final DateQualification dateQualification = checkDateQualification(inputValue); + final Set dateQualifications = getQualification(inputValue); String dateInputStrippedModifier = inputValue; - if (dateQualification != DateQualification.NO_QUALIFICATION) { + if (!dateQualifications.isEmpty()) { dateInputStrippedModifier = inputValue.substring(0, inputValue.length() - 1); } final TemporalAccessor temporalAccessor = ISO_8601_PARSER.parseDatePart(dateInputStrippedModifier); return new InstantEdtfDateBuilder(temporalAccessor) - .withDateQualification(dateQualification) + .withDateQualification(dateQualifications) .withFlexibleDateBuild(flexibleDateBuild) .build(); } diff --git a/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/LongNegativeYearDateExtractor.java b/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/LongNegativeYearDateExtractor.java index 7e621089b..7406009d1 100644 --- 
a/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/LongNegativeYearDateExtractor.java +++ b/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/LongNegativeYearDateExtractor.java @@ -2,7 +2,6 @@ import eu.europeana.normalization.dates.DateNormalizationExtractorMatchId; import eu.europeana.normalization.dates.DateNormalizationResult; -import eu.europeana.normalization.dates.edtf.DateQualification; import eu.europeana.normalization.dates.edtf.InstantEdtfDate; import eu.europeana.normalization.dates.edtf.InstantEdtfDateBuilder; import eu.europeana.normalization.dates.extraction.DateExtractionException; @@ -15,19 +14,17 @@ */ public class LongNegativeYearDateExtractor extends AbstractDateExtractor { - private static final Pattern YEAR_PATTERN = Pattern.compile(OPTIONAL_QUESTION_MARK + "(-?\\d{5,9})" + OPTIONAL_QUESTION_MARK); + private static final Pattern YEAR_PATTERN = Pattern.compile("(-?\\d{5,9})"); @Override public DateNormalizationResult extract(String inputValue, boolean flexibleDateBuild) throws DateExtractionException { - final DateQualification dateQualification = checkDateQualification(inputValue); - DateNormalizationResult dateNormalizationResult = DateNormalizationResult.getNoMatchResult(inputValue); final Matcher matcher = YEAR_PATTERN.matcher(inputValue); if (matcher.matches()) { final int year = Integer.parseInt(matcher.group(1)); final InstantEdtfDate instantEdtfDate = - new InstantEdtfDateBuilder(year).withDateQualification(dateQualification) + new InstantEdtfDateBuilder(year).withDateQualification(getQualification(inputValue)) .withLongYear() .withFlexibleDateBuild(flexibleDateBuild).build(); dateNormalizationResult = new DateNormalizationResult(DateNormalizationExtractorMatchId.LONG_NEGATIVE_YEAR, inputValue, diff --git a/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/NumericPartsDateExtractor.java 
b/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/NumericPartsDateExtractor.java index 1fbca66a4..65058d877 100644 --- a/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/NumericPartsDateExtractor.java +++ b/metis-normalization/src/main/java/eu/europeana/normalization/dates/extraction/extractors/NumericPartsDateExtractor.java @@ -1,6 +1,5 @@ package eu.europeana.normalization.dates.extraction.extractors; -import static eu.europeana.normalization.dates.edtf.DateQualification.NO_QUALIFICATION; import static eu.europeana.normalization.dates.edtf.DateQualification.UNCERTAIN; import static java.util.Optional.ofNullable; import static java.util.regex.Pattern.compile; @@ -12,6 +11,7 @@ import eu.europeana.normalization.dates.edtf.InstantEdtfDateBuilder; import eu.europeana.normalization.dates.extraction.DateExtractionException; import eu.europeana.normalization.dates.extraction.NumericPartsPattern; +import java.util.EnumSet; import java.util.Locale; import java.util.Set; import java.util.regex.Matcher; @@ -45,9 +45,12 @@ public DateNormalizationResult extract(String inputValue, } @Override - public DateQualification checkDateQualification(String inputValue) { - return (STARTING_UNCERTAIN_PATTERN.matcher(inputValue).find() || ENDING_UNCERTAIN_PATTERN.matcher(inputValue).find()) - ? 
UNCERTAIN : NO_QUALIFICATION; + public Set getQualification(String inputValue) { + final Set dateQualifications = EnumSet.noneOf(DateQualification.class); + if (STARTING_UNCERTAIN_PATTERN.matcher(inputValue).find() || ENDING_UNCERTAIN_PATTERN.matcher(inputValue).find()) { + dateQualifications.add(UNCERTAIN); + } + return dateQualifications; } /** @@ -60,14 +63,12 @@ public DateQualification checkDateQualification(String inputValue) { */ protected DateNormalizationResult extract(String inputValue, Set numericPatternValues, boolean flexibleDateBuild) throws DateExtractionException { - final DateQualification dateQualification = checkDateQualification(inputValue); - DateNormalizationResult dateNormalizationResult = DateNormalizationResult.getNoMatchResult(inputValue); for (NumericPartsPattern numericWithMissingPartsPattern : numericPatternValues) { final Matcher matcher = numericWithMissingPartsPattern.getPattern().matcher(inputValue); if (matcher.matches()) { InstantEdtfDateBuilder instantEdtfDateBuilder = extractDateProperty(numericWithMissingPartsPattern, matcher); - final InstantEdtfDate instantEdtfDate = instantEdtfDateBuilder.withDateQualification(dateQualification) + final InstantEdtfDate instantEdtfDate = instantEdtfDateBuilder.withDateQualification(getQualification(inputValue)) .withFlexibleDateBuild(flexibleDateBuild).build(); dateNormalizationResult = new DateNormalizationResult( numericWithMissingPartsPattern.getDateNormalizationExtractorMatchId(), inputValue, instantEdtfDate); diff --git a/metis-normalization/src/main/java/eu/europeana/normalization/normalizers/DatesNormalizer.java b/metis-normalization/src/main/java/eu/europeana/normalization/normalizers/DatesNormalizer.java index ddab954eb..f8bc0b7b3 100644 --- a/metis-normalization/src/main/java/eu/europeana/normalization/normalizers/DatesNormalizer.java +++ b/metis-normalization/src/main/java/eu/europeana/normalization/normalizers/DatesNormalizer.java @@ -299,7 +299,7 @@ private 
DateNormalizationResult normalizeInputSanitized(List date dateNormalizationResult = normalizeFunction.apply(dateExtractors, sanitizedDate); if (dateNormalizationResult.getDateNormalizationResultStatus() == MATCHED) { if (checkIfApproximateCleanOperationId.test(sanitizedDate.getSanitizeOperation())) { - dateNormalizationResult.getEdtfDate().overwriteQualification(DateQualification.APPROXIMATE); + dateNormalizationResult.getEdtfDate().addQualification(DateQualification.APPROXIMATE); } //Re-create result containing sanitization operation. dateNormalizationResult = new DateNormalizationResult(dateNormalizationResult, sanitizedDate.getSanitizeOperation()); @@ -369,11 +369,11 @@ private void appendTimespanEntity(Document document, AbstractEdtfDate edtfDate, } // Create and add skosNote elements to timespan in case of approximate or uncertain dates. - if (edtfDate.getDateQualification() == DateQualification.APPROXIMATE) { + if (edtfDate.getDateQualifications().contains(DateQualification.APPROXIMATE)) { final Element skosNote = XmlUtil.createElement(SKOS_NOTE, timeSpan, null); skosNote.appendChild(document.createTextNode("approximate")); } - if (edtfDate.getDateQualification() == DateQualification.UNCERTAIN) { + if (edtfDate.getDateQualifications().contains(DateQualification.UNCERTAIN)) { final Element skosNote = XmlUtil.createElement(SKOS_NOTE, timeSpan, null); skosNote.appendChild(document.createTextNode("uncertain")); } diff --git a/metis-normalization/src/test/java/eu/europeana/normalization/dates/extraction/extractors/BriefRangeDateExtractorTest.java b/metis-normalization/src/test/java/eu/europeana/normalization/dates/extraction/extractors/BriefRangeDateExtractorTest.java index 752936f0b..1995294e7 100644 --- a/metis-normalization/src/test/java/eu/europeana/normalization/dates/extraction/extractors/BriefRangeDateExtractorTest.java +++ b/metis-normalization/src/test/java/eu/europeana/normalization/dates/extraction/extractors/BriefRangeDateExtractorTest.java @@ -29,15 
+29,16 @@ private static Stream extractBrief() { //Slash of("1989/90", "1989/1990"), of("1989/90?", "1989/1990?"), - of("-1989/-88", "-1989/-1988"), - of("-1989/-88?", "-1989/-1988?"), - of("-1989/-13", "-1989/-1913"), + of("?1989/90", "1989?/1990"), + of("?1989/90?", "1989?/1990?"), + of("-1989/-88", null), - //Dash not supported - of("1989-90", null), - of("1989-90?", null), - of("1989-90", null), - of("989-90", null), + //Dash + of("1989-90", "1989/1990"), + of("1989-90?", "1989/1990?"), + of("?1989-90", "1989?/1990"), + of("?1989-90?", "1989?/1990?"), + of("989-90", "0989/0990"), //End date lower rightmost two digits than start year of("1989/89", null), @@ -49,10 +50,9 @@ private static Stream extractBrief() { of("1989/990", null), of("1989-990", null), - //End year cannot be lower or equal than +-12 + //End year cannot be lower or equal than 12 of("1900/01", null), of("1900/12", null), - of("-1989/-12", null), //Less than three digits on start year of("89-90", null) diff --git a/metis-normalization/src/test/java/eu/europeana/normalization/dates/extraction/extractors/DateExtractorTest.java b/metis-normalization/src/test/java/eu/europeana/normalization/dates/extraction/extractors/DateExtractorTest.java index 48f56bc64..5ca8d3ee1 100644 --- a/metis-normalization/src/test/java/eu/europeana/normalization/dates/extraction/extractors/DateExtractorTest.java +++ b/metis-normalization/src/test/java/eu/europeana/normalization/dates/extraction/extractors/DateExtractorTest.java @@ -4,7 +4,6 @@ import static eu.europeana.normalization.dates.edtf.DateBoundaryType.UNKNOWN; import static eu.europeana.normalization.dates.edtf.DateQualification.APPROXIMATE; import static eu.europeana.normalization.dates.edtf.DateQualification.UNCERTAIN; -import static eu.europeana.normalization.dates.edtf.DateQualification.UNCERTAIN_APPROXIMATE; import static eu.europeana.normalization.dates.extraction.DefaultDatesSeparator.SLASH_DELIMITER; import static 
org.junit.jupiter.api.Assertions.assertEquals; @@ -18,9 +17,15 @@ public interface DateExtractorTest { default void assertQualification(String expected, InstantEdtfDate instantEdtfDate) { - assertEquals(expected.contains("?"), instantEdtfDate.getDateQualification() == UNCERTAIN); - assertEquals(expected.contains("~"), instantEdtfDate.getDateQualification() == APPROXIMATE); - assertEquals(expected.contains("%"), instantEdtfDate.getDateQualification() == UNCERTAIN_APPROXIMATE); + assertEquals(expected.contains("?"), + instantEdtfDate.getDateQualifications().contains(UNCERTAIN) && + !instantEdtfDate.getDateQualifications().contains(APPROXIMATE)); + assertEquals(expected.contains("~"), + instantEdtfDate.getDateQualifications().contains(APPROXIMATE) && + !instantEdtfDate.getDateQualifications().contains(UNCERTAIN)); + assertEquals(expected.contains("%"), + instantEdtfDate.getDateQualifications().contains(UNCERTAIN) && + instantEdtfDate.getDateQualifications().contains(APPROXIMATE)); } default void assertBoundaryType(String expected, InstantEdtfDate instantEdtfDate) { diff --git a/metis-normalization/src/test/java/eu/europeana/normalization/dates/extraction/extractors/LongNegativeYearRangeDateExtractorTest.java b/metis-normalization/src/test/java/eu/europeana/normalization/dates/extraction/extractors/LongNegativeYearRangeDateExtractorTest.java index cfaa9bc50..df28e26cc 100644 --- a/metis-normalization/src/test/java/eu/europeana/normalization/dates/extraction/extractors/LongNegativeYearRangeDateExtractorTest.java +++ b/metis-normalization/src/test/java/eu/europeana/normalization/dates/extraction/extractors/LongNegativeYearRangeDateExtractorTest.java @@ -32,11 +32,6 @@ private static Stream extract() { of("-12345678/-12345677", "Y-12345678/Y-12345677"), of("-123456789/-123456788", "Y-123456789/Y-123456788"), - //Uncertain - of("-12345?/-12344", "Y-12345?/Y-12344"), - of("-12345/-12344?", "Y-12345/Y-12344?"), - of("-12345?/-12344?", "Y-12345?/Y-12344?"), - //Dash 
of("-12345--12344", null), of("-123456--123455", null),