Skip to content

Commit

Permalink
MET-5132: Update code after answers from rnd
Browse files Browse the repository at this point in the history
  • Loading branch information
stzanakis committed Oct 3, 2023
1 parent 2ab80b0 commit d613725
Show file tree
Hide file tree
Showing 17 changed files with 146 additions and 111 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package eu.europeana.normalization.dates.edtf;

import java.util.Set;

/**
* An abstract class that contains the template that an EDTF date with compliance level 1 should implement.
* <p>See more in the specification of <a href="https://www.loc.gov/standards/datetime/">EDTF</a></p>
Expand All @@ -18,17 +20,17 @@ protected AbstractEdtfDate(String label) {
}

/**
 * Add the date qualification, mainly used for pre-sanitized values.
 * <p>The qualification is added to the ones the date already has; it does not replace them.</p>
 *
 * @param dateQualification the date qualification to add
 */
public abstract void addQualification(DateQualification dateQualification);

/**
 * Get the label.
 *
 * @return the label
 */
public String getLabel() {
  return this.label;
}

/**
 * Get the date qualifications.
 *
 * @return the set of date qualifications
 */
public abstract Set<DateQualification> getDateQualifications();

public abstract boolean isOpen();

Expand Down
Original file line number Diff line number Diff line change
@@ -1,39 +1,61 @@
package eu.europeana.normalization.dates.edtf;

import java.util.Arrays;
import java.util.EnumSet;
import java.util.Set;
import java.util.regex.Pattern;

/**
* Date qualification characters according to <a href="https://www.loc.gov/standards/datetime/">Extended Date/Time Format (EDTF)
* Specification</a>
*/
public enum DateQualification {
  UNCERTAIN, APPROXIMATE;

  private static final String CHARACTER_UNCERTAIN = "?";
  private static final String CHARACTER_APPROXIMATE = "~";
  // A single character that stands for both UNCERTAIN and APPROXIMATE at once.
  private static final String CHARACTER_UNCERTAIN_APPROXIMATE = "%";
  // Used only inside a regex character class, where none of these characters needs escaping.
  private static final String QUALIFICATION_CHARACTER_REGEX =
      CHARACTER_UNCERTAIN + CHARACTER_APPROXIMATE + CHARACTER_UNCERTAIN_APPROXIMATE;

  /**
   * Matches a value that contains exactly one qualification character, located at the very end of the value. The
   * qualification character is captured in group 1.
   */
  public static final Pattern CHECK_QUALIFICATION_PATTERN = Pattern.compile(
      "^[^" + QUALIFICATION_CHARACTER_REGEX + "]*([" + QUALIFICATION_CHARACTER_REGEX + "])$");

  /**
   * Get the enum values based on the character provided.
   * <p>It will return an empty set or the set with the applicable qualifications.</p>
   *
   * @param character the provided character, may be {@code null}
   * @return a new modifiable set with the applicable qualifications, empty if the character is not a qualification character
   */
  public static Set<DateQualification> fromCharacter(String character) {
    final Set<DateQualification> dateQualifications = EnumSet.noneOf(DateQualification.class);
    // Constant-first equals keeps a null character safe: it simply matches nothing.
    if (CHARACTER_UNCERTAIN_APPROXIMATE.equals(character)) {
      dateQualifications.add(DateQualification.UNCERTAIN);
      dateQualifications.add(DateQualification.APPROXIMATE);
    } else if (CHARACTER_UNCERTAIN.equals(character)) {
      dateQualifications.add(DateQualification.UNCERTAIN);
    } else if (CHARACTER_APPROXIMATE.equals(character)) {
      dateQualifications.add(DateQualification.APPROXIMATE);
    }
    return dateQualifications;
  }

  /**
   * Get the string representation based on the provided date qualifications.
   *
   * @param dateQualifications the date qualifications
   * @return the qualification character, or an empty string if the set contains no qualification
   */
  public static String getCharacterFromQualifications(Set<DateQualification> dateQualifications) {
    final boolean uncertain = dateQualifications.contains(UNCERTAIN);
    final boolean approximate = dateQualifications.contains(APPROXIMATE);
    final String character;
    if (uncertain && approximate) {
      character = CHARACTER_UNCERTAIN_APPROXIMATE;
    } else if (uncertain) {
      character = CHARACTER_UNCERTAIN;
    } else if (approximate) {
      character = CHARACTER_APPROXIMATE;
    } else {
      character = "";
    }
    return character;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import static eu.europeana.normalization.dates.edtf.DateBoundaryType.DECLARED;
import static eu.europeana.normalization.dates.edtf.DateBoundaryType.OPEN;
import static eu.europeana.normalization.dates.edtf.DateBoundaryType.UNKNOWN;
import static eu.europeana.normalization.dates.edtf.DateQualification.NO_QUALIFICATION;
import static eu.europeana.normalization.dates.edtf.InstantEdtfDateBuilder.THRESHOLD_4_DIGITS_YEAR;
import static eu.europeana.normalization.dates.edtf.Iso8601Parser.ISO_8601_MINIMUM_YEAR_DIGITS;
import static java.lang.Math.abs;
Expand All @@ -19,7 +18,9 @@
import java.time.Year;
import java.time.YearMonth;
import java.time.temporal.TemporalAccessor;
import java.util.EnumSet;
import java.util.Objects;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -38,11 +39,13 @@ public final class InstantEdtfDate extends AbstractEdtfDate implements Comparabl
private Month month;
private LocalDate yearMonthDay;
private YearPrecision yearPrecision;
private DateQualification dateQualification = NO_QUALIFICATION;
private Set<DateQualification> dateQualifications = EnumSet.noneOf(DateQualification.class);
private DateBoundaryType dateBoundaryType = DECLARED;

/**
* Restricted constructor by provided {@link InstantEdtfDateBuilder}.
* <p>All fields apart from {@link #dateQualifications} are strictly contained in the constructor. The date qualifications can
* be further extended to, for example, add an approximate qualification for a date that was sanitized.</p>
*
* @param instantEdtfDateBuilder the builder with all content verified
*/
Expand All @@ -51,16 +54,16 @@ public final class InstantEdtfDate extends AbstractEdtfDate implements Comparabl
year = instantEdtfDateBuilder.getYearObj();
month = instantEdtfDateBuilder.getMonthObj();
yearMonthDay = instantEdtfDateBuilder.getYearMonthDayObj();
dateQualification = instantEdtfDateBuilder.getDateQualification();
dateQualifications = instantEdtfDateBuilder.getDateQualifications();
}

/**
* Restricted constructor for an instance that is described only by its date boundary type.
* <p>No other field is assigned here, so the remaining fields keep their declared defaults.</p>
*
* @param dateBoundaryType the date boundary type
*/
private InstantEdtfDate(DateBoundaryType dateBoundaryType) {
this.dateBoundaryType = dateBoundaryType;
}

@Override
public void overwriteQualification(DateQualification dateQualification) {
this.dateQualification = dateQualification;
public void addQualification(DateQualification dateQualification) {
this.dateQualifications.add(dateQualification);
}

/**
Expand Down Expand Up @@ -235,7 +238,7 @@ public String toString() {
stringBuilder.append(
ofNullable(yearMonthDay).map(LocalDate::getDayOfMonth).map(decimalFormat::format).map(d -> "-" + d).orElse(""));
}
stringBuilder.append(dateQualification.getCharacter());
stringBuilder.append(DateQualification.getCharacterFromQualifications(dateQualifications));
return stringBuilder.toString();
}

Expand All @@ -261,13 +264,13 @@ public boolean equals(Object o) {
}
InstantEdtfDate that = (InstantEdtfDate) o;
return yearPrecision == that.yearPrecision && Objects.equals(year, that.year) && Objects.equals(month,
that.month) && Objects.equals(yearMonthDay, that.yearMonthDay) && dateQualification == that.dateQualification
that.month) && Objects.equals(yearMonthDay, that.yearMonthDay) && dateQualifications == that.dateQualifications
&& dateBoundaryType == that.dateBoundaryType;
}

@Override
public int hashCode() {
  // The qualifications set contributes a content-based hash.
  // NOTE(review): equals() must compare dateQualifications by content (Objects.equals), not by reference (==), to stay
  // consistent with this hash.
  return Objects.hash(yearPrecision, year, month, yearMonthDay, dateQualifications, dateBoundaryType);
}

public Year getYear() {
Expand All @@ -286,8 +289,8 @@ public YearPrecision getYearPrecision() {
return yearPrecision;
}

public DateQualification getDateQualification() {
return dateQualification;
public Set<DateQualification> getDateQualifications() {
return EnumSet.copyOf(dateQualifications);
}

public DateBoundaryType getDateBoundaryType() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package eu.europeana.normalization.dates.edtf;

import static eu.europeana.normalization.dates.edtf.DateQualification.NO_QUALIFICATION;
import static java.lang.String.format;

import eu.europeana.normalization.dates.YearPrecision;
Expand All @@ -13,7 +12,9 @@
import java.time.YearMonth;
import java.time.temporal.ChronoField;
import java.time.temporal.TemporalAccessor;
import java.util.EnumSet;
import java.util.Objects;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -41,7 +42,7 @@ public class InstantEdtfDateBuilder {
private Integer month;
private Integer day;
private YearPrecision yearPrecision = YearPrecision.YEAR;
private DateQualification dateQualification = NO_QUALIFICATION;
private final Set<DateQualification> dateQualifications = EnumSet.noneOf(DateQualification.class);
private boolean flexibleDateBuild = true;
private boolean isLongYear = false;

Expand Down Expand Up @@ -155,7 +156,8 @@ private void validateStrict() throws DateExtractionException {
//If it is not a long year, and we want to be strict we further validate
boolean notLongYearAndStrictBuild = !isLongYear && !flexibleDateBuild;
boolean isDateNonPrecise =
dateQualification == DateQualification.UNCERTAIN || (yearPrecision != null && yearPrecision != YearPrecision.YEAR);
dateQualifications.contains(DateQualification.UNCERTAIN) || (yearPrecision != null
&& yearPrecision != YearPrecision.YEAR);
boolean notCompleteDate = monthObj == null || yearMonthDayObj == null;
if (notLongYearAndStrictBuild && (isDateNonPrecise || notCompleteDate)) {
throw new DateExtractionException("Date is invalid according to our strict profile!");
Expand Down Expand Up @@ -204,11 +206,11 @@ public InstantEdtfDateBuilder withYearPrecision(YearPrecision yearPrecision) {
/**
* Add date qualification.
*
* @param dateQualification the date qualification
* @param dateQualifications the date qualifications
* @return the extended builder
*/
public InstantEdtfDateBuilder withDateQualification(DateQualification dateQualification) {
this.dateQualification = dateQualification;
public InstantEdtfDateBuilder withDateQualification(Set<DateQualification> dateQualifications) {
this.dateQualifications.addAll(dateQualifications);
return this;
}

Expand Down Expand Up @@ -249,7 +251,7 @@ public YearPrecision getYearPrecision() {
return yearPrecision;
}

public DateQualification getDateQualification() {
return dateQualification;
public Set<DateQualification> getDateQualifications() {
return EnumSet.copyOf(dateQualifications);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
import static eu.europeana.normalization.dates.extraction.DefaultDatesSeparator.SLASH_DELIMITER;
import static java.lang.String.format;

import java.util.EnumSet;
import java.util.Set;

/**
* An EDTF date that represents a period of time specified by a start and end date with various degrees of precision
*/
Expand All @@ -11,22 +14,32 @@ public class IntervalEdtfDate extends AbstractEdtfDate {
private InstantEdtfDate start;
private InstantEdtfDate end;


/**
* Restricted constructor by provided {@link IntervalEdtfDateBuilder}.
* <p>The start and end boundaries are taken from the builder. Date qualifications can still be added afterwards through
* {@link IntervalEdtfDate#addQualification(DateQualification)}, which applies them to both boundaries, for example to add
* an approximate qualification for a date that was sanitized.</p>
*
* @param intervalEdtfDateBuilder the builder with all content verified
*/
IntervalEdtfDate(IntervalEdtfDateBuilder intervalEdtfDateBuilder) {
super(intervalEdtfDateBuilder.getLabel());
this.start = intervalEdtfDateBuilder.getStart();
this.end = intervalEdtfDateBuilder.getEnd();
}

@Override
public void overwriteQualification(DateQualification dateQualification) {
start.overwriteQualification(dateQualification);
end.overwriteQualification(dateQualification);
public void addQualification(DateQualification dateQualification) {
start.addQualification(dateQualification);
end.addQualification(dateQualification);
}

@Override
public DateQualification getDateQualification() {
return start.getDateQualification().compareTo(end.getDateQualification()) >= 0
? start.getDateQualification() : end.getDateQualification();
public Set<DateQualification> getDateQualifications() {
Set<DateQualification> dateQualifications = EnumSet.copyOf(start.getDateQualifications());
dateQualifications.addAll(end.getDateQualifications());
return dateQualifications;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package eu.europeana.normalization.dates.extraction.extractors;

import static eu.europeana.normalization.dates.DateNormalizationResult.getNoMatchResult;
import static eu.europeana.normalization.dates.edtf.DateQualification.NO_QUALIFICATION;
import static eu.europeana.normalization.dates.edtf.DateQualification.UNCERTAIN;
import static java.lang.String.format;

Expand All @@ -10,6 +9,8 @@
import eu.europeana.normalization.dates.extraction.DateExtractionException;
import eu.europeana.normalization.dates.sanitize.DateFieldSanitizer;
import java.lang.invoke.MethodHandles;
import java.util.EnumSet;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -27,8 +28,12 @@ public abstract class AbstractDateExtractor implements DateExtractor {
* @param inputValue the input value
* @return the set of date qualifications, empty if the input value neither starts nor ends with a question mark
*/
public DateQualification checkDateQualification(String inputValue) {
return (inputValue.startsWith("?") || inputValue.endsWith("?")) ? UNCERTAIN : NO_QUALIFICATION;
public Set<DateQualification> getQualification(String inputValue) {
final Set<DateQualification> dateQualifications = EnumSet.noneOf(DateQualification.class);
if (inputValue.startsWith("?") || inputValue.endsWith("?")) {
dateQualifications.add(UNCERTAIN);
}
return dateQualifications;
}

/**
Expand Down
Loading

0 comments on commit d613725

Please sign in to comment.