-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Debt/met 5132 dates normalization cleanup part 4 (#622)
* MET-5132: Update naming of numeric spaces variant * MET-5132: Prepare first sample of tests * MET-5132: More tests * MET-5132: Restructure with enum MonthNameDateExtractor * MET-5132: Reusable DatePartsIndices * MET-5132: Cleanup * MET-5132: Add spaces clean and trim * MET-5132: Cleanup * MET-5132: First split the range part * MET-5132: Refactor PatternBcAdDateExtractor with tests * MET-5132: PatternBcAdRangeDateExtractor cleanup * MET-5132: Reuse range code for dates * MET-5132: Reuse range code for BriefRangeDateExtractor * MET-5132: Simplify hierarchy for ranges * MET-5132: Add sample tests for PatternLongNegativeYearDateExtractor and adapt edtf builder * MET-5132: Refactor LongNegativeYearDateExtractor with a separate range class reusing already existing code * MET-5132: Split century extraction to numeric, roman, roman range reusing code * MET-5132: Centralize sanitization operation for all extractors. * MET-5132: Add tests for generic properties * MET-5132: Reuse test code * MET-5132: Repackage * MET-5132: Split EdtfDateExtractor to handle ranges separately with code reuse * MET-5132: Centralize date qualification overwriting * MET-5132: Simplify date qualification overwriting * MET-5132: Update code after answers from rnd * MET-5132: Cleanup * MET-5132: PatternFormattedFullDateDateExtractor cleanup * MET-5132: Add millisecond support * MET-5132: Process review * MET-5132: Process review 2
- Loading branch information
Showing
69 changed files
with
3,017 additions
and
2,143 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
56 changes: 38 additions & 18 deletions
56
...-normalization/src/main/java/eu/europeana/normalization/dates/edtf/DateQualification.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,39 +1,59 @@ | ||
package eu.europeana.normalization.dates.edtf; | ||
|
||
import java.util.Arrays; | ||
import java.util.EnumSet; | ||
import java.util.Set; | ||
import java.util.regex.Pattern; | ||
|
||
/** | ||
* Date qualification characters according to <a href="https://www.loc.gov/standards/datetime/">Extended Date/Time Format (EDTF) | ||
* Specification</a> | ||
*/ | ||
public enum DateQualification {
  UNCERTAIN, APPROXIMATE;

  /** Character that marks a date as uncertain. */
  private static final String UNCERTAIN_CHARACTER = "?";
  /** Character that marks a date as approximate. */
  private static final String APPROXIMATE_CHARACTER = "~";
  /** Character that marks a date as both uncertain and approximate. */
  private static final String UNCERTAIN_APPROXIMATE_CHARACTER = "%";
  /** The three qualification characters concatenated, for use inside a regex character class. */
  private static final String CHARACTERS_REGEX = UNCERTAIN_CHARACTER + APPROXIMATE_CHARACTER + UNCERTAIN_APPROXIMATE_CHARACTER;
  /**
   * Matches a value that consists of zero or more non-qualification characters followed by exactly one trailing
   * qualification character. The qualification character is captured in group 1.
   */
  public static final Pattern PATTERN = Pattern.compile("^[^" + CHARACTERS_REGEX + "]*([" + CHARACTERS_REGEX + "])$");

  /**
   * Get the enum values based on the character provided.
   * <p>It will return an empty set or the set with the applicable qualifications.</p>
   *
   * @param character the provided character
   * @return the set of applicable qualifications; empty if the character is not a qualification character
   */
  public static Set<DateQualification> fromCharacter(String character) {
    final Set<DateQualification> dateQualifications = EnumSet.noneOf(DateQualification.class);
    // The constant is the receiver of equals, so a null character simply yields the empty set.
    if (UNCERTAIN_APPROXIMATE_CHARACTER.equals(character)) {
      dateQualifications.add(DateQualification.UNCERTAIN);
      dateQualifications.add(DateQualification.APPROXIMATE);
    } else if (UNCERTAIN_CHARACTER.equals(character)) {
      dateQualifications.add(DateQualification.UNCERTAIN);
    } else if (APPROXIMATE_CHARACTER.equals(character)) {
      dateQualifications.add(DateQualification.APPROXIMATE);
    }
    return dateQualifications;
  }

  /**
   * Get the string representation based on the provided date qualifications.
   *
   * @param dateQualifications the date qualifications
   * @return the qualification character, or the empty string if the set contains no qualification
   */
  public static String getCharacterFromQualifications(Set<DateQualification> dateQualifications) {
    final String character;
    // The combined case is checked first so that it takes precedence over the single qualifications.
    if (dateQualifications.contains(UNCERTAIN) && dateQualifications.contains(APPROXIMATE)) {
      character = UNCERTAIN_APPROXIMATE_CHARACTER;
    } else if (dateQualifications.contains(UNCERTAIN)) {
      character = UNCERTAIN_CHARACTER;
    } else if (dateQualifications.contains(APPROXIMATE)) {
      character = APPROXIMATE_CHARACTER;
    } else {
      character = "";
    }
    return character;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.