Skip to content

Commit

Permalink
provides default prepareArchive + comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Christian Gendreau committed Jul 18, 2017
1 parent 5a0f06e commit a7f756a
Showing 1 changed file with 27 additions and 9 deletions.
36 changes: 27 additions & 9 deletions src/main/java/org/gbif/dwc/DwcFiles.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
*/
public class DwcFiles {

private static FileUtils F_UTILS = new FileUtils();
private static final FileUtils F_UTILS = new FileUtils();

/**
* Collections of static methods, no constructors.
Expand All @@ -38,10 +38,14 @@ private DwcFiles() {
}

/**
* @see #iterator(ArchiveFile, boolean, boolean)
* Same as calling #iterator(ArchiveFile, true, true)
*
* @param source
*
* @return
*
* @throws IOException
* @see #iterator(ArchiveFile, boolean, boolean)
*/
public static ClosableIterator<Record> iterator(ArchiveFile source) throws IOException {
return iterator(source, true, true);
Expand All @@ -51,7 +55,7 @@ public static ClosableIterator<Record> iterator(ArchiveFile source) throws IOExc
* Get a {@link ClosableIterator} on the provided {@link ArchiveFile}.
*
* @param source
* @param replaceNulls if true record values will have literal nulls replaced with NULL.
* @param replaceNulls if true replaces common, literal NULL values with real nulls, e.g. "\N" or "NULL"
* @param replaceEntities if true, HTML and XML entities in record values will be replaced with the interpreted value.
*/
public static ClosableIterator<Record> iterator(ArchiveFile source, boolean replaceNulls, boolean replaceEntities) throws IOException {
Expand Down Expand Up @@ -103,15 +107,29 @@ private static Integer getLineToSkipBeforeHeader(Integer ignoreHeaderLines) {
return null;
}

/**
* Convenience overload of {@link #prepareArchive(Archive, boolean, boolean)}.
* Same as calling {@code prepareArchive(archive, true, true)}, i.e. with both boolean options
* ({@code replaceNulls} and {@code replaceEntities}) enabled.
*
* @param archive source archive
*
* @return new {@link NormalizedDwcArchive} instance
*
* @throws IOException if the delegated {@link #prepareArchive(Archive, boolean, boolean)} call throws it
* @see #prepareArchive(Archive, boolean, boolean)
*/
public static NormalizedDwcArchive prepareArchive(final Archive archive) throws IOException {
return prepareArchive(archive, true, true);
}

/**
* Prepare an {@link Archive} into a {@link NormalizedDwcArchive}, which provides a {@link StarRecord} {@link
* ClosableIterator}.
* This method will initiate the normalization process. This process can take some time depending on the size of
* the files and the number of extensions.
*
* @param archive
* @param replaceNulls
* @param replaceEntities
* @param archive source archive
* @param replaceNulls if true replaces common, literal NULL values with real nulls, e.g. "\N" or "NULL"
* @param replaceEntities if true, HTML and XML entities in record values will be replaced with the interpreted value.
*
* @return new {@link NormalizedDwcArchive} instance
*
Expand Down Expand Up @@ -147,6 +165,7 @@ private static void normalizeAndSortArchiveFiles(Archive archive) throws IOExcep
* we want to sort.
*
* @param archiveFile
*
* @return whether the file was sorted. If the file was not sorted, it simply means sorting was not required.
*
* @throws IOException
Expand All @@ -156,7 +175,6 @@ protected static boolean normalizeAndSort(ArchiveFile archiveFile) throws IOExce

File fileToSort = archiveFile.getLocationFile();
File sortedFile = ArchiveFile.getLocationFileSorted(archiveFile.getLocationFile());
String linesTerminatedBy = archiveFile.getLinesTerminatedBy();

//if we already sorted the file and its source didn't change we can avoid doing it again
if (sortedFile.exists() && Files.getLastModifiedTime(sortedFile.toPath()).toInstant().isAfter(
Expand All @@ -165,15 +183,15 @@ protected static boolean normalizeAndSort(ArchiveFile archiveFile) throws IOExce
}

File normalizedFile = normalizeIfRequired(archiveFile);
if(normalizedFile != null){
if (normalizedFile != null) {
fileToSort = normalizedFile;
}

F_UTILS.sort(fileToSort, ArchiveFile.getLocationFileSorted(archiveFile.getLocationFile()), archiveFile.getEncoding(),
archiveFile.getId().getIndex(), archiveFile.getFieldsTerminatedBy(), archiveFile.getFieldsEnclosedBy(),
TabularFileNormalizer.NORMALIZED_END_OF_LINE, archiveFile.getIgnoreHeaderLines());

if(normalizedFile != null ){
if (normalizedFile != null) {
Files.deleteIfExists(normalizedFile.toPath());
}

Expand Down

0 comments on commit a7f756a

Please sign in to comment.