From f3c30ffc02e6f5f965d7a885f5325b8b76786e7c Mon Sep 17 00:00:00 2001
From: Rob Kooper
Date: Tue, 25 Jan 2022 18:00:32 -0600
Subject: [PATCH 1/6] consistent spelling of BagIt

---
 CHANGELOG.md              | 5 +++++
 app/api/Collections.scala | 2 +-
 app/api/Datasets.scala    | 6 +++---
 app/api/Selected.scala    | 2 +-
 conf/application.conf     | 2 +-
 5 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c65012675..886363052 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](http://keepachangelog.com/)
 and this project adheres to [Semantic Versioning](http://semver.org/).
 
+## Unreleased
+
+### Fixed
+- conf file and code had inconsistent spelling of BagIt. Now all have capital B and I.
+
 ## 1.19.5 - 2022-01-21
 
 ### Fixed
diff --git a/app/api/Collections.scala b/app/api/Collections.scala
index 1960b373b..a10d2ffd1 100644
--- a/app/api/Collections.scala
+++ b/app/api/Collections.scala
@@ -805,7 +805,7 @@ class Collections @Inject() (datasets: DatasetService,
     implicit val user = request.user
     collections.get(id) match {
       case Some(collection) => {
-        val bagit = play.api.Play.configuration.getBoolean("downloadCollectionBagit").getOrElse(true)
+        val bagit = play.api.Play.configuration.getBoolean("downloadCollectionBagIt").getOrElse(true)
         // Use custom enumerator to create the zip file on the fly
         // Use a 1MB in memory byte array
         Ok.chunked(enumeratorFromCollection(collection,1024*1024, compression,bagit,user)).withHeaders(
diff --git a/app/api/Datasets.scala b/app/api/Datasets.scala
index 07c7b9a55..27cea3917 100644
--- a/app/api/Datasets.scala
+++ b/app/api/Datasets.scala
@@ -2838,7 +2838,7 @@ class Datasets @Inject()(
     implicit val user = request.user
     datasets.get(id) match {
       case Some(dataset) => {
-        val bagit = play.api.Play.configuration.getBoolean("downloadDatasetBagit").getOrElse(true)
+        val bagit = play.api.Play.configuration.getBoolean("downloadDatasetBagIt").getOrElse(true)
         val baseURL = controllers.routes.Datasets.dataset(id).absoluteURL(https(request))
 
         // Increment download count if tracking is enabled
@@ -2867,7 +2867,7 @@ class Datasets @Inject()(
     datasets.get(id) match {
       case Some(dataset) => {
         val fileIDs = fileList.split(',').map(fid => new UUID(fid)).toList
-        val bagit = play.api.Play.configuration.getBoolean("downloadDatasetBagit").getOrElse(true)
+        val bagit = play.api.Play.configuration.getBoolean("downloadDatasetBagIt").getOrElse(true)
         val baseURL = controllers.routes.Datasets.dataset(id).absoluteURL(https(request))
 
         // Increment download count for each file
@@ -2892,7 +2892,7 @@ class Datasets @Inject()(
     implicit val user = request.user
     datasets.get(id) match {
       case Some(dataset) => {
-        val bagit = play.api.Play.configuration.getBoolean("downloadDatasetBagit").getOrElse(true)
+        val bagit = play.api.Play.configuration.getBoolean("downloadDatasetBagIt").getOrElse(true)
         val baseURL = controllers.routes.Datasets.dataset(id).absoluteURL(https(request))
 
 
diff --git a/app/api/Selected.scala b/app/api/Selected.scala
index 0ef35436f..edc5f60e7 100644
--- a/app/api/Selected.scala
+++ b/app/api/Selected.scala
@@ -109,7 +109,7 @@ class Selected @Inject()(selections: SelectionService,
     Logger.debug("Requesting Selected.downloadAll")
     request.user match {
      case Some(user) => {
-        val bagit = play.api.Play.configuration.getBoolean("downloadDatasetBagit").getOrElse(true)
+        val bagit = play.api.Play.configuration.getBoolean("downloadDatasetBagIt").getOrElse(true)
        val selected = selections.get(user.email.get)
        Ok.chunked(enumeratorFromSelected(selected,1024*1024,bagit,Some(user))).withHeaders(
          "Content-Type" -> "application/zip",
diff --git a/conf/application.conf b/conf/application.conf
index 2da5cf5e2..77962d544 100644
--- a/conf/application.conf
+++ b/conf/application.conf
@@ -477,7 +477,7 @@ addDatasetToCollectionSpace=false
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # Whether or not collections or datasets download in bagit format
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-downloadCollectionBagit = true
+downloadCollectionBagIt = true
 downloadDatasetBagIt = false
 
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

From 215537dff0f22ce6b44a8100930998aa6eeb08b3 Mon Sep 17 00:00:00 2001
From: Rob Kooper
Date: Tue, 1 Feb 2022 10:43:46 -0600
Subject: [PATCH 2/6] download dataset/collection as bagit (#319)

* download dataset/collection as bagit

There is now an optional parameter to download a dataset/collection as a
bagit zip file.

* Fixed download button to include bagit parameter (previously this would
  result in an error). Set default to no bagit.

* remove println

Co-authored-by: Luigi Marini
---
 CHANGELOG.md                               |   3 +
 app/Iterators/CollectionIterator.scala     |  19 +--
 app/Iterators/DatasetIterator.scala        |  13 +-
 .../DatasetsInCollectionIterator.scala     |   7 +-
 app/Iterators/FileIterator.scala           |   4 +-
 app/Iterators/RootCollectionIterator.scala |  14 +-
 app/Iterators/SelectedIterator.scala       |   4 +-
 app/api/Collections.scala                  |   9 +-
 app/api/Datasets.scala                     | 148 +++++++++++++++---
 app/views/dataset.scala.html               |   2 +-
 conf/application.conf                      |   6 -
 conf/routes                                |   8 +-
 12 files changed, 168 insertions(+), 69 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 886363052..60eceb9b6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 ### Fixed
 - conf file and code had inconsistent spelling of BagIt. Now all have capital B and I.
 
+### Changed
+- download of dataset/collection now has optional parameter bagit (default false) to download items in bagit format.
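
With this series applied, BagIt packaging becomes a per-request choice instead of a
server-wide configuration flag. As the conf/routes hunk later in this patch shows, the
download endpoints take an optional bagit query parameter that defaults to false, e.g.
(illustrative requests; the IDs and file list are placeholders):

    GET /api/datasets/<id>/download?bagit=true
    GET /api/datasets/<id>/downloadPartial?fileList=<id1>,<id2>&bagit=true
    GET /api/datasets/<id>/downloadFolder?folderId=<fid>&bagit=true
    GET /api/collections/<id>/download?bagit=true

With bagit=true the streamed zip is laid out as a bag: payload files under data/ plus bag
metadata such as bagit.txt and the MD5 manifests built up in the iterators below; with the
default bagit=false the zip contains only the files themselves.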
+ ## 1.19.5 - 2022-01-21 ### Fixed diff --git a/app/Iterators/CollectionIterator.scala b/app/Iterators/CollectionIterator.scala index cbe326122..10deb8a05 100644 --- a/app/Iterators/CollectionIterator.scala +++ b/app/Iterators/CollectionIterator.scala @@ -13,11 +13,12 @@ import scala.collection.mutable.ListBuffer //this is to download collections //that are not at the root level -class CollectionIterator(pathToFolder : String, parent_collection : models.Collection,zip : ZipOutputStream, md5Files : scala.collection.mutable.HashMap[String, MessageDigest], user : Option[User], - collections: CollectionService, datasets : DatasetService, files : FileService, folders : FolderService, metadataService : MetadataService, +class CollectionIterator(pathToFolder: String, parent_collection: models.Collection, zip: ZipOutputStream, + md5Files: scala.collection.mutable.HashMap[String, MessageDigest], user : Option[User], + bagit: Boolean, collections: CollectionService, datasets: DatasetService, files: FileService, + folders: FolderService, metadataService: MetadataService, spaces : SpaceService) extends Iterator[Option[InputStream]] { - def getNextGenerationCollections(currentCollections : List[Collection]) : List[Collection] = { var nextGenerationCollections : ListBuffer[Collection] = ListBuffer.empty[Collection] for (currentCollection <- currentCollections){ @@ -28,7 +29,7 @@ class CollectionIterator(pathToFolder : String, parent_collection : models.Colle nextGenerationCollections.toList } - val datasetIterator = new DatasetsInCollectionIterator(pathToFolder,parent_collection,zip,md5Files,user, + val datasetIterator = new DatasetsInCollectionIterator(pathToFolder,parent_collection,zip,bagit, md5Files,user, datasets,files, folders, metadataService,spaces) var currentCollectionIterator : Option[CollectionIterator] = None @@ -40,7 +41,7 @@ class CollectionIterator(pathToFolder : String, parent_collection : models.Colle var childCollectionCount = 0 var numChildCollections = child_collections.size - var file_type = 0 + var file_type = if (bagit) 0 else 2 // TODO: Repeat from api/Collections def jsonCollection(collection: Collection): JsValue = { @@ -74,12 +75,12 @@ class CollectionIterator(pathToFolder : String, parent_collection : models.Colle if (file_type < 2){ true } - else if (file_type ==2){ + else if (file_type == 2){ if (datasetIterator.hasNext()){ true } else if (numChildCollections > 0){ - - currentCollectionIterator = Some(new CollectionIterator(pathToFolder+"/"+child_collections(childCollectionCount).name, child_collections(childCollectionCount),zip,md5Files,user,collections,datasets,files, + currentCollectionIterator = Some(new CollectionIterator(pathToFolder+"/"+child_collections(childCollectionCount).name, + child_collections(childCollectionCount),zip,md5Files,user, bagit, collections,datasets,files, folders,metadataService,spaces)) file_type +=1 true @@ -94,7 +95,7 @@ class CollectionIterator(pathToFolder : String, parent_collection : models.Colle } else if (childCollectionCount < numChildCollections -2){ childCollectionCount+=1 currentCollectionIterator = Some(new CollectionIterator(pathToFolder+"/"+child_collections(childCollectionCount).name, child_collections(childCollectionCount),zip,md5Files,user, - collections,datasets,files, + bagit, collections,datasets,files, folders,metadataService,spaces)) true } else { diff --git a/app/Iterators/DatasetIterator.scala b/app/Iterators/DatasetIterator.scala index ac56bd496..c8d2c9d4a 100644 --- a/app/Iterators/DatasetIterator.scala +++ 
b/app/Iterators/DatasetIterator.scala @@ -14,7 +14,7 @@ import scala.collection.mutable.ListBuffer //this iterator is used for downloading a dataset //it has a file iterator -class DatasetIterator(pathToFolder : String, dataset : models.Dataset, zip: ZipOutputStream, md5Files :scala.collection.mutable.HashMap[String, MessageDigest], +class DatasetIterator(pathToFolder : String, dataset : models.Dataset, zip: ZipOutputStream, bagit: Boolean, md5Files :scala.collection.mutable.HashMap[String, MessageDigest], folders : FolderService, files: FileService, metadataService : MetadataService, datasets: DatasetService, spaces : SpaceService) extends Iterator[Option[InputStream]] { //get files in the dataset @@ -111,10 +111,14 @@ class DatasetIterator(pathToFolder : String, dataset : models.Dataset, zip: ZipO var fileCounter = 0 - var currentFileIterator : Option[FileIterator] = None + var currentFileIterator : Option[FileIterator] = if (numFiles > 0){ + Some(new FileIterator(folderNameMap(inputFiles(fileCounter).id),inputFiles(fileCounter),bagit, zip,md5Files,files,folders,metadataService)) + } else { + None + } var is : Option[InputStream] = None - var file_type : Int = 0 + var file_type : Int = if (bagit) 0 else 2 def hasNext() = { if (file_type < 2){ @@ -126,7 +130,7 @@ class DatasetIterator(pathToFolder : String, dataset : models.Dataset, zip: ZipO true } else if (fileCounter < numFiles -1){ fileCounter +=1 - currentFileIterator = Some(new FileIterator(folderNameMap(inputFiles(fileCounter).id),inputFiles(fileCounter),zip,md5Files,files,folders,metadataService)) + currentFileIterator = Some(new FileIterator(folderNameMap(inputFiles(fileCounter).id),inputFiles(fileCounter),bagit, zip,md5Files,files,folders,metadataService)) true } else { false @@ -155,7 +159,6 @@ class DatasetIterator(pathToFolder : String, dataset : models.Dataset, zip: ZipO md5Files.put("_metadata.json",md5) if (numFiles > 0){ file_type+=1 - currentFileIterator = Some(new FileIterator(folderNameMap(inputFiles(fileCounter).id),inputFiles(fileCounter),zip,md5Files,files,folders,metadataService)) } else { file_type+=2 } diff --git a/app/Iterators/DatasetsInCollectionIterator.scala b/app/Iterators/DatasetsInCollectionIterator.scala index 24f6b11ba..721386cce 100644 --- a/app/Iterators/DatasetsInCollectionIterator.scala +++ b/app/Iterators/DatasetsInCollectionIterator.scala @@ -11,7 +11,7 @@ import scala.collection.mutable.ListBuffer //this is used to download the datasets in a collection //it creates an iterator for each dataset in the collection -class DatasetsInCollectionIterator(pathToFolder : String, collection : models.Collection, zip : ZipOutputStream, md5Files : scala.collection.mutable.HashMap[String, MessageDigest], user : Option[User], +class DatasetsInCollectionIterator(pathToFolder : String, collection : models.Collection, zip : ZipOutputStream, bagit: Boolean, md5Files : scala.collection.mutable.HashMap[String, MessageDigest], user : Option[User], datasets : DatasetService, files : FileService, folders : FolderService, metadataService : MetadataService, spaces : SpaceService) extends Iterator[Option[InputStream]] { @@ -36,8 +36,7 @@ class DatasetsInCollectionIterator(pathToFolder : String, collection : models.Co } var currentDatasetIterator : Option[DatasetIterator] = if (numDatasets > 0){ - - Some(new DatasetIterator(pathToFolder+"/"+currentDataset.get.name,currentDataset.get, zip, md5Files, + Some(new DatasetIterator(pathToFolder+"/"+currentDataset.get.name,currentDataset.get, zip, bagit, md5Files, folders, 
files,metadataService,datasets,spaces))
   } else {
     None
   }
@@ -56,7 +55,7 @@ class DatasetsInCollectionIterator(pathToFolder : String, collection : models.Co
         currentDataset = Some(datasetsInCollection(datasetCount))
         currentDataset match {
           case Some(cd) => {
-            currentDatasetIterator = Some(new DatasetIterator(pathToFolder+"/"+cd.name,cd, zip, md5Files,
+            currentDatasetIterator = Some(new DatasetIterator(pathToFolder+"/"+cd.name,cd, zip, bagit, md5Files,
               folders, files,metadataService,datasets,spaces))
             true
           }
diff --git a/app/Iterators/FileIterator.scala b/app/Iterators/FileIterator.scala
index 315d1bdb9..98d5b75db 100644
--- a/app/Iterators/FileIterator.scala
+++ b/app/Iterators/FileIterator.scala
@@ -11,7 +11,7 @@ import util.JSONLD
 
 //this is used for file downloads
 //called by the dataset iterator
-class FileIterator (pathToFile : String, file : models.File,zip : ZipOutputStream, md5Files :scala.collection.mutable.HashMap[String, MessageDigest], files : FileService, folders : FolderService , metadataService : MetadataService) extends Iterator[Option[InputStream]] {
+class FileIterator (pathToFile : String, file : models.File, bagit: Boolean, zip : ZipOutputStream, md5Files :scala.collection.mutable.HashMap[String, MessageDigest], files : FileService, folders : FolderService , metadataService : MetadataService) extends Iterator[Option[InputStream]] {
 
   def getFileInfoAsJson(file : models.File) : JsValue = {
     val rightsHolder = {
@@ -54,7 +54,7 @@ class FileIterator (pathToFile : String, file : models.File,zip : ZipOutputStrea
     Some(new ByteArrayInputStream(s.getBytes("UTF-8")))
   }
 
-  var file_type : Int = 0
+  var file_type : Int = if (bagit) 0 else 2
   var is : Option[InputStream] = None
   def hasNext() = {
     if ( file_type < 3){
diff --git a/app/Iterators/RootCollectionIterator.scala b/app/Iterators/RootCollectionIterator.scala
index b112dfd86..1a990bf96 100644
--- a/app/Iterators/RootCollectionIterator.scala
+++ b/app/Iterators/RootCollectionIterator.scala
@@ -17,11 +17,11 @@ import scala.collection.mutable.ListBuffer
 
 class RootCollectionIterator(pathToFolder : String, root_collection : models.Collection,zip : ZipOutputStream,
                              md5Files : scala.collection.mutable.HashMap[String, MessageDigest],
                              md5Bag : scala.collection.mutable.HashMap[String, MessageDigest],
-                             user : Option[User],totalBytes : Long,bagit : Boolean,
+                             user : Option[User],totalBytes : Long, bagit : Boolean,
                              collections: CollectionService, datasets : DatasetService, files : FileService,
                              folders : FolderService, metadataService : MetadataService,
                              spaces : SpaceService) extends Iterator[Option[InputStream]] {
 
-  val datasetIterator = new DatasetsInCollectionIterator(root_collection.name,root_collection,zip,md5Files,user,
+  val datasetIterator = new DatasetsInCollectionIterator(root_collection.name,root_collection,zip,bagit, md5Files,user,
     datasets,files,folders,metadataService,spaces)
 
   var currentCollectionIterator : Option[CollectionIterator] = None
@@ -34,7 +34,7 @@ class RootCollectionIterator(pathToFolder : String, root_collection : models.Col
   var numCollections = child_collections.size
   var bytesSoFar : Long = 0L
 
-  var file_type = 0
+  var file_type = if (bagit) 0 else 2
 
   private def addCollectionInfoToZip(folderName: String, collection: models.Collection, zip: ZipOutputStream): Option[InputStream] = {
 
@@ -107,9 +107,9 @@ class RootCollectionIterator(pathToFolder : String, root_collection : models.Col
         true
       }
       else if (numCollections > 0){
-        currentCollectionIterator = Some(new
CollectionIterator(pathToFolder+"/"+child_collections(collectionCount).name, child_collections(collectionCount),zip,md5Files,user, - collections,datasets,files, - folders,metadataService,spaces)) + currentCollectionIterator = Some(new CollectionIterator(pathToFolder+"/"+child_collections(collectionCount).name, + child_collections(collectionCount), zip, md5Files, user, bagit, + collections, datasets,files, folders, metadataService, spaces)) file_type +=1 true } else if (bagit){ @@ -127,7 +127,7 @@ class RootCollectionIterator(pathToFolder : String, root_collection : models.Col } else if (collectionCount < numCollections -2){ collectionCount+=1 currentCollectionIterator = Some(new CollectionIterator(pathToFolder+"/"+child_collections(collectionCount).name, child_collections(collectionCount),zip,md5Files,user, - collections,datasets,files, + bagit, collections,datasets,files, folders,metadataService,spaces)) true } else { diff --git a/app/Iterators/SelectedIterator.scala b/app/Iterators/SelectedIterator.scala index 4885473d2..c469bcfe2 100644 --- a/app/Iterators/SelectedIterator.scala +++ b/app/Iterators/SelectedIterator.scala @@ -21,7 +21,7 @@ class SelectedIterator(pathToFolder : String, selected : List[Dataset], zip : Zi var datasetCount = 0 var currDs = selected(datasetCount) - var datasetIterator = new DatasetIterator(pathToFolder+"/"+currDs.name, currDs, zip, md5Files, folders, files, + var datasetIterator = new DatasetIterator(pathToFolder+"/"+currDs.name, currDs, zip, bagit, md5Files, folders, files, metadataService,datasets,spaces) var file_type = 0 @@ -54,7 +54,7 @@ class SelectedIterator(pathToFolder : String, selected : List[Dataset], zip : Zi } else if (selected.length > datasetCount+1){ datasetCount += 1 currDs = selected(datasetCount) - datasetIterator = new DatasetIterator(pathToFolder+"/"+currDs.name,currDs,zip,md5Files,folders,files, + datasetIterator = new DatasetIterator(pathToFolder+"/"+currDs.name,currDs,zip,bagit, md5Files,folders,files, metadataService,datasets,spaces) true } else if (bagit) { diff --git a/app/api/Collections.scala b/app/api/Collections.scala index a10d2ffd1..bbfb8ba29 100644 --- a/app/api/Collections.scala +++ b/app/api/Collections.scala @@ -801,11 +801,10 @@ class Collections @Inject() (datasets: DatasetService, } } - def download(id: UUID, compression: Int) = PermissionAction(Permission.DownloadFiles, Some(ResourceRef(ResourceRef.collection, id))) { implicit request => + def download(id: UUID, compression: Int, bagit: Boolean) = PermissionAction(Permission.DownloadFiles, Some(ResourceRef(ResourceRef.collection, id))) { implicit request => implicit val user = request.user collections.get(id) match { case Some(collection) => { - val bagit = play.api.Play.configuration.getBoolean("downloadCollectionBagIt").getOrElse(true) // Use custom enumerator to create the zip file on the fly // Use a 1MB in memory byte array Ok.chunked(enumeratorFromCollection(collection,1024*1024, compression,bagit,user)).withHeaders( @@ -821,7 +820,9 @@ class Collections @Inject() (datasets: DatasetService, } - def enumeratorFromCollection(collection: Collection, chunkSize: Int = 1024 * 8, compression: Int = Deflater.DEFAULT_COMPRESSION, bagit: Boolean, user : Option[User]) + def enumeratorFromCollection(collection: Collection, chunkSize: Int = 1024 * 8, + compression: Int = Deflater.DEFAULT_COMPRESSION, + bagit: Boolean, user : Option[User]) (implicit ec: ExecutionContext): Enumerator[Array[Byte]] = { implicit val pec = ec.prepare() @@ -839,8 +840,6 @@ class Collections 
@Inject() (datasets: DatasetService,
     var current_iterator = new RootCollectionIterator(collection.name,collection,zip,md5Files,md5Bag,user,
       totalBytes,bagit,collections, datasets,files,folders,metadataService,spaces)
-
-    //var current_iterator = new FileIterator(folderNameMap(inputFiles(1).id),inputFiles(1), zip,md5Files)
 
     var is = current_iterator.next()
diff --git a/app/api/Datasets.scala b/app/api/Datasets.scala
index 27cea3917..a5e191eab 100644
--- a/app/api/Datasets.scala
+++ b/app/api/Datasets.scala
@@ -2369,7 +2369,6 @@ class Datasets @Inject()(
    * @param dataset dataset from which to get the files
    * @param chunkSize chunk size in memory in which to buffer the stream
    * @param compression java built in compression value. Use 0 for no compression.
-   * @param bagit whether or not to include bagit structures in zip
    * @param baseURL the root Clowder URL for metadata files, from original request
    * @param user an optional user to include in metadata
    * @param fileIDs a list of UUIDs of files in the dataset to include (i.e. marked file downloads)
@@ -2377,12 +2376,12 @@ class Datasets @Inject()(
    * @return Enumerator to produce array of bytes from a zipped stream containing the bytes of each file
    *         in the dataset
    */
-  def enumeratorFromDataset(dataset: Dataset, chunkSize: Int = 1024 * 8,
-                            compression: Int = Deflater.DEFAULT_COMPRESSION, bagit: Boolean, baseURL: String,
-                            user : Option[User], fileIDs: Option[List[UUID]], folderId: Option[UUID])
-                           (implicit ec: ExecutionContext): Enumerator[Array[Byte]] = {
+  def enumeratorFromDatasetBagIt(dataset: Dataset, chunkSize: Int = 1024 * 8,
+                                 compression: Int = Deflater.DEFAULT_COMPRESSION, baseURL: String,
+                                 user : Option[User], fileIDs: Option[List[UUID]], folderId: Option[UUID])
+                                (implicit ec: ExecutionContext): Enumerator[Array[Byte]] = {
     implicit val pec = ec.prepare()
-    val dataFolder = if (bagit) "data/" else ""
+    val dataFolder = "data/"
     val filenameMap = scala.collection.mutable.Map.empty[UUID, String]
     val inputFiles = scala.collection.mutable.ListBuffer.empty[models.File]
 
@@ -2468,14 +2467,9 @@ class Datasets @Inject()(
             is = addMD5Entry(filename, is, md5Files)
             file_index +=1
             if (file_index >= inputFiles.size) {
-              if (bagit) {
-                file_index = 0
-                level = "bag"
-                file_type = "bagit.txt"
-              } else {
-                level = "done"
-                file_type = "none"
-              }
+              file_index = 0
+              level = "bag"
+              file_type = "bagit.txt"
             }
           }
           case ("bag", "bagit.txt") => {
@@ -2541,6 +2535,99 @@ class Datasets @Inject()(
     })(pec)
   }
 
+  /**
+    * Enumerator to loop over all files in a dataset and return chunks for the result zip file that will be
+    * streamed to the client. The zip files are streamed and not stored on disk.
+    *
+    * @param dataset dataset from which to get the files
+    * @param chunkSize chunk size in memory in which to buffer the stream
+    * @param compression java built in compression value. Use 0 for no compression.
+    * @param baseURL the root Clowder URL for metadata files, from original request
+    * @param user an optional user to include in metadata
+    * @param fileIDs a list of UUIDs of files in the dataset to include (i.e. marked file downloads)
+    * @param folderId a folder UUID in the dataset to include (i.e.
folder download) + * @return Enumerator to produce array of bytes from a zipped stream containing the bytes of each file + * in the dataset + */ + def enumeratorFromDatasetFiles(dataset: Dataset, chunkSize: Int = 1024 * 8, + compression: Int = Deflater.DEFAULT_COMPRESSION, baseURL: String, + user : Option[User], fileIDs: Option[List[UUID]], folderId: Option[UUID]) + (implicit ec: ExecutionContext): Enumerator[Array[Byte]] = { + implicit val pec = ec.prepare() + val dataFolder = "" + val filenameMap = scala.collection.mutable.Map.empty[UUID, String] + val inputFiles = scala.collection.mutable.ListBuffer.empty[models.File] + + // Get list of all files and folder in dataset and enforce unique names + fileIDs match { + case Some(fids) => { + Logger.info("Downloading only some files") + Logger.info(fids.toString) + listFilesInFolder(fids, List.empty, dataFolder, filenameMap, inputFiles) + } + case None => { + folderId match { + case Some(fid) => listFilesInFolder(List.empty, List(fid), dataFolder, filenameMap, inputFiles) + case None => listFilesInFolder(dataset.files, dataset.folders, dataFolder, filenameMap, inputFiles) + } + } + } + + // create the zipfile + val byteArrayOutputStream = new ByteArrayOutputStream(chunkSize) + val zip = new ZipOutputStream(byteArrayOutputStream) + zip.setLevel(compression) + + var file_index = 0 + val buffer = new Array[Byte](chunkSize) + var is: Option[InputStream] = None + + Enumerator.generateM({ + val bytesRead = is match { + case Some(inputStream: InputStream) => { + val bytesRead = scala.concurrent.blocking { + inputStream.read(buffer) + } + if (bytesRead == -1) { + // finished individual file + zip.closeEntry() + inputStream.close() + } + bytesRead + } + case None => -1 + } + + val chunk = if (bytesRead == -1) { + if (file_index == -1) { + None + } else if (file_index < inputFiles.length) { + val filename = filenameMap(inputFiles(file_index).id) + is = addFileToZip(filename, inputFiles(file_index), zip) + file_index += 1 + val result = Some(byteArrayOutputStream.toByteArray) + byteArrayOutputStream.reset() + result + } else { + zip.close() + val result = Some(byteArrayOutputStream.toByteArray) + byteArrayOutputStream.reset() + is = None + file_index = -1 + result + } + } else { + zip.write(buffer, 0, bytesRead) + val result = Some(byteArrayOutputStream.toByteArray) + byteArrayOutputStream.reset() + result + } + + Future.successful(chunk) + })(pec) + + } + private def addMD5Entry(name: String, is: Option[InputStream], md5HashMap: scala.collection.mutable.HashMap[String, MessageDigest]) = { val md5 = MessageDigest.getInstance("MD5") md5HashMap.put(name, md5) @@ -2834,11 +2921,10 @@ class Datasets @Inject()( Some(new ByteArrayInputStream(s.getBytes("UTF-8"))) } - def download(id: UUID, compression: Int, tracking: Boolean) = PermissionAction(Permission.DownloadFiles, Some(ResourceRef(ResourceRef.dataset, id))) { implicit request => + def download(id: UUID, bagit: Boolean, compression: Int, tracking: Boolean) = PermissionAction(Permission.DownloadFiles, Some(ResourceRef(ResourceRef.dataset, id))) { implicit request => implicit val user = request.user datasets.get(id) match { case Some(dataset) => { - val bagit = play.api.Play.configuration.getBoolean("downloadDatasetBagIt").getOrElse(true) val baseURL = controllers.routes.Datasets.dataset(id).absoluteURL(https(request)) // Increment download count if tracking is enabled @@ -2849,7 +2935,13 @@ class Datasets @Inject()( // Use custom enumerator to create the zip file on the fly // Use a 1MB in memory byte array 
- Ok.chunked(enumeratorFromDataset(dataset,1024*1024, compression, bagit, baseURL, user, None, None)).withHeaders( + val enumerator = if (bagit) { + enumeratorFromDatasetBagIt(dataset,1024*1024, -1, baseURL, user, None, None) + } else { + enumeratorFromDatasetFiles(dataset,1024*1024, -1, baseURL, user, None, None) + } + + Ok.chunked(enumerator).withHeaders( CONTENT_TYPE -> "application/zip", CONTENT_DISPOSITION -> (FileUtils.encodeAttachment(dataset.name+ ".zip", request.headers.get("user-agent").getOrElse(""))) ) @@ -2862,12 +2954,11 @@ class Datasets @Inject()( } // Takes dataset ID and a comma-separated string of file UUIDs in the dataset and streams just those files as a zip - def downloadPartial(id: UUID, fileList: String) = PermissionAction(Permission.DownloadFiles, Some(ResourceRef(ResourceRef.dataset, id))) { implicit request => + def downloadPartial(id: UUID, fileList: String, bagit: Boolean) = PermissionAction(Permission.DownloadFiles, Some(ResourceRef(ResourceRef.dataset, id))) { implicit request => implicit val user = request.user datasets.get(id) match { case Some(dataset) => { val fileIDs = fileList.split(',').map(fid => new UUID(fid)).toList - val bagit = play.api.Play.configuration.getBoolean("downloadDatasetBagIt").getOrElse(true) val baseURL = controllers.routes.Datasets.dataset(id).absoluteURL(https(request)) // Increment download count for each file @@ -2875,7 +2966,12 @@ class Datasets @Inject()( // Use custom enumerator to create the zip file on the fly // Use a 1MB in memory byte array - Ok.chunked(enumeratorFromDataset(dataset,1024*1024, -1, bagit, baseURL, user, Some(fileIDs), None)).withHeaders( + val enumerator = if (bagit) { + enumeratorFromDatasetBagIt(dataset,1024*1024, -1, baseURL, user, Some(fileIDs), None) + } else { + enumeratorFromDatasetFiles(dataset,1024*1024, -1, baseURL, user, Some(fileIDs), None) + } + Ok.chunked(enumerator).withHeaders( CONTENT_TYPE -> "application/zip", CONTENT_DISPOSITION -> (FileUtils.encodeAttachment(dataset.name+ " (Partial).zip", request.headers.get("user-agent").getOrElse(""))) ) @@ -2888,22 +2984,26 @@ class Datasets @Inject()( } // Takes dataset ID and a folder ID in that dataset and streams just that folder and sub-folders as a zip - def downloadFolder(id: UUID, folderId: UUID) = PermissionAction(Permission.DownloadFiles, Some(ResourceRef(ResourceRef.dataset, id))) { implicit request => + def downloadFolder(id: UUID, folderId: UUID, bagit: Boolean) = PermissionAction(Permission.DownloadFiles, Some(ResourceRef(ResourceRef.dataset, id))) { implicit request => implicit val user = request.user datasets.get(id) match { case Some(dataset) => { - val bagit = play.api.Play.configuration.getBoolean("downloadDatasetBagIt").getOrElse(true) val baseURL = controllers.routes.Datasets.dataset(id).absoluteURL(https(request)) - // Increment download count for each file in folder folders.get(folderId) match { case Some(fo) => { fo.files.foreach(fid => files.incrementDownloads(fid, user)) + val enumerator = if (bagit) { + enumeratorFromDatasetBagIt(dataset, 1024*1024, -1, baseURL, user, None, Some(folderId)) + } else { + enumeratorFromDatasetFiles(dataset, 1024*1024, -1, baseURL, user, None, Some(folderId)) + } + // Use custom enumerator to create the zip file on the fly // Use a 1MB in memory byte array - Ok.chunked(enumeratorFromDataset(dataset,1024*1024, -1, bagit, baseURL, user, None, Some(folderId))).withHeaders( + Ok.chunked(enumerator).withHeaders( CONTENT_TYPE -> "application/zip", CONTENT_DISPOSITION -> 
(FileUtils.encodeAttachment(dataset.name+ " ("+fo.name+" Folder).zip", request.headers.get("user-agent").getOrElse(""))) ) diff --git a/app/views/dataset.scala.html b/app/views/dataset.scala.html index 87e0589a0..acee9bc20 100644 --- a/app/views/dataset.scala.html +++ b/app/views/dataset.scala.html @@ -190,7 +190,7 @@

} @if(showDownload && Permission.checkPermission(Permission.DownloadFiles, ResourceRef(ResourceRef.dataset, dataset.id)) && !dataset.trash) { Download All Files diff --git a/conf/application.conf b/conf/application.conf index 77962d544..c05870d3d 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -474,12 +474,6 @@ clowder.tagLength=100 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ addDatasetToCollectionSpace=false -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Whether or not collections or datasets download in bagit format -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -downloadCollectionBagIt = true -downloadDatasetBagIt = false - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Polyglot # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/conf/routes b/conf/routes index 333fd699a..0eb5f62b6 100644 --- a/conf/routes +++ b/conf/routes @@ -516,7 +516,7 @@ POST /api/collections GET /api/collections/rootCollections @api.Collections.getRootCollections GET /api/collections/topLevelCollections @api.Collections.getTopLevelCollections GET /api/collections/allCollections @api.Collections.getAllCollections(limit : Int ?= 0, showAll: Boolean ?=false) -GET /api/collections/:id/download @api.Collections.download(id: UUID, compression: Int ?= -1) +GET /api/collections/:id/download @api.Collections.download(id: UUID, compression: Int ?= -1, bagit: Boolean ?= false) GET /api/collections/listTrash @api.Collections.listCollectionsInTrash(limit : Int ?= 12) DELETE /api/collections/emptyTrash @api.Collections.emptyTrash() DELETE /api/collections/clearOldCollectionsTrash @api.Collections.clearOldCollectionsTrash(days : Int ?= 30) @@ -592,9 +592,9 @@ GET /api/datasets/:id/listAllFiles GET /api/datasets/:id/files @api.Datasets.datasetAllFilesList(id: UUID, max: Int ?= -1) POST /api/datasets/:id/files @api.Datasets.uploadToDatasetFile(id: UUID) POST /api/datasets/:id/urls @api.Datasets.uploadToDatasetJSON(id: UUID) -GET /api/datasets/:id/download @api.Datasets.download(id: UUID, compression: Int ?= -1, tracking: Boolean ?= true) -GET /api/datasets/:id/downloadPartial @api.Datasets.downloadPartial(id: UUID, fileList: String) -GET /api/datasets/:id/downloadFolder @api.Datasets.downloadFolder(id: UUID, folderId: UUID) +GET /api/datasets/:id/download @api.Datasets.download(id: UUID, bagit: Boolean ?= false, compression: Int ?= -1, tracking: Boolean ?= true) +GET /api/datasets/:id/downloadPartial @api.Datasets.downloadPartial(id: UUID, fileList: String, bagit: Boolean ?= false) +GET /api/datasets/:id/downloadFolder @api.Datasets.downloadFolder(id: UUID, folderId: UUID, bagit: Boolean ?= false) POST /api/datasets/:id/comment @api.Datasets.comment(id: UUID) POST /api/datasets/:id/reindex @api.Datasets.reindex(id:UUID, recursive: Boolean ?= true) POST /api/datasets/:id/follow @api.Datasets.follow(id: UUID) From 4eab5752f933b38d6209978dcdbe1f1398a47b2a Mon Sep 17 00:00:00 2001 From: Rob Kooper Date: Mon, 7 Feb 2022 21:41:19 -0600 Subject: [PATCH 3/6] release 1.20.0 --- CHANGELOG.md | 2 +- doc/src/sphinx/conf.py | 2 +- project/Build.scala | 2 +- public/swagger.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60eceb9b6..73ef2769c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. 
 The format is based on [Keep a Changelog](http://keepachangelog.com/)
 and this project adheres to [Semantic Versioning](http://semver.org/).
 
-## Unreleased
+## 1.20.0 - 2022-02-07
 
 ### Fixed
 - conf file and code had inconsistent spelling of BagIt. Now all have capital B and I.
diff --git a/doc/src/sphinx/conf.py b/doc/src/sphinx/conf.py
index 38333c292..10e3af6bc 100644
--- a/doc/src/sphinx/conf.py
+++ b/doc/src/sphinx/conf.py
@@ -22,7 +22,7 @@ author = 'Luigi Marini'
 
 # The full version, including alpha/beta/rc tags
-release = '1.19.5'
+release = '1.20.0'
 
 # -- General configuration ---------------------------------------------------
 
diff --git a/project/Build.scala b/project/Build.scala
index dec27f13f..2de113d02 100644
--- a/project/Build.scala
+++ b/project/Build.scala
@@ -13,7 +13,7 @@ import NativePackagerKeys._
 
 object ApplicationBuild extends Build {
   val appName = "clowder"
-  val version = "1.19.5"
+  val version = "1.20.0"
   val jvm = "1.7"
 
   def appVersion: String = {
diff --git a/public/swagger.yml b/public/swagger.yml
index 9de34a5dc..fad91a7c5 100644
--- a/public/swagger.yml
+++ b/public/swagger.yml
@@ -9,7 +9,7 @@ info:
     Clowder is a customizable and scalable data management system to support
     any data format and multiple research domains. It is under active
     development and deployed for a variety of research projects.
-  version: 1.19.5
+  version: 1.20.0
   termsOfService: https://clowder.ncsa.illinois.edu/clowder/tos
   contact:
     name: Clowder

From 9574099a7911565e0172c9fba3b518f7e7760823 Mon Sep 17 00:00:00 2001
From: Rob Kooper
Date: Tue, 8 Feb 2022 13:10:44 -0600
Subject: [PATCH 4/6] use model/ for three.js (#320)

* use model/ for three.js

* Removed model/ply because it wasn't working with the three_js previewer.
  Removed obj because it was not working and it was messing with the
  3DWebGL previewer.

Co-authored-by: Luigi Marini
---
 CHANGELOG.md                                        | 1 +
 conf/mimetypes.conf                                 | 1 +
 public/javascripts/previewers/three_js/package.json | 4 ++--
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 73ef2769c..8eba02a8b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 
 ### Fixed
 - conf file and code had inconsistent spelling of BagIt. Now all have capital B and I.
+- three.js is no longer associated with application/octet-stream, now with models [#305](https://github.com/clowder-framework/clowder/issues/305)
 
 ### Changed
 - download of dataset/collection now has optional parameter bagit (default false) to download items in bagit format.
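
The two hunks below show how a Clowder previewer is bound to file types:
conf/mimetypes.conf maps a file extension to a MIME type, and the previewer's
package.json lists the MIME types it accepts. Under the same scheme, a deployment
could route another 3D format to this viewer with an entry along these lines
(hypothetical example, not part of this patch; glTF is shown only for illustration):

    # conf/mimetypes.conf: map the extension to a model/ MIME type
    mimetype.glb=model/gltf-binary

and by adding "model/gltf-binary" to the contentType list in
public/javascripts/previewers/three_js/package.json.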
diff --git a/conf/mimetypes.conf b/conf/mimetypes.conf
index 9326af494..404888aaf 100644
--- a/conf/mimetypes.conf
+++ b/conf/mimetypes.conf
@@ -12,6 +12,7 @@ mimetype.nxz=model/nxz
 mimetype.NXZ=model/nxz
 mimetype.mtl=model/mtl
 mimetype.MTL=model/mtl
+mimetype.fbx=model/fbx
 mimetype.cnv=application/cnv
 mimetype.CNV=application/cnv
 mimetype.oni=application/oni
diff --git a/public/javascripts/previewers/three_js/package.json b/public/javascripts/previewers/three_js/package.json
index 58d485b70..692a9cfec 100644
--- a/public/javascripts/previewers/three_js/package.json
+++ b/public/javascripts/previewers/three_js/package.json
@@ -2,5 +2,5 @@
   "name": "Three-JS",
   "main": "viewer_three.js",
   "file": true,
-  "contentType": [ "application/octet-stream"]
-}
\ No newline at end of file
+  "contentType": [ "model/fbx", "model/x3d-xml", "model/x3d-binary" ]
+}

From d4ea11a6f1f21b0a7a609766aebb4e06e5259b19 Mon Sep 17 00:00:00 2001
From: Rob Kooper
Date: Tue, 8 Feb 2022 13:36:13 -0600
Subject: [PATCH 5/6] don't show event stream when disabled, even when logged in (fixes #280) (#321)

Co-authored-by: Luigi Marini
---
 CHANGELOG.md                      |  5 +++--
 app/controllers/Application.scala | 20 +++++++++----------
 app/views/eventsList.scala.html   | 32 +++++++++++++++++++------------
 3 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8eba02a8b..2064b2875 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,11 +7,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 ## 1.20.0 - 2022-02-07
 
 ### Fixed
-- conf file and code had inconsistent spelling of BagIt. Now all have capital B and I.
+- Conf file and code had inconsistent spelling of BagIt. Now all have capital B and I.
+- When event stream is disabled, don't show it for logged-in users [#280](https://github.com/clowder-framework/clowder/issues/280)
 - three.js is no longer associated with application/octet-stream, now with models [#305](https://github.com/clowder-framework/clowder/issues/305)
 
 ### Changed
-- download of dataset/collection now has optional parameter bagit (default false) to download items in bagit format.
+- Download of dataset/collection now has optional parameter bagit (default false) to download items in bagit format.
## 1.19.5 - 2022-01-21 diff --git a/app/controllers/Application.scala b/app/controllers/Application.scala index 5a1cbf114..344474f35 100644 --- a/app/controllers/Application.scala +++ b/app/controllers/Application.scala @@ -93,23 +93,21 @@ class Application @Inject()(files: FileService, collections: CollectionService, implicit val user = request.user var newsfeedEvents = List.empty[Event] - if (!play.Play.application().configuration().getBoolean("clowder.disable.events", false)) { - newsfeedEvents = user.fold(List.empty[Event])(u => events.getEvents(u.followedEntities, Some(20))) - newsfeedEvents = newsfeedEvents ::: events.getRequestEvents(user, Some(20)) - if (user.isDefined) { - newsfeedEvents = (newsfeedEvents ::: events.getEventsByUser(user.get, Some(20))) - .sorted(Ordering.by((_: Event).created).reverse).distinct.take(20) - } - } user match { case Some(clowderUser) if (clowderUser.status == UserStatus.Inactive) => { Redirect(routes.Error.notActivated()) } case Some(clowderUser) if !(clowderUser.status == UserStatus.Inactive) => { - newsfeedEvents = newsfeedEvents ::: events.getEventsByUser(clowderUser, Some(20)) - if (play.Play.application().configuration().getBoolean("showCommentOnHomepage")) newsfeedEvents = newsfeedEvents ::: events.getCommentEvent(clowderUser, Some(20)) - newsfeedEvents = newsfeedEvents.sorted(Ordering.by((_: Event).created).reverse).distinct.take(20) + if (!play.Play.application().configuration().getBoolean("clowder.disable.events", false)) { + newsfeedEvents = newsfeedEvents ::: events.getEventsByUser(clowderUser, Some(20)) + newsfeedEvents = newsfeedEvents ::: events.getRequestEvents(user, Some(20)) + newsfeedEvents = newsfeedEvents ::: events.getEvents(clowderUser.followedEntities, Some(20)) + if (play.Play.application().configuration().getBoolean("showCommentOnHomepage")) { + newsfeedEvents = newsfeedEvents ::: events.getCommentEvent(clowderUser, Some(20)) + } + newsfeedEvents = newsfeedEvents.sorted(Ordering.by((_: Event).created).reverse).distinct.take(20) + } val datasetsUser = datasets.listUser(12, Some(clowderUser), request.user.fold(false)(_.superAdminMode), clowderUser) val collectionList = collections.listUser(12, Some(clowderUser), request.user.fold(false)(_.superAdminMode), clowderUser) val collectionsWithThumbnails = collectionList.map { c => diff --git a/app/views/eventsList.scala.html b/app/views/eventsList.scala.html index c0da77a7b..7481e5209 100644 --- a/app/views/eventsList.scala.html +++ b/app/views/eventsList.scala.html @@ -1,6 +1,7 @@ @(newsfeed: List[models.Event])(implicit user: Option[models.User]) @import _root_.util.Formatters @import play.api.i18n.Messages +@import play.api.Play.current @for(event <- newsfeed) {
@@ -27,18 +28,25 @@
 }
\ No newline at end of file
+

From 951e16aee3bc3aa78ec26aded2511872088ebe1e Mon Sep 17 00:00:00 2001
From: Max Burnette
Date: Wed, 9 Feb 2022 09:22:55 -0600
Subject: [PATCH 6/6] remove debug line

---
 app/api/Datasets.scala | 1 -
 1 file changed, 1 deletion(-)

diff --git a/app/api/Datasets.scala b/app/api/Datasets.scala
index a5e191eab..c0fe3fcbc 100644
--- a/app/api/Datasets.scala
+++ b/app/api/Datasets.scala
@@ -1153,7 +1153,6 @@ class Datasets @Inject()(
             resultCount += 1
           }
         })
-        print("done with folder")
       }
     }
     case None => Logger.error(s"Error getting dataset $id")
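
The eventsList.scala.html hunk bodies in patch 5 retain only their headers here.
Judging from the added @import play.api.Play.current and the commit message, the
template change presumably wraps the per-event markup in the same
clowder.disable.events check used in Application.scala. A minimal sketch of such a
guard (an assumed reconstruction, not the verbatim hunk; the exact placement inside
the template is not recoverable):

    @import play.api.Play.current

    @* only render the event stream when it is not disabled server-wide; this is
       an assumed sketch of the template change, not the original hunk *@
    @if(!current.configuration.getBoolean("clowder.disable.events").getOrElse(false)) {
      @* render the event entry as before *@
    }

On the controller side, the Application.scala hunk in patch 5 makes the same point
explicitly: the newsfeed queries now run only inside the clowder.disable.events
check, so a deployment with events disabled skips the event lookups entirely.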