From 875086da61c86206c4bf46b3f4a5cfa322ca2118 Mon Sep 17 00:00:00 2001 From: Ankita Victor Date: Wed, 10 Apr 2024 23:37:59 +0530 Subject: [PATCH] Address PR comments --- velox/common/file/FileSystems.cpp | 2 +- velox/common/file/FileSystems.h | 7 +- .../storage_adapters/abfs/AbfsFileSystem.cpp | 9 ++- .../abfs/tests/AbfsFileSystemTest.cpp | 64 +++++++++---------- .../storage_adapters/gcs/GCSFileSystem.cpp | 9 ++- .../storage_adapters/hdfs/HdfsFileSystem.cpp | 2 +- .../storage_adapters/s3fs/S3FileSystem.cpp | 9 ++- 7 files changed, 48 insertions(+), 54 deletions(-) diff --git a/velox/common/file/FileSystems.cpp b/velox/common/file/FileSystems.cpp index 59730f1cf9a1..28e66dc2bd11 100644 --- a/velox/common/file/FileSystems.cpp +++ b/velox/common/file/FileSystems.cpp @@ -86,7 +86,7 @@ class LocalFileSystem : public FileSystem { std::unique_ptr openFileForRead( std::string_view path, - const FileOptions& options) override { + const FileOptions& /*unused*/) override { return std::make_unique(extractPath(path)); } diff --git a/velox/common/file/FileSystems.h b/velox/common/file/FileSystems.h index 685f069943d7..ae7ec0d18013 100644 --- a/velox/common/file/FileSystems.h +++ b/velox/common/file/FileSystems.h @@ -43,11 +43,8 @@ struct FileOptions { std::unordered_map values; memory::MemoryPool* pool{nullptr}; - std::optional fileSize{std::nullopt}; - - std::optional getFileSize() const { - return fileSize; - } + std::optional fileSize{ + std::nullopt}; // If specified then can be trusted to be the file size. }; /// An abstract FileSystem diff --git a/velox/connectors/hive/storage_adapters/abfs/AbfsFileSystem.cpp b/velox/connectors/hive/storage_adapters/abfs/AbfsFileSystem.cpp index 2732f9d97636..362460dbdde7 100644 --- a/velox/connectors/hive/storage_adapters/abfs/AbfsFileSystem.cpp +++ b/velox/connectors/hive/storage_adapters/abfs/AbfsFileSystem.cpp @@ -61,11 +61,10 @@ class AbfsReadFile::Impl { } void initialize(const FileOptions& options) { - auto fileSize = options.getFileSize(); - - if (fileSize.has_value()) { - VELOX_CHECK_GE(fileSize.value(), 0, "Length must be non-negative"); - length_ = fileSize.value(); + if (options.fileSize.has_value()) { + VELOX_CHECK_GE( + options.fileSize.value(), 0, "File size must be non-negative"); + length_ = options.fileSize.value(); } if (length_ != -1) { diff --git a/velox/connectors/hive/storage_adapters/abfs/tests/AbfsFileSystemTest.cpp b/velox/connectors/hive/storage_adapters/abfs/tests/AbfsFileSystemTest.cpp index 2d867ef02a33..b7bffa997270 100644 --- a/velox/connectors/hive/storage_adapters/abfs/tests/AbfsFileSystemTest.cpp +++ b/velox/connectors/hive/storage_adapters/abfs/tests/AbfsFileSystemTest.cpp @@ -168,14 +168,23 @@ void readData(ReadFile* readFile) { "ccccc"); } +TEST_F(AbfsFileSystemTest, readFile) { + auto hiveConfig = AbfsFileSystemTest::hiveConfig( + {{"fs.azure.account.key.test.dfs.core.windows.net", + azuriteServer->connectionStr()}}); + AbfsFileSystem abfs{hiveConfig}; + auto readFile = abfs.openFileForRead(fullFilePath); + readData(readFile.get()); +} + TEST_F(AbfsFileSystemTest, openFileForReadWithOptions) { auto hiveConfig = AbfsFileSystemTest::hiveConfig( {{"fs.azure.account.key.test.dfs.core.windows.net", azuriteServer->connectionStr()}}); - auto abfs = std::make_shared(hiveConfig); + AbfsFileSystem abfs{hiveConfig}; FileOptions options; options.fileSize = 15 + kOneMB; - auto readFile = abfs->openFileForRead(fullFilePath, options); + auto readFile = abfs.openFileForRead(fullFilePath, options); readData(readFile.get()); } @@ -183,21 +192,12 @@ TEST_F(AbfsFileSystemTest, openFileForReadWithInvalidOptions) { auto hiveConfig = AbfsFileSystemTest::hiveConfig( {{"fs.azure.account.key.test.dfs.core.windows.net", azuriteServer->connectionStr()}}); - auto abfs = std::make_shared(hiveConfig); + AbfsFileSystem abfs{hiveConfig}; FileOptions options; options.fileSize = -kOneMB; VELOX_ASSERT_THROW( - abfs->openFileForRead(fullFilePath, options), - "Length must be non-negative"); -} - -TEST_F(AbfsFileSystemTest, readFile) { - auto hiveConfig = AbfsFileSystemTest::hiveConfig( - {{"fs.azure.account.key.test.dfs.core.windows.net", - azuriteServer->connectionStr()}}); - auto abfs = std::make_shared(hiveConfig); - auto readFile = abfs->openFileForRead(fullFilePath); - readData(readFile.get()); + abfs.openFileForRead(fullFilePath, options), + "File size must be non-negative"); } TEST_F(AbfsFileSystemTest, multipleThreadsWithReadFile) { @@ -205,7 +205,7 @@ TEST_F(AbfsFileSystemTest, multipleThreadsWithReadFile) { auto hiveConfig = AbfsFileSystemTest::hiveConfig( {{"fs.azure.account.key.test.dfs.core.windows.net", azuriteServer->connectionStr()}}); - auto abfs = std::make_shared(hiveConfig); + AbfsFileSystem abfs{hiveConfig}; std::vector threads; std::mt19937 generator(std::random_device{}()); @@ -220,7 +220,7 @@ TEST_F(AbfsFileSystemTest, multipleThreadsWithReadFile) { } std::this_thread::sleep_for( std::chrono::microseconds(sleepTimesInMicroseconds[index])); - auto readFile = abfs->openFileForRead(fullFilePath); + auto readFile = abfs.openFileForRead(fullFilePath); readData(readFile.get()); }); threads.emplace_back(std::move(thread)); @@ -237,9 +237,9 @@ TEST_F(AbfsFileSystemTest, missingFile) { azuriteServer->connectionStr()}}); const std::string abfsFile = facebook::velox::filesystems::test::AzuriteABFSEndpoint + "test.txt"; - auto abfs = std::make_shared(hiveConfig); + AbfsFileSystem abfs{hiveConfig}; VELOX_ASSERT_RUNTIME_THROW_CODE( - abfs->openFileForRead(abfsFile), error_code::kFileNotFound, "404"); + abfs.openFileForRead(abfsFile), error_code::kFileNotFound, "404"); } TEST_F(AbfsFileSystemTest, OpenFileForWriteTest) { @@ -286,56 +286,56 @@ TEST_F(AbfsFileSystemTest, renameNotImplemented) { auto hiveConfig = AbfsFileSystemTest::hiveConfig( {{"fs.azure.account.key.test.dfs.core.windows.net", azuriteServer->connectionStr()}}); - auto abfs = std::make_shared(hiveConfig); + AbfsFileSystem abfs{hiveConfig}; VELOX_ASSERT_THROW( - abfs->rename("text", "text2"), "rename for abfs not implemented"); + abfs.rename("text", "text2"), "rename for abfs not implemented"); } TEST_F(AbfsFileSystemTest, removeNotImplemented) { auto hiveConfig = AbfsFileSystemTest::hiveConfig( {{"fs.azure.account.key.test.dfs.core.windows.net", azuriteServer->connectionStr()}}); - auto abfs = std::make_shared(hiveConfig); - VELOX_ASSERT_THROW(abfs->remove("text"), "remove for abfs not implemented"); + AbfsFileSystem abfs{hiveConfig}; + VELOX_ASSERT_THROW(abfs.remove("text"), "remove for abfs not implemented"); } TEST_F(AbfsFileSystemTest, existsNotImplemented) { auto hiveConfig = AbfsFileSystemTest::hiveConfig( {{"fs.azure.account.key.test.dfs.core.windows.net", azuriteServer->connectionStr()}}); - auto abfs = std::make_shared(hiveConfig); - VELOX_ASSERT_THROW(abfs->exists("text"), "exists for abfs not implemented"); + AbfsFileSystem abfs{hiveConfig}; + VELOX_ASSERT_THROW(abfs.exists("text"), "exists for abfs not implemented"); } TEST_F(AbfsFileSystemTest, listNotImplemented) { auto hiveConfig = AbfsFileSystemTest::hiveConfig( {{"fs.azure.account.key.test.dfs.core.windows.net", azuriteServer->connectionStr()}}); - auto abfs = std::make_shared(hiveConfig); - VELOX_ASSERT_THROW(abfs->list("dir"), "list for abfs not implemented"); + AbfsFileSystem abfs{hiveConfig}; + VELOX_ASSERT_THROW(abfs.list("dir"), "list for abfs not implemented"); } TEST_F(AbfsFileSystemTest, mkdirNotImplemented) { auto hiveConfig = AbfsFileSystemTest::hiveConfig( {{"fs.azure.account.key.test.dfs.core.windows.net", azuriteServer->connectionStr()}}); - auto abfs = std::make_shared(hiveConfig); - VELOX_ASSERT_THROW(abfs->mkdir("dir"), "mkdir for abfs not implemented"); + AbfsFileSystem abfs{hiveConfig}; + VELOX_ASSERT_THROW(abfs.mkdir("dir"), "mkdir for abfs not implemented"); } TEST_F(AbfsFileSystemTest, rmdirNotImplemented) { auto hiveConfig = AbfsFileSystemTest::hiveConfig( {{"fs.azure.account.key.test.dfs.core.windows.net", azuriteServer->connectionStr()}}); - auto abfs = std::make_shared(hiveConfig); - VELOX_ASSERT_THROW(abfs->rmdir("dir"), "rmdir for abfs not implemented"); + AbfsFileSystem abfs{hiveConfig}; + VELOX_ASSERT_THROW(abfs.rmdir("dir"), "rmdir for abfs not implemented"); } TEST_F(AbfsFileSystemTest, credNotFOund) { const std::string abfsFile = std::string("abfs://test@test1.dfs.core.windows.net/test"); auto hiveConfig = AbfsFileSystemTest::hiveConfig({}); - auto abfs = std::make_shared(hiveConfig); + AbfsFileSystem abfs{hiveConfig}; VELOX_ASSERT_THROW( - abfs->openFileForRead(abfsFile), "Failed to find storage credentials"); + abfs.openFileForRead(abfsFile), "Failed to find storage credentials"); } diff --git a/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp b/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp index 53052d8d02e3..36a428b64994 100644 --- a/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp +++ b/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp @@ -73,11 +73,10 @@ class GCSReadFile final : public ReadFile { // Gets the length of the file. // Checks if there are any issues reading the file. void initialize(const filesystems::FileOptions& options) { - auto fileSize = options.getFileSize(); - - if (fileSize.has_value()) { - VELOX_CHECK_GE(fileSize.value(), 0, "Length must be non-negative"); - length_ = fileSize.value(); + if (options.fileSize.has_value()) { + VELOX_CHECK_GE( + options.fileSize.value(), 0, "File size must be non-negative"); + length_ = options.fileSize.value(); } // Make it a no-op if invoked twice. diff --git a/velox/connectors/hive/storage_adapters/hdfs/HdfsFileSystem.cpp b/velox/connectors/hive/storage_adapters/hdfs/HdfsFileSystem.cpp index 3d94bba8d166..af18aa32e5e9 100644 --- a/velox/connectors/hive/storage_adapters/hdfs/HdfsFileSystem.cpp +++ b/velox/connectors/hive/storage_adapters/hdfs/HdfsFileSystem.cpp @@ -69,7 +69,7 @@ std::string HdfsFileSystem::name() const { std::unique_ptr HdfsFileSystem::openFileForRead( std::string_view path, - const FileOptions& options) { + const FileOptions& /*unused*/) { if (path.find(kScheme) == 0) { path.remove_prefix(kScheme.length()); } diff --git a/velox/connectors/hive/storage_adapters/s3fs/S3FileSystem.cpp b/velox/connectors/hive/storage_adapters/s3fs/S3FileSystem.cpp index e94d0a17e488..ff838a03a64d 100644 --- a/velox/connectors/hive/storage_adapters/s3fs/S3FileSystem.cpp +++ b/velox/connectors/hive/storage_adapters/s3fs/S3FileSystem.cpp @@ -80,11 +80,10 @@ class S3ReadFile final : public ReadFile { // Gets the length of the file. // Checks if there are any issues reading the file. void initialize(const filesystems::FileOptions& options) { - auto fileSize = options.getFileSize(); - - if (fileSize.has_value()) { - VELOX_CHECK_GE(fileSize.value(), 0, "Length must be non-negative"); - length_ = fileSize.value(); + if (options.fileSize.has_value()) { + VELOX_CHECK_GE( + options.fileSize.value(), 0, "File size must be non-negative"); + length_ = options.fileSize.value(); } // Make it a no-op if invoked twice.