diff --git a/fbpcs/pc_translator/PCTranslator.cpp b/fbpcs/pc_translator/PCTranslator.cpp index ea27e69eb..af03f281d 100644 --- a/fbpcs/pc_translator/PCTranslator.cpp +++ b/fbpcs/pc_translator/PCTranslator.cpp @@ -14,24 +14,27 @@ #include #include #include +#include +#include #include #include +#include #include "fbpcs/emp_games/common/Csv.h" #include "folly/String.h" namespace pc_translator { -std::string PCTranslator::encode(const std::string& inputDataset) { +std::string PCTranslator::encode(const std::string& inputDatasetPath) { auto validInstructionSetNames = PCTranslator::retrieveInstructionSetNamesForRun(pcsFeatures_); auto pcInstructionSets = PCTranslator::retrieveInstructionSets(validInstructionSetNames); if (pcInstructionSets.empty()) { // No instruction set found. return the input dataset path. - return inputDataset; + return inputDatasetPath; } return PCTranslator::transformDataset( - inputDataset, pcInstructionSets.front()); + inputDatasetPath, pcInstructionSets.front()); } std::string PCTranslator::decode( @@ -79,30 +82,43 @@ std::vector PCTranslator::retrieveInstructionSetNamesForRun( } std::string PCTranslator::transformDataset( - const std::string& inputData, + const std::string& inputDatasetPath, std::shared_ptr pcInstructionSet) { // Parse the input CSV auto lineNo = 0; std::vector> inputColums; + std::vector outputHeader; + std::vector> outputContent; private_measurement::csv::readCsv( - inputData, + inputDatasetPath, [&](const std::vector& header, const std::vector& parts) { std::vector inputColumnPerRow; + std::string column; + std::uint32_t value; + bool found = false; + std::vector outputContentPerRow; for (std::vector::size_type i = 0; i < header.size(); ++i) { - auto& column = header[i]; - auto value = std::atoi(parts[i].c_str()); - auto iter = std::find( - pcInstructionSet->getGroupByIds().begin(), - pcInstructionSet->getGroupByIds().end(), - column); - if (iter != pcInstructionSet->getGroupByIds().end()) { + column = header[i]; + value = std::atoi(parts[i].c_str()); + found = + (std::find( + pcInstructionSet->getGroupByIds().begin(), + pcInstructionSet->getGroupByIds().end(), + column) != pcInstructionSet->getGroupByIds().end()); + if (found) { inputColumnPerRow.push_back(value); + } else { + if (lineNo == 0) { + outputHeader.push_back(header[i]); + } + outputContentPerRow.push_back(parts[i]); } } inputColums.push_back(inputColumnPerRow); + outputContent.push_back(outputContentPerRow); lineNo++; }); @@ -114,9 +130,34 @@ std::string PCTranslator::transformDataset( auto encodedIndexes = encoder->generateORAMIndexes(inputColums); - // TODO : Append the enodedIndexes at the end of publisher output and return - // output path. - return ""; + auto dir = inputDatasetPath.substr(0, inputDatasetPath.rfind("/") + 1); + auto output_dataset_path = dir + "transformed_publisher_input.csv"; + + PCTranslator::putOutputData( + output_dataset_path, outputHeader, outputContent, encodedIndexes); + return output_dataset_path; +} + +void PCTranslator::putOutputData( + const std::string& output_dataset_path, + std::vector& outputHeader, + std::vector>& outputContent, + const std::vector& encodedIndexes) { + outputHeader.push_back("breakdown_id"); + + if (outputContent.size() != encodedIndexes.size()) { + throw std::runtime_error( + "Encoded index vector size should match the input vector size."); + } + + for (std::vector::size_type i = 0; i < encodedIndexes.size(); + ++i) { + auto indexVec = std::to_string(encodedIndexes[i]); + outputContent[i].push_back(indexVec); + } + + private_measurement::csv::writeCsv( + output_dataset_path, outputHeader, outputContent); } std::shared_ptr PCTranslator::parseInstructionSet( diff --git a/fbpcs/pc_translator/PCTranslator.h b/fbpcs/pc_translator/PCTranslator.h index 3196d6304..d0a978855 100644 --- a/fbpcs/pc_translator/PCTranslator.h +++ b/fbpcs/pc_translator/PCTranslator.h @@ -55,6 +55,12 @@ class PCTranslator { std::string transformDataset( const std::string& inputData, std::shared_ptr pcInstructionSet); + + void putOutputData( + const std::string& output_dataset_path, + std::vector& outputHeader, + std::vector>& outputContent, + const std::vector& encodedIndexes); }; } // namespace pc_translator diff --git a/fbpcs/pc_translator/tests/TestPCTranslator.cpp b/fbpcs/pc_translator/tests/TestPCTranslator.cpp index f182efa9a..7febfea45 100644 --- a/fbpcs/pc_translator/tests/TestPCTranslator.cpp +++ b/fbpcs/pc_translator/tests/TestPCTranslator.cpp @@ -5,6 +5,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include "../../emp_games/common/TestUtil.h" #include "fbpcs/pc_translator/PCTranslator.h" @@ -16,6 +17,8 @@ class TestPCTranslator : public ::testing::Test { std::string pcs_features_; std::string test_instruction_set_base_path_; std::string test_publisher_input_path_; + std::string test_transformed_output_path_; + std::string expected_transformed_output_path_; void SetUp() override { pcs_features_ = @@ -23,7 +26,18 @@ class TestPCTranslator : public ::testing::Test { std::string baseDir = private_measurement::test_util::getBaseDirFromPath(__FILE__); test_instruction_set_base_path_ = baseDir + "input_processing/"; - test_publisher_input_path_ = baseDir + "publisher_unittest.csv"; + test_publisher_input_path_ = "/tmp/publisher_unittest.csv"; + test_transformed_output_path_ = "/tmp/transformed_publisher_input.csv"; + expected_transformed_output_path_ = + baseDir + "expected_transformed_publisher_input.csv"; + auto contents = + fbpcf::io::FileIOWrappers::readFile(baseDir + "publisher_unittest.csv"); + fbpcf::io::FileIOWrappers::writeFile(test_publisher_input_path_, contents); + } + + void TearDown() override { + std::remove(test_publisher_input_path_.c_str()); + std::remove(test_transformed_output_path_.c_str()); } }; @@ -31,6 +45,10 @@ TEST_F(TestPCTranslator, TestEncode) { auto pcTranslator = std::make_shared( pcs_features_, test_instruction_set_base_path_); auto outputPath = pcTranslator->encode(test_publisher_input_path_); - EXPECT_EQ(outputPath, ""); + auto contents = fbpcf::io::FileIOWrappers::readFile(outputPath); + auto expectedContents = + fbpcf::io::FileIOWrappers::readFile(expected_transformed_output_path_); + EXPECT_EQ(outputPath, test_transformed_output_path_); + EXPECT_EQ(contents, expectedContents); } } // namespace pc_translator diff --git a/fbpcs/pc_translator/tests/expected_transformed_publisher_input.csv b/fbpcs/pc_translator/tests/expected_transformed_publisher_input.csv new file mode 100644 index 000000000..247407907 --- /dev/null +++ b/fbpcs/pc_translator/tests/expected_transformed_publisher_input.csv @@ -0,0 +1,13 @@ +id_,opportunity,test_flag,opportunity_timestamp,breakdown_id +cfcd208495d565ef66e7dff9f98764da,1,0,1600000430,0 +c4ca4238a0b923820dcc509a6f75849b,1,1,1600000401,1 +c81e728d9d4c2f636f067f89cc14862c,0,0,0,2 +eccbc87e4b5ce2fe28308fd9f2a7baf3,0,0,0,3 +a87ff679a2f3e71d9181a67b7542122c,0,0,0,0 +e4da3b7fbbce2345d7772b0674a318d5,1,1,1600000461,4 +1679091c5a880faf6fb5e6087eb1b2dc,1,0,1600000052,5 +8f14e45fceea167a5a36dedd4bea2543,1,0,1600000831,6 +c9f0f895fb98ab9159f51fd0297e236d,1,0,1600000530,7 +45c48cce2e2d7fbdea1afc51c7c6ad26,1,0,1600000972,5 +d3d9446802a44259755d38e6d163e820,0,0,0,0 +6512bd43d9caa6e02c990b0a82652dca,0,0,0,0