From 2b0c37990e91f1bcb0bfebb61791bcff684a3832 Mon Sep 17 00:00:00 2001 From: Ajinkya Ghonge Date: Wed, 5 Apr 2023 16:30:49 -0700 Subject: [PATCH] Add logic to write output with encoded Ids. (#2286) Summary: Pull Request resolved: https://github.com/facebookresearch/fbpcs/pull/2286 # Context As per the PC Translator design, we need a runtime library that will be called during a PC run. This library will be called at the beginning of the PC run to encode specified fields in the publisher-side input into encoded breakdown (aggregation) Ids based on the active PC instruction sets for the run. The library will filter the active PC instruction sets for the run by parsing the pcs_features, i.e. the gatekeepers, for the particular run. # Product decisions In this stack we focus solely on the functionality required for private lift runs. We focus on the MVP implementation of the library and its integration with the fbpcf ORAM encoder library in this stack. # Stack 1. Create runtime pc_translator library. 2. Add logic to retrieve and parse PC instruction set, filtered based on the active gatekeepers for the run. 3. Integrate pc_translator library with fbpcf ORAM encoder. 4. Add logic to generate transformed publisher output with encoded breakdown ID and write the output. 5. Add support for filter constraints in pc_translator. # In this diff Add logic to generate transformed publisher output with encoded breakdown ID and write the output. 
Differential Revision: D44645325 Privacy Context Container: L416713 fbshipit-source-id: 953989f8440827ff528f51c202e498c01fcf31bd --- fbpcs/pc_translator/PCTranslator.cpp | 71 +++++++++++++++---- fbpcs/pc_translator/PCTranslator.h | 6 ++ .../pc_translator/tests/TestPCTranslator.cpp | 22 +++++- .../expected_transformed_publisher_input.csv | 13 ++++ 4 files changed, 95 insertions(+), 17 deletions(-) create mode 100644 fbpcs/pc_translator/tests/expected_transformed_publisher_input.csv diff --git a/fbpcs/pc_translator/PCTranslator.cpp b/fbpcs/pc_translator/PCTranslator.cpp index ea27e69eb..af03f281d 100644 --- a/fbpcs/pc_translator/PCTranslator.cpp +++ b/fbpcs/pc_translator/PCTranslator.cpp @@ -14,24 +14,27 @@ #include #include #include +#include +#include #include #include +#include #include "fbpcs/emp_games/common/Csv.h" #include "folly/String.h" namespace pc_translator { -std::string PCTranslator::encode(const std::string& inputDataset) { +std::string PCTranslator::encode(const std::string& inputDatasetPath) { auto validInstructionSetNames = PCTranslator::retrieveInstructionSetNamesForRun(pcsFeatures_); auto pcInstructionSets = PCTranslator::retrieveInstructionSets(validInstructionSetNames); if (pcInstructionSets.empty()) { // No instruction set found. return the input dataset path. 
- return inputDataset; + return inputDatasetPath; } return PCTranslator::transformDataset( - inputDataset, pcInstructionSets.front()); + inputDatasetPath, pcInstructionSets.front()); } std::string PCTranslator::decode( @@ -79,30 +82,43 @@ std::vector PCTranslator::retrieveInstructionSetNamesForRun( } std::string PCTranslator::transformDataset( - const std::string& inputData, + const std::string& inputDatasetPath, std::shared_ptr pcInstructionSet) { // Parse the input CSV auto lineNo = 0; std::vector> inputColums; + std::vector outputHeader; + std::vector> outputContent; private_measurement::csv::readCsv( - inputData, + inputDatasetPath, [&](const std::vector& header, const std::vector& parts) { std::vector inputColumnPerRow; + std::string column; + std::uint32_t value; + bool found = false; + std::vector outputContentPerRow; for (std::vector::size_type i = 0; i < header.size(); ++i) { - auto& column = header[i]; - auto value = std::atoi(parts[i].c_str()); - auto iter = std::find( - pcInstructionSet->getGroupByIds().begin(), - pcInstructionSet->getGroupByIds().end(), - column); - if (iter != pcInstructionSet->getGroupByIds().end()) { + column = header[i]; + value = std::atoi(parts[i].c_str()); + found = + (std::find( + pcInstructionSet->getGroupByIds().begin(), + pcInstructionSet->getGroupByIds().end(), + column) != pcInstructionSet->getGroupByIds().end()); + if (found) { inputColumnPerRow.push_back(value); + } else { + if (lineNo == 0) { + outputHeader.push_back(header[i]); + } + outputContentPerRow.push_back(parts[i]); } } inputColums.push_back(inputColumnPerRow); + outputContent.push_back(outputContentPerRow); lineNo++; }); @@ -114,9 +130,34 @@ std::string PCTranslator::transformDataset( auto encodedIndexes = encoder->generateORAMIndexes(inputColums); - // TODO : Append the enodedIndexes at the end of publisher output and return - // output path. 
- return ""; + auto dir = inputDatasetPath.substr(0, inputDatasetPath.rfind("/") + 1); + auto output_dataset_path = dir + "transformed_publisher_input.csv"; + + PCTranslator::putOutputData( + output_dataset_path, outputHeader, outputContent, encodedIndexes); + return output_dataset_path; +} + +void PCTranslator::putOutputData( + const std::string& output_dataset_path, + std::vector& outputHeader, + std::vector>& outputContent, + const std::vector& encodedIndexes) { + outputHeader.push_back("breakdown_id"); + + if (outputContent.size() != encodedIndexes.size()) { + throw std::runtime_error( + "Encoded index vector size should match the input vector size."); + } + + for (std::vector::size_type i = 0; i < encodedIndexes.size(); + ++i) { + auto indexVec = std::to_string(encodedIndexes[i]); + outputContent[i].push_back(indexVec); + } + + private_measurement::csv::writeCsv( + output_dataset_path, outputHeader, outputContent); } std::shared_ptr PCTranslator::parseInstructionSet( diff --git a/fbpcs/pc_translator/PCTranslator.h b/fbpcs/pc_translator/PCTranslator.h index 3196d6304..d0a978855 100644 --- a/fbpcs/pc_translator/PCTranslator.h +++ b/fbpcs/pc_translator/PCTranslator.h @@ -55,6 +55,12 @@ class PCTranslator { std::string transformDataset( const std::string& inputData, std::shared_ptr pcInstructionSet); + + void putOutputData( + const std::string& output_dataset_path, + std::vector& outputHeader, + std::vector>& outputContent, + const std::vector& encodedIndexes); }; } // namespace pc_translator diff --git a/fbpcs/pc_translator/tests/TestPCTranslator.cpp b/fbpcs/pc_translator/tests/TestPCTranslator.cpp index f182efa9a..7febfea45 100644 --- a/fbpcs/pc_translator/tests/TestPCTranslator.cpp +++ b/fbpcs/pc_translator/tests/TestPCTranslator.cpp @@ -5,6 +5,7 @@ * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include "../../emp_games/common/TestUtil.h" #include "fbpcs/pc_translator/PCTranslator.h" @@ -16,6 +17,8 @@ class TestPCTranslator : public ::testing::Test { std::string pcs_features_; std::string test_instruction_set_base_path_; std::string test_publisher_input_path_; + std::string test_transformed_output_path_; + std::string expected_transformed_output_path_; void SetUp() override { pcs_features_ = @@ -23,7 +26,18 @@ class TestPCTranslator : public ::testing::Test { std::string baseDir = private_measurement::test_util::getBaseDirFromPath(__FILE__); test_instruction_set_base_path_ = baseDir + "input_processing/"; - test_publisher_input_path_ = baseDir + "publisher_unittest.csv"; + test_publisher_input_path_ = "/tmp/publisher_unittest.csv"; + test_transformed_output_path_ = "/tmp/transformed_publisher_input.csv"; + expected_transformed_output_path_ = + baseDir + "expected_transformed_publisher_input.csv"; + auto contents = + fbpcf::io::FileIOWrappers::readFile(baseDir + "publisher_unittest.csv"); + fbpcf::io::FileIOWrappers::writeFile(test_publisher_input_path_, contents); + } + + void TearDown() override { + std::remove(test_publisher_input_path_.c_str()); + std::remove(test_transformed_output_path_.c_str()); } }; @@ -31,6 +45,10 @@ TEST_F(TestPCTranslator, TestEncode) { auto pcTranslator = std::make_shared( pcs_features_, test_instruction_set_base_path_); auto outputPath = pcTranslator->encode(test_publisher_input_path_); - EXPECT_EQ(outputPath, ""); + auto contents = fbpcf::io::FileIOWrappers::readFile(outputPath); + auto expectedContents = + fbpcf::io::FileIOWrappers::readFile(expected_transformed_output_path_); + EXPECT_EQ(outputPath, test_transformed_output_path_); + EXPECT_EQ(contents, expectedContents); } } // namespace pc_translator diff --git a/fbpcs/pc_translator/tests/expected_transformed_publisher_input.csv b/fbpcs/pc_translator/tests/expected_transformed_publisher_input.csv new file mode 100644 index 000000000..247407907 --- 
/dev/null +++ b/fbpcs/pc_translator/tests/expected_transformed_publisher_input.csv @@ -0,0 +1,13 @@ +id_,opportunity,test_flag,opportunity_timestamp,breakdown_id +cfcd208495d565ef66e7dff9f98764da,1,0,1600000430,0 +c4ca4238a0b923820dcc509a6f75849b,1,1,1600000401,1 +c81e728d9d4c2f636f067f89cc14862c,0,0,0,2 +eccbc87e4b5ce2fe28308fd9f2a7baf3,0,0,0,3 +a87ff679a2f3e71d9181a67b7542122c,0,0,0,0 +e4da3b7fbbce2345d7772b0674a318d5,1,1,1600000461,4 +1679091c5a880faf6fb5e6087eb1b2dc,1,0,1600000052,5 +8f14e45fceea167a5a36dedd4bea2543,1,0,1600000831,6 +c9f0f895fb98ab9159f51fd0297e236d,1,0,1600000530,7 +45c48cce2e2d7fbdea1afc51c7c6ad26,1,0,1600000972,5 +d3d9446802a44259755d38e6d163e820,0,0,0,0 +6512bd43d9caa6e02c990b0a82652dca,0,0,0,0