From 000d6e4c2395d3dd82b7f817d07425ff833b62b9 Mon Sep 17 00:00:00 2001 From: Chizhov Anton Date: Sat, 25 Nov 2023 00:04:38 +0300 Subject: [PATCH 1/3] Remove unused class `ColumnCombination` --- src/core/algorithms/fd/depminer/depminer.cpp | 1 - src/core/algorithms/fd/fd_mine/fd_mine.cpp | 2 ++ src/core/algorithms/fd/fd_mine/fd_mine.h | 26 ++++++++++--------- src/core/algorithms/fd/tane/lattice_level.cpp | 9 +++---- src/core/algorithms/fd/tane/tane.cpp | 3 ++- src/core/model/table/column_combination.cpp | 25 ------------------ src/core/model/table/column_combination.h | 22 ---------------- 7 files changed, 22 insertions(+), 66 deletions(-) diff --git a/src/core/algorithms/fd/depminer/depminer.cpp b/src/core/algorithms/fd/depminer/depminer.cpp index ffd0cc50f2..d6f951dae2 100644 --- a/src/core/algorithms/fd/depminer/depminer.cpp +++ b/src/core/algorithms/fd/depminer/depminer.cpp @@ -7,7 +7,6 @@ #include #include "model/table/agree_set_factory.h" -#include "model/table/column_combination.h" #include "model/table/relational_schema.h" namespace algos { diff --git a/src/core/algorithms/fd/fd_mine/fd_mine.cpp b/src/core/algorithms/fd/fd_mine/fd_mine.cpp index e0d0518304..a2764835b6 100644 --- a/src/core/algorithms/fd/fd_mine/fd_mine.cpp +++ b/src/core/algorithms/fd/fd_mine/fd_mine.cpp @@ -8,6 +8,8 @@ namespace algos { +using boost::dynamic_bitset; + Fd_mine::Fd_mine() : PliBasedFDAlgorithm({kDefaultPhaseName}) {} void Fd_mine::ResetStateFd() { diff --git a/src/core/algorithms/fd/fd_mine/fd_mine.h b/src/core/algorithms/fd/fd_mine/fd_mine.h index e5438a9c00..62224c404e 100644 --- a/src/core/algorithms/fd/fd_mine/fd_mine.h +++ b/src/core/algorithms/fd/fd_mine/fd_mine.h @@ -3,10 +3,10 @@ #include #include +#include #include #include "algorithms/fd/pli_based_fd_algorithm.h" -#include "model/table/column_combination.h" #include "model/table/column_layout_relation_data.h" #include "model/table/position_list_index.h" #include "model/table/vertical.h" @@ -17,17 +17,19 @@ class Fd_mine : public PliBasedFDAlgorithm { private: const RelationalSchema* schema_; - std::set> candidate_set_; - boost::unordered_map, std::unordered_set>> eq_set_; - boost::unordered_map, dynamic_bitset<>> fd_set_; - boost::unordered_map, dynamic_bitset<>> final_fd_set_; - std::set> key_set_; - boost::unordered_map, dynamic_bitset<>> closure_; - boost::unordered_map, std::shared_ptr> plis_; - dynamic_bitset<> relation_indices_; - - void ComputeNonTrivialClosure(dynamic_bitset<> const& xi); - void ObtainFDandKey(dynamic_bitset<> const& xi); + std::set> candidate_set_; + boost::unordered_map, std::unordered_set>> + eq_set_; + boost::unordered_map, boost::dynamic_bitset<>> fd_set_; + boost::unordered_map, boost::dynamic_bitset<>> final_fd_set_; + std::set> key_set_; + boost::unordered_map, boost::dynamic_bitset<>> closure_; + boost::unordered_map, std::shared_ptr> + plis_; + boost::dynamic_bitset<> relation_indices_; + + void ComputeNonTrivialClosure(boost::dynamic_bitset<> const& xi); + void ObtainFDandKey(boost::dynamic_bitset<> const& xi); void ObtainEqSet(); void PruneCandidates(); void GenerateNextLevelCandidates(); diff --git a/src/core/algorithms/fd/tane/lattice_level.cpp b/src/core/algorithms/fd/tane/lattice_level.cpp index 6faaacadc1..3d9714e92a 100644 --- a/src/core/algorithms/fd/tane/lattice_level.cpp +++ b/src/core/algorithms/fd/tane/lattice_level.cpp @@ -4,8 +4,6 @@ #include -#include "model/table/column_combination.h" - namespace model { using std::move, std::min, std::shared_ptr, std::vector, std::sort, std::make_shared; @@ -63,9 +61,10 @@ void LatticeLevel::GenerateNextLevel(std::vector>& Vertical child_columns = vertex1->GetVertical().Union(vertex2->GetVertical()); std::unique_ptr child_vertex = - std::make_unique(child_columns); + std::make_unique(child_columns); - dynamic_bitset<> parent_indices(vertex1->GetVertical().GetSchema()->GetNumColumns()); + boost::dynamic_bitset<> parent_indices( + vertex1->GetVertical().GetSchema()->GetNumColumns()); parent_indices |= vertex1->GetVertical().GetColumnIndices(); parent_indices |= vertex2->GetVertical().GetColumnIndices(); @@ -79,7 +78,7 @@ void LatticeLevel::GenerateNextLevel(std::vector>& i++, skip_index = parent_indices.find_next(skip_index)) { parent_indices[skip_index] = false; LatticeVertex const* parent_vertex = - current_level->GetLatticeVertex(parent_indices); + current_level->GetLatticeVertex(parent_indices); if (parent_vertex == nullptr) { goto continueMidOuter; diff --git a/src/core/algorithms/fd/tane/tane.cpp b/src/core/algorithms/fd/tane/tane.cpp index e1d2b9fc66..e24df3e7c1 100644 --- a/src/core/algorithms/fd/tane/tane.cpp +++ b/src/core/algorithms/fd/tane/tane.cpp @@ -11,13 +11,14 @@ #include "config/max_lhs/option.h" #include "lattice_level.h" #include "lattice_vertex.h" -#include "model/table/column_combination.h" #include "model/table/column_data.h" #include "model/table/column_layout_relation_data.h" #include "model/table/relational_schema.h" namespace algos { +using boost::dynamic_bitset; + Tane::Tane() : PliBasedFDAlgorithm({kDefaultPhaseName}) { RegisterOptions(); } diff --git a/src/core/model/table/column_combination.cpp b/src/core/model/table/column_combination.cpp index f0348c79bf..e69de29bb2 100644 --- a/src/core/model/table/column_combination.cpp +++ b/src/core/model/table/column_combination.cpp @@ -1,25 +0,0 @@ -// -// Created by kek on 16.08.2019. -// - -//#include "column_combination.h" - -//#include "relational_schema.h" - -/*ColumnCombination::ColumnCombination(dynamic_bitset<> column_indices_, shared_ptr schema): - Vertical(){ - this->column_indices_ = std::move(column_indices_); - this->schema = schema; -} - -//full of errors - get rid of it -string ColumnCombination::ToString() { - string ans = "["; - string separator; - auto relation = schema.lock(); - for (unsigned long index = column_indices_.find_first(); index < column_indices_.size(); index = column_indices_.find_next(index)){ - ans += separator + relation->getColumn(index)->GetName(); - separator = ", "; - } - return ans; -}*/ \ No newline at end of file diff --git a/src/core/model/table/column_combination.h b/src/core/model/table/column_combination.h index 3d09a9fa32..e69de29bb2 100644 --- a/src/core/model/table/column_combination.h +++ b/src/core/model/table/column_combination.h @@ -1,22 +0,0 @@ -// -// Created by Ilya Vologin -// https://github.com/cupertank -// - - -#pragma once - -#include "vertical.h" - -#include - -using boost::dynamic_bitset, std::string; - -//useless class - think about deprecation -/*class ColumnCombination : public Vertical { - -public: - ColumnCombination(dynamic_bitset<> column_indices_, shared_ptr schema); - explicit ColumnCombination(Vertical&& vertical) : Vertical(vertical) {} //or const&?? - string ToString() override ; -};*/ From 5e37f81c3b9cbc40ee859df49bcb08b8e893441b Mon Sep 17 00:00:00 2001 From: Chizhov Anton Date: Sat, 25 Nov 2023 00:17:13 +0300 Subject: [PATCH 2/3] Add type alias `ColumnIndex` --- src/core/algorithms/fd/hycommon/types.h | 4 +++- src/core/config/indices/type.h | 4 +++- src/core/model/table/column.h | 5 +++-- src/core/model/table/column_index.h | 8 ++++++++ 4 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 src/core/model/table/column_index.h diff --git a/src/core/algorithms/fd/hycommon/types.h b/src/core/algorithms/fd/hycommon/types.h index 4d51fb8522..26c232d55c 100644 --- a/src/core/algorithms/fd/hycommon/types.h +++ b/src/core/algorithms/fd/hycommon/types.h @@ -2,6 +2,8 @@ #include #include +#include "model/table/column_index.h" + namespace model { class PositionListIndex; @@ -11,7 +13,7 @@ class PositionListIndex; namespace algos::hy { // Row (or column) position in the table -using TablePos = unsigned int; +using TablePos = model::ColumnIndex; using ClusterId = unsigned int; // Represents a relation as a list of position list indexes. i-th PLI is a PLI built on i-th column diff --git a/src/core/config/indices/type.h b/src/core/config/indices/type.h index 464484b713..bfa7f27750 100644 --- a/src/core/config/indices/type.h +++ b/src/core/config/indices/type.h @@ -2,7 +2,9 @@ #include +#include "model/table/column_index.h" + namespace config { -using IndexType = unsigned int; +using IndexType = model::ColumnIndex; using IndicesType = std::vector; } // namespace config diff --git a/src/core/model/table/column.h b/src/core/model/table/column.h index 252738a1f0..eb20c72dd0 100644 --- a/src/core/model/table/column.h +++ b/src/core/model/table/column.h @@ -1,17 +1,18 @@ #pragma once #include +#include #include -#include +#include "column_index.h" #include "relational_schema.h" class Column { friend RelationalSchema; public: - using IndexType = size_t; + using IndexType = model::ColumnIndex; private: std::string name_; diff --git a/src/core/model/table/column_index.h b/src/core/model/table/column_index.h new file mode 100644 index 0000000000..10186a430b --- /dev/null +++ b/src/core/model/table/column_index.h @@ -0,0 +1,8 @@ +#pragma once + +namespace model { + +// Represents an index for table column +using ColumnIndex = unsigned int; + +} // namespace model From de1e33be3ca98124dc7c7efa7cc18ac33888f02a Mon Sep 17 00:00:00 2001 From: Alexandr Smirnov Date: Sat, 5 Aug 2023 18:10:25 +0300 Subject: [PATCH 3/3] Implement infrastructure for IND algorithms Co-authored-by: Chizhov Anton --- src/core/algorithms/ind/ind.cpp | 13 ++++++ src/core/algorithms/ind/ind.h | 30 +++++++++++++ src/core/algorithms/ind/ind_algorithm.cpp | 14 ++++++ src/core/algorithms/ind/ind_algorithm.h | 50 +++++++++++++++++++++ src/core/model/table/column_combination.cpp | 19 ++++++++ src/core/model/table/column_combination.h | 32 +++++++++++++ 6 files changed, 158 insertions(+) create mode 100644 src/core/algorithms/ind/ind.cpp create mode 100644 src/core/algorithms/ind/ind.h create mode 100644 src/core/algorithms/ind/ind_algorithm.cpp create mode 100644 src/core/algorithms/ind/ind_algorithm.h diff --git a/src/core/algorithms/ind/ind.cpp b/src/core/algorithms/ind/ind.cpp new file mode 100644 index 0000000000..1fc62c8d54 --- /dev/null +++ b/src/core/algorithms/ind/ind.cpp @@ -0,0 +1,13 @@ +#include "ind.h" + +#include + +namespace model { + +std::string IND::ToString() const { + std::stringstream ss; + ss << GetLhs().ToString() << " -> " << GetRhs().ToString(); + return ss.str(); +} + +} // namespace model diff --git a/src/core/algorithms/ind/ind.h b/src/core/algorithms/ind/ind.h new file mode 100644 index 0000000000..30b0682ede --- /dev/null +++ b/src/core/algorithms/ind/ind.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include + +#include "model/table/column_combination.h" + +namespace model { + +// Inclusion dependency is a relation between attributes of tables +// that indicates possible Primary Key–Foreign Key references. +class IND { +private: + std::shared_ptr lhs_; + std::shared_ptr rhs_; + +public: + IND(std::shared_ptr lhs, std::shared_ptr rhs) + : lhs_(std::move(lhs)), rhs_(std::move(rhs)) {} + + ColumnCombination const& GetLhs() const { + return *lhs_; + } + ColumnCombination const& GetRhs() const { + return *rhs_; + } + std::string ToString() const; +}; + +} // namespace model diff --git a/src/core/algorithms/ind/ind_algorithm.cpp b/src/core/algorithms/ind/ind_algorithm.cpp new file mode 100644 index 0000000000..4dc4245986 --- /dev/null +++ b/src/core/algorithms/ind/ind_algorithm.cpp @@ -0,0 +1,14 @@ +#include "ind_algorithm.h" + +#include "config/names_and_descriptions.h" +#include "config/tabular_data/input_tables/option.h" + +namespace algos { + +INDAlgorithm::INDAlgorithm(std::vector phase_names) + : Algorithm(std::move(phase_names)) { + RegisterOption(config::TablesOpt(&input_tables_)); + MakeOptionsAvailable({config::TablesOpt.GetName()}); +} + +} // namespace algos diff --git a/src/core/algorithms/ind/ind_algorithm.h b/src/core/algorithms/ind/ind_algorithm.h new file mode 100644 index 0000000000..9cc967bc25 --- /dev/null +++ b/src/core/algorithms/ind/ind_algorithm.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +#include +#include + +#include "algorithms/algorithm.h" +#include "ind.h" +#include "tabular_data/input_tables_type.h" +#include "util/primitive_collection.h" + +namespace algos { + +class INDAlgorithm : public Algorithm { +public: + using IND = model::IND; + +private: + util::PrimitiveCollection ind_collection_; + + void ResetState() final { + ind_collection_.Clear(); + ResetINDAlgorithmState(); + } + + virtual void ResetINDAlgorithmState() = 0; + +protected: + constexpr static std::string_view kDefaultPhaseName = "IND mining"; + + config::InputTables input_tables_; + + explicit INDAlgorithm(std::vector phase_names); + + virtual void RegisterIND(std::shared_ptr lhs, + std::shared_ptr rhs) { + ind_collection_.Register(std::move(lhs), std::move(rhs)); + } + virtual void RegisterIND(IND ind) { + ind_collection_.Register(std::move(ind)); + } + +public: + std::list const& INDList() const noexcept { + return ind_collection_.AsList(); + } +}; + +} // namespace algos diff --git a/src/core/model/table/column_combination.cpp b/src/core/model/table/column_combination.cpp index e69de29bb2..956ce6912f 100644 --- a/src/core/model/table/column_combination.cpp +++ b/src/core/model/table/column_combination.cpp @@ -0,0 +1,19 @@ +#include "column_combination.h" + +#include + +namespace model { + +std::string ColumnCombination::ToString() const { + std::vector const& col_ids = GetColumnIndices(); + std::stringstream ss; + for (auto it = col_ids.begin(); it != col_ids.end(); ++it) { + if (it != col_ids.begin()) { + ss << ", "; + } + ss << GetTableIndex() << '.' << *it; + } + return ss.str(); +} + +} // namespace model diff --git a/src/core/model/table/column_combination.h b/src/core/model/table/column_combination.h index e69de29bb2..3b719fa075 100644 --- a/src/core/model/table/column_combination.h +++ b/src/core/model/table/column_combination.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include + +#include "column_index.h" + +namespace model { + +// Represents an index for a table within a set of tables +// As an example, this type is used in inclusion dependencies +using TableIndex = unsigned int; + +class ColumnCombination { +protected: + TableIndex table_index_; + std::vector column_indices_; + +public: + ColumnCombination(TableIndex table_index, std::vector col_indices) + : table_index_(table_index), column_indices_(std::move(col_indices)) {} + + TableIndex GetTableIndex() const { + return table_index_; + } + std::vector const& GetColumnIndices() const { + return column_indices_; + } + std::string ToString() const; +}; + +} // namespace model