Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add INDAlgorithm infrastructure #296

Merged
merged 3 commits into from
Nov 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/core/algorithms/fd/depminer/depminer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include <easylogging++.h>

#include "model/table/agree_set_factory.h"
#include "model/table/column_combination.h"
#include "model/table/relational_schema.h"

namespace algos {
Expand Down
2 changes: 2 additions & 0 deletions src/core/algorithms/fd/fd_mine/fd_mine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

namespace algos {

using boost::dynamic_bitset;

// Default constructor: passes a one-element list holding kDefaultPhaseName to the
// PliBasedFDAlgorithm base (presumably the list of phase names -- confirm against the base class).
Fd_mine::Fd_mine() : PliBasedFDAlgorithm({kDefaultPhaseName}) {}

void Fd_mine::ResetStateFd() {
Expand Down
26 changes: 14 additions & 12 deletions src/core/algorithms/fd/fd_mine/fd_mine.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
#include <filesystem>
#include <set>

#include <boost/dynamic_bitset.hpp>
#include <boost/unordered_map.hpp>

#include "algorithms/fd/pli_based_fd_algorithm.h"
#include "model/table/column_combination.h"
#include "model/table/column_layout_relation_data.h"
#include "model/table/position_list_index.h"
#include "model/table/vertical.h"
Expand All @@ -17,17 +17,19 @@ class Fd_mine : public PliBasedFDAlgorithm {
private:
const RelationalSchema* schema_;

std::set<dynamic_bitset<>> candidate_set_;
boost::unordered_map<dynamic_bitset<>, std::unordered_set<dynamic_bitset<>>> eq_set_;
boost::unordered_map<dynamic_bitset<>, dynamic_bitset<>> fd_set_;
boost::unordered_map<dynamic_bitset<>, dynamic_bitset<>> final_fd_set_;
std::set<dynamic_bitset<>> key_set_;
boost::unordered_map<dynamic_bitset<>, dynamic_bitset<>> closure_;
boost::unordered_map<dynamic_bitset<>, std::shared_ptr<model::PositionListIndex const>> plis_;
dynamic_bitset<> relation_indices_;

void ComputeNonTrivialClosure(dynamic_bitset<> const& xi);
void ObtainFDandKey(dynamic_bitset<> const& xi);
std::set<boost::dynamic_bitset<>> candidate_set_;
boost::unordered_map<boost::dynamic_bitset<>, std::unordered_set<boost::dynamic_bitset<>>>
eq_set_;
boost::unordered_map<boost::dynamic_bitset<>, boost::dynamic_bitset<>> fd_set_;
boost::unordered_map<boost::dynamic_bitset<>, boost::dynamic_bitset<>> final_fd_set_;
std::set<boost::dynamic_bitset<>> key_set_;
boost::unordered_map<boost::dynamic_bitset<>, boost::dynamic_bitset<>> closure_;
boost::unordered_map<boost::dynamic_bitset<>, std::shared_ptr<model::PositionListIndex const>>
plis_;
boost::dynamic_bitset<> relation_indices_;

void ComputeNonTrivialClosure(boost::dynamic_bitset<> const& xi);
void ObtainFDandKey(boost::dynamic_bitset<> const& xi);
void ObtainEqSet();
void PruneCandidates();
void GenerateNextLevelCandidates();
Expand Down
4 changes: 3 additions & 1 deletion src/core/algorithms/fd/hycommon/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#include <utility>
#include <vector>

#include "model/table/column_index.h"

namespace model {

class PositionListIndex;
Expand All @@ -11,7 +13,7 @@ class PositionListIndex;
namespace algos::hy {

// Row (or column) position in the table
using TablePos = unsigned int;
using TablePos = model::ColumnIndex;
using ClusterId = unsigned int;

// Represents a relation as a list of position list indexes. i-th PLI is a PLI built on i-th column
Expand Down
9 changes: 4 additions & 5 deletions src/core/algorithms/fd/tane/lattice_level.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

#include <easylogging++.h>

#include "model/table/column_combination.h"

namespace model {

using std::move, std::min, std::shared_ptr, std::vector, std::sort, std::make_shared;
Expand Down Expand Up @@ -63,9 +61,10 @@ void LatticeLevel::GenerateNextLevel(std::vector<std::unique_ptr<LatticeLevel>>&

Vertical child_columns = vertex1->GetVertical().Union(vertex2->GetVertical());
std::unique_ptr<LatticeVertex> child_vertex =
std::make_unique<LatticeVertex>(child_columns);
std::make_unique<LatticeVertex>(child_columns);

dynamic_bitset<> parent_indices(vertex1->GetVertical().GetSchema()->GetNumColumns());
boost::dynamic_bitset<> parent_indices(
vertex1->GetVertical().GetSchema()->GetNumColumns());
parent_indices |= vertex1->GetVertical().GetColumnIndices();
parent_indices |= vertex2->GetVertical().GetColumnIndices();

Expand All @@ -79,7 +78,7 @@ void LatticeLevel::GenerateNextLevel(std::vector<std::unique_ptr<LatticeLevel>>&
i++, skip_index = parent_indices.find_next(skip_index)) {
parent_indices[skip_index] = false;
LatticeVertex const* parent_vertex =
current_level->GetLatticeVertex(parent_indices);
current_level->GetLatticeVertex(parent_indices);

if (parent_vertex == nullptr) {
goto continueMidOuter;
Expand Down
3 changes: 2 additions & 1 deletion src/core/algorithms/fd/tane/tane.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
#include "config/max_lhs/option.h"
#include "lattice_level.h"
#include "lattice_vertex.h"
#include "model/table/column_combination.h"
#include "model/table/column_data.h"
#include "model/table/column_layout_relation_data.h"
#include "model/table/relational_schema.h"

namespace algos {

using boost::dynamic_bitset;

// Default constructor: passes a one-element list holding kDefaultPhaseName to the
// PliBasedFDAlgorithm base (presumably the list of phase names -- confirm against the base class),
// then calls RegisterOptions().
Tane::Tane() : PliBasedFDAlgorithm({kDefaultPhaseName}) {
RegisterOptions();
}
Expand Down
13 changes: 13 additions & 0 deletions src/core/algorithms/ind/ind.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "ind.h"

#include <sstream>

namespace model {

// Renders the dependency as "<lhs> -> <rhs>", where each side is the textual
// form produced by ColumnCombination::ToString().
std::string IND::ToString() const {
    std::string text = GetLhs().ToString();
    text += " -> ";
    text += GetRhs().ToString();
    return text;
}

} // namespace model
30 changes: 30 additions & 0 deletions src/core/algorithms/ind/ind.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#pragma once

#include <memory>
#include <string>

#include "model/table/column_combination.h"

namespace model {

// Inclusion dependency is a relation between attributes of tables
// that indicates possible Primary Key–Foreign Key references.
class IND {
private:
    // Left-hand and right-hand sides of the dependency. Both are dereferenced
    // unconditionally by the getters below, so they must not be null.
    std::shared_ptr<ColumnCombination> lhs_;
    std::shared_ptr<ColumnCombination> rhs_;

public:
    // Takes shared ownership of both sides; the pointers are moved into the members.
    // Callers must pass non-null pointers (see GetLhs / GetRhs).
    IND(std::shared_ptr<ColumnCombination> lhs, std::shared_ptr<ColumnCombination> rhs)
        : lhs_(std::move(lhs)), rhs_(std::move(rhs)) {}

    // Returns the left-hand side column combination.
    ColumnCombination const& GetLhs() const {
        return *lhs_;
    }

    // Returns the right-hand side column combination.
    ColumnCombination const& GetRhs() const {
        return *rhs_;
    }

    // Returns a textual representation of the dependency (defined out of line).
    std::string ToString() const;
};

} // namespace model
14 changes: 14 additions & 0 deletions src/core/algorithms/ind/ind_algorithm.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#include "ind_algorithm.h"

#include "config/names_and_descriptions.h"
#include "config/tabular_data/input_tables/option.h"

namespace algos {

// Constructs the IND algorithm base.
// `phase_names` is moved into the Algorithm base-class constructor.
INDAlgorithm::INDAlgorithm(std::vector<std::string_view> phase_names)
: Algorithm(std::move(phase_names)) {
// Register the tables option, bound to the input_tables_ member ...
RegisterOption(config::TablesOpt(&input_tables_));
// ... and then make that same option available, looked up by its name.
MakeOptionsAvailable({config::TablesOpt.GetName()});
}

} // namespace algos
50 changes: 50 additions & 0 deletions src/core/algorithms/ind/ind_algorithm.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#pragma once

#include <memory>
#include <string>
#include <string_view>
#include <vector>

#include "algorithms/algorithm.h"
#include "ind.h"
#include "tabular_data/input_tables_type.h"
#include "util/primitive_collection.h"

namespace algos {

// Base class for inclusion dependency (IND) algorithms.
// Keeps the registered INDs in ind_collection_ and exposes them through INDList();
// derived classes add INDs via the protected RegisterIND overloads.
class INDAlgorithm : public Algorithm {
public:
using IND = model::IND;

private:
// Storage for every IND registered so far.
util::PrimitiveCollection<IND> ind_collection_;

// Clears the stored INDs, then lets the derived class reset its own state.
// Marked final, so derived classes customize resetting only via ResetINDAlgorithmState().
void ResetState() final {
ind_collection_.Clear();
ResetINDAlgorithmState();
}

// Hook called from ResetState(); must be implemented by every concrete algorithm.
virtual void ResetINDAlgorithmState() = 0;

protected:
// Phase name offered to derived classes for use as their phase name.
constexpr static std::string_view kDefaultPhaseName = "IND mining";

// Input tables; the constructor registers the tables option against this member.
config::InputTables input_tables_;

// `phase_names` is forwarded to the Algorithm base class.
explicit INDAlgorithm(std::vector<std::string_view> phase_names);

// Registers an IND given its two sides; both pointers are moved into
// ind_collection_.Register (presumably constructing the IND from them -- confirm
// against util::PrimitiveCollection).
virtual void RegisterIND(std::shared_ptr<model::ColumnCombination> lhs,
std::shared_ptr<model::ColumnCombination> rhs) {
ind_collection_.Register(std::move(lhs), std::move(rhs));
}
// Registers an already constructed IND by moving it into the collection.
virtual void RegisterIND(IND ind) {
ind_collection_.Register(std::move(ind));
}

public:
// Returns the registered INDs as a list, by const reference.
// NOTE(review): std::list is used here but <list> is not included directly by this
// header -- presumably it arrives via util/primitive_collection.h; confirm.
std::list<IND> const& INDList() const noexcept {
return ind_collection_.AsList();
}
};

} // namespace algos
4 changes: 3 additions & 1 deletion src/core/config/indices/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

#include <vector>

#include "model/table/column_index.h"

namespace config {
using IndexType = unsigned int;
using IndexType = model::ColumnIndex;
using IndicesType = std::vector<IndexType>;
} // namespace config
5 changes: 3 additions & 2 deletions src/core/model/table/column.h
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
#pragma once

#include <string>
#include <utility>

#include <boost/dynamic_bitset.hpp>
#include <utility>

#include "column_index.h"
#include "relational_schema.h"

class Column {
friend RelationalSchema;

public:
using IndexType = size_t;
using IndexType = model::ColumnIndex;

private:
std::string name_;
Expand Down
34 changes: 14 additions & 20 deletions src/core/model/table/column_combination.cpp
Original file line number Diff line number Diff line change
@@ -1,25 +1,19 @@
//
// Created by kek on 16.08.2019.
//
#include "column_combination.h"

//#include "column_combination.h"
#include <sstream>

//#include "relational_schema.h"
namespace model {

/*ColumnCombination::ColumnCombination(dynamic_bitset<> column_indices_, shared_ptr<RelationalSchema> schema):
Vertical(){
this->column_indices_ = std::move(column_indices_);
this->schema = schema;
std::string ColumnCombination::ToString() const {
std::vector<ColumnIndex> const& col_ids = GetColumnIndices();
std::stringstream ss;
for (auto it = col_ids.begin(); it != col_ids.end(); ++it) {
if (it != col_ids.begin()) {
ss << ", ";
}
ss << GetTableIndex() << '.' << *it;
}
return ss.str();
}

//full of errors - get rid of it
string ColumnCombination::ToString() {
string ans = "[";
string separator;
auto relation = schema.lock();
for (unsigned long index = column_indices_.find_first(); index < column_indices_.size(); index = column_indices_.find_next(index)){
ans += separator + relation->getColumn(index)->GetName();
separator = ", ";
}
return ans;
}*/
} // namespace model
40 changes: 25 additions & 15 deletions src/core/model/table/column_combination.h
Original file line number Diff line number Diff line change
@@ -1,22 +1,32 @@
//
// Created by Ilya Vologin
// https://github.com/cupertank
//


#pragma once

#include "vertical.h"
#include <string>
#include <vector>

#include "column_index.h"

#include <boost/dynamic_bitset.hpp>
namespace model {

using boost::dynamic_bitset, std::string;
// Represents an index for a table within a set of tables
// As an example, this type is used in inclusion dependencies
using TableIndex = unsigned int;

//useless class - think about deprecation
/*class ColumnCombination : public Vertical {
class ColumnCombination {
protected:
TableIndex table_index_;
std::vector<ColumnIndex> column_indices_;

public:
ColumnCombination(dynamic_bitset<> column_indices_, shared_ptr<RelationalSchema> schema);
explicit ColumnCombination(Vertical&& vertical) : Vertical(vertical) {} //or const&??
string ToString() override ;
};*/
ColumnCombination(TableIndex table_index, std::vector<ColumnIndex> col_indices)
: table_index_(table_index), column_indices_(std::move(col_indices)) {}

TableIndex GetTableIndex() const {
return table_index_;
}
std::vector<ColumnIndex> const& GetColumnIndices() const {
return column_indices_;
}
std::string ToString() const;
};

} // namespace model
8 changes: 8 additions & 0 deletions src/core/model/table/column_index.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#pragma once

namespace model {

// Represents the index of a column within a table
using ColumnIndex = unsigned int;

} // namespace model