Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GFD validation #154

Merged
merged 6 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ endif()

# configuring boost
set(Boost_USE_STATIC_LIBS OFF)
find_package(Boost 1.72.0 REQUIRED COMPONENTS container thread)
find_package(Boost 1.72.0 REQUIRED COMPONENTS container thread graph)
include_directories(${Boost_INCLUDE_DIRS})
message(${Boost_INCLUDE_DIRS})

Expand Down
10 changes: 8 additions & 2 deletions src/core/algorithms/algorithm_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ namespace algos {
using AlgorithmTypes =
std::tuple<Depminer, DFD, FastFDs, FDep, Fd_mine, Pyro, Tane, FUN, hyfd::HyFD, Aid, Apriori,
metric::MetricVerifier, DataStats, fd_verifier::FDVerifier, HyUCC, PyroUCC,
cfd::FDFirstAlgorithm, ACAlgorithm, UCCVerifier, Faida>;
cfd::FDFirstAlgorithm, ACAlgorithm, UCCVerifier, Faida, GfdValidation,
EGfdValidation, NaiveGfdValidation>;

// clang-format off
/* Enumeration of all supported non-pipeline algorithms. If you implement a new
Expand Down Expand Up @@ -57,7 +58,12 @@ BETTER_ENUM(AlgorithmType, char,
ucc_verifier,

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: expected identifier [clang-diagnostic-error]

<<<<<<< HEAD
^

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: expected unqualified-id [clang-diagnostic-error]

<<<<<<< HEAD
^

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: expected expression [clang-diagnostic-error]

<<<<<<< HEAD
  ^

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: expected expression [clang-diagnostic-error]

<<<<<<< HEAD
    ^

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: expected expression [clang-diagnostic-error]

<<<<<<< HEAD
      ^

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: use of undeclared identifier 'HEAD' [clang-diagnostic-error]

<<<<<<< HEAD
        ^

/* Inclusion dependency mining algorithms */
faida
faida,

/* Graph functional dependency mining algorithms */
gfdvalid,
Mstrutov marked this conversation as resolved.
Show resolved Hide resolved

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: use of undeclared identifier 'gfdvalid'; did you mean 'egfdvalid'? [clang-diagnostic-error]

Suggested change
gfdvalid,
egfdvalid,

(skipping 42 expansions in backtrace; use -fmacro-backtrace-limit=0 to see all)

egfdvalid,
naivegfdvalid
)
// clang-format on

Expand Down
5 changes: 5 additions & 0 deletions src/core/algorithms/algorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,8 @@

/* Inclusion dependency mining algorithms */
#include "algorithms/ind/faida/faida.h"

/* Graph functional dependency mining algorithms */
#include "algorithms/gfd/egfd_validation.h"
#include "algorithms/gfd/gfd_validation.h"
#include "algorithms/gfd/naivegfd_validation.h"
172 changes: 172 additions & 0 deletions src/core/algorithms/gfd/balancer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
#include "balancer.h"

#include <algorithm>
#include <cstddef>
#include <functional>
#include <map>
#include <numeric>
#include <stdexcept>
#include <tuple>
#include <vector>

// Distributes `weights` over `processors_num` processors.
//
// The weights are first dealt round-robin, then rearranged by the helper
// passes (DeleteLarge .. FullSmall) using `optimal_`, the average load per
// used processor, as the target.
//
// weights         weights to distribute
// processors_num  number of processors; must not be negative, and must be
//                 positive when `weights` is not empty
//
// Returns `processors_num` vectors, each holding the weights assigned to one
// processor. When there are fewer weights than processors, the surplus
// processors are returned as empty vectors at the end.
//
// Throws std::invalid_argument if `processors_num` is negative, or if it is
// zero while `weights` is not empty.
std::vector<std::vector<int>> Balancer::Balance(std::vector<int> const& weights,
                                                int const& processors_num) {
    // Reject a negative count up front: it would otherwise be converted to a
    // huge unsigned processor count.
    if (processors_num < 0) {
        throw std::invalid_argument("Balancer: processors_num must not be negative");
    }
    auto const requested = static_cast<std::size_t>(processors_num);

    result_ = {};
    if (weights.empty()) {
        m_ = 0;
        result_.resize(requested);
        return result_;
    }
    if (requested == 0) {
        throw std::invalid_argument("Balancer: cannot distribute weights over zero processors");
    }

    // Never use more processors than there are weights, so that every used
    // processor receives at least one weight.
    m_ = std::min(requested, weights.size());

    // Slot 0 of every used processor stores the processor's index; the helper
    // passes use it as the key into quality_. It is stripped before returning.
    result_.reserve(requested);
    for (std::size_t index = 0; index < m_; ++index) {
        result_.push_back(std::vector<int>{static_cast<int>(index)});
    }

    // Deal the weights round-robin and compute the average load per processor.
    optimal_ = 0;
    std::size_t slot = 0;
    for (int const weight : weights) {
        result_.at(slot).push_back(weight);
        slot = (slot + 1) % m_;
        optimal_ += weight;
    }
    optimal_ /= m_;

    // Keep the weights of each processor in ascending order (index excluded);
    // the helper passes inspect and trim the tail of each processor.
    for (std::vector<int>& processor : result_) {
        std::sort(processor.begin() + 1, processor.end());
    }

    // ALGORITHM
    DeleteLarge();
    Prepare();
    DeleteFirstSmall();
    DeleteSecondSmall();
    FullLarge();
    FullSmall();

    // Drop the bookkeeping index from every processor.
    for (std::vector<int>& processor : result_) {
        processor.erase(processor.begin());
    }
    // Pad with empty processors up to the requested count.
    result_.resize(requested);
    return result_;
}

// Collects surplus "large" weights (weights greater than optimal_ / 2) into
// deleted_large_.
//
// Each processor is scanned from its last weight towards its first one. A
// weight is removed while the weight directly before it is large as well, so
// with ascending weights (Balance sorts them) at most one large weight stays
// on a processor. Slot 0 (the processor index) and the first weight are never
// removed.
void Balancer::DeleteLarge() {
    deleted_large_.clear();
    double const large_threshold = optimal_ / 2;
    for (std::vector<int>& load : result_) {
        // Number of leading slots that survive; everything after is dropped.
        std::size_t keep = load.size();
        for (std::size_t pos = load.size() - 1; pos != 1; --pos) {
            if (!(load[pos - 1] > large_threshold)) {
                break;
            }
            deleted_large_.push_back(load[pos]);
            keep = pos;
        }
        load.resize(keep);
    }
}

// Fills quality_ with a tuple (a, b, a - b) for every used processor, keyed by
// the processor index stored in slot 0:
//   a - how many trailing small weights (a trailing large weight is skipped)
//       must be dropped so that the remaining small weights sum to at most
//       optimal_ / 2;
//   b - how many trailing weights must be dropped so that the remaining
//       weights sum to at most optimal_.
// Processors that hold no small weight keep the default (0, 0, 0).
void Balancer::Prepare() {
    // emplace() never overwrites an existing key, so the table is cleared
    // first; otherwise a reused Balancer would keep stale values for the
    // processors skipped below.
    quality_.clear();
    for (std::size_t i = 0; i < m_; ++i) {
        quality_.emplace(static_cast<int>(i), std::tuple<int, int, int>(0, 0, 0));
    }

    double const half_optimal = optimal_ / 2;
    for (std::vector<int> const& processor : result_) {
        auto const first = processor.begin() + 1;  // slot 0 is the index
        auto last_small = processor.end();
        auto last = processor.end();
        if (processor.back() > half_optimal) {
            --last_small;
        }
        if (first == last_small) {
            continue;
        }

        // Integer sums stay exact; a float accumulator rounds above 2^24.
        long long sum_small = std::accumulate(first, last_small, 0LL);
        long long sum = std::accumulate(first, last, 0LL);

        int a = 0;
        while (last_small != first && sum_small > half_optimal) {
            ++a;
            --last_small;
            sum_small -= *last_small;
        }
        int b = 0;
        while (last != first && sum > optimal_) {
            ++b;
            --last;
            sum -= *last;
        }

        quality_.at(processor.front()) = std::tuple<int, int, int>(a, b, a - b);
    }
}

// Reorders result_ and trims small weights from its tail processors.
//
// After this call result_ holds first the processors without a large last
// weight, then the processors with one (large means greater than
// optimal_ / 2); each group is ordered by descending third quality_ entry.
// large_procs_num_ is set to the size of the second group, and border_ to the
// start of the last `larges_num` processors, where larges_num is
// large_procs_num_ plus the number of weights in deleted_large_ (or to
// result_.begin() when that count is not below m_).
//
// For every processor from border_ on, the trailing small weights counted by
// the first quality_ entry are moved into deleted_small_.
void Balancer::DeleteFirstSmall() {
    deleted_small_ = {};
    double const half_optimal = optimal_ / 2;

    // Group in place instead of copying every processor into temporary
    // vectors; stable_partition keeps the relative order inside both groups.
    auto const first_large = std::stable_partition(
            result_.begin(), result_.end(), [half_optimal](std::vector<int> const& processor) {
                return !(processor.back() > half_optimal);
            });

    auto by_quality_desc = [this](std::vector<int> const& a, std::vector<int> const& b) {
        return std::get<2>(quality_.at(a.at(0))) > std::get<2>(quality_.at(b.at(0)));
    };
    std::sort(result_.begin(), first_large, by_quality_desc);
    std::sort(first_large, result_.end(), by_quality_desc);

    large_procs_num_ = static_cast<std::size_t>(result_.end() - first_large);
    std::size_t const larges_num = large_procs_num_ + deleted_large_.size();
    border_ = larges_num < m_ ? result_.end() - larges_num : result_.begin();

    for (auto processor = border_; processor != result_.end(); ++processor) {
        // A trailing large weight stays; only the small weights before it go.
        auto small_end = processor->end();
        if (processor->back() > half_optimal) {
            --small_end;
        }
        auto const small_begin = small_end - std::get<0>(quality_.at(processor->front()));
        deleted_small_.insert(deleted_small_.end(), small_begin, small_end);
        processor->erase(small_begin, small_end);
    }
}

// For every processor located before border_, moves its trailing weights into
// deleted_small_. The number of weights taken from a processor is the second
// entry of that processor's quality_ tuple, looked up by the index stored in
// slot 0.
void Balancer::DeleteSecondSmall() {
    for (auto processor = result_.begin(); processor != border_; ++processor) {
        auto const tail_end = processor->end();
        auto const tail_begin = tail_end - std::get<1>(quality_.at(processor->front()));
        deleted_small_.insert(deleted_small_.end(), tail_begin, tail_end);
        processor->erase(tail_begin, tail_end);
    }
}

// Appends `weight` to the processor with the smallest current load.
//
// As a side effect result_ is reordered by ascending load.
void Balancer::PutWeight(int const& weight) {
    // Slot 0 of every processor holds its index rather than a weight (Balance
    // strips it only at the very end), so it must not count towards the load.
    auto load = [](std::vector<int> const& processor) {
        return std::accumulate(processor.begin() + 1, processor.end(), 0);
    };
    std::sort(result_.begin(), result_.end(),
              [&load](std::vector<int> const& a, std::vector<int> const& b) {
                  return load(a) < load(b);
              });
    result_.front().push_back(weight);
}

void Balancer::FullLarge() {
std::size_t i = 0;
for (int const& weight : deleted_large_) {
if (i < m_ - large_procs_num_) {
(result_.begin() + i)->push_back(weight);
} else {
PutWeight(weight);
}
++i;
}
}

void Balancer::FullSmall() {
for (int const& weight : deleted_small_) {
PutWeight(weight);
}
}
27 changes: 27 additions & 0 deletions src/core/algorithms/gfd/balancer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#pragma once
#include <map>
#include <vector>

// Distributes integer weights over a given number of processors.
//
// The only entry point is Balance(); the remaining members are working state
// and passes of the algorithm, valid only while Balance() is running.
class Balancer {
private:
    // Number of processors actually used: the smaller of the requested
    // processor count and the number of weights.
    std::size_t m_ = 0;
    // Sum of all weights divided by m_.
    double optimal_ = 0;
    // Position inside result_ that separates the processors handled by
    // DeleteSecondSmall (before it) from those handled by DeleteFirstSmall.
    std::vector<std::vector<int>>::iterator border_{};
    // One vector per processor. While Balance() runs, slot 0 of each vector
    // stores the processor's index and the weights follow it.
    std::vector<std::vector<int>> result_;
    // Weights greater than optimal_ / 2 taken off by DeleteLarge.
    std::vector<int> deleted_large_ = {};
    // Weights taken off by DeleteFirstSmall and DeleteSecondSmall.
    std::vector<int> deleted_small_ = {};
    // Number of processors whose last weight is greater than optimal_ / 2,
    // as counted by DeleteFirstSmall.
    std::size_t large_procs_num_ = 0;
    // Per-processor tuple (a, b, a - b) computed by Prepare, keyed by the
    // processor index.
    std::map<int, std::tuple<int, int, int>> quality_;

    void DeleteLarge();
    void Prepare();
    void DeleteFirstSmall();
    void DeleteSecondSmall();
    void PutWeight(int const& weight);
    void FullLarge();
    void FullSmall();

public:
    // Distributes `weights` over `processors_num` processors and returns one
    // vector of weights per processor.
    std::vector<std::vector<int>> Balance(std::vector<int> const& weights,
                                          int const& processors_num);
};
Loading