diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b29cfd658..8f8788fe78 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,6 +53,7 @@ include_directories(SYSTEM "lib/easyloggingpp/src" "lib/better-enums/") # adding submodules add_subdirectory("lib/googletest") +add_subdirectory("lib/eigen") set( CMAKE_BUILD_TYPE_COPY "${CMAKE_BUILD_TYPE}" ) set( CMAKE_BUILD_TYPE "Release" ) diff --git a/build.sh b/build.sh index 87d4cc9ac2..695db76d78 100755 --- a/build.sh +++ b/build.sh @@ -3,6 +3,7 @@ cd lib git clone https://github.com/google/googletest/ --branch release-1.10.0 git clone https://github.com/amrayn/easyloggingpp/ --branch v9.97.0 git clone https://github.com/aantron/better-enums.git --branch 0.11.3 +git clone https://github.com/libigl/eigen cd .. mkdir build cd build diff --git a/datasets/datasets.zip b/datasets/datasets.zip index a02319282a..dec7ce55fa 100644 Binary files a/datasets/datasets.zip and b/datasets/datasets.zip differ diff --git a/src/algorithms/gfd/const_literal.h b/src/algorithms/gfd/const_literal.h new file mode 100644 index 0000000000..3d2c5df46c --- /dev/null +++ b/src/algorithms/gfd/const_literal.h @@ -0,0 +1,12 @@ +#pragma once +#include "literal.h" + +class ConstLiteral : public Literal { +public: + ConstLiteral(std::pair node, std::string value) { + this->vars.push_back(node.first); + + this->values.push_back(node.second); + this->values.push_back(value); + } +}; diff --git a/src/algorithms/gfd/gfd.cpp b/src/algorithms/gfd/gfd.cpp new file mode 100644 index 0000000000..f0f543a68d --- /dev/null +++ b/src/algorithms/gfd/gfd.cpp @@ -0,0 +1,33 @@ +#include + +#include "gfd.h" + +void GFD::print() const { + this->pattern.print(); + std::cout << std::endl << "Premises: "; + for (const Literal& l : this->premises) { + std::vector vars = l.getVars(); + std::vector values = l.getValues(); + if (vars.size() == 1) { + // ConstLiteral + std::cout << vars.at(0) << "." 
<< values.at(0) << "=" << values.at(1) << "; "; + } else { + // VarLiteral + std::cout << vars.at(0) << "." << values.at(0) << "=" + << vars.at(1) << "." << values.at(1) << "; "; + } + } + std::cout << std::endl << "=>" << std::endl << "Conclusion: "; + for (const Literal& l : this->conclusion) { + std::vector vars = l.getVars(); + std::vector values = l.getValues(); + if (vars.size() == 1) { + // ConstLiteral + std::cout << vars.at(0) << "." << values.at(0) << "=" << values.at(1) << "; "; + } else { + // VarLiteral + std::cout << vars.at(0) << "." << values.at(0) << "=" + << vars.at(1) << "." << values.at(1) << "; "; + } + } +} diff --git a/src/algorithms/gfd/gfd.h b/src/algorithms/gfd/gfd.h new file mode 100644 index 0000000000..033468b4fe --- /dev/null +++ b/src/algorithms/gfd/gfd.h @@ -0,0 +1,20 @@ +#pragma once +#include + +#include "pattern.h" +#include "literal.h" + +class GFD { +private: + Pattern pattern; + std::vector premises; + std::vector conclusion; +public: + GFD(Pattern& pattern_, std::vector& premises_, std::vector& conclusion_) : pattern(pattern_), premises(premises_), conclusion(conclusion_) {} + + Pattern getPattern() const { return this->pattern; } + std::vector getPremises() const { return this->premises; } + std::vector getConclusion() const { return this->conclusion; } + + void print() const; +}; diff --git a/src/algorithms/gfd/gfd_validation.cpp b/src/algorithms/gfd/gfd_validation.cpp new file mode 100644 index 0000000000..35f826923f --- /dev/null +++ b/src/algorithms/gfd/gfd_validation.cpp @@ -0,0 +1,534 @@ +#include +#include +#include +#include +#include +#include + +#include "gfd_validation.h" +#include "graph.h" +#include "gfd.h" + +namespace algos { + +Eigen::MatrixXi GFDValidation::convert(const Eigen::VectorXi& vec, const int& cols) { + Eigen::MatrixXi result = Eigen::MatrixXi::Zero(vec.rows(), cols); + for (int i = 0; i < vec.rows(); ++i) { + result(i, vec(i)) = 1; + } + return result; +} + +bool GFDValidation::isSubgraph(const 
Pattern& query, const Pattern& graph, + const Eigen::MatrixXi& match) { + Eigen::MatrixXi Q = query.getAdjacencyMatrix(); + Eigen::MatrixXi G = graph.getAdjacencyMatrix(); + if ((match * (match * G).transpose()).transpose().cwiseProduct(Q) != Q) { + return false; + } + Eigen::VectorXi mask = Eigen::VectorXi::Zero(G.rows()); + for (int i = 0; i < G.rows(); ++i) { + mask(i) = i; + } + for (const auto& edge_label : query.getEdges()) { + std::pair edge = edge_label.first; + Eigen::VectorXi index1 = Eigen::VectorXi::Zero(Q.rows()); + index1(edge.first) = 1; + int i = mask.cwiseProduct((index1.transpose() * match).transpose()).sum(); + Eigen::VectorXi index2 = Eigen::VectorXi::Zero(Q.rows()); + index2(edge.second) = 1; + int j = mask.cwiseProduct((index2.transpose() * match).transpose()).sum(); + if (edge_label.second != graph.getEdges().at(std::pair(i, j))) { + return false; + } + } + return true; +} + +std::vector GFDValidation::getCandidateMatches(const Pattern& query, + const Pattern& graph, const std::pair& link) { + std::vector result = {}; + Eigen::MatrixXi Q = query.getAdjacencyMatrix(); + Eigen::MatrixXi G = graph.getAdjacencyMatrix(); + + Eigen::MatrixXi graph_degrees = graph.getVertexDegrees(); + Eigen::MatrixXi query_degrees = query.getVertexDegrees(); + + std::vector> available_vertices = {}; + for (int i = 0; i < Q.rows(); ++i) { + std::vector temp = {}; + available_vertices.push_back(temp); + } + + for (int i = 0; i < Q.rows(); ++i) { + if (i == link.first) { + available_vertices.at(i).push_back(link.second); + } else { + for (int j = 0; j < G.rows(); ++j) { + if (query.getVertices().at(i) == graph.getVertices().at(j) && + query_degrees(i, 0) <= graph_degrees(j, 0) && + query_degrees(i, 1) <= graph_degrees(j, 1)) { + available_vertices.at(i).push_back(j); + } + } + } + } + + std::vector current = {}; + for (int j = 0; j < available_vertices.at(0).size(); ++j) { + Eigen::VectorXi temp = Eigen::VectorXi::Zero(Q.rows()); + temp(0) = 
available_vertices.at(0).at(j); + current.push_back(temp); + } + for (int i = 1; i < available_vertices.size(); ++i) { + std::vector new_vecs = {}; + for (const Eigen::VectorXi& vec : current) { + for (int j = 0; j < available_vertices.at(i).size(); ++j) { + bool contains = false; + int index = available_vertices.at(i).at(j); + for (int k = 0; k < i; ++k) { + if (vec(k) == index) { + contains = true; + break; + } + } + if (!contains) { + Eigen::VectorXi temp = vec; + temp(i) = index; + new_vecs.push_back(temp); + } + } + } + current.clear(); + current = new_vecs; + } + + for (const Eigen::VectorXi& vec : current) { + result.push_back(vec); + } + return result; +} + +std::vector GFDValidation::getMatches(const Pattern& query, const Pattern& graph, + const std::pair& link) { + std::vector result = {}; + std::vector candidates = getCandidateMatches(query, graph, link); + for (const Eigen::VectorXi& match : candidates) { + if (isSubgraph(query, graph, convert(match, graph.getAdjacencyMatrix().rows()))) { + result.push_back(match); + } + } + return result; +} + +bool GFDValidation::isSatisfiedLinked(const Graph& graph, const GFD& gfd, + const std::pair& link) { + std::vector matches = getMatches(gfd.getPattern(), graph, link); + for (const Eigen::VectorXi& match : matches) { + bool satisfied = true; + for (const Literal& l : gfd.getPremises()) { + std::vector vars = l.getVars(); + std::vector values = l.getValues(); + int query_index1 = vars.at(0); + int graph_index1 = match(query_index1); + std::map current_attrs1 = graph.getAttributes().at(graph_index1); + if (vars.size() == 1) { + // ConstLiteral + if ((current_attrs1.find(values.at(0)) != current_attrs1.end()) && + (current_attrs1.at(values.at(0)) != values.at(1))) { + // next match + satisfied = false; + break; + } + } else { + // VarLiteral + int query_index2 = vars.at(1); + int graph_index2 = match(query_index2); + std::map current_attrs2 = graph.getAttributes().at(graph_index2); + if 
((current_attrs1.find(values.at(0)) != current_attrs1.end()) && + (current_attrs2.find(values.at(1)) != current_attrs2.end()) && + (current_attrs1.at(values.at(0)) != current_attrs2.at(values.at(1)))) { + // next match + satisfied = false; + break; + } + } + } + + if (!satisfied) { + break; + } + + for (const Literal& l : gfd.getConclusion()) { + std::vector vars = l.getVars(); + std::vector values = l.getValues(); + int query_index1 = vars.at(0); + int graph_index1 = match(query_index1); + std::map current_attrs1 = graph.getAttributes().at(graph_index1); + if (vars.size() == 1) { + // ConstLiteral + if ((current_attrs1.find(values.at(0)) == current_attrs1.end()) || + (current_attrs1.at(values.at(0)) != values.at(1))) { + return false; + } + } else { + // VarLiteral + int query_index2 = vars.at(1); + int graph_index2 = match(query_index2); + std::map current_attrs2 = graph.getAttributes().at(graph_index2); + if ((current_attrs1.find(values.at(0)) == current_attrs1.end()) || + (current_attrs2.find(values.at(1)) == current_attrs2.end()) || + (current_attrs1.at(values.at(0)) != current_attrs2.at(values.at(1)))) { + return false; + } + } + } + } + return true; +} + +std::vector> GFDValidation::balanced(const std::vector& weights, const int& m) { + std::vector> result = {}; + if (weights.begin() == weights.end()) { + for (int i = 0; i < m; ++i) { + std::vector temp = {}; + result.push_back(temp); + } + return result; + } + for (int i = 0; i < m; ++i) { + // the first value is index + std::vector temp = { i }; + result.push_back(temp); + } + // fill processors initially + // count optimal + double optimal = 0; + int i = 0; + for (const int& weight : weights) { + result.at(i++).push_back(weight); + i = i == m ? 
0 : i; + optimal += weight; + } + optimal /= m; + // sort processors (for convenience) + for (std::vector& processor : result) { + std::sort(processor.begin() + 1, processor.end()); + } + // ALGORITHM + // 1st step + std::vector deleted_large = {}; + std::vector deleted_small = {}; + for (std::vector& processor : result) { + auto border = processor.end(); + for (auto it = --processor.end(); it != processor.begin() + 1; --it) { + if (*(it - 1) > optimal / 2) { + deleted_large.push_back(*it); + border = it; + } else { + break; + } + } + processor.erase(border, processor.end()); + } + // 2nd step + Eigen::MatrixXi quality = Eigen::MatrixXi::Zero(m, 3); + for (const std::vector& processor : result) { + auto last_small = processor.end(); + auto last = processor.end(); + if (*(--processor.end()) > optimal / 2) { + --last_small; + } + if (processor.begin() + 1 == last_small) { + continue; + } + int a = 0; + int b = 0; + float sum_small = std::accumulate(processor.begin() + 1, last_small, 0, std::plus()); + float sum = std::accumulate(processor.begin() + 1, last, 0, std::plus()); + while (sum_small > optimal / 2) { + ++a; + --last_small; + sum_small -= *last_small; + } + while (sum > optimal) { + ++b; + --last; + sum -= *last; + } + quality(processor.at(0), 0) = a; + quality(processor.at(0), 1) = b; + quality(processor.at(0), 2) = a - b; + } + // 3rd step + // sort for convenience + std::vector> small_processors = {}; + std::vector> large_processors = {}; + for (const std::vector& processor : result) { + if (*(--processor.end()) > optimal / 2) { + large_processors.push_back(processor); + } else { + small_processors.push_back(processor); + } + } + auto cGreater = [&quality](std::vector a, std::vector b) { + return quality(a.at(0), 2) > quality(b.at(0), 2); + }; + sort(small_processors.begin(), small_processors.end(), cGreater); + sort(large_processors.begin(), large_processors.end(), cGreater); + result.clear(); + result.insert(result.end(), small_processors.begin(), 
small_processors.end()); + result.insert(result.end(), large_processors.begin(), large_processors.end()); + int numOfLarges = large_processors.size() + deleted_large.size(); + // work + auto border = numOfLarges < m ? result.end() - numOfLarges : result.begin(); + for (auto it = border; it != result.end(); ++it) { + auto last = it->end(); + if (*(last - 1) > optimal / 2) { + --last; + } + for (auto cur = last - quality(*it->begin(), 0); cur != last; ++cur) { + deleted_small.push_back(*cur); + } + it->erase(last - quality(*it->begin(), 0), last); + } + // 4th step + for (auto it = result.begin(); it != border; ++it) { + auto last = it->end(); + for (auto cur = last - quality(*it->begin(), 1); cur != last; ++cur) { + deleted_small.push_back(*cur); + } + it->erase(last - quality(*it->begin(), 1), last); + } + // 5th step + auto cur = result.begin(); + i = 0; + for (const int& weight : deleted_large) { + if (i < m - large_processors.size()) { + (result.begin() + i)->push_back(weight); + } else { + sort(result.begin(), result.end(), [](std::vector a, std::vector b) { + return std::accumulate(a.begin(), a.end(), 0, std::plus()) < + std::accumulate(b.begin(), b.end(), 0, std::plus()); + }); + result.begin()->push_back(weight); + } + ++i; + } + // 6th step + for (const int& weight : deleted_small) { + sort(result.begin(), result.end(), [](std::vector a, std::vector b) { + return std::accumulate(a.begin(), a.end(), 0, std::plus()) < + std::accumulate(b.begin(), b.end(), 0, std::plus()); + }); + result.begin()->push_back(weight); + } + // delete indices + for (std::vector& processor : result) { + processor.erase(processor.begin()); + } + return result; +} + +Graph GFDValidation::getSubgraph(const Graph& graph, const int& index, const int& radius, int& out) { + Eigen::MatrixXi m = graph.getAdjacencyMatrix(); + Eigen::MatrixXi temp = m + m.transpose(); + Eigen::MatrixXi neighbours = Eigen::MatrixXi::Zero(temp.rows(), temp.cols()); + for (int i = 0; i < radius; ++i) { + 
Eigen::MatrixXi cur = temp; + for (int j = 0; j < i; ++j) { + cur = cur * temp; + } + neighbours += cur; + } + std::set needed_vertices = { index }; + + for (int i = 0; i < neighbours.rows(); ++i) { + if (neighbours(index, i) != 0) { + needed_vertices.insert(i); + } + } + + std::map vertices{}; + std::map> attributes{}; + int j = 0; + std::map isomorphism{}; + for (const int& i : needed_vertices) { + vertices.emplace(j, graph.getVertices().at(i)); + attributes.emplace(j, graph.getAttributes().at(i)); + isomorphism.emplace(i, j++); + } + std::map, std::string> edges{}; + for (const auto& edge_label : graph.getEdges()) { + std::pair edge = edge_label.first; + if ((needed_vertices.find(edge.first) != needed_vertices.end()) && + (needed_vertices.find(edge.second) != needed_vertices.end())) { + edges.emplace(std::pair(isomorphism.at(edge.first), isomorphism.at(edge.second)), + edge_label.second); + } + } + Graph result = Graph(vertices, edges, attributes); + out = isomorphism.at(index); + return result; +} + +std::vector GFDValidation::getCandidateVertices(const Pattern& pattern, const std::string& label, + const std::pair& degrees) { + std::vector result = {}; + Eigen::MatrixXi G = pattern.getAdjacencyMatrix(); + for (const auto& vertex_label : pattern.getVertices()) { + std::string current_label = vertex_label.second; + if (current_label != label) { + continue; + } + int index = vertex_label.first; + if ((G.row(index).sum() >= degrees.first) && (G.col(index).sum() >= degrees.second)) { + result.push_back(index); + } + } + return result; +} + +int GFDValidation::getRadius(const Pattern& pattern, const int& index) { + Eigen::MatrixXi G = pattern.getAdjacencyMatrix(); + Eigen::MatrixXi m = G + G.transpose(); + Eigen::VectorXi result = Eigen::VectorXi::Zero(G.rows()); + result(index) = 1; + Eigen::VectorXi mask = Eigen::VectorXi(G.rows()); + mask.setConstant(1); + mask(index) = 0; + Eigen::VectorXi prev = Eigen::VectorXi(G.rows()); + prev.setConstant(1); + int answer = 
-1; + while (mask != prev) { + prev = mask; + result = (result.transpose() * m).transpose().cwiseProduct(mask); + // optimize? + for (int i = 0; i < G.rows(); ++i) { + if (result(i) != 0) { + mask(i) = 0; + } + } + ++answer; + } + return answer; +} + +int GFDValidation::getCenter(const Pattern& pattern, int& out) { + int min = pattern.getAdjacencyMatrix().rows(); + int result = 0; + for (const int& index : pattern.getIndices()) { + int radius = getRadius(pattern, index); + if (radius <= min) { + min = radius; + result = index; + } + } + out = min; + return result; +} + +void GFDValidation::calculateUnsatisfied(const std::vector>& messages, + const std::map& coded_gfds, const std::map& coded_subgraphs, std::set& out) { + for (const auto& message : messages) { + int gfd_index = std::get<0>(message); + int subgraph_index = std::get<1>(message); + int center = std::get<2>(message); + int candidate = std::get<3>(message); + if (!isSatisfiedLinked(coded_subgraphs.at(subgraph_index), coded_gfds.at(gfd_index), + std::pair(center, candidate))) { + out.insert(gfd_index); + } + } +} + +std::vector GFDValidation::getSatisfiedGFDs(const Graph& graph, const std::vector& gfds, const int& m) { + std::vector result = {}; + std::set unsatisfied = {}; + std::map>> weighted_messages; + std::vector weights = {}; + + std::map coded_gfds; + std::map coded_subgraphs; + int i = 0; + int j = 0; + for (const GFD& gfd : gfds) { + coded_gfds.emplace(i, gfd); + + int radius = 0; + int center = getCenter(gfd.getPattern(), radius); + std::pair degrees(gfd.getPattern().getVertexDegrees()(center, 0), + gfd.getPattern().getVertexDegrees()(center, 1)); + std::vector candidate_vertices = getCandidateVertices( + graph, gfd.getPattern().getVertices().at(center), degrees + ); + + for (const int& candidate : candidate_vertices) { + int pin = 0; + Graph subgraph = getSubgraph(graph, candidate, radius, pin); + coded_subgraphs.emplace(j, subgraph); + std::tuple temp(i, j, center, pin); + int weight = 
subgraph.getSize(); + if (weighted_messages.find(weight) != weighted_messages.end()) { + weighted_messages.at(weight).push_back(temp); + } else { + std::vector> temps = { temp }; + weighted_messages.emplace(weight, temps); + } + weights.push_back(weight); + ++j; + } + ++i; + } + + std::vector> balanced_weights = balanced(weights, m); + + std::vector> answers = {}; + std::vector>> groups = {}; + for (int i = 0; i < m; ++i) { + std::set current = {}; + answers.push_back(current); + std::vector> messages = {}; + for (int& weight : balanced_weights.at(i)) { + std::tuple temp = *(--weighted_messages.at(weight).end()); + weighted_messages.at(weight).erase(--weighted_messages.at(weight).end()); + messages.push_back(temp); + } + groups.push_back(messages); + } + std::vector threads = {}; + for (int i = 0; i < m; ++i) { + std::thread thrd(&GFDValidation::calculateUnsatisfied, this, + std::cref(groups.at(i)), std::cref(coded_gfds), std::cref(coded_subgraphs), std::ref(answers.at(i))); + threads.push_back(std::move(thrd)); + } + for (std::thread& thrd : threads) { + if (thrd.joinable()) { + thrd.join(); + } + } + for (const std::set& answer : answers) { + // optimize? 
+ for (const int& gfd_index : answer) { + unsatisfied.insert(gfd_index); + } + } + for (int i = 0; i < gfds.size(); ++i) { + if (unsatisfied.find(i) == unsatisfied.end()) { + result.push_back(coded_gfds.at(i)); + } + } + return result; +} + +unsigned long long GFDValidation::Execute() { + auto start_time = std::chrono::system_clock::now(); + + this->result = this->getSatisfiedGFDs(this->graph, this->gfds, std::thread::hardware_concurrency()); + + auto elapsed_milliseconds = std::chrono::duration_cast( + std::chrono::system_clock::now() - start_time); + return elapsed_milliseconds.count(); +} + +} diff --git a/src/algorithms/gfd/gfd_validation.h b/src/algorithms/gfd/gfd_validation.h new file mode 100644 index 0000000000..4170ab1069 --- /dev/null +++ b/src/algorithms/gfd/gfd_validation.h @@ -0,0 +1,36 @@ +#pragma once +#include + +#include "graph.h" +#include "gfd.h" + +namespace algos { + +class GFDValidation { +private: + Graph graph; + std::vector gfds; + std::vector result; + + Eigen::MatrixXi convert(const Eigen::VectorXi&, const int&); + bool isSubgraph(const Pattern& query, const Pattern&, const Eigen::MatrixXi&); + std::vector getCandidateMatches(const Pattern&, const Pattern&, const std::pair&); + std::vector getMatches(const Pattern&, const Pattern&, const std::pair&); + bool isSatisfiedLinked(const Graph& graph, const GFD&, const std::pair&); + std::vector> balanced(const std::vector&, const int&); + Graph getSubgraph(const Graph&, const int&, const int&, int&); + std::vector getCandidateVertices(const Pattern&, const std::string&, const std::pair&); + int getRadius(const Pattern&, const int&); + int getCenter(const Pattern&, int&); + void calculateUnsatisfied(const std::vector>&, const std::map&, const std::map&, std::set&); + std::vector getSatisfiedGFDs(const Graph&, const std::vector&, const int&); +public: + GFDValidation() = default; + GFDValidation(Graph graph_, std::vector gfds_) : graph(graph_), gfds(gfds_) {} + + unsigned long long Execute(); + + 
std::vector GFDList() { return this->result; } +}; + +} diff --git a/src/algorithms/gfd/graph.cpp b/src/algorithms/gfd/graph.cpp new file mode 100644 index 0000000000..a317e8f1b1 --- /dev/null +++ b/src/algorithms/gfd/graph.cpp @@ -0,0 +1,14 @@ +#include + +#include "graph.h" + +void Graph::print() const { + Pattern::print(); + std::cout << std::endl << "Attributes:"; + for (const auto& attr : this->attributes) { + std::cout << std::endl << attr.first << " -> "; + for (const auto& value : attr.second) { + std::cout << value.first << ":" << value.second << "; "; + } + } +} diff --git a/src/algorithms/gfd/graph.h b/src/algorithms/gfd/graph.h new file mode 100644 index 0000000000..1d6fbe66b2 --- /dev/null +++ b/src/algorithms/gfd/graph.h @@ -0,0 +1,21 @@ +#pragma once +#include <../../../lib/eigen/Eigen/Dense> +#include +#include +#include + +#include "pattern.h" + +class Graph : public Pattern { +private: + std::map> attributes; +public: + Graph(const std::map& vertices_, const std::map, std::string>& edges_, + const std::map>& attributes_) noexcept(false) : Pattern(vertices_, edges_) { + this->attributes = attributes_; + } + + std::map> getAttributes() const { return this->attributes; } + + void print() const; +}; diff --git a/src/algorithms/gfd/literal.h b/src/algorithms/gfd/literal.h new file mode 100644 index 0000000000..921ff0bea4 --- /dev/null +++ b/src/algorithms/gfd/literal.h @@ -0,0 +1,15 @@ +#pragma once +#include +#include + +class Literal { +protected: + std::vector vars; + std::vector values; +public: + Literal() = default; + Literal(std::vector& vars_, std::vector& values_) : vars(vars_), values(values_) {} + + std::vector getVars() const { return this->vars; } + std::vector getValues() const { return this->values; } +}; \ No newline at end of file diff --git a/src/algorithms/gfd/pattern.cpp b/src/algorithms/gfd/pattern.cpp new file mode 100644 index 0000000000..1faea74168 --- /dev/null +++ b/src/algorithms/gfd/pattern.cpp @@ -0,0 +1,97 @@ +#include + 
+#include "pattern.h" + +Pattern::Pattern(const std::map& vertices_, const std::map, std::string>& edges_) noexcept(false) { + int max = 0; + for (auto node : vertices_) { + int index = node.first; + this->indices.insert(index); + max = max < index ? index : max; + } + if (max != this->indices.size() - 1) { + throw std::out_of_range("index mismatch"); + } + this->vertices = vertices_; + this->edges = edges_; + + this->adjacencyMatrix = Eigen::MatrixXi::Zero(this->indices.size(), this->indices.size()); + for (auto edge_label : this->edges) { + std::pair edge = edge_label.first; + this->adjacencyMatrix(edge.first, edge.second) = 1; + } + this->degrees = Eigen::MatrixXi::Zero(this->adjacencyMatrix.rows(), 2); + for (int i = 0; i < this->adjacencyMatrix.rows(); ++i) { + this->degrees(i, 0) = this->adjacencyMatrix.row(i).sum(); + this->degrees(i, 1) = this->adjacencyMatrix.col(i).sum(); + } + this->vertexNum = this->vertices.size(); + this->size = vertexNum + this->edges.size(); +} + +bool Pattern::operator==(const Pattern& other) const { + if (this->vertexNum != other.getVertexNum()) { + return false; + } + if (this->size != other.getSize()) { + return false; + } + Eigen::MatrixXi other_degrees = other.getVertexDegrees(); + Eigen::VectorXi check_out = Eigen::VectorXi::Zero(this->vertexNum); + Eigen::VectorXi isomorphism = Eigen::VectorXi::Zero(this->vertexNum); + for (int i = 0; i < this->vertexNum; ++i) { + for (int j = 0; j < this->vertexNum; ++j) { + if (this->degrees.row(i) == other_degrees.row(j) && !check_out(j)) { + check_out(j) = 1; + isomorphism(i) = j; + break; + } + } + } + if (check_out.sum() != this->vertexNum) { + return false; + } + Eigen::MatrixXi match = Eigen::MatrixXi::Zero(this->vertexNum, this->vertexNum); + for (int i = 0; i < this->vertexNum; ++i) { + match(i, isomorphism(i)) = 1; + } + Eigen::MatrixXi Q = this->adjacencyMatrix; + Eigen::MatrixXi G = other.getAdjacencyMatrix(); + if ((match * (match * G).transpose()).transpose().cwiseProduct(Q) 
!= Q) { + return false; + } + Eigen::VectorXi mask = Eigen::VectorXi::Zero(G.rows()); + for (int i = 0; i < G.rows(); ++i) { + mask(i) = i; + } + for (const auto& edge_label : this->edges) { + std::pair edge = edge_label.first; + Eigen::VectorXi index1 = Eigen::VectorXi::Zero(Q.rows()); + index1(edge.first) = 1; + int i = mask.cwiseProduct((index1.transpose() * match).transpose()).sum(); + Eigen::VectorXi index2 = Eigen::VectorXi::Zero(Q.rows()); + index2(edge.second) = 1; + int j = mask.cwiseProduct((index2.transpose() * match).transpose()).sum(); + if (edge_label.second != other.getEdges().at(std::pair(i, j))) { + return false; + } + } + return true; +} + +bool Pattern::operator!=(const Pattern& other) const { + return !(*this == other); +} + +void Pattern::print() const { + std::cout << "Adjacency matrix:" << std::endl << this->adjacencyMatrix << std::endl; + std::cout << "Vertex labels:" << std::endl; + for (const auto& node : this->vertices) { + std::cout << node.first << ":" << node.second << "; "; + } + std::cout << std::endl << "Edge labels:" << std::endl; + for (const auto& edge_label : this->edges) { + auto edge = edge_label.first; + std::cout << "(" << edge.first << "," << edge.second << "):" << edge_label.second << "; "; + } +} diff --git a/src/algorithms/gfd/pattern.h b/src/algorithms/gfd/pattern.h new file mode 100644 index 0000000000..b9e1f27200 --- /dev/null +++ b/src/algorithms/gfd/pattern.h @@ -0,0 +1,58 @@ +#pragma once +#include <../../../lib/eigen/Eigen/Dense> +#include +#include +#include + +class Pattern { +protected: + std::set indices; + std::map vertices; + std::map, std::string> edges; + + Eigen::MatrixXi adjacencyMatrix; + Eigen::MatrixXi degrees; + int size; + int vertexNum; +public: + Pattern() = default; + Pattern(const std::map&, const std::map, std::string>&) noexcept(false); + //Pattern(const std::map& vertices_, const std::map, std::string>& edges_) noexcept(false) { + // int max = 0; + // for (auto node : vertices_) { + // int 
index = node.first; + // this->indices.insert(index); + // max = max < index ? index : max; + // } + // if (max != this->indices.size() - 1) { + // throw std::out_of_range("index mismatch"); + // } + // this->vertices = vertices_; + // this->edges = edges_; + + // this->adjacencyMatrix = Eigen::MatrixXi::Zero(this->indices.size(), this->indices.size()); + // for (auto edge_label : this->edges) { + // std::pair edge = edge_label.first; + // this->adjacencyMatrix(edge.first, edge.second) = 1; + // } + // this->degrees = Eigen::MatrixXi::Zero(this->adjacencyMatrix.rows(), 2); + // for (int i = 0; i < this->adjacencyMatrix.rows(); ++i) { + // this->degrees(i, 0) = this->adjacencyMatrix.row(i).sum(); + // this->degrees(i, 1) = this->adjacencyMatrix.col(i).sum(); + // } + // this->vertexNum = this->vertices.size(); + // this->size = vertexNum + this->edges.size(); + //} + bool operator==(const Pattern&) const; + bool operator!=(const Pattern&) const; + + std::set getIndices() const { return this->indices; } + std::map getVertices() const { return this->vertices; } + std::map, std::string> getEdges() const { return this->edges; } + Eigen::MatrixXi getAdjacencyMatrix() const { return this->adjacencyMatrix; } + Eigen::MatrixXi getVertexDegrees() const { return this->degrees; } + int getSize() const { return this->size; } + int getVertexNum() const { return this->vertexNum; } + + void print() const; +}; diff --git a/src/algorithms/gfd/var_literal.h b/src/algorithms/gfd/var_literal.h new file mode 100644 index 0000000000..0960be7042 --- /dev/null +++ b/src/algorithms/gfd/var_literal.h @@ -0,0 +1,13 @@ +#pragma once +#include "literal.h" + +class VarLiteral : public Literal { +public: + VarLiteral(std::pair node1, std::pair node2) { + this->vars.push_back(node1.first); + this->vars.push_back(node2.first); + + this->values.push_back(node1.second); + this->values.push_back(node2.second); + } +}; diff --git a/tests/test_gfd_validation.cpp b/tests/test_gfd_validation.cpp new file 
mode 100644 index 0000000000..2412b3e0cf --- /dev/null +++ b/tests/test_gfd_validation.cpp @@ -0,0 +1,200 @@ +#include +#include + +#include "algorithms/gfd/gfd_validation.h" +#include "algorithms/gfd/pattern.h" +#include "algorithms/gfd/var_literal.h" +#include "algorithms/gfd/const_literal.h" + +#define edge std::pair +#define attr std::map +#define field std::pair + +TEST(GFDValidationTest, TestTrivially) { + std::map vertices{ + {0, "quadrilateral"}, + {1, "parallelogram"}, + {2, "trapezoid"}, + {3, "rectangle"}, + {4, "rhombus"}, + {5, "square"} + }; + std::map edges{ + {edge(0, 1), "two pairs of parallel sides"}, {edge(0, 2), "one pair of parallel sides"}, + {edge(1, 3), "equality of angles"}, {edge(1, 4), "equality of sides"}, + {edge(3, 5), "equality of sides"}, + {edge(4, 5), "equality of angles"} + }; + std::map attributes{ + {0, attr{{"angles", "arbitrary"}, {"sides", "arbitrary"}}}, + {1, attr{{"angles", "pairwise equal"}, {"sides", "pairwise equal"}}}, + {2, attr{{"angles", "arbitrary"}, {"sides", "parallel and arbitrary"}}}, + {3, attr{{"angles", "equal"}, {"sides", "pairwise equal"}}}, + {4, attr{{"angles", "pairwise equal"}, {"sides", "equal"}}}, + {5, attr{{"angles", "equal"}, {"sides", "equal"}}} + }; + Graph quadrilaterals = Graph(vertices, edges, attributes); + + std::map pat_vertices{ + {0, "polygon"}, + {1, "triangle"}, + }; + std::map pat_edges{ + {edge(0, 1), "three sides"} + }; + VarLiteral pat_literal = VarLiteral(field(0, "sides"), field(1, "sides")); + Pattern pattern = Pattern(pat_vertices, pat_edges); + std::vector premises = std::vector{}; + std::vector conclusion = std::vector{ pat_literal }; + GFD gfd = GFD(pattern, premises, conclusion); + int expected_size = 1; + + auto algorithm = algos::GFDValidation(quadrilaterals, std::vector{ gfd }); + algorithm.Execute(); + std::vector GFDList = algorithm.GFDList(); + + EXPECT_EQ(expected_size, GFDList.size()); + EXPECT_EQ(gfd.getPattern(), GFDList.at(0).getPattern()); +} + 
+TEST(GFDValidationTest, TestExistingMatches0) { /* The pattern person -directed-> film has matches in this graph; node 0 has celebrity "high" and directs node 3 whose success is "low", and the test expects GFDList() to be empty for the GFD celebrity=high => success=high. */ + std::map nodes{ + {0, "person"}, {1, "film"}, {2, "film"}, {3, "film"}, {4, "film"}, + {5, "person"}, {6, "film"}, {7, "film"}, {8, "film"}, + {9, "person"}, {10, "film"}, {11, "film"} + }; + std::map directed_ties{ + {edge(0, 1), "directed"}, {edge(0, 2), "directed"}, {edge(0, 3), "directed"}, {edge(0, 4), "directed"}, + {edge(5, 6), "directed"}, {edge(5, 7), "directed"}, {edge(5, 8), "directed"}, + {edge(9, 10), "directed"}, {edge(9, 11), "directed"} + }; + std::map info{ + {0, attr{{"name", "James Cameron"}, {"celebrity", "high"}}}, + {5, attr{{"name", "Robert Zemeckis"}, {"celebrity", "high"}}}, + {9, attr{{"name", "James Toback"}, {"celebrity", "low"}}}, + {1, attr{{"year", "2009"}, {"success", "high"}, {"name", "Avatar"}}}, + {2, attr{{"year", "1997"}, {"success", "high"}, {"name", "Titanic"}}}, + {3, attr{{"year", "1981"}, {"success", "low"}, {"name", "Piranha II"}}}, + {4, attr{{"year", "1984"}, {"success", "high"}, {"name", "Terminator"}}}, + {6, attr{{"year", "2015"}, {"success", "high"}, {"name", "The Walk"}}}, + {7, attr{{"year", "1985"}, {"success", "high"}, {"name", "Back to the future"}}}, + {8, attr{{"year", "1994"}, {"success", "high"}, {"name", "Forrest Gump"}}}, + {10, attr{{"year", "2008"}, {"success", "high"}, {"name", "Tyson"}}}, + {11, attr{{"year", "1978"}, {"success", "high"}, {"name", "Fingers"}}} + }; + Graph directors = Graph(nodes, directed_ties, info); + + std::map pat_nodes{ + {0, "person"}, + {1, "film"} + }; + std::map pat_directed_tie{ + {edge(0, 1), "directed"} + }; + ConstLiteral premises_literal = ConstLiteral(field(0, "celebrity"), "high"); + ConstLiteral conclusion_literal = ConstLiteral(field(1, "success"), "high"); + Pattern pattern = Pattern(pat_nodes, pat_directed_tie); + std::vector premises = std::vector{ premises_literal }; + std::vector conclusion = std::vector{ conclusion_literal }; + GFD connection_director_film = GFD(pattern, premises, conclusion); + int 
expected_size = 0; + + auto algorithm = algos::GFDValidation(directors, std::vector{ connection_director_film }); + algorithm.Execute(); + std::vector GFDList = algorithm.GFDList(); + + EXPECT_EQ(expected_size, GFDList.size()); +} +/* Runs three GFDs over a family graph in one call: exactly two are expected in GFDList(), eyes_simple's pattern must not be among them, and the two returned patterns must differ from each other and each equal eyes_complex's or body_types' pattern. */ +TEST(GFDValidationTest, TestExistingMatches1) { + std::map people{ + {0, "person"}, {1, "person"}, {2, "person"}, {3, "person"}, {4, "person"}, + {5, "person"}, {6, "person"}, {7, "person"}, {8, "person"}, {9, "person"} + }; + std::map family_ties{ + {edge(4, 0), "mom"}, {edge(4, 1), "dad"}, + {edge(5, 0), "mom"}, {edge(5, 1), "dad"}, + {edge(6, 2), "mom"}, {edge(6, 3), "dad"}, + {edge(7, 2), "mom"}, {edge(7, 3), "dad"}, + {edge(8, 5), "mom"}, {edge(8, 6), "dad"}, + {edge(9, 5), "mom"}, {edge(9, 6), "dad"} + }; + std::map characteristics{ + {0, attr{{"name", "Milana"}, {"eyes", "brown"}, {"body_type", "mesomorph"}}}, + {1, attr{{"name", "Kirill"}, {"eyes", "brown"}, {"body_type", "ectomorph"}}}, + {2, attr{{"name", "Ksenia"}, {"eyes", "brown"}, {"body_type", "endomorph"}}}, + {3, attr{{"name", "Dmitry"}, {"eyes", "blue"}, {"body_type", "mesomorph"}}}, + {4, attr{{"name", "Nastya"}, {"eyes", "brown"}, {"body_type", "endomorph"}}}, + {5, attr{{"name", "Angela"}, {"eyes", "brown"}, {"body_type", "mesomorph"}}}, + {6, attr{{"name", "Daniil"}, {"eyes", "green"}, {"body_type", "mesomorph"}}}, + {7, attr{{"name", "Geralt"}, {"eyes", "blue"}, {"body_type", "mesomorph"}}}, + {8, attr{{"name", "Maksim"}, {"eyes", "green"}, {"body_type", "ectomorph"}}}, + {9, attr{{"name", "Matvey"}, {"eyes", "brown"}, {"body_type", "ectomorph"}}} + }; + Graph family = Graph(people, family_ties, characteristics); + + std::map pat_vertices0{ + {0, "person"}, + {1, "person"} + }; + std::map pat_edges0{ + {edge(0, 1), "mom"} + }; + ConstLiteral premises_literal0 = ConstLiteral(field(1, "eyes"), "brown"); + ConstLiteral conclusion_literal0 = ConstLiteral(field(0, "eyes"), "brown"); + Pattern pattern0 = Pattern(pat_vertices0, pat_edges0); + std::vector premises0 = 
std::vector{ premises_literal0 }; + std::vector conclusion0 = std::vector{ conclusion_literal0 }; + GFD eyes_simple = GFD(pattern0, premises0, conclusion0); + + std::map pat_vertices1{ + {0, "person"}, + {1, "person"}, + {2, "person"} + }; + std::map pat_edges1{ + {edge(0, 1), "mom"}, + {edge(0, 2), "dad"} + }; + ConstLiteral premises_literal11 = ConstLiteral(field(1, "eyes"), "brown"); + ConstLiteral premises_literal12 = ConstLiteral(field(2, "eyes"), "brown"); + ConstLiteral conclusion_literal1 = ConstLiteral(field(0, "eyes"), "brown"); + Pattern pattern1 = Pattern(pat_vertices1, pat_edges1); + std::vector premises1 = std::vector{ premises_literal11, premises_literal12 }; + std::vector conclusion1 = std::vector{ conclusion_literal1 }; + GFD eyes_complex = GFD(pattern1, premises1, conclusion1); + + std::map pat_vertices2{ + {0, "person"}, + {1, "person"}, + {2, "person"} + }; + std::map pat_edges2{ + {edge(0, 1), "mom"}, + {edge(1, 2), "dad"} + }; + VarLiteral conclusion_literal2 = VarLiteral(field(2, "body_type"), field(0, "body_type")); + Pattern pattern2 = Pattern(pat_vertices2, pat_edges2); + std::vector premises2 = std::vector{}; + std::vector conclusion2 = std::vector{ conclusion_literal2 }; + GFD body_types = GFD(pattern2, premises2, conclusion2); + int expected_size = 2; + + auto algorithm = algos::GFDValidation(family, std::vector{ eyes_simple, eyes_complex, body_types }); + algorithm.Execute(); + std::vector GFDList = algorithm.GFDList(); + + ASSERT_EQ(expected_size, GFDList.size()); /* fatal on mismatch: the checks below read elements 0 and 1 of GFDList */ + + for (const GFD& current : GFDList) { + EXPECT_FALSE(current.getPattern() == eyes_simple.getPattern()); + } + EXPECT_FALSE(GFDList.at(0).getPattern() == GFDList.at(1).getPattern()); /* the two returned GFDs must have different patterns; at() avoids incrementing a temporary iterator */ + + for (const GFD& current : GFDList) { + EXPECT_TRUE( + current.getPattern() == eyes_complex.getPattern() || + current.getPattern() == body_types.getPattern() + ); + } +}