diff --git a/Project.toml b/Project.toml index bed67ec..c8f39e1 100644 --- a/Project.toml +++ b/Project.toml @@ -8,6 +8,7 @@ Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" @@ -19,7 +20,7 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Conda = "1.4.0" DataStructures = "0.17.10" JSON3 = "1.0.1" -LightGraphs = "^1.1.0" +LightGraphs = "1.3" PyCall = "1.91.2" PyPlot = "2.8.2" StatsBase = "0.32" diff --git a/src/SimpleHypergraphs.jl b/src/SimpleHypergraphs.jl index 5fb705c..09f4362 100644 --- a/src/SimpleHypergraphs.jl +++ b/src/SimpleHypergraphs.jl @@ -7,27 +7,36 @@ using PyCall using Conda using PyPlot using JSON3 +using Random +using LinearAlgebra export Hypergraph, getvertices, gethyperedges -export add_vertex!, add_hyperedge!, remove_vertex!, remove_hyperedge!, prune_hypergraph!, prune_hypergraph +export add_vertex!, add_hyperedge!, remove_vertex!, remove_hyperedge! 
+export prune_hypergraph!, prune_hypergraph export set_vertex_meta!, get_vertex_meta export set_hyperedge_meta!, get_hyperedge_meta -export BipartiteView, shortest_path -export TwoSectionView +export adjacency_matrix, edge_adjacency_matrix + +export BipartiteView, TwoSectionView +export shortest_path export get_twosection_adjacency_mx, get_twosection_weighted_adjacency_mx +export dual +export random_model, random_kuniform_model, random_dregular_model, random_preferential_model export Abstract_HG_format, HGF_Format, JSON_Format export hg_load, hg_save -export nhv, nhe -export modularity, randompartition -export AbstractCommunityFinder, CFModularityRandom, CFModularityCNMLike +export modularity, nmi +export randompartition +export AbstractCommunityFinder, CFModularityRandom, CFModularityCNMLike, CFLabelPropagationFinder export findcommunities + +export nhv, nhe export random_walk export get_connected_components export conductance - -export random_model, random_kuniform_model, random_dregular_model, random_preferential_model +export AbstractDistance, SnodeDistanceDijkstra, SedgeDistanceDijkstra +export distance export HyperNetX, GraphBased export draw @@ -59,16 +68,22 @@ function support_hypernetx() end + include("hypergraph.jl") -include("bipartite.jl") include("io.jl") -include("twosection.jl") -include("modularity.jl") -include("conductance.jl") -include("models.jl") + +include("models/bipartite.jl") +include("models/twosection.jl") +include("models/random-models.jl") +include("models/dual.jl") + +include("algorithms/conductance.jl") +include("algorithms/distance.jl") + +include("algorithms/community/modularity.jl") +include("algorithms/community/label-propagation.jl") include("viz/drawing.jl") include("viz/widget.jl") - end # module diff --git a/src/algorithms/community/label-propagation.jl b/src/algorithms/community/label-propagation.jl new file mode 100644 index 0000000..5add7b1 --- /dev/null +++ b/src/algorithms/community/label-propagation.jl @@ -0,0 +1,189 @@ 
"""
    CFLabelPropagationFinder(max_iter::Int, seed::Int) <: AbstractCommunityFinder

Settings of the label propagation community search run by
`findcommunities(h, ::CFLabelPropagationFinder)`.

# Fields
- `max_iter`: upper bound on the number of propagation rounds.
- `seed`: seed of the `MersenneTwister` that drives every shuffle, so a given
  `(h, max_iter, seed)` always yields the same result.
"""
struct CFLabelPropagationFinder <: AbstractCommunityFinder
    max_iter::Int
    seed::Int
end


"""
    findcommunities(h::Hypergraph, method::CFLabelPropagationFinder)

Run the label propagation algorithm over the hypergraph `h`.

Every vertex starts with its own label. Each round consists of two phases:
hyperedge labeling (`compute_edge_label`) followed by vertex labeling
(`compute_vertex_label`). The search stops as soon as a round changes no vertex
label, or after `method.max_iter` rounds.

NOTE
The algorithm works on a single connected component.
An `AssertionError` is thrown otherwise.

This algorithm generalizes the one proposed for graphs by Raghavan et al.
(Raghavan, U. N., Albert, R., and Kumara, S. Near linear time algorithm to detect
community structures in large-scale networks. Physical review. E, Statistical,
nonlinear, and soft matter physics 76 (2007).)

For more information see `Section 4` in the paper
Alessia Antelmi, Gennaro Cordasco, Bogumił Kamiński, Paweł Prałat,
Vittorio Scarano, Carmine Spagnuolo, Przemyslaw Szufel
*Analyzing, Exploring, and Visualizing Complex Networks via Hypergraphs Using
SimpleHypergraphs.jl.*
Journal Internet Mathematics (2020). https://doi.org/10.24166/im.01.2020

# Returns
A `NamedTuple` with the fields
- `np`: vertex partition as a `Vector{Set{Int}}` (one set per community);
- `hep`: hyperedge partition as a `Vector{Set{Int}}`;
- `vlabels`: label of every vertex, indexed by vertex id;
- `helabels`: label of every hyperedge, indexed by hyperedge id (`-1` for a
  hyperedge that never received a label);
- `iter`: number of rounds actually executed.
"""
function findcommunities(h::Hypergraph, method::CFLabelPropagationFinder)
    @assert length(get_connected_components(h)) == 1

    rng = MersenneTwister(method.seed)
    nv, ne = nhv(h), nhe(h)

    # Labels are kept in dictionaries filled in ascending id order; the community
    # order of the result depends on that insertion order, so it is kept as is.
    vlabels = Dict{Int64,Int64}()
    for v in 1:nv
        vlabels[v] = v
    end
    helabels = Dict{Int64,Int64}()

    edges = collect(1:ne)
    vertices = collect(1:nv)

    iter = 0
    stop = false
    while !stop && iter < method.max_iter
        stop = true

        # Phase 1: every hyperedge adopts the dominant label of its vertices.
        shuffle!(rng, edges)
        for e in edges
            helabels[e] = compute_edge_label(h, e, vlabels, rng)
        end

        # Phase 2: every vertex adopts the dominant label of its hyperedges.
        shuffle!(rng, vertices)
        for v in vertices
            l = compute_vertex_label(h, v, vlabels, helabels, rng)
            if l != vlabels[v]
                stop = false
                vlabels[v] = l
            end
        end

        iter += 1
    end

    # Concretely typed sets, so that `np` can be handed to functions expecting a
    # `Vector{Set{Int}}` partition (e.g. `modularity`).
    comms_vertices = Dict{Int,Set{Int}}()
    for (v, l) in vlabels
        push!(get!(comms_vertices, l, Set{Int}()), v)
    end

    comms_hyperedges = Dict{Int,Set{Int}}()
    for (e, l) in helabels
        push!(get!(comms_hyperedges, l, Set{Int}()), e)
    end

    labels = Int64[vlabels[i] for i in 1:nv]
    # When no round ran (`max_iter <= 0`) hyperedges have no label yet; report the
    # same `-1` sentinel that `compute_edge_label` uses for "no label".
    hlabels = Int64[get(helabels, i, -1) for i in 1:ne]

    return (
        np=collect(values(comms_vertices)),
        hep=collect(values(comms_hyperedges)),
        vlabels=labels,
        helabels=hlabels,
        iter=iter,
    )
end


"""
    compute_vertex_label(h::Hypergraph, v::Int64, vlabels::Dict{Int64,Int64},
                         helabels::Dict{Int64,Int64}, rng::AbstractRNG)

Vertex labeling phase. Return the new label of vertex `v`.

Each hyperedge containing `v` votes for its own label with a weight equal to the
number of vertices it contains; the label with the largest total wins. If the
current label of `v` is among the winners it is kept, otherwise one of the
winners is returned. A vertex that belongs to no hyperedge keeps its label.
"""
function compute_vertex_label(
    h::Hypergraph,
    v::Int64,
    vlabels::Dict{Int64,Int64},
    helabels::Dict{Int64,Int64},
    rng::AbstractRNG,
)
    weights = Dict{Int64,Int64}()
    best = 0
    winners = Set{Int64}()

    for e in shuffle!(rng, collect(keys(gethyperedges(h, v))))
        l = helabels[e]
        total = get(weights, l, 0) + length(getvertices(h, e))
        weights[l] = total

        if total == best
            push!(winners, l)
        elseif total > best
            best = total
            winners = Set{Int64}()
            push!(winners, l)
        end
    end

    current = vlabels[v]
    # No vote at all (isolated vertex) or the current label ties for the lead.
    if isempty(winners) || in(current, winners)
        return current
    end

    return first(winners)
end


"""
    compute_edge_label(h::Hypergraph, e::Int64, vlabels::Dict{Int64,Int64},
                       rng::AbstractRNG)

Hyperedge labeling phase. Return the most frequent label among the vertices of
hyperedge `e`.

Vertices are visited in random order and the first label to reach the highest
count wins, which breaks ties randomly. `-1` is returned for a hyperedge that
contains no vertex.
"""
function compute_edge_label(
    h::Hypergraph, e::Int64, vlabels::Dict{Int64,Int64}, rng::AbstractRNG
)
    counts = Dict{Int64,Int64}()
    best = 0
    best_label = -1

    for v in shuffle!(rng, collect(keys(getvertices(h, e))))
        l = vlabels[v]
        c = get(counts, l, 0) + 1
        counts[l] = c

        if c > best
            best = c
            best_label = l
        end
    end

    return best_label
end
""" randompartition(h::Hypergraph, n::Int) = randompartition(nhv(h), n) + """ randompartition(N::Int, n::Int)::Vector{Set{Int}} @@ -18,6 +19,7 @@ function randompartition(N::Int, n::Int) res end + """ HypergraphAggs(h::Hypergraph) @@ -45,6 +47,7 @@ struct HypergraphAggs end end + """ LightGraphs.modularity(h::Hypergraph, partition::Vector{Set{Int}}, ha::HypergraphAggs=HypergraphAggs(h)) @@ -54,7 +57,7 @@ the precomputed aggregates `ha`. """ @inline function LightGraphs.modularity(h::Hypergraph, partition::Vector{Set{Int}}, ha::HypergraphAggs=HypergraphAggs(h)) - + @boundscheck sum(length.(partition)) == nhv(h) @boundscheck union(partition...) == Set(1:nhv(h)) volP_volV = [sum(ha.deg_vs[i] for i in p)/ha.volV for p in partition] @@ -64,7 +67,7 @@ end """ -The base type for all algorithms representing various community search patterns. +The base type for all algorithms representing various community search patterns. """ abstract type AbstractCommunityFinder end @@ -84,8 +87,6 @@ struct CFModularityRandom <: AbstractCommunityFinder end - - """ findcommunities(h::Hypergraph, method::CFModularityRandom) @@ -127,11 +128,10 @@ function find_first(c::Array{Set{Int}}, vals) end - """ CFModularityCNMLike(n::Int, reps::Int) <: AbstractCommunityFinder -Represents a CNM-Like algorithm for finding communities. +Represents a CNM-Like algorithm for finding communities. In the algorithm we start with a partition where each node is in its own part. Then in each step, we randomly select a hyperedge. Subsequently, we consider merging each set of that parts it touches. @@ -142,30 +142,31 @@ The algortithm iterates through `reps` of repetitions. 
# Group the positions of `p` by the label stored at each position.
function _clusters_by_label(p)
    clusters = Dict{Int64,Set{Int64}}()
    for (i, label) in enumerate(p)
        push!(get!(() -> Set{Int64}(), clusters, label), i)
    end
    return clusters
end


"""
    nmi(p1::AbstractArray{<:Integer}, p2::AbstractArray{<:Integer})

Return the normalized mutual information `2 I(A;B) / (H(A) + H(B))` between the
two labelings `p1` and `p2`, where `p[i]` is the cluster label of element `i`.

The result is `1.0` for identical partitions (whatever the label names) and `0.0`
for independent ones. When both labelings are trivial (a single cluster each, or
both empty) the entropies vanish and `1.0` is returned instead of `NaN`.

Elements are matched by position. The lengths of `p1` and `p2` are not compared;
the number of elements `n` is taken from `p1`.

For more information see the paper
Vinh, N.X., Epps, J. and Bailey, J.
_Information theoretic measures for clusterings comparison: variants, properties,
normalization and correction for chance_
Journal of Machine Learning Research, 2010, Vol. 11, No. 10, pp.2837–2854.
"""
function nmi(p1::AbstractArray{<:Integer}, p2::AbstractArray{<:Integer})
    n = length(p1)
    hp1 = _clusters_by_label(p1)
    hp2 = _clusters_by_label(p2)

    # n * I(A;B), accumulated over every non-empty cluster intersection.
    IAB = 0.0
    for i in keys(hp1)
        for j in keys(hp2)
            nhl = length(intersect(hp2[j], hp1[i]))
            if nhl != 0
                IAB += nhl * log2(n * nhl / (length(hp1[i]) * length(hp2[j])))
            end
        end
    end

    # -n * H(A) and -n * H(B); the signs cancel in the final ratio.
    HA = 0.0
    for i in keys(hp1)
        HA += length(hp1[i]) * log2(length(hp1[i]) / n)
    end

    HB = 0.0
    for j in keys(hp2)
        HB += length(hp2[j]) * log2(length(hp2[j]) / n)
    end

    # Two trivial partitions carry no entropy: they agree perfectly.
    if iszero(HA) && iszero(HB)
        return 1.0
    end

    return -(2 * IAB) / (HA + HB)
end
"""
    SnodeDistanceDijkstra(source_node::Int, target_node::Int, s::Int) <: AbstractDistance

Describe a distance query between two nodes of a hypergraph: the length of the
shortest `s`-walk from `source_node` to `target_node`. An `s`-walk between nodes
is a sequence of nodes that pairwise share at least `s` hyperedges.
"""
struct SnodeDistanceDijkstra <: AbstractDistance
    source_node::Int
    target_node::Int
    s::Int
end


"""
    SedgeDistanceDijkstra(source_edge::Int, target_edge::Int, s::Int) <: AbstractDistance

Describe a distance query between two hyperedges of a hypergraph: the length of
the shortest `s`-walk from `source_edge` to `target_edge`. An `s`-walk between
hyperedges is a sequence of hyperedges in which consecutive hyperedges intersect
in at least `s` nodes.
"""
struct SedgeDistanceDijkstra <: AbstractDistance
    source_edge::Int
    target_edge::Int
    s::Int
end


"""
    distance(h::Hypergraph, distance_method::SnodeDistanceDijkstra)

Return the shortest `distance_method.s`-walk distance between the node
`distance_method.source_node` and the node `distance_method.target_node`
in the hypergraph `h`. A `BoundsError` is thrown when either node id is not a
valid vertex index of `h`.

NOTE
The concepts of `s`-distance and `s`-walk have been defined in the
Python library [HyperNetX](https://github.com/pnnl/HyperNetX)

From [HyperNetX](https://pnnl.github.io/HyperNetX/build/_modules/classes/hypergraph.html#Hypergraph.distance)
The `s`-distance is the shortest `s`-walk length between two nodes.
An `s`-walk between nodes is a sequence of nodes that pairwise share
at least `s` edges. The length of the shortest `s`-walk is 1 less than
the number of nodes in the path sequence. If no such path exists returns typemax(T).
"""
function distance(h::Hypergraph, distance_method::SnodeDistanceDijkstra)
    src = distance_method.source_node
    dst = distance_method.target_node
    checkbounds(h.v2he, src)
    checkbounds(h.v2he, dst)

    # Nodes are linked in the graph when they are `s`-adjacent in `h`.
    graph = LightGraphs.Graph(adjacency_matrix(h; s=distance_method.s))
    paths = LightGraphs.dijkstra_shortest_paths(graph, src)
    return paths.dists[dst]
end


"""
    distance(h::Hypergraph, distance_method::SedgeDistanceDijkstra)

Return the shortest `distance_method.s`-walk distance between the hyperedge
`distance_method.source_edge` and the hyperedge `distance_method.target_edge`
in the hypergraph `h`. A `BoundsError` is thrown when either hyperedge id is not
a valid hyperedge index of `h`.

NOTE
The concepts of `s`-distance and `s`-walk have been defined in the
Python library [HyperNetX](https://github.com/pnnl/HyperNetX)

From [HyperNetX](https://pnnl.github.io/HyperNetX/build/_modules/classes/hypergraph.html#Hypergraph.edge_distance)
The `s`-distance is the shortest `s`-walk length between the edges.
An `s`-walk between edges is a sequence of edges such that consecutive pairwise
edges intersect in at least `s` nodes. The length of the shortest `s`-walk is 1 less than
the number of edges in the path sequence. If no such path exists returns typemax(T).
"""
function distance(h::Hypergraph, distance_method::SedgeDistanceDijkstra)
    src = distance_method.source_edge
    dst = distance_method.target_edge
    checkbounds(h.he2v, src)
    checkbounds(h.he2v, dst)

    # Hyperedges are linked in the graph when they are `s`-adjacent in `h`.
    graph = LightGraphs.Graph(edge_adjacency_matrix(h; s=distance_method.s))
    paths = LightGraphs.dijkstra_shortest_paths(graph, src)
    return paths.dists[dst]
end
""" @inline gethyperedges(h::Hypergraph, v_id::Int) = h.v2he[v_id] + """ add_vertex!(h::Hypergraph{T, V, E, D}; hyperedges::D = D(), v_meta::Union{V,Nothing} = nothing @@ -221,6 +225,7 @@ function add_vertex!(h::Hypergraph{T, V, E, D}; ix end + """ remove_vertex!(h::Hypergraph, v::Int) @@ -274,6 +279,7 @@ function add_hyperedge!(h::Hypergraph{T, V, E, D}; ix end + """ remove_hyperedge!(h::Hypergraph, e::Int) Removes the heyperedge `e` from a given hypergraph `h`. @@ -301,11 +307,13 @@ function remove_hyperedge!(h::Hypergraph, e::Int) h end + """ prune_hypergraph!(h::Hypergraph) -Removes all vertices with degree 0 and all hyperedges of size 0. -""" +Remove all vertices with degree 0 and all hyperedges of size 0. + +""" function prune_hypergraph!(h::Hypergraph) for e in reverse(1:nhe(h)) length(h.he2v[e]) == 0 && remove_hyperedge!(h,e) @@ -316,11 +324,14 @@ function prune_hypergraph!(h::Hypergraph) h end + """ prune_hypergraph(h::Hypergraph) -Returns a pruned copy of `h`, removing all vertices with degree 0 and all hyperedges of size 0. -""" +Return a pruned copy of `h`, removing all vertices with degree 0 and +all hyperedges of size 0. + +""" function prune_hypergraph(h::Hypergraph) prune_hypergraph!(deepcopy(h)) end @@ -340,11 +351,13 @@ function set_vertex_meta!(h::Hypergraph{T, V, E, D}, h.v_meta end + """ get_vertex_meta(h::Hypergraph{T, V, E, D}, id::Int ) where {T <: Real, V, E, D <: AbstractDict{Int,T}} Returns a meta value stored at the vertex `id` in the hypergraph `h`. 
+ """ function get_vertex_meta(h::Hypergraph{T, V, E, D}, id::Int ) where {T <: Real, V, E, D <: AbstractDict{Int,T}} @@ -352,6 +365,7 @@ function get_vertex_meta(h::Hypergraph{T, V, E, D}, id::Int h.v_meta[id] end + """ set_hyperedge_meta!(h::Hypergraph{T, V, E, D}, new_value::Union{E,Nothing}, id::Int @@ -368,6 +382,7 @@ function set_hyperedge_meta!(h::Hypergraph{T, V, E, D}, h.he_meta end + """ get_hyperedge_meta(h::Hypergraph{T, V, E, D}, id::Int) where {T <: Real, V, E, D <: AbstractDict{Int,T}} @@ -380,6 +395,7 @@ function get_hyperedge_meta(h::Hypergraph{T, V, E, D}, id::Int h.he_meta[id] end + """ nhe(h::Hypergraph) @@ -389,6 +405,7 @@ function nhe(h::Hypergraph) length(h.he2v) end + """ nhv(h::Hypergraph) @@ -398,16 +415,19 @@ function nhv(h::Hypergraph) length(h.v2he) end + function _default_heselect(h::Hypergraph, v::Int) hes = gethyperedges(h, v) sort!(collect(keys(hes))), ones(length(hes)) end + function _default_vselect(h::Hypergraph, he::Int) vs = getvertices(h, he) sort!(collect(keys(vs))), ones(length(vs)) end + """ random_walk(h::Hypergraph, start::Int; heselect::Function, vselect::Function) @@ -431,6 +451,7 @@ function random_walk(h::Hypergraph, start::Int; return sample(ves, Weights(vw)) end + """ _walk!(h::Hypergraph, s::AbstractVector{Int}, i::Int, visited::AbstractVector{Bool}) @@ -447,6 +468,7 @@ function _walk!(h::Hypergraph, s::AbstractVector{Int}, i::Int, visited::Abstract end end + """ get_connected_components(h::Hypergraph) @@ -466,5 +488,82 @@ function get_connected_components(h::Hypergraph) cc end + +""" + adjacency_matrix(h::Hypergraph; s::Int=1, weighted::Bool=false) + +The sparse weighted `s`-adjacency matrix. 
+ +NOTE +The concept of `s`-adjacency matrix has been firstly defined in the +Python library [HyperNetX](https://github.com/pnnl/HyperNetX) + +From [HyperNetX](https://pnnl.github.io/HyperNetX/build/classes/classes.html#classes.hypergraph.Hypergraph.adjacency_matrix) +If weighted is `true` each off diagonal cell will equal the number +of edges shared by the nodes indexing the row and column if that number is +greater than `s`, otherwise the cell will equal 0. If weighted is `false`, +the off diagonal cell will equal 1 if the nodes indexed by the row and column +share at least `s` edges and 0 otherwise. + +! information about the weight of a vertex in a he will be lost. + +""" +function adjacency_matrix(h; s::Int=1, weighted::Bool=true) + M = Matrix(h) + _incidence_to_adjacency(M; s=s, weighted=weighted) +end + + +""" + edge_adjacency_matrix(h::Hypergraph; s::Int=1, weighted::Bool=false) + +The sparse weighted `s`-adjacency matrix for the dual hypergraph. + +NOTE +The concept of `s`-adjacency matrix has been firstly defined in the +Python library [HyperNetX](https://github.com/pnnl/HyperNetX) + +From [HyperNetX](https://pnnl.github.io/HyperNetX/build/classes/classes.html#classes.hypergraph.Hypergraph.edge_adjacency_matrix) +This is also the adjacency matrix for the line graph. +Two edges are `s`-adjacent if they share at least `s` nodes. + +If weighted is `true` each off diagonal cell will equal the number +of nodes shared by the hyperedges indexing the row and column if that number is +greater than `s`, otherwise the cell will equal 0. If weighted is `false`, +the off diagonal cell will equal 1 if the hyperedges indexed by the row and column +share at least `s` nodes and 0 otherwise. 
+ +""" +function edge_adjacency_matrix(h; s::Int=1, weighted::Bool=true) + M = Matrix(h) + M[M .== nothing] .= 0 + _incidence_to_adjacency(transpose(M); s=s, weighted=weighted) +end + + +""" + _incidence_to_adjacency(M; s::Int=1, weighted::Bool=true) + +Helper method to obtain adjacency matrix from incidence matrix. + +""" +function _incidence_to_adjacency(M; s::Int=1, weighted::Bool=true) + M[M .== nothing] .= 0 + M[M .> 0] .= 1 + + A = *(M, transpose(M)) + A[diagind(A)] .= 0 + + if s > 1 + A = A .* (A .>= s) + end + if !weighted + A = (A .> 0) .* 1 + end + + A +end + + # TODO find connected components without recurrence # TODO needs validate_hypergraph!(h::Hypergraph{T}) diff --git a/src/bipartite.jl b/src/models/bipartite.jl similarity index 100% rename from src/bipartite.jl rename to src/models/bipartite.jl diff --git a/src/models/dual.jl b/src/models/dual.jl new file mode 100644 index 0000000..3672ba2 --- /dev/null +++ b/src/models/dual.jl @@ -0,0 +1,27 @@ + +""" + dual(h::Hypergraph) + +Return the dual of the hypergraph `h`. + +NOTE +`h` needs to have at least one dimension greater than 0. +""" + +function dual(h::Hypergraph) + @assert(nhv(h)>0 || nhe(h)>0) + + T = nhv(h) > 0 ? eltype(values(h.v2he[1])) : eltype(values(h.he2v[1])) + V = isa(eltype(h.he_meta), Union) ? eltype(h.he_meta).b : Nothing + E = isa(eltype(h.v_meta), Union) ? 
"""
    dual(h::Hypergraph)

Return the dual of the hypergraph `h`: every hyperedge of `h` becomes a vertex
of the dual and every vertex of `h` becomes a hyperedge of the dual. Weights are
preserved, and vertex and hyperedge metadata are swapped.

NOTE
`h` needs to have at least one dimension greater than 0.
An `AssertionError` is thrown otherwise.
"""
function dual(h::Hypergraph{T,V,E}) where {T,V,E}
    @assert(nhv(h) > 0 || nhe(h) > 0)

    # Transposed incidence matrix: rows are hyperedges of `h`, columns its vertices.
    mx = Matrix{Union{Nothing,T}}(nothing, nhe(h), nhv(h))
    for v in 1:nhv(h)
        for (he, weight) in h.v2he[v]
            mx[he, v] = weight
        end
    end

    # The metadata types swap roles together with the metadata vectors.
    return Hypergraph{T,E,V}(mx; v_meta=h.he_meta, he_meta=h.v_meta)
end
Hypergraph{Float64}(3,3) ht[1:2,1:2] .= 2. @@ -290,8 +263,38 @@ end; end; +@testset "SimpleHypergraphs random-models " begin + + Hᵣ = random_model(5,5) + @test nhv(Hᵣ) == 5 + @test nhe(Hᵣ) == 5 + @test all(length.(Hᵣ.v2he) .> 0) + @test all(length.(Hᵣ.v2he) .<= 5) + + Hᵣ2 = random_model(5,0) + add_hyperedge!(Hᵣ2;vertices=Dict(2 => true, 4 => true)) + @test nhv(Hᵣ2) == 5 + @test nhe(Hᵣ2) == 1 + + Hᵣ3 = random_model(5,1) + @test nhe(Hᵣ3) == 1 + + Hκ = random_kuniform_model(5, 5, 3) + @test nhv(Hκ) == 5 + @test nhe(Hκ) == 5 + @test all(length.(Hκ.he2v) .== 3) -@testset "SimpleHypergraphs Modularity " begin + Hδ = random_dregular_model(5, 5, 3) + @test nhv(Hδ) == 5 + @test nhe(Hδ) == 5 + @test all(length.(Hδ.v2he) .== 3) + + H∂ = random_preferential_model(20, 0.5) + @test nhv(H∂) == 20 +end; + + +@testset "SimpleHypergraphs modularity " begin Random.seed!(1234); hg = Hypergraph{Bool}(10, 12) for i in eachindex(hg) @@ -338,11 +341,10 @@ end; @test findcommunities(hh, CFModularityRandom(4,10000)).bm ≈ findcommunities(hh, cnm).bm Random.seed!(0); @test findcommunities(hh, cnm).bm ≈ 223/972 +end; -end; - # -@testset "SimpleHypergraphs randomized tests" begin +@testset "SimpleHypergraphs randomized tests " begin Random.seed!(0) N = 100 res = Vector{Bool}(undef, N) @@ -358,7 +360,30 @@ end; @test sum(res) >= N*0.80 end -@testset "SimpleHypergraphs randomwalk " begin + +@testset "SimpleHypergraphs label propagation " begin + Random.seed!(1234); + hg = Hypergraph{Bool}(10, 12) + for i in eachindex(hg) + if rand() < 0.2 + hg[i] = true + end + end + + cflp = CFLabelPropagationFinder(100, 1234) + @test_throws AssertionError findcommunities(hg, cflp) + + h = Hypergraph(11, 2) + h[1:5, 1] .= true + h[5:11, 2] .= true + + comms = findcommunities(h, cflp) + @test comms.np == [Set([7, 9, 10, 11, 8, 5, 6]), Set([4, 2, 3, 1])] + @test comms.hep == Set[Set([2]), Set([1])] +end; + + +@testset "SimpleHypergraphs randomwalk " begin h1 = Hypergraph{Float64}(5,4) h1[1:3,1] .= 1.5 h1[3,4] = 
2.5 @@ -381,6 +406,7 @@ end @test_throws ArgumentError random_walk(h1, 0) end + @testset "SimpleHypergraphs connected components" begin bip = LightGraphs.SimpleGraph(BipartiteView(h1)) cc = LightGraphs.connected_components(bip) @@ -392,7 +418,8 @@ end @test typeof(cc2) == Vector{Vector{Int}} end -@testset "SimpleHypergraphs hypernetx bridge" begin + +@testset "SimpleHypergraphs hypernetx bridge " begin if (!SimpleHypergraphs.support_hypernetx()) @warn "HyperNetX is not installed. Skipping hypernetx tests" @@ -432,7 +459,8 @@ end @test SimpleHypergraphs.get_next_div_id() == 2 end; -@testset "SimpleHypergraphs conductance" begin + +@testset "SimpleHypergraphs conductance " begin h = Hypergraph{Float64, Int}(5,4) h[1:3,1] .= 1 h[3,4] = 1 @@ -458,3 +486,117 @@ end; @test_throws ErrorException SimpleHypergraphs.conductance(h, Set{Int}()) @test_throws ErrorException SimpleHypergraphs.conductance(h, Set(1:nhv(h))) end; + + +@testset "SimpleHypergraphs dual " begin + m = [ + 1 nothing nothing 4 + 1 2 3 nothing + 1 2 3 4 + nothing 2 3 nothing + nothing nothing 3 nothing + nothing nothing nothing 4 + ] + + v_meta = Array{Union{Nothing, Char}, 1}(collect('a':'f')) + he_meta = Array{Union{Nothing, Symbol}, 1}(Symbol.(collect('A':'D'))) + + h = Hypergraph{Int, Char, Symbol}(m; v_meta=v_meta, he_meta=he_meta) + h_dual = dual(h) + + @test nhv(h_dual) == nhe(h) + @test nhe(h_dual) == nhv(h) + + @test h.v_meta == h_dual.he_meta + @test h.he_meta == h_dual.v_meta + + m_dual = Matrix(h_dual) + m_dual[m_dual .== nothing] .= 0 + m[m .== nothing] .= 0 + + @test m == transpose(m_dual) + + @test_throws AssertionError dual(Hypergraph(0, 0)) +end; + + +@testset "SimpleHypergraphs s-distance " begin + h = Hypergraph{Int}(6,4) + + h[1:3, 1] .= 1 + h[2:4, 2] .= 2 + h[2:5, 3] .= 3 + h[1, 4] = 4 + h[3, 4] = 4 + h[6, 4] = 4 + + @test adjacency_matrix(Matrix(h); s=1) == [ + 0 1 2 0 0 1 + 1 0 3 2 1 0 + 2 3 0 2 1 1 + 0 2 2 0 1 0 + 0 1 1 1 0 0 + 1 0 1 0 0 0 + ] + + @test adjacency_matrix(Matrix(h); 
s=1, weighted=false) == [ + 0 1 1 0 0 1 + 1 0 1 1 1 0 + 1 1 0 1 1 1 + 0 1 1 0 1 0 + 0 1 1 1 0 0 + 1 0 1 0 0 0 + ] + + @test edge_adjacency_matrix(h; s=1, weighted=false) == [ + 0 1 1 1 + 1 0 1 1 + 1 1 0 1 + 1 1 1 0 + ] + + @test edge_adjacency_matrix(h; s=1) == [ + 0 2 2 2 + 2 0 3 1 + 2 3 0 1 + 2 1 1 0 + ] + + @test adjacency_matrix(Matrix(h); s=2) == [ + 0 0 2 0 0 0 + 0 0 3 2 0 0 + 2 3 0 2 0 0 + 0 2 2 0 0 0 + 0 0 0 0 0 0 + 0 0 0 0 0 0 + ] + + + @test distance(h, SnodeDistanceDijkstra(1, 5, 1)) == 2 + @test distance(h, SnodeDistanceDijkstra(3, 6, 1)) == 1 + + @test distance(h, SnodeDistanceDijkstra(1, 2, 2)) == 2 + @test distance(h, SnodeDistanceDijkstra(1, 6, 2)) == typemax(Int) + + @test distance(h, SedgeDistanceDijkstra(1, 3, 1)) == 1 + @test distance(h, SedgeDistanceDijkstra(2, 3, 3)) == 1 + @test distance(h, SedgeDistanceDijkstra(1, 3, 3)) == typemax(Int) +end; + + +@testset "SimpleHypergraphs nmi " begin + h = Hypergraph{Int}(6,4) + + h[1:3, 1] .= 1 + h[2:4, 2] .= 2 + h[2:5, 3] .= 3 + h[1, 4] = 4 + h[3, 4] = 4 + h[6, 4] = 4 + + cflp = CFLabelPropagationFinder(100, 1234) + comms_lp = findcommunities(h, cflp) + + @test nmi(comms_lp.vlabels, fill(1, 5)) < 0.1 + @test abs(nmi(comms_lp.vlabels, comms_lp.vlabels) - 1) < 1e-15 +end;