diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 0000000..44c844d --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,278 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.11.0" +manifest_format = "2.0" +project_hash = "13d218d6d2706fb3bd517429ded6424f857ee091" + +[[deps.ArgParse]] +deps = ["Logging", "TextWrap"] +git-tree-sha1 = "22cf435ac22956a7b45b0168abbc871176e7eecc" +uuid = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" +version = "1.2.0" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.2" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.11.0" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +version = "1.11.0" + +[[deps.Compat]] +deps = ["TOML", "UUIDs"] +git-tree-sha1 = "8ae8d32e09f0dcf42a36b90d4e17f5dd2e4c4215" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "4.16.0" +weakdeps = ["Dates", "LinearAlgebra"] + + [deps.Compat.extensions] + CompatLinearAlgebraExt = "LinearAlgebra" + +[[deps.CompilerSupportLibraries_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "1.1.1+0" + +[[deps.DataAPI]] +git-tree-sha1 = "abe83f3a2f1b857aac70ef8b269080af17764bbe" +uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" +version = "1.16.0" + +[[deps.DataStructures]] +deps = ["Compat", "InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "1d0a14036acb104d9e89698bd408f63ab58cdc82" +uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.18.20" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" +version = "1.11.0" + +[[deps.DocStringExtensions]] +deps = ["LibGit2"] +git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.9.3" + +[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + 
+[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" +version = "1.11.0" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +version = "1.11.0" + +[[deps.IrrationalConstants]] +git-tree-sha1 = "630b497eafcc20001bba38a4651b327dcfc491d2" +uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" +version = "0.2.2" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.4" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "8.6.0+0" + +[[deps.LibGit2]] +deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" +version = "1.11.0" + +[[deps.LibGit2_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] +uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" +version = "1.7.2+0" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.11.0+1" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +version = "1.11.0" + +[[deps.LinearAlgebra]] +deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +version = "1.11.0" + +[[deps.LogExpFunctions]] +deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"] +git-tree-sha1 = "a2d09619db4e765091ee5c6ffe8872849de0feea" +uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" +version = "0.3.28" + + [deps.LogExpFunctions.extensions] + LogExpFunctionsChainRulesCoreExt = "ChainRulesCore" + LogExpFunctionsChangesOfVariablesExt = "ChangesOfVariables" + LogExpFunctionsInverseFunctionsExt = "InverseFunctions" + + [deps.LogExpFunctions.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" + InverseFunctions = 
"3587e190-3f89-42d0-90ee-14403ec27112" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +version = "1.11.0" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" +version = "1.11.0" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.6+0" + +[[deps.Missings]] +deps = ["DataAPI"] +git-tree-sha1 = "ec4f7fbeab05d7747bdf98eb74d130a2a2ed298d" +uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +version = "1.2.0" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2023.12.12" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" + +[[deps.OpenBLAS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.27+1" + +[[deps.OrderedCollections]] +git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.6.3" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "Random", "SHA", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.11.0" + + [deps.Pkg.extensions] + REPLExt = "REPL" + + [deps.Pkg.weakdeps] + REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" +version = "1.11.0" + +[[deps.Random]] +deps = ["SHA"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +version = "1.11.0" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +version = "1.11.0" + +[[deps.SortingAlgorithms]] +deps = ["DataStructures"] +git-tree-sha1 = "66e0a8e672a0bdfca2c3f5937efb8538b9ddc085" +uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" +version = "1.2.1" + +[[deps.SparseArrays]] +deps = 
["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +version = "1.11.0" + +[[deps.Statistics]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "ae3bb1eb3bba077cd276bc5cfc337cc65c3075c0" +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +version = "1.11.1" +weakdeps = ["SparseArrays"] + + [deps.Statistics.extensions] + SparseArraysExt = ["SparseArrays"] + +[[deps.StatsAPI]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "1ff449ad350c9c4cbc756624d6f8a8c3ef56d3ed" +uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" +version = "1.7.0" + +[[deps.StatsBase]] +deps = ["DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"] +git-tree-sha1 = "5cf7606d6cef84b543b483848d4ae08ad9832b21" +uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +version = "0.34.3" + +[[deps.SuiteSparse_jll]] +deps = ["Artifacts", "Libdl", "libblastrampoline_jll"] +uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" +version = "7.7.0+0" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" + +[[deps.TextWrap]] +git-tree-sha1 = "43044b737fa70bc12f6105061d3da38f881a3e3c" +uuid = "b718987f-49a8-5099-9789-dcd902bef87d" +version = "1.0.2" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" +version = "1.11.0" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" +version = "1.11.0" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.13+1" + +[[deps.libblastrampoline_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" +version = "5.11.0+0" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.59.0+0" + 
+[[deps.p7zip_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
+version = "17.4.0+2"
diff --git a/src/ABCDGraphGenerator.jl b/src/ABCDGraphGenerator.jl
index d0a7a62..d977455 100644
--- a/src/ABCDGraphGenerator.jl
+++ b/src/ABCDGraphGenerator.jl
@@ -5,6 +5,7 @@ using StatsBase
 using ArgParse
 
 include("pl_sampler.jl")
+include("community_sampler.jl")
 include("graph_sampler.jl")
 
 end # module
diff --git a/src/community_sampler.jl b/src/community_sampler.jl
new file mode 100644
index 0000000..c4c24ae
--- /dev/null
+++ b/src/community_sampler.jl
@@ -0,0 +1,104 @@
+using Random
+using StatsBase
+
+"""
+    sample_points(n)
+
+Return an `n × 2` matrix of random points in the unit disk, one point per row.
+
+Each row is a Gaussian vector normalized to unit length (a random direction) which is
+then scaled by the square root of a uniform random number from `[0, 1)`.
+"""
+function sample_points(n)
+    points = randn(n, 2)
+    points ./= sqrt.(sum(x -> x^2, points, dims=2))
+    points .*= rand(n) .^ 0.5
+    return points
+end
+
+"""
+    assign_points(x, c, p)
+
+Split the rows of the matrix `x` into `length(c)` disjoint groups; group `i` gets `c[i]` rows.
+
+Groups are created in the order given by `p`. For each group the remaining point that is
+farthest from the origin is taken as a reference and the group is formed from the
+remaining points that are closest to this reference.
+Return a vector whose `i`-th entry holds the row indices of `x` assigned to group `i`.
+"""
+function assign_points(x, c, p)
+    @assert ndims(x) == 2
+    @assert sum(c) == size(x, 1)
+    @assert length(c) == length(p)
+    x = copy(x)
+    all_idxs = collect(1:size(x, 1))
+    dist = vec(sum(x -> x^2, x, dims=2))
+    res = Vector{Vector{Int}}(UndefInitializer(), length(c))
+    for idx in p
+        com = c[idx]
+        ind = argmax(dist)
+        ref = x[ind, :]
+        dist_c = [sum(x -> x^2, (r - ref)) for r in eachrow(x)]
+        idxs = partialsortperm(dist_c, 1:com)
+        res[idx] = all_idxs[idxs]
+        to_keep = setdiff(1:size(x, 1), idxs)
+        x = x[to_keep, :]
+        dist = dist[to_keep]
+        all_idxs = all_idxs[to_keep]
+    end
+    @assert size(x, 1) == 0
+    @assert length(all_idxs) == 0
+    @assert length(dist) == 0
+    @assert sort(union(res...)) == 1:sum(c)
+    return res
+end
+
+"""
+    populate_overlapping_clusters(coms::Vector{Int}, η::Float64)
+
+Assign non-outlier nodes to overlapping communities.
+
+`coms[1]` (the outlier count) is skipped and `coms[2:end]` are the primary community sizes.
+Every community first gets its primary members and is then grown to `η` times its primary
+size (rounded by `randround`) by adding the points nearest to its center that are not its members yet.
+
+Note that this function returns node numbers from 1 to number_of_non_outlier_nodes;
+for each community we get a set of nodes assigned to it.
+Throws `ArgumentError` if a community cannot reach its target size.
+"""
+function populate_overlapping_clusters(coms::Vector{Int}, η::Float64)
+    true_coms = coms[2:end] # we are interested only in non-outlier communities
+    grow_coms = [randround(s * η) for s in true_coms] # this is a target size of communities, as coms is primary community sizes
+    p = randperm(length(true_coms)) # order in which communities are handled
+    n = sum(true_coms)
+    x = sample_points(n)
+    a = assign_points(x, true_coms, p)
+    @assert length.(a) == true_coms
+    @assert sum(length, a) == sum(true_coms)
+
+    # below we grow communities
+    @assert length(a) == length(grow_coms)
+    for (com, target) in zip(a, grow_coms)
+        # center of the community: mean of both coordinates of its members; the rows must be
+        # selected with x[com, :] as x[com] would index the matrix linearly (first column only)
+        community_center = vec(mean(x[com, :], dims=1))
+        distances = [sum((v .- community_center) .^ 2) for v in eachrow(x)]
+        ordering = sortperm(distances)
+        com_set = Set(com)
+        loc = 1
+        while length(com) < target
+            if loc > length(ordering)
+                throw(ArgumentError("η was too large"))
+            end
+            point = ordering[loc]
+            if !(point in com_set)
+                push!(com, point)
+            end
+            loc += 1
+        end
+    end
+    @assert length.(a) == grow_coms
+    @assert sum(length, a) == sum(grow_coms)
+    @assert all(allunique(c) for c in a)
+    return [Set(c) for c in a]
+end
\ No newline at end of file
diff --git a/src/graph_sampler.jl b/src/graph_sampler.jl
index 965c834..b7dc112 100644
--- a/src/graph_sampler.jl
+++ b/src/graph_sampler.jl
@@ -2,59 +2,39 @@
     ABCDParams
 
 A structure holding parameters for ABCD graph generator. Fields:
-* w::Vector{Int}: a sorted in descending order list of vertex degrees
-* s::Vector{Int}: a sorted in descending order list of cluster sizes
-  if hasoutliers then first community is outliers
-* μ::Union{Float64, Nothing}: mixing parameter
-* ξ::Union{Float64, Nothing}: background graph fraction
-* isCL::Bool: if `true` a Chung-Lu model is used, otherwise configuration model
-* islocal::Bool: if `true` mixing parameter restriction is cluster local, otherwise
-  it is only global
-* hasoutliers::Bool: if first community is outliers
-
-Exactly one of ξ and μ must be passed as `Float64`. Also if `ξ` is passed then
-`islocal` must be `false`.
-
-The base ABCD graph is generated when ξ is passed and `isCL` is set to `false`.
+* w::Vector{Int}: a sorted in descending order list of vertex degrees
+* s::Vector{Int}: a sorted in descending order list of cluster sizes
+  (except first community which is outliers);
+  cluster sizes must be for primary community
+* ξ::Float64: background graph fraction
+* η::Float64: average number of communities a non-outlier node is part of; must be at least 1
+
+The graph will be generated using configuration model global approach.
 """
 struct ABCDParams
     w::Vector{Int}
     s::Vector{Int}
-    μ::Union{Float64, Nothing}
-    ξ::Union{Float64, Nothing}
-    isCL::Bool
-    islocal::Bool
-    hasoutliers::Bool
+    ξ::Float64
+    η::Float64
 
-    function ABCDParams(w, s, μ, ξ, isCL, islocal, hasoutliers=false)
+    function ABCDParams(w::Vector{Int}, s::Vector{Int}, ξ::Float64, η::Float64)
+        all(>(0), w) || throw(ArgumentError("all degrees must be positive"))
         length(w) == sum(s) || throw(ArgumentError("inconsistent data"))
-        if !isnothing(μ)
-            0 ≤ μ ≤ 1 || throw(ArgumentError("inconsistent data on μ"))
-        end
-        if !isnothing(ξ)
-            0 ≤ ξ ≤ 1 || throw(ArgumentError("inconsistent data ξ"))
-            if islocal
-                throw(ArgumentError("when ξ is provided local model is not allowed"))
-            end
-        end
-        if isnothing(μ) && isnothing(ξ)
-            throw(ArgumentError("inconsistent data: either μ or ξ must be provided"))
-        end
+        length(s) < 2 && throw(ArgumentError("no communities requested"))
+        s[1] >= 0 || throw(ArgumentError("negative count of outliers passed"))
+        0 ≤ ξ ≤ 1 || throw(ArgumentError("inconsistent data ξ"))
+        η < 1 && throw(ArgumentError("η must be greater or equal than 1"))
 
-        if !(isnothing(μ) || isnothing(ξ))
-            throw(ArgumentError("inconsistent data: only μ or ξ may be provided"))
-        end
+        news = copy(s)
+        all(>(0), @view(news[2:end])) || throw(ArgumentError("all community sizes must be positive"))
+        sort!(@view(news[2:end]), rev=true)
 
-        if hasoutliers
-            news = copy(s)
-            sort!(@view(news[2:end]), rev=true)
-        else
-            news = sort(s, rev=true)
+        largest = news[2] # size of largest non-outlier community
+        if η * largest > length(w) - news[1]
+            throw(ArgumentError("η must be small enough so that overlapping communities are not too big"))
         end
 
-        new(sort(w, rev=true),
-            news,
-            μ, ξ, isCL, islocal, hasoutliers)
+        new(sort(w, rev=true), news, ξ, η)
     end
 end
 
@@ -65,311 +45,93 @@ end
 
+"""
+    populate_clusters(params::ABCDParams)
+
+Assign every vertex to the communities it belongs to.
+
+Return a vector with one entry per vertex (in the order of `params.w`) holding the list of
+community numbers of that vertex; outliers get `[1]`, other vertices get numbers `2` and up.
+Throws `ArgumentError` if there are not enough vertices that may be outliers or if no
+large enough community can be found for some vertex.
+"""
 function populate_clusters(params::ABCDParams)
     w, s = params.w, params.s
-    if isnothing(params.ξ)
-        mul = 1.0 - params.μ
-    else
-        n = length(w)
-        if params.hasoutliers
-            s0 = s[1]
-            n = length(params.w)
-            ϕ = 1.0 - sum((sl/(n-s0))^2 for sl in s[2:end]) * (n-s0)*params.ξ / ((n-s0)*params.ξ + s0)
-        else
-            ϕ = 1.0 - sum((sl/n)^2 for sl in s)
-        end
-        mul = 1.0 - params.ξ*ϕ
-    end
+
+    n = length(w)
+    s0 = s[1]
+    ϕ = 1.0 - sum((sl/(n-s0))^2 for sl in s[2:end]) * (n-s0)*params.ξ / ((n-s0)*params.ξ + s0)
+    mul = 1.0 - params.ξ*ϕ
+
     @assert length(w) == sum(s)
     @assert 0 ≤ mul ≤ 1
     @assert issorted(w, rev=true)
-    if params.hasoutliers
-        @assert issorted(s[2:end], rev=true)
-    else
-        @assert issorted(s, rev=true)
-    end
-
-    slots = copy(s)
-    clusters = fill(-1, length(w))
-
-    if params.hasoutliers
-        nout = s[1]
-        n = length(params.w)
-        L = sum(d -> min(1.0, params.ξ * d), params.w)
-        threshold = L + nout - L * nout / n - 1.0
-        idx = findfirst(<=(threshold), params.w)
-        @assert all(i -> params.w[i] <= threshold, idx:n)
-        if length(idx:n) < nout
-            throw(ArgumentError("not enough nodes feasible for classification as outliers"))
-        end
-        tabu = sample(idx:n, nout, replace=false)
-        clusters[tabu] .= 1
-        slots[1] = 0
-        stabu = Set(tabu)
-    else
-        stabu = Set{Int}()
-    end
-
-    j0 = params.hasoutliers ? 1 : 0
-    j = j0
-    for (i, vw) in enumerate(w)
-        i in stabu && continue
-        while j + 1 ≤ length(s) && mul * vw + 1 ≤ s[j + 1]
-            j += 1
-        end
-        j == j0 && throw(ArgumentError("could not find a large enough cluster for vertex of weight $vw"))
-        wts = Weights(view(slots, (j0+1):j))
-        wts.sum == 0 && throw(ArgumentError("could not find an empty slot for vertex of weight $vw"))
-        loc = sample((j0+1):j, wts)
-        clusters[i] = loc
-        slots[loc] -= 1
-    end
-    @assert sum(slots) == 0
-    @assert minimum(clusters) == 1
-    return clusters
-end
+    @assert issorted(s[2:end], rev=true)
 
-function CL_model(clusters, params)
-    @assert !params.hasoutliers
-    @assert params.isCL
-    w, s, μ = params.w, params.s, params.μ
-    cluster_weight = zeros(Int, length(s))
-    for i in axes(w, 1)
-        cluster_weight[clusters[i]] += w[i]
-    end
-    total_weight = sum(cluster_weight)
-    if params.islocal
-        ξl = @. μ / (1.0 - cluster_weight / total_weight)
-        maximum(ξl) >= 1 && throw(ArgumentError("μ is too large to generate a graph"))
-    else
-        if isnothing(params.ξ)
-            ξg = μ / (1.0 - sum(x -> x^2, cluster_weight) / total_weight^2)
-            ξg >= 1 && throw(ArgumentError("μ is too large to generate a graph"))
-        else
-            ξg = params.ξ
-        end
-    end
+    slots = copy(s) # number of slots left in a community to be assigned
+    clusters = [Int[] for i in 1:length(w)] # primary cluster of a node, [1] is outlier community, Int[] is no community yet
 
-    wf = float.(w)
-    edges = Set{Tuple{Int, Int}}()
-    for i in axes(s, 1)
-        local_edges = Set{Tuple{Int, Int}}()
-        idxᵢ = findall(==(i), clusters)
-        wᵢ = wf[idxᵢ]
-        ξ = params.islocal ? ξl[i] : ξg
-        m = randround((1-ξ) * sum(wᵢ) / 2)
-        ww = Weights(wᵢ)
-        while length(local_edges) < m
-            a = sample(idxᵢ, ww, m - length(local_edges))
-            b = sample(idxᵢ, ww, m - length(local_edges))
-            for (p, q) in zip(a, b)
-                p != q && push!(local_edges, minmax(p, q))
-            end
-        end
-        union!(edges, local_edges)
-    end
-    wwt = if params.islocal
-        Weights([ξl[clusters[i]]*x for (i,x) in enumerate(wf)])
-    else
-        Weights(ξg * wf)
+    # handle outliers
+    nout = s[1]
+    n = length(params.w)
+    L = sum(d -> min(1.0, params.ξ * d), params.w)
+    threshold = L + nout - L * nout / n - 1.0 # we cannot put too heavy nodes as outliers
+    idx = findfirst(<=(threshold), params.w)
+    # findfirst returns `nothing` when no node is light enough; use an empty range in that case
+    feasible = isnothing(idx) ? (n+1:n) : (idx:n)
+    @assert all(i -> params.w[i] <= threshold, feasible)
+    if length(feasible) < nout
+        throw(ArgumentError("not enough nodes feasible for classification as outliers"))
     end
-    while 2*length(edges) < total_weight
-        a = sample(axes(w, 1), wwt, randround(total_weight / 2) - length(edges))
-        b = sample(axes(w, 1), wwt, randround(total_weight / 2) - length(edges))
-        for (p, q) in zip(a, b)
-            p != q && push!(edges, minmax(p, q))
-        end
+    tabu = sample(feasible, nout, replace=false)
+    for i in tabu
+        push!(clusters[i], 1) # outlier community
     end
-    edges
-end
+    stabu = Set(tabu) # stabu is a set of indices already used up
 
-function config_model(clusters, params)
-    @assert !params.isCL
-    @assert !params.islocal
-    w, s, μ = params.w, params.s, params.μ
-
-    cluster_weight = zeros(Int, length(s))
-    for i in axes(w, 1)
-        cluster_weight[clusters[i]] += w[i]
-    end
-    total_weight = sum(cluster_weight)
-    if params.islocal
-        ξl = @. μ / (1.0 - cluster_weight / total_weight)
-        maximum(ξl) >= 1 && throw(ArgumentError("μ is too large to generate a graph"))
-        w_internal_raw = [w[i] * (1 - ξl[clusters[i]]) for i in axes(w, 1)]
-    else
-        if isnothing(params.ξ)
-            @assert !params.hasoutliers
-            ξg = μ / (1.0 - sum(x -> x^2, cluster_weight) / total_weight^2)
-            ξg >= 1 && throw(ArgumentError("μ is too large to generate a graph"))
-        else
-            ξg = params.ξ
-        end
-        w_internal_raw = [w[i] * (1 - ξg) for i in axes(w, 1)]
-        if params.hasoutliers
-            for i in findall(==(1), clusters)
-                w_internal_raw[i] = 0
-            end
-        end
-    end
+    # handle normal communities
+    # note that numbers assigned to communities are from 1 to sum(slots[2:end]) so remapping is needed later
+    slots_less_1 = slots[2:end]
+    cluster_assignments = populate_overlapping_clusters(slots, params.η)
+    ηu = [count(c -> i in c, cluster_assignments) for i in 1:sum(slots_less_1)] # count in how many communities each number belongs
+    @assert minimum(ηu) >= 1
+    min_com = [minimum(length(c) - 1 for c in cluster_assignments if i in c) for i in 1:sum(slots_less_1)] # minimum size of a community less one for each number
+    max_degree = (ηu .* min_com) / mul
 
-    clusterlist = [Int[] for i in axes(s, 1)]
-    for i in axes(clusters, 1)
-        push!(clusterlist[clusters[i]], i)
+    for (i, vw) in enumerate(w)
+        i in stabu && continue # skip nodes in outlier community
+        good_idxs = findall(md -> vw <= md, max_degree) # later make it faster, but for now leave a simple implementation
+        isempty(good_idxs) && throw(ArgumentError("could not find a large enough cluster for vertex of weight $vw with index $i"))
+        chosen_idx = rand(good_idxs)
+        clusters[i] = findall(c -> chosen_idx in c, cluster_assignments) .+ 1 # write down cluster numbers of chosen node; need to add 1 as first cluster is for outliers
+        max_degree[chosen_idx] = -1 # make sure we will not use chosen_idx later; note that this needs refactoring if the code is optimized for speed later
     end
 
-    edges = Set{Tuple{Int, Int}}()
-    unresolved_collisions = 0
-    w_internal = zeros(Int, length(w_internal_raw))
-    for cluster in clusterlist
-        maxw_idx = argmax(view(w_internal_raw, cluster))
-        wsum = 0
-        for i in axes(cluster, 1)
-            if i != maxw_idx
-                neww = randround(w_internal_raw[cluster[i]])
-                w_internal[cluster[i]] = neww
-                wsum += neww
-            end
-        end
-        maxw = floor(Int, w_internal_raw[cluster[maxw_idx]])
-        w_internal[cluster[maxw_idx]] = maxw + (isodd(wsum) ? iseven(maxw) : isodd(maxw))
-        if w_internal[cluster[maxw_idx]] > w[cluster[maxw_idx]]
-            @assert w[cluster[maxw_idx]] + 1 == w_internal[cluster[maxw_idx]]
-            w[cluster[maxw_idx]] += 1
-        end
-
-        if params.hasoutliers && cluster === clusterlist[1]
-            @assert findall(clusters .== 1) == cluster
-            @assert all(iszero, w_internal[cluster])
-        end
-        stubs = Int[]
-        for i in cluster
-            for j in 1:w_internal[i]
-                push!(stubs, i)
-            end
-        end
-        @assert sum(w_internal[cluster]) == length(stubs)
-        @assert iseven(length(stubs))
-        if params.hasoutliers && cluster === clusterlist[1]
-            @assert isempty(stubs)
-        end
-        shuffle!(stubs)
-        local_edges = Set{Tuple{Int, Int}}()
-        recycle = Tuple{Int,Int}[]
-        for i in 1:2:length(stubs)
-            e = minmax(stubs[i], stubs[i+1])
-            if (e[1] == e[2]) || (e in local_edges)
-                push!(recycle, e)
-            else
-                push!(local_edges, e)
-            end
-        end
-        last_recycle = length(recycle)
-        recycle_counter = last_recycle
-        while !isempty(recycle)
-            recycle_counter -= 1
-            if recycle_counter < 0
-                if length(recycle) < last_recycle
-                    last_recycle = length(recycle)
-                    recycle_counter = last_recycle
-                else
-                    break
-                end
-            end
-            p1 = popfirst!(recycle)
-            from_recycle = 2 * length(recycle) / length(stubs)
-            success = false
-            for _ in 1:2:length(stubs)
-                p2 = if rand() < from_recycle
-                    used_recycle = true
-                    recycle_idx = rand(axes(recycle, 1))
-                    recycle[recycle_idx]
-                else
-                    used_recycle = false
-                    rand(local_edges)
-                end
-                if rand() < 0.5
-                    newp1 = minmax(p1[1], p2[1])
-                    newp2 = minmax(p1[2], p2[2])
-                else
-                    newp1 = minmax(p1[1], p2[2])
-                    newp2 = minmax(p1[2], p2[1])
-                end
-                if newp1 == newp2
-                    good_choice = false
-                elseif (newp1[1] == newp1[2]) || (newp1 in local_edges)
-                    good_choice = false
-                elseif (newp2[1] == newp2[2]) || (newp2 in local_edges)
-                    good_choice = false
-                else
-                    good_choice = true
-                end
-                if good_choice
-                    if used_recycle
-                        recycle[recycle_idx], recycle[end] = recycle[end], recycle[recycle_idx]
-                        pop!(recycle)
-                    else
-                        pop!(local_edges, p2)
-                    end
-                    success = true
-                    push!(local_edges, newp1)
-                    push!(local_edges, newp2)
-                    break
-                end
-            end
-            success || push!(recycle, p1)
-        end
-        old_len = length(edges)
-        union!(edges, local_edges)
-        @assert length(edges) == old_len + length(local_edges)
-        @assert 2 * (length(local_edges) + length(recycle)) == length(stubs)
-        for (a, b) in recycle
-            w_internal[a] -= 1
-            w_internal[b] -= 1
-        end
-        unresolved_collisions += length(recycle)
-    end
+    @assert sum(length, clusters) == s0 + sum(length, cluster_assignments)
 
-    if unresolved_collisions > 0
-        println("Unresolved_collisions: ", unresolved_collisions,
-                "; fraction: ", 2 * unresolved_collisions / total_weight)
-    end
+    return clusters # which clusters a given node is assigned to
+end
 
+function generate_initial_graph(weights::Vector{Int})
     stubs = Int[]
-    for i in axes(w, 1)
-        for j in w_internal[i]+1:w[i]
+    for i in 1:length(weights)
+        for _ in 1:weights[i]
             push!(stubs, i)
         end
     end
-    @assert sum(w) == length(stubs) + sum(w_internal)
-    if params.hasoutliers
-        if 2 * sum(w[clusters .== 1]) > length(stubs)
-            @warn "Because of low value of ξ the outlier nodes form a community. " *
-                  "It is recommended to increase ξ."
- end - end + + @assert sum(weights) == length(stubs) + @assert iseven(length(stubs)) + shuffle!(stubs) - if isodd(length(stubs)) - maxi = 1 - @assert w[stubs[maxi]] > w_internal[stubs[maxi]] - for i in 2:length(stubs) - si = stubs[i] - @assert w[si] > w_internal[si] - if w[si] > w[stubs[maxi]] - maxi = i - end - end - si = popat!(stubs, maxi) - @assert w[si] > w_internal[si] - w[si] -= 1 - end - global_edges = Set{Tuple{Int, Int}}() + + local_edges = Set{Tuple{Int,Int}}() recycle = Tuple{Int,Int}[] + for i in 1:2:length(stubs) e = minmax(stubs[i], stubs[i+1]) - if (e[1] == e[2]) || (e in global_edges) || (e in edges) + if (e[1] == e[2]) || (e in local_edges) push!(recycle, e) else - push!(global_edges, e) + push!(local_edges, e) end end last_recycle = length(recycle) @@ -384,52 +134,18 @@ function config_model(clusters, params) break end end - p1 = pop!(recycle) + p1 = popfirst!(recycle) from_recycle = 2 * length(recycle) / length(stubs) - p2 = if rand() < from_recycle - i = rand(axes(recycle, 1)) - recycle[i], recycle[end] = recycle[end], recycle[i] - pop!(recycle) - else - x = rand(global_edges) - pop!(global_edges, x) - end - if rand() < 0.5 - newp1 = minmax(p1[1], p2[1]) - newp2 = minmax(p1[2], p2[2]) - else - newp1 = minmax(p1[1], p2[2]) - newp2 = minmax(p1[2], p2[1]) - end - for newp in (newp1, newp2) - if (newp[1] == newp[2]) || (newp in global_edges) || (newp in edges) - push!(recycle, newp) + success = false + for _ in 1:2:length(stubs) + p2 = if rand() < from_recycle + used_recycle = true + recycle_idx = rand(axes(recycle, 1)) + recycle[recycle_idx] else - push!(global_edges, newp) - end - end - end - old_len = length(edges) - union!(edges, global_edges) - @assert length(edges) == old_len + length(global_edges) - if isempty(recycle) - @assert 2 * length(global_edges) == length(stubs) - else - last_recycle = length(recycle) - recycle_counter = last_recycle - while !isempty(recycle) - recycle_counter -= 1 - if recycle_counter < 0 - if length(recycle) < 
last_recycle
-                    last_recycle = length(recycle)
-                    recycle_counter = last_recycle
-                else
-                    break
-                end
+                used_recycle = false
+                rand(local_edges)
             end
-            p1 = pop!(recycle)
-            x = rand(edges)
-            p2 = pop!(edges, x)
             if rand() < 0.5
                 newp1 = minmax(p1[1], p2[1])
                 newp2 = minmax(p1[2], p2[2])
@@ -437,20 +153,163 @@ function config_model(clusters, params)
                 newp1 = minmax(p1[1], p2[2])
                 newp2 = minmax(p1[2], p2[1])
             end
-            for newp in (newp1, newp2)
-                if (newp[1] == newp[2]) || (newp in edges)
-                    push!(recycle, newp)
+            if newp1 == newp2
+                good_choice = false
+            elseif (newp1[1] == newp1[2]) || (newp1 in local_edges)
+                good_choice = false
+            elseif (newp2[1] == newp2[2]) || (newp2 in local_edges)
+                good_choice = false
+            else
+                good_choice = true
+            end
+            if good_choice
+                if used_recycle
+                    recycle[recycle_idx], recycle[end] = recycle[end], recycle[recycle_idx]
+                    pop!(recycle)
                 else
-                    push!(edges, newp)
+                    pop!(local_edges, p2)
                 end
+                success = true
+                push!(local_edges, newp1)
+                push!(local_edges, newp2)
+                break
             end
         end
+        success || push!(recycle, p1)
     end
-    if !isempty(recycle)
-        unresolved_collisions = length(recycle)
-        println("Very hard graph. Failed to generate ", unresolved_collisions,
-                "edges; fraction: ", 2 * unresolved_collisions / total_weight)
+
+    unused_stubs = Int[]
+    for (a, b) in recycle
+        push!(unused_stubs, a, b)
     end
+
+    @assert sum(weights) == length(local_edges) * 2 + length(unused_stubs)
+
+    return local_edges, unused_stubs
+end
+
+"""
+    config_model(clusters, params)
+
+Generate the edges of the graph with the configuration model.
+
+`clusters[i]` lists the communities of vertex `i` (`[1]` marks an outlier). A `1 - ξ` share of
+each non-outlier degree is split between the communities of the vertex and the rest forms
+the background graph. Every community graph and the background graph are generated
+separately by `generate_initial_graph`, merged, and the stubs of self loops and duplicated
+edges are then re-paired at random. Return a set of `(a, b)` tuples with `a < b`.
+"""
+function config_model(clusters, params)
+    w, ξ = params.w, params.ξ
+
+    @assert iseven(sum(w))
+    w_internal_raw = randround.([w[i] * (1 - ξ) for i in axes(w, 1)])
+    for i in findall(==([1]), clusters)
+        w_internal_raw[i] = 0
+    end
+
+    w_external = w - w_internal_raw
+
+    clusterlist = [Int[] for i in 1:maximum(c -> maximum(c), clusters)] # list of nodes in each cluster
+    for i in axes(clusters, 1)
+        c = clusters[i]
+        for x in c
+            push!(clusterlist[x], i)
+        end
+    end
+
+    w_internal_comm = [zeros(Int, length(w_internal_raw)) for i in 1:length(clusterlist)] # this holds internal degree of each community
+
+    for i in axes(clusters, 1)
+        wi = w_internal_raw[i]
+        nc = length(clusters[i])
+        share = floor(Int, wi / nc)
+        extra = wi - nc * share
+        z = shuffle(1:nc)[1:extra]
+        for j in 1:nc
+            w_internal_comm[clusters[i][j]][i] = share + (j in z)
+        end
+    end
+
+    for wic in w_internal_comm # make sure that for each community sum of its degrees is even
+        if isodd(sum(wic))
+            largest = argmax(wic)
+            @assert wic[largest] > 0
+            wic[largest] -= 1
+            w_external[largest] += 1
+        end
+    end
+
+    @assert sum(w_internal_comm) + w_external == w
+    # parity is required of the degree sums (numbers of stubs), not of the number of vertices
+    @assert iseven(sum(w_external))
+    @assert all(x -> iseven(sum(x)), w_internal_comm)
+    @assert all(==(0), w_internal_comm[1])
+
+
+    partial_graphs = Set{Tuple{Int,Int}}[]
+    unused_stubs = Int[]
+
+    idxs_com = 0
+    for w_int in w_internal_comm
+        idxs_com += 1
+        if idxs_com == 1 # outlier community
+            @assert sum(w_int) == 0
+        else
+            g, leftover = generate_initial_graph(w_int)
+            push!(partial_graphs, g)
+            append!(unused_stubs, leftover)
+        end
+    end
+
+    let
+        g, leftover = generate_initial_graph(w_external)
+        push!(partial_graphs, g)
+        append!(unused_stubs, leftover)
+    end
+
+    edges = Set{Tuple{Int,Int}}()
+
+    for g in partial_graphs
+        for e in g
+            if e in edges
+                push!(unused_stubs, e[1], e[2]) # duplicate across subgraphs
+            else
+                push!(edges, e)
+            end
+        end
+    end
+
+    @assert sum(w) == length(edges) * 2 + length(unused_stubs)
+    @assert iseven(length(unused_stubs))
+
+    last_recycle_len = length(unused_stubs)
+    if length(unused_stubs) > 0
+        println("Duplicates or self loops generated in total number of $(div(length(unused_stubs), 2)). Fixing.")
+
+        while true
+            shuffle!(unused_stubs)
+            recycle = Int[]
+            for i in 1:2:length(unused_stubs)
+                e = minmax(unused_stubs[i], unused_stubs[i+1])
+                if (e[1] == e[2]) || (e in edges)
+                    push!(recycle, e[1], e[2])
+                else
+                    push!(edges, e)
+                end
+            end
+            if length(recycle) == last_recycle_len # no success in generating new feasible edges
+                println("Could not fix creation of $(div(last_recycle_len, 2)) edges. Giving up.")
+                break
+            end
+            last_recycle_len = length(recycle)
+            unused_stubs = recycle
+            @assert iseven(length(recycle))
+            isempty(recycle) && break
+        end
+    end
+
+    @assert sum(w) == length(edges) * 2 + last_recycle_len
     return edges
 end
 
@@ -465,6 +324,6 @@ The ordering of vertices and clusters is in descending order (as in `params`).
 """
 function gen_graph(params::ABCDParams)
     clusters = populate_clusters(params)
-    edges = params.isCL ? CL_model(clusters, params) : config_model(clusters, params)
+    edges = config_model(clusters, params)
     (edges=edges, clusters=clusters)
 end
diff --git a/utils/abcd_sampler.jl b/utils/abcd_sampler.jl
index a29bc6d..f975be8 100644
--- a/utils/abcd_sampler.jl
+++ b/utils/abcd_sampler.jl
@@ -9,34 +9,26 @@ filename = ARGS[1]
 conf = Pkg.TOML.parsefile(filename)
 isempty(conf["seed"]) || Random.seed!(parse(Int, conf["seed"]))
 
-nout = haskey(conf, "nout") ? parse(Int, conf["nout"]) : 0
-
-μ = haskey(conf, "mu") ? parse(Float64, conf["mu"]) : nothing
-ξ = haskey(conf, "xi") ?
parse(Float64, conf["xi"]) : nothing -if !(isnothing(μ) || isnothing(ξ)) - throw(ArgumentError("inconsistent data: only μ or ξ may be provided")) +nout = parse(Int, conf["nout"]) +if nout < 0 + throw(ArgumentError("nout cannot be negative")) end -if !isnothing(μ) && nout > 0 - throw(ArgumentError("μ is not supported with outliers")) +ξ = parse(Float64, conf["xi"]) +η = parse(Float64, conf["eta"]) +if η < 1 + throw(ArgumentError("eta must be at least 1")) end n = parse(Int, conf["n"]) +if n < 0 + throw(ArgumentError("n cannot be negative")) +end if nout > n throw(ArgumentError("number of outliers cannot be larger than graph size")) end -islocal = haskey(conf, "islocal") ? parse(Bool, conf["islocal"]) : false -if islocal && nout > 0 - throw(ArgumentError("local graph is not supported with outliers")) -end - -isCL = parse(Bool, conf["isCL"]) -if isCL && nout > 0 - throw(ArgumentError("Chung-Lu graph is not supported with outliers")) -end - # in what follows n is number of non-outlier nodes n = n - nout @@ -53,13 +45,11 @@ c_min = parse(Int, conf["c_min"]) c_max = parse(Int, conf["c_max"]) c_max_iter = parse(Int, conf["c_max_iter"]) @info "Expected value of community size: $(ABCDGraphGenerator.get_ev(τ₂, c_min, c_max))" -coms = ABCDGraphGenerator.sample_communities(τ₂, c_min, c_max, n, c_max_iter) -if nout > 0 - pushfirst!(coms, nout) -end -open(io -> foreach(d -> println(io, d), coms), conf["communitysizesfile"], "w") +coms = ABCDGraphGenerator.sample_communities(τ₂, ceil(Int, c_min / η), floor(Int, c_max / η), n, c_max_iter) +@assert sum(coms) == n +pushfirst!(coms, nout) -p = ABCDGraphGenerator.ABCDParams(degs, coms, μ, ξ, isCL, islocal, nout > 0) +p = ABCDGraphGenerator.ABCDParams(degs, coms, ξ, η) edges, clusters = ABCDGraphGenerator.gen_graph(p) open(conf["networkfile"], "w") do io for (a, b) in sort!(collect(edges)) @@ -71,3 +61,18 @@ open(conf["communityfile"], "w") do io println(io, i, "\t", c) end end + +open(conf["communitysizesfile"], "w") do io + 
comm_count = zeros(Int, length(coms)) + for c in clusters + @assert length(c) > 0 + if 1 in c + @assert length(c) == 1 + end + for v in c + comm_count[v] += 1 + end + end + println("eta is $η and empirically we have scaling of: ", extrema(comm_count[2:end] ./ coms[2:end])) + foreach(d -> println(io, d), comm_count) +end diff --git a/utils/com_sampler.jl b/utils/com_sampler.jl deleted file mode 100644 index a5cbb33..0000000 --- a/utils/com_sampler.jl +++ /dev/null @@ -1,36 +0,0 @@ -using ABCDGraphGenerator -using Random - -# note that for backward compatibility reasons `[nout]` is an optional parameter -# that comes last -@info "Usage: julia com_sampler.jl filename τ₂ c_min c_max n max_iter [seed] [nout]" -@info "Example: julia com_sampler.jl community_sizes.dat 2 50 1000 10000 1000 42 100" - -filename = ARGS[1] -τ₂ = parse(Float64, ARGS[2]) -c_min = parse(Int, ARGS[3]) -c_max = parse(Int, ARGS[4]) -n = parse(Int, ARGS[5]) -max_iter = parse(Int, ARGS[6]) -length(ARGS) >= 7 && Random.seed!(parse(Int, ARGS[7])) -if length(ARGS) >= 8 - nout = parse(Int, ARGS[8]) -else - nout = 0 -end - -length(ARGS) >= 9 && @warn "more than 8 parameters passed" - -@info "Expected value of community size: $(ABCDGraphGenerator.get_ev(τ₂, c_min, c_max))" - -if nout > n - throw(ArgumentError("number of outliers cannot be larger than graph size")) -end - -coms = ABCDGraphGenerator.sample_communities(τ₂, c_min, c_max, n - nout, max_iter) - -if nout > 0 - pushfirst!(coms, nout) -end - -open(io -> foreach(d -> println(io, d), coms), filename, "w") diff --git a/utils/deg_sampler.jl b/utils/deg_sampler.jl deleted file mode 100644 index ceaab3b..0000000 --- a/utils/deg_sampler.jl +++ /dev/null @@ -1,19 +0,0 @@ -using ABCDGraphGenerator -using Random - -@info "Usage: julia deg_sampler.jl filename τ₁ d_min d_max n max_iter [seed]" -@info "Example: julia deg_sampler.jl degrees.dat 3 5 50 10000 1000 42" - -filename = ARGS[1] -τ₁ = parse(Float64, ARGS[2]) -d_min = parse(Int, ARGS[3]) -d_max = 
parse(Int, ARGS[4]) -n = parse(Int, ARGS[5]) -max_iter = parse(Int, ARGS[6]) -length(ARGS) == 7 && Random.seed!(parse(Int, ARGS[7])) - -@info "Expected value of degree: $(ABCDGraphGenerator.get_ev(τ₁, d_min, d_max))" - -degs = ABCDGraphGenerator.sample_degrees(τ₁, d_min, d_max, n, max_iter) - -open(io -> foreach(d -> println(io, d), degs), filename, "w") diff --git a/utils/example_config.toml b/utils/example_config.toml index 45ff35a..e885880 100644 --- a/utils/example_config.toml +++ b/utils/example_config.toml @@ -1,23 +1,17 @@ seed = "42" # RNG seed, use "" for no seeding n = "10000" # number of vertices in graph +nout = "100" # number of vertices in graph that are outliers t1 = "3" # power-law exponent for degree distribution d_min = "5" # minimum degree d_max = "50" # maximum degree d_max_iter = "1000" # maximum number of iterations for sampling degrees t2 = "2" # power-law exponent for cluster size distribution -c_min = "50" # minimum cluster size -c_max = "1000" # maximum cluster size +c_min = "50" # minimum cluster size after growing +c_max = "1000" # maximum cluster size after growing c_max_iter = "1000" # maximum number of iterations for sampling cluster sizes -# Exactly one of xi and mu must be passed as Float64. Also if xi is provided islocal must be set to false or omitted. 
xi = "0.2" # fraction of edges to fall in background graph -#mu = "0.2" # mixing parameter -islocal = "false" # if "true" mixing parameter is restricted to local cluster, otherwise it is global -isCL = "false" # if "false" use configuration model, if "true" use Chung-Lu +eta = "1.5" # average number of community per community node degreefile = "deg.dat" # name of file do generate that contains vertex degrees communitysizesfile = "cs.dat" # name of file do generate that contains community sizes communityfile = "com.dat" # name of file do generate that contains assignments of vertices to communities networkfile = "edge.dat" # name of file do generate that contains edges of the generated graph -nout = "100" # number of vertices in graph that are outliers; optional parameter - # if nout is passed and is not zero then we require islocal = "false", - # isCL = "false", and xi (not mu) must be passed - # if nout > 0 then it is recommended that xi > 0 diff --git a/utils/graph_check.jl b/utils/graph_check.jl deleted file mode 100644 index 9925443..0000000 --- a/utils/graph_check.jl +++ /dev/null @@ -1,90 +0,0 @@ -using Statistics -using ABCDGraphGenerator: ArgParse - -function parse_commandline() - s = ArgParse.ArgParseSettings() - - ArgParse.@add_arg_table! 
s begin - "degrees" - help = "degrees file" - required = true - "community_size" - help = "community sizes file" - required = true - "community" - help = "community file" - required = true - "network" - help = "network file" - required = true - "isCL" - help = "pass true if graph is CL and false if CM" - arg_type = Bool - required = true - end - s.usage = "graph_check.jl [-h] degrees community_size community network isCL" - return ArgParse.parse_args(s) -end - -parsed_args = parse_commandline() - -degrees_fname = parsed_args["degrees"] -community_sizes_fname = parsed_args["community_size"] -community_fname = parsed_args["community"] -network_fname = parsed_args["network"] -isCL = parsed_args["isCL"] - -degrees = parse.(Int, readlines(degrees_fname)) -community_sizes = parse.(Int, readlines(community_sizes_fname)) -community = (x -> parse.(Int, x[2])).(split.(readlines(community_fname))) -network = (x -> parse.(Int, x)).(split.(readlines(network_fname))) - -@assert length(degrees) == length(community) == sum(community_sizes) -@info "Number of nodes: $(length(degrees))" -@info "Number of communities: $(length(community_sizes))" - -nei_community = [Int[] for _ in 1:length(degrees)] -for (a, b) in network - push!(nei_community[a], community[b]) - push!(nei_community[b], community[a]) -end - -@info "mean required degree: $(mean(degrees))" -@info "min required degree: $(minimum(degrees))" -@info "max required degree: $(maximum(degrees))" - -@info "mean generated degree: $(mean(length.(nei_community)))" -@info "min generated degree: $(minimum(length.(nei_community)))" -@info "max generated degree: $(maximum(length.(nei_community)))" - -if !isCL - bad_degree = [i for i in 1:length(degrees) if degrees[i] != length(nei_community[i])] - - if isempty(bad_degree) - @info "all generated degrees are equal to required degrees" - else - bad_nodes = [(node=i, expected=degrees[i], actual=length(nei_community[i])) for i in bad_degree] - @warn "Nodes with not matching degrees are 
$bad_nodes" - end -end - -for i in 1:length(community_sizes) - wanted_size = community_sizes[i] - actual_size = count(==(i), community) - if wanted_size != actual_size - @warn "For community $i actual size $actual_size is not equal to wanted size $wanted_size" - end -end - -internal_count = [count(==(community[i]), nei_community[i]) for i in 1:length(degrees)] -outside_count = [count(!=(community[i]), nei_community[i]) for i in 1:length(degrees)] - -internal_frac = internal_count ./ (internal_count .+ outside_count) - -@info "mean graph level proportion of internal edges: $(mean(internal_frac))" - -@info "Proportion of internal edges per community:" -for i in 1:length(community_sizes) - internal_frac_com = sum(internal_count[community .== i]) ./ sum((internal_count .+ outside_count)[community .== i]) - @info "Community $i has size $(community_sizes[i]) and internal fraction $internal_frac_com" -end diff --git a/utils/graph_sampler.jl b/utils/graph_sampler.jl deleted file mode 100644 index 83e91e3..0000000 --- a/utils/graph_sampler.jl +++ /dev/null @@ -1,69 +0,0 @@ -using ABCDGraphGenerator -using Random - -# note that for backward compatibility reasons `[nout]` is an optional parameter -# that comes last -@info "Usage: julia graph_sampler.jl networkfile communityfile degreefile communitysizesfile mu|xi fraction isCL islocal [seed] [nout]" -@info "Example: julia graph_sampler.jl network.dat community.dat degrees.dat community_sizes.dat xi 0.2 true true 42 100" - -networkfile = ARGS[1] -communityfile = ARGS[2] -degreefile = ARGS[3] -communitysizesfile = ARGS[4] -muxi = ARGS[5] -fraction = parse(Float64, ARGS[6]) -isCL = parse(Bool, ARGS[7]) -islocal = parse(Bool, ARGS[8]) - -length(ARGS) >= 9 && Random.seed!(parse(Int, ARGS[9])) -if length(ARGS) >= 10 - nout = parse(Int, ARGS[10]) -else - nout = 0 -end - -length(ARGS) >= 11 && @warn "more than 10 parameters passed" - -coms = parse.(Int, readlines(communitysizesfile)) - -if nout > 0 - nout == coms[1] || 
throw(ArgumentError("nout does not match first community")) -end - -muxi in ["mu","xi"] || throw(ArgumentError("only mu or xi names are allowed for")) -μ, ξ = nothing, nothing -if muxi == "mu" - μ = fraction -else - ξ = fraction -end - -if isnothing(ξ) && nout > 0 - throw(ArgumentError("μ is not supported with outliers")) -end - -if islocal && nout > 0 - throw(ArgumentError("local graph is not supported with outliers")) -end - -if isCL && nout > 0 - throw(ArgumentError("Chung-Lu graph is not supported with outliers")) -end - -p = ABCDGraphGenerator.ABCDParams(parse.(Int, readlines(degreefile)), - coms, - μ, ξ, isCL, islocal, nout > 0) - -edges, clusters = ABCDGraphGenerator.gen_graph(p) - -open(networkfile, "w") do io - for (a, b) in sort!(collect(edges)) - println(io, a, "\t", b) - end -end - -open(communityfile, "w") do io - for (i, c) in enumerate(clusters) - println(io, i, "\t", c) - end -end diff --git a/utils/install.jl b/utils/install.jl deleted file mode 100644 index a6cd8d9..0000000 --- a/utils/install.jl +++ /dev/null @@ -1,3 +0,0 @@ -using Pkg - -Pkg.add(PackageSpec(url="https://github.com/bkamins/ABCDGraphGenerator.jl"))