forked from igraph/python-igraph
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcommunity.py
More file actions
630 lines (530 loc) · 28 KB
/
community.py
File metadata and controls
630 lines (530 loc) · 28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
from igraph._igraph import GraphBase
from igraph.clustering import VertexDendrogram, VertexClustering
from igraph.utils import deprecated
from typing import List, Sequence, Tuple
def _community_fastgreedy(graph, weights=None):
"""Community structure based on the greedy optimization of modularity.
This algorithm merges individual nodes into communities in a way that
greedily maximizes the modularity score of the graph. It can be proven
that if no merge can increase the current modularity score, the
algorithm can be stopped since no further increase can be achieved.
This algorithm is said to run almost in linear time on sparse graphs.
B{Reference}: A Clauset, MEJ Newman and C Moore: Finding community structure
in very large networks. I{Phys Rev E} 70, 066111 (2004).
@param weights: edge attribute name or a list containing edge
weights
@return: an appropriate L{VertexDendrogram} object.
"""
merges, qs = GraphBase.community_fastgreedy(graph, weights)
optimal_count = _optimal_cluster_count_from_merges_and_modularity(graph, merges, qs)
return VertexDendrogram(
graph, merges, optimal_count, modularity_params={"weights": weights}
)
def _community_infomap(graph, edge_weights=None, vertex_weights=None, trials=10):
"""Finds the community structure of the network according to the Infomap
method of Martin Rosvall and Carl T. Bergstrom.
B{References}
- M. Rosvall and C. T. Bergstrom: Maps of information flow reveal
community structure in complex networks, I{PNAS} 105, 1118 (2008).
U{https://doi.org/10.1073/pnas.0706851105},
U{https://arxiv.org/abs/0707.0609}.
- M. Rosvall, D. Axelsson, and C. T. Bergstrom: The map equation,
I{Eur Phys. J Special Topics} 178, 13 (2009).
U{https://doi.org/10.1140/epjst/e2010-01179-1},
U{https://arxiv.org/abs/0906.1405}.
@param edge_weights: name of an edge attribute or a list containing
edge weights.
@param vertex_weights: name of a vertex attribute or a list containing
vertex weights.
@param trials: the number of attempts to partition the network.
@return: an appropriate L{VertexClustering} object with an extra attribute
called C{codelength} that stores the code length determined by the
algorithm.
"""
membership, codelength = GraphBase.community_infomap(
graph, edge_weights, vertex_weights, trials
)
return VertexClustering(
graph,
membership,
params={"codelength": codelength},
modularity_params={"weights": edge_weights},
)
def _community_leading_eigenvector(
graph, clusters=None, weights=None, arpack_options=None
):
"""Newman's leading eigenvector method for detecting community structure.
This is the proper implementation of the recursive, divisive algorithm:
each split is done by maximizing the modularity regarding the
original network.
B{Reference}: MEJ Newman: Finding community structure in networks using the
eigenvectors of matrices, arXiv:physics/0605087
@param clusters: the desired number of communities. If C{None}, the
algorithm tries to do as many splits as possible. Note that the
algorithm won't split a community further if the signs of the leading
eigenvector are all the same, so the actual number of discovered
communities can be less than the desired one.
@param weights: name of an edge attribute or a list containing
edge weights.
@param arpack_options: an L{ARPACKOptions} object used to fine-tune
the ARPACK eigenvector calculation. If omitted, the module-level
variable called C{arpack_options} is used.
@return: an appropriate L{VertexClustering} object.
"""
if clusters is None:
clusters = -1
kwds = {"weights": weights}
if arpack_options is not None:
kwds["arpack_options"] = arpack_options
membership, _, q = GraphBase.community_leading_eigenvector(graph, clusters, **kwds)
return VertexClustering(graph, membership, modularity=q)
def _community_label_propagation(graph, weights=None, initial=None, fixed=None):
"""Finds the community structure of the graph according to the label
propagation method of Raghavan et al.
Initially, each vertex is assigned a different label. After that,
each vertex chooses the dominant label in its neighbourhood in each
iteration. Ties are broken randomly and the order in which the
vertices are updated is randomized before every iteration. The
algorithm ends when vertices reach a consensus.
Note that since ties are broken randomly, there is no guarantee that
the algorithm returns the same community structure after each run.
In fact, they frequently differ. See the paper of Raghavan et al.
on how to come up with an aggregated community structure.
Also note that the community _labels_ (numbers) have no semantic meaning
and igraph is free to re-number communities. If you use fixed labels,
igraph may still re-number the communities, but co-community membership
constraints will be respected: if you had two vertices with fixed labels
that belonged to the same community, they will still be in the same
community at the end. Similarly, if you had two vertices with fixed
labels that belonged to different communities, they will still be in
different communities at the end.
B{Reference}: Raghavan, U.N. and Albert, R. and Kumara, S. Near linear
time algorithm to detect community structures in large-scale networks.
I{Phys Rev} E 76:036106, 2007. U{https://arxiv.org/abs/0709.2938}.
@param weights: name of an edge attribute or a list containing
edge weights
@param initial: name of a vertex attribute or a list containing
the initial vertex labels. Labels are identified by integers from
zero to M{n-1} where M{n} is the number of vertices. Negative
numbers may also be present in this vector, they represent unlabeled
vertices.
@param fixed: a list of booleans for each vertex. C{True} corresponds
to vertices whose labeling should not change during the algorithm.
It only makes sense if initial labels are also given. Unlabeled
vertices cannot be fixed. It may also be the name of a vertex
attribute; each attribute value will be interpreted as a Boolean.
@return: an appropriate L{VertexClustering} object.
"""
if isinstance(fixed, str):
fixed = [bool(o) for o in graph.vs[fixed]]
cl = GraphBase.community_label_propagation(graph, weights, initial, fixed)
return VertexClustering(graph, cl, modularity_params={"weights": weights})
def _community_multilevel(graph, weights=None, return_levels=False, resolution=1):
"""Community structure based on the multilevel algorithm of
Blondel et al.
This is a bottom-up algorithm: initially every vertex belongs to a
separate community, and vertices are moved between communities
iteratively in a way that maximizes the vertices' local contribution
to the overall modularity score. When a consensus is reached (i.e. no
single move would increase the modularity score), every community in
the original graph is shrunk to a single vertex (while keeping the
total weight of the incident edges) and the process continues on the
next level. The algorithm stops when it is not possible to increase
the modularity anymore after shrinking the communities to vertices.
This algorithm is said to run almost in linear time on sparse graphs.
B{Reference}: VD Blondel, J-L Guillaume, R Lambiotte and E Lefebvre: Fast
unfolding of community hierarchies in large networks, I{J Stat Mech}
P10008 (2008). U{https://arxiv.org/abs/0803.0476}
@param weights: edge attribute name or a list containing edge
weights
@param return_levels: if C{True}, the communities at each level are
returned in a list. If C{False}, only the community structure with
the best modularity is returned.
@param resolution: the resolution parameter to use in the modularity
measure. Smaller values result in a smaller number of larger clusters,
while higher values yield a large number of small clusters. The classical
modularity measure assumes a resolution parameter of 1.
@return: a list of L{VertexClustering} objects, one corresponding to
each level (if C{return_levels} is C{True}), or a L{VertexClustering}
corresponding to the best modularity.
"""
if graph.is_directed():
raise ValueError("input graph must be undirected")
modularity_params = {"weights": weights, "resolution": resolution}
if return_levels:
levels, qs = GraphBase.community_multilevel(
graph, weights, return_levels=True, resolution=resolution
)
result = []
for level, q in zip(levels, qs):
result.append(
VertexClustering(graph, level, q, modularity_params=modularity_params)
)
else:
membership = GraphBase.community_multilevel(
graph, weights, return_levels=False, resolution=resolution
)
result = VertexClustering(
graph, membership, modularity_params=modularity_params
)
return result
def _community_optimal_modularity(graph, *args, **kwds):
"""Calculates the optimal modularity score of the graph and the
corresponding community structure.
This function uses the GNU Linear Programming Kit to solve a large
integer optimization problem in order to find the optimal modularity
score and the corresponding community structure, therefore it is
unlikely to work for graphs larger than a few (less than a hundred)
vertices. Consider using one of the heuristic approaches instead if
you have such a large graph.
@return: the calculated membership vector and the corresponding
modularity in a tuple."""
membership, modularity = GraphBase.community_optimal_modularity(
graph, *args, **kwds
)
return VertexClustering(graph, membership, modularity)
def _community_edge_betweenness(graph, clusters=None, directed=True, weights=None):
"""Community structure based on the betweenness of the edges in the
network.
The idea is that the betweenness of the edges connecting two
communities is typically high, as many of the shortest paths between
nodes in separate communities go through them. So we gradually remove
the edge with the highest betweenness and recalculate the betweennesses
after every removal. This way sooner or later the network falls of to
separate components. The result of the clustering will be represented
by a dendrogram.
When edge weights are given, the ratio of betweenness and weight values
is used to choose which edges to remove first, as described in
M. E. J. Newman: Analysis of Weighted Networks (2004), Section C.
Thus, edges with large weights are treated as strong connections,
and will be removed later than weak connections having similar betweenness.
Weights are also used for calculating modularity.
@param clusters: the number of clusters we would like to see. This
practically defines the "level" where we "cut" the dendrogram to
get the membership vector of the vertices. If C{None}, the dendrogram
is cut at the level that maximizes the modularity when the graph is
unweighted; otherwise the dendrogram is cut at at a single cluster
(because cluster count selection based on modularities does not make
sense for this method if not all the weights are equal).
@param directed: whether the directionality of the edges should be
taken into account or not.
@param weights: name of an edge attribute or a list containing
edge weights. Higher weights indicate stronger connections.
@return: a L{VertexDendrogram} object, initally cut at the maximum
modularity or at the desired number of clusters.
"""
merges, qs = GraphBase.community_edge_betweenness(graph, directed, weights)
if clusters is None:
if qs is not None:
clusters = _optimal_cluster_count_from_merges_and_modularity(
graph, merges, qs
)
else:
clusters = 1
return VertexDendrogram(
graph, merges, clusters, modularity_params={"weights": weights}
)
def _community_spinglass(graph, *args, **kwds):
"""Finds the community structure of the graph according to the
spinglass community detection method of Reichardt & Bornholdt.
B{References}
- Reichardt J and Bornholdt S: Statistical mechanics of community
detection. I{Phys Rev E} 74:016110 (2006).
U{https://arxiv.org/abs/cond-mat/0603718}.
- Traag VA and Bruggeman J: Community detection in networks
with positive and negative links. I{Phys Rev E} 80:036115 (2009).
U{https://arxiv.org/abs/0811.2329}.
@keyword weights: edge weights to be used. Can be a sequence or
iterable or even an edge attribute name.
@keyword spins: integer, the number of spins to use. This is the
upper limit for the number of communities. It is not a problem
to supply a (reasonably) big number here, in which case some
spin states will be unpopulated.
@keyword parupdate: whether to update the spins of the vertices in
parallel (synchronously) or not
@keyword start_temp: the starting temperature
@keyword stop_temp: the stop temperature
@keyword cool_fact: cooling factor for the simulated annealing
@keyword update_rule: specifies the null model of the simulation.
Possible values are C{"config"} (a random graph with the same
vertex degrees as the input graph) or C{"simple"} (a random
graph with the same number of edges)
@keyword gamma: the gamma argument of the algorithm, specifying the
balance between the importance of present and missing edges
within a community. The default value of 1.0 assigns equal
importance to both of them.
@keyword implementation: currently igraph contains two implementations
of the spinglass community detection algorithm. The faster
original implementation is the default. The other implementation
is able to take into account negative weights, this can be
chosen by setting C{implementation} to C{"neg"}
@keyword lambda_: the lambda argument of the algorithm, which
specifies the balance between the importance of present and missing
negatively weighted edges within a community. Smaller values of
lambda lead to communities with less negative intra-connectivity.
If the argument is zero, the algorithm reduces to a graph coloring
algorithm, using the number of spins as colors. This argument is
ignored if the original implementation is used. Note the underscore
at the end of the argument name; this is due to the fact that
lambda is a reserved keyword in Python.
@return: an appropriate L{VertexClustering} object.
"""
membership = GraphBase.community_spinglass(graph, *args, **kwds)
if "weights" in kwds:
modularity_params = {"weights": kwds["weights"]}
else:
modularity_params = {}
return VertexClustering(graph, membership, modularity_params=modularity_params)
def _community_voronoi(graph, lengths=None, weights=None, mode="out", radius=None):
"""Finds communities using Voronoi partitioning.
This function finds communities using a Voronoi partitioning of vertices based
on the given edge lengths divided by the edge clustering coefficient.
The generator vertices are chosen to be those with the largest local relative
density within a radius, with the local relative density of a vertex defined
as C{s * m / (m + k)}, where C{s} is the strength of the vertex, C{m} is
the number of edges within the vertex's first order neighborhood, while C{k}
is the number of edges with only one endpoint within this neighborhood.
B{References}
- Deritei et al., Community detection by graph Voronoi diagrams,
I{New Journal of Physics} 16, 063007 (2014).
U{https://doi.org/10.1088/1367-2630/16/6/063007}.
- Molnár et al., Community Detection in Directed Weighted Networks using
Voronoi Partitioning, I{Scientific Reports} 14, 8124 (2024).
U{https://doi.org/10.1038/s41598-024-58624-4}.
@param lengths: edge lengths, or C{None} to consider all edges as having
unit length. Voronoi partitioning will use edge lengths equal to
lengths / ECC where ECC is the edge clustering coefficient.
@param weights: edge weights, or C{None} to consider all edges as having
unit weight. Weights are used when selecting generator points, as well
as for computing modularity.
@param mode: specifies how to use the direction of edges when computing
distances from generator points. If C{"out"} (the default), distances
from generator points to all other nodes are considered following the
direction of edges. If C{"in"}, distances are computed in the reverse
direction (i.e., from all nodes to generator points). If C{"all"},
edge directions are ignored and the graph is treated as undirected.
This parameter is ignored for undirected graphs.
@param radius: the radius/resolution to use when selecting generator points.
The larger this value, the fewer partitions there will be. Pass C{None}
to automatically select the radius that maximizes modularity.
@return: an appropriate L{VertexClustering} object with an extra attribute
called C{generators} (the generator vertices).
"""
# Convert mode string to proper enum value to avoid deprecation warning
if isinstance(mode, str):
mode_map = {"out": "out", "in": "in", "all": "all", "total": "all"} # alias
if mode.lower() in mode_map:
mode = mode_map[mode.lower()]
else:
raise ValueError(f"Invalid mode '{mode}'. Must be one of: out, in, all")
membership, generators, modularity = GraphBase.community_voronoi(graph, lengths, weights, mode, radius)
params = {"generators": generators}
modularity_params = {}
if weights is not None:
modularity_params["weights"] = weights
clustering = VertexClustering(
graph, membership, modularity=modularity, params=params, modularity_params=modularity_params
)
clustering.generators = generators
return clustering
def _community_walktrap(graph, weights=None, steps=4):
"""Community detection algorithm of Latapy & Pons, based on random
walks.
The basic idea of the algorithm is that short random walks tend to stay
in the same community. The result of the clustering will be represented
as a dendrogram.
B{Reference}: Pascal Pons, Matthieu Latapy: Computing communities in large
networks using random walks, U{https://arxiv.org/abs/physics/0512106}.
@param weights: name of an edge attribute or a list containing
edge weights
@param steps: length of random walks to perform
@return: a L{VertexDendrogram} object, initially cut at the maximum
modularity.
"""
merges, qs = GraphBase.community_walktrap(graph, weights, steps)
optimal_count = _optimal_cluster_count_from_merges_and_modularity(graph, merges, qs)
return VertexDendrogram(
graph, merges, optimal_count, modularity_params={"weights": weights}
)
def _k_core(graph, *args):
"""Returns some k-cores of the graph.
The method accepts an arbitrary number of arguments representing
the desired indices of the M{k}-cores to be returned. The arguments
can also be lists or tuples. The result is a single L{Graph} object
if an only integer argument was given, otherwise the result is a
list of L{Graph} objects representing the desired k-cores in the
order the arguments were specified. If no argument is given, returns
all M{k}-cores in increasing order of M{k}.
"""
if len(args) == 0:
indices = range(graph.vcount())
return_single = False
else:
return_single = True
indices = []
for arg in args:
try:
indices.extend(arg)
except Exception:
indices.append(arg)
if len(indices) > 1 or hasattr(args[0], "__iter__"):
return_single = False
corenesses = graph.coreness()
result = []
vidxs = range(graph.vcount())
for idx in indices:
core_idxs = [vidx for vidx in vidxs if corenesses[vidx] >= idx]
result.append(graph.subgraph(core_idxs))
if return_single:
return result[0]
return result
def _community_leiden(
graph,
objective_function="CPM",
weights=None,
resolution=1.0,
beta=0.01,
initial_membership=None,
n_iterations=2,
node_weights=None,
**kwds
):
"""Finds the community structure of the graph using the Leiden
algorithm of Traag, van Eck & Waltman.
B{Reference}: Traag, V. A., Waltman, L., & van Eck, N. J. (2019). From Louvain
to Leiden: guaranteeing well-connected communities. I{Scientific Reports},
9(1), 5233. doi: 10.1038/s41598-019-41695-z
@param objective_function: whether to use the Constant Potts
Model (CPM) or modularity. Must be either C{"CPM"} or C{"modularity"}.
@param weights: edge weights to be used. Can be a sequence or
iterable or even an edge attribute name.
@param resolution: the resolution parameter to use. Higher resolutions
lead to more smaller communities, while lower resolutions lead to fewer
larger communities.
@param beta: parameter affecting the randomness in the Leiden
algorithm. This affects only the refinement step of the algorithm.
@param initial_membership: if provided, the Leiden algorithm
will try to improve this provided membership. If no argument is
provided, the aglorithm simply starts from the singleton partition.
@param n_iterations: the number of iterations to iterate the Leiden
algorithm. Each iteration may improve the partition further. Using
a negative number of iterations will run until a stable iteration is
encountered (i.e. the quality was not increased during that
iteration).
@param node_weights: the node weights used in the Leiden algorithm.
If this is not provided, it will be automatically determined on the
basis of whether you want to use CPM or modularity. If you do provide
this, please make sure that you understand what you are doing.
@return: an appropriate L{VertexClustering} object with an extra attribute
called C{quality} that stores the value of the internal quality function
optimized by the algorithm.
"""
if objective_function.lower() not in ("cpm", "modularity"):
raise ValueError('objective_function must be "CPM" or "modularity".')
if "resolution_parameter" in kwds:
deprecated(
"resolution_parameter keyword argument is deprecated, use "
"resolution=... instead"
)
resolution = kwds.pop("resolution_parameter")
if kwds:
raise TypeError("unexpected keyword argument")
membership, quality = GraphBase.community_leiden(
graph,
edge_weights=weights,
node_weights=node_weights,
resolution=resolution,
normalize_resolution=(objective_function == "modularity"),
beta=beta,
initial_membership=initial_membership,
n_iterations=n_iterations,
)
params = {"quality": quality}
modularity_params = {"resolution": resolution}
if weights is not None:
modularity_params["weights"] = weights
return VertexClustering(
graph, membership, params=params, modularity_params=modularity_params
)
def _community_fluid_communities(graph, no_of_communities):
"""Community detection based on fluids interacting on the graph.
The algorithm is based on the simple idea of several fluids interacting
in a non-homogeneous environment (the graph topology), expanding and
contracting based on their interaction and density. Weighted graphs are
not supported.
This function implements the community detection method described in:
Parés F, Gasulla DG, et. al. (2018) Fluid Communities: A Competitive,
Scalable and Diverse Community Detection Algorithm.
@param no_of_communities: The number of communities to be found. Must be
greater than 0 and fewer than or equal to the number of vertices in the graph.
@return: an appropriate L{VertexClustering} object.
"""
# Validate input parameters
if no_of_communities <= 0:
raise ValueError("no_of_communities must be greater than 0")
if no_of_communities > graph.vcount():
raise ValueError("no_of_communities must be fewer than or equal to the number of vertices")
# Check if graph is weighted (not supported)
if graph.is_weighted():
raise ValueError("Weighted graphs are not supported by the fluid communities algorithm")
# Handle directed graphs - the algorithm works on undirected graphs
# but can accept directed graphs (they are treated as undirected)
if graph.is_directed():
import warnings
warnings.warn(
"Directed graphs are treated as undirected in the fluid communities algorithm",
UserWarning,
stacklevel=2
)
membership = GraphBase.community_fluid_communities(graph, no_of_communities)
return VertexClustering(graph, membership)
def _modularity(self, membership, weights=None, resolution=1, directed=True):
"""Calculates the modularity score of the graph with respect to a given
clustering.
The modularity of a graph w.r.t. some division measures how good the
division is, or how separated are the different vertex types from each
other. It's defined as M{Q=1/(2m)*sum(Aij-gamma*ki*kj/(2m)delta(ci,cj),i,j)}.
M{m} is the number of edges, M{Aij} is the element of the M{A}
adjacency matrix in row M{i} and column M{j}, M{ki} is the degree of
node M{i}, M{kj} is the degree of node M{j}, and M{Ci} and C{cj} are
the types of the two vertices (M{i} and M{j}), and M{gamma} is a resolution
parameter that defaults to 1 for the classical definition of modularity.
M{delta(x,y)} is one iff M{x=y}, 0 otherwise.
If edge weights are given, the definition of modularity is modified as
follows: M{Aij} becomes the weight of the corresponding edge, M{ki}
is the total weight of edges adjacent to vertex M{i}, M{kj} is the
total weight of edges adjacent to vertex M{j} and M{m} is the total
edge weight in the graph.
B{Reference}: MEJ Newman and M Girvan: Finding and evaluating community
structure in networks. I{Phys Rev E} 69 026113, 2004.
@param membership: a membership list or a L{VertexClustering} object
@param weights: optional edge weights or C{None} if all edges are
weighed equally. Attribute names are also allowed.
@param resolution: the resolution parameter I{gamma} in the formula above.
The classical definition of modularity is retrieved when the resolution
parameter is set to 1.
@param directed: whether to consider edge directions if the graph is directed.
C{True} will use the directed variant of the modularity measure where the
in- and out-degrees of nodes are treated separately; C{False} will treat
directed graphs as undirected.
@return: the modularity score
"""
if isinstance(membership, VertexClustering):
if membership.graph != self:
raise ValueError("clustering object belongs to another graph")
return GraphBase.modularity(
self, membership.membership, weights, resolution, directed
)
else:
return GraphBase.modularity(self, membership, weights, resolution, directed)
def _optimal_cluster_count_from_merges_and_modularity(
graph, merges: Sequence[Tuple[int, int]], qs: List[float]
) -> float:
"""Helper function to find the optimal cluster count for a hierarchical
clustering of a graph, given the merge matrix and the list of modularity
values after each merge.
Reverses the modularity vector as a side effect.
"""
no_of_comps = graph.vcount() - len(merges)
qs.reverse()
return qs.index(max(qs)) + no_of_comps