NetworKit

This Jupyter notebook provides an example of using the Python packages gravis and NetworKit. The notebook is also available as a downloadable .ipynb file.

References

Installation

  • With pip: pip install networkit

  • With conda: conda install -c conda-forge networkit

Import

[1]:
import os
import warnings
warnings.simplefilter("ignore")  # silence all warnings (incl. deprecation warnings raised by the libraries below)
[2]:
import networkit as nk

import gravis as gv

Quick start

Example 1

[3]:
def calculate_properties(g):
    """Compute gravis display properties for a NetworKit graph.

    Returns a list [graph, graph_metadata, node_metadata, edge_metadata]
    because NetworKit graphs cannot store user-defined attributes.
    """
    # Node centrality via PageRank
    pagerank_scores = nk.centrality.PageRank(g).run().scores()

    # Edge centrality via edge betweenness (requires indexed edges)
    g.indexEdges()
    betweenness = nk.centrality.Betweenness(g, computeEdgeCentrality=True, normalized=True)
    edge_scores = betweenness.run().edgeScores()

    # Community detection


    # Graph-level display properties
    graph_metadata = {'edge_opacity': 0.5}

    # Node properties: size scales with PageRank score
    node_metadata = {}
    for node_id, score in enumerate(pagerank_scores):
        node_metadata[node_id] = {'size': 5 + score * 2000}

    # Edge properties: size scales with edge betweenness
    edge_ids = []

    def remember_edge(source, target, edge_weight, edge_index):
        # Build a string id "(source, target)" for each edge
        edge_ids.append('({}, {})'.format(source, target))

    g.forEdges(remember_edge)
    edge_metadata = {eid: {'size': 0.5 + score * 50}
                     for eid, score in zip(edge_ids, edge_scores)}

    # Properties can not be stored in a NetworKit graph, so they are collected in a list instead
    return [g, graph_metadata, node_metadata, edge_metadata]


# Create a random hyperbolic graph with 250 nodes using a NetworKit generator
g = nk.generators.HyperbolicGenerator(250).generate()

# Calculate display properties (they can not be stored in the graph itself!)
data = calculate_properties(g)

# Plot the graph together with its metadata using gravis
gv.d3(data, zoom_factor=0.2)
[3]:

Graph construction

1) Manual graph construction

1.a) Graph (with directed=False)

undirected, with self-loops, with parallel edges, without attributes

[4]:
ug = nk.Graph(directed=False)


# Node with automatic id (starts from 0)
ug.addNode()
ug.addNode()
ug.addNode()
ug.addNode()
ug.addNode()
ug.addNode()
ug.addNode()
ug.addNode()

# Node with user-defined id
# ~ Not supported ~


# Nodes
# ~ Not supported (despite addNodes in API reference) ~


# Edge (nodes need to already exist)
ug.addEdge(0, 1)
ug.addEdge(1, 2)
ug.addEdge(2, 3)
ug.addEdge(3, 4)
ug.addEdge(4, 5)
ug.addEdge(5, 6)
ug.addEdge(6, 7)
ug.addEdge(7, 0)
# Self-loops (same source and target node)
ug.addEdge(0, 0)
ug.addEdge(0, 0)
# Parallel edges (duplicates of the edge added above)
ug.addEdge(0, 1)
ug.addEdge(0, 1)


# Edges
# ~ Not supported (despite addEdge description in API reference) ~


gv.d3(ug, graph_height=200)
[4]:

1.b) Graph (with directed=True)

directed, with self-loops, with parallel edges, without attributes

[5]:
dg = nk.Graph(directed=True)


# Replicate the nodes of the undirected graph (automatic ids 0..n-1, in order)
for node in ug.iterNodes():
    dg.addNode()

# Replicate the edges, now interpreted as directed source -> target
for source, target in ug.iterEdges():
    dg.addEdge(source, target)


gv.d3(dg, graph_height=200)
[5]:

Assign attributes to a created graph

  • StackOverflow: NetworKit does not allow to store any user-defined attributes within the graph object. Edges can have weights though.

2) Algorithmic graph construction

[6]:
# Shared parameters for the generator examples below
num_nodes = 50
num_clusters = 4

# Draw a power-law degree sequence that several generators below consume
generator = nk.generators.PowerlawDegreeSequence(minDeg=2, maxDeg=10, gamma=-2)
generator.run()
# degree_sequence = [1, 2, 3, 4, 5] * 10
degree_sequence = generator.getDegreeSequence(num_nodes)


# BTER graph generator implementation in FEASTPACK using GNU Octave
# g = nk.generators.BTERReplicator(g).generate()  # FileNotFoundError


# Barabasi-Albert model (with faster Batagelj-Brandes algorithm)
g = nk.generators.BarabasiAlbertGenerator(k=2, nMax=num_nodes, n0=0, batagelj=True).generate()


# Chung-Lu model - arg1: expected degree sequence
g = nk.generators.ChungLuGenerator(degree_sequence).generate()


# A clustered random graph - arg1: number of nodes, arg2: number of clusters,
#                            arg3: intra-cluster edge probability, arg4: inter-cluster edge probability
generator = nk.generators.ClusteredRandomGraphGenerator(n=num_nodes, k=num_clusters, pin=0.5, pout=0.02)
c = generator.getCommunities()  # generated ground truth clustering
g = generator.generate()


# EdgeSwitchingMarkovChainGenerator
g = nk.generators.ConfigurationModelGenerator(degree_sequence).generate()


# Dorogovtsev-Mendes model
g = nk.generators.DorogovtsevMendesGenerator(num_nodes).generate()
generator = nk.generators.DynamicDorogovtsevMendesGenerator(num_nodes)
#generator.generate(2)
#g = generator.getGraph()


# Forest fire model
generator = nk.generators.DynamicForestFireGenerator(p=0.1, directed=True, r=1.0)
graph_event = generator.generate(1)
# TODO: How to get a graph?


# A dynamically growing path
generator = nk.generators.DynamicPathGenerator(num_nodes)
#g = generator.getGraph()
# TODO


# Edge-Switching Markov-Chain method (=random simple graph with exactly the given degree sequence)
g = nk.generators.EdgeSwitchingMarkovChainGenerator(degree_sequence).generate()


# Erdős–Rényi model G(n,p)
g = nk.generators.ErdosRenyiGenerator(num_nodes, 0.1, directed=False).generate()


# Havel-Hakimi model
g = nk.generators.HavelHakimiGenerator(degree_sequence).generate()


# Hyperbolic generator
average_degree = 6
power_law_exponent = 3
temperature = 0
g = nk.generators.HyperbolicGenerator(
    num_nodes, k=average_degree, gamma=power_law_exponent, T=temperature).generate()
# graph_event_generator = nk.generators.DynamicHyperbolicGenerator(
#     num_nodes, average_degree, gamma=power_law_exponent, T=temperature)


# LFR clustered graph generator
generator = nk.generators.LFRGenerator(num_nodes)
generator.setDegreeSequence(degree_sequence)
generator.setCommunitySizeSequence(degree_sequence)
generator.setMu(1.2)
generator.run()
gg = generator.getGraph()


# Mocnik model (random spatial graphs)
g = nk.generators.MocnikGeneratorBasic(dim=3, n=num_nodes, k=2.0).generate()
g = nk.generators.MocnikGenerator(dim=3, n=num_nodes, k=2.0, weighted=True).generate()


# resembles an assumed geometric distribution of nodes in a P2P network
# graph_event_generator = nk.generators.DynamicPubWebGenerator(num_nodes)
g = nk.generators.PubWebGenerator(
    num_nodes, numberOfDenseAreas=2, neighborhoodRadius=12.5, maxNumberOfNeighbors=8).generate()

# regular ring lattice
g = nk.generators.RegularRingLatticeGenerator(num_nodes, nNeighbors=2).generate()

# R-MAT (recursive matrix) graphs
g = nk.generators.RmatGenerator(scale=5, edgeFactor=2.2, a=0.3, b=0.3, c=0.2, d=0.2).generate()

# Watts-Strogatz model (regular ring lattice, then edges are rewired)
rewiring_probability = 0.1
g = nk.generators.WattsStrogatzGenerator(nNodes=num_nodes, nNeighbors=2, p=rewiring_probability).generate()


# NOTE: g was reassigned by each generator above, so only the last (Watts-Strogatz) graph is plotted
gv.d3(g)
[6]:

3) Graph loading from an internal collection

[7]:
# TODO

4) Graph import and export

Import

[8]:
# Read a GML file from disk into a NetworKit graph
filepath = os.path.join('data', 'networkit_graph.gml')
g = nk.graphio.readGraph(filepath, nk.Format.GML)

Export

[9]:
# Write the graph to disk in GML format (overwrites an existing file, see warning below)
filepath = os.path.join('data', 'networkit_graph.gml')
nk.graphio.writeGraph(g, filepath, nk.Format.GML)
WARNING:root:overriding given file

5) Graph modification that results in a new graph

Basic graph inspection

[10]:
# Small Barabasi-Albert graph (10 nodes, 2 edges attached per new node) for inspection
g = nk.generators.BarabasiAlbertGenerator(k=2, nMax=10).generate()

gv.d3(g, graph_height=200)
[10]:

1) Graph and its properties

[11]:
# Inspect basic graph-level properties
print('Class:', type(g))
print()
print('Directed:', g.isDirected())
print('Number of nodes:', g.numberOfNodes())
print('Number of edges:', g.numberOfEdges())
print('Number of self-loops:', g.numberOfSelfLoops())
Class: <class 'networkit.graph.Graph'>

Directed: False
Number of nodes: 10
Number of edges: 18
Number of self-loops: 0
[12]:
# Print a compact summary of common network properties (see output below)
nk.overview(g)
Network Properties:
nodes, edges                    10, 18
directed?                       False
weighted?                       False
isolated nodes                  0
self-loops                      0
density                         0.400000
clustering coefficient          0.378095
min/max/avg degree              2, 8, 3.600000
degree assortativity            -0.243463
number of connected components  1
size of largest component       10 (100.00 %)

2) Nodes and their properties

[13]:
def func_called_for_each_node(node):
    """Node callback: print the node id followed by two spaces (no newline)."""
    print(str(node), end='  ')

# Invoke the callback once for every node of the graph
g.forNodes(func_called_for_each_node)
0  1  2  3  4  5  6  7  8  9

3) Edges and their properties

[14]:
def func_called_for_each_edge(source, target, edge_weight, edge_id):
    """Edge callback: print '(source, target)' followed by two spaces (no newline)."""
    label = '({}, {})'.format(source, target)
    print(label, end='  ')

# Invoke the callback once for every edge of the graph
g.forEdges(func_called_for_each_edge)
(1, 0)  (1, 0)  (2, 0)  (2, 1)  (3, 1)  (3, 2)  (4, 1)  (4, 3)  (5, 4)  (5, 2)  (6, 5)  (6, 0)  (7, 5)  (7, 1)  (8, 1)  (8, 4)  (9, 1)  (9, 2)

Calculating graph measures and metrics

Centrality

[15]:
# One node-centrality measure per line; each assignment overwrites the previous one,
# so only the last result survives (the cell just demonstrates the available APIs)
centrality_values = nk.centrality.ApproxBetweenness(g).run().scores()
centrality_values = nk.centrality.ApproxCloseness(g, nSamples=10).run().scores()
centrality_values = nk.centrality.Betweenness(g).run().scores()
centrality_values = nk.centrality.Closeness(g, True, nk.centrality.ClosenessVariant.Standard).run().scores()
centrality_values = nk.centrality.Closeness(g, True, nk.centrality.ClosenessVariant.Generalized).run().scores()
centrality_values = nk.centrality.DegreeCentrality(g).run().scores()
centrality_values = nk.centrality.DynApproxBetweenness(g).run().scores()
centrality_values = nk.centrality.DynBetweenness(g).run().scores()
centrality_values = nk.centrality.EigenvectorCentrality(g).run().scores()
centrality_values = nk.centrality.EstimateBetweenness(g, nSamples=10).run().scores()
centrality_values = nk.centrality.HarmonicCloseness(g).run().scores()
centrality_values = nk.centrality.KPathCentrality(g).run().scores()
# centrality_values = nk.centrality.KadabraBetweenness(g).run().scores()  # slow
centrality_values = nk.centrality.KatzCentrality(g).run().scores()
centrality_values = nk.centrality.LaplacianCentrality(g).run().scores()
centrality_values = nk.centrality.LocalClusteringCoefficient(g).run().scores()
centrality_values = nk.centrality.PageRank(g).run().scores()
# centrality_values = nk.centrality.SciPyEVZ(g).run().scores()  # scipy dependency
# centrality_values = nk.centrality.SciPyPageRank(g).run().scores()  # scipy dependency
centrality_values = nk.centrality.Sfigality(g).run().scores()
centrality_values = nk.centrality.SpanningEdgeCentrality(g).run().scores()
[16]:
# Top-k variants return only the k highest-scoring nodes
top_centrality_values = nk.centrality.TopCloseness(g, k=5).run().topkScoresList()
top_centrality_values = nk.centrality.TopHarmonicCloseness(g, k=5).run().topkScoresList()
[17]:
# Group centrality: find a set of nodes that is maximally central as a whole
group_centrality_values = nk.centrality.ApproxGroupBetweenness(g, groupSize=4, epsilon=0.1).run().groupMaxBetweenness()

group = nk.centrality.GroupCloseness(g).run().groupMaxCloseness()
group_score = nk.centrality.GroupCloseness(g).run().scoreOfGroup(group)

group = nk.centrality.GroupDegree(g).run().groupMaxDegree()
group_score = nk.centrality.GroupDegree(g).run().scoreOfGroup(group)
[18]:
# Partition-based centralities need a community detection result as input
partition = nk.community.detectCommunities(g, inspect=False)
partition_centrality_values = nk.centrality.LocalPartitionCoverage(g, partition).run().scores()
node_centrality_value = nk.centrality.PermanenceCentrality(g, partition).run().getPermanence(0)
Communities detected in 0.00013 [s]
[19]:
# Dynamic centrality algorithms for a single node / top-k nodes
node = nk.centrality.DynBetweennessOneNode(g, node=0).run()
# node = nk.centrality.DynKatzCentrality(g, 0).run()  # slow
node = nk.centrality.DynTopHarmonicCloseness(g, 0).run()
[20]:
# Matrix / spectral helpers (the eigenvector functions need scipy, hence commented out)
matrix = nk.centrality.PageRankMatrix(g)
# eigenvector = nk.centrality.adjacencyEigenvector(g, False)  # scipy dependency
# eigenvectors = nk.centrality.symmetricEigenvectors(matrix)  # scipy dependency
[21]:
# Rank nodes by centrality and compare an exact ranking with an approximate one
ranking1 = nk.centrality.ranking(g, algorithm=nk.centrality.Betweenness, normalized=False)
ranking2 = nk.centrality.ranking(g, algorithm=nk.centrality.ApproxBetweenness, normalized=False)
ranks = nk.centrality.rankPerNode(ranking1)
# Fixed variable-name typo: rank_erros -> rank_errors
rank_errors = nk.centrality.relativeRankErrors(ranking1, ranking2)

# Raw centrality scores per node
scores = nk.centrality.scores(g, algorithm=nk.centrality.Betweenness)

Groups of nodes

Cliques

[22]:
# TODO

Cores

[23]:
# TODO

Components

[24]:
# TODO

Communities

[25]:
# Detect communities and score the resulting partition with modularity
cm = nk.community.detectCommunities(g, inspect=False)
communities = cm.getVector()
modularity_value = nk.community.Modularity().getQuality(cm, g)
Communities detected in 0.00155 [s]
[26]:
# TODO

Paths and distances

[27]:
# TODO

Global properties

[28]:
# Sum of all edge weights (here 18.0 for 18 edges, so each edge presumably counts as 1.0
# in this unweighted graph — see the overview output above)
g.totalEdgeWeight()

# TODO
[28]:
18.0

Graph visualization

[29]:
# TODO