# Source code for nngt.analysis.nx_functions

#-*- coding:utf-8 -*-
#
# analysis/nx_functions.py
#
# This file is part of the NNGT project, a graph-library for standardized and
# and reproducible graph analysis: generate and analyze networks with your
# favorite graph library (graph-tool/igraph/networkx) on any platform, without
# any change to your code.
# Copyright (C) 2015-2022 Tanguy Fardet
#
# This program is free software: you can redistribute it and/or modify
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

""" Tools to analyze graphs with the networkx backend """

import numpy as np
import scipy.sparse as ssp

from ..lib.test_functions import nonstring_container, is_integer
from ..lib.graph_helpers import _get_nx_weights, _get_nx_graph

import networkx as nx

[docs]def global_clustering_binary_undirected(g):
'''
Returns the undirected global clustering coefficient.

This corresponds to the ratio of undirected triangles to the number of

Parameters
----------
g : :class:~nngt.Graph
Graph to analyze.

References
----------
.. [nx-global-clustering] :nxdoc:algorithms.cluster.transitivity
'''
return nx.transitivity(g.graph.to_undirected(as_view=True))

[docs]def local_clustering_binary_undirected(g, nodes=None):
'''
Returns the undirected local clustering coefficient of some nodes.

If g is directed, then it is converted to a simple undirected graph
(no parallel edges).

Parameters
----------
g : :class:~nngt.Graph
Graph to analyze.
nodes : list, optional (default: all nodes)
The list of nodes for which the clustering will be returned

Returns
-------
lc : :class:numpy.ndarray
The list of clustering coefficients, on per node.

References
----------
.. [nx-local-clustering] :nxdoc:algorithms.cluster.clustering
'''
num_nodes = g.node_nb()

if nonstring_container(nodes):
num_nodes = len(nodes)
elif nodes is not None:
num_nodes = 1

lc = nx.clustering(g.graph.to_undirected(as_view=True), nodes=nodes,
weight=None)

if num_nodes == 1:
return lc

if nodes is None:
nodes = list(range(num_nodes))

return np.array([lc[n] for n in nodes], dtype=float)

[docs]def assortativity(g, degree, weights=None):
'''
Returns the assortativity of the graph.
This tells whether nodes are preferentially connected together depending
on their degree.

Parameters
----------
g : :class:~nngt.Graph
Graph to analyze.
degree : str
The type of degree that should be considered.
weights : bool or str, optional (default: binary edges)
Whether edge weights should be considered; if None or False
then use binary edges; if True, uses the 'weight' edge attribute,
otherwise uses any valid edge attribute required.

References
----------
.. [nx-assortativity]
:nxdoc:algorithms.assortativity.degree_assortativity_coefficient
'''
w = _get_nx_weights(g, weights)

return nx.degree_pearson_correlation_coefficient(
g.graph, x=degree, y=degree, weight=w)

[docs]def reciprocity(g):
'''
Calculate the edge reciprocity of the graph.

The reciprocity is defined as the number of edges that have a reciprocal
edge (an edge between the same nodes but in the opposite direction)
divided by the total number of edges.
This is also the probability for any given edge, that its reciprocal edge
exists.
By definition, the reciprocity of undirected graphs is 1.

@todo: check whether we can get this for single nodes for all libraries.

Parameters
----------
g : :class:~nngt.Graph
Graph to analyze.

References
----------
.. [nx-reciprocity] :nxdoc:algorithms.reciprocity.overall_reciprocity
'''
if not g.is_directed():
return 1.

return nx.overall_reciprocity(g.graph)

[docs]def closeness(g, weights=None, nodes=None, mode="out", harmonic=True,
default=np.NaN):
r'''
Returns the closeness centrality of some nodes.

Closeness centrality of a node u is defined, for the harmonic version,
as the sum of the reciprocal of the shortest path distance :math:d_{uv}
from u to the N - 1 other nodes in the graph (if mode is "out",
reciprocally :math:d_{vu}, the distance to u from another node v,
if mode is "in"):

.. math::

C(u) = \frac{1}{N - 1} \sum_{v \neq u} \frac{1}{d_{uv}},

or, using the arithmetic definition, as the reciprocal of the
average shortest path distance to/from u over to all other nodes:

.. math::

C(u) = \frac{n - 1}{\sum_{v \neq u} d_{uv}},

where d_{uv} is the shortest-path distance from u to v,
and n is the number of nodes in the component.

By definition, the distance is infinite when nodes are not connected by
a path in the harmonic case (such that :math:\frac{1}{d(v, u)} = 0),
while the distance itself is taken as zero for unconnected nodes in the
first equation.

Parameters
----------
g : :class:~nngt.Graph
Graph to analyze.
weights : bool or str, optional (default: binary edges)
Whether edge weights should be considered; if None or False
then use binary edges; if True, uses the 'weight' edge attribute,
otherwise uses any valid edge attribute required.
nodes : list, optional (default: all nodes)
The list of nodes for which the clutering will be returned
mode : str, optional (default: "out")
For directed graphs, whether the distances are computed from ("out") or
to ("in") each of the nodes.
harmonic : bool, optional (default: True)
Whether the arithmetic or the harmonic (recommended) version
of the closeness should be used.

Returns
-------
c : :class:numpy.ndarray
The list of closeness centralities, on per node.

References
----------
.. [nx-harmonic] :nxdoc:algorithms.centrality.harmonic_centrality
.. [nx-closeness] :nxdoc:algorithms.centrality.closeness_centrality
'''
w = _get_nx_weights(g, weights)

graph = g.graph

if graph.is_directed() and mode == "out":
graph = g.graph.reverse(copy=False)

c = None

if harmonic:
c = nx.harmonic_centrality(graph, distance=w)
else:
c = nx.closeness_centrality(graph, distance=w, wf_improved=False)

c = np.array([v for _, v in c.items()])

# normalize
if harmonic:
c *= 1 / (len(graph) - 1)
elif default != 0:
c[c == 0.] = default

if nodes is None:
return c

return c[nodes]

[docs]def betweenness(g, btype="both", weights=None):
'''
Returns the normalized betweenness centrality of the nodes and edges.

Parameters
----------
g : :class:~nngt.Graph
Graph to analyze.
btype : str, optional (default 'both')
The centrality that should be returned (either 'node', 'edge', or
'both'). By default, both betweenness centralities are computed.
weights : bool or str, optional (default: binary edges)
Whether edge weights should be considered; if None or False
then use binary edges; if True, uses the 'weight' edge attribute,
otherwise uses any valid edge attribute required.

Returns
-------
nb : :class:numpy.ndarray
The nodes' betweenness if btype is 'node' or 'both'
eb : :class:numpy.ndarray
The edges' betweenness if btype is 'edge' or 'both'

References
----------
.. [nx-ebetw] :nxdoc:algorithms.centrality.edge_betweenness_centrality
.. [nx-nbetw] :nxdoc:networkx.algorithms.centrality.betweenness_centrality
'''
w = _get_nx_weights(g, weights)

nb, eb = None, None

if btype in ("both", "node"):
di_nb = nx.betweenness_centrality(g.graph, weight=w)
nb    = np.array([di_nb[i] for i in g.get_nodes()])

if btype in ("both", "edge"):
di_eb = nx.edge_betweenness_centrality(g.graph, weight=w)
eb    = np.array([di_eb[tuple(e)] for e in g.edges_array])

if btype == "node":
return nb
elif btype == "edge":
return eb

return nb, eb

[docs]def connected_components(g, ctype=None):
'''
Returns the connected component to which each node belongs.

Parameters
----------
g : :class:~nngt.Graph
Graph to analyze.
ctype : str, optional (default 'scc')
Type of component that will be searched: either strongly connected
('scc', by default) or weakly connected ('wcc').

Returns
-------
cc, hist : :class:numpy.ndarray
The component associated to each node (cc) and the number of nodes in
each of the component (hist).

References
----------
.. [nx-ucc] :nxdoc:algorithms.components.connected_components
.. [nx-scc] :nxdoc:algorithms.components.strongly_connected_components
.. [nx-wcc] :nxdoc:algorithms.components.weakly_connected_components
'''
if ctype is None:
ctype = "scc" if g.is_directed() else "wcc"

res   = None

if not g.is_directed():
res = nx.connected_components(g.graph)
elif ctype == "scc":
res = nx.strongly_connected_components(g.graph)
elif ctype == "wcc":
res = nx.weakly_connected_components(g.graph)
else:
raise ValueError("Invalid ctype, only 'scc' and 'wcc' are allowed.")

cc   = np.zeros(g.node_nb(), dtype=int)
hist = []

for i, nodes in enumerate(res):
cc[list(nodes)] = i

hist.append(len(nodes))

return cc, np.array(hist, dtype=int)

[docs]def shortest_path(g, source, target, directed=None, weights=None,
combine_weights="mean"):
'''
Returns a shortest path between sourceand target.
The algorithms returns an empty list if there is no path between the nodes.

Parameters
----------
g : :class:~nngt.Graph
Graph to analyze.
source : int
Node from which the path starts.
target : int
Node where the path ends.
directed : bool, optional (default: g.is_directed())
Whether the edges should be considered as directed or not
(automatically set to False if g is undirected).
weights : str or array, optional (default: binary)
Whether to use weighted edges to compute the distances. By default,
all edges are considered to have distance 1.
combine_weights : str, optional (default: 'mean')
How to combine the weights of reciprocal edges if the graph is directed
but directed is set to False. It can be:

* "sum": the sum of the edge attribute values will be used for the new
edge.
* "mean": the mean of the edge attribute values will be used for the
new edge.
* "min": the minimum of the edge attribute values will be used for the
new edge.
* "max": the maximum of the edge attribute values will be used for the
new edge.

Returns
-------
path : list of ints
Order of the nodes making up the path from source to target.

References
----------
.. [nx-sp] :nxdoc:algorithms.shortest_paths.generic.shortest_path
'''
g, graph, w = _get_nx_graph(g, directed, weights, combine_weights,
return_all=True)

w = _get_nx_weights(g, w)

try:
return nx.shortest_path(graph, source, target, weight=w)
except nx.NetworkXNoPath:
return []

[docs]def all_shortest_paths(g, source, target, directed=None, weights=None,
combine_weights="mean"):
'''
Yields all shortest paths from source to target.
The algorithms returns an empty generator if there is no path between the
nodes.

Parameters
----------
g : :class:~nngt.Graph
Graph to analyze.
source : int
Node from which the paths starts.
target : int, optional (default: all nodes)
Node where the paths ends.
directed : bool, optional (default: g.is_directed())
Whether the edges should be considered as directed or not
(automatically set to False if g is undirected).
weights : str or array, optional (default: binary)
Whether to use weighted edges to compute the distances. By default,
all edges are considered to have distance 1.
combine_weights : str, optional (default: 'mean')
How to combine the weights of reciprocal edges if the graph is directed
but directed is set to False. It can be:

* "sum": the sum of the edge attribute values will be used for the new
edge.
* "mean": the mean of the edge attribute values will be used for the
new edge.
* "min": the minimum of the edge attribute values will be used for the
new edge.
* "max": the maximum of the edge attribute values will be used for the
new edge.

Returns
-------
all_paths : generator
Generator yielding paths as lists of ints.

References
----------
.. [nx-sp] :nxdoc:algorithms.shortest_paths.generic.all_shortest_paths
'''
g, graph, w = _get_nx_graph(g, directed, weights, combine_weights,
return_all=True)

w = _get_nx_weights(g, w)

try:
return nx.all_shortest_paths(graph, source, target, weight=w)
except nx.NetworkXNoPath:
return (_ for _ in [])

[docs]def shortest_distance(g, sources=None, targets=None, directed=None,
weights=None, combine_weights="mean"):
'''
Returns the length of the shortest paths between sourcesand targets.
The algorithms return infinity if there are no paths between nodes.

Parameters
----------
g : :class:~nngt.Graph
Graph to analyze.
sources : list of nodes, optional (default: all)
Nodes from which the paths must be computed.
targets : list of nodes, optional (default: all)
Nodes to which the paths must be computed.
directed : bool, optional (default: g.is_directed())
Whether the edges should be considered as directed or not
(automatically set to False if g is undirected).
weights : str or array, optional (default: binary)
Whether to use weighted edges to compute the distances. By default,
all edges are considered to have distance 1.
combine_weights : str, optional (default: 'mean')
How to combine the weights of reciprocal edges if the graph is directed
but directed is set to False. It can be:

* "sum": the sum of the edge attribute values will be used for the new
edge.
* "mean": the mean of the edge attribute values will be used for the
new edge.
* "min": the minimum of the edge attribute values will be used for the
new edge.
* "max": the maximum of the edge attribute values will be used for the
new edge.

Returns
-------
distance : float, or 1d/2d numpy array of floats
Distance (if single source and single target) or distance array.
For multiple sources and targets, the shape of the matrix is (S, T),
with S the number of sources and T the number of targets; for a single
source or target, return a 1d-array of length T or S.

References
----------
.. [nx-sp] :nxdoc:algorithms.shortest_paths.weighted.multi_source_dijkstra
'''
num_nodes = g.node_nb()

# check consistency for weights and directed
g, graph, w = _get_nx_graph(g, directed, weights, combine_weights,
return_all=True)

w = _get_nx_weights(g, w)

# check for single source/target case and convert sources and targets
if is_integer(sources):
if is_integer(targets):
try:
return nx.shortest_path_length(graph, sources, targets,
weight=w)
except Exception as e:
return np.inf

sources = [sources]
elif sources is None:
sources = range(num_nodes)

if is_integer(targets):
targets = [targets]

# compute distances
data, ii, jj = [], [], []

def _nx_sp(nx_graph, s, weight):
if weight is None:
return nx.single_source_shortest_path_length(nx_graph, s)

dist, _ = nx.multi_source_dijkstra(graph, [s], weight=weight)

return dist

for s in sources:
dist = _nx_sp(graph, s, w)

if targets is None:
data.extend(dist.values())
ii.extend((s for _ in range(len(dist))))
jj.extend(dist.keys())
else:
for t in targets:
if t in dist:
data.append(dist[t])
ii.append(s)
jj.append(t)

num_sources = num_nodes if sources is None else len(sources)
num_targets = num_nodes if targets is None else len(targets)

mat_dist = np.full((num_sources, num_targets), np.inf)
mat_dist[ii, jj] = data

if num_sources == 1:
return mat_dist[0]

if num_targets == 1:
return mat_dist.T[0]

return mat_dist

[docs]def average_path_length(g, sources=None, targets=None, directed=None,
weights=None, combine_weights="mean",
unconnected=False):
r'''
Returns the average shortest path length between sources and targets.
The algorithms raises an error if all nodes are not connected unless
unconnected is set to True.

The average path length is defined as

.. math::

L = \frac{1}{N_p} \sum_{u,v} d(u, v),

where :math:N_p is the number of paths between sources and targets,
and :math:d(u, v) is the shortest path distance from u to v.

If sources and targets are both None, then the total number of paths is
:math:N_p = N(N - 1), with :math:N the number of nodes in the graph.

Parameters
----------
g : :class:~nngt.Graph
Graph to analyze.
sources : list of nodes, optional (default: all)
Nodes from which the paths must be computed.
targets : list of nodes, optional (default: all)
Nodes to which the paths must be computed.
directed : bool, optional (default: g.is_directed())
Whether the edges should be considered as directed or not
(automatically set to False if g is undirected).
weights : str or array, optional (default: binary)
Whether to use weighted edges to compute the distances. By default,
all edges are considered to have distance 1.
combine_weights : str, optional (default: 'mean')
How to combine the weights of reciprocal edges if the graph is directed
but directed is set to False. It can be:

* "sum": the sum of the edge attribute values will be used for the new
edge.
* "mean": the mean of the edge attribute values will be used for the
new edge.
* "min": the minimum of the edge attribute values will be used for the
new edge.
* "max": the maximum of the edge attribute values will be used for the
new edge.
unconnected : bool, optional (default: False)
If set to true, ignores unconnected nodes and returns the average path
length of the existing paths.

References
----------
.. [nx-sp] :nxdoc:algorithms.shortest_paths.generic.average_shortest_path_length
'''
directed = g.is_directed() if directed is None else directed

if sources is None and targets is None and not unconnected:
g, graph, w = _get_nx_graph(g, directed, weights, combine_weights,
return_all=True)

w = _get_nx_weights(g, w)

return nx.average_shortest_path_length(graph, weight=w)

mat_dist = shortest_distance(g, sources=sources, targets=targets,
directed=directed, weights=weights)

if not unconnected and np.any(np.isinf(mat_dist)):
raise nx.NetworkXNoPath("sources and target do not belong to the "
"same connected component.")

# compute the number of path
num_paths = np.sum(mat_dist != 0)

# compute average path length
if unconnected:
num_paths -= np.sum(np.isinf(mat_dist))

return np.nansum(mat_dist) / num_paths

return np.sum(mat_dist) / num_paths

[docs]def diameter(g, directed=None, weights=None, combine_weights="mean",
is_connected=False):
'''
Returns the diameter of the graph.

.. versionchanged:: 2.3
Added combine_weights argument.

.. versionchanged:: 2.0
Added directed and is_connected arguments.

It returns infinity if the graph is not connected (strongly connected for
directed graphs) unless is_connected is True, in which case it returns
the longest existing shortest distance.

Parameters
----------
g : :class:~nngt.Graph
Graph to analyze.
directed : bool, optional (default: g.is_directed())
Whether to compute the directed diameter if the graph is directed.
If False, then the graph is treated as undirected. The option switches
to False automatically if g is undirected.
weights : bool or str, optional (default: binary edges)
Whether edge weights should be considered; if None or False
then use binary edges; if True, uses the 'weight' edge attribute,
otherwise uses any valid edge attribute required.
combine_weights : str, optional (default: 'mean')
How to combine the weights of reciprocal edges if the graph is directed
but directed is set to False. It can be:

* "sum": the sum of the edge attribute values will be used for the new
edge.
* "mean": the mean of the edge attribute values will be used for the
new edge.
* "min": the minimum of the edge attribute values will be used for the
new edge.
* "max": the maximum of the edge attribute values will be used for the
new edge.
is_connected : bool, optional (default: False)
If False, check whether the graph is connected or not and return
infinite diameter if graph is unconnected. If True, the graph is
assumed to be connected.

--------
:func:nngt.analysis.shortest_distance

References
----------
.. [nx-diameter] :nxdoc:algorithms.distance_measures.diameter
.. [nx-dijkstra] :nxdoc:algorithms.shortest_paths.weighted.all_pairs_dijkstra
'''
w = _get_nx_weights(g, weights)

# weighted or "connected" cases
if w is not None or is_connected:
dist = shortest_distance(g, directed=directed, weights=weights,
combine_weights=combine_weights)

if is_connected:
return np.max(dist[~np.isinf(dist)])

return np.max(dist)

# unweighted case
graph = _get_nx_graph(g, directed, w, combine_weights)

try:
return nx.diameter(graph)
except nx.exception.NetworkXError:
return np.inf

r'''
Returns the adjacency matrix :math:A of the graph.
With edge :math:i \leftarrow j corresponding to entry :math:A_{ij}.

Parameters
----------
g : :class:~nngt.Graph
Graph to analyze.
weights : bool or str, optional (default: binary edges)
Whether edge weights should be considered; if None or False
then returns the binary adjacency matrix; if True, returns the
weighted matrix, otherwise fills the matrix with any valid edge
attribute values.
mformat : str, optional (default: "csr")
Type of :mod:scipy.sparse matrix that will be returned, by
default :class:scipy.sparse.csr_matrix.

Returns
-------
The adjacency matrix as a :class:scipy.sparse.csr_matrix.

References
----------
.. [nx-adjacency] :nxdoc:.convert_matrix.to_scipy_sparse_matrix
'''
w = _get_nx_weights(g, weights)

return nx.to_scipy_sparse_matrix(g.graph, nodelist=range(g.node_nb()),
weight=w, format=mformat)

def get_edges(g):
'''
Returns the edges in the graph by order of creation.
'''
return g.edges_array