#!/usr/bin/env python
#-*- coding:utf-8 -*-
#
# This file is part of the NNGT project to generate and analyze
# neuronal networks and their activity.
# Copyright (C) 2015-2017  Tanguy Fardet
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

""" Tools for graph analysis using the graph libraries """

import numpy as np
import scipy.sparse.linalg as spl

import nngt
from nngt.lib import InvalidArgument, nonstring_container, is_integer
from .activity_analysis import get_b2, get_firing_rate
from .bayesian_blocks import bayesian_blocks


__all__ = [
    "adjacency_matrix",
    "assortativity",
    "betweenness_distrib",
    "binning",
    "closeness",
    "clustering",
    "degree_distrib",
    "diameter",
    "local_clustering",
    "node_attributes",
    "num_iedges",
    "num_scc",
    "num_wcc",
    "reciprocity",
    "spectral_radius",
    "subgraph_centrality",
    "transitivity"
]


# ------------- #
# Distributions #
# ------------- #

def degree_distrib(graph, deg_type="total", node_list=None, use_weights=False,
                   log=False, num_bins='bayes'):
    '''
    Degree distribution of a graph.

    .. versionchanged:: 0.7
        Inclusion of automatic binning.

    Parameters
    ----------
    graph : :class:`~nngt.Graph` or subclass
        The graph to analyze.
    deg_type : string, optional (default: "total")
        Type of degree to consider ("in", "out", or "total").
    node_list : list or numpy.array of ints, optional (default: None)
        Restrict the distribution to a set of nodes (default: all nodes).
    use_weights : bool, optional (default: False)
        Use weighted degrees (do not take the sign into account: all weights
        are positive).
    log : bool, optional (default: False)
        Use log-spaced bins.
    num_bins : int, list or str, optional (default: 'bayes')
        Any of the automatic methods from :func:`numpy.histogram`, or 'bayes'
        for automatic bin optimization. Otherwise, an int giving the number
        of bins or the explicit list of bins can be provided.

    See also
    --------
    :func:`numpy.histogram`, :func:`~nngt.analysis.binning`

    Returns
    -------
    counts : :class:`numpy.array`
        Number of nodes in each bin.
    deg : :class:`numpy.array`
        Bins.
    '''
    degrees = graph.get_degrees(deg_type, node_list, use_weights)
    if num_bins == 'bayes' or is_integer(num_bins):
        num_bins = binning(degrees, bins=num_bins, log=log)
    return np.histogram(degrees, num_bins)
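
# Illustrative usage (not part of the original module).  The generator call
# below assumes NNGT's `nngt.generation.erdos_renyi` helper; any existing
# :class:`~nngt.Graph` instance can be passed instead.
#
#     import nngt.generation as ng
#     g = ng.erdos_renyi(nodes=1000, avg_deg=100)
#     counts, bins = degree_distrib(g, deg_type="in", num_bins='bayes')
#     # counts[i] is the number of nodes whose in-degree falls in
#     # [bins[i], bins[i + 1])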


def betweenness_distrib(graph, use_weights=True, nodes=None,
                        num_nbins='bayes', num_ebins='bayes', log=False):
    '''
    Betweenness distribution of a graph.

    .. versionchanged:: 0.7
        Inclusion of automatic binning.

    Parameters
    ----------
    graph : :class:`~nngt.Graph` or subclass
        The graph to analyze.
    use_weights : bool, optional (default: True)
        Use weighted degrees (do not take the sign into account: all weights
        are positive).
    nodes : list or numpy.array of ints, optional (default: all nodes)
        Restrict the distribution to a set of nodes (only impacts the node
        attribute).
    num_nbins : int, list or str, optional (default: 'bayes')
        Bins for the node betweenness: any of the automatic methods from
        :func:`numpy.histogram`, or 'bayes' for automatic bin optimization.
        Otherwise, an int giving the number of bins or the explicit list of
        bins can be provided.
    num_ebins : int, list or str, optional (default: 'bayes')
        Bins for the edge betweenness (same options as `num_nbins`).
    log : bool, optional (default: False)
        Use log-spaced bins.

    Returns
    -------
    ncounts : :class:`numpy.array`
        Number of nodes in each bin.
    nbetw : :class:`numpy.array`
        Bins for node betweenness.
    ecounts : :class:`numpy.array`
        Number of edges in each bin.
    ebetw : :class:`numpy.array`
        Bins for edge betweenness.
    '''
    ia_nbetw, ia_ebetw = graph.get_betweenness(
        btype="both", use_weights=use_weights)
    if nodes is not None:
        ia_nbetw = ia_nbetw[nodes]
    ra_nbins, ra_ebins = None, None
    if num_ebins == 'bayes' or log:
        ra_ebins = binning(ia_ebetw, bins=num_ebins, log=log)
    else:
        ra_ebins = num_ebins
    if num_nbins == 'bayes' or log:
        ra_nbins = binning(ia_nbetw, bins=num_nbins, log=log)
    else:
        ra_nbins = num_nbins
    ncounts, nbetw = np.histogram(ia_nbetw, ra_nbins)
    ecounts, ebetw = np.histogram(ia_ebetw, ra_ebins)
    return ncounts, nbetw, ecounts, ebetw
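
# Illustrative usage (assumes `g` is an existing weighted
# :class:`~nngt.Graph`): the four returned arrays form two ready-to-plot
# histograms.
#
#     ncounts, nbetw, ecounts, ebetw = betweenness_distrib(g, log=True)
#     # node histogram: (nbetw, ncounts); edge histogram: (ebetw, ecounts)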


# --------------- #
# Node properties #
# --------------- #

def closeness(graph, nodes=None, use_weights=False):
    '''
    Return the closeness centrality for each node in `nodes`.

    Parameters
    ----------
    graph : :class:`~nngt.Graph` object
        Graph to analyze.
    nodes : array-like container with node ids, optional (default: all nodes)
        Nodes for which the closeness centrality should be computed.
    use_weights : bool, optional (default: False)
        Whether weighted closeness should be used.
    '''
    return nngt.analyze_graph["closeness"](graph, nodes, use_weights)


def local_clustering(graph, nodes=None):
    '''
    Local clustering coefficient of the nodes. Defined as

    .. math::
        c_i = 3 \\times \\frac{\\text{triangles}}{\\text{connected triples}}

    Parameters
    ----------
    graph : :class:`~nngt.Graph` object
        Graph to analyze.
    nodes : array-like container with node ids, optional (default: all nodes)
        Nodes for which the local clustering coefficient should be computed.
    '''
    if nngt._config["backend"] == "igraph":
        return np.array(graph.transitivity_local_undirected(nodes))
    elif nngt._config["backend"] == "networkx":
        raise NotImplementedError("Will soon be available for NX.")
    return nngt.analyze_graph["local_clustering"](graph, nodes)
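
# Illustrative usage (assumes `g` is an existing :class:`~nngt.Graph`):
#
#     cc = local_clustering(g, nodes=[0, 1, 2])
#     # cc[k] is the local clustering coefficient of node `nodes[k]`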


# ---------------- #
# Graph properties #
# ---------------- #

def assortativity(graph, deg_type="in"):
    '''
    Assortativity of the graph.

    Parameters
    ----------
    graph : :class:`~nngt.Graph` or subclass
        Network to analyze.
    deg_type : string, optional (default: 'in')
        Type of degree to take into account (among 'in', 'out' or 'total').

    Returns
    -------
    a float quantifying the graph assortativity.
    '''
    if nngt._config["backend"] == "igraph":
        deg_list = graph.get_degrees(deg_type=deg_type)
        return graph.assortativity(deg_list, directed=graph.is_directed())
        #~ return graph.assortativity(deg_type, directed=graph.is_directed())
    elif nngt._config["backend"] == "graph-tool":
        return nngt.analyze_graph["assortativity"](graph, deg_type)[0]
    else:
        if deg_type == 'total':
            raise InvalidArgument("Cannot use total degree assortativity "
                                  "with `networkx`.")
        return nngt.analyze_graph["assortativity"](graph, x=deg_type,
                                                   y=deg_type)
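
# Illustrative usage (assumes `g` is an existing directed
# :class:`~nngt.Graph`): positive values mean that nodes of similar degree
# tend to be connected to one another.
#
#     r_in = assortativity(g, deg_type="in")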


def reciprocity(graph):
    '''
    Graph reciprocity, defined as :math:`E^\\leftrightarrow/E`, where
    :math:`E^\\leftrightarrow` and :math:`E` are, respectively, the number of
    bidirectional edges and the total number of edges in the graph.

    Returns
    -------
    a float quantifying the reciprocity.
    '''
    if nngt._config["backend"] == "igraph":
        return graph.reciprocity()
    else:
        return nngt.analyze_graph["reciprocity"](graph)


def clustering(graph):
    '''
    Global clustering coefficient of the graph. Defined as:

    .. math::
        c = 3 \\times \\frac{\\text{triangles}}{\\text{connected triples}}
    '''
    if nngt._config["backend"] == "igraph":
        return graph.transitivity_undirected()
    else:
        return nngt.analyze_graph["clustering"](graph)


def transitivity(graph):
    '''
    Same as :func:`nngt.analysis.clustering` (for networkx users).
    '''
    return clustering(graph)


def num_iedges(graph):
    ''' Return the proportion of inhibitory connections. '''
    num_einhib = np.sum(graph["type"].a < 0)
    return float(num_einhib) / graph.edge_nb()


def num_scc(graph, listing=False):
    '''
    Return the number of strongly connected components (SCCs).

    SCCs are ensembles where all contained nodes can reach any other node in
    the ensemble using the directed edges.

    See also
    --------
    num_wcc
    '''
    lst_histo = None
    if nngt._config["backend"] == "graph-tool":
        vprop_comp, lst_histo = nngt.analyze_graph["scc"](graph,
                                                          directed=True)
    elif nngt._config["backend"] == "igraph":
        lst_histo = graph.clusters()
        lst_histo = [cluster for cluster in lst_histo]
    else:
        lst_histo = [comp for comp in nngt.analyze_graph["scc"](graph)]
    if listing:
        return len(lst_histo), lst_histo
    else:
        return len(lst_histo)


def num_wcc(graph, listing=False):
    '''
    Return the number of weakly connected components, i.e. the connected
    components obtained when the directivity of the edges is ignored (all
    edges are considered bidirectional).

    See also
    --------
    num_scc
    '''
    lst_histo = None
    if nngt._config["backend"] == "graph-tool":
        _, lst_histo = nngt.analyze_graph["wcc"](graph, directed=False)
    elif nngt._config["backend"] == "igraph":
        lst_histo = graph.clusters("WEAK")
        lst_histo = [cluster for cluster in lst_histo]
    else:
        lst_histo = [comp for comp in nngt.analyze_graph["wcc"](graph)]
    if listing:
        return len(lst_histo), lst_histo
    else:
        return len(lst_histo)


def diameter(graph):
    '''
    Pseudo-diameter of the graph.

    @todo: weighted diameter
    '''
    if nngt._config["backend"] == "igraph":
        return graph.diameter()
    elif nngt._config["backend"] == "networkx":
        return nngt.analyze_graph["diameter"](graph)
    else:
        return nngt.analyze_graph["diameter"](graph)[0]


# ------------------- #
# Spectral properties #
# ------------------- #

def spectral_radius(graph, typed=True, weighted=True):
    '''
    Spectral radius of the graph, defined as the eigenvalue of greatest
    modulus.

    Parameters
    ----------
    graph : :class:`~nngt.Graph` or subclass
        Network to analyze.
    typed : bool, optional (default: True)
        Whether the excitatory/inhibitory type of the connections should be
        considered.
    weighted : bool, optional (default: True)
        Whether the weights should be taken into account.

    Returns
    -------
    the spectral radius as a float.
    '''
    weights = None
    if typed and "type" in graph.eproperties.keys():
        weights = graph.eproperties["type"].copy()
    if weighted and "weight" in graph.eproperties.keys():
        if weights is not None:
            weights = np.multiply(weights, graph.eproperties["weight"])
        else:
            weights = graph.eproperties["weight"].copy()
    mat_adj = nngt.analyze_graph["adjacency"](graph, weights)
    eigenval = [0]
    try:
        eigenval = spl.eigs(mat_adj, return_eigenvectors=False)
    except spl.ArpackNoConvergence as err:
        eigenval = err.eigenvalues
    if len(eigenval):
        return np.amax(np.absolute(eigenval))
    else:
        raise spl.ArpackNoConvergence(
            "Could not compute the spectral radius.", [], [])
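
# Cross-check sketch (illustrative, assumes `g` is a small existing
# :class:`~nngt.Graph`): the spectral radius can be verified against a dense
# eigenvalue decomposition of the unweighted adjacency matrix.
#
#     rho = spectral_radius(g, typed=False, weighted=False)
#     dense = adjacency_matrix(g, types=False, weights=False).toarray()
#     assert np.isclose(rho, np.abs(np.linalg.eigvals(dense)).max())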


def adjacency_matrix(graph, types=True, weights=True):
    '''
    Adjacency matrix of the graph.

    Parameters
    ----------
    graph : :class:`~nngt.Graph` or subclass
        Network to analyze.
    types : bool, optional (default: True)
        Whether the excitatory/inhibitory type of the connections should be
        considered (only if the weighing factor is the synaptic strength).
    weights : bool or string, optional (default: True)
        Whether weights should be taken into account; if True, then
        connections are weighted by their synaptic strength, if False, then
        a binary matrix is returned, if `weights` is a string, then the
        weighting is the corresponding value of the edge attribute (e.g.
        "distance" will return an adjacency matrix where each connection is
        multiplied by its length).

    Returns
    -------
    a :class:`~scipy.sparse.csr_matrix`.
    '''
    return graph.adjacency_matrix(types=types, weights=weights)
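
# Illustrative usage (assumes `g` is an existing :class:`~nngt.Graph` and
# that the rows of the matrix index the source nodes, which is the usual
# convention): out-degrees are then the row sums of the binary matrix.
#
#     adj = adjacency_matrix(g, types=False, weights=False)
#     out_degrees = np.asarray(adj.sum(axis=1)).ravel()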


def subgraph_centrality(graph, weights=True, normalize="max_centrality"):
    '''
    Subgraph centrality, according to [Estrada2005], for each node in the
    graph.

    **[Estrada2005]:** Ernesto Estrada and Juan A. Rodríguez-Velázquez,
    Subgraph centrality in complex networks, PHYSICAL REVIEW E 71, 056103
    (2005), `available on ArXiv <http://www.arxiv.org/pdf/cond-mat/0504730>`_.

    Parameters
    ----------
    graph : :class:`~nngt.Graph` or subclass
        Network to analyze.
    weights : bool or string, optional (default: True)
        Whether weights should be taken into account; if True, then
        connections are weighted by their synaptic strength, if False, then
        a binary matrix is returned, if `weights` is a string, then the
        weighting is the corresponding value of the edge attribute (e.g.
        "distance" will return an adjacency matrix where each connection is
        multiplied by its length).
    normalize : str, optional (default: "max_centrality")
        Whether the centrality should be normalized. Accepted normalizations
        are "max_eigenvalue" and "max_centrality"; the first rescales the
        adjacency matrix by its largest eigenvalue before taking the
        exponential, the second sets the maximum centrality to one.

    Returns
    -------
    centralities : :class:`numpy.ndarray`
        The subgraph centrality of each node.
    '''
    adj_mat = graph.adjacency_matrix(types=False, weights=weights).tocsc()
    centralities = None
    if normalize == "max_centrality":
        centralities = spl.expm(adj_mat / adj_mat.max()).diagonal()
        centralities /= centralities.max()
    elif normalize == "max_eigenvalue":
        norm = np.abs(spl.eigs(adj_mat, k=1, return_eigenvectors=False)[0])
        centralities = spl.expm(adj_mat / norm).diagonal()
    else:
        raise InvalidArgument('`normalize` should be either "max_eigenvalue"'
                              ' or "max_centrality".')
    return centralities
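
# Worked note (illustrative): the subgraph centrality of node i is the i-th
# diagonal entry of expm(A), i.e. a sum over the closed walks of length k
# starting and ending at i, each weighted by 1/k!.  Assumes `g` is an
# existing :class:`~nngt.Graph`.
#
#     sc = subgraph_centrality(g, weights=False, normalize="max_centrality")
#     ranking = np.argsort(sc)[::-1]  # most central nodes first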


# ----------------------- #
# Get all node properties #
# ----------------------- #

def node_attributes(network, attributes, nodes=None, data=None):
    '''
    Return node `attributes` for a set of `nodes`.

    Parameters
    ----------
    network : :class:`~nngt.Graph`
        The graph where the `nodes` belong.
    attributes : str or list
        Attributes which should be returned, among:
        * "betweenness"
        * "clustering"
        * "closeness"
        * "in-degree", "out-degree", "total-degree"
        * "subgraph_centrality"
    nodes : list, optional (default: all nodes)
        Nodes for which the attributes should be returned.
    data : :class:`numpy.array` of shape (N, 2), optional (default: None)
        Potential data on the spike events; if not None, it must contain the
        sender ids on the first column and the spike times on the second.

    Returns
    -------
    values : array-like or dict
        Returns the attributes, either as an array if only one attribute is
        required (`attributes` is a :obj:`str`) or as a :obj:`dict` of
        arrays.
    '''
    if nonstring_container(attributes):
        values = {}
        for attr in attributes:
            values[attr] = _get_attribute(network, attr, nodes, data)
        return values
    else:
        return _get_attribute(network, attributes, nodes, data)
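
# Illustrative usage (assumes `g` is an existing :class:`~nngt.Graph`):
# requesting several attributes at once returns a dict of arrays.
#
#     attrs = node_attributes(g, ["in-degree", "clustering"])
#     hubs = np.where(attrs["in-degree"] > attrs["in-degree"].mean())[0]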


def find_nodes(network, attributes, equal=None, upper_bound=None,
               lower_bound=None, upper_fraction=None, lower_fraction=None,
               data=None):
    '''
    Return the nodes in the graph which fulfill the given conditions.

    Parameters
    ----------
    network : :class:`~nngt.Graph`
        The graph where the `nodes` belong.
    attributes : str or list
        Properties on which the conditions apply, among:
        * "B2" (requires NEST or `data` entry)
        * "betweenness"
        * "clustering"
        * "firing_rate" (requires NEST or `data` entry)
        * "in-degree", "out-degree", "total-degree"
        * "subgraph_centrality"
        * any custom property formerly set by the user
    equal : optional (default: None)
        Value to which `attributes` should be equal. For a given property,
        this entry cannot be used together with any of the others.
    upper_bound : optional (default: None)
        Strict upper bound on the value of `attributes` for the desired
        nodes. Can be combined with all other entries, except `equal`.
    lower_bound : optional (default: None)
        Lower bound (inclusive) on the value of `attributes` for the desired
        nodes. Can be combined with all other entries, except `equal`.
    upper_fraction : optional (default: None)
        Only the nodes that belong to the `upper_fraction` with the highest
        values for `attributes` are kept.
    lower_fraction : optional (default: None)
        Only the nodes that belong to the `lower_fraction` with the lowest
        values for `attributes` are kept.

    Notes
    -----
    When combining both `*_fraction` and `*_bound` entries, their effects
    are cumulated, i.e. only the nodes belonging to the fraction AND
    displaying a value that is consistent with the boundary are kept.

    Examples
    --------
    nodes = find_nodes(g, "in-degree", upper_bound=15, lower_bound=10)
    nodes2 = find_nodes(g, ["total-degree", "clustering"],
                        equal=[20, None], lower_bound=[None, 0.1])
    '''
    if not nonstring_container(attributes):
        attributes = [attributes]
        equal = [equal]
        upper_bound = [upper_bound]
        lower_bound = [lower_bound]
        upper_fraction = [upper_fraction]
        lower_fraction = [lower_fraction]
    assert not np.any([
        len(attributes) - len(equal), len(upper_bound) - len(equal),
        len(lower_bound) - len(equal), len(upper_fraction) - len(equal),
        len(lower_fraction) - len(equal)])

    nodes = set(range(network.node_nb()))

    # find the nodes
    di_attr = node_attributes(network, attributes)
    keep = np.ones(network.node_nb(), dtype=bool)

    for i in range(len(attributes)):
        attr, eq = attributes[i], equal[i]
        ub, lb = upper_bound[i], lower_bound[i]
        uf, lf = upper_fraction[i], lower_fraction[i]
        # check that the combination is valid
        if eq is not None:
            assert (ub is None)*(lb is None)*(uf is None)*(lf is None), \
                "`equal` entry is incompatible with all other entries."
            keep *= (_get_attribute(network, attr) == eq)
        if ub is not None:
            keep *= (_get_attribute(network, attr) < ub)
        if lb is not None:
            keep *= (_get_attribute(network, attr) >= lb)
        values = None
        if uf is not None or lf is not None:
            values = _get_attribute(network, attr)
        if uf is not None:
            num_keep = int(network.node_nb()*uf)
            sort = np.argsort(values)[:-num_keep]
            keep_tmp = np.ones(network.node_nb(), dtype=bool)
            keep_tmp[sort] = 0
            keep *= keep_tmp
        if lf is not None:
            num_keep = int(network.node_nb()*lf)
            sort = np.argsort(values)[:num_keep]
            keep_tmp = np.zeros(network.node_nb(), dtype=bool)
            keep_tmp[sort] = 1
            keep *= keep_tmp

    nodes = nodes.intersection(np.where(keep)[0])

    return nodes


# ----- #
# Tools #
# ----- #

def binning(x, bins='bayes', log=False):
    """
    Binning function providing automatic binning using Bayesian blocks in
    addition to standard linear and logarithmic uniform bins.

    .. versionadded:: 0.7

    Parameters
    ----------
    x : array-like
        Array of data to be histogrammed.
    bins : int, list or str, optional (default: 'bayes')
        If `bins` is 'bayes', use Bayesian blocks to compute dynamic bin
        widths; if it is an int, the interval is separated into that many
        bins (linearly or logarithmically spaced depending on `log`); if it
        is a list or 'auto', it is returned unchanged and passed directly to
        :func:`numpy.histogram`.
    log : bool, optional (default: False)
        Whether the bins should be evenly spaced on a logarithmic scale.
    """
    x = np.asarray(x)
    if bins == 'bayes':
        return bayesian_blocks(x)
    elif nonstring_container(bins) or bins == "auto":
        return bins
    elif is_integer(bins):
        if log:
            return np.logspace(np.log10(np.maximum(x.min(), 1e-10)),
                               np.log10(x.max()), bins)
        else:
            return np.linspace(x.min(), x.max(), bins)
    else:
        raise ValueError("unrecognized bin code: '" + str(bins) + "'.")
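
# Illustrative usage: `binning` only computes bin edges (or passes through
# the value it was given), so it composes directly with
# :func:`numpy.histogram`.
#
#     data = np.random.exponential(scale=2., size=1000)
#     counts, edges = np.histogram(data, binning(data, bins=20, log=True))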


def _get_attribute(network, attribute, nodes=None, data=None):
    '''
    If `data` is not None, it must be an np.array of shape (N, 2).
    '''
    if attribute.lower() == "b2":
        return get_b2(network, nodes=nodes, data=data)
    elif attribute == "betweenness":
        betw = network.get_betweenness("node")
        if nodes is not None:
            return betw[nodes]
        return betw
    elif attribute == "closeness":
        return closeness(network, nodes=nodes)
    elif attribute == "clustering":
        return local_clustering(network, nodes=nodes)
    elif "degree" in attribute.lower():
        dtype = attribute[:attribute.index("-")]
        if dtype.startswith("w"):
            return network.get_degrees(dtype[1:], node_list=nodes,
                                       use_weights=True, use_types=True)
        else:
            return network.get_degrees(dtype, node_list=nodes)
    elif attribute == "firing_rate":
        return get_firing_rate(network, nodes=nodes, data=data)
    elif attribute == "subgraph_centrality":
        sc = subgraph_centrality(network)
        if nodes is not None:
            return sc[nodes]
        return sc
    elif attribute in network.nodes_attributes:
        return network.get_node_attributes(nodes=nodes, name=attribute)
    else:
        raise RuntimeError(
            "Attribute '{}' is not available.".format(attribute))