Source code for nngt.analysis.clustering

# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: 2015-2023 Tanguy Fardet
# SPDX-License-Identifier: GPL-3.0-or-later
# nngt/analysis/clustering.py

""" Tools for directed/weighted clsutering analysis """

import numpy as np

import nngt
from nngt.lib import nonstring_container
from nngt.lib.graph_helpers import _get_matrices


__all__ = [
    "global_clustering",
    "global_clustering_binary_undirected",
    "local_closure",
    "local_clustering",
    "local_clustering_binary_undirected",
    "triplet_count",
    "triangle_count",
]


def global_clustering_binary_undirected(g):
    '''
    Returns the undirected global clustering coefficient.

    This corresponds to the ratio of undirected triangles to the number of
    undirected triads.

    Parameters
    ----------
    g : :class:`~nngt.Graph`
        Graph to analyze.
    '''
    # Note, this function is overloaded by the library-specific version
    # if igraph, graph-tool, or networkx is used
    triangles = triangle_count(g, weights=None, directed=False)
    triplets  = triplet_count(g, weights=None, directed=False)

    return np.sum(triangles) / np.sum(triplets)


[docs]def global_clustering(g, directed=True, weights=None, method="continuous", mode="total", combine_weights="mean"): ''' Returns the global clustering coefficient. This corresponds to the ratio of triangles to the number of triplets. For directed and weighted cases, see definitions of generalized triangles and triplets in the associated functions below. Parameters ---------- g : :class:`~nngt.Graph` Graph to analyze. directed : bool, optional (default: True) Whether to compute the directed clustering if the graph is directed. weights : bool or str, optional (default: binary edges) Whether edge weights should be considered; if ``None`` or ``False`` then use binary edges; if ``True``, uses the 'weight' edge attribute, otherwise uses any valid edge attribute required. method : str, optional (default: 'continuous') Method used to compute the weighted clustering, either 'barrat' [Barrat2004]_, 'continuous' [Fardet2021]_, 'onnela' [Onnela2005]_, or 'zhang' [Zhang2005]_. mode : str, optional (default: "total") Type of clustering to use for directed graphs, among "total", "fan-in", "fan-out", "middleman", and "cycle" [Fagiolo2007]_. combine_weights : str, optional (default: 'mean') How to combine the weights of reciprocal edges if the graph is directed but `directed` is set to False. It can be: * "sum": the sum of the edge attribute values will be used for the new edge. * "mean": the mean of the edge attribute values will be used for the new edge. * "min": the minimum of the edge attribute values will be used for the new edge. * "max": the maximum of the edge attribute values will be used for the new edge. Reference --------- .. [gt-global-clustering] :gtdoc:`clustering.global_clustering` .. [ig-global-clustering] :igdoc:`transitivity_undirected` .. [nx-global-clustering] :nxdoc:`algorithms.cluster.transitivity` .. [Barrat2004] Barrat, Barthelemy, Pastor-Satorras, Vespignani. The Architecture of Complex Weighted Networks. PNAS 2004, 101 (11). :doi:`10.1073/pnas.0400087101`. .. [Onnela2005] Onnela, Saramäki, Kertész, Kaski. Intensity and Coherence of Motifs in Weighted Complex Networks. Phys. Rev. E 2005, 71 (6), 065103. :doi:`10.1103/physreve.71.065103`, arxiv:`cond-mat/0408629`. .. [Fagiolo2007] Fagiolo. Clustering in Complex Directed Networks. Phys. Rev. E 2007, 76 (2), 026107. :doi:`10.1103/PhysRevE.76.026107`, :arxiv:`physics/0612169`. .. [Zhang2005] Zhang, Horvath. A General Framework for Weighted Gene Co-Expression Network Analysis. Statistical Applications in Genetics and Molecular Biology 2005, 4 (1). :doi:`10.2202/1544-6115.1128`, `PDF <https://dibernardo.tigem.it/files/papers/2008/ zhangbin-statappsgeneticsmolbio.pdf>`_. .. [Fardet2021] Fardet, Levina. Weighted directed clustering: interpretations and requirements for heterogeneous, inferred, and measured networks. 2021. :arxiv:`2105.06318`. See also -------- :func:`~nngt.analysis.triplet_count` :func:`~nngt.analysis.triangle_count` ''' assert method in ("barrat", "continuous", "onnela", "zhang"), \ "Unknown method '{}'".format(method) # check directivity and weights directed *= g.is_directed() weighted = weights not in (False, None) if not directed and not weighted: return global_clustering_binary_undirected(g) elif not weighted: # directed clustering triangles = triangle_count(g, mode=mode) triplets = triplet_count(g, mode=mode) return np.sum(triangles) / np.sum(triplets) triangles, triplets = _triangles_and_triplets(g, directed, weights, method, mode, combine_weights, None) return np.sum(triangles) / np.sum(triplets)
[docs]def local_closure(g, directed=True, weights=None, method=None, mode="cycle-out", combine_weights="mean"): r''' Compute the local closure for each node, as defined in [Yin2019]_ as the fraction of 2-walks that are closed. For undirected binary or weighted adjacency matrices :math:`W = \{ w_{ij} \}`, the normal (or Zhang-like) definition is given by: .. math:: H_i^0 = \frac{\sum_{j\neq k} w_{ij} w_{jk} w_{ki}} {\sum_{j\neq k\neq i} w_{ij}w_{jk}} = \frac{W^3_{ii}}{\sum_{j \neq i} W^2_{ij}} While a continuous version of the local closure is also proposed as: .. math:: H_i = \frac{\sum_{j\neq k} \sqrt[3]{w_{ij} w_{jk} w_{ki}}^2} {\sum_{j\neq k\neq i} \sqrt{w_{ij}w_{jk}}} = \frac{\left( W^{\left[ \frac{2}{3} \right]} \right)_{ii}^3} {\sum_{j \neq i} \left( W^{\left[ \frac{1}{2} \right]} \right)^2_{ij}} with :math:`W^{[\alpha]} = \{ w^\alpha_{ij} \}`. Directed versions of the local closure where defined as follow for a node :math:`i` connected to nodes :math:`j` and :math:`k`: * "cycle-out" is given by the pattern [(i, j), (j, k), (k, i)], * "cycle-in" is given by the pattern [(k, j), (j, i), (i, k)], * "fan-in" is given by the pattern [(k, j), (j, i), (k, i)], * "fan-out" is given by the pattern [(i, j), (j, k), (i, k)]. See [Fardet2021]_ for more details. Parameters ---------- g : :class:`~nngt.Graph` Graph to analyze. directed : bool, optional (default: True) Whether to compute the directed clustering if the graph is directed. weights : bool or str, optional (default: binary edges) Whether edge weights should be considered; if ``None`` or ``False`` then use binary edges; if ``True``, uses the 'weight' edge attribute, otherwise uses any valid edge attribute required. method : str, optional (default: 'continuous') Method used to compute the weighted clustering, either 'normal'/'zhang' or 'continuous'. mode : str, optional (default: "circle-out") Type of clustering to use for directed graphs, among "circle-out", "circle-in", "fan-in", or "fan-out". combine_weights : str, optional (default: 'mean') How to combine the weights of reciprocal edges if the graph is directed but `directed` is set to False. It can be: * "sum": the sum of the edge attribute values will be used for the new edge. * "mean": the mean of the edge attribute values will be used for the new edge. * "min": the minimum of the edge attribute values will be used for the new edge. * "max": the maximum of the edge attribute values will be used for the new edge. References ---------- .. [Yin2019] Yin, Benson, and Leskovec. The Local Closure Coefficient: A New Perspective On Network Clustering. Proceedings of the Twelfth ACM International Conference on Web Search and Data Mining 2019, 303-311. :doi:`10.1145/3289600.3290991`, `PDF <https://www.cs.cornell.edu/~arb/ papers/closure-coefficients-WSDM-2019.pdf>`_. .. [Fardet2021] Fardet, Levina. Weighted directed clustering: interpretations and requirements for heterogeneous, inferred, and measured networks. 2021. :arxiv:`2105.06318`. ''' directed *= g.is_directed() weighted = weights not in (False, None) mat, numer, denom = None, None, None if not directed and g.is_directed(): _, mat = _get_matrices(g, directed, weights, weighted, combine_weights, normed=True) else: mat = g.adjacency_matrix(weights=weights).astype(float) mat /= mat.max() mat.setdiag(0) mat2, mat3 = None, None if directed: # set correct matrix if mode.endswith("-in"): mat = mat.T if method == "continuous" and weights is not None: sqmat = mat.sqrt() cbmat = mat.power(2/3) mat2 = sqmat@sqmat if mode in ("cycle-in", "cycle-out"): mat3 = cbmat@cbmat@cbmat elif mode in ("fan-in", "fan-out"): mat3 = cbmat@cbmat@cbmat.T else: raise ValueError("Unknown `mode`: '" + mode + "'.'") elif method in ("normal", "zhang", None): mat2 = mat@mat if mode in ("cycle-in", "cycle-out"): mat3 = mat2@mat elif mode in ("fan-in", "fan-out"): mat3 = mat2@mat.T else: raise ValueError("Unknown `mode`: '" + mode + "'.'") else: raise ValueError("Unknown `method`: '" + method + "'.'") else: # undirected if method == "continuous" and weights is not None: sqmat = mat.sqrt() cbmat = mat.power(2/3) mat2 = sqmat@sqmat mat3 = cbmat@cbmat@cbmat elif method in ("normal", "zhang", None): mat2 = mat@mat mat3 = mat2@mat else: raise ValueError("Unknown `method`: '" + method + "'.'") numer = mat3.diagonal() denom = _sum(mat2, axis=1) - mat2.diagonal() denom[denom == 0] = 1 return numer / denom
def local_clustering_binary_undirected(g, nodes=None): r''' Returns the undirected local clustering coefficient of some `nodes`. .. math:: C_i = \frac{A^3_{ii}}{d_i(d_i - 1)} = \frac{\Delta_i}{T_i} with :math:`A` the adjacency matrix, :math:`d_i` the degree of node :math:`i`, :math:`\Delta_i` is the number of triangles, and :math:`T_i` is the number of triplets to which :math:`i` belongs. If `g` is directed, then it is converted to a simple undirected graph (no parallel edges), both directed and reciprocal edges are merged into a single edge. Parameters ---------- g : :class:`~nngt.Graph` Graph to analyze. nodes : list, optional (default: all nodes) The list of nodes for which the clustering will be returned Returns ------- lc : :class:`numpy.ndarray` The list of clustering coefficients, on per node. References ---------- .. [gt-local-clustering] :gtdoc:`clustering.local_clustering` .. [ig-local-clustering] :igdoc:`transitivity_local_undirected` .. [nx-local-clustering] :nxdoc:`algorithms.cluster.clustering` ''' # Note, this function is overloaded by the library-specific version # if igraph, graph-tool, or networkx is used triangles = triangle_count(g, weights=None, nodes=nodes, directed=False) triplets = triplet_count(g, weights=None, nodes=nodes, directed=False) if nonstring_container(triangles): triplets[triangles == 0] = 1 elif triangles == 0: return 0 return triangles / triplets
[docs]def local_clustering(g, nodes=None, directed=True, weights=None, method="continuous", mode="total", combine_weights="mean"): r''' Local (weighted directed) clustering coefficient of the nodes, ignoring self-loops. If no weights are requested and the graph is undirected, returns the undirected binary clustering. For all weighted cases, the weights are assumed to be positive and they are normalized to dimensionless values between 0 and 1 through a division by the highest weight. The default `method` for weighted networks is the continuous definition [Fardet2021]_ and is defined as: .. math:: C_i = \frac{\sum_{jk} \sqrt[3]{w_{ij} w_{ik} w_{jk}}} {\sum_{j\neq k} \sqrt{w_{ij} w_{ik}}} = \frac{\left(W^{\left[\frac{2}{3}\right]}\right)^3_{ii}} {\left(s^{\left[\frac{1}{2}\right]}_i\right)^2 - s_i} for undirected networks, with :math:`W = \{ w_{ij}\} = \tilde{W} / \max(\tilde{W})` the normalized weight matrix, :math:`s_i` the normalized strength of node :math:`i`, and :math:`s^{[\frac{1}{2}]}_i = \sum_k \sqrt{w_{ik}}` the strength associated to the matrix :math:`W^{[\frac{1}{2}]} = \{\sqrt{w_{ij}}\}`. For directed networks, we used the total clustering defined in [Fagiolo2007]_ by default, hence the second equation becomes: .. math:: C_i = \frac{\frac{1}{2}\left(W^{\left[\frac{2}{3}\right]} + W^{\left[\frac{2}{3}\right],T}\right)^3_{ii}} {\left(s^{\left[\frac{1}{2}\right]}_i\right)^2 - 2s^{\leftrightarrow}_i - s_i} with :math:`s^{\leftrightarrow} = \sum_k \sqrt{w_{ik}w_{ki}}` the reciprocal strength (associated to reciprocal connections). For the other modes, see the generalized definitions in [Fagiolo2007]_. Contrary to 'barrat' and 'onnela' [Saramaki2007]_, this method displays *all* following properties: * fully continuous (no jump in clustering when weights go to zero), * equivalent to binary clustering when all weights are 1, * equivalence between no-edge and zero-weight edge cases, * normalized (always between zero and 1). Using either 'continuous' or 'zhang' is usually recommended for weighted graphs, see the discussion in [Fardet2021]_ for details. Parameters ---------- g : :class:`~nngt.Graph` object Graph to analyze. nodes : array-like container with node ids, optional (default = all nodes) Nodes for which the local clustering coefficient should be computed. directed : bool, optional (default: True) Whether to compute the directed clustering if the graph is directed. weights : bool or str, optional (default: binary edges) Whether edge weights should be considered; if ``None`` or ``False`` then use binary edges; if ``True``, uses the 'weight' edge attribute, otherwise uses any valid edge attribute required. method : str, optional (default: 'continuous') Method used to compute the weighted clustering, either 'barrat' [Barrat2004]_/[Clemente2018]_, 'continuous' [Fardet2021]_, 'onnela' [Onnela2005]_/[Fagiolo2007]_, or 'zhang' [Zhang2005]_. mode : str, optional (default: "total") Type of clustering to use for directed graphs, among "total", "fan-in", "fan-out", "middleman", and "cycle" [Fagiolo2007]_. combine_weights : str, optional (default: 'mean') How to combine the weights of reciprocal edges if the graph is directed but `directed` is set to False. It can be: * "min": the minimum of the edge attribute values will be used for the new edge. * "max": the maximum of the edge attribute values will be used for the new edge. * "mean": the mean of the edge attribute values will be used for the new edge. * "sum": equivalent to mean due to weight normalization. Returns ------- lc : :class:`numpy.ndarray` The list of clustering coefficients, on per node. References ---------- .. [Barrat2004] Barrat, Barthelemy, Pastor-Satorras, Vespignani. The Architecture of Complex Weighted Networks. PNAS 2004, 101 (11). :doi:`10.1073/pnas.0400087101`. .. [Clemente2018] Clemente, Grassi. Directed Clustering in Weighted Networks: A New Perspective. Chaos, Solitons & Fractals 2018, 107, 26–38. :doi:`10.1016/j.chaos.2017.12.007`, :arxiv:`1706.07322`. .. [Fagiolo2007] Fagiolo. Clustering in Complex Directed Networks. Phys. Rev. E 2007, 76, (2), 026107. :doi:`10.1103/PhysRevE.76.026107`, :arxiv:`physics/0612169`. .. [Onnela2005] Onnela, Saramäki, Kertész, Kaski. Intensity and Coherence of Motifs in Weighted Complex Networks. Phys. Rev. E 2005, 71 (6), 065103. :doi:`10.1103/physreve.71.065103`, :arxiv:`cond-mat/0408629`. .. [Saramaki2007] Saramäki, Kivelä, Onnela, Kaski, Kertész. Generalizations of the Clustering Coefficient to Weighted Complex Networks. Phys. Rev. E 2007, 75 (2), 027105. :doi:`10.1103/PhysRevE.75.027105`, :arxiv:`cond-mat/0608670`. .. [Zhang2005] Zhang, Horvath. A General Framework for Weighted Gene Co-Expression Network Analysis. Statistical Applications in Genetics and Molecular Biology 2005, 4 (1). :doi:`10.2202/1544-6115.1128`, `PDF <https://dibernardo.tigem.it/files/papers/2008/ zhangbin-statappsgeneticsmolbio.pdf>`_. .. [Fardet2021] Fardet, Levina. Weighted directed clustering: interpretations and requirements for heterogeneous, inferred, and measured networks. 2021. :arxiv:`2105.06318`. See also -------- :func:`undirected_binary_clustering` :func:`global_clustering` ''' # check directivity and weights directed *= g.is_directed() weighted = weights not in (None, False) triplets, triangles = None, None if not directed and not weighted: # undirected binary clustering uses the library method return local_clustering_binary_undirected(g, nodes=nodes) elif not weighted: # directed clustering triangles = triangle_count(g, nodes=nodes, mode=mode) triplets = triplet_count(g, nodes, mode=mode).astype(float) else: triangles, triplets = _triangles_and_triplets( g, directed, weights, method, mode, combine_weights, nodes) if nonstring_container(triplets): triplets[triangles == 0] = 1 elif triangles == 0: return 0 return triangles / triplets
[docs]def triangle_count(g, nodes=None, directed=True, weights=None, method="normal", mode="total", combine_weights="mean"): ''' Returns the number or the strength (also called intensity) of triangles for each node. Parameters ---------- g : :class:`~nngt.Graph` object Graph to analyze. nodes : array-like container with node ids, optional (default = all nodes) Nodes for which the local clustering coefficient should be computed. directed : bool, optional (default: True) Whether to compute the directed clustering if the graph is directed. weights : bool or str, optional (default: binary edges) Whether edge weights should be considered; if ``None`` or ``False`` then use binary edges; if ``True``, uses the 'weight' edge attribute, otherwise uses any valid edge attribute required. method : str, optional (default: 'normal') Method used to compute the weighted triangles, either 'normal', where the weights are directly used, or the definitions associated to the weighted clustering: 'barrat' [Barrat2004]_, 'continuous', 'onnela' [Onnela2005]_, or 'zhang' [Zhang2005]_. mode : str, optional (default: "total") Type of clustering to use for directed graphs, among "total", "fan-in", "fan-out", "middleman", and "cycle" [Fagiolo2007]_. combine_weights : str, optional (default: 'mean') How to combine the weights of reciprocal edges if the graph is directed but `directed` is set to False. It can be: * "sum": the sum of the edge attribute values will be used for the new edge. * "mean": the mean of the edge attribute values will be used for the new edge. * "min": the minimum of the edge attribute values will be used for the new edge. * "max": the maximum of the edge attribute values will be used for the new edge. Returns ------- tr : array Number or weight of triangles to which each node belongs. References ---------- .. [Barrat2004] Barrat, Barthelemy, Pastor-Satorras, Vespignani. The Architecture of Complex Weighted Networks. PNAS 2004, 101 (11). :doi:`10.1073/pnas.0400087101`. .. [Fagiolo2007] Fagiolo. Clustering in Complex Directed Networks. Phys. Rev. E 2007, 76, (2), 026107. :doi:`10.1103/PhysRevE.76.026107`, :arxiv:`physics/0612169`. .. [Onnela2005] Onnela, Saramäki, Kertész, Kaski. Intensity and Coherence of Motifs in Weighted Complex Networks. Phys. Rev. E 2005, 71 (6), 065103. :doi:`10.1103/physreve.71.065103`, :arxiv:`cond-mat/0408629`. .. [Zhang2005] Zhang, Horvath. A General Framework for Weighted Gene Co-Expression Network Analysis. Statistical Applications in Genetics and Molecular Biology 2005, 4 (1). :doi:`10.2202/1544-6115.1128`, `PDF <https://dibernardo.tigem.it/files/papers/2008/ zhangbin-statappsgeneticsmolbio.pdf>`_. ''' directed *= g.is_directed() weighted = weights not in (False, None) exponent = None if method == "onnela": exponent = 1/3 elif method == "continuous": exponent = 2/3 # get relevant matrices (use directed=False to get both dir/undir mat) mat, matsym = _get_matrices( g, directed, weights, weighted, combine_weights, exponent=exponent, normed=True) # if unweighted, adj is mat, adjsym is matsym adj, adjsym = mat, matsym # for barrat, we need both weighted and binary matrices if method == "barrat" and weighted: adj, adjsym = _get_matrices(g, directed, None, False, combine_weights) return _triangle_count(mat, matsym, adj, adjsym, method, mode, weighted, directed, nodes)
[docs]def triplet_count(g, nodes=None, directed=True, weights=None, method="normal", mode="total", combine_weights="mean"): r''' Returns the number or the strength (also called intensity) of triplets for each node. For binary networks, the triplets of node :math:`i` are defined as: .. math:: T_i = \sum_{j,k} a_{ij}a_{ik} Parameters ---------- g : :class:`~nngt.Graph` object Graph to analyze. nodes : array-like container with node ids, optional (default = all nodes) Nodes for which the local clustering coefficient should be computed. directed : bool, optional (default: True) Whether to compute the directed clustering if the graph is directed. weights : bool or str, optional (default: binary edges) Whether edge weights should be considered; if ``None`` or ``False`` then use binary edges; if ``True``, uses the 'weight' edge attribute, otherwise uses any valid edge attribute required. method : str, optional (default: 'continuous') Method used to compute the weighted triplets, either 'normal', where the edge weights are directly used, or the definitions used for weighted clustering coefficients, 'barrat' [Barrat2004]_, 'continuous', 'onnela' [Onnela2005]_, or 'zhang' [Zhang2005]_. mode : str, optional (default: "total") Type of clustering to use for directed graphs, among "total", "fan-in", "fan-out", "middleman", and "cycle" [Fagiolo2007]_. combine_weights : str, optional (default: 'mean') How to combine the weights of reciprocal edges if the graph is directed but `directed` is set to False. It can be: * "sum": the sum of the edge attribute values will be used for the new edge. * "mean": the mean of the edge attribute values will be used for the new edge. * "min": the minimum of the edge attribute values will be used for the new edge. * "max": the maximum of the edge attribute values will be used for the new edge. Returns ------- tr : array Number or weight of triplets to which each node belongs. References ---------- .. [Barrat2004] Barrat, Barthelemy, Pastor-Satorras, Vespignani. The Architecture of Complex Weighted Networks. PNAS 2004, 101 (11). :doi:`10.1073/pnas.0400087101`. .. [Fagiolo2007] Fagiolo. Clustering in Complex Directed Networks. Phys. Rev. E 2007, 76, (2), 026107. :doi:`10.1103/PhysRevE.76.026107`, :arxiv:`physics/0612169`. .. [Zhang2005] Zhang, Horvath. A General Framework for Weighted Gene Co-Expression Network Analysis. Statistical Applications in Genetics and Molecular Biology 2005, 4 (1). :doi:`10.2202/1544-6115.1128`, `PDF <https://dibernardo.tigem.it/files/papers/2008/ zhangbin-statappsgeneticsmolbio.pdf>`_. ''' directed *= g.is_directed() weighted = weights not in (False, None) # simple binary cases if not weighted or method == "onnela": # undirected if not directed: deg = None if g.is_directed(): _, adjsym = _get_matrices(g, directed, None, False, combine_weights) if nodes is None: deg = _sum(adjsym, axis=0) else: deg = _sum(adjsym, axis=0)[nodes] else: deg = g.get_degrees(nodes=nodes) if nodes is None or nonstring_container(nodes): return (0.5*deg*(deg - 1)).astype(int) return 0.5*deg*(deg - 1) # directed if mode in ("total", "cycle", "middleman"): adj = g.adjacency_matrix() d_recip = (adj@adj).diagonal() if nodes is not None: d_recip = d_recip[nodes] din = g.get_degrees("in", nodes=nodes) dout = g.get_degrees("out", nodes=nodes) if mode == "total": dtot = din + dout return dtot*(dtot - 1) - 2*d_recip return din*dout - d_recip else: assert mode in ("fan-in", "fan-out"), \ "Unknown mode '{}'".format(mode) deg = g.get_degrees(mode[4:], nodes=nodes) return deg*(deg - 1) # check method for weighted W, Wu, A, Au = None, None, None, None if method in ("continuous", "normal", "zhang"): # we need only the weighted matrices W, Wu = _get_matrices(g, directed, weights, weighted, combine_weights=combine_weights, normed=True) elif method == "barrat": # we need only the (potentially) directed matrices W = g.adjacency_matrix(weights=weights) A = g.adjacency_matrix() else: raise ValueError("`method` must be either 'barrat', 'onnela', " "'zhang', or 'continuous'/'normal' (identical " "options).") return _triplet_count_weighted( g, W, Wu, A, Au, method, mode, directed, weights, nodes)
# ---------------------------------------------------------- # # Overwrite binary clusterings with library-specific version # # ---------------------------------------------------------- # if nngt._config["backend"] == "networkx": from .nx_functions import (global_clustering_binary_undirected, local_clustering_binary_undirected) if nngt._config["backend"] == "igraph": from .ig_functions import (global_clustering_binary_undirected, local_clustering_binary_undirected) if nngt._config["backend"] == "graph-tool": from .gt_functions import (global_clustering_binary_undirected, local_clustering_binary_undirected) # -------------- # # Tool functions # # -------------- # def _triangles_and_triplets(g, directed, weights, method, mode, combine_weights, nodes): ''' Return the triangles and triplets ''' # weighted clustering W, Wu, A, Au = None, None, None, None triplets = None # check the method to get the relevant matrices if method == "continuous": W, Wu = _get_matrices(g, directed, weights, True, combine_weights, exponent=2/3, normed=True) Wtr, Wtru = _get_matrices(g, directed, weights, True, combine_weights, normed=True) triplets = _triplet_count_weighted( g, Wtr, Wtru, A, Au, method, mode, directed, weights, nodes) if method == "zhang": W, Wu = _get_matrices(g, directed, weights, True, combine_weights, normed=True) triplets = _triplet_count_weighted( g, W, Wu, A, Au, method, mode, directed, weights, nodes) elif method == "onnela": W, Wu = _get_matrices(g, directed, weights, True, combine_weights, exponent=1/3, normed=True) # onnela uses the binary triplets triplets = triplet_count(g, nodes=nodes, directed=directed, mode=mode, weights=None) elif method == "barrat": # we need all matrices W, Wu = _get_matrices(g, directed, weights, True, combine_weights, normed=True) A, Au = _get_matrices(g, directed, None, False, combine_weights) triplets = _triplet_count_weighted( g, W, Wu, A, Au, method, mode, directed, weights, nodes) # get triangles and triplet strength triangles = _triangle_count(W, Wu, A, Au, method, mode, weighted=True, directed=directed, nodes=nodes) return triangles, triplets def _triangle_count(mat, matsym, adj, adjsym, method, mode, weighted, directed, nodes): ''' (Un)weighted (un)directed triangle count. ''' tr = None if method == "barrat": if mode == "total": tr = 0.5*(matsym@adjsym@adjsym).diagonal() elif mode == "cycle": tr = 0.5*(mat@adj@adj + mat.T@adj.T@adj.T).diagonal() elif mode == "middleman": tr = 0.5*(mat.T@adj@adj.T + mat@adj.T@adj).diagonal() elif mode == "fan-in": tr = 0.5*(mat.T@adjsym@adj).diagonal() elif mode == "fan-out": tr = 0.5*(mat@adjsym@adj.T).diagonal() else: raise ValueError("Unknown mode ''.".format(mode)) else: if not weighted: mat, matsym = adj, adjsym elif method not in ("continuous", "zhang", "normal", "onnela"): raise ValueError("Invalid `method`: '{}'".format(method)) if mode == "total": tr = 0.5*(matsym@matsym@matsym).diagonal() elif mode == "cycle": tr = (mat@mat@mat).diagonal() elif mode == "middleman": tr = (mat@mat.T@mat).diagonal() elif mode == "fan-in": tr = (mat.T@mat@mat).diagonal() elif mode == "fan-out": tr = (mat@mat@mat.T).diagonal() else: raise ValueError("Unknown mode ''.".format(mode)) if isinstance(tr, np.matrix): tr = tr.A1 if nodes is None: return tr return tr[nodes] def _triplet_count_weighted(g, mat, matsym, adj, adjsym, method, mode, directed, weights, nodes): ''' triplet count, weighted only. ''' tr = None if method == "normal": pass elif method == "continuous": if directed: sqmat = mat.sqrt() if mode == "total": s2_sq_tot = np.square( _sum(sqmat, axis=0) + _sum(sqmat, axis=1)) s_tot = _sum(mat, axis=0) + _sum(mat, axis=1) s_recip = 2*(sqmat@sqmat).diagonal() tr = s2_sq_tot - s_tot - s_recip elif mode in ("cycle", "middleman"): s_sq_out = _sum(sqmat, axis=0) s_sq_in = _sum(sqmat, axis=1) s_recip = (sqmat@sqmat).diagonal() tr = s_sq_in*s_sq_out - s_recip elif mode in ("fan-in", "fan-out"): axis = 0 if mode == "fan-in" else 1 s2_sq = np.square(_sum(sqmat, axis=axis)) sgth = _sum(mat, axis=axis) tr = s2_sq - sgth else: raise ValueError("Unknown mode ''.".format(mode)) else: sqmat = matsym.sqrt() s2_sq = np.square(_sum(sqmat, axis=0)) s = _sum(matsym, axis=0) tr = 0.5*(s2_sq - s) elif method == "zhang": if directed: mat2 = mat.power(2) if mode == "total": s2_sq_tot = np.square(_sum(mat, axis=0) + _sum(mat, axis=1)) s_tot = _sum(mat2, axis=0) + _sum(mat2, axis=1) s_recip = 2*(mat@mat).diagonal() tr = s2_sq_tot - s_tot - s_recip elif mode in ("cycle", "middleman"): s_sq_out = _sum(mat, axis=0) s_sq_in = _sum(mat, axis=1) s_recip = (mat@mat).diagonal() tr = s_sq_in*s_sq_out - s_recip elif mode in ("fan-in", "fan-out"): axis = 0 if mode == "fan-in" else 1 s2_sq = np.square(_sum(mat, axis=axis)) sgth = _sum(mat2, axis=axis) tr = s2_sq - sgth else: raise ValueError("Unknown mode ''.".format(mode)) else: mat2 = matsym.power(2) s2_sq = np.square(_sum(matsym, axis=0)) s = _sum(mat2, axis=0) tr = 0.5*(s2_sq - s) elif method == "barrat": if directed: # specifc definition of the reciprocal strength from Clemente if mode == "total": s_recip = 0.5*(mat@adj + adj@mat).diagonal() dtot = g.get_degrees("total") wmax = np.max(g.get_weights()) stot = g.get_degrees("total", weights=weights) / wmax tr = stot*(dtot - 1) - 2*s_recip elif mode in ("cycle", "middleman"): s_recip = 0.5*(mat@adj + adj@mat).diagonal() s_in = _sum(mat, axis=0) s_out = _sum(mat, axis=1) d_in = g.get_degrees("in") d_out = g.get_degrees("out") tr = 0.5*(s_in*d_out + s_out*d_in) - s_recip elif mode in ("fan-in", "fan-out"): axis = 0 if mode == "fan-in" else 1 sgth = _sum(mat, axis=axis) deg = g.get_degrees(mode[4:]) tr = sgth*(deg - 1) else: raise ValueError("Unknown mode ''.".format(mode)) elif g.is_directed(): d = _sum(adjsym, axis=0) s = _sum(matsym, axis=0) tr = 0.5*s*(d - 1) else: d = g.get_degrees() s = _sum(matsym, axis=0) tr = 0.5*s*(d - 1) else: raise ValueError( "Invalid `method` for triplet count: '{}'".format(method)) if nodes is None: return tr return tr[nodes] def _sum(mat, axis): ''' Sum either sparse matrix or array ''' res = mat.sum(axis=axis) if "matrix" in str(type(mat)): return res.A1 return res