# Source code for graph_tool.correlations

#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# graph_tool -- a general graph manipulation python module
#
# Copyright (C) 2006-2023 Tiago de Paula Peixoto <tiago@skewed.de>
#
# This program is free software; you can redistribute it and/or modify it under
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""
graph_tool.correlations
---------------------------

This module contains functions that compute various correlations among nodes of
a network.

Summary
+++++++

.. autosummary::
:nosignatures:
:toctree: autosummary

assortativity
scalar_assortativity
corr_hist
combined_corr_hist
avg_neighbor_corr
avg_combined_corr

"""

from .. dl_import import dl_import
dl_import("from . import libgraph_tool_correlations")

from .. import _degree, _prop, _check_prop_scalar, VertexPropertyMap
from numpy import *

__all__ = ["assortativity", "scalar_assortativity", "corr_hist",
"combined_corr_hist", "avg_neighbor_corr", "avg_neighbour_corr",
"avg_combined_corr"]

[docs]def assortativity(g, deg, eweight=None):
r"""Obtain the assortativity coefficient for the given graph.

Parameters
----------
g : :class:~graph_tool.Graph
Graph to be used.
deg : string or :class:~graph_tool.VertexPropertyMap
Degree type ("in", "out" or "total") or vertex property map, which
specifies the vertex types.
eweight : :class:~graph_tool.EdgePropertyMap (optional, default: None)
If given, this will specify the edge weights, otherwise a constant value
of one will be used.

Returns
-------
assortativity coefficient : tuple of two floats
The assortativity coefficient, and its variance.

--------
assortativity: assortativity coefficient
scalar_assortativity: scalar assortativity coefficient
corr_hist: vertex-vertex correlation histogram
combined_corr_hist: combined single-vertex correlation histogram
avg_neighbor_corr: average nearest-neighbor correlation
avg_combined_corr: average combined single-vertex correlation

Notes
-----
The assortativity coefficient [newman-mixing-2003]_ tells in a concise
fashion how vertices of different types are preferentially connected amongst
themselves, and is defined by

.. math::
r = \frac{\sum_i e_{ii} - \sum_i a_i b_i}{1-\sum_i a_i b_i}

where :math:a_i=\sum_je_{ij} and :math:b_j=\sum_ie_{ij}, and
:math:e_{ij} is the fraction of edges from a vertex of type
i to a vertex of type j.

The variance is obtained with the jackknife method_.

If enabled during compilation, this algorithm runs in parallel.

Examples
--------

.. doctest:: assortativity

>>> g = gt.collection.data["pgp-strong-2009"]
>>> g = gt.GraphView(g, directed=False)
>>> gt.assortativity(g, "out")
(0.0240578552..., 0.0003272847...)

References
----------
.. [newman-mixing-2003] M. E. J. Newman, "Mixing patterns in networks",
Phys. Rev. E 67, 026126 (2003), :doi:10.1103/PhysRevE.67.026126
.. _jackknife method: http://en.wikipedia.org/wiki/Resampling_%28statistics%29#Jackknife

"""
return libgraph_tool_correlations.\
assortativity_coefficient(g._Graph__graph, _degree(g, deg),
_prop("e", g, eweight))

[docs]def scalar_assortativity(g, deg, eweight=None):
r"""
Obtain the scalar assortativity coefficient for the given graph.

Parameters
----------
g : :class:~graph_tool.Graph
Graph to be used.
deg : string or :class:~graph_tool.VertexPropertyMap
Degree type ("in", "out" or "total") or vertex property map, which
specifies the vertex scalar values.
eweight : :class:~graph_tool.EdgePropertyMap (optional, default: None)
If given, this will specify the edge weights, otherwise a constant value
of one will be used.

Returns
-------
scalar assortativity coefficient : tuple of two floats
The scalar assortativity coefficient, and its variance.

--------
assortativity: assortativity coefficient
scalar_assortativity: scalar assortativity coefficient
corr_hist: vertex-vertex correlation histogram
combined_corr_hist: combined single-vertex correlation histogram
avg_neighbor_corr: average nearest-neighbor correlation
avg_combined_corr: average combined single-vertex correlation

Notes
-----
The scalar assortativity coefficient [newman-mixing-2003]_ tells in a
concise fashion how vertices of different types are preferentially connected
amongst themselves, and is defined by

.. math::
r = \frac{\sum_{xy} xy(e_{xy} - a_x b_y)}{\sigma_a\sigma_b}

where :math:a_x=\sum_ye_{xy} and :math:b_y=\sum_xe_{xy}, and
:math:e_{xy} is the fraction of edges from a vertex of type
x to a vertex of type y.

The variance is obtained with the jackknife method_.

If enabled during compilation, this algorithm runs in parallel.

Examples
--------

.. doctest:: scalar_assortativity

>>> g = gt.collection.data["pgp-strong-2009"]
>>> g = gt.GraphView(g, directed=False)
>>> gt.scalar_assortativity(g, "out")
(0.026384932418..., 0.001213444270...)

References
----------
.. [newman-mixing-2003] M. E. J. Newman, "Mixing patterns in networks",
Phys. Rev. E 67, 026126 (2003), :doi:10.1103/PhysRevE.67.026126
.. _jackknife method: http://en.wikipedia.org/wiki/Resampling_%28statistics%29#Jackknife
"""
if isinstance(deg, VertexPropertyMap):
_check_prop_scalar(deg, name="deg")
return libgraph_tool_correlations.\
scalar_assortativity_coefficient(g._Graph__graph, _degree(g, deg),
_prop("e", g, eweight))

[docs]def corr_hist(g, deg_source, deg_target, bins=[[0, 1], [0, 1]], weight=None,
float_count=True):
r"""
Obtain the correlation histogram for the given graph.

Parameters
----------
g : :class:~graph_tool.Graph
Graph to be used.
deg_source : string or :class:~graph_tool.VertexPropertyMap
degree type ("in", "out" or "total") or vertex property map for the
source vertex.
deg_target : string or :class:~graph_tool.VertexPropertyMap
degree type ("in", "out" or "total") or vertex property map for the
target vertex.
bins : list of lists (optional, default: [[0, 1], [0, 1]])
A list of bin edges to be used for the source and target degrees. If any
list has size 2, it is used to create an automatically generated bin
range starting from the first value, and with constant bin width given
by the second value.
weight : edge property map (optional, default: None)
Weight (multiplicative factor) to be used on each edge.
float_count : bool (optional, default: True)
If True, the bin counts are converted float variables, which is useful
for normalization, and other processing. It False, the bin counts will
be unsigned integers.

Returns
-------
bin_counts : :class:numpy.ndarray
Two-dimensional array with the bin counts.
source_bins : :class:numpy.ndarray
Source degree bins
target_bins : :class:numpy.ndarray
Target degree bins

--------
assortativity: assortativity coefficient
scalar_assortativity: scalar assortativity coefficient
corr_hist: vertex-vertex correlation histogram
combined_corr_hist: combined single-vertex correlation histogram
avg_neighbor_corr: average nearest-neighbor correlation
avg_combined_corr: average combined single-vertex correlation

Notes
-----
The correlation histogram counts, for every vertex with degree (or scalar
property) 'source_deg', the number of out-neighbors with degree (or scalar
property) 'target_deg'.

If enabled during compilation, this algorithm runs in parallel.

Examples
--------

.. testsetup:: corr_hist

from pylab import *
np.random.seed(42)
gt.seed_rng(42)

.. doctest:: corr_hist

>>> def sample_k(max):
...     accept = False
...     while not accept:
...         k = np.random.randint(1,max+1)
...         accept = np.random.random() < 1.0/k
...     return k
...
>>> g = gt.random_graph(10000, lambda: sample_k(40),
...                     model="probabilistic-configuration",
...                     edge_probs=lambda i, j: (sin(i / pi) * sin(j / pi) + 1) / 2,
...                     directed=False, n_iter=100)
>>> h = gt.corr_hist(g, "out", "out")
>>> clf()
>>> xlabel("Source out-degree")
Text(...)
>>> ylabel("Target out-degree")
Text(...)
>>> imshow(h.T, interpolation="nearest", origin="lower")
<...>
>>> colorbar()
<...>
>>> savefig("corr.svg")

.. figure:: corr.*
:align: center

Out/out-degree correlation histogram.
"""

ret = libgraph_tool_correlations.\
vertex_correlation_histogram(g._Graph__graph, _degree(g, deg_source),
_degree(g, deg_target),
_prop("e", g, weight),
[float(x) for x in bins],
[float(x) for x in bins])
return [array(ret, dtype="float64") if float_count else ret,
[ret, ret]]

[docs]def combined_corr_hist(g, deg1, deg2, bins=[[0, 1], [0, 1]], float_count=True):
r"""
Obtain the single-vertex combined correlation histogram for the given graph.

Parameters
----------
g : :class:~graph_tool.Graph
Graph to be used.
deg1 : string or :class:~graph_tool.VertexPropertyMap
first degree type ("in", "out" or "total") or vertex property map.
deg2 : string or :class:~graph_tool.VertexPropertyMap
second degree type ("in", "out" or "total") or vertex property map.
bins : list of lists (optional, default: [[0, 1], [0, 1]])
A list of bin edges to be used for the first and second degrees. If any
list has size 2, it is used to create an automatically generated bin
range starting from the first value, and with constant bin width given
by the second value.
float_count : bool (optional, default: True)
If True, the bin counts are converted float variables, which is useful
for normalization, and other processing. It False, the bin counts will
be unsigned integers.

Returns
-------
bin_counts : :class:numpy.ndarray
Two-dimensional array with the bin counts.
first_bins : :class:numpy.ndarray
First degree bins
second_bins : :class:numpy.ndarray
Second degree bins

Notes
-----
If enabled during compilation, this algorithm runs in parallel.

--------
assortativity: assortativity coefficient
scalar_assortativity: scalar assortativity coefficient
corr_hist: vertex-vertex correlation histogram
combined_corr_hist: combined single-vertex correlation histogram
avg_neighbor_corr: average nearest-neighbor correlation
avg_combined_corr: average combined single-vertex correlation

Examples
--------

.. testsetup:: combined_corr_hist

from pylab import *
np.random.seed(42)
gt.seed_rng(42)

.. doctest:: combined_corr_hist

>>> def sample_k(max):
...     accept = False
...     while not accept:
...         i = np.random.randint(1, max + 1)
...         j = np.random.randint(1, max + 1)
...         accept = np.random.random() < (sin(i / pi) * sin(j / pi) + 1) / 2
...     return i,j
...
>>> g = gt.random_graph(10000, lambda: sample_k(40))
>>> h = gt.combined_corr_hist(g, "in", "out")
>>> clf()
>>> xlabel("In-degree")
Text(...)
>>> ylabel("Out-degree")
Text(...)
>>> imshow(h.T, interpolation="nearest", origin="lower")
<...>
>>> colorbar()
<...>
>>> savefig("combined_corr.svg")

.. figure:: combined_corr.*
:align: center

Combined in/out-degree correlation histogram.

"""
ret = libgraph_tool_correlations.\
vertex_combined_correlation_histogram(g._Graph__graph,
_degree(g, deg1),
_degree(g, deg2),
[float(x) for x in bins],
[float(x) for x in bins])
return [array(ret, dtype="float64") if float_count else ret,
[ret, ret]]

[docs]def avg_neighbor_corr(g, deg_source, deg_target, bins=[0, 1], weight=None):
r"""
Obtain the average neighbor-neighbor correlation for the given graph.

Parameters
----------
g : :class:~graph_tool.Graph
Graph to be used.
deg_source : string or :class:~graph_tool.VertexPropertyMap
degree type ("in", "out" or "total") or vertex property map for the
source vertex.
deg_target : string or :class:~graph_tool.VertexPropertyMap
degree type ("in", "out" or "total") or vertex property map for the
target vertex.
bins : list (optional, default: [0, 1])
Bins to be used for the source degrees. If the list has size 2, it is
used as the constant width of an automatically generated bin range,
starting from the first value.
weight : edge property map (optional, default: None)
Weight (multiplicative factor) to be used on each edge.

Returns
-------
bin_avg : :class:numpy.ndarray
Array with the deg_target average for the get_source bins.
bin_dev : :class:numpy.ndarray
Array with the standard deviation of the deg_target average for the
get_source bins.
bins : :class:numpy.ndarray
Source degree bins,

--------
assortativity: assortativity coefficient
scalar_assortativity: scalar assortativity coefficient
corr_hist: vertex-vertex correlation histogram
combined_corr_hist: combined single-vertex correlation histogram
avg_neighbor_corr: average nearest-neighbor correlation
avg_combined_corr: average combined single-vertex correlation

Notes
-----
The average correlation is the average, for every vertex with degree (or
scalar property) 'source_deg', the of the 'target_deg' degree (or
scalar property) of its neighbors.

If enabled during compilation, this algorithm runs in parallel.

Examples
--------

.. testsetup:: avg_neighbor_corr

from pylab import *
np.random.seed(42)
gt.seed_rng(42)

.. doctest:: avg_neighbor_corr

>>> def sample_k(max):
...     accept = False
...     while not accept:
...         k = np.random.randint(1,max+1)
...         accept = np.random.random() < 1.0 / k
...     return k
...
>>> g = gt.random_graph(10000, lambda: sample_k(40),
...                     model="probabilistic-configuration",
...                     edge_probs=lambda i, j: (sin(i / pi) * sin(j / pi) + 1) / 2,
...                     directed=False, n_iter=100)
>>> h = gt.avg_neighbor_corr(g, "out", "out")
>>> clf()
>>> xlabel("Source out-degree")
Text(...)
>>> ylabel("Target out-degree")
Text(...)
>>> errorbar(h[:-1], h, yerr=h, fmt="o")
<...>
>>> savefig("avg_corr.svg")

.. figure:: avg_corr.*
:align: center

Average out/out degree correlation.
"""

ret = libgraph_tool_correlations.\
vertex_avg_correlation(g._Graph__graph, _degree(g, deg_source),
_degree(g, deg_target), _prop("e", g, weight),
[float(x) for x in bins])
return [ret, ret, ret]

avg_neighbour_corr = avg_neighbor_corr

[docs]def avg_combined_corr(g, deg1, deg2, bins=[0, 1]):
r"""
Obtain the single-vertex combined correlation histogram for the given graph.

Parameters
----------
g : :class:~graph_tool.Graph
Graph to be used.
deg1 : string or :class:~graph_tool.VertexPropertyMap
first degree type ("in", "out" or "total") or vertex property map.
deg2 : string or :class:~graph_tool.VertexPropertyMap
second degree type ("in", "out" or "total") or vertex property map.
bins : list (optional, default: [0, 1])
Bins to be used for the first degrees. If the list has size 2, it is
used as the constant width of an automatically generated bin range,
starting from the first value.

Returns
-------
bin_avg : :class:numpy.ndarray
Array with the deg2 average for the deg1 bins.
bin_dev : :class:numpy.ndarray
Array with the standard deviation of the deg2 average for the deg1 bins.
bins : :class:numpy.ndarray
The deg1 bins.

Notes
-----
If enabled during compilation, this algorithm runs in parallel.

--------
assortativity: assortativity coefficient
scalar_assortativity: scalar assortativity coefficient
corr_hist: vertex-vertex correlation histogram
combined_corr_hist: combined single-vertex correlation histogram
avg_neighbor_corr: average nearest-neighbor correlation
avg_combined_corr: average combined single-vertex correlation

Examples
--------

.. testsetup:: avg_combined_corr

from pylab import *
np.random.seed(42)
gt.seed_rng(42)

.. doctest:: avg_combined_corr

>>> def sample_k(max):
...     accept = False
...     while not accept:
...         i = np.random.randint(1,max+1)
...         j = np.random.randint(1,max+1)
...         accept = np.random.random() < (sin(i/pi)*sin(j/pi)+1)/2
...     return i,j
...
>>> g = gt.random_graph(10000, lambda: sample_k(40))
>>> h = gt.avg_combined_corr(g, "in", "out")
>>> clf()
>>> xlabel("In-degree")
Text(...)
>>> ylabel("Out-degree")
Text(...)
>>> errorbar(h[:-1], h, yerr=h, fmt="o")
<...>
>>> savefig("combined_avg_corr.svg")

.. figure:: combined_avg_corr.*
:align: center

Average combined in/out-degree correlation.
"""

ret = libgraph_tool_correlations.\
vertex_avg_combined_correlation(g._Graph__graph, _degree(g, deg1),
_degree(g, deg2),
[float(x) for x in bins])
return [ret, ret, ret]