Source code for graph_tool.inference.planted_partition

#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# graph_tool -- a general graph manipulation python module
#
# Copyright (C) 2006-2025 Tiago de Paula Peixoto <tiago@skewed.de>
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from .. import Graph, GraphView, _get_rng, Vector_size_t, PropertyMap, \
    group_vector_property
from . blockmodel import DictState, init_q_cache
from . base_states import *
from . util import *

from .. dl_import import dl_import
dl_import("from . import libgraph_tool_inference as libinference")

import numpy as np
import math


[docs]
@entropy_state_signature
class PPBlockState(MCMCState, MultiflipMCMCState, MultilevelMCMCState,
                   GibbsMCMCState, DrawBlockState):
    r"""Obtain the partition of a network according to the Bayesian planted partition
    model.

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        Graph to be modelled.
    b : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Initial partition. If not supplied, a partition into a single group will
        be used.
    entropy_args: ``dict`` (optional, default: ``{}``)
        Override default arguments for :meth:`~PPBlockState.entropy()`
        method and releated operations.

    References
    ----------
    .. [lizhi-statistical-2020] Lizhi Zhang, Tiago P. Peixoto, "Statistical
       inference of assortative community structures", Phys. Rev. Research 2
       043271 (2020), :doi:`10.1103/PhysRevResearch.2.043271`, :arxiv:`2006.14493`
    """

    def __init__(self, g, b=None, entropy_args={}):
        EntropyState.__init__(self, entropy_args=entropy_args)
        self.g = GraphView(g, directed=False)
        if b is None:
            self.b = self.g.new_vp("int64_t")
        elif isinstance(g, PropertyMap):
            self.b = b.copy("int64_t")
        else:
            self.b = self.g.new_vp("int64_t", vals=b)

        self.wr = Vector_size_t()
        self.er = Vector_size_t()
        self.err = Vector_size_t()
        self.eio = Vector_size_t()

        init_q_cache(max((2 * max((self.g.num_edges(),
                                   self.g.num_vertices())), 100)))

        self.bg = Graph(g.num_vertices(), directed=False)
        self._abg = self.bg._get_any()
        self._state = libinference.make_pp_state(self)

    def __copy__(self):
        return self.copy()

    def __deepcopy__(self, memo):
        g = copy.deepcopy(self.g, memo)
        b = copy.deepcopy(self.b, memo)
        return self.copy(g=g, b=b)


[docs]
    def copy(self, g=None, b=None):
        r"""Copies the state. The parameters override the state properties, and
         have the same meaning as in the constructor."""
        return PPBlockState(g=g if g is not None else self.g,
                            b=b if b is not None else self.b)


    def __getstate__(self):
        state = EntropyState.__getstate__(self)
        return dict(state, g=self.g, b=self.b)

    def __setstate__(self, state):
        self.__init__(**state)

    def __repr__(self):
        return "<PPBlockState object with %d blocks, for graph %s, at 0x%x>" % \
            (self.get_B(), str(self.g), id(self))


[docs]
    def get_blocks(self):
        r"""Returns the property map which contains the block labels for each vertex."""
        return self.b



[docs]
    def get_state(self):
        """Alias to :meth:`~PPBlockState.get_blocks`."""
        return self.get_blocks()



[docs]
    def get_B(self):
        r"Returns the total number of blocks."
        return len(np.unique(self.b.fa))



[docs]
    def get_nonempty_B(self):
        r"Alias to :meth:`~PPBlockState.get_B`."
        return self.get_B()



[docs]
    def get_Be(self):
        r"""Returns the effective number of blocks, defined as :math:`e^{H}`, with
        :math:`H=-\sum_r\frac{n_r}{N}\ln \frac{n_r}{N}`, where :math:`n_r` is
        the number of nodes in group r.
        """
        w = np.array(np.bincount(self.b.fa), dtype="double")
        w = w[w>0]
        w /= w.sum()
        return np.exp(-(w*np.log(w)).sum())



[docs]
    def virtual_vertex_move(self, v, s, **kwargs):
        r"""Computes the entropy difference if vertex ``v`` is moved to block ``s``. The
        remaining parameters are the same as in
        :meth:`~graph_tool.inference.PPBlockState.entropy`."""
        return self._state.virtual_move(int(v), self.b[v], s,
                                        self._get_entropy_args(dict(self._entropy_args,
                                                                    **kwargs)))



[docs]
    def move_vertex(self, v, s):
        r"""Move vertex ``v`` to block ``s``."""
        self._state.move_vertex(int(v), int(s))


    @copy_state_wrap
    def _entropy(self, uniform=False, degree_dl_kind="distributed"):
        r"""Return the model entropy (negative log-likelihood).

        Parameters
        ----------
        uniform : ``bool`` (optional, default: ``False``)
            If ``True``, the uniform planted partition model is used, otherwise
            a non-uniform version is used.
        degree_dl_kind : ``str`` (optional, default: ``"distributed"``)
            This specifies the prior used for the degree sequence. It must be
            one of: ``"uniform"`` or ``"distributed"`` (default).

        Notes
        -----

        The "entropy" of the state is the negative log-likelihood of the
        microcanonical SBM, that includes the generated graph
        :math:`\boldsymbol{A}` and the model parameters :math:`e_{\text{in}}`,
        :math:`e_{\text{out}}`, :math:`\boldsymbol{k}` and
        :math:`\boldsymbol{b}`,

        .. math::

           \Sigma &= - \ln P(\boldsymbol{A},e_{\text{in}},e_{\text{out}},\boldsymbol{k},\boldsymbol{b}) \\
                  &= - \ln P(\boldsymbol{A}|e_{\text{in}},e_{\text{out}},\boldsymbol{k},\boldsymbol{b}) - \ln P(e_{\text{in}},e_{\text{out}},\boldsymbol{k},\boldsymbol{b}).

        This value is also called the `description length
        <https://en.wikipedia.org/wiki/Minimum_description_length>`_ of the data,
        and it corresponds to the amount of information required to describe it
        (in `nats <https://en.wikipedia.org/wiki/Nat_(unit)>`_).

        For the uniform version of the model, the likelihood is

        .. math::

            P(\boldsymbol{A}|\boldsymbol{k},\boldsymbol{b}) = \frac{e_{\text{in}}!e_{\text{out}}!}
            {\left(\frac{B}{2}\right)^{e_{\text{in}}}{B\choose 2}^{e_{\text{out}}}(E+1)^{1-\delta_{B,1}}\prod_re_r!}\times
            \frac{\prod_ik_i!}{\prod_{i<j}A_{ij}!\prod_i A_{ii}!!}.

        where :math:`e_{\text{in}}` and :math:`e_{\text{out}}` are the number of
        edges inside and outside communities, respectively, and :math:`e_r` is
        the sum of degrees in group :math:`r`.

        For the non-uniform model we have instead:

        .. math::

            P(\boldsymbol{A}|\boldsymbol{k},\boldsymbol{b}) = \frac{e_{\text{out}}!\prod_re_{rr}!!}
            {{B\choose 2}^{e_{\text{out}}}(E+1)^{1-\delta_{B,1}}\prod_re_r!}\times{B + e_{\text{in}} - 1 \choose e_{\text{in}}}^{-1}\times
            \frac{\prod_ik_i!}{\prod_{i<j}A_{ij}!\prod_i A_{ii}!!}.


        Here there are two options for the prior on the degrees:

        1. ``degree_dl_kind == "uniform"``

            .. math::

                P(\boldsymbol{k}|\boldsymbol{e},\boldsymbol{b}) = \prod_r\left(\!\!{n_r\choose e_r}\!\!\right)^{-1}.

            This corresponds to a noninformative prior, where the degrees are
            sampled from a uniform distribution.

        2. ``degree_dl_kind == "distributed"`` (default)

            .. math::

                P(\boldsymbol{k}|\boldsymbol{e},\boldsymbol{b}) = \prod_r\frac{\prod_k\eta_k^r!}{n_r!} \prod_r q(e_r, n_r)^{-1}

            with :math:`\eta_k^r` being the number of nodes with degree
            :math:`k` in group :math:`r`, and :math:`q(n,m)` being the number of
            `partitions
            <https://en.wikipedia.org/wiki/Partition_(number_theory)>`_ of
            integer :math:`n` into at most :math:`m` parts.

            This corresponds to a prior for the degree sequence conditioned on
            the degree frequencies, which are themselves sampled from a uniform
            hyperprior. This option should be preferred in most cases.


        For the partition prior :math:`P(\boldsymbol{b})` please refer to
        :meth:`~graph_tool.inference.BlockState.entropy`.

        References
        ----------
        .. [lizhi-statistical-2020] Lizhi Zhang, Tiago P. Peixoto, "Statistical
           inference of assortative community structures", Phys. Rev. Research 2
           043271 (2020), :doi:`10.1103/PhysRevResearch.2.043271`, :arxiv:`2006.14493`
        """

        eargs = self._get_entropy_args(locals())

        return self._state.entropy(eargs)

    def _gen_eargs(self, args):
        return libinference.pp_entropy_args()

    def _get_entropy_args(self, kwargs, consume=False):
        if not consume:
            kwargs = kwargs.copy()
        deg_dl_kind = kwargs.get("degree_dl_kind",
                                 self._entropy_args["degree_dl_kind"])
        if deg_dl_kind == "entropy":
            kind = libinference.deg_dl_kind.ent
        elif deg_dl_kind == "uniform":
            kind = libinference.deg_dl_kind.uniform
        elif deg_dl_kind == "distributed":
            kind = libinference.deg_dl_kind.dist
        kwargs["degree_dl_kind"] = kind
        return super()._get_entropy_args(kwargs, consume)

    def _mcmc_sweep_dispatch(self, mcmc_state):
        return libinference.pp_mcmc_sweep(mcmc_state, self._state,
                                          _get_rng())

    def _multiflip_mcmc_sweep_dispatch(self, mcmc_state):
        return libinference.pp_multiflip_mcmc_sweep(mcmc_state, self._state,
                                                    _get_rng())

    def _multilevel_mcmc_sweep_dispatch(self, mcmc_state):
        return libinference.pp_multilevel_mcmc_sweep(mcmc_state, self._state,
                                                     _get_rng())

    def _gibbs_sweep_dispatch(self, gibbs_state):
        return libinference.pp_gibbs_sweep(gibbs_state, self._state,
                                           _get_rng())