Source code for cardinal_pythonlib.probability

#!/usr/bin/env python
# cardinal_pythonlib/probability.py

"""
===============================================================================

    Original code copyright (C) 2009-2022 Rudolf Cardinal (rudolf@pobox.com).

    This file is part of cardinal_pythonlib.

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        https://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.

===============================================================================

**Miscellaneous probability functions.**

"""

import math
from math import exp as math_exp, log as math_ln, log10 as math_log10

# For speed:
INFINITY = math.inf
MINUS_INFINITY = -math.inf


# =============================================================================
# Basic maths
# =============================================================================


[docs]def ln(x: float) -> float:
    """
    Version of :func:`math.log` that treats log(0) as ``-inf``, rather than
    crashing with ``ValueError: math domain error``.

    Args:
        x: parameter

    Returns:
        float: ln(x), the natural logarithm of x

    See
    https://stackoverflow.com/questions/42980201/logarithm-of-zero-in-python.

    For speed, use ``from math import log as math_log``, etc.:

    .. code-block: python

        import math
        from math import log as math_log
        import timeit
        INFINITY = math.inf
        MINUS_INFINITY = -math.inf

        timeit.timeit('(math.log(x) for x in range(1, 100))', number=10000)
        # 0.00756470113992691
        timeit.timeit('(math_log(x) for x in range(1, 100))', number=10000)
        # 0.004151405766606331

        timeit.timeit('(-math.inf for _ in range(1, 100))', number=10000)
        # 0.009433002211153507
        timeit.timeit('(MINUS_INFINITY for _ in range(1, 100))', number=10000)
        # 0.004293474368751049

    Note also that although floats can't be "too close to zero" to cause an
    error, other things (like Decimal objects) can. See
    https://stackoverflow.com/questions/19095774/python-math-domain-errors-in-math-log-function

    Exception catching is likely faster:

    .. code-block: python

        from cardinal_pythonlib.probability import ln
        import timeit

        # return math_ln(x) if x != 0 else MINUS_INFINITY
        timeit.timeit('(ln(x) for x in range(1, 100))', number=10000)
        # 0.012411762960255146

        # try... except ValueError...
        timeit.timeit('(ln(x) for x in range(1, 100))', number=10000)
        # 0.007783170789480209

    """  # noqa
    # return math_ln(x) if x != 0 else MINUS_INFINITY  # slower, less helpful
    try:
        return math_ln(x)
    except ValueError:
        if x < 0:
            raise ValueError(f"Can't take log of a negative number: {x}")
        # Either x > 0 but causing problems anyway, or x == 0.
        return MINUS_INFINITY


[docs]def log10(x: float) -> float:
    """
    Version of :func:`math.log10` that treats log(0) as ``-inf``, rather than
    crashing with ``ValueError: math domain error``.

    Args:
        x: parameter

    Returns:
        float: log10(x), the logarithm to base 10 of x

    See
    https://stackoverflow.com/questions/42980201/logarithm-of-zero-in-python.
    """
    # return math_log10(x) if x != 0 else MINUS_INFINITY
    try:
        return math_log10(x)
    except ValueError:
        if x < 0:
            raise ValueError(f"Can't take log of a negative number: {x}")
        # Either x > 0 but causing problems anyway, or x == 0.
        return MINUS_INFINITY


# =============================================================================
# Basic probability
# =============================================================================


[docs]def odds_from_probability(p: float) -> float:
    r"""
    Returns odds, given a probability.

    .. math::

        o = \frac{p}{1 - p}

    Args:
        p: probability

    Returns:
        float: odds
    """
    return p / (1 - p)


[docs]def probability_from_odds(odds: float) -> float:
    r"""
    Returns a probability, given odds.

    .. math::

        o &= \frac{p}{1 - p}  \\
        o (1 - p) &= p  \\
        o - op &= p  \\
        o &= p + op  \\
        o &= p(1 + o)  \\
        p &= \frac{o}{1 + o}

    Args:
        odds: odds

    Returns:
        float: probability
    """
    return odds / (1 + odds)


[docs]def probability_from_log_prob(log_p: float) -> float:
    """
    Returns a probability from a log probability.

    Args:
        log_p: ln(p); natural log of p

    Returns:
        float: p
    """
    return math_exp(log_p)


[docs]def probability_from_log_odds(log_odds: float) -> float:
    """
    Returns a probability from a log odds.

    Args:
        log_odds: ln(o); natural log of o

    Returns:
        float: p
    """
    return probability_from_odds(math_exp(log_odds))


[docs]def log_probability_from_log_odds(log_odds: float) -> float:
    r"""
    No obvious quick form for this:

    .. math::

        o &= \frac{p}{1 - p}  \\

        \log o &= \log p - \log (1 - p)  \\

    Args:
        log_odds: ln(o); natural log of o

    Returns:
        float: ln(p)

    """
    return ln(probability_from_log_odds(log_odds))


[docs]def log_odds_from_probability(p: float) -> float:
    """
    Returns log odds from a probability.

    Args:
        p: probability

    Returns:
        float: ln(odds)

    See

    - https://wiki.lesswrong.com/wiki/Odds_ratio
    - https://wiki.lesswrong.com/wiki/Log_odds
    """
    return ln(odds_from_probability(p))


[docs]def log_odds_from_1_in_n(n: float) -> float:
    r"""
    If the chance of something occurring are 1 in n, then its probability is
    :math:`1/n`, and its odds are :math:`\frac{1}{n - 1}`. This function
    returns the log of those odds.

    Args:
        n: ``n``, as above

    Returns:
        float: :math:`\ln \frac{1}{n - 1} = \ln 1 - \ln (n - 1) = -\ln(n - 1)`
    """
    return -ln(n - 1)


# =============================================================================
# Bayesian update rules
# =============================================================================


[docs]def bayes_posterior(
    prior: float, likelihood: float, marginal_likelihood: float
) -> float:
    r"""
    Returns P(H | D), the posterior probability of hypothesis H, given data D.

    Args:
        prior:
            P(H), the prior belief in H
        likelihood:
            P(D | H), the probability of observing D given H
        marginal_likelihood:
            P(D), the probability of observing D; also called evidence or model
            evidence.

    Returns:
        float: P(H | D), the probability of H given D.

    Bayes' rule:

    .. math::

        P(A | B) P(B) = P(B | A) P(A) = P(A \land B)

        P(H | D) = \frac{ P(D | H) \cdot P(H) }{ P(D) }

        \text{posterior} =
            \frac
            { \text{likelihood} \cdot \text{prior} }
            { \text{marginal likelihood} }

    """
    return likelihood * prior / marginal_likelihood


[docs]def log_bayes_posterior(
    log_prior: float, log_likelihood: float, log_marginal_likelihood: float
) -> float:
    r"""
    Exactly equivalent to :func:`bayes_posterior`, but using log probability.

    Args:
        log_prior: :math:`\log P(H)`
        log_likelihood: :math:`\log P(D | H)`
        log_marginal_likelihood: :math:`\log P(D)`

    Returns:
        float: :math:`\log P(H | D)`
    """
    return log_likelihood + log_prior - log_marginal_likelihood


[docs]def posterior_odds(prior_odds: float, likelihood_ratio: float) -> float:
    r"""
    Returns the posterior odds for a hypothesis, given the prior odds and the
    likelihood ratio.

    .. math::

        P(A | B) &= P(B | A) \frac{ P(A) }{ P(B) }  \\

        P(\neg A | B) &= P(B | \neg A) \frac{ P(\neg A) }{ P(B) }  \\

        \frac{ P(A | B) }{ P(\neg A | B) } &=
            \frac{ P(B | A) }{ P(B | \neg A) }
            \frac{ P(A) }{ P(\neg A) }  \\

        \text{posterior odds} &= \text{likelihood ratio} \cdot
                                 \text{prior odds}

    Args:
        prior_odds:
            prior odds, :math:`\frac{ P(A) }{ P(\neg A) }`
        likelihood_ratio:
            likelihood ratio, :math:`\frac{ P(B | A) }{ P(B | \neg A) }`

    Returns:
        float: posterior odds, :math:`\frac{ P(A | B) }{ P(\neg A | B) }`

    e.g. https://wiki.lesswrong.com/wiki/Odds_ratio

    """
    return likelihood_ratio * prior_odds


[docs]def log_posterior_odds(
    log_prior_odds: float, log_likelihood_ratio: float
) -> float:
    r"""
    Exactly as for :func:`posterior_odds`, but with log odds.
    """
    return log_likelihood_ratio + log_prior_odds


[docs]def log_likelihood_ratio_from_p(
    p_d_given_h: float, p_d_given_not_h: float
) -> float:
    r"""
    Returns

    .. math::

        \ln \frac{ P(D | H) }{ P(D | \neg H) } =
            \ln P(D | H) - \ln P(D | \neg H)

    Args:
        p_d_given_h: :math:`P(D | H)`
        p_d_given_not_h: :math:`P(D | \neg H)`

    Returns:
        float: log likelihood ratio, as above

    """  # noqa
    return ln(p_d_given_h) - ln(p_d_given_not_h)


[docs]def log_posterior_odds_from_pdh_pdnh(
    log_prior_odds: float, p_d_given_h: float, p_d_given_not_h: float
) -> float:
    r"""
    Calculates posterior odds.

    Args:
        log_prior_odds:
            log prior odds of H, :math:`ln(\frac{ P(H) }{ P(\neg H) })`
        p_d_given_h:
            :math:`P(D | H)`
        p_d_given_not_h:
            :math:`P(D | \neg H)`

    Returns:
        float:
            log posterior odds of H,
            :math:`ln(\frac{ P(H | D) }{ P(\neg H | D) })`

    """
    return log_posterior_odds(
        log_prior_odds=log_prior_odds,
        log_likelihood_ratio=log_likelihood_ratio_from_p(
            p_d_given_h=p_d_given_h, p_d_given_not_h=p_d_given_not_h
        ),
    )


[docs]def log_posterior_odds_from_bool_d_pdh_pdnh(
    log_prior_odds: float, d: bool, p_d_given_h: float, p_d_given_not_h: float
) -> float:
    r"""
    Calculates posterior odds.

    Args:
        log_prior_odds:
            log prior odds of H, :math:`ln(\frac{ P(H) }{ P(\neg H) })`
        d:
            whether D is true or not
        p_d_given_h:
            :math:`P(D | H)`
        p_d_given_not_h:
            :math:`P(D | \neg H)`

    Returns:
        float:
            log posterior odds of H,
            :math:`ln(\frac{ P(H | D) }{ P(\neg H | D) })`

    """
    if d:
        # D is true
        log_lr = log_likelihood_ratio_from_p(
            p_d_given_h=p_d_given_h, p_d_given_not_h=p_d_given_not_h
        )
    else:
        # not-D is true
        p_not_d_given_h = 1 - p_d_given_h
        p_not_d_given_not_h = 1 - p_d_given_not_h
        log_lr = log_likelihood_ratio_from_p(
            p_d_given_h=p_not_d_given_h, p_d_given_not_h=p_not_d_given_not_h
        )
    return log_posterior_odds(
        log_prior_odds=log_prior_odds, log_likelihood_ratio=log_lr
    )