caleb
/
filval-python


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
							"""
    histogram.py
    The functions in this module use a representation of a histogram that is a
    tuple containing an arr of N bin values, an array of N bin errors(symmetric)
    and an array of N+1 bin edges(N lower edges + 1 upper edge).

    For 2d histograms, It is similar, but the arrays are two dimensional and
    there are separate arrays for x-edges and y-edges.
"""

import numpy as np
from scipy.optimize import curve_fit


def hist(th1, rescale_x=1.0, rescale_y=1.0):
    nbins = th1.GetNbinsX()

    edges = np.zeros(nbins+1, np.float32)
    values = np.zeros(nbins, np.float32)
    errors = np.zeros(nbins, np.float32)

    for i in range(nbins):
        edges[i] = th1.GetXaxis().GetBinLowEdge(i+1)
        values[i] = th1.GetBinContent(i+1)
        errors[i] = th1.GetBinError(i+1)

    edges[nbins] = th1.GetXaxis().GetBinUpEdge(nbins)
    edges *= rescale_x
    values *= rescale_y
    errors *= rescale_y
    return values, errors, edges


def hist_bin_centers(h):
    _, _, edges = h
    return (edges[:-1] + edges[1:])/2.0


def hist_slice(h, range_):
    values, errors, edges = h
    lim_low, lim_high = range_
    slice_ = np.logical_and(edges[:-1] > lim_low, edges[1:] < lim_high)
    last = len(slice_) - np.argmax(slice_[::-1])
    return (values[slice_],
            errors[slice_],
            np.concatenate([edges[:-1][slice_], [edges[last]]]))


def hist_add(*hs):
    if len(hs) == 0:
        return np.zeros(0)
    vals, errs, edges = zip(*hs)
    return np.sum(vals, axis=0), np.sqrt(np.sum([err*err for err in errs], axis=0)), edges[0]


def hist_sub(*hs):
    if len(hs) == 0:
        return np.zeros(0)
    h0, hs = hs
    hs = hist_add(hs)
    hs = -hs[0], *hs[1:]
    return hist_add(h0, hs)


def hist_mul(h1, h2, cov=None):
    h1_vals, h1_errs, num_edges = h1
    h2_vals, h2_errs, _ = h2
    prod_vals = h1_vals * h2_vals
    prod_errs2 = (h1_errs/h1_vals)**2 + (h2_errs/h2_vals)**2
    if cov:
        prod_errs2 += 2*cov/(h1_vals*h2_vals)
    prod_errs = abs(h1_vals*h2_vals)*np.sqrt(prod_errs2)
    return prod_vals, prod_errs, num_edges


def hist_div(num, den, cov=None):
    num_vals, num_errs, num_edges = num
    den_vals, den_errs, _ = den
    rat_vals = num_vals / den_vals
    rat_errs2 = (num_errs/num_vals)**2 + (den_errs/den_vals)**2
    if cov:
        rat_errs2 -= 2*cov/(num_vals*den_vals)
    rat_errs = abs(num_vals/den_vals)*np.sqrt(rat_errs2)
    return rat_vals, rat_errs, num_edges


def hist_integral(h, times_bin_width=True):
    values, errors, edges = h
    if times_bin_width:
        bin_widths = [abs(x2 - x1) for x1, x2 in zip(edges[:-1], edges[1:])]
        return sum(val*width for val, width in zip(values, bin_widths))
    else:
        return sum(values)


def hist_scale(h, scale):
    values, errors, edges = h
    return values*scale, errors*scale, edges


def hist_norm(h, norm=1):
    scale = norm/np.sum(h[0])
    return hist_scale(h, scale)


def hist_mean(h):
    xs = hist_bin_centers(h)
    ys, _, _ = h
    return sum(x*y for x, y in zip(xs, ys)) / sum(ys)


def hist_var(h):
    xs = hist_bin_centers(h)
    ys, _, _ = h
    mean = sum(x*y for x, y in zip(xs, ys)) / sum(ys)
    mean2 = sum((x**2)*y for x, y in zip(xs, ys)) / sum(ys)
    return mean2 - mean**2


def hist_std(h):
    return np.sqrt(hist_var(h))


def hist_stats(h):
    return {'int': hist_integral(h),
            'sum': hist_integral(h, False),
            'mean': hist_mean(h),
            'var': hist_var(h),
            'std': hist_std(h)}


# def hist_slice2d(h, range_):
#     values, errors, xs, ys = h

#     last = len(slice_) - np.argmax(slice_[::-1])

#     (xlim_low, xlim_high), (ylim_low, ylim_high) = range_
#     slice_ = np.logical_and(xs[:-1, :-1] > xlim_low, xs[1:, 1:] < xlim_high,
#                             ys[:-1, :-1] > ylim_low, ys[1:, 1:] < ylim_high)
#     last = len(slice_) - np.argmax(slice_[::-1])
#     return (values[slice_],
#             errors[slice_],
#             np.concatenate([edges[:-1][slice_], [edges[last]]]))


def hist_fit(h, f, p0=None):
    values, errors, edges = h
    xs = hist_bin_centers(h)
    # popt, pcov = curve_fit(f, xs, values, p0=p0, sigma=errors)
    popt, pcov = curve_fit(f, xs, values, p0=p0)
    return popt, pcov


def hist_rebin(h, range_, nbins):
    raise NotImplementedError()


def hist2d(th2, rescale_x=1.0, rescale_y=1.0, rescale_z=1.0):
    """ Converts TH2 object to something amenable to
        plotting w/ matplotlab's pcolormesh.
    """
    nbins_x = th2.GetNbinsX()
    nbins_y = th2.GetNbinsY()
    xs = np.zeros((nbins_y+1, nbins_x+1), dtype=np.float32)
    ys = np.zeros((nbins_y+1, nbins_x+1), dtype=np.float32)
    values = np.zeros((nbins_y, nbins_x), dtype=np.float32)
    errors = np.zeros((nbins_y, nbins_x), dtype=np.float32)
    for i in range(nbins_x):
        for j in range(nbins_y):
            xs[j][i] = th2.GetXaxis().GetBinLowEdge(i+1)
            ys[j][i] = th2.GetYaxis().GetBinLowEdge(j+1)
            values[j][i] = th2.GetBinContent(i+1, j+1)
            errors[j][i] = th2.GetBinError(i+1, j+1)
        xs[nbins_y][i] = th2.GetXaxis().GetBinUpEdge(i)
        ys[nbins_y][i] = th2.GetYaxis().GetBinUpEdge(nbins_y)
    for j in range(nbins_y+1):
        xs[j][nbins_x] = th2.GetXaxis().GetBinUpEdge(nbins_x)
        ys[j][nbins_x] = th2.GetYaxis().GetBinUpEdge(j)

    xs *= rescale_x
    ys *= rescale_y
    values *= rescale_z
    errors *= rescale_z

    return values, errors, xs, ys


def hist2d_norm(h, norm=1, axis=None):
    """

    :param h:
    :param norm: value to normalize the sum of axis to
    :param axis: which axis to normalize None is the sum over all bins, 0 is columns, 1 is rows.
    :return: The normalized histogram
    """
    values, errors, xs, ys = h
    with np.warnings.catch_warnings():
        np.warnings.filterwarnings('ignore', 'invalid value encountered in true_divide')
        scale_values = norm / np.sum(values, axis=axis)
        scale_values[scale_values == np.inf] = 1
        scale_values[scale_values == -np.inf] = 1
    if axis == 1:
        scale_values.shape = (scale_values.shape[0], 1)
    values = values * scale_values
    errors = errors * scale_values
    return values, errors, xs.copy(), ys.copy()


def hist2d_percent_contour(h, percent: float, axis: str):
    values, _, xs, ys = h

    try:
        axis = axis.lower()
        axis_idx = {'x': 1, 'y': 0}[axis]
    except KeyError:
        raise ValueError('axis must be \'x\' or \'y\'')
    if percent < 0 or percent > 1:
        raise ValueError('percent must be in [0,1]')

    with np.warnings.catch_warnings():
        np.warnings.filterwarnings('ignore', 'invalid value encountered in true_divide')
        values = values / np.sum(values, axis=axis_idx, keepdims=True)
        np.nan_to_num(values, copy=False)
    values = np.cumsum(values, axis=axis_idx)
    idxs = np.argmax(values > percent, axis=axis_idx)

    bins_y = (ys[:-1, 0] + ys[1:, 0])/2
    bins_x = (xs[0, :-1] + xs[0, 1:])/2

    if axis == 'x':
        return bins_x[idxs], bins_y
    else:
        return bins_x, bins_y[idxs]