histogram_utils.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
'''
histogram_utils.py

The functions in this module use a representation of a histogram that is a
tuple containing an array of N bin values, an array of N bin errors (symmetric),
and an array of N+1 bin edges (N lower edges + 1 upper edge).

For 2-D histograms it is similar, but the arrays are two-dimensional and
there are separate arrays for x-edges and y-edges.
'''
  9. import numpy as np
  10. from scipy.optimize import curve_fit
  11. def hist(th1, rescale_x=1.0, rescale_y=1.0):
  12. nbins = th1.GetNbinsX()
  13. edges = np.zeros(nbins+1, np.float32)
  14. values = np.zeros(nbins, np.float32)
  15. errors = np.zeros(nbins, np.float32)
  16. for i in range(nbins):
  17. edges[i] = th1.GetXaxis().GetBinLowEdge(i+1)
  18. values[i] = th1.GetBinContent(i+1)
  19. errors[i] = th1.GetBinError(i+1)
  20. edges[nbins] = th1.GetXaxis().GetBinUpEdge(nbins)
  21. edges *= rescale_x
  22. values *= rescale_y
  23. errors *= rescale_y
  24. return values, errors, edges
  25. def hist_bin_centers(h):
  26. _, _, edges = h
  27. return (edges[:-1] + edges[1:])/2.0
  28. def hist2d(th2, rescale_x=1.0, rescale_y=1.0, rescale_z=1.0):
  29. """ Converts TH2 object to something amenable to
  30. plotting w/ matplotlab's pcolormesh.
  31. """
  32. nbins_x = th2.GetNbinsX()
  33. nbins_y = th2.GetNbinsY()
  34. print(nbins_x, nbins_y)
  35. xs = np.zeros((nbins_y+1, nbins_x+1), np.float32)
  36. ys = np.zeros((nbins_y+1, nbins_x+1), np.float32)
  37. values = np.zeros((nbins_y, nbins_x), np.float32)
  38. errors = np.zeros((nbins_y, nbins_x), np.float32)
  39. for i in range(nbins_x):
  40. for j in range(nbins_y):
  41. xs[j][i] = th2.GetXaxis().GetBinLowEdge(i+1)
  42. ys[j][i] = th2.GetYaxis().GetBinLowEdge(j+1)
  43. values[j][i] = th2.GetBinContent(i+1, j+1)
  44. errors[j][i] = th2.GetBinError(i+1, j+1)
  45. xs[nbins_y][i] = th2.GetXaxis().GetBinUpEdge(i)
  46. ys[nbins_y][i] = th2.GetYaxis().GetBinUpEdge(nbins_y)
  47. for j in range(nbins_y+1):
  48. xs[j][nbins_x] = th2.GetXaxis().GetBinUpEdge(nbins_x)
  49. ys[j][nbins_x] = th2.GetYaxis().GetBinUpEdge(j)
  50. xs *= rescale_x
  51. ys *= rescale_y
  52. values *= rescale_z
  53. errors *= rescale_z
  54. return values, errors, xs, ys
  55. def hist_slice(hist, range_):
  56. values, errors, edges = hist
  57. lim_low, lim_high = range_
  58. slice_ = np.logical_and(edges[:-1] > lim_low, edges[1:] < lim_high)
  59. last = len(slice_) - np.argmax(slice_[::-1])
  60. return (values[slice_],
  61. errors[slice_],
  62. np.concatenate([edges[:-1][slice_], [edges[last]]]))
  63. def hist_add(*hists):
  64. if len(hists) == 0:
  65. return np.zeros(0)
  66. vals, errs, edges = zip(*hists)
  67. return np.sum(vals, axis=0), np.sqrt(np.sum([err*err for err in errs], axis=0)), edges[0]
  68. def hist_integral(hist, times_bin_width=True):
  69. values, errors, edges = hist
  70. if times_bin_width:
  71. bin_widths = [abs(x2 - x1) for x1, x2 in zip(edges[:-1], edges[1:])]
  72. return sum(val*width for val, width in zip(values, bin_widths))
  73. else:
  74. return sum(values)
  75. def hist_scale(hist, scale):
  76. values, errors, edges = hist
  77. return values*scale, errors*scale, edges
  78. def hist_normalize(hist, norm = 1):
  79. scale = norm/np.sum(hist[0])
  80. return hist_scale(hist, scale)
  81. def hist_mean(hist):
  82. xs = hist_bin_centers(hist)
  83. ys, _, _ = hist
  84. return sum(x*y for x, y in zip(xs, ys)) / sum(ys)
  85. def hist_var(hist):
  86. xs = hist_bin_centers(hist)
  87. ys, _, _ = hist
  88. mean = sum(x*y for x, y in zip(xs, ys)) / sum(ys)
  89. mean2 = sum((x**2)*y for x, y in zip(xs, ys)) / sum(ys)
  90. return mean2 - mean**2
  91. def hist_std(hist):
  92. return np.sqrt(hist_var(hist))
  93. def hist_stats(hist):
  94. return {'int': hist_integral(hist),
  95. 'sum': hist_integral(hist, False),
  96. 'mean': hist_mean(hist),
  97. 'var': hist_var(hist),
  98. 'std': hist_std(hist)}
# NOTE: unfinished draft of a 2-D counterpart to hist_slice, kept for
# reference. It is not runnable as written: `slice_` is used before it is
# assigned, `edges` is never unpacked from `h`, and np.logical_and is given
# four conditions.
# def hist_slice2d(h, range_):
#     values, errors, xs, ys = h
#     last = len(slice_) - np.argmax(slice_[::-1])
#     (xlim_low, xlim_high), (ylim_low, ylim_high) = range_
#     slice_ = np.logical_and(xs[:-1, :-1] > xlim_low, xs[1:, 1:] < xlim_high,
#                             ys[:-1, :-1] > ylim_low, ys[1:, 1:] < ylim_high)
#     last = len(slice_) - np.argmax(slice_[::-1])
#     return (values[slice_],
#             errors[slice_],
#             np.concatenate([edges[:-1][slice_], [edges[last]]]))
  109. def hist_fit(h, f, p0=None):
  110. values, errors, edges = h
  111. xs = hist_bin_centers(h)
  112. # popt, pcov = curve_fit(f, xs, values, p0=p0, sigma=errors)
  113. popt, pcov = curve_fit(f, xs, values, p0=p0)
  114. return popt, pcov
  115. def hist_rebin(hist, range_, nbins):
  116. raise NotImplementedError()