histogram.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. """
  2. histogram.py
  3. The functions in this module use a representation of a histogram that is a
  4. tuple containing an arr of N bin values, an array of N bin errors(symmetric)
  5. and an array of N+1 bin edges(N lower edges + 1 upper edge).
  6. For 2d histograms, It is similar, but the arrays are two dimensional and
  7. there are separate arrays for x-edges and y-edges.
  8. """
  9. import numpy as np
  10. from scipy.optimize import curve_fit
  11. def hist(th1, rescale_x=1.0, rescale_y=1.0):
  12. nbins = th1.GetNbinsX()
  13. edges = np.zeros(nbins+1, np.float32)
  14. values = np.zeros(nbins, np.float32)
  15. errors = np.zeros(nbins, np.float32)
  16. for i in range(nbins):
  17. edges[i] = th1.GetXaxis().GetBinLowEdge(i+1)
  18. values[i] = th1.GetBinContent(i+1)
  19. errors[i] = th1.GetBinError(i+1)
  20. edges[nbins] = th1.GetXaxis().GetBinUpEdge(nbins)
  21. edges *= rescale_x
  22. values *= rescale_y
  23. errors *= rescale_y
  24. return values, errors, edges
  25. def hist_bin_centers(h):
  26. _, _, edges = h
  27. return (edges[:-1] + edges[1:])/2.0
  28. def hist_slice(h, range_):
  29. values, errors, edges = h
  30. lim_low, lim_high = range_
  31. slice_ = np.logical_and(edges[:-1] > lim_low, edges[1:] < lim_high)
  32. last = len(slice_) - np.argmax(slice_[::-1])
  33. return (values[slice_],
  34. errors[slice_],
  35. np.concatenate([edges[:-1][slice_], [edges[last]]]))
  36. def hist_add(*hs):
  37. if len(hs) == 0:
  38. return np.zeros(0)
  39. vals, errs, edges = zip(*hs)
  40. return np.sum(vals, axis=0), np.sqrt(np.sum([err*err for err in errs], axis=0)), edges[0]
  41. def hist_sub(*hs):
  42. if len(hs) == 0:
  43. return np.zeros(0)
  44. h0, hs = hs
  45. hs = hist_add(hs)
  46. hs = -hs[0], *hs[1:]
  47. return hist_add(h0, hs)
  48. def hist_mul(h1, h2, cov=None):
  49. h1_vals, h1_errs, num_edges = h1
  50. h2_vals, h2_errs, _ = h2
  51. prod_vals = h1_vals * h2_vals
  52. prod_errs2 = (h1_errs/h1_vals)**2 + (h2_errs/h2_vals)**2
  53. if cov:
  54. prod_errs2 += 2*cov/(h1_vals*h2_vals)
  55. prod_errs = abs(h1_vals*h2_vals)*np.sqrt(prod_errs2)
  56. return prod_vals, prod_errs, num_edges
  57. def hist_div(num, den, cov=None):
  58. num_vals, num_errs, num_edges = num
  59. den_vals, den_errs, _ = den
  60. rat_vals = num_vals / den_vals
  61. rat_errs2 = (num_errs/num_vals)**2 + (den_errs/den_vals)**2
  62. if cov:
  63. rat_errs2 -= 2*cov/(num_vals*den_vals)
  64. rat_errs = abs(num_vals/den_vals)*np.sqrt(rat_errs2)
  65. return rat_vals, rat_errs, num_edges
  66. def hist_integral(h, times_bin_width=True):
  67. values, errors, edges = h
  68. if times_bin_width:
  69. bin_widths = [abs(x2 - x1) for x1, x2 in zip(edges[:-1], edges[1:])]
  70. return sum(val*width for val, width in zip(values, bin_widths))
  71. else:
  72. return sum(values)
  73. def hist_scale(h, scale):
  74. values, errors, edges = h
  75. return values*scale, errors*scale, edges
  76. def hist_norm(h, norm=1):
  77. scale = norm/np.sum(h[0])
  78. return hist_scale(h, scale)
  79. def hist_mean(h):
  80. xs = hist_bin_centers(h)
  81. ys, _, _ = h
  82. return sum(x*y for x, y in zip(xs, ys)) / sum(ys)
  83. def hist_var(h):
  84. xs = hist_bin_centers(h)
  85. ys, _, _ = h
  86. mean = sum(x*y for x, y in zip(xs, ys)) / sum(ys)
  87. mean2 = sum((x**2)*y for x, y in zip(xs, ys)) / sum(ys)
  88. return mean2 - mean**2
  89. def hist_std(h):
  90. return np.sqrt(hist_var(h))
  91. def hist_stats(h):
  92. return {'int': hist_integral(h),
  93. 'sum': hist_integral(h, False),
  94. 'mean': hist_mean(h),
  95. 'var': hist_var(h),
  96. 'std': hist_std(h)}
  97. # def hist_slice2d(h, range_):
  98. # values, errors, xs, ys = h
  99. # last = len(slice_) - np.argmax(slice_[::-1])
  100. # (xlim_low, xlim_high), (ylim_low, ylim_high) = range_
  101. # slice_ = np.logical_and(xs[:-1, :-1] > xlim_low, xs[1:, 1:] < xlim_high,
  102. # ys[:-1, :-1] > ylim_low, ys[1:, 1:] < ylim_high)
  103. # last = len(slice_) - np.argmax(slice_[::-1])
  104. # return (values[slice_],
  105. # errors[slice_],
  106. # np.concatenate([edges[:-1][slice_], [edges[last]]]))
  107. def hist_fit(h, f, p0=None):
  108. values, errors, edges = h
  109. xs = hist_bin_centers(h)
  110. # popt, pcov = curve_fit(f, xs, values, p0=p0, sigma=errors)
  111. popt, pcov = curve_fit(f, xs, values, p0=p0)
  112. return popt, pcov
  113. def hist_rebin(h, nbins, min_, max_):
  114. vals, errs, edges = h
  115. low_edges = edges[:-1]
  116. high_edges = edges[1:]
  117. widths = edges[1:] - edges[:-1]
  118. vals_new = np.zeros(nbins, dtype=vals.dtype)
  119. errs_new = np.zeros(nbins, dtype=vals.dtype) # TODO: properly propagate errors
  120. edges_new = np.linspace(min_, max_, nbins+1, dtype=vals.dtype)
  121. for i, (low, high) in enumerate(zip(edges_new[:-1], edges_new[1:])):
  122. # wholly contained bins
  123. b_idx = np.logical_and((low_edges >= low), (high_edges <= high)).nonzero()[0]
  124. bin_sum = np.sum(vals[b_idx])
  125. # internally contained
  126. b_idx = np.logical_and((low_edges < low), (high_edges > high)).nonzero()[0]
  127. bin_sum += np.sum(vals[b_idx])
  128. # left edge
  129. b_idx = np.logical_and((low_edges < high), (low_edges >= low), (high_edges > high)).nonzero()[0]
  130. if len(b_idx) != 0:
  131. idx = b_idx[0]
  132. bin_sum += vals[idx]*(high - low_edges[idx])/widths[idx]
  133. # right edge
  134. b_idx = np.logical_and((high_edges > low), (low_edges < low), (high_edges <= high)).nonzero()[0]
  135. if len(b_idx) != 0:
  136. idx = b_idx[0]
  137. bin_sum += vals[idx]*(high_edges[idx] - low)/widths[idx]
  138. vals_new[i] = bin_sum
  139. return vals_new, errs_new, edges_new
  140. def hist2d(th2, rescale_x=1.0, rescale_y=1.0, rescale_z=1.0):
  141. """ Converts TH2 object to something amenable to
  142. plotting w/ matplotlab's pcolormesh.
  143. """
  144. nbins_x = th2.GetNbinsX()
  145. nbins_y = th2.GetNbinsY()
  146. xs = np.zeros((nbins_y+1, nbins_x+1), dtype=np.float32)
  147. ys = np.zeros((nbins_y+1, nbins_x+1), dtype=np.float32)
  148. values = np.zeros((nbins_y, nbins_x), dtype=np.float32)
  149. errors = np.zeros((nbins_y, nbins_x), dtype=np.float32)
  150. for i in range(nbins_x):
  151. for j in range(nbins_y):
  152. xs[j][i] = th2.GetXaxis().GetBinLowEdge(i+1)
  153. ys[j][i] = th2.GetYaxis().GetBinLowEdge(j+1)
  154. values[j][i] = th2.GetBinContent(i+1, j+1)
  155. errors[j][i] = th2.GetBinError(i+1, j+1)
  156. xs[nbins_y][i] = th2.GetXaxis().GetBinUpEdge(i)
  157. ys[nbins_y][i] = th2.GetYaxis().GetBinUpEdge(nbins_y)
  158. for j in range(nbins_y+1):
  159. xs[j][nbins_x] = th2.GetXaxis().GetBinUpEdge(nbins_x)
  160. ys[j][nbins_x] = th2.GetYaxis().GetBinUpEdge(j)
  161. xs *= rescale_x
  162. ys *= rescale_y
  163. values *= rescale_z
  164. errors *= rescale_z
  165. return values, errors, xs, ys
  166. def hist2d_norm(h, norm=1, axis=None):
  167. """
  168. :param h:
  169. :param norm: value to normalize the sum of axis to
  170. :param axis: which axis to normalize None is the sum over all bins, 0 is columns, 1 is rows.
  171. :return: The normalized histogram
  172. """
  173. values, errors, xs, ys = h
  174. with np.warnings.catch_warnings():
  175. np.warnings.filterwarnings('ignore', 'invalid value encountered in true_divide')
  176. scale_values = norm / np.sum(values, axis=axis)
  177. scale_values[scale_values == np.inf] = 1
  178. scale_values[scale_values == -np.inf] = 1
  179. if axis == 1:
  180. scale_values.shape = (scale_values.shape[0], 1)
  181. values = values * scale_values
  182. errors = errors * scale_values
  183. return values, errors, xs.copy(), ys.copy()
  184. def hist2d_percent_contour(h, percent: float, axis: str):
  185. values, _, xs, ys = h
  186. try:
  187. axis = axis.lower()
  188. axis_idx = {'x': 1, 'y': 0}[axis]
  189. except KeyError:
  190. raise ValueError('axis must be \'x\' or \'y\'')
  191. if percent < 0 or percent > 1:
  192. raise ValueError('percent must be in [0,1]')
  193. with np.warnings.catch_warnings():
  194. np.warnings.filterwarnings('ignore', 'invalid value encountered in true_divide')
  195. values = values / np.sum(values, axis=axis_idx, keepdims=True)
  196. np.nan_to_num(values, copy=False)
  197. values = np.cumsum(values, axis=axis_idx)
  198. idxs = np.argmax(values > percent, axis=axis_idx)
  199. bins_y = (ys[:-1, 0] + ys[1:, 0])/2
  200. bins_x = (xs[0, :-1] + xs[0, 1:])/2
  201. if axis == 'x':
  202. return bins_x[idxs], bins_y
  203. else:
  204. return bins_x, bins_y[idxs]