yields.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
  1. #!/usr/bin/env python
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. from filval.result_set import ResultSet
  5. from filval.histogram_utils import hist, hist_add, hist_normalize, hist_scale
  6. from filval.plotter import (decl_plot, render_plots, hist_plot, hist_plot_stack,
  7. Plot, generate_dashboard, hists_to_table)
  8. an_tttt = ([0.47, 0.33, 0.18, 0.78, 0.49, 0.52, 0.33, 0.49],
  9. [0, 0, 0, 0, 0, 0, 0, 0], [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5])
  10. an_ttw = ([2.29663, 0.508494, 0.161166, 1.03811, 0.256401, 0.127582, 0.181522, 0.141659],
  11. [0, 0, 0, 0, 0, 0, 0, 0], [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5])
  12. an_ttz = ([0.974751, 0.269195, 1e-06, 0.395831, 0.0264703, 0.06816, 0.8804, 0.274265],
  13. [0, 0, 0, 0, 0, 0, 0, 0], [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5])
  14. an_tth = ([1.13826, 0.361824, 0.162123, 0.683917, 0.137608, 0.0632719, 0.554491, 0.197864],
  15. [0, 0, 0, 0, 0, 0, 0, 0], [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5])
  16. @decl_plot
  17. def plot_yield_grid(rss, tau_category=-1):
  18. r"""## Event Yield
  19. The event yield for the eight signal regions defined in AN-17-115. Data is normalized
  20. to the integrated luminosity of $35.9\textrm{fb}^{-1}$.
  21. Ignoring taus means both that there is no requirement on number of good taus *and*
  22. the taus, if present, are not considered for the SS pair.
  23. If taus are not ignored, any good tau with $p_T$>20Gev is considered in constructing the SS lepton pair. The yields
  24. are then further broken down by the number of good taus in the event.
  25. A "good" tau in the above means any tau candidate passing the `byTightIsolationMVArun2v1DBoldDMwLT` ID w/ pt>20GeV.
  26. It is also required to pass tau-lepton cross cleaning where it must not match any electron or muon within
  27. $\delta R < 0.4$.
  28. Truth-matched taus are those that match within $\delta R < 0.3$ with gen-level taus that pass the flag `fromHardProcessDecayed`.
  29. """
  30. def get_sr(rs, tm=False):
  31. if tm:
  32. if tau_category == 0:
  33. return hist(rs.SRs_0tmtau)
  34. if tau_category == 1:
  35. return hist(rs.SRs_1tmtau)
  36. elif tau_category == 2:
  37. return hist(rs.SRs_2tmtau)
  38. else:
  39. if tau_category == -1:
  40. return hist(rs.ignore_tau_SRs)
  41. elif tau_category == 0:
  42. return hist(rs.SRs_0tau)
  43. elif tau_category == 1:
  44. return hist(rs.SRs_1tau)
  45. elif tau_category == 2:
  46. return hist(rs.SRs_2tau)
  47. _, ((ax_tttt, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
  48. tttt, ttw, ttz, tth = [get_sr(rs) for rs in rss]
  49. tm_tttt, tm_ttw, tm_ttz, tm_tth = [get_sr(rs, True) for rs in rss]
  50. plt.sca(ax_tttt)
  51. hist_plot(tttt, title='TTTT', stats=False, label='Mock', include_errors=True)
  52. if tau_category == -1:
  53. hist_plot(an_tttt, title='TTTT', stats=False, label='AN')
  54. elif tau_category >= 0:
  55. hist_plot(tm_tttt, title='TTTT', stats=False, label='Truth-Matched Taus', include_errors=True)
  56. plt.ylim((0, None))
  57. plt.sca(ax_ttw)
  58. hist_plot(ttw, title='TTW', stats=False, label='Mock', include_errors=True)
  59. if tau_category == -1:
  60. hist_plot(an_ttw, title='TTW', stats=False, label='AN')
  61. elif tau_category >= 0:
  62. hist_plot(tm_ttw, title='TTW', stats=False, label='Truth-Matched Taus', include_errors=True)
  63. plt.ylim((0, None))
  64. plt.legend()
  65. plt.sca(ax_ttz)
  66. hist_plot(ttz, title='TTZ', stats=False, label='Mock', include_errors=True)
  67. if tau_category == -1:
  68. hist_plot(an_ttz, title='TTZ', stats=False, label='AN')
  69. elif tau_category >= 0:
  70. hist_plot(tm_ttz, title='TTZ', stats=False, label='Truth-Matched Taus', include_errors=True)
  71. plt.ylim((0, None))
  72. plt.xlabel('Signal Region')
  73. plt.sca(ax_tth)
  74. hist_plot(tth, title='TTH', stats=False, label='Mock', include_errors=True)
  75. if tau_category == -1:
  76. hist_plot(an_tth, title='TTH', stats=False, label='AN')
  77. elif tau_category >= 0:
  78. hist_plot(tm_tth, title='TTH', stats=False, label='Truth-Matched Taus', include_errors=True)
  79. plt.ylim((0, None))
  80. plt.xlabel('Signal Region')
  81. def to_table(hists):
  82. return hists_to_table(hists, row_labels=['TTTT', 'TTW', 'TTZ', 'TTH'],
  83. column_labels=[f'SR{n}' for n in range(1, 9)])
  84. tables = '<h2>Mock</h2>'
  85. tables += to_table([tttt, ttw, ttz, tth])
  86. if tau_category == -1:
  87. tables += '<h2>AN</h2>'
  88. tables += to_table([an_tttt, an_ttw, an_ttz, an_tth])
  89. elif tau_category >= 0:
  90. tables += '<h2>TM Taus</h2>'
  91. tables += to_table([tm_tttt, tm_ttw, tm_ttz, tm_tth])
  92. return tables
  93. @decl_plot
  94. def plot_yield_stack(rss):
  95. r"""## Event Yield - Stacked
  96. The event yield for the eight signal regions defined in AN-17-115. Data is normalized
  97. to the Moriond 2018 integrated luminosity ($35.9\textrm{fb}^{-1}$). Code for the histogram generation is
  98. here: <https://github.com/cfangmeier/FTAnalysis/blob/master/studies/tau/Yield.C>
  99. """
  100. ft, ttw, ttz, tth = map(lambda rs: hist(rs.SRs), rss)
  101. hist_plot_stack([ttw, ttz, tth], labels=['TTW', 'TTZ', 'TTH'])
  102. ft = ft[0]*10, ft[1], ft[2]
  103. hist_plot(ft, label='TTTT (x10)', stats=False, color='k')
  104. plt.ylim((0, 60))
  105. plt.xlabel('Signal Region')
  106. plt.legend()
  107. @decl_plot
  108. def plot_lep_multi(rss, dataset):
  109. _, (ax_els, ax_mus, ax_taus) = plt.subplots(3, 1)
  110. els = list(map(lambda rs: hist_normalize(hist(rs.nEls)), rss))
  111. mus = list(map(lambda rs: hist_normalize(hist(rs.nMus)), rss))
  112. taus = list(map(lambda rs: hist_normalize(hist(rs.nTaus)), rss))
  113. def _plot(ax, procs):
  114. plt.sca(ax)
  115. ft, ttw, ttz, tth = procs
  116. h = {'TTTT': ft,
  117. 'TTW': ttw,
  118. 'TTZ': ttz,
  119. 'TTH': tth}[dataset]
  120. hist_plot(h, stats=False, label=dataset)
  121. _plot(ax_els, els)
  122. plt.xlabel('\\# Good Electrons')
  123. plt.legend()
  124. _plot(ax_mus, mus)
  125. plt.xlabel('\\# Good Muons')
  126. _plot(ax_taus, taus)
  127. plt.xlabel('\\# Good Taus')
  128. @decl_plot
  129. def plot_sig_strength(rss):
  130. r""" The signal strength of the TTTT signal defined as
  131. $\frac{S}{\sqrt{S+B}}$
  132. """
  133. ft, ttw, ttz, tth = map(lambda rs: hist(rs.SRs), rss)
  134. bg = hist_add(ttw, ttz, tth)
  135. strength = ft[0] / np.sqrt(ft[0] + bg[0])
  136. hist_plot((strength, ft[1], ft[2]), stats=False)
  137. @decl_plot
  138. def plot_event_obs(rss, dataset, in_signal_region=True):
  139. r"""The distribution of $N_{jet}$, $N_{Bjet}$, MET, and $H_T$ in either all events
  140. or only signal region (igoring taus) events.
  141. """
  142. _, ((ax_njet, ax_nbjet), (ax_ht, ax_met)) = plt.subplots(2, 2)
  143. # ft, ttw, ttz, tth = map(lambda rs: hist(rs.SRs), rss)
  144. ft, ttw, ttz, tth = rss
  145. rs = {'TTTT': ft,
  146. 'TTW': ttw,
  147. 'TTZ': ttz,
  148. 'TTH': tth}[dataset]
  149. def _plot(ax, obs):
  150. plt.sca(ax)
  151. if in_signal_region:
  152. h = {'MET': rs.met_in_SR,
  153. 'HT': rs.ht_in_SR,
  154. 'NJET': rs.njet_in_SR,
  155. 'NBJET': rs.nbjet_in_SR}[obs]
  156. else:
  157. h = {'MET': rs.met,
  158. 'HT': rs.ht,
  159. 'NJET': rs.njet,
  160. 'NBJET': rs.nbjet}[obs]
  161. hist_plot(hist(h), stats=False, label=dataset, xlabel=obs)
  162. _plot(ax_njet, 'NJET')
  163. _plot(ax_nbjet, 'NBJET')
  164. _plot(ax_ht, 'HT')
  165. _plot(ax_met, 'MET')
  166. @decl_plot
  167. def plot_event_obs_stack(rss, in_signal_region=True):
  168. r"""
  169. """
  170. _, ((ax_njet, ax_nbjet), (ax_ht, ax_met)) = plt.subplots(2, 2)
  171. def _plot(ax, obs):
  172. plt.sca(ax)
  173. if in_signal_region:
  174. attr = {'MET': 'met_in_SR',
  175. 'HT': 'ht_in_SR',
  176. 'NJET': 'njet_in_SR',
  177. 'NBJET': 'nbjet_in_SR'}[obs]
  178. else:
  179. attr = {'MET': 'met',
  180. 'HT': 'ht',
  181. 'NJET': 'njet',
  182. 'NBJET': 'nbjet'}[obs]
  183. ft, ttw, ttz, tth = map(lambda rs: hist(getattr(rs, attr)), rss)
  184. hist_plot_stack([ttw, ttz, tth], labels=["TTW", "TTZ", "TTH"])
  185. hist_plot(hist_scale(ft, 5), label="TTTT (x5)", color='k')
  186. plt.xlabel(obs)
  187. _plot(ax_njet, 'NJET')
  188. _plot(ax_nbjet, 'NBJET')
  189. plt.legend()
  190. _plot(ax_ht, 'HT')
  191. _plot(ax_met, 'MET')
  192. @decl_plot
  193. def plot_tau_purity(rss):
  194. _, ((ax_ft, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
  195. ft, ttw, ttz, tth = list(map(lambda rs: hist(rs.tau_purity_v_pt), rss))
  196. def _plot(ax, dataset):
  197. plt.sca(ax)
  198. h = {'TTTT': ft,
  199. 'TTW': ttw,
  200. 'TTZ': ttz,
  201. 'TTH': tth}[dataset]
  202. hist_plot(h, stats=False, label=dataset)
  203. plt.text(200, 0.05, dataset)
  204. plt.xlabel(r"$P_T$(GeV)")
  205. _plot(ax_ft, 'TTTT')
  206. _plot(ax_ttw, 'TTW')
  207. _plot(ax_ttz, 'TTZ')
  208. _plot(ax_tth, 'TTH')
  209. if __name__ == '__main__':
  210. # First create a ResultSet object which loads all of the objects from root file
  211. # into memory and makes them available as attributes
  212. rss = (ResultSet("ft", 'data/yield_ft.root'),
  213. ResultSet("ttw", 'data/yield_ttw.root'),
  214. ResultSet("ttz", 'data/yield_ttz.root'),
  215. ResultSet("tth", 'data/yield_tth.root'))
  216. # Next, declare all of the (sub)plots that will be assembled into full
  217. # figures later
  218. yield_tau_ignore_tau = plot_yield_grid, (rss, -1)
  219. yield_tau_0tau = plot_yield_grid, (rss, 0)
  220. yield_tau_1tau = plot_yield_grid, (rss, 1)
  221. yield_tau_2tau = plot_yield_grid, (rss, 2)
  222. ft_event_obs_in_sr = plot_event_obs, (rss, 'TTTT'), {'in_signal_region': True}
  223. ttw_event_obs_in_sr = plot_event_obs, (rss, 'TTW'), {'in_signal_region': True}
  224. ttz_event_obs_in_sr = plot_event_obs, (rss, 'TTZ'), {'in_signal_region': True}
  225. tth_event_obs_in_sr = plot_event_obs, (rss, 'TTH'), {'in_signal_region': True}
  226. ft_event_obs = plot_event_obs, (rss, 'TTTT'), {'in_signal_region': False}
  227. ttw_event_obs = plot_event_obs, (rss, 'TTW'), {'in_signal_region': False}
  228. ttz_event_obs = plot_event_obs, (rss, 'TTZ'), {'in_signal_region': False}
  229. tth_event_obs = plot_event_obs, (rss, 'TTH'), {'in_signal_region': False}
  230. ft_lep_multi = plot_lep_multi, (rss, 'TTTT')
  231. ttw_lep_multi = plot_lep_multi, (rss, 'TTW')
  232. ttz_lep_multi = plot_lep_multi, (rss, 'TTZ')
  233. tth_lep_multi = plot_lep_multi, (rss, 'TTH')
  234. # tau_purity = plot_tau_purity, (rss)
  235. # Now assemble the plots into figures.
  236. plots = [
  237. Plot([[yield_tau_ignore_tau]],
  238. 'Yield Ignoring Taus'),
  239. Plot([[yield_tau_0tau]],
  240. 'Yield For events with 0 Tau'),
  241. Plot([[yield_tau_1tau]],
  242. 'Yield For events with 1 Tau'),
  243. Plot([[yield_tau_2tau]],
  244. 'Yield For events with 2 or more Tau'),
  245. Plot([[ft_lep_multi]],
  246. 'Lepton Multiplicity - TTTT'),
  247. Plot([[ttw_lep_multi]],
  248. 'Lepton Multiplicity - TTW'),
  249. Plot([[ttz_lep_multi]],
  250. 'Lepton Multiplicity - TTZ'),
  251. Plot([[tth_lep_multi]],
  252. 'Lepton Multiplicity - TTH'),
  253. Plot([[ft_event_obs_in_sr]],
  254. 'TTTT - Event Observables (In SR)'),
  255. Plot([[ttw_event_obs_in_sr]],
  256. 'TTW - Event Observables (In SR)'),
  257. Plot([[ttz_event_obs_in_sr]],
  258. 'TTZ - Event Observables (In SR)'),
  259. Plot([[tth_event_obs_in_sr]],
  260. 'TTH - Event Observables (In SR)'),
  261. Plot([[ft_event_obs]],
  262. 'TTTT - Event Observables (All Events)'),
  263. Plot([[ttw_event_obs]],
  264. 'TTW - Event Observables (All Events)'),
  265. Plot([[ttz_event_obs]],
  266. 'TTZ - Event Observables (All Events)'),
  267. Plot([[tth_event_obs]],
  268. 'TTH - Event Observables (All Events)'),
  269. # Plot([[yield_notau]],
  270. # 'Yield Without Tau'),
  271. # Plot([[yield_tau_stack]],
  272. # 'Yield With Tau Stacked'),
  273. # Plot([[yield_notau_stack]],
  274. # 'Yield Without Tau Stacked'),
  275. # Plot([[yield_tau_stack],
  276. # [yield_notau_stack]],
  277. # 'Event Yield, top: with tau, bottom: no tau'),
  278. # Plot([[sig_strength_tau],
  279. # [sig_strength_notau]],
  280. # 'Signal Strength'),
  281. # Plot([[event_obs_stack]],
  282. # 'Event Observables'),
  283. # Plot([[tau_purity]],
  284. # 'Tau Purity'),
  285. ]
  286. # Finally, render and save the plots and generate the html+bootstrap
  287. # dashboard to view them
  288. render_plots(plots, to_disk=False)
  289. generate_dashboard(plots, 'TTTT Yields', output='yield_breakout_4.html', source_file=__file__,
  290. ana_source="https://github.com/cfangmeier/FTAnalysis/commit/46fc1afab42f6b04c9e21ba519e6a30524983550")