yields.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446
  1. #!/usr/bin/env python
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. from filval.result_set import ResultSet
  5. from filval.histogram import hist, hist2d, hist_add, hist_norm, hist_scale, hist2d_norm
  6. from filval.plotting import (decl_plot, render_plots, hist_plot, hist_plot_stack, hist2d_plot,
  7. Plot, generate_dashboard, hists_to_table)
  8. an_tttt = ([0.47, 0.33, 0.18, 0.78, 0.49, 0.52, 0.33, 0.49],
  9. [0, 0, 0, 0, 0, 0, 0, 0], [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5])
  10. an_ttw = ([2.29663, 0.508494, 0.161166, 1.03811, 0.256401, 0.127582, 0.181522, 0.141659],
  11. [0, 0, 0, 0, 0, 0, 0, 0], [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5])
  12. an_ttz = ([0.974751, 0.269195, 1e-06, 0.395831, 0.0264703, 0.06816, 0.8804, 0.274265],
  13. [0, 0, 0, 0, 0, 0, 0, 0], [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5])
  14. an_tth = ([1.13826, 0.361824, 0.162123, 0.683917, 0.137608, 0.0632719, 0.554491, 0.197864],
  15. [0, 0, 0, 0, 0, 0, 0, 0], [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5])
  16. @decl_plot
  17. def plot_yield_grid(rss, tau_category=-1):
  18. r"""## Event Yield
  19. The event yield for the eight signal regions defined in AN-17-115. Data is normalized
  20. to the integrated luminosity of $35.9\textrm{fb}^{-1}$.
  21. Ignoring taus means both that there is no requirement on number of good taus *and*
  22. the taus, if present, are not considered for the SS pair.
  23. If taus are not ignored, any good tau with $p_T$>20Gev is considered in constructing the SS lepton pair. The yields
  24. are then further broken down by the number of good taus in the event.
  25. A "good" tau in the above means any tau candidate passing the `byTightIsolationMVArun2v1DBoldDMwLT` ID w/ pt>20GeV.
  26. It is also required to pass tau-lepton cross cleaning where it must not match any electron or muon within
  27. $\delta R < 0.4$.
  28. Truth-matched taus are those that match within $\delta R < 0.3$ with gen-level taus that pass the flag `fromHardProcessDecayed`.
  29. """
  30. def get_sr(rs, tm=False):
  31. if tm:
  32. if tau_category == 0:
  33. return hist(rs.SRs_0tmtau)
  34. if tau_category == 1:
  35. return hist(rs.SRs_1tmtau)
  36. elif tau_category == 2:
  37. return hist(rs.SRs_2tmtau)
  38. else:
  39. if tau_category == -1:
  40. return hist(rs.ignore_tau_SRs)
  41. elif tau_category == 0:
  42. return hist(rs.SRs_0tau)
  43. elif tau_category == 1:
  44. return hist(rs.SRs_1tau)
  45. elif tau_category == 2:
  46. return hist(rs.SRs_2tau)
  47. _, ((ax_tttt, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
  48. tttt, ttw, ttz, tth = [get_sr(rs) for rs in rss]
  49. tm_tttt, tm_ttw, tm_ttz, tm_tth = [get_sr(rs, True) for rs in rss]
  50. plt.sca(ax_tttt)
  51. hist_plot(tttt, title='TTTT', stats=False, label='Mock', include_errors=True)
  52. if tau_category == -1 and len(tttt[0]) == len(an_tttt[0]):
  53. hist_plot(an_tttt, title='TTTT', stats=False, label='AN')
  54. elif tau_category >= 0:
  55. hist_plot(tm_tttt, title='TTTT', stats=False, label='Truth-Matched Taus', include_errors=True)
  56. plt.ylim((0, None))
  57. plt.sca(ax_ttw)
  58. hist_plot(ttw, title='TTW', stats=False, label='Mock', include_errors=True)
  59. if tau_category == -1 and len(tttt[0]) == len(an_tttt[0]):
  60. hist_plot(an_ttw, title='TTW', stats=False, label='AN')
  61. elif tau_category >= 0:
  62. hist_plot(tm_ttw, title='TTW', stats=False, label='Truth-Matched Taus', include_errors=True)
  63. plt.ylim((0, None))
  64. plt.legend()
  65. plt.sca(ax_ttz)
  66. hist_plot(ttz, title='TTZ', stats=False, label='Mock', include_errors=True)
  67. if tau_category == -1 and len(tttt[0]) == len(an_tttt[0]):
  68. hist_plot(an_ttz, title='TTZ', stats=False, label='AN')
  69. elif tau_category >= 0:
  70. hist_plot(tm_ttz, title='TTZ', stats=False, label='Truth-Matched Taus', include_errors=True)
  71. plt.ylim((0, None))
  72. plt.xlabel('Signal Region')
  73. plt.sca(ax_tth)
  74. hist_plot(tth, title='TTH', stats=False, label='Mock', include_errors=True)
  75. if tau_category == -1 and len(tttt[0]) == len(an_tttt[0]):
  76. hist_plot(an_tth, title='TTH', stats=False, label='AN')
  77. elif tau_category >= 0:
  78. hist_plot(tm_tth, title='TTH', stats=False, label='Truth-Matched Taus', include_errors=True)
  79. plt.ylim((0, None))
  80. plt.xlabel('Signal Region')
  81. def to_table(hists):
  82. return hists_to_table(hists, row_labels=['TTTT', 'TTW', 'TTZ', 'TTH'],
  83. column_labels=[f'SR{n}' for n in range(1, len(tttt[0])+1)])
  84. tables = '<h2>Mock</h2>'
  85. tables += to_table([tttt, ttw, ttz, tth])
  86. if tau_category == -1:
  87. tables += '<h2>AN</h2>'
  88. tables += to_table([an_tttt, an_ttw, an_ttz, an_tth])
  89. elif tau_category >= 0:
  90. tables += '<h2>TM Taus</h2>'
  91. tables += to_table([tm_tttt, tm_ttw, tm_ttz, tm_tth])
  92. return tables
  93. @decl_plot
  94. def plot_yield_v_gen(rss, tau_category=-1):
  95. r"""## Event Yield Vs. # of Generated Taus
  96. """
  97. def get_sr(rs):
  98. h = None
  99. if tau_category == 0:
  100. h = rs.SRs_0tmtau_diff_nGenTau
  101. if tau_category == 1:
  102. h = rs.SRs_1tmtau_diff_nGenTau
  103. elif tau_category == 2:
  104. h = rs.SRs_2tmtau_diff_nGenTau
  105. return hist2d_norm(hist2d(h), norm=1, axis=0)
  106. _, ((ax_tttt, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
  107. tttt, ttw, ttz, tth = [get_sr(rs) for rs in rss]
  108. def do_plot(h, title):
  109. hist2d_plot(h, title=title, txt_format='{:.2f}')
  110. plt.sca(ax_tttt)
  111. do_plot(tttt, 'TTTT')
  112. plt.ylabel("\# Gen Taus")
  113. plt.sca(ax_ttw)
  114. do_plot(ttw, 'TTW')
  115. plt.legend()
  116. plt.sca(ax_ttz)
  117. do_plot(ttz, 'TTZ')
  118. plt.xlabel('Signal Region')
  119. plt.ylabel("\# Gen Taus")
  120. plt.sca(ax_tth)
  121. do_plot(tth, 'TTH')
  122. plt.xlabel('Signal Region')
  123. @decl_plot
  124. def plot_nGen_v_nSel(rss):
  125. _, ((ax_tttt, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
  126. tttt, ttw, ttz, tth = [hist2d(rs.nGen_v_RecoTaus_in_SR) for rs in rss]
  127. def do_plot(h, title):
  128. hist2d_plot(h, title=title, txt_format='{:.2f}')
  129. plt.sca(ax_tttt)
  130. do_plot(tttt, 'TTTT')
  131. plt.ylabel("\# Selected Taus")
  132. plt.sca(ax_ttw)
  133. do_plot(ttw, 'TTW')
  134. plt.legend()
  135. plt.sca(ax_ttz)
  136. do_plot(ttz, 'TTZ')
  137. plt.xlabel('\# Gen Taus')
  138. plt.ylabel("\# Selected Taus")
  139. plt.sca(ax_tth)
  140. do_plot(tth, 'TTH')
  141. plt.xlabel('\# Gen Taus')
  142. @decl_plot
  143. def plot_yield_stack(rss):
  144. r"""## Event Yield - Stacked
  145. The event yield for the eight signal regions defined in AN-17-115. Data is normalized
  146. to the Moriond 2018 integrated luminosity ($35.9\textrm{fb}^{-1}$). Code for the histogram generation is
  147. here: <https://github.com/cfangmeier/FTAnalysis/blob/master/studies/tau/Yield.C>
  148. """
  149. tttt, ttw, ttz, tth = map(lambda rs: hist(rs.SRs), rss)
  150. hist_plot_stack([ttw, ttz, tth], labels=['TTW', 'TTZ', 'TTH'])
  151. tttt = tttt[0]*10, tttt[1], tttt[2]
  152. hist_plot(tttt, label='TTTT (x10)', stats=False, color='k')
  153. plt.ylim((0, 60))
  154. plt.xlabel('Signal Region')
  155. plt.legend()
  156. @decl_plot
  157. def plot_lep_multi(rss, dataset):
  158. _, (ax_els, ax_mus, ax_taus) = plt.subplots(3, 1)
  159. els = list(map(lambda rs: hist_norm(hist(rs.nEls)), rss))
  160. mus = list(map(lambda rs: hist_norm(hist(rs.nMus)), rss))
  161. taus = list(map(lambda rs: hist_norm(hist(rs.nTaus)), rss))
  162. def _plot(ax, procs):
  163. plt.sca(ax)
  164. tttt, ttw, ttz, tth = procs
  165. h = {'TTTT': tttt,
  166. 'TTW': ttw,
  167. 'TTZ': ttz,
  168. 'TTH': tth}[dataset]
  169. hist_plot(h, stats=False, label=dataset)
  170. _plot(ax_els, els)
  171. plt.xlabel('\\# Good Electrons')
  172. plt.legend()
  173. _plot(ax_mus, mus)
  174. plt.xlabel('\\# Good Muons')
  175. _plot(ax_taus, taus)
  176. plt.xlabel('\\# Good Taus')
  177. @decl_plot
  178. def plot_sig_strength(rss):
  179. r""" The signal strength of the TTTT signal defined as
  180. $\frac{S}{\sqrt{S+B}}$
  181. """
  182. tttt, ttw, ttz, tth = map(lambda rs: hist(rs.SRs), rss)
  183. bg = hist_add(ttw, ttz, tth)
  184. strength = tttt[0] / np.sqrt(tttt[0] + bg[0])
  185. hist_plot((strength, tttt[1], tttt[2]), stats=False)
  186. @decl_plot
  187. def plot_event_obs(rss, dataset, in_signal_region=True):
  188. r"""The distribution of $N_{jet}$, $N_{Bjet}$, MET, and $H_T$ in either all events
  189. or only signal region (igoring taus) events.
  190. """
  191. _, ((ax_njet, ax_nbjet), (ax_ht, ax_met)) = plt.subplots(2, 2)
  192. # tttt, ttw, ttz, tth = map(lambda rs: hist(rs.SRs), rss)
  193. tttt, ttw, ttz, tth = rss
  194. rs = {'TTTT': tttt,
  195. 'TTW': ttw,
  196. 'TTZ': ttz,
  197. 'TTH': tth}[dataset]
  198. def _plot(ax, obs):
  199. plt.sca(ax)
  200. if in_signal_region:
  201. h = {'MET': rs.met_in_SR,
  202. 'HT': rs.ht_in_SR,
  203. 'NJET': rs.njet_in_SR,
  204. 'NBJET': rs.nbjet_in_SR}[obs]
  205. else:
  206. h = {'MET': rs.met,
  207. 'HT': rs.ht,
  208. 'NJET': rs.njet,
  209. 'NBJET': rs.nbjet}[obs]
  210. hist_plot(hist(h), stats=False, label=dataset, xlabel=obs)
  211. _plot(ax_njet, 'NJET')
  212. _plot(ax_nbjet, 'NBJET')
  213. _plot(ax_ht, 'HT')
  214. _plot(ax_met, 'MET')
  215. @decl_plot
  216. def plot_event_obs_stack(rss, in_signal_region=True):
  217. r"""
  218. """
  219. _, ((ax_njet, ax_nbjet), (ax_ht, ax_met)) = plt.subplots(2, 2)
  220. def _plot(ax, obs):
  221. plt.sca(ax)
  222. if in_signal_region:
  223. attr = {'MET': 'met_in_SR',
  224. 'HT': 'ht_in_SR',
  225. 'NJET': 'njet_in_SR',
  226. 'NBJET': 'nbjet_in_SR'}[obs]
  227. else:
  228. attr = {'MET': 'met',
  229. 'HT': 'ht',
  230. 'NJET': 'njet',
  231. 'NBJET': 'nbjet'}[obs]
  232. tttt, ttw, ttz, tth = map(lambda rs: hist(getattr(rs, attr)), rss)
  233. hist_plot_stack([ttw, ttz, tth], labels=["TTW", "TTZ", "TTH"])
  234. hist_plot(hist_scale(tttt, 5), label="TTTT (x5)", color='k')
  235. plt.xlabel(obs)
  236. _plot(ax_njet, 'NJET')
  237. _plot(ax_nbjet, 'NBJET')
  238. plt.legend()
  239. _plot(ax_ht, 'HT')
  240. _plot(ax_met, 'MET')
  241. # @decl_plot
  242. # def plot_s_over_b(rss):
  243. # def get_sr(rs):
  244. # h = None
  245. # if tau_category == 0:
  246. # h = rs.SRs_0tmtau_diff_nGenTau
  247. # if tau_category == 1:
  248. # h = rs.SRs_1tmtau_diff_nGenTau
  249. # elif tau_category == 2:
  250. # h = rs.SRs_2tmtau_diff_nGenTau
  251. # return hist2d_norm(hist2d(h), norm=1, axis=0)
  252. #
  253. # _, ((ax_tttt, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
  254. # tttt =
  255. # ttw, ttz, tth = [get_sr(rs) for rs in rss]
  256. # pass
  257. @decl_plot
  258. def plot_tau_purity(rss):
  259. _, ((ax_tttt, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
  260. tttt, ttw, ttz, tth = list(map(lambda rs: hist(rs.tau_purity_v_pt), rss))
  261. def _plot(ax, dataset):
  262. plt.sca(ax)
  263. h = {'TTTT': tttt,
  264. 'TTW': ttw,
  265. 'TTZ': ttz,
  266. 'TTH': tth}[dataset]
  267. hist_plot(h, stats=False, label=dataset)
  268. plt.text(200, 0.05, dataset)
  269. plt.xlabel(r"$P_T$(GeV)")
  270. _plot(ax_tttt, 'TTTT')
  271. _plot(ax_ttw, 'TTW')
  272. _plot(ax_ttz, 'TTZ')
  273. _plot(ax_tth, 'TTH')
  274. if __name__ == '__main__':
  275. data_path = 'data/output_new_sr_new_id_binning/'
  276. save_plots = True
  277. # First create a ResultSet object which loads all of the objects from root file
  278. # into memory and makes them available as attributes
  279. rss = (ResultSet("tttt", data_path+'yield_tttt.root'),
  280. ResultSet("ttw", data_path+'yield_ttw.root'),
  281. ResultSet("ttz", data_path+'yield_ttz.root'),
  282. ResultSet("tth", data_path+'yield_tth.root'))
  283. # Next, declare all of the (sub)plots that will be assembled into full
  284. # figures later
  285. yield_tau_ignore_tau = plot_yield_grid, (rss, -1)
  286. yield_tau_0tau = plot_yield_grid, (rss, 0)
  287. yield_tau_1tau = plot_yield_grid, (rss, 1)
  288. yield_tau_2tau = plot_yield_grid, (rss, 2)
  289. tttt_event_obs_in_sr = plot_event_obs, (rss, 'TTTT'), {'in_signal_region': True}
  290. ttw_event_obs_in_sr = plot_event_obs, (rss, 'TTW'), {'in_signal_region': True}
  291. ttz_event_obs_in_sr = plot_event_obs, (rss, 'TTZ'), {'in_signal_region': True}
  292. tth_event_obs_in_sr = plot_event_obs, (rss, 'TTH'), {'in_signal_region': True}
  293. tttt_event_obs = plot_event_obs, (rss, 'TTTT'), {'in_signal_region': False}
  294. ttw_event_obs = plot_event_obs, (rss, 'TTW'), {'in_signal_region': False}
  295. ttz_event_obs = plot_event_obs, (rss, 'TTZ'), {'in_signal_region': False}
  296. tth_event_obs = plot_event_obs, (rss, 'TTH'), {'in_signal_region': False}
  297. tttt_lep_multi = plot_lep_multi, (rss, 'TTTT')
  298. ttw_lep_multi = plot_lep_multi, (rss, 'TTW')
  299. ttz_lep_multi = plot_lep_multi, (rss, 'TTZ')
  300. tth_lep_multi = plot_lep_multi, (rss, 'TTH')
  301. yield_v_gen_0tm = plot_yield_v_gen, (rss, 0)
  302. yield_v_gen_1tm = plot_yield_v_gen, (rss, 1)
  303. yield_v_gen_2tm = plot_yield_v_gen, (rss, 2)
  304. # tau_purity = plot_tau_purity, (rss)
  305. nGen_v_nSel = plot_nGen_v_nSel, (rss,)
  306. # Now assemble the plots into figures.
  307. plots = [
  308. Plot([[yield_tau_ignore_tau]],
  309. 'Yield Ignoring Taus'),
  310. Plot([[yield_tau_0tau]],
  311. 'Yield For events with 0 Tau'),
  312. Plot([[yield_tau_1tau]],
  313. 'Yield For events with 1 Tau'),
  314. Plot([[yield_tau_2tau]],
  315. 'Yield For events with 2 or more Tau'),
  316. # Plot([[yield_v_gen_0tm]],
  317. # 'Yield For events with 0 TM Tau Vs Gen Tau'),
  318. # Plot([[yield_v_gen_1tm]],
  319. # 'Yield For events with 1 TM Tau Vs Gen Tau'),
  320. # Plot([[yield_v_gen_2tm]],
  321. # 'Yield For events with 2 TM Tau Vs Gen Tau'),
  322. Plot([[nGen_v_nSel]],
  323. r'#Generated tau vs. #Selected tau'),
  324. Plot([[tttt_lep_multi]],
  325. 'Lepton Multiplicity - TTTT'),
  326. Plot([[ttw_lep_multi]],
  327. 'Lepton Multiplicity - TTW'),
  328. Plot([[ttz_lep_multi]],
  329. 'Lepton Multiplicity - TTZ'),
  330. Plot([[tth_lep_multi]],
  331. 'Lepton Multiplicity - TTH'),
  332. Plot([[tttt_event_obs_in_sr]],
  333. 'TTTT - Event Observables (In SR)'),
  334. Plot([[ttw_event_obs_in_sr]],
  335. 'TTW - Event Observables (In SR)'),
  336. Plot([[ttz_event_obs_in_sr]],
  337. 'TTZ - Event Observables (In SR)'),
  338. Plot([[tth_event_obs_in_sr]],
  339. 'TTH - Event Observables (In SR)'),
  340. # Plot([[tttt_event_obs]],
  341. # 'TTTT - Event Observables (All Events)'),
  342. # Plot([[ttw_event_obs]],
  343. # 'TTW - Event Observables (All Events)'),
  344. # Plot([[ttz_event_obs]],
  345. # 'TTZ - Event Observables (All Events)'),
  346. # Plot([[tth_event_obs]],
  347. # 'TTH - Event Observables (All Events)'),
  348. # Plot([[yield_notau]],
  349. # 'Yield Without Tau'),
  350. # Plot([[yield_tau_stack]],
  351. # 'Yield With Tau Stacked'),
  352. # Plot([[yield_notau_stack]],
  353. # 'Yield Without Tau Stacked'),
  354. # Plot([[yield_tau_stack],
  355. # [yield_notau_stack]],
  356. # 'Event Yield, top: with tau, bottom: no tau'),
  357. # Plot([[sig_strength_tau],
  358. # [sig_strength_notau]],
  359. # 'Signal Strength'),
  360. # Plot([[event_obs_stack]],
  361. # 'Event Observables'),
  362. # Plot([[tau_purity]],
  363. # 'Tau Purity'),
  364. ]
  365. # Finally, render and save the plots and generate the html+bootstrap
  366. # dashboard to view them
  367. render_plots(plots, to_disk=save_plots)
  368. if not save_plots:
  369. generate_dashboard(plots, 'TTTT Yields',
  370. output='yields.html',
  371. source=__file__,
  372. ana_source=("https://github.com/cfangmeier/FTAnalysis/commit/"
  373. "0cbdac4509391fffb9fff87d0521b7dd0a30a55c"),
  374. config=data_path+'config.yaml'
  375. )