tau_studies.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457
  1. #!/usr/bin/env python
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. from filval.result_set import ResultSet
  5. from filval.histogram import hist, hist2d, hist_add, hist_sub, hist_div, hist_norm, hist_scale, hist2d_norm
  6. from filval.plotting import (decl_plot, render_plots, hist_plot, hist_plot_stack, hist2d_plot,
  7. Plot, generate_dashboard, hists_to_table)
  8. def set_xticks():
  9. x_ticks = [f'SR{i}' for i in range(1, 17)]
  10. plt.xticks([x for x in range(1, 17)], x_ticks, rotation=60)
  11. def get_sr(rs, tau_category, tm=False):
  12. if tm:
  13. if tau_category == 0:
  14. return hist(rs.SRs_0tmtau)
  15. if tau_category == 1:
  16. return hist(rs.SRs_1tmtau)
  17. elif tau_category == 2:
  18. return hist(rs.SRs_2tmtau)
  19. else:
  20. if tau_category == -1:
  21. return hist(rs.ignore_tau_SRs)
  22. elif tau_category == 0:
  23. return hist(rs.SRs_0tau)
  24. elif tau_category == 1:
  25. return hist(rs.SRs_1tau)
  26. elif tau_category == 2:
  27. return hist(rs.SRs_2tau)
  28. @decl_plot
  29. def plot_yield_grid(tau_category=-1):
  30. r"""## Event Yield
  31. The event yield for the eight signal regions defined in AN-17-115. Data is normalized
  32. to the integrated luminosity of $35.9\textrm{fb}^{-1}$.
  33. Ignoring taus means both that there is no requirement on number of good taus *and*
  34. the taus, if present, are not considered for the SS pair.
  35. If taus are not ignored, any good tau with $p_T$>20Gev is considered in constructing the SS lepton pair. The yields
  36. are then further broken down by the number of good taus in the event.
  37. A "good" tau in the above means any tau candidate passing the `byTightIsolationMVArun2v1DBoldDMwLT` ID w/ pt>20GeV.
  38. It is also required to pass tau-lepton cross cleaning where it must not match any electron or muon within
  39. $\delta R < 0.4$.
  40. Truth-matched taus are those that match within $\delta R < 0.3$ with gen-level taus that pass the flag `fromHardProcessDecayed`.
  41. """
  42. _, ((ax_tttt, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
  43. tttt, ttw, ttz, tth = [get_sr(rs, tau_category) for rs in rss]
  44. tm_tttt, tm_ttw, tm_ttz, tm_tth = [get_sr(rs, tau_category, True) for rs in rss]
  45. plt.sca(ax_tttt)
  46. plt.title('TTTT')
  47. hist_plot(tttt, stats=False, label='Mock Analysis', include_errors=True)
  48. if tau_category >= 0:
  49. hist_plot(tm_tttt, stats=False, label='Truth-Matched Taus', include_errors=True)
  50. plt.ylim((0, None))
  51. set_xticks()
  52. plt.sca(ax_ttw)
  53. plt.title('TTW')
  54. hist_plot(ttw, stats=False, label='Mock Analysis', include_errors=True)
  55. if tau_category >= 0:
  56. hist_plot(tm_ttw, stats=False, label='Truth-Matched Taus', include_errors=True)
  57. plt.ylim((0, None))
  58. set_xticks()
  59. plt.legend()
  60. plt.sca(ax_ttz)
  61. plt.title('TTZ')
  62. hist_plot(ttz, stats=False, label='Mock Analysis', include_errors=True)
  63. if tau_category >= 0:
  64. hist_plot(tm_ttz, stats=False, label='Truth-Matched Taus', include_errors=True)
  65. plt.ylim((0, None))
  66. set_xticks()
  67. plt.xlabel('Signal Region')
  68. plt.sca(ax_tth)
  69. plt.title('TTH')
  70. hist_plot(tth, stats=False, label='Mock Analysis', include_errors=True)
  71. if tau_category >= 0:
  72. hist_plot(tm_tth, stats=False, label='Truth-Matched Taus', include_errors=True)
  73. plt.ylim((0, None))
  74. set_xticks()
  75. plt.xlabel('Signal Region')
  76. def to_table(hists):
  77. return hists_to_table(hists, row_labels=['TTTT', 'TTW', 'TTZ', 'TTH'],
  78. column_labels=[f'SR{n}' for n in range(1, len(tttt[0])+1)])
  79. tables = '<h2>Mock</h2>'
  80. tables += to_table([tttt, ttw, ttz, tth])
  81. if tau_category >= 0:
  82. tables += '<h2>TM Taus</h2>'
  83. tables += to_table([tm_tttt, tm_ttw, tm_ttz, tm_tth])
  84. return tables
  85. @decl_plot
  86. def plot_yield_v_gen(tau_category=-1):
  87. r"""## Event Yield Vs. # of Generated Taus
  88. """
  89. def get_sr(rs):
  90. h = None
  91. if tau_category == 0:
  92. h = rs.SRs_0tmtau_diff_nGenTau
  93. if tau_category == 1:
  94. h = rs.SRs_1tmtau_diff_nGenTau
  95. elif tau_category == 2:
  96. h = rs.SRs_2tmtau_diff_nGenTau
  97. return hist2d_norm(hist2d(h), norm=1, axis=0)
  98. _, ((ax_tttt, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
  99. tttt, ttw, ttz, tth = [get_sr(rs) for rs in rss]
  100. def do_plot(h, title):
  101. hist2d_plot(h, title=title, txt_format='{:.2f}')
  102. plt.sca(ax_tttt)
  103. do_plot(tttt, 'TTTT')
  104. plt.ylabel("\# Gen Taus")
  105. plt.sca(ax_ttw)
  106. do_plot(ttw, 'TTW')
  107. plt.legend()
  108. plt.sca(ax_ttz)
  109. do_plot(ttz, 'TTZ')
  110. plt.xlabel('Signal Region')
  111. plt.ylabel("\# Gen Taus")
  112. plt.sca(ax_tth)
  113. do_plot(tth, 'TTH')
  114. plt.xlabel('Signal Region')
  115. @decl_plot
  116. def plot_nGen_v_nSel():
  117. _, ((ax_tttt, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
  118. tttt, ttw, ttz, tth = [hist2d(rs.nGen_v_RecoTaus_in_SR) for rs in rss]
  119. def do_plot(h, title):
  120. hist2d_plot(h, title=title, txt_format='{:.2f}')
  121. plt.sca(ax_tttt)
  122. do_plot(tttt, 'TTTT')
  123. plt.ylabel("\# Selected Taus")
  124. plt.sca(ax_ttw)
  125. do_plot(ttw, 'TTW')
  126. plt.legend()
  127. plt.sca(ax_ttz)
  128. do_plot(ttz, 'TTZ')
  129. plt.xlabel('\# Gen Taus')
  130. plt.ylabel("\# Selected Taus")
  131. plt.sca(ax_tth)
  132. do_plot(tth, 'TTH')
  133. plt.xlabel('\# Gen Taus')
  134. @decl_plot
  135. def plot_yield_stack():
  136. r"""## Event Yield - Stacked
  137. The event yield for the eight signal regions defined in AN-17-115. Data is normalized
  138. to the Moriond 2018 integrated luminosity ($35.9\textrm{fb}^{-1}$). Code for the histogram generation is
  139. here: <https://github.com/cfangmeier/FTAnalysis/blob/master/studies/tau/Yield.C>
  140. """
  141. tttt, ttw, ttz, tth = map(lambda rs: hist(rs.SRs), rss)
  142. hist_plot_stack([ttw, ttz, tth], labels=['TTW', 'TTZ', 'TTH'])
  143. tttt = tttt[0]*10, tttt[1], tttt[2]
  144. hist_plot(tttt, label='TTTT (x10)', stats=False, color='k')
  145. plt.ylim((0, 60))
  146. plt.xlabel('Signal Region')
  147. plt.legend()
  148. @decl_plot
  149. def plot_lep_multi(dataset):
  150. _, (ax_els, ax_mus, ax_taus) = plt.subplots(3, 1)
  151. els = list(map(lambda rs: hist_norm(hist(rs.nEls)), rss))
  152. mus = list(map(lambda rs: hist_norm(hist(rs.nMus)), rss))
  153. taus = list(map(lambda rs: hist_norm(hist(rs.nTaus)), rss))
  154. def _plot(ax, procs):
  155. plt.sca(ax)
  156. tttt, ttw, ttz, tth = procs
  157. h = {'TTTT': tttt,
  158. 'TTW': ttw,
  159. 'TTZ': ttz,
  160. 'TTH': tth}[dataset]
  161. hist_plot(h, stats=False, label=dataset)
  162. _plot(ax_els, els)
  163. plt.xlabel('\\# Good Electrons')
  164. plt.legend()
  165. _plot(ax_mus, mus)
  166. plt.xlabel('\\# Good Muons')
  167. _plot(ax_taus, taus)
  168. plt.xlabel('\\# Good Taus')
  169. @decl_plot
  170. def plot_sig_strength(tau_category, tm):
  171. r""" The signal strength of the TTTT signal defined as
  172. $\frac{S}{\sqrt{S+B+\sigma_B^2}}$
  173. """
  174. tttt, ttw, ttz, tth = map(lambda rs: get_sr(rs, tau_category, tm), rss)
  175. bg = hist_add(ttw, ttz, tth)
  176. strength = tttt[0] / np.sqrt(tttt[0] + bg[0] + bg[1]**2)
  177. hist_plot((strength, tttt[1], tttt[2]), stats=False)
  178. @decl_plot
  179. def plot_event_obs(dataset, in_sr=True):
  180. r"""The distribution of $N_{jet}$, $N_{Bjet}$, MET, and $H_T$ in either all events
  181. or only signal region (igoring taus) events.
  182. """
  183. _, ((ax_njet, ax_nbjet), (ax_ht, ax_met)) = plt.subplots(2, 2)
  184. # tttt, ttw, ttz, tth = map(lambda rs: hist(rs.SRs), rss)
  185. tttt, ttw, ttz, tth = rss
  186. rs = {'TTTT': tttt,
  187. 'TTW': ttw,
  188. 'TTZ': ttz,
  189. 'TTH': tth}[dataset]
  190. def _plot(ax, obs):
  191. plt.sca(ax)
  192. if in_sr:
  193. h = {'MET': rs.met_in_SR,
  194. 'HT': rs.ht_in_SR,
  195. 'NJET': rs.njet_in_SR,
  196. 'NBJET': rs.nbjet_in_SR}[obs]
  197. else:
  198. h = {'MET': rs.met,
  199. 'HT': rs.ht,
  200. 'NJET': rs.njet,
  201. 'NBJET': rs.nbjet}[obs]
  202. hist_plot(hist(h), stats=False, label=dataset)
  203. plt.xlabel(obs)
  204. _plot(ax_njet, 'NJET')
  205. _plot(ax_nbjet, 'NBJET')
  206. _plot(ax_ht, 'HT')
  207. _plot(ax_met, 'MET')
  208. @decl_plot
  209. def plot_event_obs_stack(in_signal_region=True):
  210. r"""
  211. """
  212. _, ((ax_njet, ax_nbjet), (ax_ht, ax_met)) = plt.subplots(2, 2)
  213. def _plot(ax, obs):
  214. plt.sca(ax)
  215. if in_signal_region:
  216. attr = {'MET': 'met_in_SR',
  217. 'HT': 'ht_in_SR',
  218. 'NJET': 'njet_in_SR',
  219. 'NBJET': 'nbjet_in_SR'}[obs]
  220. else:
  221. attr = {'MET': 'met',
  222. 'HT': 'ht',
  223. 'NJET': 'njet',
  224. 'NBJET': 'nbjet'}[obs]
  225. tttt, ttw, ttz, tth = map(lambda rs: hist(getattr(rs, attr)), rss)
  226. hist_plot_stack([ttw, ttz, tth], labels=["TTW", "TTZ", "TTH"])
  227. hist_plot(hist_scale(tttt, 5), label="TTTT (x5)", color='k')
  228. plt.xlabel(obs)
  229. _plot(ax_njet, 'NJET')
  230. _plot(ax_nbjet, 'NBJET')
  231. plt.legend()
  232. _plot(ax_ht, 'HT')
  233. _plot(ax_met, 'MET')
  234. @decl_plot
  235. def plot_s_over_b(tau_category):
  236. tttt, ttw, ttz, tth = [get_sr(rs, tau_category) for rs in rss]
  237. sig = get_sr(rss[0], tau_category, tm=True)
  238. fake_tttt = hist_sub(tttt, sig)
  239. total_bg = hist_add(fake_tttt, ttw, ttz, tth)
  240. plt.subplot(3, 1, 1)
  241. hist_plot(total_bg, include_errors=True, label="TTV+TTH+Fake TTTT")
  242. hist_plot(sig, include_errors=True, label="Truth Matched TTTT")
  243. plt.ylabel('\\# Events')
  244. plt.ylim((0, 5.5))
  245. set_xticks()
  246. plt.legend()
  247. plt.subplot(3, 1, 2)
  248. hist_plot(hist_div(sig, total_bg), label="ratio", color='k')
  249. plt.ylabel('Ratio')
  250. plt.ylim((0, 2.7))
  251. set_xticks()
  252. plt.subplot(3, 1, 3)
  253. bg_vals, bg_errs, _ = total_bg
  254. sig_vals, _, _ = sig
  255. den = np.sqrt(sig_vals + bg_vals + bg_errs**2)
  256. sig = sig_vals / den, *sig[1:]
  257. hist_plot(sig, color='k')
  258. plt.ylabel(r'$S/\sqrt{S+B+\sigma_B^2}$')
  259. plt.ylim((0, 0.5))
  260. set_xticks()
  261. # tables = '<h2>Mock</h2>'
  262. # tables += to_table([total_bg, ttw, ttz, tth])
  263. # if tau_category >= 0:
  264. # tables += '<h2>TM Taus</h2>'
  265. # tables += to_table([tm_tttt, tm_ttw, tm_ttz, tm_tth])
  266. # return tables
  267. @decl_plot
  268. def plot_tau_efficiency():
  269. _, ((ax_tttt, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
  270. tttt, ttw, ttz, tth = list(map(lambda rs: hist(rs.tau_efficiency_v_pt), rss))
  271. def _plot(ax, dataset):
  272. plt.sca(ax)
  273. h = {'TTTT': tttt,
  274. 'TTW': ttw,
  275. 'TTZ': ttz,
  276. 'TTH': tth}[dataset]
  277. hist_plot(h, stats=False, label=dataset, include_errors=True)
  278. plt.text(200, 0.05, dataset)
  279. plt.xlabel(r"$P_T$(GeV)")
  280. plt.ylim((0, 1.1))
  281. _plot(ax_tttt, 'TTTT')
  282. _plot(ax_ttw, 'TTW')
  283. _plot(ax_ttz, 'TTZ')
  284. _plot(ax_tth, 'TTH')
  285. @decl_plot
  286. def plot_tau_purity():
  287. _, ((ax_tttt, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
  288. tttt, ttw, ttz, tth = list(map(lambda rs: hist(rs.tau_purity_v_pt), rss))
  289. def _plot(ax, dataset):
  290. plt.sca(ax)
  291. h = {'TTTT': tttt,
  292. 'TTW': ttw,
  293. 'TTZ': ttz,
  294. 'TTH': tth}[dataset]
  295. hist_plot(h, stats=False, label=dataset, include_errors=True)
  296. plt.text(200, 0.05, dataset)
  297. plt.xlabel(r"$P_T$(GeV)")
  298. plt.ylim((0, 1.1))
  299. _plot(ax_tttt, 'TTTT')
  300. _plot(ax_ttw, 'TTW')
  301. _plot(ax_ttz, 'TTZ')
  302. _plot(ax_tth, 'TTH')
  303. if __name__ == '__main__':
  304. ext = ""
  305. # ext = "_dr03"
  306. data_path = f'data/TauStudies/output{ext}/'
  307. save_plots = False
  308. rss = (ResultSet("tttt", data_path+'yield_tttt.root'),
  309. ResultSet("ttw", data_path+'yield_ttw.root'),
  310. ResultSet("ttz", data_path+'yield_ttz.root'),
  311. ResultSet("tth", data_path+'yield_tth.root'))
  312. tttt_event_obs_in_sr = plot_event_obs, ('TTTT',), {'in_sr': True}
  313. ttw_event_obs_in_sr = plot_event_obs, ('TTW',), {'in_sr': True}
  314. ttz_event_obs_in_sr = plot_event_obs, ('TTZ',), {'in_sr': True}
  315. tth_event_obs_in_sr = plot_event_obs, ('TTH',), {'in_sr': True}
  316. tttt_event_obs = plot_event_obs, ('TTTT',), {'in_sr': False}
  317. ttw_event_obs = plot_event_obs, ('TTW',), {'in_sr': False}
  318. ttz_event_obs = plot_event_obs, ('TTZ',), {'in_sr': False}
  319. tth_event_obs = plot_event_obs, ('TTH',), {'in_sr': False}
  320. tttt_lep_multi = plot_lep_multi, ('TTTT',)
  321. ttw_lep_multi = plot_lep_multi, ('TTW',)
  322. ttz_lep_multi = plot_lep_multi, ('TTZ',)
  323. tth_lep_multi = plot_lep_multi, ('TTH',)
  324. yield_v_gen_0tm = plot_yield_v_gen, (0,)
  325. yield_v_gen_1tm = plot_yield_v_gen, (1,)
  326. yield_v_gen_2tm = plot_yield_v_gen, (2,)
  327. nGen_v_nSel = plot_nGen_v_nSel
  328. # Now assemble the plots into figures.
  329. plots = [
  330. Plot((plot_yield_grid, (-1,)),
  331. 'Yield Ignoring Taus'),
  332. Plot((plot_yield_grid, (0,)), 'Yield For events with 0 Tau'),
  333. Plot((plot_s_over_b, (0,)), 'Real TTTT and Total BG - 0 Tau'),
  334. Plot((plot_yield_grid, (1,)), 'Yield For events with 1 Tau'),
  335. Plot((plot_s_over_b, (1,)), 'Real TTTT and Total BG - 1 Tau'),
  336. Plot((plot_yield_grid, (2,)), 'Yield For events with 2 or more Tau'),
  337. Plot((plot_s_over_b, (2,)), 'Real TTTT and Total BG - 2 or more Tau'),
  338. Plot(nGen_v_nSel,
  339. r'#Generated tau vs. #Selected tau'),
  340. Plot(tttt_lep_multi,
  341. 'Lepton Multiplicity - TTTT'),
  342. Plot(ttw_lep_multi,
  343. 'Lepton Multiplicity - TTW'),
  344. Plot(ttz_lep_multi,
  345. 'Lepton Multiplicity - TTZ'),
  346. Plot(tth_lep_multi,
  347. 'Lepton Multiplicity - TTH'),
  348. Plot(tttt_event_obs_in_sr,
  349. 'TTTT - Event Observables (In SR)'),
  350. Plot(ttw_event_obs_in_sr,
  351. 'TTW - Event Observables (In SR)'),
  352. Plot(ttz_event_obs_in_sr,
  353. 'TTZ - Event Observables (In SR)'),
  354. Plot(tth_event_obs_in_sr,
  355. 'TTH - Event Observables (In SR)'),
  356. Plot(plot_tau_efficiency,
  357. 'Tau Efficiency'),
  358. Plot(plot_tau_purity,
  359. 'Tau Purity'),
  360. ]
  361. # Finally, render and save the plots and generate the html+bootstrap
  362. # dashboard to view them
  363. render_plots(plots, to_disk=save_plots)
  364. if not save_plots:
  365. with open(data_path+'config.yaml') as f:
  366. config_txt = f.read()
  367. generate_dashboard(plots, 'Tau Studies',
  368. output=f'tau_studies{ext}.html',
  369. source=__file__,
  370. config=config_txt
  371. )