#!/usr/bin/env python
import numpy as np
import matplotlib.pyplot as plt
from filval.result_set import ResultSet
from filval.histogram import hist, hist2d, hist_add, hist_sub, hist_div, hist_norm, hist_scale, hist2d_norm
from filval.plotting import (decl_plot, render_plots, hist_plot, hist_plot_stack, hist2d_plot,
Plot, generate_dashboard, hists_to_table)
def set_xticks():
x_ticks = [f'SR{i}' for i in range(1, 17)]
plt.xticks([x for x in range(1, 17)], x_ticks, rotation=60)
def get_sr(rs, tau_category, tm=False):
if tm:
if tau_category == 0:
return hist(rs.SRs_0tmtau)
if tau_category == 1:
return hist(rs.SRs_1tmtau)
elif tau_category == 2:
return hist(rs.SRs_2tmtau)
else:
if tau_category == -1:
return hist(rs.ignore_tau_SRs)
elif tau_category == 0:
return hist(rs.SRs_0tau)
elif tau_category == 1:
return hist(rs.SRs_1tau)
elif tau_category == 2:
return hist(rs.SRs_2tau)
@decl_plot
def plot_yield_grid(tau_category=-1):
r"""## Event Yield
The event yield for the eight signal regions defined in AN-17-115. Data is normalized
to the integrated luminosity of $35.9\textrm{fb}^{-1}$.
Ignoring taus means both that there is no requirement on number of good taus *and*
the taus, if present, are not considered for the SS pair.
If taus are not ignored, any good tau with $p_T$>20Gev is considered in constructing the SS lepton pair. The yields
are then further broken down by the number of good taus in the event.
A "good" tau in the above means any tau candidate passing the `byTightIsolationMVArun2v1DBoldDMwLT` ID w/ pt>20GeV.
It is also required to pass tau-lepton cross cleaning where it must not match any electron or muon within
$\delta R < 0.4$.
Truth-matched taus are those that match within $\delta R < 0.3$ with gen-level taus that pass the flag `fromHardProcessDecayed`.
"""
_, ((ax_tttt, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
tttt, ttw, ttz, tth = [get_sr(rs, tau_category) for rs in rss]
tm_tttt, tm_ttw, tm_ttz, tm_tth = [get_sr(rs, tau_category, True) for rs in rss]
plt.sca(ax_tttt)
plt.title('TTTT')
hist_plot(tttt, stats=False, label='Mock Analysis', include_errors=True)
if tau_category >= 0:
hist_plot(tm_tttt, stats=False, label='Truth-Matched Taus', include_errors=True)
plt.ylim((0, None))
set_xticks()
plt.sca(ax_ttw)
plt.title('TTW')
hist_plot(ttw, stats=False, label='Mock Analysis', include_errors=True)
if tau_category >= 0:
hist_plot(tm_ttw, stats=False, label='Truth-Matched Taus', include_errors=True)
plt.ylim((0, None))
set_xticks()
plt.legend()
plt.sca(ax_ttz)
plt.title('TTZ')
hist_plot(ttz, stats=False, label='Mock Analysis', include_errors=True)
if tau_category >= 0:
hist_plot(tm_ttz, stats=False, label='Truth-Matched Taus', include_errors=True)
plt.ylim((0, None))
set_xticks()
plt.xlabel('Signal Region')
plt.sca(ax_tth)
plt.title('TTH')
hist_plot(tth, stats=False, label='Mock Analysis', include_errors=True)
if tau_category >= 0:
hist_plot(tm_tth, stats=False, label='Truth-Matched Taus', include_errors=True)
plt.ylim((0, None))
set_xticks()
plt.xlabel('Signal Region')
def to_table(hists):
return hists_to_table(hists, row_labels=['TTTT', 'TTW', 'TTZ', 'TTH'],
column_labels=[f'SR{n}' for n in range(1, len(tttt[0])+1)])
tables = '
Mock
'
tables += to_table([tttt, ttw, ttz, tth])
if tau_category >= 0:
tables += 'TM Taus
'
tables += to_table([tm_tttt, tm_ttw, tm_ttz, tm_tth])
return tables
@decl_plot
def plot_yield_v_gen(tau_category=-1):
r"""## Event Yield Vs. # of Generated Taus
"""
def get_sr(rs):
h = None
if tau_category == 0:
h = rs.SRs_0tmtau_diff_nGenTau
if tau_category == 1:
h = rs.SRs_1tmtau_diff_nGenTau
elif tau_category == 2:
h = rs.SRs_2tmtau_diff_nGenTau
return hist2d_norm(hist2d(h), norm=1, axis=0)
_, ((ax_tttt, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
tttt, ttw, ttz, tth = [get_sr(rs) for rs in rss]
def do_plot(h, title):
hist2d_plot(h, title=title, txt_format='{:.2f}')
plt.sca(ax_tttt)
do_plot(tttt, 'TTTT')
plt.ylabel("\# Gen Taus")
plt.sca(ax_ttw)
do_plot(ttw, 'TTW')
plt.legend()
plt.sca(ax_ttz)
do_plot(ttz, 'TTZ')
plt.xlabel('Signal Region')
plt.ylabel("\# Gen Taus")
plt.sca(ax_tth)
do_plot(tth, 'TTH')
plt.xlabel('Signal Region')
@decl_plot
def plot_nGen_v_nSel():
_, ((ax_tttt, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
tttt, ttw, ttz, tth = [hist2d(rs.nGen_v_RecoTaus_in_SR) for rs in rss]
def do_plot(h, title):
hist2d_plot(h, title=title, txt_format='{:.2f}')
plt.sca(ax_tttt)
do_plot(tttt, 'TTTT')
plt.ylabel("\# Selected Taus")
plt.sca(ax_ttw)
do_plot(ttw, 'TTW')
plt.legend()
plt.sca(ax_ttz)
do_plot(ttz, 'TTZ')
plt.xlabel('\# Gen Taus')
plt.ylabel("\# Selected Taus")
plt.sca(ax_tth)
do_plot(tth, 'TTH')
plt.xlabel('\# Gen Taus')
@decl_plot
def plot_yield_stack():
r"""## Event Yield - Stacked
The event yield for the eight signal regions defined in AN-17-115. Data is normalized
to the Moriond 2018 integrated luminosity ($35.9\textrm{fb}^{-1}$). Code for the histogram generation is
here:
"""
tttt, ttw, ttz, tth = map(lambda rs: hist(rs.SRs), rss)
hist_plot_stack([ttw, ttz, tth], labels=['TTW', 'TTZ', 'TTH'])
tttt = tttt[0]*10, tttt[1], tttt[2]
hist_plot(tttt, label='TTTT (x10)', stats=False, color='k')
plt.ylim((0, 60))
plt.xlabel('Signal Region')
plt.legend()
@decl_plot
def plot_lep_multi(dataset):
_, (ax_els, ax_mus, ax_taus) = plt.subplots(3, 1)
els = list(map(lambda rs: hist_norm(hist(rs.nEls)), rss))
mus = list(map(lambda rs: hist_norm(hist(rs.nMus)), rss))
taus = list(map(lambda rs: hist_norm(hist(rs.nTaus)), rss))
def _plot(ax, procs):
plt.sca(ax)
tttt, ttw, ttz, tth = procs
h = {'TTTT': tttt,
'TTW': ttw,
'TTZ': ttz,
'TTH': tth}[dataset]
hist_plot(h, stats=False, label=dataset)
_plot(ax_els, els)
plt.xlabel('\\# Good Electrons')
plt.legend()
_plot(ax_mus, mus)
plt.xlabel('\\# Good Muons')
_plot(ax_taus, taus)
plt.xlabel('\\# Good Taus')
@decl_plot
def plot_sig_strength(tau_category, tm):
r""" The signal strength of the TTTT signal defined as
$\frac{S}{\sqrt{S+B+\sigma_B^2}}$
"""
tttt, ttw, ttz, tth = map(lambda rs: get_sr(rs, tau_category, tm), rss)
bg = hist_add(ttw, ttz, tth)
strength = tttt[0] / np.sqrt(tttt[0] + bg[0] + bg[1]**2)
hist_plot((strength, tttt[1], tttt[2]), stats=False)
@decl_plot
def plot_event_obs(dataset, in_sr=True):
r"""The distribution of $N_{jet}$, $N_{Bjet}$, MET, and $H_T$ in either all events
or only signal region (igoring taus) events.
"""
_, ((ax_njet, ax_nbjet), (ax_ht, ax_met)) = plt.subplots(2, 2)
# tttt, ttw, ttz, tth = map(lambda rs: hist(rs.SRs), rss)
tttt, ttw, ttz, tth = rss
rs = {'TTTT': tttt,
'TTW': ttw,
'TTZ': ttz,
'TTH': tth}[dataset]
def _plot(ax, obs):
plt.sca(ax)
if in_sr:
h = {'MET': rs.met_in_SR,
'HT': rs.ht_in_SR,
'NJET': rs.njet_in_SR,
'NBJET': rs.nbjet_in_SR}[obs]
else:
h = {'MET': rs.met,
'HT': rs.ht,
'NJET': rs.njet,
'NBJET': rs.nbjet}[obs]
hist_plot(hist(h), stats=False, label=dataset)
plt.xlabel(obs)
_plot(ax_njet, 'NJET')
_plot(ax_nbjet, 'NBJET')
_plot(ax_ht, 'HT')
_plot(ax_met, 'MET')
@decl_plot
def plot_event_obs_stack(in_signal_region=True):
r"""
"""
_, ((ax_njet, ax_nbjet), (ax_ht, ax_met)) = plt.subplots(2, 2)
def _plot(ax, obs):
plt.sca(ax)
if in_signal_region:
attr = {'MET': 'met_in_SR',
'HT': 'ht_in_SR',
'NJET': 'njet_in_SR',
'NBJET': 'nbjet_in_SR'}[obs]
else:
attr = {'MET': 'met',
'HT': 'ht',
'NJET': 'njet',
'NBJET': 'nbjet'}[obs]
tttt, ttw, ttz, tth = map(lambda rs: hist(getattr(rs, attr)), rss)
hist_plot_stack([ttw, ttz, tth], labels=["TTW", "TTZ", "TTH"])
hist_plot(hist_scale(tttt, 5), label="TTTT (x5)", color='k')
plt.xlabel(obs)
_plot(ax_njet, 'NJET')
_plot(ax_nbjet, 'NBJET')
plt.legend()
_plot(ax_ht, 'HT')
_plot(ax_met, 'MET')
@decl_plot
def plot_s_over_b(tau_category):
tttt, ttw, ttz, tth = [get_sr(rs, tau_category) for rs in rss]
sig = get_sr(rss[0], tau_category, tm=True)
fake_tttt = hist_sub(tttt, sig)
total_bg = hist_add(fake_tttt, ttw, ttz, tth)
plt.subplot(3, 1, 1)
hist_plot(total_bg, include_errors=True, label="TTV+TTH+Fake TTTT")
hist_plot(sig, include_errors=True, label="Truth Matched TTTT")
plt.ylabel('\\# Events')
plt.ylim((0, 5.5))
set_xticks()
plt.legend()
plt.subplot(3, 1, 2)
hist_plot(hist_div(sig, total_bg), label="ratio", color='k')
plt.ylabel('Ratio')
plt.ylim((0, 2.7))
set_xticks()
plt.subplot(3, 1, 3)
bg_vals, bg_errs, _ = total_bg
sig_vals, _, _ = sig
den = np.sqrt(sig_vals + bg_vals + bg_errs**2)
sig = sig_vals / den, *sig[1:]
hist_plot(sig, color='k')
plt.ylabel(r'$S/\sqrt{S+B+\sigma_B^2}$')
plt.ylim((0, 0.5))
set_xticks()
# tables = 'Mock
'
# tables += to_table([total_bg, ttw, ttz, tth])
# if tau_category >= 0:
# tables += 'TM Taus
'
# tables += to_table([tm_tttt, tm_ttw, tm_ttz, tm_tth])
# return tables
@decl_plot
def plot_tau_efficiency():
_, ((ax_tttt, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
tttt, ttw, ttz, tth = list(map(lambda rs: hist(rs.tau_efficiency_v_pt), rss))
def _plot(ax, dataset):
plt.sca(ax)
h = {'TTTT': tttt,
'TTW': ttw,
'TTZ': ttz,
'TTH': tth}[dataset]
hist_plot(h, stats=False, label=dataset, include_errors=True)
plt.text(200, 0.05, dataset)
plt.xlabel(r"$P_T$(GeV)")
plt.ylim((0, 1.1))
_plot(ax_tttt, 'TTTT')
_plot(ax_ttw, 'TTW')
_plot(ax_ttz, 'TTZ')
_plot(ax_tth, 'TTH')
@decl_plot
def plot_tau_purity():
_, ((ax_tttt, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
tttt, ttw, ttz, tth = list(map(lambda rs: hist(rs.tau_purity_v_pt), rss))
def _plot(ax, dataset):
plt.sca(ax)
h = {'TTTT': tttt,
'TTW': ttw,
'TTZ': ttz,
'TTH': tth}[dataset]
hist_plot(h, stats=False, label=dataset, include_errors=True)
plt.text(200, 0.05, dataset)
plt.xlabel(r"$P_T$(GeV)")
plt.ylim((0, 1.1))
_plot(ax_tttt, 'TTTT')
_plot(ax_ttw, 'TTW')
_plot(ax_ttz, 'TTZ')
_plot(ax_tth, 'TTH')
if __name__ == '__main__':
ext = ""
# ext = "_dr03"
data_path = f'data/TauStudies/output{ext}/'
save_plots = False
rss = (ResultSet("tttt", data_path+'yield_tttt.root'),
ResultSet("ttw", data_path+'yield_ttw.root'),
ResultSet("ttz", data_path+'yield_ttz.root'),
ResultSet("tth", data_path+'yield_tth.root'))
tttt_event_obs_in_sr = plot_event_obs, ('TTTT',), {'in_sr': True}
ttw_event_obs_in_sr = plot_event_obs, ('TTW',), {'in_sr': True}
ttz_event_obs_in_sr = plot_event_obs, ('TTZ',), {'in_sr': True}
tth_event_obs_in_sr = plot_event_obs, ('TTH',), {'in_sr': True}
tttt_event_obs = plot_event_obs, ('TTTT',), {'in_sr': False}
ttw_event_obs = plot_event_obs, ('TTW',), {'in_sr': False}
ttz_event_obs = plot_event_obs, ('TTZ',), {'in_sr': False}
tth_event_obs = plot_event_obs, ('TTH',), {'in_sr': False}
tttt_lep_multi = plot_lep_multi, ('TTTT',)
ttw_lep_multi = plot_lep_multi, ('TTW',)
ttz_lep_multi = plot_lep_multi, ('TTZ',)
tth_lep_multi = plot_lep_multi, ('TTH',)
yield_v_gen_0tm = plot_yield_v_gen, (0,)
yield_v_gen_1tm = plot_yield_v_gen, (1,)
yield_v_gen_2tm = plot_yield_v_gen, (2,)
nGen_v_nSel = plot_nGen_v_nSel
# Now assemble the plots into figures.
plots = [
Plot((plot_yield_grid, (-1,)),
'Yield Ignoring Taus'),
Plot((plot_yield_grid, (0,)), 'Yield For events with 0 Tau'),
Plot((plot_s_over_b, (0,)), 'Real TTTT and Total BG - 0 Tau'),
Plot((plot_yield_grid, (1,)), 'Yield For events with 1 Tau'),
Plot((plot_s_over_b, (1,)), 'Real TTTT and Total BG - 1 Tau'),
Plot((plot_yield_grid, (2,)), 'Yield For events with 2 or more Tau'),
Plot((plot_s_over_b, (2,)), 'Real TTTT and Total BG - 2 or more Tau'),
Plot(nGen_v_nSel,
r'#Generated tau vs. #Selected tau'),
Plot(tttt_lep_multi,
'Lepton Multiplicity - TTTT'),
Plot(ttw_lep_multi,
'Lepton Multiplicity - TTW'),
Plot(ttz_lep_multi,
'Lepton Multiplicity - TTZ'),
Plot(tth_lep_multi,
'Lepton Multiplicity - TTH'),
Plot(tttt_event_obs_in_sr,
'TTTT - Event Observables (In SR)'),
Plot(ttw_event_obs_in_sr,
'TTW - Event Observables (In SR)'),
Plot(ttz_event_obs_in_sr,
'TTZ - Event Observables (In SR)'),
Plot(tth_event_obs_in_sr,
'TTH - Event Observables (In SR)'),
Plot(plot_tau_efficiency,
'Tau Efficiency'),
Plot(plot_tau_purity,
'Tau Purity'),
]
# Finally, render and save the plots and generate the html+bootstrap
# dashboard to view them
render_plots(plots, to_disk=save_plots)
if not save_plots:
with open(data_path+'config.yaml') as f:
config_txt = f.read()
generate_dashboard(plots, 'Tau Studies',
output=f'tau_studies{ext}.html',
source=__file__,
config=config_txt
)