123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213 |
- #!/usr/bin/env python
- import re
- import numpy as np
- import matplotlib.pyplot as plt
- from filval.plotting import (decl_plot, render_plots, hist_plot, Plot, generate_dashboard, hists_to_table)
- from filval.histogram import hist, hist_add
- X_TICKS = ['CRZ', 'CRW']+[f'SR{i}' for i in range(1, 17)]
- def read_yields(tag, postfit=False):
- import ROOT
- f = ROOT.TFile.Open(f'data/JetPtStudies/{tag}/mlfit.root')
- hists = {'tttt': hist(f.Get('shapes_prefit/SS/tttt')),
- 'ttw': hist(f.Get('shapes_prefit/SS/ttw')),
- 'ttz': hist(f.Get('shapes_prefit/SS/ttz')),
- 'tth': hist(f.Get('shapes_prefit/SS/tth'))
- }
- try:
- hists['fakes_mc'] = hist(f.Get('shapes_prefit/SS/fakes_mc'))
- except:
- print(tag, 'has no fakes_mc')
- hists['fakes_mc'] = None
- return hists
- def set_xticks():
- plt.xticks([x-0.5 for x in range(1, 19)], X_TICKS, rotation=60)
- @decl_plot
- def plot_yields(bJetPt):
- """\
- [SR Definitions](https://github.com/cfangmeier/FTAnalysis/blob/new_baseline/analysis/misc/signal_regions.h#L127)
- """
- _, ((ax_tttt, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
- if bJetPt == 25:
- cut_40 = read_yields('v1.0.4_JetPtCut40')
- cut_35 = read_yields('v1.0.4_JetPtCut35')
- cut_30 = read_yields('v1.0.4_JetPtCut30')
- cut_25 = read_yields('v1.0.4_JetPtCut25')
- cut_20 = read_yields('v1.0.4_JetPtCut20')
- else: # bJetPt == 20
- cut_40 = read_yields('v1.0.4_bJetPtCut20JetPtCut40')
- cut_35 = read_yields('v1.0.4_bJetPtCut20JetPtCut35')
- cut_30 = read_yields('v1.0.4_bJetPtCut20JetPtCut30')
- cut_25 = read_yields('v1.0.4_bJetPtCut20JetPtCut25')
- cut_20 = read_yields('v1.0.4_bJetPtCut20JetPtCut20')
- def do_plot(axis, key):
- plt.sca(axis)
- hist_plot(cut_40[key], label="Jet $P_T>$40", include_errors=True)
- hist_plot(cut_35[key], label="Jet $P_T>$35", include_errors=True)
- hist_plot(cut_30[key], label="Jet $P_T>$30", include_errors=True)
- hist_plot(cut_25[key], label="Jet $P_T>$25", include_errors=True)
- hist_plot(cut_20[key], label="Jet $P_T>$20", include_errors=True)
- plt.title(key)
- set_xticks()
- do_plot(ax_tttt, 'tttt')
- do_plot(ax_ttw, 'ttw')
- plt.legend()
- do_plot(ax_ttz, 'ttz')
- do_plot(ax_tth, 'tth')
- tables = []
- for cut, label in zip([cut_40, cut_35, cut_30, cut_25, cut_20],
- ["Jet $P_T>$40", "Jet $P_T>$35", "Jet $P_T>$30", "Jet $P_T>$25", "Jet $P_T>$20"]):
- tables.append(f'<h3>{label}</h3>')
- tables.append(hists_to_table([cut['tttt'], cut['ttw'], cut['tth'], cut['ttz']],
- column_labels=X_TICKS, row_labels=['TTTT', 'TTW', 'TTH', 'TTZ']))
- return ''.join(tables)
- @decl_plot
- def plot_yields_ratio():
- _, ((ax_tttt, ax_ttw), (ax_ttz, ax_tth)) = plt.subplots(2, 2)
- cut_40 = read_yields('v1.0.4_JetPtCut40')
- cut_35 = read_yields('v1.0.4_JetPtCut35')
- cut_30 = read_yields('v1.0.4_JetPtCut30')
- cut_25 = read_yields('v1.0.4_JetPtCut25')
- cut_20 = read_yields('v1.0.4_JetPtCut20')
- def do_plot(axis, key):
- plt.sca(axis)
- c35 = (cut_35[key][0] / cut_40[key][0], *cut_35[key][1:])
- c30 = (cut_30[key][0] / cut_40[key][0], *cut_30[key][1:])
- c25 = (cut_25[key][0] / cut_40[key][0], *cut_25[key][1:])
- c20 = (cut_20[key][0] / cut_40[key][0], *cut_25[key][1:])
- hist_plot(c35, label="Jet $P_T>$35")
- hist_plot(c30, label="Jet $P_T>$30")
- hist_plot(c25, label="Jet $P_T>$25")
- hist_plot(c20, label="Jet $P_T>$20")
- plt.title(key)
- axis.set_ylim((0, 10))
- set_xticks()
- do_plot(ax_tttt, 'tttt')
- plt.legend()
- do_plot(ax_ttw, 'ttw')
- do_plot(ax_ttz, 'ttz')
- do_plot(ax_tth, 'tth')
- @decl_plot
- def plot_signal_over_background(tags):
- cuts = [read_yields(tag) for tag in tags]
- def clean_label(label):
- return label.replace('_', r'\_')
- def do_plot_significance(hists, label):
- background = hist_add(hists['ttw'], hists['ttz'], hists['tth'])
- tttt = hists['tttt']
- ttw = hists['ttw'][0]
- ttz = hists['ttz'][0]
- tth = hists['tth'][0]
- sigma2_bg = (0.4*ttw)**2 + (0.4*ttz)**2 + (0.5*tth)**2
- if hists['fakes_mc'] is not None:
- fakes_mc = hists['fakes_mc']
- background = hist_add(background, fakes_mc)
- sigma2_bg += (0.2*fakes_mc[0])**2
- ratio = tttt[0] / np.sqrt(tttt[0] + background[0] + sigma2_bg), *tttt[1:]
- hist_plot(ratio, label=clean_label(label))
- plt.sca(plt.subplot(221))
- for cut, tag in zip(cuts, tags):
- signal = cut['tttt']
- hist_plot(signal, label=clean_label(tag))
- set_xticks()
- plt.ylabel('TTTT')
- plt.ylim((0, 10))
- plt.sca(plt.subplot(222))
- for cut, tag in zip(cuts, tags):
- background = hist_add(cut['ttw'], cut['ttz'], cut['tth'])
- hist_plot(background, label=clean_label(tag))
- set_xticks()
- plt.ylabel('TTW+TTZ+TTH+MCFakes')
- plt.ylim((0, 10))
- plt.sca(plt.subplot(212))
- for cut, tag in zip(cuts, tags):
- do_plot_significance(cut, tag)
- set_xticks()
- plt.ylabel(r'SIG / $\sqrt{SIG+BG + \sigma_{BG}^2}$')
- plt.legend()
- tables = []
- for cut, tag in zip(cuts, tags):
- tables.append(f'<h3>{tag}</h3>')
- tables.append(hists_to_table([cut['tttt'], cut['ttw'], cut['tth'], cut['ttz']],
- column_labels=X_TICKS, row_labels=['TTTT', 'TTW', 'TTH', 'TTZ']))
- return ''.join(tables)
- @decl_plot
- def plot_sigs(include_fakes):
- """\
- Significances above only include TTZ, TTW, TTH, and (optionally) MC-Fakes(TTBar) backgrounds.
- """
- def get_sig(tag):
- regex = re.compile('Sig: ([0-9.]+)')
- ext = '' if include_fakes else '_nofakes'
- with open(f'data/JetPtStudies/{tag}{ext}.txt') as f:
- sig = float(regex.findall(f.read())[0])
- return sig
- sigs_b25 = {n: get_sig(f'v1.0.4_JetPtCut{n}') for n in [40, 35, 30, 25, 20]}
- sigs_b20 = {n: get_sig(f'v1.0.4_bJetPtCut20JetPtCut{n}') for n in [40, 35, 30, 25, 20]}
- plt.scatter(*zip(*sigs_b25.items()), label="bJetMinPt=25")
- for key, val in sigs_b25.items():
- plt.text(key, val-0.1, f'{key}: {val:0.3f}')
- plt.scatter(*zip(*sigs_b20.items()), label="bJetMinPt=20")
- for key, val in sigs_b20.items():
- plt.text(key, val+0.1, f'{key}: {val:0.3f}')
- plt.ylabel('Significance')
- plt.xlabel('Jet $P_T$ Cut')
- plt.xticks([20, 25, 30, 35, 40])
- plt.xlim((18, 43))
- plt.ylim((0, 3.0))
- plt.legend()
- def main():
- sig_o_bg = plot_signal_over_background, (['v1.0.4_JetPtCut40', 'v1.0.4_JetPtCut25', 'v1.0.4_JetPtCut20'],)
- sig_o_bg2 = plot_signal_over_background, (['v1.0.4_JetPtCut25', 'v1.0.4_bJetPtCut20JetPtCut25'],)
- plots = [
- Plot((plot_yields, (25,)), 'Prefit Yields (bJetPt>25)'),
- Plot((plot_yields, (20,)), 'Prefit Yields (bJetPt>20)'),
- Plot(plot_yields_ratio, 'Prefit Yields Relative to Baseline'),
- Plot((plot_sigs, (True,)), 'Significances with Fakes'),
- Plot((plot_sigs, (False,)), 'Significances w/o Fakes'),
- Plot(sig_o_bg, 'Binned Yields with variable JetPt'),
- Plot(sig_o_bg2, 'Binned Yields with variable bJetPt'),
- ]
- render_plots(plots, to_disk=False)
- generate_dashboard(plots, 'New Baseline Yields - Various JetPt Cuts',
- output='new_baseline_yields_jet_pt_cuts.html',
- source=__file__,
- )
- if __name__ == '__main__':
- main()
|