Sfoglia il codice sorgente

Adds new plots and June 20 Presentation

Caleb Fangmeier 5 anni fa
parent
commit
358beaee75

BIN
docs/presentations/2018_06_20/16x9_seal03.jpg


BIN
docs/presentations/2018_06_20/CMSlogo.png


+ 63 - 0
docs/presentations/2018_06_20/beamerthemebjeldbak.sty

@@ -0,0 +1,63 @@
+% Inspired by Cameron Bracken's theme originally posted
+% here: http://cameron.bracken.bz/beamer-template
+% January 2009
+
+% Modifications done by Martin Bjeldbak Madsen
+% June 2014
+\mode<presentation>
+
+\RequirePackage{tgpagella}
+
+% \DeclareOptionBeamer{titlepage}{\PassOptionsToPackage{titlepage=#1}{beamertheme-Inner}}
+% \ProcessOptionsBeamer
+
+\useoutertheme[subsection=false,shadow]{miniframes}
+\useinnertheme{default}
+\usefonttheme{serif}
+
+\setbeamertemplate{footline} % show slide number on all slides but the first
+{%
+  \ifnum\c@framenumber=1
+  \else
+    \begin{beamercolorbox}[wd=0.95\paperwidth,right,dp=2ex]{page number}
+      \insertframenumber/\inserttotalframenumber%
+    \end{beamercolorbox}
+  \fi%
+  \begin{beamercolorbox}[colsep=1.25pt]{lower separation line foot}
+  \end{beamercolorbox}
+}
+
+% \ifnum\c@framenumber=1
+\usebackgroundtemplate{\includegraphics[width=1.3\paperwidth]{16x9_seal03.jpg}}
+% \else
+% \fi%
+
+% Display a slide before the current section with overview info
+% \AtBeginSection[]
+% {%
+%    \begin{frame}
+%        \frametitle{Overview}
+%        \tableofcontents[currentsection,hideothersubsections]
+%    \end{frame}
+% }
+
+\setbeamerfont{title like}{shape=\scshape}
+\setbeamerfont{frametitle}{shape=\scshape}
+\setbeamerfont{section in head/foot}{shape=\scshape,size=\tiny}
+
+\setbeamertemplate{navigation symbols}{} % hide bottom nav buttons
+\setbeamercovered{transparent} % don't hide strip-teased bullet points
+
+% \definecolor{barcolor}{HTML}{77C4D3} % teal
+\definecolor{barcolor}{HTML}{DD0000} % red
+\setbeamercolor{lower separation line head}{bg=barcolor}
+\setbeamercolor{lower separation line foot}{bg=barcolor}
+\setbeamercolor{normal text}{fg=black,bg=white}
+\setbeamercolor{alerted text}{fg=red}
+\setbeamercolor{example text}{fg=black}
+\setbeamercolor{structure}{fg=black}
+
+\setbeamercolor{palette tertiary}{fg=black,bg=black!10}
+\setbeamercolor{palette quaternary}{fg=black,bg=black!10}
+
+\mode<all>

BIN
docs/presentations/2018_06_20/figures/eff_table.png


+ 1 - 0
docs/presentations/2018_06_20/live_figures

@@ -0,0 +1 @@
+../../../plotting/seeding_studies/figures

BIN
docs/presentations/2018_06_20/main.pdf


+ 328 - 0
docs/presentations/2018_06_20/main.tex

@@ -0,0 +1,328 @@
+
+% rubber: module pdftex
+
+\documentclass[english,aspectratio=43,8pt]{beamer}
+\usepackage{graphicx}
+\usepackage{amssymb}
+\usepackage{booktabs}
+\usepackage{siunitx}
+\usepackage{subcaption}
+\usepackage{marvosym}
+\usepackage{verbatim}
+\usepackage[normalem]{ulem}  % Needed for \sout
+
+\newcommand{\pb}{\si{\pico\barn}}%
+\newcommand{\fb}{\si{\femto\barn}}%
+\newcommand{\invfb}{\si{\per\femto\barn}}
+\newcommand{\GeV}{\si{\giga\electronvolt}}
+
+\hypersetup{colorlinks=true,urlcolor=blue}
+
+\usetheme[]{bjeldbak}
+
+\newcommand{\backupbegin}{%
+   \newcounter{finalframe}
+   \setcounter{finalframe}{\value{framenumber}}
+}
+\newcommand{\backupend}{%
+   \setcounter{framenumber}{\value{finalframe}}
+}
+
+\newcommand\blfootnote[1]{%
+  \begingroup
+  \renewcommand\thefootnote{}\footnote{#1}%
+  \addtocounter{footnote}{-1}%
+  \endgroup
+}
+
+\begin{document}
+
+\title[$e$ Seeding Validation]{Offline Electron Seeding Validation \-- Update}
+\author[C. Fangmeier]{\textbf{Caleb Fangmeier} \\ Ilya Kravchenko,  Greg Snow}
+\institute[UNL]{University of Nebraska \-- Lincoln}
+\date{EGM Reco/Comm/HLT meeting | June 22, 2018}
+
+\titlegraphic{%
+\begin{figure}
+  \includegraphics[width=1in]{CMSlogo.png}\hspace{0.75in}\includegraphics[width=1in]{nebraska-n.png}
+\end{figure}
+}
+
+\begin{frame}[plain]
+  \titlepage%
+\end{frame}
+
+\begin{frame}{Introduction}
+  \begin{itemize}
+    \item Our goal is to study \textbf{seeding} for the \textbf{offline} GSF tracking with the \textbf{new pixel detector}.
+    \item Specifically, we want to optimize the new pixel-matching scheme from HLT for use in off-line reconstruction.
+    \item This Talk:
+      \begin{itemize}
+        \item Define and demonstrate performance of a GSF-Track ``Fake Rate'' for:
+          \begin{itemize}
+            \item Current offline (Legacy HLT) seeding method with default offline settings
+            \item New HLT seeding method with HLT settings\footnotemark%
+            \item New HLT seeding method with optimized-for-offline (aka \texttt{wide}) settings
+          \end{itemize}
+        \item Show efficiency for prompt electrons specifically
+      \end{itemize}
+  \end{itemize}
+  \footnotetext[1]{\tiny Note: In previous talks I've called this one \texttt{narrow}.}
+\end{frame}
+
+
+\begin{frame}{N-Hit Electron Seeding}
+  \begin{columns}
+  \begin{column}{0.5\textwidth}
+    {\small
+    \begin{enumerate}
+      \item Using the beam spot, the SC position, and SC energy, propagate a path through the pixels.
+      \item Require the first hit to be within a $\delta\phi$ and $\delta z$ window. ($\delta\phi$ and $\delta R$ for FPIX)
+      \item $\delta z$ window for first hit is huge as SC and beam spot positions give very little information about $z$.
+      \item Forget the SC position, and propagate a new track based on the vertex and first hit positions, and the SC energy.
+      \item Progress one-by-one through the remaining hits in the seed and require each one fit within a specified window around the track.
+      \item Quit when all hits are matched, or a hit falls outside the window. No skipping is allowed.
+    \end{enumerate}
+    }
+  \end{column}
+  \begin{column}{0.5\textwidth}
+    \begin{figure}
+      \includegraphics[width=0.9\textwidth]{../common/diagrams/seeding_step2.png}
+    \end{figure}
+    \begin{figure}
+      \includegraphics[width=0.9\textwidth]{../common/diagrams/seeding_step3.png}
+    \end{figure}
+  \end{column}
+  \end{columns}
+\end{frame}
+
+\begin{frame}{Definitions}
+  \begin{itemize}
+    \item \textbf{Sim-Track \--} A track from a simulated electron both originating from the luminous region of CMS (beam-spot $\pm 5\sigma$) and having $|\eta|<3.0$.
+    \item \textbf{ECAL-Driven Seed \--} A seed created via a matching procedure between Super-Clusters and General Tracking Seeds (Either from \texttt{ElectronSeedProducer} or \texttt{ElectronNHitSeedProducer}). Must have $HOE<0.15$.
+    \item \textbf{GSF Track \--} A track from GSF-Tracking resulting from an \textbf{ECAL-Driven Seed}
+    % \item \textbf{Seeding Efficiency \--} The fraction of \textbf{Sim-Tracks} that have a matching \textbf{ECAL-Driven Seed} (based on simhit-rechit linkage or $\Delta R$ matching)
+    \item \textbf{GSF Tracking Efficiency \--} The fraction of \textbf{Sim-Tracks} that have a matching \textbf{GSF Track} (based on $\Delta R$ matching)
+    % \item \textbf{ECAL-Driven Seed Purity \--} The fraction of \textbf{ECAL-Driven Seeds} that have a matching \textbf{Sim-Track}
+    \item \textbf{GSF Tracking Purity \--} The fraction of \textbf{GSF Tracks} that have a matching \textbf{Sim-Track}
+    \item \textbf{GSF Tracking Fake Rate \--} The fraction of nontruth-matched Super-Clusters which result in at least one \textbf{GSF Track}.
+  \end{itemize}
+\end{frame}
+
+% \begin{frame}{Previous status-quo}
+%   \begin{columns}
+%     \begin{column}{0.45\textwidth}
+%       {\small
+%       \begin{itemize}
+%         \item In a previous presentation\footnotemark, I showed efficiency vs. purity for
+%           \begin{itemize}
+%             \item Old pair-match seeding (\texttt{ElectronSeedProducer})
+%             \item New triplet seeding (\texttt{ElectronNHitSeedProducer}) for several choices of matching windows.
+%           \end{itemize}
+%         \item Performance of new seeding at the \texttt{wide} working point was comparable to old seeding in low-fake ($Z\rightarrow e^+e^-$) environment
+%         \item Needed to validate performance in a high fake environment.
+%       \end{itemize}
+%     }
+%     \end{column}
+%     \begin{column}{0.6\textwidth}
+%       \begin{figure}
+%         \includegraphics[width=0.9\textwidth]{../common/figures/tracking_roc_curves_linear_plus_old_hoe.png}
+%       \end{figure}
+%     \end{column}
+%   \end{columns}
+% \footnotetext[1]{\tiny \url{https://indico.cern.ch/event/697077/contributions/2936039/attachments/1618649/2573874/main.pdf}}
+% \end{frame}
+
+\begin{frame}{Relative Performance \-- GSF Tracking Efficiency}
+  \begin{columns}
+    \begin{column}{0.5\textwidth}
+      \begin{itemize}
+        \item Figure shows GSF Tracking efficiency vs kinematic variables of the electron \texttt{SimTrack}
+        \item Efficiency is more or less the same for both DY and $t\bar{t}$ environments and for both algorithms and working points.
+        \item Largest (statistically significant) differences appear at low $p_T$ and in the barrel/endcap transition region.
+      \end{itemize}
+    \end{column}
+    \begin{column}{0.5\textwidth}
+      \begin{figure}
+        GSF Tracking Efficiency
+        \includegraphics[width=1.0\textwidth]{live_figures/tracking_eff_dR.png}
+      \end{figure}
+    \end{column}
+  \end{columns}
+  \blfootnote{\tiny This and the following slide have been shown before and are included for completeness}
+\end{frame}
+
+\begin{frame}{Relative Performance \-- GSF Track Purity}
+  \begin{columns}
+    \begin{column}{0.5\textwidth}
+      \begin{itemize}
+        \item Figure shows GSF Tracking purity vs kinematic variables of the \texttt{GSFTrack}
+        \item Clearly purity is affected by the higher fake environment in the $t\bar{t}$ sample.
+        \item Note how the \texttt{narrow} working point of the new seeding (green) has significantly better purity than the \texttt{wide} working point or the old seeding.
+        \item Purity loss at high $p_T$ is a feature of the shared-hits matching between \texttt{SimTracks} and \texttt{GSFTracks}.
+      \end{itemize}
+    \end{column}
+    \begin{column}{0.5\textwidth}
+      \begin{figure}
+        GSF Tracking Purity
+        \includegraphics[width=1.0\textwidth]{live_figures/tracking_pur_dR.png}
+      \end{figure}
+    \end{column}
+  \end{columns}
+\end{frame}
+
+\begin{frame}{Relative Performance \-- GSF Tracking Fake Rate}
+  \begin{columns}
+    \begin{column}{0.5\textwidth}
+      \begin{itemize}
+        \item Figure shows GSF Tracking fake rate vs kinematic variables of the supercluster
+        \item Supercluster must have $HOE<0.15$, so fakes are presumably mostly from photons or $\pi^0$
+        \item There is a clear reduction in the fake rate with respect to the old method in both the \texttt{default} and \texttt{wide} working points.
+        \item Seen in both $Z\rightarrow ee$ and $t\bar{t}$
+      \end{itemize}
+    \end{column}
+    \begin{column}{0.5\textwidth}
+      \begin{figure}
+        GSF Tracking Fake Rate
+        \includegraphics[width=1.0\textwidth]{live_figures/fake_rate_no_e_match.png}
+      \end{figure}
+    \end{column}
+  \end{columns}
+\end{frame}
+
+\begin{frame}{Relative Performance \-- Prompt Efficiency}
+  \begin{columns}
+    \begin{column}{0.4\textwidth}
+      \begin{itemize}
+        \item The fraction of prompt electrons that match a GSF-Track
+        \item Biggest improvements, again, happen at low $p_T$ and in the barrel/endcap transition region
+      \end{itemize}
+    \end{column}
+    \begin{column}{0.6\textwidth}
+      \begin{figure}
+        Prompt GSF Tracking Efficiency
+        \includegraphics[width=1.0\textwidth]{live_figures/prompt_eff_dR.png}
+      \end{figure}
+    \end{column}
+  \end{columns}
+\end{frame}
+
+\begin{frame}{Relative Performance \-- Seed Multiplicity}
+  \begin{columns}
+    \begin{column}{0.4\textwidth}
+      \begin{itemize}
+        \item A single supercluster can potentially produce many seeds if it matches with many nearby tracks, however only one of these can be from the electron.
+        \item Reducing the number of overall seeds while still producing \emph{the} correct one is desirable from a computational perspective.
+        \item The new seeding scheme (\texttt{wide} WP) reduces the number of seeds by a factor of 3.8 for $t\bar{t}$ and 5.6 for $Z\rightarrow ee$.
+      \end{itemize}
+    \end{column}
+    \begin{column}{0.6\textwidth}
+      \begin{figure}
+        Number of Electron Seeds Per Event
+        \includegraphics[width=1.0\textwidth]{live_figures/number_of_good_seeds.png}
+      \end{figure}
+    \end{column}
+  \end{columns}
+\end{frame}
+
+\begin{frame}{Overall Performance}
+  \begin{center}
+    Integrating over all tracks with $p_T>20$\,GeV and $|\eta|<2.4$ yields the performance numbers below.
+    \begin{figure}
+      % Number of Electron Seeds Per Event
+      \includegraphics[width=0.6\textwidth]{figures/eff_table.png}
+    \end{figure}
+\begin{itemize}
+    \item The HLT default settings (\texttt{narrow}) of the new pixel matching
+      scheme yield non-trivially better purity at the loss of some efficiency
+      with respect to both the old seeding and the \texttt{wide} working point.
+    \item The \texttt{wide} working point of the new seeding matches the
+      \texttt{old-seeding} within errors, except that purity is $\approx 2$\%
+      better in the $t\bar{t}$ sample
+\end{itemize}
+  \end{center}
+\end{frame}
+
+\begin{frame}{Conclusions \& Outlook}
+  \begin{itemize}
+    \item The new seeding algorithm has been optimized to have better or comparable performance to the current Offline seeding method in all investigated metrics including
+      \begin{itemize}
+        \item GSF Tracking Efficiency
+        \item GSF Tracking Purity
+        \item GSF Tracking Fake Rate
+        \item Number of Seeds
+      \end{itemize}
+    \item Unless there are objections, propose to move forward with implementing the new algorithm as the default in the next available CMSSW release.
+  \end{itemize}
+  \blfootnote{\tiny Analysis and plotting code is available at \url{}}
+  \blfootnote{\tiny Additional plots are available at \url{}}
+\end{frame}
+
+\appendix
+\backupbegin%
+
+\begin{frame}
+  \begin{center}
+    {\Huge BACKUP}
+  \end{center}
+\end{frame}
+
+\begin{frame}{Overall Performance}
+  \begin{columns}
+    \begin{column}{0.5\textwidth}
+      \begin{figure}
+        GSF Tracking Performance (Hit Matched)
+        \includegraphics[width=1.0\textwidth]{live_figures/tracking_roc_curve.png}
+      \end{figure}
+    \end{column}
+    \begin{column}{0.5\textwidth}
+      \begin{figure}
+        GSF Tracking Performance ($\Delta R$ Matched)
+        \includegraphics[width=1.0\textwidth]{live_figures/tracking_roc_curve_dR.png}
+      \end{figure}
+    \end{column}
+  \end{columns}
+\end{frame}
+
+\begin{frame}{Matching Window Parameters}
+\begin{table}[]
+\centering
+\begin{tabular}{@{}llrrrr@{}}
+\toprule
+&  & \textbf{narrow} & \textbf{default (HLT)} & \textbf{wide} & \textbf{extra-wide} \\ \midrule
+Hit 1 & dPhiMaxHighEt & \textbf{0.025} & \textbf{0.05} & \textbf{0.1} & \textbf{0.15} \\
+ & dPhiMaxHighEtThres & 20.0 & 20.0 & 20.0 & 20.0 \\
+ & dPhiMaxLowEtGrad & -0.002 & -0.002 & -0.002 & -0.002 \\
+ & dRzMaxHighEt & 9999.0 & 9999.0 & 9999.0 & 9999.0 \\
+ & dRzMaxHighEtThres & 0.0 & 0.0 & 0.0 & 0.0 \\
+ & dRzMaxLowEtGrad & 0.0 & 0.0 & 0.0 & 0.0 \\ \midrule
+Hit 2 & dPhiMaxHighEt & \textbf{0.0015} & \textbf{0.003} & \textbf{0.006} & \textbf{0.009} \\
+ & dPhiMaxHighEtThres & 0.0 & 0.0 & 0.0 & 0.0 \\
+ & dPhiMaxLowEtGrad & 0.0 & 0.0 & 0.0 & 0.0 \\
+ & dRzMaxHighEt & \textbf{0.025} & \textbf{0.05} & \textbf{0.1} & \textbf{0.15} \\
+ & dRzMaxHighEtThres & 30.0 & 30.0 & 30.0 & 30.0 \\
+ & dRzMaxLowEtGrad & -0.002 & -0.002 & -0.002 & -0.002 \\ \midrule
+Hit 3+ & dPhiMaxHighEt & \textbf{0.0015} & \textbf{0.003} & \textbf{0.006} & \textbf{0.009} \\
+ & dPhiMaxHighEtThres & 0.0 & 0.0 & 0.0 & 0.0 \\
+ & dPhiMaxLowEtGrad & 0.0 & 0.0 & 0.0 & 0.0 \\
+ & dRzMaxHighEt & \textbf{0.025} & \textbf{0.05} & \textbf{0.1} & \textbf{0.15} \\
+ & dRzMaxHighEtThres & 30.0 & 30.0 & 30.0 & 30.0 \\
+ & dRzMaxLowEtGrad & -0.002 & -0.002 & -0.002 & -0.002 \\ \bottomrule
+\end{tabular}
+\end{table}
+\centering
+\texttt{NHit} Seeding window parameters. Bold designates modified values.
+\end{frame}
+
+\begin{frame}{Samples}
+  \begin{itemize}
+    \item {\tiny /ZToEE\_NNPDF30\_13TeV-powheg\_M\_120\_200/RunIISummer17DRStdmix-NZSFlatPU28to62\_92X\_upgrade2017\_realistic\_v10-v1}
+
+    \item {\tiny /TT\_TuneCUETP8M2T4\_13TeV-powheg-pythia8/RunIISummer17DRStdmix-NZSFlatPU28to62\_92X\_upgrade2017\_realistic\_v10-v2}
+  \end{itemize}
+
+\end{frame}
+
+\backupend%
+
+\end{document}

BIN
docs/presentations/2018_06_20/nebraska-n.png


+ 84 - 47
plotting/eff_plots.py

@@ -149,8 +149,8 @@ def calc_window(et, eta, hit, variable, cut_sel):
 
 
 def hist_integral_ratio(num, den):
-    num_int = num.get_integral()
-    den_int = den.get_integral()
+    num_int = num.integral
+    den_int = den.integral
 
     ratio = num_int / den_int
     error = np.sqrt(den_int) / den_int  # TODO: Check this definition of error
@@ -166,15 +166,19 @@ def center_text(x, y, txt, **kwargs):
 def hist_plot(h: Hist1D, *args, include_errors=False, line_width=1, **kwargs):
     """ Plots a 1D ROOT histogram object using matplotlib """
 
-    counts = h.get_counts()
-    edges = h.get_edges()
+    counts = h.counts
+    edges = h.edges
     left, right = edges[:-1], edges[1:]
     x = np.array([left, right]).T.flatten()
     y = np.array([counts, counts]).T.flatten()
 
     plt.plot(x, y, *args, linewidth=line_width, **kwargs)
     if include_errors:
-        plt.errorbar(h.get_bin_centers(), h.counts, yerr=h.errors,
+        if h.errors_up is not None:
+            errors = np.vstack((h.errors_down, h.errors_up))
+        else:
+            errors = h.errors
+        plt.errorbar(h.bin_centers, h.counts, yerr=errors,
                      color='k', marker=None, linestyle='None',
                      barsabove=True, elinewidth=.7, capsize=1)
 
@@ -196,7 +200,7 @@ def hist2d_percent_contour(h: Hist1D, percent: float, axis: str):
     values = np.cumsum(values, axis=axis_idx)
     idxs = np.argmax(values > percent, axis=axis_idx)
 
-    x_centers, y_centers = h.get_bin_centers()
+    x_centers, y_centers = h.bin_centers
 
     if axis == 'x':
         return x_centers[idxs], y_centers
@@ -270,6 +274,7 @@ def plot_hit_vs_layer(sample, region):
 @mpb.decl_fig
 def plot_roc_curve(pfx, ext=''):
     load_samples()
+    show_fr = pfx == "tracking"
 
     def get_num_den(sample, basename):
         num = Hist1D(sample[f'{basename}_num'])
@@ -280,9 +285,13 @@ def plot_roc_curve(pfx, ext=''):
         sample_name = f'{proc}-{wp}'
         eff, eff_err = get_num_den(sample, f'{pfx}_eff_v_phi{ext}')
         pur, pur_err = get_num_den(sample, f'{pfx}_pur_v_phi{ext}')
-        rows.append([wp,
-                     rf'${eff*100:0.2f}\pm{eff_err*100:0.2f}\%$',
-                     rf'${pur*100:0.2f}\pm{pur_err*100:0.2f}\%$'])
+        if show_fr:
+            fr, fr_err = get_num_den(sample, f'fake_rate_v_phi')
+
+            rows.append([wp,
+                         rf'${eff*100:0.2f}\pm{eff_err*100:0.2f}\%$',
+                         rf'${pur*100:0.2f}\pm{pur_err*100:0.2f}\%$',
+                         rf'${fr*100:0.2f}\pm{fr_err*100:0.2f}\%$'])
 
         plt.errorbar([pur], [eff], xerr=[pur_err], yerr=[eff_err],
                      label=sample_name, marker='o', color=color(proc, wp))
@@ -296,12 +305,17 @@ def plot_roc_curve(pfx, ext=''):
     plt.legend(loc='lower right')
 
     col_labels = ['Sample', 'Working Point', 'Efficiency', 'Purity']
+    if show_fr:
+        col_labels.append("Fake Rate")
     row_labels = [r'$Z \rightarrow ee$', '', '', r'$t\bar{t}$', '', '']
     return to_html_table(rows, col_labels, row_labels, 'table-condensed')
 
 
 @mpb.decl_fig
-def plot_kinematic_eff(pref, ext='', xlim=(None, None), ylim=(None, None), norm=None, label_pfx='', incl_sel=True):
+def plot_kinematic_eff(pref, ext='', ylim=(None, None), norm=None, label_pfx='', incl_sel=True,
+                       bins_pt=None, bins_eta=None, bins_phi=None,
+                       xlim_pt=(None, None), xlim_eta=(None, None), xlim_phi=(None, None),
+                       is_ratio=False):
     load_samples()
     ax_pt = plt.subplot(221)
     ax_eta = plt.subplot(222)
@@ -312,34 +326,44 @@ def plot_kinematic_eff(pref, ext='', xlim=(None, None), ylim=(None, None), norm=
         l = sample_name
         c = color(proc, wp)
 
-        def do_plot(ax, name):
+        def do_plot(ax, name, bins):
             plt.sca(ax)
-            h = Hist1D(sample[name], no_overflow=True)
-            if norm:
-                h = h / (norm*h.get_integral())
+            if is_ratio:
+                num = Hist1D(sample[name+"_num"], no_overflow=True)
+                den = Hist1D(sample[name+"_den"], no_overflow=True)
+                if bins:
+                    num.rebin(bins)
+                    den.rebin(bins)
+                h = num // den
+            else:
+                h = Hist1D(sample[name], no_overflow=True)
+                if norm:
+                    h = h / (norm*h.integral)
+                if bins:
+                    h.rebin(bins)
             hist_plot(h, include_errors=errors, label=l, color=c)
 
-        do_plot(ax_pt, f'{pref}_v_pt{ext}')
-        do_plot(ax_eta, f'{pref}_v_eta{ext}')
-        do_plot(ax_phi, f'{pref}_v_phi{ext}')
+        do_plot(ax_pt, f'{pref}_v_pt{ext}', bins_pt)
+        do_plot(ax_eta, f'{pref}_v_eta{ext}', bins_eta)
+        do_plot(ax_phi, f'{pref}_v_phi{ext}', bins_phi)
 
     plt.sca(ax_pt)
-    if not incl_sel: center_text(0.5, 0.15, r'$|\eta|<2.4$')
+    if not incl_sel: center_text(0.5, 0.15, r'$|\eta|<2.5$')
     plt.xlabel(fr"{label_pfx} $p_T$")
     plt.ylim(ylim)
-    plt.xlim(xlim)
+    plt.xlim(xlim_pt)
 
     plt.sca(ax_eta)
     if not incl_sel: center_text(0.5, 0.15, r'$p_T>20$')
     plt.xlabel(fr"{label_pfx} $\eta$")
     plt.ylim(ylim)
-    plt.xlim(xlim)
+    plt.xlim(xlim_eta)
 
     plt.sca(ax_phi)
     if not incl_sel: center_text(0.5, 0.15, r'$p_T>20$ and $|\eta|<2.4$')
     plt.xlabel(fr"{label_pfx} $\phi$")
     plt.ylim(ylim)
-    plt.xlim(xlim)
+    plt.xlim(xlim_phi)
     plt.tight_layout()
     plt.legend(loc='upper left', bbox_to_anchor=(0.6, 0.45), bbox_transform=plt.gcf().transFigure)
 
@@ -349,7 +373,7 @@ def plot_ecal_rel_res():
     load_samples()
     for sample_name, sample in samples.items():
         h = Hist1D(sample['ecal_energy_resolution'])
-        h = h / h.get_integral()
+        h = h / h.integral
         hist_plot(h, label=sample_name)
     plt.xlabel(r"ECAL $E_T$ relative error")
     plt.legend()
@@ -390,9 +414,9 @@ def simple_dist(hist_name, rebin=(), norm=1, xlabel="", ylabel="", xlim=None, yl
         if rebin:
             h.rebin(*rebin)
 
-        mean = np.sum(h.get_counts() * h.get_bin_centers()) / h.get_integral()
+        mean = np.sum(h.counts * h.bin_centers) / h.integral
         if norm is not None:
-            h = h * (norm / h.get_integral())
+            h = h * (norm / h.integral)
         hist_plot(h, label=f'{sample_name} ($\\mu={mean:.2f}$)',
                   color=color(proc, wp), line_width=line_width)
     if xlim:
@@ -411,7 +435,7 @@ def simple_dist2d(hist_name, proc, wp, xlabel="", ylabel="", xlim=None, ylim=Non
     # sample_name = f'{proc}-{wp}'
     h = Hist2D(sample[hist_name])
     if norm is not None:
-        h = h * (norm / h.get_integral())
+        h = h * (norm / h.integral)
     plot_2d(h, colz_fmt='g')
     if xlim:
         plt.xlim(xlim)
@@ -431,7 +455,7 @@ def all_cut_plots(refresh=True, publish=False):
         'number_of_seeds': (simple_dist, ('n_seeds',),
                             dict(xlabel='Number of Seeds', rebin=(50, -0.5, 200.5))),
         'number_of_good_seeds': (simple_dist, ('n_good_seeds',),
-                                 dict(xlabel='Number of Good Seeds', rebin=(50, -0.5, 200.5))),
+                                 dict(xlabel='Number of Seeds', rebin=(50, -0.5, 200.5))),
         'number_of_scls': (simple_dist, ('n_scl',),
                            dict(xlabel='Number of Super-Clusters', xlim=(-0.5, 25.5))),
         'number_of_good_scls': (simple_dist, ('n_good_scl',),
@@ -442,6 +466,11 @@ def all_cut_plots(refresh=True, publish=False):
         'number_of_gsf_tracks': (simple_dist, ('n_gsf_track',),
                                  dict(xlabel='Number of reco electrons', xlim=(-0.5, 20.5))),
 
+        'number_of_prompt': (simple_dist, ('n_prompt',),
+                             dict(xlabel='Number of prompt electrons', xlim=(-0.5, 20.5))),
+        'number_of_nonprompt': (simple_dist, ('n_nonprompt',),
+                                dict(xlabel='Number of nonprompt electrons', xlim=(-0.5, 20.5))),
+
         'number_of_matched': (simple_dist, ('n_matched',),
                               dict(xlabel='Number of matched electrons', xlim=(-0.5, 10.5), line_width=4)),
         'number_of_merged': (simple_dist, ('n_merged',),
@@ -482,19 +511,6 @@ def all_cut_plots(refresh=True, publish=False):
         'matched_dpT_dR': (simple_dist, ('matched_dpT_dR',),
                            dict(xlabel='dpT between sim and reco - dR Matched')),
 
-        'sim_pt': (simple_dist, ('sim_pt',),
-                   dict(xlabel='Sim Track $p_T$', xlim=(0, None))),
-        'sim_eta': (simple_dist, ('sim_eta',),
-                    dict(xlabel='Sim Track $eta$', rebin=(20, -3, 3))),
-        'sim_phi': (simple_dist, ('sim_phi',),
-                    dict(xlabel='Sim Track $phi$', rebin=(20, -3.14, 3.14), ylim=(0, None))),
-        'reco_pt': (simple_dist, ('reco_pt',),
-                    dict(xlabel='Reco Track $p_T$', xlim=(0, None))),
-        'reco_eta': (simple_dist, ('reco_eta',),
-                     dict(xlabel='Reco Track $eta$', rebin=(20, -3, 3))),
-        'reco_phi': (simple_dist, ('reco_phi',),
-                     dict(xlabel='Reco Track $phi$', rebin=(20, -3.14, 3.14), ylim=(0, None))),
-
         'tm_corr': (simple_dist2d, ('tm_corr', 'zee', 'old-default'),
                     dict(xlabel='Seed Matched', ylabel='Track Matched', norm=1)),
 
@@ -510,18 +526,26 @@ def all_cut_plots(refresh=True, publish=False):
 
 
         'good_sim_kinem': (plot_kinematic_eff, ('good_sim',),
-                           dict(norm=1, ylim=(0, None))),
+                           dict(norm=1, ylim=(0, None), bins_eta=30, bins_phi=30)),
         'gsf_track_kinem': (plot_kinematic_eff, ('gsf_track',),
-                            dict(norm=1, ylim=(0, None))),
+                            dict(norm=1, ylim=(0, None), bins_eta=30, bins_phi=30)),
         'seed_kinem': (plot_kinematic_eff, ('seed',),
-                       dict(norm=1, ylim=(0, None))),
+                       dict(norm=1, ylim=(0, None), bins_eta=30, bins_phi=30)),
         'scl_kinem': (plot_kinematic_eff, ('scl',),
-                      dict(norm=1, ylim=(0, None))),
+                      dict(norm=1, ylim=(0, None), bins_eta=30, bins_phi=30)),
+        'prompt_kinem': (plot_kinematic_eff, ('prompt',),
+                         dict(norm=1, ylim=(0, None), bins_pt=30, bins_eta=30, bins_phi=30)),
+        'nonprompt_kinem': (plot_kinematic_eff, ('nonprompt',),
+                            dict(norm=1, ylim=(0, None), xlim_pt=(0, 5), bins_eta=30, bins_phi=30)),
     }
 
     def add_num_den(key, func, args, kwargs):
-        figures[key] = (func, args, dict(**kwargs, ylim=(0, 1.1)))
         base_ext = kwargs.get('ext', '')
+        bins_pt_ = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 150, 200, 300]
+        kwargs['bins_pt'] = kwargs.get('bins_pt', bins_pt_)
+        kwargs['bins_eta'] = kwargs.get('bins_eta', 15)
+        kwargs['bins_phi'] = kwargs.get('bins_phi', 15)
+        figures[key] = (func, args, dict(**kwargs, ylim=(0, 1.1), is_ratio=True))
         kwargs_ = kwargs.copy()
         kwargs_['ext'] = base_ext+'_num'
         figures[key+'_num'] = (func, args, kwargs_)
@@ -533,13 +557,26 @@ def all_cut_plots(refresh=True, publish=False):
     add_num_den('tracking_pur', plot_kinematic_eff, ('tracking_pur',), dict(incl_sel=False))
     add_num_den('tracking_eff_dR', plot_kinematic_eff, ('tracking_eff',), dict(ext='_dR', incl_sel=False))
     add_num_den('tracking_pur_dR', plot_kinematic_eff, ('tracking_pur',), dict(ext='_dR', incl_sel=False))
+    add_num_den('prompt_eff', plot_kinematic_eff, ('prompt_eff',), dict(incl_sel=False))
+    add_num_den('prompt_pur', plot_kinematic_eff, ('prompt_pur',), dict(incl_sel=False))
+    add_num_den('prompt_eff_dR', plot_kinematic_eff, ('prompt_eff',), dict(ext='_dR', incl_sel=False))
+    add_num_den('prompt_pur_dR', plot_kinematic_eff, ('prompt_pur',), dict(ext='_dR', incl_sel=False))
+    add_num_den('nonprompt_eff', plot_kinematic_eff, ('nonprompt_eff',), dict(incl_sel=False))
+    add_num_den('nonprompt_pur', plot_kinematic_eff, ('nonprompt_pur',), dict(incl_sel=False))
+    add_num_den('nonprompt_eff_dR', plot_kinematic_eff, ('nonprompt_eff',), dict(ext='_dR', incl_sel=False))
+    add_num_den('nonprompt_pur_dR', plot_kinematic_eff, ('nonprompt_pur',), dict(ext='_dR', incl_sel=False))
+
     add_num_den('seeding_eff', plot_kinematic_eff, ('seed_eff',), dict(incl_sel=False))
     add_num_den('seeding_pur', plot_kinematic_eff, ('seed_pur',), dict(incl_sel=False))
+
     add_num_den('fake_rate_incl', plot_kinematic_eff, ('fake_rate_incl',), {})
+    add_num_den('fake_rate_no_e_match_incl', plot_kinematic_eff, ('fake_rate_no_e_match_incl',), {})
     add_num_den('partial_fake_rate_incl', plot_kinematic_eff, ('partial_fake_rate_incl',), {})
     add_num_den('full_fake_rate_incl', plot_kinematic_eff, ('full_fake_rate_incl',), {})
     add_num_den('clean_fake_rate_incl', plot_kinematic_eff, ('clean_fake_rate_incl',), {})
+    #
     add_num_den('fake_rate', plot_kinematic_eff, ('fake_rate',), dict(incl_sel=False))
+    add_num_den('fake_rate_no_e_match', plot_kinematic_eff, ('fake_rate_no_e_match',), dict(incl_sel=False))
     add_num_den('partial_fake_rate', plot_kinematic_eff, ('partial_fake_rate',), dict(incl_sel=False))
     add_num_den('full_fake_rate', plot_kinematic_eff, ('full_fake_rate',), dict(incl_sel=False))
     add_num_den('clean_fake_rate', plot_kinematic_eff, ('clean_fake_rate',), dict(incl_sel=False))
@@ -568,9 +605,9 @@ def all_cut_plots(refresh=True, publish=False):
                         output=f'hists.html',
                         source=__file__)
 
-    mpb.generate_report(figures, 'Update',
-                        output='report.html',
-                        body='../docs/reports/report_2018_05_30.md')
+    # mpb.generate_report(figures, 'Update',
+    #                     output='report.html',
+    #                     body='../docs/reports/report_2018_05_30.md')
     if publish:
         mpb.publish()
 

+ 16 - 7
plotting/sim_track_viz.py

@@ -148,6 +148,9 @@ def print_sim_vtxs(sim_vtxs, sim_tracks, sim_pvs, gens, gsf_tracks):
     gen_pxs = gens[b'gen_px']
     gen_pys = gens[b'gen_py']
     gen_pzs = gens[b'gen_pz']
+    gen_prompt = gens[b'gen_isPrompt']
+    gen_hadronic = gens[b'gen_isDirectHadronDecayProduct']
+    gen_tauic = gens[b'gen_isTauDecayProduct']
 
     gsf_pdgIds = -11 * gsf_tracks[b'trk_q']
     gsf_vxs = gsf_tracks[b'trk_vtxx'] - bsp[b'bsp_x']
@@ -166,33 +169,35 @@ def print_sim_vtxs(sim_vtxs, sim_tracks, sim_pvs, gens, gsf_tracks):
     #         print(f'{idx}|{processType}', sourceSimIdx, daughterSimIdx, sep=" - ")
 
     print('GEN')
-    for (idx, (px, py, pz, vx, vy, vz, pdgId)) in enumerate(zip(gen_pxs, gen_pys, gen_pzs, vxs, vys, vzs, gen_pdgIds)):
+    for (idx, (px, py, pz, vx, vy, vz, pdgId, prompt, hadronic, tauic)) in enumerate(zip(gen_pxs, gen_pys, gen_pzs, vxs,
+                                                                                         vys, vzs, gen_pdgIds,
+                                                                                         gen_prompt, gen_hadronic,
+                                                                                         gen_tauic)):
         if abs(pdgId) != 11: continue
         p = np.sqrt(px**2 + py**2 + pz**2)
         theta = np.arctan2(np.hypot(px, py), pz)
         phi = np.arctan2(px, py)
-        print(f'{idx: 4d}|{pdgId: 3d} - ({vx:8.2f},{vy:8.2f},{vz:8.2f}) ({theta:5.2f},{phi:5.2f}) {p:.2f}GeV')
+        print(f'{idx: 4d}|{pdgId: 3d} - ({vx:8.2f},{vy:8.2f},{vz:8.2f}) ({theta:5.2f},{phi:5.2f}) {p: 8.2f}GeV - ({prompt},{hadronic},{tauic}) ')
 
     print('SIM')
-    for (idx, (px, py, pz, parentVtxIdx, decayVtxIdx, pdgId)) in enumerate(zip(gen_pxs, gen_pys, gen_pzs, parentVtxIdxs, decayVtxIdxs, sim_pdgIds)):
+    for (idx, (px, py, pz, parentVtxIdx, decayVtxIdx, pdgId)) in enumerate(zip(sim_pxs, sim_pys, sim_pzs, parentVtxIdxs, decayVtxIdxs, sim_pdgIds)):
         if abs(pdgId) != 11: continue
-        if len(sourceSimIdxs[parentVtxIdx]) > 0: continue
+        if len(sourceSimIdxs[parentVtxIdx]) > 0: continue  # not from another sim track decay
         vx = xs[parentVtxIdx]
         vy = ys[parentVtxIdx]
         vz = zs[parentVtxIdx]
         p = np.sqrt(px**2 + py**2 + pz**2)
         theta = np.arctan2(np.hypot(px, py), pz)
         phi = np.arctan2(px, py)
-        print(f'{idx: 4d}|{pdgId: 3d} - ({vx:8.2f},{vy:8.2f},{vz:8.2f}) ({theta:5.2f},{phi:5.2f}) {p:.2f}GeV')
+        print(f'{idx: 4d}|{pdgId: 3d} - ({vx:8.2f},{vy:8.2f},{vz:8.2f}) ({theta:5.2f},{phi:5.2f}) {p: 8.2f}GeV')
 
     print('RECO')
     for (idx, (px, py, pz, vx, vy, vz, pdgId)) in enumerate(zip(gsf_pxs, gsf_pys, gsf_pzs, gsf_vxs, gsf_vys, gsf_vzs, gsf_pdgIds)):
         p = np.sqrt(px**2 + py**2 + pz**2)
         theta = np.arctan2(np.hypot(px, py), pz)
         phi = np.arctan2(px, py)
-        print(f'{idx: 4d}|{pdgId: 3d} - ({vx:8.2f},{vy:8.2f},{vz:8.2f}) ({theta:5.2f},{phi:5.2f}) {p:.2f}GeV')
+        print(f'{idx: 4d}|{pdgId: 3d} - ({vx:8.2f},{vy:8.2f},{vz:8.2f}) ({theta:5.2f},{phi:5.2f}) {p: 8.2f}GeV')
 
-    input()
 
 
 def plot_event(sim_tracks, sim_vtxs, sim_pvs, gens, gsf_tracks, event_idx):
@@ -265,11 +270,15 @@ def main():
         'gen_py',
         'gen_pz',
         'gen_pdgId',
+        'gen_isTauDecayProduct',
+        'gen_isDirectHadronDecayProduct',
+        'gen_isPrompt',
     ])
     bsp = {k: v[0] for k, v in bsp.items()}
     sim_pvs = tree.array('simpv_idx')
     for event_idx in range(tree.fEntries):
         plot_event(sim_tracks, sim_vtxs, sim_pvs, gens, gsf_tracks, event_idx)
+        if event_idx == 5: break
         # break