% rubber: module pdftex

\documentclass[english,aspectratio=43,8pt]{beamer}
\usepackage{graphicx}
\usepackage{amssymb}
\usepackage{booktabs}
\usepackage{siunitx}
\usepackage{subcaption}
\usepackage{marvosym}
\usepackage{verbatim}
\usepackage[normalem]{ulem}  % Needed for \sout

% Unit shorthands built on siunitx's \si:
% picobarn, femtobarn, inverse femtobarn and giga-electronvolt.
\newcommand*{\pb}{\si{\pico\barn}}
\newcommand*{\fb}{\si{\femto\barn}}
\newcommand*{\invfb}{\si{\per\femto\barn}}
\newcommand*{\GeV}{\si{\giga\electronvolt}}

% Render hyperlinks as coloured text (no boxes); URLs are shown in blue.
\hypersetup{colorlinks=true,urlcolor=blue}

% Beamer theme for the deck. NOTE(review): `bjeldbak` is not a stock beamer
% theme, so presumably a local theme file must be on the TeX search path -- confirm.
\usetheme[]{bjeldbak}

% Frame-number bookkeeping for backup slides.
%
% \backupbegin  saves the current value of beamer's `framenumber` counter.
% \backupend    restores `framenumber` to that saved value.
%
% Wrap the backup frames in \backupbegin ... \backupend so that they do not
% add to the frame count reached at \backupbegin.
%
% The helper counter is allocated once here in the preamble rather than
% inside \backupbegin: \newcounter raises an "already defined" error when
% executed a second time, so the macro could otherwise be used only once
% per document, and \backupend would fail if \backupbegin had not run.
\newcounter{finalframe}
\newcommand{\backupbegin}{%
  \setcounter{finalframe}{\value{framenumber}}%
}
\newcommand{\backupend}{%
  \setcounter{framenumber}{\value{finalframe}}%
}

% \blfootnote{<text>}: a footnote without a visible mark.
%   #1 -- the footnote text.
\newcommand{\blfootnote}[1]{%
  \begingroup
    % Blank the footnote mark; the group keeps this redefinition local.
    \renewcommand{\thefootnote}{}%
    \footnote{#1}%
    % Take one back off the footnote counter so this unmarked note does
    % not use up a number.
    \addtocounter{footnote}{-1}%
  \endgroup
}

\begin{document}

\title[$e$ Seeding Validation]{Offline Electron Seeding Validation -- Update}
\author[C. Fangmeier]{\textbf{Caleb Fangmeier} \\ Ilya Kravchenko,  Greg Snow}
\institute[UNL]{University of Nebraska -- Lincoln}
\date{EGM Reco/Comm/HLT meeting | June 22, 2018}

% Title-page graphic: the CMS and Nebraska logos side by side, each 1in wide,
% separated by 0.75in of horizontal space.
\titlegraphic{%
\begin{figure}
  \includegraphics[width=1in]{CMSlogo.png}\hspace{0.75in}\includegraphics[width=1in]{nebraska-n.png}
\end{figure}
}

\begin{frame}[plain]
  \titlepage%
\end{frame}

\begin{frame}{Introduction}
  \begin{itemize}
    \item Our goal is to study \textbf{seeding} for the \textbf{offline} GSF tracking with the \textbf{new pixel detector}.
    \item Specifically, we want to optimize the new pixel-matching scheme from HLT for use in offline reconstruction.
    \item This Talk:
      \begin{itemize}
        \item Define and demonstrate performance of a GSF-Track ``Fake Rate'' for:
          \begin{itemize}
            \item Current offline (Legacy HLT) seeding method with default offline settings
            \item New seeding method with HLT settings\footnotemark%
            \item New seeding method with optimized-for-offline (aka \texttt{wide}) settings
          \end{itemize}
        \item Show efficiency for prompt electrons specifically
      \end{itemize}
  \end{itemize}
  \footnotetext[1]{\tiny Note: In previous talks I've called this one \texttt{narrow}.}
\end{frame}


\begin{frame}{N-Hit Electron Seeding}
  \begin{columns}
  \begin{column}{0.5\textwidth}
    {\small
    \begin{enumerate}
      \item Using the beam spot, the SC position, and SC energy, propagate a path through the pixels.
      \item Require the first hit to be within a $\delta\phi$ and $\delta z$ window. ($\delta\phi$ and $\delta R$ for FPIX)
      \item $\delta z$ window for first hit is huge as SC and beam spot positions give very little information about $z$.
      \item Forget the SC position, and propagate a new track based on the vertex and first hit positions, and the SC energy.
      \item Progress one-by-one through the remaining hits in the seed and require each one to fit within a specified window around the track.
      \item Quit when all hits are matched, or a hit falls outside the window. No skipping is allowed.
    \end{enumerate}
    }
  \end{column}
  \begin{column}{0.5\textwidth}
    \begin{figure}
      \includegraphics[width=0.9\textwidth]{../common/diagrams/seeding_step2.png}
    \end{figure}
    \begin{figure}
      \includegraphics[width=0.9\textwidth]{../common/diagrams/seeding_step3.png}
    \end{figure}
  \end{column}
  \end{columns}
\end{frame}

\begin{frame}{Definitions}
  \begin{itemize}
    \item \textbf{Sim-Track --} A track from a simulated electron both originating from the luminous region of CMS (beam spot $\pm 5\sigma$) and having $|\eta|<3.0$.
    \item \textbf{ECAL-Driven Seed --} A seed created via a matching procedure between Super-Clusters and General Tracking Seeds (either from \texttt{ElectronSeedProducer} or \texttt{ElectronNHitSeedProducer}). Must have $\mathit{HOE}<0.15$.
    \item \textbf{GSF Track --} A track from GSF-Tracking resulting from an \textbf{ECAL-Driven Seed}.
    % \item \textbf{Seeding Efficiency --} The fraction of \textbf{Sim-Tracks} that have a matching \textbf{ECAL-Driven Seed} (based on simhit-rechit linkage or $\Delta R$ matching)
    \item \textbf{GSF Tracking Efficiency --} The fraction of \textbf{Sim-Tracks} that have a matching \textbf{GSF Track} (based on $\Delta R$ matching)
    % \item \textbf{ECAL-Driven Seed Purity --} The fraction of \textbf{ECAL-Driven Seeds} that have a matching \textbf{Sim-Track}
    \item \textbf{GSF Tracking Purity --} The fraction of \textbf{GSF Tracks} that have a matching \textbf{Sim-Track}
    \item \textbf{GSF Tracking Fake Rate --} The fraction of nontruth-matched Super-Clusters which result in at least one \textbf{GSF Track}.
  \end{itemize}
\end{frame}

% \begin{frame}{Previous status-quo}
%   \begin{columns}
%     \begin{column}{0.45\textwidth}
%       {\small
%       \begin{itemize}
%         \item In a previous presentation\footnotemark, I showed efficiency vs. purity for
%           \begin{itemize}
%             \item Old pair-match seeding (\texttt{ElectronSeedProducer})
%             \item New triplet seeding (\texttt{ElectronNHitSeedProducer}) for several choices of matching windows.
%           \end{itemize}
%         \item Performance of new seeding at the \texttt{wide} working point was comparable to old seeding in low-fake ($Z\rightarrow e^+e^-$) environment
%         \item Needed to validate performance in a high fake environment.
%       \end{itemize}
%     }
%     \end{column}
%     \begin{column}{0.6\textwidth}
%       \begin{figure}
%         \includegraphics[width=0.9\textwidth]{../common/figures/tracking_roc_curves_linear_plus_old_hoe.png}
%       \end{figure}
%     \end{column}
%   \end{columns}
% \footnotetext[1]{\tiny \url{https://indico.cern.ch/event/697077/contributions/2936039/attachments/1618649/2573874/main.pdf}}
% \end{frame}

\begin{frame}{Relative Performance -- GSF Tracking Efficiency}
  \begin{columns}
    \begin{column}{0.5\textwidth}
      \begin{itemize}
        \item Figure shows GSF Tracking efficiency vs kinematic variables of the electron \texttt{SimTrack}
        \item Efficiency is comparable for both DY and $t\bar{t}$ environments and for both algorithms and working points.
        \item Largest differences appear at low $p_T$ and in the barrel/endcap transition region.
      \end{itemize}
    \end{column}
    \begin{column}{0.5\textwidth}
      \begin{figure}
        GSF Tracking Efficiency
        \includegraphics[width=1.0\textwidth]{live_figures/tracking_eff_dR.png}
      \end{figure}
    \end{column}
  \end{columns}
  \blfootnote{\tiny This and the following slide have been shown before and are included for completeness.}
\end{frame}

\begin{frame}{Relative Performance -- GSF Track Purity}
  \begin{columns}
    \begin{column}{0.5\textwidth}
      \begin{itemize}
        \item Figure shows GSF Tracking purity vs kinematic variables of the \texttt{GSFTrack}
        \item Clearly purity is affected by the higher fake environment in the $t\bar{t}$ sample.
        \item Note how the \texttt{default} working point of the new seeding (red/pink) has significantly better purity than the \texttt{wide} working point or the old seeding.
      \end{itemize}
    \end{column}
    \begin{column}{0.5\textwidth}
      \begin{figure}
        GSF Tracking Purity
        \includegraphics[width=1.0\textwidth]{live_figures/tracking_pur_dR.png}
      \end{figure}
    \end{column}
  \end{columns}
\end{frame}

\begin{frame}{Relative Performance -- GSF Tracking Fake Rate}
  \begin{columns}
    \begin{column}{0.5\textwidth}
      \begin{itemize}
        \item Figure shows GSF Tracking fake rate vs kinematic variables of the supercluster
        \item Supercluster must have $\mathit{HOE}<0.15$, so fakes are presumably mostly from photons or $\pi^0$
        \item There is a clear reduction in the fake rate with respect to the old method in both the \texttt{default} and \texttt{wide} working points.
        \item Seen in both $Z\rightarrow ee$ and $t\bar{t}$
      \end{itemize}
    \end{column}
    \begin{column}{0.5\textwidth}
      \begin{figure}
        GSF Tracking Fake Rate
        \includegraphics[width=1.0\textwidth]{live_figures/fake_rate_no_e_match.png}
      \end{figure}
    \end{column}
  \end{columns}
\end{frame}

\begin{frame}{Relative Performance -- Prompt Efficiency}
  \begin{columns}
    \begin{column}{0.4\textwidth}
      \begin{itemize}
        \item The fraction of prompt electrons that match a GSF-Track
        \item Biggest improvements, again, happen at low $p_T$ and in the barrel/endcap transition region
        \item Note the change in the first bin relative to the overall efficiency (Slide 5). Large non-prompt contribution at low $p_T$.
      \end{itemize}
    \end{column}
    \begin{column}{0.6\textwidth}
      \begin{figure}
        Prompt GSF Tracking Efficiency
        \includegraphics[width=1.0\textwidth]{live_figures/prompt_eff_dR.png}
      \end{figure}
    \end{column}
  \end{columns}
\end{frame}

\begin{frame}{Relative Performance -- Seed Multiplicity}
  \begin{columns}
    \begin{column}{0.4\textwidth}
      \begin{itemize}
        \item A single supercluster can potentially produce many seeds if it matches with many nearby tracks; however, only one of these can be from the electron.
        \item Reducing the number of overall seeds while still producing \emph{the} correct one is desirable from a computational perspective.
        \item The new seeding scheme (\texttt{wide} WP) reduces the number of seeds by a factor of 3.8 for $t\bar{t}$ and 5.6 for $Z\rightarrow ee$.
      \end{itemize}
    \end{column}
    \begin{column}{0.6\textwidth}
      \begin{figure}
        Number of Electron Seeds Per Event
        \includegraphics[width=1.0\textwidth]{live_figures/number_of_good_seeds.png}
      \end{figure}
    \end{column}
  \end{columns}
\end{frame}

\begin{frame}{Overall Performance}
  \begin{center}
    Integrating over all tracks with $p_T>20\,\GeV$ and $|\eta|<2.5$ yields the performance numbers below.
    \begin{figure}
      % Number of Electron Seeds Per Event
      \includegraphics[width=0.6\textwidth]{figures/eff_table.png}
    \end{figure}
    \begin{itemize}
      \item The HLT default settings (\texttt{new-default}) of the new pixel matching
          scheme yield non-trivially better purity at the loss of some efficiency
          with respect to both the old seeding and the \texttt{wide} working point.
      \item The \texttt{wide} working point of the new seeding matches the
          \texttt{old-seeding} within errors, except that purity is $\approx 2$\%
          better in the $t\bar{t}$ sample.
      \item Most likely better to choose the \texttt{wide} working point over the \texttt{default} one to get the gain in efficiency, and count on subsequent filters to compensate for the worse purity and fake rate.
    \end{itemize}
  \end{center}
\end{frame}

% ask for conclusion to project and find out
% - what changes need to be made
% - who is going to implement them

\begin{frame}{Conclusions \& Outlook}
  \begin{itemize}
    \item The new seeding algorithm has been optimized to have better or comparable performance to the current Offline seeding method (\texttt{old-default}) in all investigated metrics including
      \begin{itemize}
        \item GSF Tracking Efficiency
        \item GSF Tracking Purity
        \item GSF Tracking Fake Rate
        \item Number of Seeds
      \end{itemize}
    \item Unless there are objections, propose to move forward with implementing the new algorithm as the default in the next available CMSSW release.
  \end{itemize}
  \blfootnote{\tiny Analysis and plotting code is available at \url{https://git.fangmeier.tech/caleb/EGamma\_ElectronTrackingValidation}}
  \blfootnote{\tiny Additional plots are available at \url{https://eg.fangmeier.tech/seeding\_studies\_2018\_06\_20\_17/hists.html}}
\end{frame}

\appendix
\backupbegin%

\begin{frame}
  \begin{center}
    {\Huge BACKUP}
  \end{center}
\end{frame}

\begin{frame}{Overall Performance}
  \begin{columns}
    \begin{column}{0.5\textwidth}
      \begin{figure}
        GSF Tracking Performance (Hit Matched)
        \includegraphics[width=1.0\textwidth]{live_figures/tracking_roc_curve.png}
      \end{figure}
    \end{column}
    \begin{column}{0.5\textwidth}
      \begin{figure}
        GSF Tracking Performance ($\Delta R$ Matched)
        \includegraphics[width=1.0\textwidth]{live_figures/tracking_roc_curve_dR.png}
      \end{figure}
    \end{column}
  \end{columns}
\end{frame}

\begin{frame}{Matching Window Parameters}
\begin{table}[]
\centering
\begin{tabular}{@{}llrrrr@{}}
\toprule
&  & \textbf{narrow} & \textbf{default (HLT)} & \textbf{wide} & \textbf{extra-wide} \\ \midrule
Hit 1 & dPhiMaxHighEt & \textbf{0.025} & \textbf{0.05} & \textbf{0.1} & \textbf{0.15} \\
 & dPhiMaxHighEtThres & 20.0 & 20.0 & 20.0 & 20.0 \\
 & dPhiMaxLowEtGrad & -0.002 & -0.002 & -0.002 & -0.002 \\
 & dRzMaxHighEt & 9999.0 & 9999.0 & 9999.0 & 9999.0 \\
 & dRzMaxHighEtThres & 0.0 & 0.0 & 0.0 & 0.0 \\
 & dRzMaxLowEtGrad & 0.0 & 0.0 & 0.0 & 0.0 \\ \midrule
Hit 2 & dPhiMaxHighEt & \textbf{0.0015} & \textbf{0.003} & \textbf{0.006} & \textbf{0.009} \\
 & dPhiMaxHighEtThres & 0.0 & 0.0 & 0.0 & 0.0 \\
 & dPhiMaxLowEtGrad & 0.0 & 0.0 & 0.0 & 0.0 \\
 & dRzMaxHighEt & \textbf{0.025} & \textbf{0.05} & \textbf{0.1} & \textbf{0.15} \\
 & dRzMaxHighEtThres & 30.0 & 30.0 & 30.0 & 30.0 \\
 & dRzMaxLowEtGrad & -0.002 & -0.002 & -0.002 & -0.002 \\ \midrule
Hit 3+ & dPhiMaxHighEt & \textbf{0.0015} & \textbf{0.003} & \textbf{0.006} & \textbf{0.009} \\
 & dPhiMaxHighEtThres & 0.0 & 0.0 & 0.0 & 0.0 \\
 & dPhiMaxLowEtGrad & 0.0 & 0.0 & 0.0 & 0.0 \\
 & dRzMaxHighEt & \textbf{0.025} & \textbf{0.05} & \textbf{0.1} & \textbf{0.15} \\
 & dRzMaxHighEtThres & 30.0 & 30.0 & 30.0 & 30.0 \\
 & dRzMaxLowEtGrad & -0.002 & -0.002 & -0.002 & -0.002 \\ \bottomrule
\end{tabular}
\end{table}
\centering
\texttt{NHit} Seeding window parameters. Bold designates modified values.
\end{frame}

\begin{frame}{Samples}
  \begin{itemize}
    \item {\tiny /ZToEE\_NNPDF30\_13TeV-powheg\_M\_120\_200/RunIISummer17DRStdmix-NZSFlatPU28to62\_92X\_upgrade2017\_realistic\_v10-v1}

    \item {\tiny /TT\_TuneCUETP8M2T4\_13TeV-powheg-pythia8/RunIISummer17DRStdmix-NZSFlatPU28to62\_92X\_upgrade2017\_realistic\_v10-v2}
  \end{itemize}

\end{frame}

\backupend%

\end{document}