caleb
/
EGamma_ElectronTrackingValidation


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
							
% rubber: module pdftex

\documentclass[english,aspectratio=43,8pt]{beamer}
\usepackage{graphicx}
\usepackage{amssymb}
\usepackage{booktabs}
\usepackage{siunitx}
\usepackage{subcaption}
\usepackage{marvosym}
\usepackage{verbatim}
\usepackage[normalem]{ulem}  % Needed for \sout

% Unit shorthands typeset through siunitx's \si:
%   \pb    -> picobarn
%   \fb    -> femtobarn
%   \invfb -> inverse femtobarn
%   \GeV   -> gigaelectronvolt
\newcommand*{\pb}{\si{\pico\barn}}
\newcommand*{\fb}{\si{\femto\barn}}
\newcommand*{\invfb}{\si{\per\femto\barn}}
\newcommand*{\GeV}{\si{\giga\electronvolt}}

\hypersetup{colorlinks=true,urlcolor=blue}

\usetheme[]{bjeldbak}

% Backup-slide bookkeeping.
%   \backupbegin  saves the current value of beamer's framenumber counter
%                 into the finalframe counter.
%   \backupend    restores framenumber from finalframe, so frames placed
%                 between the two commands leave the frame count unchanged.
% The counter is allocated once here rather than inside \backupbegin, so
% \backupbegin can be issued more than once without a
% "command already defined" error from \newcounter.
\newcounter{finalframe}
\newcommand{\backupbegin}{%
   \setcounter{finalframe}{\value{framenumber}}%
}
\newcommand{\backupend}{%
   \setcounter{framenumber}{\value{finalframe}}%
}

\begin{document}

\title[$e$ Seeding Validation]{Offline Electron Seeding Validation -- Update}
\author[C. Fangmeier]{\textbf{Caleb Fangmeier} \\ Ilya Kravchenko,  Greg Snow}
\institute[UNL]{University of Nebraska -- Lincoln}
\date{EGM general meeting \textbf{CMS week} | April 18, 2018}

\titlegraphic{%
\begin{figure}
  \includegraphics[width=1in]{CMSlogo.png}\hspace{0.75in}\includegraphics[width=1in]{nebraska-n.png}
\end{figure}
}

\begin{frame}[plain]
  \titlepage%
\end{frame}

\begin{frame}{Introduction}
  \begin{itemize}
    \item Our goal is to study \textbf{seeding} for the \textbf{offline} GSF tracking with the \textbf{new pixel detector}.
    \item Specifically, we want to optimize the new pixel-matching scheme from HLT for use in offline reconstruction.
    \item This Talk:
      \begin{itemize}
        \item Show performance comparison between the old seeding and two working points of the new seeding in a fake-rich environment
          \begin{itemize}
            \item New Seeding working points: \texttt{narrow} (HLT default settings), and \texttt{wide} (double window sizes with respect to \texttt{narrow})
          \end{itemize}
        \item Show alternative efficiency/purity measurements using $\Delta R$ truth-matching between \texttt{SimTracks} and \texttt{GSFTracks}
      \end{itemize}
  \end{itemize}
\end{frame}


\begin{frame}{N-Hit Electron Seeding}
  \begin{columns}
  \begin{column}{0.5\textwidth}
    {\small
    \begin{enumerate}
      \item Using the beam spot, the SC position, and SC energy, propagate a path through the pixels.
      \item Require the first hit to be within a $\delta\phi$ and $\delta z$ window. ($\delta\phi$ and $\delta R$ for FPIX)
      \item $\delta z$ window for first hit is huge as SC and beam spot positions give very little information about $z$.
      \item Forget the SC position, and propagate a new track based on the vertex and first hit positions, and the SC energy.
      \item Progress one-by-one through the remaining hits in the seed and require each one fit within a specified window around the track.
      \item Quit when all hits are matched, or a hit falls outside the window. No skipping is allowed.
    \end{enumerate}
    }
  \end{column}
  \begin{column}{0.5\textwidth}
    \begin{figure}
      \includegraphics[width=0.9\textwidth]{../common/diagrams/seeding_step2.png}
    \end{figure}
    \begin{figure}
      \includegraphics[width=0.9\textwidth]{../common/diagrams/seeding_step3.png}
    \end{figure}
  \end{column}
  \end{columns}
\end{frame}

\begin{frame}{Definitions}
  \begin{itemize}
    \item \textbf{Sim-Track --} A track from a simulated electron originating from the luminous region of CMS (beam spot $\pm 5\sigma$)
    \item \textbf{ECAL-Driven Seed --} A seed created via a matching procedure between Super-Clusters and General Tracking Seeds (either from \texttt{ElectronSeedProducer} or \texttt{ElectronNHitSeedProducer})
    \item \textbf{GSF Track --} A track from GSF-Tracking resulting from an \textbf{ECAL-Driven Seed}
    % \item \textbf{Seeding Efficiency --} The fraction of \textbf{Sim-Tracks} that have a matching \textbf{ECAL-Driven Seed} (based on simhit-rechit linkage or $\Delta R$ matching)
    \item \textbf{GSF Tracking Efficiency --} The fraction of \textbf{Sim-Tracks} that have a matching \textbf{GSF Track} (again, based on simhit-rechit linkage or $\Delta R$ matching)
    % \item \textbf{ECAL-Driven Seed Purity --} The fraction of \textbf{ECAL-Driven Seeds} that have a matching \textbf{Sim-Track}
    \item \textbf{GSF Tracking Purity --} The fraction of \textbf{GSF Tracks} that have a matching \textbf{Sim-Track}
  \end{itemize}
\end{frame}

\begin{frame}{Previous status-quo}
  \begin{columns}
    \begin{column}{0.45\textwidth}
      {\small
      \begin{itemize}
        \item In a previous presentation\footnotemark, I showed efficiency vs. purity for
          \begin{itemize}
            \item Old pair-match seeding (\texttt{ElectronSeedProducer})
            \item New triplet seeding (\texttt{ElectronNHitSeedProducer}) for several choices of matching windows.
          \end{itemize}
        \item Performance of new seeding at the \texttt{wide} working point was comparable to old seeding in low-fake ($Z\rightarrow e^+e^-$) environment
        \item Needed to validate performance in a high-fake environment.
      \end{itemize}
    }
    \end{column}
    \begin{column}{0.6\textwidth}
      \begin{figure}
        \includegraphics[width=0.9\textwidth]{../common/figures/tracking_roc_curves_linear_plus_old_hoe.png}
      \end{figure}
    \end{column}
  \end{columns}
\footnotetext[1]{\tiny \url{https://indico.cern.ch/event/697077/contributions/2936039/attachments/1618649/2573874/main.pdf}}
\end{frame}

\begin{frame}{Relative Performance - GSF Tracking Efficiency}
  \begin{columns}
    \begin{column}{0.5\textwidth}
      \begin{itemize}
        \item Figure shows GSF Tracking efficiency vs kinematic variables of the electron \texttt{SimTrack}
        \item Efficiency is more or less the same for both DY and $t\bar{t}$ environments and for both algorithms and working points.
        \item Largest (statistically significant) differences appear at low $p_T$ and in the barrel/endcap transition region.
      \end{itemize}
    \end{column}
    \begin{column}{0.5\textwidth}
      \begin{figure}
        GSF Tracking Efficiency
        \includegraphics[width=1.0\textwidth]{live_figures/tracking_eff_all.png}
      \end{figure}
    \end{column}
  \end{columns}
\end{frame}

\begin{frame}{Relative Performance - GSF Tracking Purity}
  \begin{columns}
    \begin{column}{0.5\textwidth}
      \begin{itemize}
        \item Figure shows GSF Tracking purity vs kinematic variables of the \texttt{GSFTrack}
        \item Clearly purity is affected by the higher fake environment in the $t\bar{t}$ sample.
        \item Note how the \texttt{narrow} working point of the new seeding (green) has significantly better purity than the \texttt{wide} working point or the old seeding.
        \item Purity loss at high $p_T$ is a feature of the shared-hits matching between \texttt{SimTracks} and \texttt{GSFTracks}.
      \end{itemize}
    \end{column}
    \begin{column}{0.5\textwidth}
      \begin{figure}
        GSF Tracking Purity
        \includegraphics[width=1.0\textwidth]{live_figures/tracking_pur_all.png}
      \end{figure}
    \end{column}
  \end{columns}
\end{frame}

\begin{frame}{$\Delta R$ Matching}
  \begin{columns}
    \begin{column}{0.5\textwidth}
      \begin{figure}
        GSF Tracking Efficiency ($\Delta R$ Matched)
        \includegraphics[width=1.0\textwidth]{live_figures/tracking_eff_all_dR.png}
      \end{figure}
    \end{column}
    \begin{column}{0.5\textwidth}
      \begin{figure}
        GSF Tracking Purity ($\Delta R$ Matched)
        \includegraphics[width=1.0\textwidth]{live_figures/tracking_pur_all_dR.png}
      \end{figure}
    \end{column}
  \end{columns}
  \begin{itemize}
    \item Previous efficiency/purity definitions were based on shared tracker hits between \texttt{SimTracks} and \texttt{GSFTracks}.
    \item An alternative is to use simple $\Delta R<0.2$ matching.
    \item Overall numbers improve and purity no longer drops at high $p_T$.
  \end{itemize}
\end{frame}

\begin{frame}{Overall Performance}
  \begin{center}
    Integrating over all tracks with $p_T>20\,\GeV$ and $|\eta|<2.4$ yields the performance numbers below.
\begin{table}[]
  \centering
  \begin{tabular}{@{}llrr} \toprule
Sample & Algo & Efficiency ($\Delta R$ Matched) & Purity ($\Delta R$ Matched) \\ \midrule
$Z\rightarrow ee$ & \texttt{old-seeding} & $96.08\pm0.28\%$ & $99.54\pm0.29\%$ \\
                  & \texttt{narrow}      & $94.49\pm0.28\%$ & $99.72\pm0.29\%$ \\
                  & \texttt{wide}        & $96.00\pm0.28\%$ & $99.60\pm0.29\%$ \\
$t\bar{t}$        & \texttt{old-seeding} & $94.84\pm0.77\%$ & $57.49\pm0.60\%$ \\
                  & \texttt{narrow}      & $93.54\pm0.79\%$ & $65.84\pm0.67\%$ \\
                  & \texttt{wide}        & $95.06\pm0.77\%$ & $59.52\pm0.61\%$ \\ \bottomrule
  \end{tabular}
\end{table}
\begin{itemize}
    \item The HLT default settings (\texttt{narrow}) of the new pixel matching
      scheme yield non-trivially better purity at the cost of some efficiency
      with respect to both the old seeding and the \texttt{wide} working point.
    \item The \texttt{wide} working point of the new seeding matches the
      \texttt{old-seeding} within errors, except that purity is $\approx 2$\%
      better in the $t\bar{t}$ sample.
\end{itemize}
  \end{center}
\end{frame}

\begin{frame}{Conclusions \& Outlook}
  \begin{itemize}
    \item The new seeding algorithm has been verified to perform as well as,
      and in some cases better than, the current pair seeding based on MC
      studies in both low- and high-purity environments.
    \item Now the question is which working point (\texttt{wide} or \texttt{narrow}) is preferable?
    \item Unless there are objections, we propose to move forward with implementing the new algorithm as the default in the next available CMSSW release.
  \end{itemize}
\end{frame}

\appendix
\backupbegin

\begin{frame}
  \begin{center}
    {\Huge BACKUP}
  \end{center}
\end{frame}

\begin{frame}{Overall Performance}
  \begin{columns}
    \begin{column}{0.5\textwidth}
      \begin{figure}
        GSF Tracking Performance (Hit Matched)
        \includegraphics[width=1.0\textwidth]{live_figures/tracking_roc_curve.png}
      \end{figure}
    \end{column}
    \begin{column}{0.5\textwidth}
      \begin{figure}
        GSF Tracking Performance ($\Delta R$ Matched)
        \includegraphics[width=1.0\textwidth]{live_figures/tracking_roc_curve_dR.png}
      \end{figure}
    \end{column}
  \end{columns}
\end{frame}

\begin{frame}{Matching Window Parameters}
\begin{table}[]
\centering
\begin{tabular}{@{}llrrrr@{}}
\toprule
&  & \textbf{extra-narrow} & \textbf{narrow(HLT)} & \textbf{wide} & \textbf{extra-wide} \\ \midrule
Hit 1 & dPhiMaxHighEt & \textbf{0.025} & \textbf{0.05} & \textbf{0.1} & \textbf{0.15} \\
 & dPhiMaxHighEtThres & 20.0 & 20.0 & 20.0 & 20.0 \\
 & dPhiMaxLowEtGrad & -0.002 & -0.002 & -0.002 & -0.002 \\
 & dRzMaxHighEt & 9999.0 & 9999.0 & 9999.0 & 9999.0 \\
 & dRzMaxHighEtThres & 0.0 & 0.0 & 0.0 & 0.0 \\
 & dRzMaxLowEtGrad & 0.0 & 0.0 & 0.0 & 0.0 \\ \midrule
Hit 2 & dPhiMaxHighEt & \textbf{0.0015} & \textbf{0.003} & \textbf{0.006} & \textbf{0.009} \\
 & dPhiMaxHighEtThres & 0.0 & 0.0 & 0.0 & 0.0 \\
 & dPhiMaxLowEtGrad & 0.0 & 0.0 & 0.0 & 0.0 \\
 & dRzMaxHighEt & \textbf{0.025} & \textbf{0.05} & \textbf{0.1} & \textbf{0.15} \\
 & dRzMaxHighEtThres & 30.0 & 30.0 & 30.0 & 30.0 \\
 & dRzMaxLowEtGrad & -0.002 & -0.002 & -0.002 & -0.002 \\ \midrule
Hit 3+ & dPhiMaxHighEt & \textbf{0.0015} & \textbf{0.003} & \textbf{0.006} & \textbf{0.009} \\
 & dPhiMaxHighEtThres & 0.0 & 0.0 & 0.0 & 0.0 \\
 & dPhiMaxLowEtGrad & 0.0 & 0.0 & 0.0 & 0.0 \\
 & dRzMaxHighEt & \textbf{0.025} & \textbf{0.05} & \textbf{0.1} & \textbf{0.15} \\
 & dRzMaxHighEtThres & 30.0 & 30.0 & 30.0 & 30.0 \\
 & dRzMaxLowEtGrad & -0.002 & -0.002 & -0.002 & -0.002 \\ \bottomrule
\end{tabular}
\end{table}
\centering
\texttt{NHit} Seeding window parameters. Bold designates modified values.
\end{frame}

\begin{frame}{Overall Performance - Hit-Matching}
  \begin{center}
    Integrating over all tracks with $p_T>20\,\GeV$ and $|\eta|<2.4$ yields the performance numbers below.
\begin{table}[]
  \centering
  \begin{tabular}{@{}llrr} \toprule
Sample & Algo & Efficiency (Hit Matched) & Purity (Hit Matched) \\ \midrule
$Z\rightarrow ee$ & \texttt{old-seeding} & $88.05\pm0.28\%$ & $90.30\pm0.29\%$ \\
                  & \texttt{narrow}      & $86.63\pm0.28\%$ & $90.69\pm0.29\%$ \\
                  & \texttt{wide}        & $88.01\pm0.28\%$ & $90.43\pm0.29\%$ \\
$t\bar{t}$        & \texttt{old-seeding} & $88.06\pm0.77\%$ & $52.35\pm0.60\%$ \\
                  & \texttt{narrow}      & $86.89\pm0.79\%$ & $60.56\pm0.67\%$ \\
                  & \texttt{wide}        & $88.30\pm0.77\%$ & $54.38\pm0.61\%$ \\ \bottomrule
  \end{tabular}
\end{table}
Note that the \texttt{wide} working point of the new seeding matches the \texttt{old-seeding} within errors, except that purity is $\approx 2$\% better in the $t\bar{t}$ sample.
  \end{center}
\end{frame}

\begin{frame}{Samples}
  \begin{itemize}
    \item {\tiny /ZToEE\_NNPDF30\_13TeV-powheg\_M\_120\_200/RunIISummer17DRStdmix-NZSFlatPU28to62\_92X\_upgrade2017\_realistic\_v10-v1}

    \item {\tiny /TT\_TuneCUETP8M2T4\_13TeV-powheg-pythia8/RunIISummer17DRStdmix-NZSFlatPU28to62\_92X\_upgrade2017\_realistic\_v10-v2}
  \end{itemize}

\end{frame}

\backupend

\end{document}