Kaynağa Gözat

Adds DataFileDescriptor for keeping track of multiple input files and
the associated changes to incorporate this class into the dataset class

Caleb Fangmeier 7 yıl önce
ebeveyn
işleme
62559fb459
3 değiştirilmiş dosya ile 129 ekleme ve 39 silme
  1. 78 0
      datafile.hpp
  2. 12 1
      dataset.hpp
  3. 39 38
      root/dataset.hpp

+ 78 - 0
datafile.hpp

@@ -0,0 +1,78 @@
+/**
+ * @file
+ * @author  Caleb Fangmeier <caleb@fangmeier.tech>
+ * @version 0.1
+ *
+ * @section LICENSE
+ *
+ *
+ * MIT License
+ *
+ * Copyright (c) 2017 Caleb Fangmeier
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * @section DESCRIPTION
+ */
+#ifndef datafile_hpp
+#define datafile_hpp
+
+#include <vector>
+#include <string>
+#include <fstream>
+#include <iostream>
+
+namespace fv::util{
+
+struct DataFileDescriptor{
+    std::string filename;
+    std::string label;
+    std::string category; //TODO: Populate this
+
+    DataFileDescriptor(const std::string filename, const std::string label)
+      :filename(filename), label(label),category("") { }
+
+    DataFileDescriptor(const std::string line):category(""){
+        std::string::size_type col_pos = line.find(":");
+        if(col_pos == std::string::npos){
+            this->filename = line;
+            this->label = "";
+        } else{
+            this->filename = line.substr(0, col_pos);
+            this->label = line.substr(col_pos+1);
+        }
+    }
+
+};
+
+// The input list should be a file containing one data file name per line, each
+// optionally followed by a label separated from the file name by a colon (':').
+std::vector<DataFileDescriptor> read_input_list(const std::string &filename){
+    std::ifstream istrm(filename, std::ios::in);
+
+    std::string line;
+    std::vector<DataFileDescriptor> dfds;
+    std::string prefix = filename.substr(0, filename.find_last_of("/")+1);
+    while(std::getline(istrm, line)){
+        dfds.push_back(DataFileDescriptor(prefix+line));
+    }
+    return dfds;
+}
+}
+#endif // datafile_hpp

+ 12 - 1
dataset.hpp

@@ -55,6 +55,7 @@ class DataSet{
         }
 
         Value<int>* current_event_number;
+        int max_events;
 
     protected:
         ContainerSet containers;
@@ -78,7 +79,7 @@ class DataSet{
             return impl_map;
         }
     public:
-        DataSet(){
+        DataSet():max_events(0){
             auto& event_check = GenFunction::reg_func<int()>("event_number",
                 FUNC(([ds=this](){
                     return ds->get_current_event();
@@ -90,6 +91,7 @@ class DataSet{
             int events, current_event;
             summary();
             events = get_events();
+            if (max_events > 0) events = max_events;
             if (!silent) std::cout << std::endl;
             while( load_next() ){
                 current_event = get_current_event();
@@ -98,10 +100,19 @@ class DataSet{
                 for(auto con : containers){
                     con.second->fill();
                 }
+                if(max_events && current_event+1 >= max_events) break;
             }
             if (!silent) std::cout << " Finished!" << std::endl;
         }
 
+        void set_max_events(const int& max_events){
+            this->max_events = max_events;
+        }
+
+        int get_max_events(){
+            return this->max_events;
+        }
+
         virtual void save_all(){
             for(auto container : containers)
                 container.second->save();

+ 39 - 38
root/dataset.hpp

@@ -47,10 +47,12 @@ template<typename TREE_CLASS>
 class TreeDataSet : public DataSet{
     private:
         // Maps filenames to data category. Either "signal" or "background"
-        std::map<std::string,std::string> input_categories;
+        /* std::map<std::string,std::string> input_categories; */
         // Maps filenames to data label, eg. "TTTT", or "TTZ"
         std::map<std::string,std::string> input_labels;
+        std::map<std::string,std::string> input_categories;
         std::string output_filename;
+        std::vector<fv::util::DataFileDescriptor> dfds;
         TFile* output_file;
         TREE_CLASS* tree_obj;
         long next_entry;
@@ -96,58 +98,57 @@ class TreeDataSet : public DataSet{
         }
 
     public:
-        TreeDataSet(const std::string& output_filename,
-                    const std::string& input_filename,
-                    const std::string& data_label,
-                    const std::string& tree_name="tree")
+
+        /* TreeDataSet(const std::string& output_filename, */
+        /*             const std::vector<std::string>& input_filenames, */
+        /*             const std::string& data_label, */
+        /*             const std::string& tree_name="tree") */
+        /*   :DataSet(), */
+        /*    input_labels({ {input_filename, data_label} }), */
+        /*    output_filename(output_filename), */
+        /*    next_entry(0) { */
+        /*     TChain* chain = new TChain(tree_name.c_str()); */
+        /*     chain->Add(input_filename.c_str()); */
+        /*     tree_obj = new TREE_CLASS(chain); */
+        /*     nentries = tree_obj->fChain->GetEntries(); */
+        /*     output_file = TFile::Open(output_filename.c_str(), "RECREATE"); */
+        /*     tree_obj->fChain->SetBranchStatus("*", false); */
+        /*   } */
+
+        /* void print_branch_statuses(){ */
+        /*     TObjArray* obj_arr = tree_obj->fChain->GetListOfBranches(); */
+        /*     for(TIter iter=obj_arr->begin(); iter!= obj_arr->end(); ++iter){ */
+        /*         TBranch* br = (TBranch*)*iter; */
+        /*         std::cout << br->GetName() << ": " */
+        /*             << tree_obj->fChain->GetBranchStatus(br->GetName()) << std::endl; */
+        /*     } */
+
+        /* } */
+
+        TreeDataSet(const std::string& output_filename, const std::vector<fv::util::DataFileDescriptor>& dfds,
+                    const std::string& tree_name)
           :DataSet(),
-           input_categories({ {input_filename, "signal"} }),
-           input_labels({ {input_filename, data_label} }),
+           dfds(dfds),
            output_filename(output_filename),
            next_entry(0) {
             TChain* chain = new TChain(tree_name.c_str());
-            chain->Add(input_filename.c_str());
+            for(auto& dfd : dfds){
+                chain->Add(dfd.filename.c_str());
+                input_labels[dfd.filename] = dfd.label;
+                input_categories[dfd.filename] = dfd.category;
+            }
             tree_obj = new TREE_CLASS(chain);
             nentries = tree_obj->fChain->GetEntries();
             output_file = TFile::Open(output_filename.c_str(), "RECREATE");
             tree_obj->fChain->SetBranchStatus("*", false);
-            /* print_branch_statuses(); */
           }
 
-        void print_branch_statuses(){
-            TObjArray* obj_arr = tree_obj->fChain->GetListOfBranches();
-            for(TIter iter=obj_arr->begin(); iter!= obj_arr->end(); ++iter){
-                TBranch* br = (TBranch*)*iter;
-                std::cout << br->GetName() << ": " 
-                    << tree_obj->fChain->GetBranchStatus(br->GetName()) << std::endl;
-            }
-
-        }
-
-        // TODO: Rewrite this constructor
-        /* MiniTreeDataSet(const std::string& output_filename, const std::map<std::string,std::string>& filenames_with_labels) */
-        /*   :DataSet(), */
-        /*    input_categories(filenames_with_labels), */
-        /*    output_filename(output_filename), */
-        /*    next_entry(0) { */
-        /*     TChain* chain = new TChain("tree"); */
-        /*     for(auto& p : filenames_with_labels){ */
-        /*         std::string filename; */
-        /*         std::tie(filename, std::ignore) = p; */
-        /*         chain->Add(filename.c_str()); */
-        /*     } */
-        /*     Init(chain); */
-        /*     nentries = fChain->GetEntries(); */
-        /*     output_file = TFile::Open(output_filename.c_str(), "RECREATE"); */
-        /*     this->fChain->SetBranchStatus("*", false); */
-        /*   } */
-
         ~TreeDataSet(){
             save_event_count_and_xsection();
             output_file->Close();
         }
 
-        const std::string& get_current_event_label() const{
+        const std::string& get_current_event_category() const{
             TFile* file = tree_obj->fChain->GetFile();
             std::string filename = file->GetName();
             return input_categories.at(filename);