Sfoglia il codice sorgente

Fixes issues while running batch jobs

  - tmp file for globbing inputs would get clobbered by parallel jobs;
    now each job gets its own temp file based on the glob path. (this
    is imperfect, but works for now)
  - Nicer printing of current file that limits the number of chars
    printed. Also adds an option to start each status line with a '\n'
    instead of a '\r' for capturing output in a file (like w/ condor).
Caleb 6 anni fa
parent
commit
e91fbb1a90
2 ha cambiato i file con 29 aggiunte e 7 eliminazioni
  1. 7 3
      include/config.hpp
  2. 22 4
      include/dataset.hpp

+ 7 - 3
include/config.hpp

@@ -39,6 +39,7 @@
 #include <sstream>
 
 #include <regex>
+#include <functional>
 
 #include "yaml-cpp/yaml.h"
 
@@ -93,17 +94,20 @@ namespace fv_util {
 
 
     std::vector<std::string> glob(const std::string& base) {
+        std::hash<std::string> s_hash;
         std::stringstream ss;
-        ss << "ls -1 " << base << " > __tmp__";
+        std::stringstream tmp_filename;
+        tmp_filename << "__tmp" << s_hash(base) << "__";
+        ss << "ls -1 " << base << " > " << tmp_filename.str();
         system(ss.str().c_str());
 
         std::vector<std::string> filenames;
-        std::ifstream f("__tmp__");
+        std::ifstream f(tmp_filename.str());
         std::string line;
         while (std::getline(f, line)) {
             filenames.push_back(line);
         }
-        std::remove("__tmp__");
+        std::remove(tmp_filename.str().c_str());
         return filenames;
     }
 

+ 22 - 4
include/dataset.hpp

@@ -31,6 +31,7 @@
 #ifndef dataset_hpp
 #define dataset_hpp
 #include <iostream>
+#include <sstream>
 #include <functional>
 
 #include <sys/time.h>
@@ -68,6 +69,8 @@ namespace fv {
 
         timeval start_time;
         bool stop_requested;
+        bool silent;
+        bool batch;
 
         void print_status() {
             size_t m_used = fv_util::getCurrentRSS() / 1024 / 1024;
@@ -92,8 +95,20 @@ namespace fv {
             if (secs_remaining > 0) {
                 time_remaining << secs_remaining << "S";
             }
-            std::cout << "\rprocessing event: " << current_event + 1 << "/" << get_events()
-                      << " of file: " << get_current_file().filename
+            if (batch) {
+                std::cout << "\n";
+            } else {
+                std::cout << "\r";
+            }
+
+            std::string filename = get_current_file().filename;
+            if (filename.size() > 80) {
+                std::stringstream ss;
+                ss << filename.substr(0,27) << "..." << filename.substr(filename.size() - 50);
+                filename = ss.str();
+            }
+            std::cout << "processing event: " << current_event + 1 << "/" << get_events()
+                      << " of file: " << filename
                       << ", " << m_used <<  "MB used "
                       << ", " << time_remaining.str() << " est. time remaining"
                       << std::flush;
@@ -122,8 +137,11 @@ namespace fv {
 
     public:
         DataSet() : stop_requested(false){
-            max_events = fv_util::the_config->get_max_events();
+            using fv_util::the_config;
+            max_events = the_config->get_max_events();
             the_dataset = this;
+            silent = the_config->get("silent").as<bool>(false);
+            batch = the_config->get("batch").as<bool>(false);
 
             signal(SIGINT, request_stop_callback);
             signal(SIGTERM, request_stop_callback);
@@ -133,7 +151,7 @@ namespace fv {
             if (stop_requested) return false;
             int current_event = get_current_event();
             if (current_event == 0) gettimeofday(&start_time, nullptr);
-            if (verbose and (((current_event + 1) % 500) == 0 or current_event+1 == get_events())) print_status();
+            if (!silent and (((current_event + 1) % 500) == 0 or current_event+1 == get_events())) print_status();
             GenValue::reset();
             return load_next();
         }