/**
 * @file
 * @author  Caleb Fangmeier
 * @version 0.1
 *
 * @section LICENSE
 *
 *
 * MIT License
 *
 * Copyright (c) 2017 Caleb Fangmeier
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * @section DESCRIPTION
 * This header defines a set of generic classes that wrap up "values". In
 * essence, a Value object is just something that contains a value of type T
 * and can provide it when requested. The usefulness stems from composing
 * values together with calculations. This enables very clear dependency
 * mapping and a way to know clearly how every value was arrived at. This could
 * be used to, for example, automatically generate commentary for plots that
 * explains the exact calculation used to create them, or to easily make a
 * series of plots contrasting different values that have been composed
 * slightly differently.
*/ #ifndef value_hpp #define value_hpp #include #include #include #include #include #include #include #include #include #include #include "log.hpp" /** * The namespace containing all filval classes and functions. */ namespace fv{ bool in_register_function = false; template class Function; // undefined /** * Parent class to all Function classes. Holds a class-level collection of all * created function objects. */ class GenFunction { private: std::string name; std::string impl; public: /** * Static mapping of functions from their name to the object wrapper of * the function. */ inline static std::map function_registry; GenFunction(const std::string& name, const std::string& impl) :impl(impl), name(name){ } virtual ~GenFunction() { }; std::string& get_name(){ return name; } /** * Attempt to invoke clang-format for the purpose of printing out * nicely formatted functions to the log file. If clang-format is not * present, this function just passes through the code unmodified. */ static std::string format_code(const std::string& code){ std::stringstream code_out(""); std::string command("echo \""+code+"\" | clang-format"); char buffer[255]; FILE *stream = popen(command.c_str(), "r"); while (fgets(buffer, 255, stream) != NULL) code_out << buffer; if (pclose(stream) == 0) return code_out.str(); else return code; } static std::string summary(){ std::stringstream ss; ss << "The following functions have been registered" << std::endl; for(auto p : function_registry){ if (p.second == nullptr) continue; ss << "-->" << p.second->name << std::endl; ss << format_code(p.second->impl); } return ss.str(); } template static Function& register_function(const std::string& name, std::function f, const std::string& impl){ in_register_function = true; Function* func; if (GenFunction::function_registry[name] != nullptr){ func = dynamic_cast*>(GenFunction::function_registry[name]); if (func == nullptr){ ERROR("Trying to register function which has already been registerd with a different 
type"); } } else { func = new Function(name, impl, f); GenFunction::function_registry[name] = func; } in_register_function = false; return *func; } }; /** * In order to enable proper provenance tracking, and at the same time keep * the ability to embed functions into values, the Function class should be * used. It is simply a wrapper around a std::function that also has a name. * This name is used when generating the name of values that use the function. * A function name is automatically prepended with "func::" to explicitly state * that the value is the result of a computation encoded within the function * object, and not from some other Value object. Unfortunately, it is up to the * user to find where that function is defined in the source code to inspect * what it is doing. But hopefully this isn't too onerous by just using grep. */ template class Function : public GenFunction { private: std::function f; public: Function(const std::string& name, const std::string& impl, std::function f) :GenFunction(name, impl), f(f){ if (!in_register_function) { WARNING("Don't instantiate Function objects directly! Use register_function instead."); } } Function(const std::string& name, std::function f) :Function(name, "N/A", f){ } ~Function() { } R operator()(ArgTypes ...args){ return f(args...); } }; #define FUNC(f) f, #f /** * A type-agnostic value. * It is necessary to create a type-agnostic parent class to Value so that * it is possible to handle collections of them. GenValue also provides the * rest of the type-independent interface to Value. */ class GenValue; typedef std::map ValueSet; class GenValue{ private: /** * The name of the value. * This is used to allow for dynamic lookup of * values based on their name via GenValue::get_value. */ std::string name; protected: /** * Mark the internal value as invalid. This is needed for DerivedValue * to force a recalculation of the internal value when a new * observation is loaded into memory. 
It is called automatically for * all GenValue objects when reset is called. */ virtual void _reset() = 0; /** * A static mapping containing all created Value objects. * Every value object must have a unique name, and this name is used as * a key in values to that object. This is used to enable more dynamic * creation of objects as well as avoiding the uneccesary passing of * pointers. */ inline static std::map values; /** * Composite value names are typically nested. This makes complex * values have rather unwieldy names. Therefore, one can declare * aliases which allow for more human-usable names to be used. When a * value is requested by name, an alias with that value takes precidence * over a name with that value. */ inline static std::map aliases; public: GenValue(const std::string& name, const std::string& alias) :name(name){ values[name] = this; if (alias != "") GenValue::alias(alias, this); } const std::string& get_name(){ return name; } static void reset(){ for (auto val : values){ val.second->_reset(); } } static GenValue* get_value(const std::string& name){ if (aliases[name] != nullptr) return aliases[name]; else if (values[name] != nullptr) return values[name]; else{ ERROR("Could not find alias or value \"" << name << "\". I'll tell you the ones I know about." << std::endl << summary()); CRITICAL("Aborting... 
:(", -1); } } static void alias(const std::string& name, GenValue* value){ if (aliases[name] != nullptr){ WARNING("WARNING: alias \"" << name << "\" overrides previous entry."); } aliases[name] = value; } static GenValue* alias(const std::string& name){ if (values[name] != nullptr){ WARNING("Alias \"" << name << "\" does not exist."); } return aliases[name]; } static std::string summary(){ std::stringstream ss; ss << "The following values have been created: " << std::endl; for (auto value : values){ if (value.second == nullptr) continue; ss << "\t\"" << value.first << "\" at address " << value.second << std::endl; } ss << "And these aliases:" << std::endl; for (auto alias : aliases){ std::string orig("VOID"); if (alias.second == nullptr) continue; for (auto value : values){ if (alias.second == value.second){ orig = value.second->get_name(); break; } } ss << "\t\"" << alias.first << "\" referring to \"" << orig << "\"" << std::endl; } return ss.str(); } friend std::ostream& operator<<(std::ostream& os, const GenValue& gv); }; std::ostream& operator<<(std::ostream& os, GenValue& gv){ os << gv.get_name(); return os; } /** * A generic value. * In order to facilitate run-time creation of analysis routines, it is * necessary to have some ability to get and store *values*. Values can either * be directly taken from some original data source (i.e. ObservedValue), or * they can be a function of some other set of values (i.e. DerivedValue). They * template class T of Value is the type of thing that is returned upon * calling get_value(). */ template class Value : public GenValue{ public: Value(const std::string& name, const std::string& alias="") :GenValue(name, alias){ } /** Calculate, if necessary, and return the value held by this object. */ virtual T& get_value() = 0; }; /** * A generic, observed, value. * An ObservedValue is the interface to your dataset. Upon creation, an * ObservedValue is given a pointer to an object of type T. 
When an observation * is loaded into memory, the value at the location referenced by that pointer * must be updated with the associated data from that observation. This is the * responsibility of whatever DataSet implementation is being used. This object * then will read that data and return it when requested. */ template class ObservedValue : public Value{ private: T *val_ref; void _reset(){ } public: ObservedValue(const std::string& name, T* val_ref, const std::string& alias="") :Value(name, alias), val_ref(val_ref){ } T& get_value(){ return *val_ref; } }; /** * A generic, derived, value. * A DerivedValue is generally defined as some function of other Value objects. * For example, a Pair is a function of two other Value objects that makes a * pair of them. Note that these other Value objects are free to be either * ObservedValues or other DerivedValues. * * It is desireable from a performance standpoint that each DerivedValue be * calculated no more than once per observation. Therefore, when a get_value is * called on a DerivedValue, it first checks whether the value that it holds is * **valid**, meaning it has already been calculated for this observation. If * so, it simply returns the value. If not, the update_value function is called * to calculate the value. and then the newly calculated value is marked as * valid and returned. */ template class DerivedValue : public Value{ private: void _reset(){ value_valid = false; } protected: T value; bool value_valid; /** * Updates the internal value. * This function should be overridden by any child class to do the * actual work of updating value based on whatever rules the class * chooses. Normally, this consists of geting the values from some * associated Value objects, doing some calculation on them, and * storing the result in value. 
*/ virtual void update_value() = 0; public: DerivedValue(const std::string& name, const std::string& alias="") :Value(name, alias), value_valid(false) { } T& get_value(){ if (!value_valid){ update_value(); value_valid = true; } return value; } }; /** * A std::vector wrapper around a C-style array. * In order to make some of the higher-level Value types easier to work with, * it is a good idea to wrap all arrays in the original data source with * std::vector objects. To do this, it is necessary to supply both a Value * object containing the array itself as well as another Value object * containing the size of that array. Currently, update_value will simply copy * the contents of the array into the interally held vector. * \todo avoid an unneccessary copy and set the vectors data directly. */ template class WrapperVector : public DerivedValue >{ private: Value* size; Value* data; void update_value(){ int n = size->get_value(); T* data_ref = data->get_value(); this->value.resize(n); for (int i=0; ivalue[i] = *(data_ref+i); } } public: WrapperVector(Value* size, Value* data, const std::string& alias="") :DerivedValue >("vectorOf("+size->get_name()+","+data->get_name()+")", alias), size(size), data(data){ } WrapperVector(const std::string &label_size, const std::string &label_data, const std::string& alias="") :WrapperVector(dynamic_cast*>(GenValue::get_value(label_size)), dynamic_cast*>(GenValue::get_value(label_data)), alias) { } }; /** * Creates a std::pair type from a two other Value objects. 
*/ template class Pair : public DerivedValue >{ protected: std::pair*, Value* > value_pair; void update_value(){ this->value.first = value_pair.first->get_value(); this->value.second = value_pair.second->get_value(); } public: Pair(Value *value1, Value *value2, const std::string alias="") :DerivedValue >("pair("+value1->get_name()+","+value2->get_name()+")", alias), value_pair(value1, value2){ } Pair(const std::string& label1, const std::string& label2, const std::string alias="") :Pair(dynamic_cast*>(GenValue::get_value(label1)), dynamic_cast*>(GenValue::get_value(label2)), alias){ } }; /** * Takes a set of four Value > objects and a function of four Ts * and returns a std::vector. This is used in, for instance, calculating the * energy of a set of particles when one has separate arrays containing pt, * eta, phi, and mass. These arrays are first wrapped up in VectorWrappers and * then passes along with a function to calculate the energy into a ZipMapFour. * The result of this calculation is a new vector containing the energy for * each particle. Note that if the input vectors are not all the same size, * calculations are only performed up to the size of the shortest. * \see MiniTreeDataSet * \todo find way to implement for arbitrary number(and possibly type) of * vector inputs. 
*/ template class ZipMapFour : public DerivedValue >{ private: Function& f; Value >* v1; Value >* v2; Value >* v3; Value >* v4; void update_value(){ std::vector v1_val = v1->get_value(); std::vector v2_val = v2->get_value(); std::vector v3_val = v3->get_value(); std::vector v4_val = v4->get_value(); int n; std::tie(n, std::ignore) = std::minmax({v1_val.size(), v2_val.size(), v3_val.size(), v4_val.size()}); this->value.resize(n); for (int i=0; ivalue[i] = f(v1_val[i], v2_val[i], v3_val[i], v4_val[i]); } } public: ZipMapFour(Function& f, Value >* v1, Value >* v2, Value >* v3, Value >* v4, const std::string alias="") :DerivedValue >("zipmap("+f.get_name()+":"+v1->get_name()+","+v2->get_name()+","+ v3->get_name()+","+v4->get_name()+")", alias), f(f), v1(v1), v2(v2), v3(v3), v4(v4) { } ZipMapFour(Function& f, const std::string& label1, const std::string& label2, const std::string& label3, const std::string& label4, const std::string alias="") :ZipMapFour(f, dynamic_cast >*>(GenValue::get_value(label1)), dynamic_cast >*>(GenValue::get_value(label2)), dynamic_cast >*>(GenValue::get_value(label3)), dynamic_cast >*>(GenValue::get_value(label4)), alias){ } }; /** * Reduce a Value of type vector to just a T. * This is useful functionality to model, for instance, calculating the maximum * element of a vector, or a the mean. See child classes for specific * implementations. */ template class Reduce : public DerivedValue{ private: Function)>& reduce; Value >* v; void update_value(){ this->value = reduce(v->get_value()); } public: Reduce(Function)>& reduce, Value >* v, const std::string alias="") :DerivedValue("reduceWith("+reduce.get_name()+":"+v->get_name()+")", alias), reduce(reduce), v(v) { } Reduce(Function)>& reduce, const std::string& v_name, const std::string alias="") :Reduce(reduce, dynamic_cast >*>(GenValue::get_value(v_name)), alias) { } }; /** * Find and return the maximum value of a vector. 
*/ template class Max : public Reduce{ public: Max(const std::string& v_name, const std::string alias="") :Reduce(GenFunction::register_function)>("max", FUNC(([](std::vector vec){ return *std::max_element(vec.begin(), vec.end());}))), v_name, alias) { } }; /** * Find and return the minimum value of a vector. */ template class Min : public Reduce{ public: Min(const std::string& v_name, const std::string alias="") :Reduce(new Function)>("min", [](std::vector vec){ return *std::min_element(vec.begin(), vec.end());}), v_name, alias) { } }; /** * Calculate the mean value of a vector. */ template class Mean : public Reduce{ public: Mean(const std::string& v_name, const std::string alias="") :Reduce(new Function)>("mean", [](std::vector vec){ int n = 0; T sum = 0; for (T e : vec){ n++; sum += e; } return n>0 ? sum / n : 0; }), v_name, alias) { } }; /** * Extract the element at a specific index from a vector. */ template class ElementOf : public Reduce{ public: ElementOf(Value* index, const std::string& v_name, const std::string alias="") :Reduce(new Function)>("elementOf", [index](std::vector vec){return vec[index->get_value()];}), v_name, alias) { } ElementOf(const std::string& name, int index, const std::string& v_name, const std::string alias="") :Reduce(name, [index](std::vector vec){return vec[index];}, v_name, alias) { } }; /** * Similar to Reduce, but returns a pair of a T and an int. * This is useful if you need to know where in the vector exists the element * being returned. 
*/ template class ReduceIndex : public DerivedValue >{ private: Function(std::vector)>& reduce; Value >* v; void update_value(){ this->value = reduce(v->get_value()); } public: ReduceIndex(Function(std::vector)>& reduce, Value >* v, const std::string alias="") :DerivedValue("reduceIndexWith("+reduce.get_name()+":"+v->get_name()+")", alias), reduce(reduce), v(v) { } ReduceIndex(Function(std::vector)>& reduce, const std::string& v_name, const std::string alias="") :ReduceIndex(reduce, dynamic_cast >*>(GenValue::get_value(v_name)), alias) { } }; /** * Find and return the maximum value of a vector and its index. */ template class MaxIndex : public ReduceIndex{ public: MaxIndex(const std::string& v_name, const std::string alias="") :ReduceIndex(GenFunction::register_function)>("maxIndex", FUNC(([](std::vector vec){ auto elptr = std::max_element(vec.begin(), vec.end()); return std::pair(*elptr, int(elptr-vec.begin())); } ))), v_name, alias) { } }; /** * Find and return the minimum value of a vector and its index. */ template class MinIndex : public ReduceIndex{ public: MinIndex(const std::string& v_name, const std::string alias="") :ReduceIndex(GenFunction::register_function)>("minIndex", FUNC(([](std::vector vec){ auto elptr = std::min_element(vec.begin(), vec.end()); return std::pair(*elptr, int(elptr-vec.begin())); } ))), v_name, alias) { } }; /** * A generic value owning only a function object. * All necessary values upon which this value depends must be bound to the * function object. */ template class BoundValue : public DerivedValue{ protected: Function& f; void update_value(){ this->value = f(); } public: BoundValue(Function& f, const std::string alias="") :DerivedValue(f.get_name()+"()", alias), f(f) { } }; /** * A Value of a pointer. The pointer is constant, however the data the pointer * points to is variable. 
*/ template class PointerValue : public DerivedValue{ protected: void update_value(){ } public: PointerValue(const std::string& name, T* ptr, const std::string alias="") :DerivedValue(name, alias){ this->value = ptr; } }; /** * A Value which always returns the same value, supplied in the constructor. */ template class ConstantValue : public DerivedValue{ protected: T const_value; void update_value(){ this->value = const_value; } public: ConstantValue(const std::string& name, T const_value, const std::string alias="") :DerivedValue("const::"+name, alias), const_value(const_value) { } }; } #endif // value_hpp