value.hpp 23 KB


  1. /**
  2. * @file
  3. * @author Caleb Fangmeier <caleb@fangmeier.tech>
  4. * @version 0.1
  5. *
  6. * @section LICENSE
  7. *
  8. *
  9. * MIT License
  10. *
  11. * Copyright (c) 2017 Caleb Fangmeier
  12. *
  13. * Permission is hereby granted, free of charge, to any person obtaining a copy
  14. * of this software and associated documentation files (the "Software"), to deal
  15. * in the Software without restriction, including without limitation the rights
  16. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  17. * copies of the Software, and to permit persons to whom the Software is
  18. * furnished to do so, subject to the following conditions:
  19. *
  20. * The above copyright notice and this permission notice shall be included in all
  21. * copies or substantial portions of the Software.
  22. *
  23. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  28. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  29. * SOFTWARE.
  30. *
  31. * @section DESCRIPTION
  32. * This header defines a set of generic classes that wrap up "values". In
  33. * essence, a Value<T> object is just something that contains a value of type T
  34. * and can provide it when requested. The usefulness stems from composing
  35. * values together with calculations. This enables very clear dependency
  36. * mapping and a way to know clearly how every value was arrived at. This could
 * be used to, for example, automatically generate commentary for plots that
 * explains the exact calculation used to create them, or to easily make a
 * series of plots contrasting values that have been composed slightly
 * differently.
  41. */
  42. #ifndef value_hpp
  43. #define value_hpp
#include <algorithm>
#include <functional>
#include <initializer_list>
#include <iostream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "log.hpp"
  54. /**
  55. * The namespace containing all filval classes and functions.
  56. */
  57. namespace fv{
  58. bool in_register_function = false;
  59. template<typename> class Function; // undefined
  60. /**
  61. * Parent class to all Function classes. Holds a class-level collection of all
  62. * created function objects.
  63. */
  64. class GenFunction {
  65. private:
  66. std::string name;
  67. std::string impl;
  68. protected:
  69. /* virtual void _() = 0; */
  70. public:
  71. /**
  72. * Static mapping of functions from their name to the object wrapper of
  73. * the function.
  74. */
  75. inline static std::map<const std::string, GenFunction*> function_registry;
  76. GenFunction(const std::string& name, const std::string& impl)
  77. :impl(impl),
  78. name(name){
  79. }
  80. virtual ~GenFunction() { };
  81. std::string& get_name(){
  82. return name;
  83. }
  84. static std::string summary(){
  85. std::stringstream ss;
  86. for(auto p : function_registry){
  87. if (p.second == nullptr) continue;
  88. ss << p.second->name << std::endl;
  89. ss << "\t" << p.second->impl << std::endl;
  90. ss << "##############################" << std::endl;
  91. }
  92. return ss.str();
  93. }
  94. /* template <typename R, typename... ArgTypes> */
  95. template <typename T>
  96. static Function<T>& register_function(const std::string& name, std::function<T> f, const std::string& impl){
  97. in_register_function = true;
  98. Function<T>* func;
  99. if (GenFunction::function_registry[name] != nullptr){
  100. func = dynamic_cast<Function<T>*>(GenFunction::function_registry[name]);
  101. if (func == nullptr){
  102. ERROR("Trying to register function which has already been registerd with a different type");
  103. }
  104. } else {
  105. func = new Function<T>(name, impl, f);
  106. GenFunction::function_registry[name] = func;
  107. }
  108. in_register_function = false;
  109. return *func;
  110. }
  111. /* static Function<R(ArgTypes...)>& register_function(const std::string& name, std::function<R(ArgTypes...)> f){ */
  112. /* return Function<R(ArgTypes...)>::register_function(name, f, "N/A"); */
  113. /* } */
  114. };
  115. /**
  116. * In order to enable proper provenance tracking, and at the same time keep
  117. * the ability to embed functions into values, the Function class should be
  118. * used. It is simply a wrapper around a std::function that also has a name.
  119. * This name is used when generating the name of values that use the function.
  120. * A function name is automatically prepended with "func::" to explicitly state
  121. * that the value is the result of a computation encoded within the function
  122. * object, and not from some other Value object. Unfortunately, it is up to the
  123. * user to find where that function is defined in the source code to inspect
  124. * what it is doing. But hopefully this isn't too onerous by just using grep.
  125. */
  126. template <typename R, typename... ArgTypes>
  127. class Function<R(ArgTypes...)> : public GenFunction {
  128. private:
  129. std::function<R(ArgTypes...)> f;
  130. public:
  131. Function(const std::string& name, const std::string& impl, std::function<R(ArgTypes...)> f)
  132. :GenFunction(name, impl), f(f){
  133. if (!in_register_function) {
  134. WARNING("Don't instantiate Function objects directly! Use register_function instead.");
  135. }
  136. }
  137. Function(const std::string& name, std::function<R(ArgTypes...)> f)
  138. :Function(name, "N/A", f){ }
  139. ~Function() { }
  140. R operator()(ArgTypes ...args){
  141. return f(args...);
  142. }
  143. };
  144. #define FUNC(f) f, #f
  145. /**
  146. * A type-agnostic value.
  147. * It is necessary to create a type-agnostic parent class to Value so that
  148. * it is possible to handle collections of them. GenValue also provides the
  149. * rest of the type-independent interface to Value.
  150. */
  151. class GenValue;
  152. typedef std::map<std::string, GenValue*> ValueSet;
  153. class GenValue{
  154. private:
  155. /**
  156. * The name of the value.
  157. * This is used to allow for dynamic lookup of
  158. * values based on their name via GenValue::get_value.
  159. */
  160. std::string name;
  161. protected:
  162. /**
  163. * Mark the internal value as invalid. This is needed for DerivedValue
  164. * to force a recalculation of the internal value when a new
  165. * observation is loaded into memory. It is called automatically for
  166. * all GenValue objects when reset is called.
  167. */
  168. virtual void _reset() = 0;
  169. /**
  170. * A static mapping containing all created Value objects.
  171. * Every value object must have a unique name, and this name is used as
  172. * a key in values to that object. This is used to enable more dynamic
  173. * creation of objects as well as avoiding the uneccesary passing of
  174. * pointers.
  175. */
  176. inline static std::map<const std::string, GenValue*> values;
  177. inline static std::map<const std::string, GenValue*> aliases;
  178. public:
  179. GenValue(const std::string& name)
  180. :name(name){
  181. values[name] = this;
  182. }
  183. const std::string& get_name(){
  184. return name;
  185. }
  186. static void reset(){
  187. for (auto val : values){
  188. val.second->_reset();
  189. }
  190. }
  191. static GenValue* get_value(const std::string& name){
  192. if (aliases[name] != nullptr)
  193. return aliases[name];
  194. else if (values[name] != nullptr)
  195. return values[name];
  196. else{
  197. ERROR("Could not find alias or value \"" << name << "\". I'll tell you the ones I know about."
  198. << summary());
  199. CRITICAL("Aborting... :(", -1);
  200. }
  201. }
  202. static void alias(const std::string& name, GenValue* value){
  203. if (aliases[name] != nullptr){
  204. WARNING("WARNING: alias \"" << name << "\" overrides previous entry.");
  205. }
  206. aliases[name] = value;
  207. }
  208. static GenValue* alias(const std::string& name){
  209. if (values[name] != nullptr){
  210. WARNING("Alias \"" << name << "\" does not exist.");
  211. }
  212. return aliases[name];
  213. }
  214. static std::string summary(){
  215. std::stringstream ss;
  216. ss << "The following values have been created: " << std::endl;
  217. for (auto value : values){
  218. if (value.second == nullptr) continue;
  219. ss << "\t\"" << value.first << "\" at address " << value.second << std::endl;
  220. }
  221. ss << "And these aliases:" << std::endl;
  222. for (auto alias : aliases){
  223. std::string orig("VOID");
  224. if (alias.second == nullptr) continue;
  225. for (auto value : values){
  226. if (alias.second == value.second){
  227. orig = value.second->get_name();
  228. break;
  229. }
  230. }
  231. ss << "\t\"" << alias.first << "\" referring to \"" << orig << "\"" << std::endl;
  232. }
  233. return ss.str();
  234. }
  235. };
  236. /**
  237. * A generic value.
  238. * In order to facilitate run-time creation of analysis routines, it is
  239. * necessary to have some ability to get and store *values*. Values can either
  240. * be directly taken from some original data source (i.e. ObservedValue), or
  241. * they can be a function of some other set of values (i.e. DerivedValue). They
  242. * template class T of Value<T> is the type of thing that is returned upon
  243. * calling get_value().
  244. */
  245. template <typename T>
  246. class Value : public GenValue{
  247. public:
  248. Value(const std::string& name)
  249. :GenValue(name){ }
  250. /** Calculate, if necessary, and return the value held by this object.
  251. */
  252. virtual T& get_value() = 0;
  253. };
  254. /**
  255. * A generic, observed, value.
  256. * An ObservedValue is the interface to your dataset. Upon creation, an
  257. * ObservedValue is given a pointer to an object of type T. When an observation
  258. * is loaded into memory, the value at the location referenced by that pointer
  259. * must be updated with the associated data from that observation. This is the
  260. * responsibility of whatever DataSet implementation is being used. This object
  261. * then will read that data and return it when requested.
  262. */
  263. template <typename T>
  264. class ObservedValue : public Value<T>{
  265. private:
  266. T *val_ref;
  267. void _reset(){ }
  268. public:
  269. ObservedValue(const std::string& name, T* val_ref)
  270. :Value<T>(name),
  271. val_ref(val_ref){ }
  272. T& get_value(){
  273. return *val_ref;
  274. }
  275. };
  276. /**
  277. * A generic, derived, value.
  278. * A DerivedValue is generally defined as some function of other Value objects.
  279. * For example, a Pair is a function of two other Value objects that makes a
  280. * pair of them. Note that these other Value objects are free to be either
  281. * ObservedValues or other DerivedValues.
  282. *
  283. * It is desireable from a performance standpoint that each DerivedValue be
  284. * calculated no more than once per observation. Therefore, when a get_value is
  285. * called on a DerivedValue, it first checks whether the value that it holds is
  286. * **valid**, meaning it has already been calculated for this observation. If
  287. * so, it simply returns the value. If not, the update_value function is called
  288. * to calculate the value. and then the newly calculated value is marked as
  289. * valid and returned.
  290. */
  291. template <typename T>
  292. class DerivedValue : public Value<T>{
  293. private:
  294. void _reset(){
  295. value_valid = false;
  296. }
  297. protected:
  298. T value;
  299. bool value_valid;
  300. /**
  301. * Updates the internal value.
  302. * This function should be overridden by any child class to do the
  303. * actual work of updating value based on whatever rules the class
  304. * chooses. Normally, this consists of geting the values from some
  305. * associated Value objects, doing some calculation on them, and
  306. * storing the result in value.
  307. */
  308. virtual void update_value() = 0;
  309. public:
  310. DerivedValue(const std::string& name)
  311. :Value<T>(name),
  312. value_valid(false) { }
  313. T& get_value(){
  314. if (!value_valid){
  315. update_value();
  316. value_valid = true;
  317. }
  318. return value;
  319. }
  320. };
  321. /**
  322. * A std::vector wrapper around a C-style array.
  323. * In order to make some of the higher-level Value types easier to work with,
  324. * it is a good idea to wrap all arrays in the original data source with
  325. * std::vector objects. To do this, it is necessary to supply both a Value
  326. * object containing the array itself as well as another Value object
  327. * containing the size of that array. Currently, update_value will simply copy
  328. * the contents of the array into the interally held vector.
  329. * \todo avoid an unneccessary copy and set the vectors data directly.
  330. */
  331. template <typename T>
  332. class WrapperVector : public DerivedValue<std::vector<T> >{
  333. private:
  334. Value<int>* size;
  335. Value<T*>* data;
  336. void update_value(){
  337. int n = size->get_value();
  338. T* data_ref = data->get_value();
  339. this->value.resize(n);
  340. for (int i=0; i<n; i++){
  341. this->value[i] = *(data_ref+i);
  342. }
  343. }
  344. public:
  345. WrapperVector(Value<int>* size, Value<T*>* data)
  346. :DerivedValue<std::vector<T> >("vectorOf("+size->get_name()+","+data->get_name()+")"),
  347. size(size), data(data){ }
  348. WrapperVector(const std::string &label_size, const std::string &label_data)
  349. :WrapperVector(dynamic_cast<Value<int>*>(GenValue::values.at(label_size)),
  350. dynamic_cast<Value<T*>*>(GenValue::values.at(label_data))) { }
  351. };
  352. /**
  353. * Creates a std::pair type from a two other Value objects.
  354. */
  355. template <typename T1, typename T2>
  356. class Pair : public DerivedValue<std::pair<T1, T2> >{
  357. protected:
  358. std::pair<Value<T1>*, Value<T2>* > value_pair;
  359. void update_value(){
  360. this->value.first = value_pair.first->get_value();
  361. this->value.second = value_pair.second->get_value();
  362. }
  363. public:
  364. Pair(Value<T1> *value1, Value<T2> *value2)
  365. :DerivedValue<std::pair<T1, T2> >("pair("+value1->get_name()+","+value2->get_name()+")"),
  366. value_pair(value1, value2){ }
  367. Pair(const std::string& label1, const std::string& label2)
  368. :Pair(dynamic_cast<Value<T1>*>(GenValue::values.at(label1)),
  369. dynamic_cast<Value<T1>*>(GenValue::values.at(label2))){ }
  370. };
  371. /**
  372. * Takes a set of four Value<std::vector<T> > objects and a function of four Ts
  373. * and returns a std::vector<R>. This is used in, for instance, calculating the
  374. * energy of a set of particles when one has separate arrays containing pt,
  375. * eta, phi, and mass. These arrays are first wrapped up in VectorWrappers and
  376. * then passes along with a function to calculate the energy into a ZipMapFour.
  377. * The result of this calculation is a new vector containing the energy for
  378. * each particle. Note that if the input vectors are not all the same size,
  379. * calculations are only performed up to the size of the shortest.
  380. * \see MiniTreeDataSet
  381. * \todo find way to implement for arbitrary number(and possibly type) of
  382. * vector inputs.
  383. */
  384. template <typename R, typename T>
  385. class ZipMapFour : public DerivedValue<std::vector<R> >{
  386. private:
  387. Function<R(T, T, T, T)>& f;
  388. Value<std::vector<T> >* v1;
  389. Value<std::vector<T> >* v2;
  390. Value<std::vector<T> >* v3;
  391. Value<std::vector<T> >* v4;
  392. void update_value(){
  393. std::vector<T> v1_val = v1->get_value();
  394. std::vector<T> v2_val = v2->get_value();
  395. std::vector<T> v3_val = v3->get_value();
  396. std::vector<T> v4_val = v4->get_value();
  397. int n;
  398. std::tie(n, std::ignore) = std::minmax({v1_val.size(), v2_val.size(), v3_val.size(), v4_val.size()});
  399. this->value.resize(n);
  400. for (int i=0; i<n; i++){
  401. this->value[i] = f(v1_val[i], v2_val[i], v3_val[i], v4_val[i]);
  402. }
  403. }
  404. public:
  405. ZipMapFour(Function<R(T, T, T, T)>& f,
  406. Value<std::vector<T> >* v1, Value<std::vector<T> >* v2,
  407. Value<std::vector<T> >* v3, Value<std::vector<T> >* v4)
  408. :DerivedValue<std::vector<R> >("zipmap("+f.get_name()+":"+v1->get_name()+","+v2->get_name()+","+v3->get_name()+","+v4->get_name()+")"),
  409. f(f), v1(v1), v2(v2), v3(v3), v4(v4) { }
  410. ZipMapFour(Function<R(T, T, T, T)>* f,
  411. const std::string &label1, const std::string &label2,
  412. const std::string &label3, const std::string &label4)
  413. :ZipMapFour(f,
  414. dynamic_cast<Value<std::vector<T> >*>(GenValue::values.at(label1)),
  415. dynamic_cast<Value<std::vector<T> >*>(GenValue::values.at(label2)),
  416. dynamic_cast<Value<std::vector<T> >*>(GenValue::values.at(label3)),
  417. dynamic_cast<Value<std::vector<T> >*>(GenValue::values.at(label4))){ }
  418. };
  419. /**
  420. * Reduce a Value of type vector<T> to just a T.
  421. * This is useful functionality to model, for instance, calculating the maximum
  422. * element of a vector, or a the mean. See child classes for specific
  423. * implementations.
  424. */
  425. template <typename T>
  426. class Reduce : public DerivedValue<T>{
  427. private:
  428. Function<T(std::vector<T>)>& reduce;
  429. Value<std::vector<T> >* v;
  430. void update_value(){
  431. this->value = reduce(v->get_value());
  432. }
  433. public:
  434. Reduce(Function<T(std::vector<T>)>& reduce, Value<std::vector<T> >* v)
  435. :DerivedValue<T>("reduceWith("+reduce.get_name()+":"+v->get_name()+")"),
  436. reduce(reduce), v(v) { }
  437. Reduce(Function<T(std::vector<T>)>& reduce, const std::string& v_name)
  438. :Reduce(reduce, dynamic_cast<Value<std::vector<T> >*>(GenValue::get_value(v_name))) { }
  439. };
  440. /**
  441. * Find and return the maximum value of a vector.
  442. */
  443. template <typename T>
  444. class Max : public Reduce<T>{
  445. public:
  446. Max(const std::string& v_name)
  447. :Reduce<T>(GenFunction::register_function<T(std::vector<T>)>("max",
  448. FUNC(([](std::vector<T> vec){
  449. return *std::max_element(vec.begin(), vec.end());}))),
  450. v_name) { }
  451. };
  452. /**
  453. * Find and return the minimum value of a vector.
  454. */
  455. template <typename T>
  456. class Min : public Reduce<T>{
  457. public:
  458. Min(const std::string& v_name)
  459. :Reduce<T>(new Function<T(std::vector<T>)>("min", [](std::vector<T> vec){
  460. return *std::min_element(vec.begin(), vec.end());}),
  461. v_name) { }
  462. };
  463. /**
  464. * Calculate the mean value of a vector.
  465. */
  466. template <typename T>
  467. class Mean : public Reduce<T>{
  468. public:
  469. Mean(const std::string& v_name)
  470. :Reduce<T>(new Function<T(std::vector<T>)>("mean", [](std::vector<T> vec){
  471. int n = 0; T sum = 0;
  472. for (T e : vec){ n++; sum += e; }
  473. return n>0 ? sum / n : 0; }),
  474. v_name) { }
  475. };
  476. /**
  477. * Extract the element at a specific index from a vector.
  478. */
  479. template <typename T>
  480. class ElementOf : public Reduce<T>{
  481. public:
  482. ElementOf(Value<int>* index, const std::string& v_name)
  483. :Reduce<T>(new Function<T(std::vector<T>)>("elementOf", [index](std::vector<T> vec){return vec[index->get_value()];}),
  484. v_name) { }
  485. ElementOf(const std::string& name, int index, const std::string& v_name)
  486. :Reduce<T>(name, [index](std::vector<T> vec){return vec[index];}, v_name) { }
  487. };
  488. /**
  489. * Similar to Reduce, but returns a pair of a T and an int.
  490. * This is useful if you need to know where in the vector exists the element
  491. * being returned.
  492. */
  493. template <typename T>
  494. class ReduceIndex : public DerivedValue<std::pair<T, int> >{
  495. private:
  496. Function<std::pair<T,int>(std::vector<T>)>& reduce;
  497. Value<std::vector<T> >* v;
  498. void update_value(){
  499. this->value = reduce(v->get_value());
  500. }
  501. public:
  502. ReduceIndex(Function<std::pair<T,int>(std::vector<T>)>& reduce, Value<std::vector<T> >* v)
  503. :DerivedValue<T>("reduceIndexWith("+reduce.get_name()+":"+v->get_name()+")"),
  504. reduce(reduce), v(v) { }
  505. ReduceIndex(Function<std::pair<T,int>(std::vector<T>)>& reduce, const std::string& v_name)
  506. :ReduceIndex(reduce, dynamic_cast<Value<std::vector<T> >*>(GenValue::get_value(v_name))) { }
  507. };
  508. /**
  509. * Find and return the maximum value of a vector and its index.
  510. */
  511. template <typename T>
  512. class MaxIndex : public ReduceIndex<T>{
  513. public:
  514. MaxIndex(const std::string& v_name)
  515. :ReduceIndex<T>(GenFunction::register_function<T(std::vector<T>)>("maxIndex",
  516. FUNC(([](std::vector<T> vec){
  517. auto elptr = std::max_element(vec.begin(), vec.end());
  518. return std::pair<T,int>(*elptr, int(elptr-vec.begin())); }
  519. ))), v_name) { }
  520. };
  521. /**
  522. * Find and return the minimum value of a vector and its index.
  523. */
  524. template <typename T>
  525. class MinIndex : public ReduceIndex<T>{
  526. public:
  527. MinIndex(const std::string& v_name)
  528. :ReduceIndex<T>(GenFunction::register_function<T(std::vector<T>)>("minIndex",
  529. FUNC(([](std::vector<T> vec){
  530. auto elptr = std::min_element(vec.begin(), vec.end());
  531. return std::pair<T,int>(*elptr, int(elptr-vec.begin())); }
  532. ))), v_name) { }
  533. };
  534. /**
  535. * A generic value owning only a function object.
  536. * All necessary values upon which this value depends must be bound to the
  537. * function object.
  538. */
  539. template <typename T>
  540. class BoundValue : public DerivedValue<T>{
  541. protected:
  542. Function<T()>& f;
  543. void update_value(){
  544. this->value = f();
  545. }
  546. public:
  547. BoundValue(Function<T()>& f)
  548. :DerivedValue<T>(f.get_name()+"(<bound>)"),
  549. f(f) { }
  550. };
  551. /**
  552. * A Value of a pointer. The pointer is constant, however the data the pointer
  553. * points to is variable.
  554. */
  555. template <typename T>
  556. class PointerValue : public DerivedValue<T*>{
  557. protected:
  558. void update_value(){ }
  559. public:
  560. PointerValue(const std::string& name, T* ptr)
  561. :DerivedValue<T*>(name){
  562. this->value = ptr;
  563. }
  564. };
  565. /**
  566. * A Value which always returns the same value, supplied in the constructor.
  567. */
  568. template <typename T>
  569. class ConstantValue : public DerivedValue<T>{
  570. protected:
  571. T const_value;
  572. void update_value(){
  573. this->value = const_value;
  574. }
  575. public:
  576. ConstantValue(const std::string& name, T const_value)
  577. :DerivedValue<T>("const::"+name),
  578. const_value(const_value) { }
  579. };
  580. }
  581. #endif // value_hpp