value.hpp 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619
  1. /**
  2. * @file
  3. * @author Caleb Fangmeier <caleb@fangmeier.tech>
  4. * @version 0.1
  5. *
  6. * @section LICENSE
  7. *
  8. *
  9. * MIT License
  10. *
  11. * Copyright (c) 2017 Caleb Fangmeier
  12. *
  13. * Permission is hereby granted, free of charge, to any person obtaining a copy
  14. * of this software and associated documentation files (the "Software"), to deal
  15. * in the Software without restriction, including without limitation the rights
  16. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  17. * copies of the Software, and to permit persons to whom the Software is
  18. * furnished to do so, subject to the following conditions:
  19. *
  20. * The above copyright notice and this permission notice shall be included in all
  21. * copies or substantial portions of the Software.
  22. *
  23. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  28. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  29. * SOFTWARE.
  30. *
  31. * @section DESCRIPTION
  32. * This header defines a set of generic classes that wrap up "values". In
  33. * essence, a Value<T> object is just something that contains a value of type T
  34. * and can provide it when requested. The usefulness stems from composing
  35. * values together with calculations. This enables very clear dependency
  36. * mapping and a way to know clearly how every value was arrived at. This could
  37. * be used to, for example, automatically generate commentary for plots that
  38. * explain the exact calculation used to create it. Or easily making a series
  39. * of plots contrasting different values that have been composed slightly
  40. * differently.
  41. */
  42. #ifndef value_hpp
  43. #define value_hpp
  44. #include <iostream>
  45. #include <sstream>
  46. #include <utility>
  47. #include <algorithm>
  48. #include <map>
  49. #include <vector>
  50. #include <tuple>
  51. #include <initializer_list>
  52. #include <functional>
  53. #include "log.hpp"
  54. /**
  55. * The namespace containing all filval classes and functions.
  56. */
  57. namespace filval{
  58. /**
  59. * Parent class to all Function classes. Holds a class-level collection of all
  60. * created function objects.
  61. */
  /**
   * Type-independent base of every Function wrapper.
   *
   * Stores the function's display name (the constructor argument prefixed with
   * "func::") and a textual description of its implementation, and keeps
   * class-level bookkeeping of the functions that have been constructed.
   */
  class GenFunction {
  private:
      /**
       * Name -> implementation text for every function constructed so far,
       * keyed by the un-prefixed name. summary() reads from this snapshot
       * rather than from function_registry so that it never has to dereference
       * a pointer to a function object that may already have been destroyed.
       */
      inline static std::map<std::string, std::string> impl_registry;
  protected:
      std::string name;
      std::string impl;
      /**
       * Static mapping of functions from their (un-prefixed) name to the
       * object wrapper of the function. An entry is removed again when the
       * object it points to is destroyed.
       */
      inline static std::map<std::string, GenFunction*> function_registry;
  public:
      /**
       * @param name human-readable function name; stored as "func::" + name.
       * @param impl textual description of the implementation.
       */
      GenFunction(const std::string& name, const std::string& impl)
        : name("func::" + name), impl(impl) {
          // `name` is the constructor parameter here (it shadows the member),
          // so both registries are keyed by the un-prefixed name.
          function_registry[name] = this;
          impl_registry[name] = impl;
      }

      // Copies carry the same name/impl but are not registered themselves.
      GenFunction(const GenFunction&) = default;
      GenFunction& operator=(const GenFunction&) = default;

      /** Drops every registry entry that still points at this object. */
      ~GenFunction() {
          for (auto it = function_registry.begin(); it != function_registry.end();) {
              if (it->second == this) {
                  it = function_registry.erase(it);
              } else {
                  ++it;
              }
          }
      }

      /** @return the prefixed name ("func::<name>"). */
      std::string& get_name() {
          return name;
      }
      const std::string& get_name() const {
          return name;
      }

      /**
       * Builds a listing of every function constructed so far: its name, its
       * implementation text and a separator line.
       *
       * @return reference to a function-local static buffer holding the
       *         listing; the buffer is overwritten by the next call.
       */
      static std::string& summary() {
          // The declared return type is a reference, so the text has to
          // outlive this call; a static buffer keeps the signature intact.
          static std::string text;
          std::stringstream ss;
          for (const auto& entry : impl_registry) {
              ss << entry.first << '\n';
              ss << entry.second << '\n';
              ss << "*****************************************" << '\n';
          }
          text = ss.str();
          return text;
      }
  };
  88. /**
  89. * In order to enable proper provenance tracking, and at the same time keep
  90. * the ability to embed functions into values, the Function class should be
  91. * used. It is simply a wrapper around a std::function that also has a name.
  92. * This name is used when generating the name of values that use the function.
  93. * A function name is automatically prepended with "func::" to explicitly state
  94. * that the value is the result of a computation encoded within the function
  95. * object, and not from some other Value object. Unfortunately, it is up to the
  96. * user to find where that function is defined in the source code to inspect
  97. * what it is doing. But hopefully this isn't too onerous by just using grep.
  98. */
  99. template<typename> class Function; // undefined
  100. template <typename R, typename... ArgTypes>
  101. class Function<R(ArgTypes...)> : public GenFunction {
  102. private:
  103. std::function<R(ArgTypes...)> f;
  104. public:
  105. Function(const std::string& name, const std::string& impl, std::function<R(ArgTypes...)> f)
  106. :GenFunction(name, impl), f(f){ }
  107. Function(const std::string& name, std::function<R(ArgTypes...)> f)
  108. :Function(name, "N/A", f){ }
  109. R operator()(ArgTypes ...args){
  110. return f(args...);
  111. }
  112. };
  // Builds a Function<t> named n that wraps f; the implementation text is the
  // stringified source of the f argument (#f).
  #define FUNC(t, n, f) Function<t>(n, #f, f)
  114. /**
  115. * A type-agnostic value.
  116. * It is necessary to create a type-agnostic parent class to Value so that
  117. * it is possible to handle collections of them. GenValue also provides the
  118. * rest of the type-independent interface to Value.
  119. */
  120. class GenValue;
  121. typedef std::map<std::string, GenValue*> ValueSet;
  122. class GenValue{
  123. private:
  124. /**
  125. * The name of the value.
  126. * This is used to allow for dynamic lookup of
  127. * values based on their name via GenValue::get_value.
  128. */
  129. std::string name;
  130. protected:
  131. /**
  132. * Mark the internal value as invalid. This is needed for DerivedValue
  133. * to force a recalculation of the internal value when a new
  134. * observation is loaded into memory. It is called automatically for
  135. * all GenValue objects when reset is called.
  136. */
  137. virtual void _reset() = 0;
  138. /**
  139. * A static mapping containing all created Value objects.
  140. * Every value object must have a unique name, and this name is used as
  141. * a key in values to that object. This is used to enable more dynamic
  142. * creation of objects as well as avoiding the uneccesary passing of
  143. * pointers.
  144. */
  145. inline static std::map<const std::string, GenValue*> values;
  146. inline static std::map<const std::string, GenValue*> aliases;
  147. public:
  148. GenValue(const std::string& name)
  149. :name(name){
  150. values[name] = this;
  151. }
  152. const std::string& get_name(){
  153. return name;
  154. }
  155. static void reset(){
  156. for (auto val : values){
  157. val.second->_reset();
  158. }
  159. }
  160. static GenValue* get_value(const std::string& name){
  161. if (aliases[name] != nullptr)
  162. return aliases[name];
  163. else if (values[name] != nullptr)
  164. return values[name];
  165. else{
  166. ERROR("Could not find alias or value \"" << name << "\". I'll tell you the ones I know about."
  167. << summary());
  168. CRITICAL("Aborting... :(", -1);
  169. }
  170. }
  171. static void alias(const std::string& name, GenValue* value){
  172. if (aliases[name] != nullptr){
  173. WARNING("WARNING: alias \"" << name << "\" overrides previous entry.");
  174. }
  175. aliases[name] = value;
  176. }
  177. static GenValue* alias(const std::string& name){
  178. if (values[name] != nullptr){
  179. WARNING("Alias \"" << name << "\" does not exist.");
  180. }
  181. return aliases[name];
  182. }
  183. static std::string summary(){
  184. std::stringstream ss;
  185. ss << "The following values have been created: " << std::endl;
  186. for (auto value : values){
  187. if (value.second == nullptr) continue;
  188. ss << "\t\"" << value.first << "\" at address " << value.second << std::endl;
  189. }
  190. ss << "And these aliases:" << std::endl;
  191. for (auto alias : aliases){
  192. std::string orig("VOID");
  193. if (alias.second == nullptr) continue;
  194. for (auto value : values){
  195. if (alias.second == value.second){
  196. orig = value.second->get_name();
  197. break;
  198. }
  199. }
  200. ss << "\t\"" << alias.first << "\" referring to \"" << orig << "\"" << std::endl;
  201. }
  202. return ss.str();
  203. }
  204. };
  205. /**
  206. * A generic value.
  207. * In order to facilitate run-time creation of analysis routines, it is
  208. * necessary to have some ability to get and store *values*. Values can either
  209. * be directly taken from some original data source (i.e. ObservedValue), or
  210. * they can be a function of some other set of values (i.e. DerivedValue). The
  211. * template parameter T of Value<T> is the type of thing that is returned upon
  212. * calling get_value().
  213. */
  214. template <typename T>
  215. class Value : public GenValue{
  216. public:
  217. Value(const std::string& name)
  218. :GenValue(name){ }
  219. /** Calculate, if necessary, and return the value held by this object.
  220. */
  221. virtual T& get_value() = 0;
  222. };
  223. /**
  224. * A generic, observed, value.
  225. * An ObservedValue is the interface to your dataset. Upon creation, an
  226. * ObservedValue is given a pointer to an object of type T. When an observation
  227. * is loaded into memory, the value at the location referenced by that pointer
  228. * must be updated with the associated data from that observation. This is the
  229. * responsibility of whatever DataSet implementation is being used. This object
  230. * then will read that data and return it when requested.
  231. */
  232. template <typename T>
  233. class ObservedValue : public Value<T>{
  234. private:
  235. T *val_ref;
  236. void _reset(){ }
  237. public:
  238. ObservedValue(const std::string& name, T* val_ref)
  239. :Value<T>(name),
  240. val_ref(val_ref){ }
  241. T& get_value(){
  242. return *val_ref;
  243. }
  244. };
  245. /**
  246. * A generic, derived, value.
  247. * A DerivedValue is generally defined as some function of other Value objects.
  248. * For example, a Pair is a function of two other Value objects that makes a
  249. * pair of them. Note that these other Value objects are free to be either
  250. * ObservedValues or other DerivedValues.
  251. *
  252. * It is desireable from a performance standpoint that each DerivedValue be
  253. * calculated no more than once per observation. Therefore, when a get_value is
  254. * called on a DerivedValue, it first checks whether the value that it holds is
  255. * **valid**, meaning it has already been calculated for this observation. If
  256. * so, it simply returns the value. If not, the update_value function is called
  257. * to calculate the value. and then the newly calculated value is marked as
  258. * valid and returned.
  259. */
  260. template <typename T>
  261. class DerivedValue : public Value<T>{
  262. private:
  263. void _reset(){
  264. value_valid = false;
  265. }
  266. protected:
  267. T value;
  268. bool value_valid;
  269. /**
  270. * Updates the internal value.
  271. * This function should be overridden by any child class to do the
  272. * actual work of updating value based on whatever rules the class
  273. * chooses. Normally, this consists of geting the values from some
  274. * associated Value objects, doing some calculation on them, and
  275. * storing the result in value.
  276. */
  277. virtual void update_value() = 0;
  278. public:
  279. DerivedValue(const std::string& name)
  280. :Value<T>(name),
  281. value_valid(false) { }
  282. T& get_value(){
  283. if (!value_valid){
  284. update_value();
  285. value_valid = true;
  286. }
  287. return value;
  288. }
  289. };
  290. /**
  291. * A std::vector wrapper around a C-style array.
  292. * In order to make some of the higher-level Value types easier to work with,
  293. * it is a good idea to wrap all arrays in the original data source with
  294. * std::vector objects. To do this, it is necessary to supply both a Value
  295. * object containing the array itself as well as another Value object
  296. * containing the size of that array. Currently, update_value will simply copy
  297. * the contents of the array into the interally held vector.
  298. * \todo avoid an unneccessary copy and set the vectors data directly.
  299. */
  300. template <typename T>
  301. class WrapperVector : public DerivedValue<std::vector<T> >{
  302. private:
  303. Value<int>* size;
  304. Value<T*>* data;
  305. void update_value(){
  306. int n = size->get_value();
  307. T* data_ref = data->get_value();
  308. this->value.resize(n);
  309. for (int i=0; i<n; i++){
  310. this->value[i] = *(data_ref+i);
  311. }
  312. }
  313. public:
  314. WrapperVector(Value<int>* size, Value<T*>* data)
  315. :DerivedValue<std::vector<T> >("vectorOf("+size->get_name()+","+data->get_name()+")"),
  316. size(size), data(data){ }
  317. WrapperVector(const std::string &label_size, const std::string &label_data)
  318. :WrapperVector(dynamic_cast<Value<int>*>(GenValue::values.at(label_size)),
  319. dynamic_cast<Value<T*>*>(GenValue::values.at(label_data))) { }
  320. };
  321. /**
  322. * Creates a std::pair type from two other Value objects.
  323. */
  324. template <typename T1, typename T2>
  325. class Pair : public DerivedValue<std::pair<T1, T2> >{
  326. protected:
  327. std::pair<Value<T1>*, Value<T2>* > value_pair;
  328. void update_value(){
  329. this->value.first = value_pair.first->get_value();
  330. this->value.second = value_pair.second->get_value();
  331. }
  332. public:
  333. Pair(Value<T1> *value1, Value<T2> *value2)
  334. :DerivedValue<std::pair<T1, T2> >("pair("+value1->get_name()+","+value2->get_name()+")"),
  335. value_pair(value1, value2){ }
  336. Pair(const std::string& label1, const std::string& label2)
  337. :Pair(dynamic_cast<Value<T1>*>(GenValue::values.at(label1)),
  338. dynamic_cast<Value<T1>*>(GenValue::values.at(label2))){ }
  339. };
  340. /**
  341. * Takes a set of four Value<std::vector<T> > objects and a function of four Ts
  342. * and returns a std::vector<R>. This is used in, for instance, calculating the
  343. * energy of a set of particles when one has separate arrays containing pt,
  344. * eta, phi, and mass. These arrays are first wrapped up in WrapperVectors and
  345. * then passed along with a function to calculate the energy into a ZipMapFour.
  346. * The result of this calculation is a new vector containing the energy for
  347. * each particle. Note that if the input vectors are not all the same size,
  348. * calculations are only performed up to the size of the shortest.
  349. * \see MiniTreeDataSet
  350. * \todo find way to implement for arbitrary number(and possibly type) of
  351. * vector inputs.
  352. */
  353. template <typename R, typename T>
  354. class ZipMapFour : public DerivedValue<std::vector<R> >{
  355. private:
  356. Function<R(T, T, T, T)> f;
  357. Value<std::vector<T> >* v1;
  358. Value<std::vector<T> >* v2;
  359. Value<std::vector<T> >* v3;
  360. Value<std::vector<T> >* v4;
  361. void update_value(){
  362. std::vector<T> v1_val = v1->get_value();
  363. std::vector<T> v2_val = v2->get_value();
  364. std::vector<T> v3_val = v3->get_value();
  365. std::vector<T> v4_val = v4->get_value();
  366. int n;
  367. std::tie(n, std::ignore) = std::minmax({v1_val.size(), v2_val.size(), v3_val.size(), v4_val.size()});
  368. this->value.resize(n);
  369. for (int i=0; i<n; i++){
  370. this->value[i] = f(v1_val[i], v2_val[i], v3_val[i], v4_val[i]);
  371. }
  372. }
  373. public:
  374. ZipMapFour(Function<R(T, T, T, T)> f,
  375. Value<std::vector<T> >* v1, Value<std::vector<T> >* v2,
  376. Value<std::vector<T> >* v3, Value<std::vector<T> >* v4)
  377. :DerivedValue<std::vector<R> >("zipmap("+f.get_name()+":"+v1->get_name()+","+v2->get_name()+","+v3->get_name()+","+v4->get_name()+")"),
  378. f(f), v1(v1), v2(v2), v3(v3), v4(v4) { }
  379. ZipMapFour(Function<R(T, T, T, T)> f,
  380. const std::string &label1, const std::string &label2,
  381. const std::string &label3, const std::string &label4)
  382. :ZipMapFour(f,
  383. dynamic_cast<Value<std::vector<T> >*>(GenValue::values.at(label1)),
  384. dynamic_cast<Value<std::vector<T> >*>(GenValue::values.at(label2)),
  385. dynamic_cast<Value<std::vector<T> >*>(GenValue::values.at(label3)),
  386. dynamic_cast<Value<std::vector<T> >*>(GenValue::values.at(label4))){ }
  387. };
  388. /**
  389. * Reduce a Value of type vector<T> to just a T.
  390. * This is useful functionality to model, for instance, calculating the maximum
  391. * element of a vector, or the mean. See child classes for specific
  392. * implementations.
  393. */
  394. template <typename T>
  395. class Reduce : public DerivedValue<T>{
  396. private:
  397. Function<T(std::vector<T>)> reduce;
  398. Value<std::vector<T> >* v;
  399. void update_value(){
  400. this->value = reduce(v->get_value());
  401. }
  402. public:
  403. Reduce(Function<T(std::vector<T>)> reduce, Value<std::vector<T> >* v)
  404. :DerivedValue<T>("reduceWith("+reduce.get_name()+":"+v->get_name()+")"),
  405. reduce(reduce), v(v) { }
  406. Reduce(Function<T(std::vector<T>)> reduce, const std::string& v_name)
  407. :Reduce(reduce, dynamic_cast<Value<std::vector<T> >*>(GenValue::get_value(v_name))) { }
  408. };
  409. /**
  410. * Find and return the maximum value of a vector.
  411. */
  412. template <typename T>
  413. class Max : public Reduce<T>{
  414. public:
  415. Max(const std::string& v_name)
  416. :Reduce<T>(Function<T(std::vector<T>)>("max", [](std::vector<T> vec){
  417. return *std::max_element(vec.begin(), vec.end());}),
  418. v_name) { }
  419. };
  420. /**
  421. * Find and return the minimum value of a vector.
  422. */
  423. template <typename T>
  424. class Min : public Reduce<T>{
  425. public:
  426. Min(const std::string& v_name)
  427. :Reduce<T>(Function<T(std::vector<T>)>("min", [](std::vector<T> vec){
  428. return *std::min_element(vec.begin(), vec.end());}),
  429. v_name) { }
  430. };
  431. /**
  432. * Calculate the mean value of a vector.
  433. */
  434. template <typename T>
  435. class Mean : public Reduce<T>{
  436. public:
  437. Mean(const std::string& v_name)
  438. :Reduce<T>(Function<T(std::vector<T>)>("mean", [](std::vector<T> vec){
  439. int n = 0; T sum = 0;
  440. for (T e : vec){ n++; sum += e; }
  441. return n>0 ? sum / n : 0; }),
  442. v_name) { }
  443. };
  444. /**
  445. * Extract the element at a specific index from a vector.
  446. */
  447. template <typename T>
  448. class ElementOf : public Reduce<T>{
  449. public:
  450. ElementOf(Value<int>* index, const std::string& v_name)
  451. :Reduce<T>(Function<T(std::vector<T>)>("elementOf", [index](std::vector<T> vec){return vec[index->get_value()];}),
  452. v_name) { }
  453. ElementOf(const std::string& name, int index, const std::string& v_name)
  454. :Reduce<T>(name, [index](std::vector<T> vec){return vec[index];}, v_name) { }
  455. };
  456. /**
  457. * Similar to Reduce, but returns a pair of a T and an int.
  458. * This is useful if you need to know where in the vector exists the element
  459. * being returned.
  460. */
  461. template <typename T>
  462. class ReduceIndex : public DerivedValue<std::pair<T, int> >{
  463. private:
  464. Function<std::pair<T,int>(std::vector<T>)> reduce;
  465. Value<std::vector<T> >* v;
  466. void update_value(){
  467. this->value = reduce(v->get_value());
  468. }
  469. public:
  470. ReduceIndex(Function<std::pair<T,int>(std::vector<T>)> reduce, Value<std::vector<T> >* v)
  471. :DerivedValue<T>("reduceIndexWith("+reduce.get_name()+":"+v->get_name()+")"),
  472. reduce(reduce), v(v) { }
  473. ReduceIndex(Function<std::pair<T,int>(std::vector<T>)> reduce, const std::string& v_name)
  474. :ReduceIndex(reduce, dynamic_cast<Value<std::vector<T> >*>(GenValue::get_value(v_name))) { }
  475. };
  476. /**
  477. * Find and return the maximum value of a vector and its index.
  478. */
  479. template <typename T>
  480. class MaxIndex : public ReduceIndex<T>{
  481. public:
  482. MaxIndex(const std::string& v_name)
  483. :ReduceIndex<T>(Function<T(std::vector<T>)>("maxIndex", [](std::vector<T> vec){
  484. auto elptr = std::max_element(vec.begin(), vec.end());
  485. return std::pair<T,int>(*elptr, int(elptr-vec.begin()));}),
  486. v_name) { }
  487. };
  488. /**
  489. * Find and return the minimum value of a vector and its index.
  490. */
  491. template <typename T>
  492. class MinIndex : public ReduceIndex<T>{
  493. public:
  494. MinIndex(const std::string& v_name)
  495. :ReduceIndex<T>(Function<T(std::vector<T>)>("minIndex", [](std::vector<T> vec){
  496. auto elptr = std::min_element(vec.begin(), vec.end());
  497. return std::pair<T,int>(*elptr, int(elptr-vec.begin()));}),
  498. v_name) { }
  499. };
  500. /**
  501. * A variadic function-application value (currently disabled; the sketch below is kept commented out).
  502. */
  503. /* template <typename R, typename... T> */
  504. /* class MultiFunc : public DerivedValue<R>{ */
  505. /* private: */
  506. /* Function<R(T...)> f; */
  507. /* std::tuple<T...> value_tuple; */
  508. /* void update_value(){ */
  509. /* this->value = f(value_tuple); */
  510. /* } */
  511. /* public: */
  512. /* MultiFunc(const std::string& name, Function<R(std::tuple<T...>)> f, T... varargs) */
  513. /* :DerivedValue<R>(name), */
  514. /* f(f), */
  515. /* value_tuple(varargs...){ } */
  516. /* }; */
  517. /**
  518. * A generic value owning only a function object.
  519. * All necessary values upon which this value depends must be bound to the
  520. * function object.
  521. */
  522. template <typename T>
  523. class BoundValue : public DerivedValue<T>{
  524. protected:
  525. Function<T()> f;
  526. void update_value(){
  527. this->value = f();
  528. }
  529. public:
  530. BoundValue(Function<T()> f)
  531. :DerivedValue<T>(f.get_name()+"(<bound>)"),
  532. f(f) { }
  533. };
  534. /**
  535. * A Value of a pointer. The pointer is constant, however the data the pointer
  536. * points to is variable.
  537. */
  538. template <typename T>
  539. class PointerValue : public DerivedValue<T*>{
  540. protected:
  541. void update_value(){ }
  542. public:
  543. PointerValue(const std::string& name, T* ptr)
  544. :DerivedValue<T*>(name){
  545. this->value = ptr;
  546. }
  547. };
  548. /**
  549. * A Value which always returns the same value, supplied in the constructor.
  550. */
  551. template <typename T>
  552. class ConstantValue : public DerivedValue<T>{
  553. protected:
  554. T const_value;
  555. void update_value(){
  556. this->value = const_value;
  557. }
  558. public:
  559. ConstantValue(const std::string& name, T const_value)
  560. :DerivedValue<T>("const::"+name),
  561. const_value(const_value) { }
  562. };
  563. }
  564. #endif // value_hpp