value.hpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584
  1. /**
  2. * @file
  3. * @author Caleb Fangmeier <caleb@fangmeier.tech>
  4. * @version 0.1
  5. *
  6. * @section LICENSE
  7. *
  8. *
  9. * MIT License
  10. *
  11. * Copyright (c) 2017 Caleb Fangmeier
  12. *
  13. * Permission is hereby granted, free of charge, to any person obtaining a copy
  14. * of this software and associated documentation files (the "Software"), to deal
  15. * in the Software without restriction, including without limitation the rights
  16. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  17. * copies of the Software, and to permit persons to whom the Software is
  18. * furnished to do so, subject to the following conditions:
  19. *
  20. * The above copyright notice and this permission notice shall be included in all
  21. * copies or substantial portions of the Software.
  22. *
  23. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  28. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  29. * SOFTWARE.
  30. *
  31. * @section DESCRIPTION
  32. * This header defines a set of generic classes that wrap up "values". In
  33. * essence, a Value<T> object is just something that contains a value of type T
  34. * and can provide it when requested. The usefulness stems from composing
  35. * values together with calculations. This enables very clear dependency
  36. * mapping and a way to know clearly how every value was arrived at. This could
  37. * be used to, for example, automatically generate commentary for plots that
  38. * explain the exact calculation used to create it. Or easily making a series
  39. * of plots contrasting different values that have been composed slightly
  40. * differently.
  41. */
  42. #ifndef value_hpp
  43. #define value_hpp
  44. #include <iostream>
  45. #include <sstream>
  46. #include <utility>
  47. #include <algorithm>
  48. #include <map>
  49. #include <vector>
  50. #include <tuple>
  51. #include <initializer_list>
  52. #include <functional>
  53. #include "log.hpp"
  54. /**
  55. * The namespace containing all filval classes and functions.
  56. */
  57. namespace filval{
  /**
   * A named wrapper around a std::function.
   *
   * In order to enable provenance tracking while still being able to embed
   * functions into values, the Function class should be used. The name given
   * at construction is stored with the prefix "func::" so that value names
   * built from it make clear that the result comes from a computation encoded
   * in a function object rather than from some other Value object. It is up
   * to the user to locate the function's definition in the source code (e.g.
   * with grep) to inspect what it actually does.
   */
  template<typename> class Function; // undefined

  template <typename R, typename... ArgTypes>
  class Function<R(ArgTypes...)> {
          std::string name;
          std::function<R(ArgTypes...)> f;

      public:
          /**
           * @param name Human readable name; stored as "func::" + name.
           * @param f    The callable to wrap.
           */
          Function(const std::string& name, std::function<R(ArgTypes...)> f)
            :name("func::"+name), f(std::move(f)) { }

          /** @return The stored name, including the "func::" prefix. */
          std::string& get_name(){
              return name;
          }

          /** Const overload so that a const Function can report its name. */
          const std::string& get_name() const {
              return name;
          }

          /**
           * Invoke the wrapped callable.
           * Arguments are forwarded rather than copied so that move-only
           * argument types (e.g. std::unique_ptr) can be passed through.
           */
          R operator()(ArgTypes... args) const {
              return f(std::forward<ArgTypes>(args)...);
          }
  };
  84. /**
  85. * A type-agnostic value.
  86. * It is necessary to create a type-agnostic parent class to Value so that
  87. * it is possible to handle collections of them. GenValue also provides the
  88. * rest of the type-independent interface to Value.
  89. */
  90. class GenValue;
  91. typedef std::map<std::string, GenValue*> ValueSet;
  92. class GenValue{
  93. private:
  94. /**
  95. * The name of the value.
  96. * This is used to allow for dynamic lookup of
  97. * values based on their name via GenValue::get_value.
  98. */
  99. std::string name;
  100. protected:
  101. /**
  102. * Mark the internal value as invalid. This is needed for DerivedValue
  103. * to force a recalculation of the internal value when a new
  104. * observation is loaded into memory. It is called automatically for
  105. * all GenValue objects when reset is called.
  106. */
  107. virtual void _reset() = 0;
  108. /**
  109. * A static mapping containing all created Value objects.
  110. * Every value object must have a unique name, and this name is used as
  111. * a key in values to that object. This is used to enable more dynamic
  112. * creation of objects as well as avoiding the uneccesary passing of
  113. * pointers.
  114. */
  115. inline static std::map<const std::string, GenValue*> values;
  116. inline static std::map<const std::string, GenValue*> aliases;
  117. public:
  118. GenValue(const std::string& name)
  119. :name(name){
  120. values[name] = this;
  121. }
  122. const std::string& get_name(){
  123. return name;
  124. }
  125. static void reset(){
  126. for (auto val : values){
  127. val.second->_reset();
  128. }
  129. }
  130. static GenValue* get_value(const std::string& name){
  131. if (aliases[name] != nullptr)
  132. return aliases[name];
  133. else if (values[name] != nullptr)
  134. return values[name];
  135. else{
  136. ERROR("Could not find alias or value \"" << name << "\". I'll tell you the ones I know about."
  137. << summary());
  138. CRITICAL("Aborting... :(", -1);
  139. }
  140. }
  141. static void alias(const std::string& name, GenValue* value){
  142. if (aliases[name] != nullptr){
  143. WARNING("WARNING: alias \"" << name << "\" overrides previous entry.");
  144. }
  145. aliases[name] = value;
  146. }
  147. static GenValue* alias(const std::string& name){
  148. if (values[name] != nullptr){
  149. WARNING("Alias \"" << name << "\" does not exist.");
  150. }
  151. return aliases[name];
  152. }
  153. static std::string summary(){
  154. std::stringstream ss;
  155. ss << "The following values have been created: " << std::endl;
  156. for (auto value : values){
  157. if (value.second == nullptr) continue;
  158. ss << "\t\"" << value.first << "\" at address " << value.second << std::endl;
  159. }
  160. ss << "And these aliases:" << std::endl;
  161. for (auto alias : aliases){
  162. std::string orig("VOID");
  163. if (alias.second == nullptr) continue;
  164. for (auto value : values){
  165. if (alias.second == value.second){
  166. orig = value.second->get_name();
  167. break;
  168. }
  169. }
  170. ss << "\t\"" << alias.first << "\" referring to \"" << orig << "\"" << std::endl;
  171. }
  172. return ss.str();
  173. }
  174. };
  175. /**
  176. * A generic value.
  177. * In order to facilitate run-time creation of analysis routines, it is
  178. * necessary to have some ability to get and store *values*. Values can either
  179. * be directly taken from some original data source (i.e. ObservedValue), or
  180. * they can be a function of some other set of values (i.e. DerivedValue). They
  181. * template class T of Value<T> is the type of thing that is returned upon
  182. * calling get_value().
  183. */
  184. template <typename T>
  185. class Value : public GenValue{
  186. public:
  187. Value(const std::string& name)
  188. :GenValue(name){ }
  189. /** Calculate, if necessary, and return the value held by this object.
  190. */
  191. virtual T& get_value() = 0;
  192. };
  193. /**
  194. * A generic, observed, value.
  195. * An ObservedValue is the interface to your dataset. Upon creation, an
  196. * ObservedValue is given a pointer to an object of type T. When an observation
  197. * is loaded into memory, the value at the location referenced by that pointer
  198. * must be updated with the associated data from that observation. This is the
  199. * responsibility of whatever DataSet implementation is being used. This object
  200. * then will read that data and return it when requested.
  201. */
  202. template <typename T>
  203. class ObservedValue : public Value<T>{
  204. private:
  205. T *val_ref;
  206. void _reset(){ }
  207. public:
  208. ObservedValue(const std::string& name, T* val_ref)
  209. :Value<T>(name),
  210. val_ref(val_ref){ }
  211. T& get_value(){
  212. return *val_ref;
  213. }
  214. };
  215. /**
  216. * A generic, derived, value.
  217. * A DerivedValue is generally defined as some function of other Value objects.
  218. * For example, a Pair is a function of two other Value objects that makes a
  219. * pair of them. Note that these other Value objects are free to be either
  220. * ObservedValues or other DerivedValues.
  221. *
  222. * It is desireable from a performance standpoint that each DerivedValue be
  223. * calculated no more than once per observation. Therefore, when a get_value is
  224. * called on a DerivedValue, it first checks whether the value that it holds is
  225. * **valid**, meaning it has already been calculated for this observation. If
  226. * so, it simply returns the value. If not, the update_value function is called
  227. * to calculate the value. and then the newly calculated value is marked as
  228. * valid and returned.
  229. */
  230. template <typename T>
  231. class DerivedValue : public Value<T>{
  232. private:
  233. void _reset(){
  234. value_valid = false;
  235. }
  236. protected:
  237. T value;
  238. bool value_valid;
  239. /**
  240. * Updates the internal value.
  241. * This function should be overridden by any child class to do the
  242. * actual work of updating value based on whatever rules the class
  243. * chooses. Normally, this consists of geting the values from some
  244. * associated Value objects, doing some calculation on them, and
  245. * storing the result in value.
  246. */
  247. virtual void update_value() = 0;
  248. public:
  249. DerivedValue(const std::string& name)
  250. :Value<T>(name),
  251. value_valid(false) { }
  252. T& get_value(){
  253. if (!value_valid){
  254. update_value();
  255. value_valid = true;
  256. }
  257. return value;
  258. }
  259. };
  260. /**
  261. * A std::vector wrapper around a C-style array.
  262. * In order to make some of the higher-level Value types easier to work with,
  263. * it is a good idea to wrap all arrays in the original data source with
  264. * std::vector objects. To do this, it is necessary to supply both a Value
  265. * object containing the array itself as well as another Value object
  266. * containing the size of that array. Currently, update_value will simply copy
  267. * the contents of the array into the interally held vector.
  268. * \todo avoid an unneccessary copy and set the vectors data directly.
  269. */
  270. template <typename T>
  271. class WrapperVector : public DerivedValue<std::vector<T> >{
  272. private:
  273. Value<int>* size;
  274. Value<T*>* data;
  275. void update_value(){
  276. int n = size->get_value();
  277. T* data_ref = data->get_value();
  278. this->value.resize(n);
  279. for (int i=0; i<n; i++){
  280. this->value[i] = *(data_ref+i);
  281. }
  282. }
  283. public:
  284. WrapperVector(Value<int>* size, Value<T*>* data)
  285. :DerivedValue<std::vector<T> >("vectorOf("+size->get_name()+","+data->get_name()+")"),
  286. size(size), data(data){ }
  287. WrapperVector(const std::string &label_size, const std::string &label_data)
  288. :WrapperVector(dynamic_cast<Value<int>*>(GenValue::values.at(label_size)),
  289. dynamic_cast<Value<T*>*>(GenValue::values.at(label_data))) { }
  290. };
  291. /**
  292. * Creates a std::pair type from a two other Value objects.
  293. */
  294. template <typename T1, typename T2>
  295. class Pair : public DerivedValue<std::pair<T1, T2> >{
  296. protected:
  297. std::pair<Value<T1>*, Value<T2>* > value_pair;
  298. void update_value(){
  299. this->value.first = value_pair.first->get_value();
  300. this->value.second = value_pair.second->get_value();
  301. }
  302. public:
  303. Pair(Value<T1> *value1, Value<T2> *value2)
  304. :DerivedValue<std::pair<T1, T2> >("pair("+value1->get_name()+","+value2->get_name()+")"),
  305. value_pair(value1, value2){ }
  306. Pair(const std::string& label1, const std::string& label2)
  307. :Pair(dynamic_cast<Value<T1>*>(GenValue::values.at(label1)),
  308. dynamic_cast<Value<T1>*>(GenValue::values.at(label2))){ }
  309. };
  310. /**
  311. * Takes a set of four Value<std::vector<T> > objects and a function of four Ts
  312. * and returns a std::vector<R>. This is used in, for instance, calculating the
  313. * energy of a set of particles when one has separate arrays containing pt,
  314. * eta, phi, and mass. These arrays are first wrapped up in VectorWrappers and
  315. * then passes along with a function to calculate the energy into a ZipMapFour.
  316. * The result of this calculation is a new vector containing the energy for
  317. * each particle. Note that if the input vectors are not all the same size,
  318. * calculations are only performed up to the size of the shortest.
  319. * \see MiniTreeDataSet
  320. * \todo find way to implement for arbitrary number(and possibly type) of
  321. * vector inputs.
  322. */
  323. template <typename R, typename T>
  324. class ZipMapFour : public DerivedValue<std::vector<R> >{
  325. private:
  326. Function<R(T, T, T, T)> f;
  327. Value<std::vector<T> >* v1;
  328. Value<std::vector<T> >* v2;
  329. Value<std::vector<T> >* v3;
  330. Value<std::vector<T> >* v4;
  331. void update_value(){
  332. std::vector<T> v1_val = v1->get_value();
  333. std::vector<T> v2_val = v2->get_value();
  334. std::vector<T> v3_val = v3->get_value();
  335. std::vector<T> v4_val = v4->get_value();
  336. int n;
  337. std::tie(n, std::ignore) = std::minmax({v1_val.size(), v2_val.size(), v3_val.size(), v4_val.size()});
  338. this->value.resize(n);
  339. for (int i=0; i<n; i++){
  340. this->value[i] = f(v1_val[i], v2_val[i], v3_val[i], v4_val[i]);
  341. }
  342. }
  343. public:
  344. ZipMapFour(Function<R(T, T, T, T)> f,
  345. Value<std::vector<T> >* v1, Value<std::vector<T> >* v2,
  346. Value<std::vector<T> >* v3, Value<std::vector<T> >* v4)
  347. :DerivedValue<std::vector<R> >("zipmap("+f.get_name()+":"+v1->get_name()+","+v2->get_name()+","+v3->get_name()+","+v4->get_name()+")"),
  348. f(f), v1(v1), v2(v2), v3(v3), v4(v4) { }
  349. ZipMapFour(Function<R(T, T, T, T)> f,
  350. const std::string &label1, const std::string &label2,
  351. const std::string &label3, const std::string &label4)
  352. :ZipMapFour(f,
  353. dynamic_cast<Value<std::vector<T> >*>(GenValue::values.at(label1)),
  354. dynamic_cast<Value<std::vector<T> >*>(GenValue::values.at(label2)),
  355. dynamic_cast<Value<std::vector<T> >*>(GenValue::values.at(label3)),
  356. dynamic_cast<Value<std::vector<T> >*>(GenValue::values.at(label4))){ }
  357. };
  358. /**
  359. * Reduce a Value of type vector<T> to just a T.
  360. * This is useful functionality to model, for instance, calculating the maximum
  361. * element of a vector, or a the mean. See child classes for specific
  362. * implementations.
  363. */
  364. template <typename T>
  365. class Reduce : public DerivedValue<T>{
  366. private:
  367. Function<T(std::vector<T>)> reduce;
  368. Value<std::vector<T> >* v;
  369. void update_value(){
  370. this->value = reduce(v->get_value());
  371. }
  372. public:
  373. Reduce(Function<T(std::vector<T>)> reduce, Value<std::vector<T> >* v)
  374. :DerivedValue<T>("reduceWith("+reduce.get_name()+":"+v->get_name()+")"),
  375. reduce(reduce), v(v) { }
  376. Reduce(Function<T(std::vector<T>)> reduce, const std::string& v_name)
  377. :Reduce(reduce, dynamic_cast<Value<std::vector<T> >*>(GenValue::get_value(v_name))) { }
  378. };
  379. /**
  380. * Find and return the maximum value of a vector.
  381. */
  382. template <typename T>
  383. class Max : public Reduce<T>{
  384. public:
  385. Max(const std::string& v_name)
  386. :Reduce<T>(Function<T(std::vector<T>)>("max", [](std::vector<T> vec){
  387. return *std::max_element(vec.begin(), vec.end());}),
  388. v_name) { }
  389. };
  390. /**
  391. * Find and return the minimum value of a vector.
  392. */
  393. template <typename T>
  394. class Min : public Reduce<T>{
  395. public:
  396. Min(const std::string& v_name)
  397. :Reduce<T>(Function<T(std::vector<T>)>("min", [](std::vector<T> vec){
  398. return *std::min_element(vec.begin(), vec.end());}),
  399. v_name) { }
  400. };
  401. /**
  402. * Calculate the mean value of a vector.
  403. */
  404. template <typename T>
  405. class Mean : public Reduce<T>{
  406. public:
  407. Mean(const std::string& v_name)
  408. :Reduce<T>(Function<T(std::vector<T>)>("mean", [](std::vector<T> vec){
  409. int n = 0; T sum = 0;
  410. for (T e : vec){ n++; sum += e; }
  411. return n>0 ? sum / n : 0; }),
  412. v_name) { }
  413. };
  414. /**
  415. * Extract the element at a specific index from a vector.
  416. */
  417. template <typename T>
  418. class ElementOf : public Reduce<T>{
  419. public:
  420. ElementOf(Value<int>* index, const std::string& v_name)
  421. :Reduce<T>(Function<T(std::vector<T>)>("elementOf", [index](std::vector<T> vec){return vec[index->get_value()];}),
  422. v_name) { }
  423. ElementOf(const std::string& name, int index, const std::string& v_name)
  424. :Reduce<T>(name, [index](std::vector<T> vec){return vec[index];}, v_name) { }
  425. };
  426. /**
  427. * Similar to Reduce, but returns a pair of a T and an int.
  428. * This is useful if you need to know where in the vector exists the element
  429. * being returned.
  430. */
  431. template <typename T>
  432. class ReduceIndex : public DerivedValue<std::pair<T, int> >{
  433. private:
  434. Function<std::pair<T,int>(std::vector<T>)> reduce;
  435. Value<std::vector<T> >* v;
  436. void update_value(){
  437. this->value = reduce(v->get_value());
  438. }
  439. public:
  440. ReduceIndex(Function<std::pair<T,int>(std::vector<T>)> reduce, Value<std::vector<T> >* v)
  441. :DerivedValue<T>("reduceIndexWith("+reduce.get_name()+":"+v->get_name()+")"),
  442. reduce(reduce), v(v) { }
  443. ReduceIndex(Function<std::pair<T,int>(std::vector<T>)> reduce, const std::string& v_name)
  444. :ReduceIndex(reduce, dynamic_cast<Value<std::vector<T> >*>(GenValue::get_value(v_name))) { }
  445. };
  446. /**
  447. * Find and return the maximum value of a vector and its index.
  448. */
  449. template <typename T>
  450. class MaxIndex : public ReduceIndex<T>{
  451. public:
  452. MaxIndex(const std::string& v_name)
  453. :ReduceIndex<T>(Function<T(std::vector<T>)>("maxIndex", [](std::vector<T> vec){
  454. auto elptr = std::max_element(vec.begin(), vec.end());
  455. return std::pair<T,int>(*elptr, int(elptr-vec.begin()));}),
  456. v_name) { }
  457. };
  458. /**
  459. * Find and return the minimum value of a vector and its index.
  460. */
  461. template <typename T>
  462. class MinIndex : public ReduceIndex<T>{
  463. public:
  464. MinIndex(const std::string& v_name)
  465. :ReduceIndex<T>(Function<T(std::vector<T>)>("minIndex", [](std::vector<T> vec){
  466. auto elptr = std::min_element(vec.begin(), vec.end());
  467. return std::pair<T,int>(*elptr, int(elptr-vec.begin()));}),
  468. v_name) { }
  469. };
  470. /**
  471. * A variadic
  472. */
  473. /* template <typename R, typename... T> */
  474. /* class MultiFunc : public DerivedValue<R>{ */
  475. /* private: */
  476. /* Function<R(T...)> f; */
  477. /* std::tuple<T...> value_tuple; */
  478. /* void update_value(){ */
  479. /* this->value = f(value_tuple); */
  480. /* } */
  481. /* public: */
  482. /* MultiFunc(const std::string& name, Function<R(std::tuple<T...>)> f, T... varargs) */
  483. /* :DerivedValue<R>(name), */
  484. /* f(f), */
  485. /* value_tuple(varargs...){ } */
  486. /* }; */
  487. /**
  488. * A generic value owning only a function object.
  489. * All necessary values upon which this value depends must be bound to the
  490. * function object.
  491. */
  492. template <typename T>
  493. class BoundValue : public DerivedValue<T>{
  494. protected:
  495. Function<T()> f;
  496. void update_value(){
  497. this->value = f();
  498. }
  499. public:
  500. BoundValue(Function<T()> f)
  501. :DerivedValue<T>(f.get_name()+"(<bound>)"),
  502. f(f) { }
  503. };
  504. /**
  505. * A Value of a pointer. The pointer is constant, however the data the pointer
  506. * points to is variable.
  507. */
  508. template <typename T>
  509. class PointerValue : public DerivedValue<T*>{
  510. protected:
  511. void update_value(){ }
  512. public:
  513. PointerValue(const std::string& name, T* ptr)
  514. :DerivedValue<T*>(name){
  515. this->value = ptr;
  516. }
  517. };
  518. /**
  519. * A Value which always returns the same value, supplied in the constructor.
  520. */
  521. template <typename T>
  522. class ConstantValue : public DerivedValue<T>{
  523. protected:
  524. T const_value;
  525. void update_value(){
  526. this->value = const_value;
  527. }
  528. public:
  529. ConstantValue(const std::string& name, T const_value)
  530. :DerivedValue<T>("const::"+name),
  531. const_value(const_value) { }
  532. };
  533. }
  534. #endif // value_hpp