2121#include < stdexcept>
2222#include < gsl/gsl> // for guideline support library; array_view
2323#include < type_traits>
24- #include " MemoryResources/MemoryResources.h"
24+ #include < cstring> // memmove, memcpy
25+ #include < memory>
26+ #include < vector>
2527// type traits are needed for the compile time consistency check
2628// maybe to be moved out of Framework first
2729// #include "Framework/TypeTraits.h"
@@ -30,8 +32,9 @@ namespace o2
3032{
3133namespace dataformats
3234{
33- // a simple struct having information about truth elements for particular indices:
34- // how many associations we have and where they start in the storage
35+ // / @struct MCTruthHeaderElement
36+ // / @brief Simple struct having information about truth elements for particular indices:
37+ // / how many associations we have and where they start in the storage
3538struct MCTruthHeaderElement {
3639 MCTruthHeaderElement () = default ; // for ROOT IO
3740
@@ -45,35 +48,55 @@ struct MCTruthHeaderElement {
4548// /
4649// / The actual MCtruth type is a generic template type and can be supplied by the user
4750// / It is meant to manage associations from one "dataobject" identified by an index into an array
48- // / to multiple TruthElements
51+ // / to multiple TruthElements. Each "dataobject" is identified by a sequential index. Truth elements
52+ // / belonging to one object are always in contiguous sequence in the truth element storage. Since
53+ // / multiple truth elements can be associated with one object, the header array stores the start
54+ // / of the associated truth element sequence.
4955// /
56+ // / Since the class contains two subsequent vectors, it is not POD even if the TruthElement is
57+ // / POD. ROOT serialization is rather inefficient and in addition has a large memory footprint
58+ // / if the container has lots of (>1000000) elements: between 3 and 4x more than the actual
59+ // / size is allocated. If the two vectors are flattened to a raw vector before streaming, the
60+ // / serialization works without memory overhead. The deflate/inflate methods are called from
61+ // / a custom streamer, storing the vectors in the raw buffer and vice versa, each of the methods
62+ // / emptying the source data.
63+ // /
64+ // / TODO:
65+ // / - add move assignment from a source vector, by that passing an object which has access to
66+ // / different underlying memory resources, until that, the pmr::MemoryResource has been
67+ // / removed again
68+ // / - add interface to access header and truth elements directly from the raw buffer, by that
69+ // / inflation can be postponed until new elements are added, with the effect that inflation
70+ // / can be avoided in most cases
71+ // /
72+ // / Note:
73+ // / The two original vector members could be transient, however reading serialized version 1
74+ // / objects does not work correctly. In a different approach, the two vectors have been removed
75+ // / completely with an efficient interface to the binary buffer, but the read pragma was not able
76+ // / to access the member offset from the StreamerInfo.
5077template <typename TruthElement>
5178class MCTruthContainer
5279{
5380 private:
54- // / The allocator to be used with the internal vectors is by default o2::pmr::polymorphic_allocator
55- // / we might change this to be a template parameter
56- // / template <typename TruthElement, template <typename ...> class Allocator = o2::pmr::polymorphic_allocator>
57- template <typename T>
58- using Allocator = o2::pmr::polymorphic_allocator<T>;
59-
6081 // for the moment we require the truth element to be messageable in order to simply flatten the object
6182 // if it turns out that other types are required this needs to be extended and method flatten needs to
6283 // be conditionally added
6384 // TODO: activate this check
6485 // static_assert(o2::framework::is_messageable<TruthElement>::value, "truth element type must be messageable");
6586
66- // the header structure array serves as an index into the actual storage
67- std::vector<MCTruthHeaderElement, Allocator<MCTruthHeaderElement>> mHeaderArray ;
68- // the buffer containing the actual truth information
69- std::vector<TruthElement, Allocator<TruthElement>> mTruthArray ;
87+ std::vector<MCTruthHeaderElement> mHeaderArray ; // the header structure array serves as an index into the actual storage
88+ std::vector<TruthElement> mTruthArray ; // the buffer containing the actual truth information
89+ // / buffer used only for streaming the two above vectors in a flat structure
90+ // / TODO: use polymorphic allocator so that it can work on an underlying custom memory resource,
91+ // / e.g. directly on the memory of the incoming message.
92+ std::vector<char > mStreamerData ; // buffer used for streaming a flat raw buffer
7093
7194 size_t getSize (uint dataindex) const
7295 {
7396 // calculate size / number of labels from a difference in pointed indices
74- const auto size = (dataindex < mHeaderArray . size () - 1 )
75- ? mHeaderArray [ dataindex + 1 ] .index - mHeaderArray [ dataindex] .index
76- : mTruthArray . size () - mHeaderArray [ dataindex] .index ;
97+ const auto size = (dataindex < getIndexedSize () - 1 )
98+ ? getMCTruthHeader ( dataindex + 1 ) .index - getMCTruthHeader ( dataindex) .index
99+ : getNElements () - getMCTruthHeader ( dataindex) .index ;
77100 return size;
78101 }
79102
@@ -91,6 +114,7 @@ class MCTruthContainer
91114 // move assignment operator
92115 MCTruthContainer& operator =(MCTruthContainer&& other) = default ;
93116
117+ using self_type = MCTruthContainer<TruthElement>;
94118 struct FlatHeader {
95119 uint8_t version = 1 ;
96120 uint8_t sizeofHeaderElement = sizeof (MCTruthHeaderElement);
@@ -101,7 +125,7 @@ class MCTruthContainer
101125 };
102126
103127 // access
104- MCTruthHeaderElement getMCTruthHeader (uint dataindex) const { return mHeaderArray [dataindex]; }
128+ MCTruthHeaderElement const & getMCTruthHeader (uint dataindex) const { return mHeaderArray [dataindex]; }
105129 // access the element directly (can be encapsulated better away)... needs proper element index
106130 // which can be obtained from the MCTruthHeader startposition and size
107131 TruthElement const & getElement (uint elementindex) const { return mTruthArray [elementindex]; }
@@ -117,7 +141,7 @@ class MCTruthContainer
117141 if (dataindex >= getIndexedSize ()) {
118142 return gsl::span<TruthElement>();
119143 }
120- return gsl::span<TruthElement>(&mTruthArray [mHeaderArray [ dataindex] .index ], getSize (dataindex));
144+ return gsl::span<TruthElement>(&mTruthArray [getMCTruthHeader ( dataindex) .index ], getSize (dataindex));
121145 }
122146
123147 // get individual const "view" container for a given data index
@@ -127,7 +151,7 @@ class MCTruthContainer
127151 if (dataindex >= getIndexedSize ()) {
128152 return gsl::span<const TruthElement>();
129153 }
130- return gsl::span<const TruthElement>(&mTruthArray [mHeaderArray [ dataindex] .index ], getSize (dataindex));
154+ return gsl::span<const TruthElement>(&mTruthArray [getMCTruthHeader ( dataindex) .index ], getSize (dataindex));
131155 }
132156
133157 void clear ()
@@ -248,17 +272,10 @@ class MCTruthContainer
248272 }
249273 }
250274
251- // TODO: find appropriate name for 'flatten'
252- template <typename MemoryResource>
253- std::vector<char , Allocator<char >> flatten (MemoryResource* resource)
254- {
255- std::vector<char , Allocator<char >> buffer{ resource };
256- [[maybe_unused]] auto size = flatten_to (buffer);
257- assert (size == buffer.size ());
258- return buffer;
259- }
260-
261- // TODO: find appropriate name for 'flatten'
275+ // / Flatten the internal arrays to the provided container
276+ // / Copies the content of the two vectors of PODs to a contiguous container.
277+ // / The flattened data starts with a specific header @ref FlatHeader describing
278+ // / size and content of the two vectors within the raw buffer.
262279 template <typename ContainerType>
263280 size_t flatten_to (ContainerType& container)
264281 {
@@ -281,6 +298,9 @@ class MCTruthContainer
281298 return bufferSize;
282299 }
283300
301+ // / Restore internal vectors from a raw buffer
302+ // / The two vectors are resized according to the information in the \a FlatHeader
303+ // / struct at the beginning of the buffer. Data is copied to the vectors.
284304 void restore_from (const char * buffer, size_t bufferSize)
285305 {
286306 if (buffer == nullptr || bufferSize < sizeof (FlatHeader)) {
@@ -311,7 +331,45 @@ class MCTruthContainer
311331 memcpy (mTruthArray .data (), source, copySize);
312332 }
313333
314- ClassDefNV (MCTruthContainer, 1 );
334+ // / Print some info
335+ template <typename Stream>
336+ void print (Stream& stream)
337+ {
338+ stream << " MCTruthContainer index = " << getIndexedSize () << " for " << getNElements () << " elements(s), flat buffer size " << mStreamerData .size () << std::endl;
339+ }
340+
341+ // / Inflate the object from the internal buffer
342+ // / The class has a specific member to store flattened data. Due to some limitations in ROOT
343+ // / it is more efficient to first flatten the objects to a raw buffer and empty the two vectors
344+ // / before serialization. This function restores the vectors from the internal raw buffer.
345+ // / Called from the custom streamer.
346+ void inflate ()
347+ {
348+ if (mHeaderArray .size () > 0 ) {
349+ mStreamerData .clear ();
350+ return ;
351+ }
352+ restore_from (mStreamerData .data (), mStreamerData .size ());
353+ mStreamerData .clear ();
354+ }
355+
356+ // / Deflate the object to the internal buffer
357+ // / The class has a specific member to store flattened data. Due to some limitations in ROOT
358+ // / it is more efficient to first flatten the objects to a raw buffer and empty the two vectors
359+ // / before serialization. This function stores the vectors to the internal raw buffer.
360+ // / Called from the custom streamer.
361+ void deflate ()
362+ {
363+ if (mStreamerData .size () > 0 ) {
364+ clear ();
365+ return ;
366+ }
367+ mStreamerData .clear ();
368+ flatten_to (mStreamerData );
369+ clear ();
370+ }
371+
372+ ClassDefNV (MCTruthContainer, 2 );
315373}; // end class
316374
317375} // namespace dataformats
0 commit comments