Skip to content

Commit 2ed4e5f

Browse files
committed
More efficient structures for MCTruth sharing and ROOT IO
This commit achieves/provides the following: * A new class ConstMCTruthContainer, which allows sharing **read-only** MC labels in a flat buffer and in shared memory (using the DPL->make mechanism). This avoids going through ROOT IO when communicating between DPL devices and reduces the overall memory usage and CPU time. * A new class IOMCTruthContainerView, which overcomes the size limitation of labels when writing to a TTree entry. This is achieved by splitting the buffer into several members, which ROOT serializes separately. * A test workflow is provided, exchanging MC labels between a couple of DPL processes. The workflow can run with the existing as well as the new way of sharing labels.
1 parent a70d658 commit 2ed4e5f

File tree

13 files changed

+730
-1
lines changed

13 files changed

+730
-1
lines changed

DataFormats/simulation/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ o2_target_root_dictionary(
3737
include/SimulationDataFormat/LabelContainer.h
3838
include/SimulationDataFormat/MCEventHeader.h
3939
include/SimulationDataFormat/MCEventStats.h
40+
include/SimulationDataFormat/IOMCTruthContainerView.h
4041
LINKDEF src/SimulationDataLinkDef.h)
4142
# note the explicit LINKDEF as the linkdef in src is
4243
#
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
// Copyright CERN and copyright holders of ALICE O2. This software is
2+
// distributed under the terms of the GNU General Public License v3 (GPL
3+
// Version 3), copied verbatim in the file "COPYING".
4+
//
5+
// See http://alice-o2.web.cern.ch/license for full licensing information.
6+
//
7+
// In applying this license CERN does not waive the privileges and immunities
8+
// granted to it by virtue of its status as an Intergovernmental Organization
9+
// or submit itself to any jurisdiction.
10+
11+
/// \file ConstMCTruthContainer.h
12+
/// \brief A const (read-only) version of MCTruthContainer
13+
/// \author Sandro Wenzel - August 2020
14+
15+
#ifndef O2_CONSTMCTRUTHCONTAINER_H
16+
#define O2_CONSTMCTRUTHCONTAINER_H
17+
18+
#include <SimulationDataFormat/MCTruthContainer.h>
19+
#include <Framework/Traits.h>
20+
21+
namespace o2
22+
{
23+
namespace dataformats
24+
{
25+
26+
/// @class ConstMCTruthContainer
27+
/// @brief A read-only version of MCTruthContainer allowing for storage optimisation
28+
///
29+
/// This provides access functionality to MCTruthContainer with optimized linear storage
30+
/// so that the data can easily be shared in memory or sent over network.
31+
/// This container needs to be initialized by calling "flatten_to" from an existing
32+
/// MCTruthContainer
33+
template <typename TruthElement>
34+
class ConstMCTruthContainer : public std::vector<char>
35+
{
36+
public:
37+
// (unfortunately we need these constructors for DPL)
38+
using std::vector<char>::vector;
39+
ConstMCTruthContainer() = default;
40+
41+
// const data access
42+
// get individual const "view" container for a given data index
43+
// the caller can't do modifications on this view
44+
MCTruthHeaderElement const& getMCTruthHeader(uint32_t dataindex) const
45+
{
46+
return getHeaderStart()[dataindex];
47+
}
48+
49+
gsl::span<const TruthElement> getLabels(uint32_t dataindex) const
50+
{
51+
if (dataindex >= getIndexedSize()) {
52+
return gsl::span<const TruthElement>();
53+
}
54+
const auto start = getMCTruthHeader(dataindex).index;
55+
const auto labelsptr = getLabelStart();
56+
return gsl::span<const TruthElement>(&labelsptr[start], getSize(dataindex));
57+
}
58+
59+
// return the number of original data indexed here
60+
size_t getIndexedSize() const { return size() >= sizeof(FlatHeader) ? getHeader().nofHeaderElements : 0; }
61+
62+
// return the number of labels managed in this container
63+
size_t getNElements() const { return size() >= sizeof(FlatHeader) ? getHeader().nofTruthElements : 0; }
64+
65+
private:
66+
using FlatHeader = typename MCTruthContainer<TruthElement>::FlatHeader;
67+
68+
size_t getSize(uint32_t dataindex) const
69+
{
70+
// calculate size / number of labels from a difference in pointed indices
71+
const auto size = (dataindex < getIndexedSize() - 1)
72+
? getMCTruthHeader(dataindex + 1).index - getMCTruthHeader(dataindex).index
73+
: getNElements() - getMCTruthHeader(dataindex).index;
74+
return size;
75+
}
76+
77+
/// Restore internal vectors from a raw buffer
78+
/// The two vectors are resized according to the information in the \a FlatHeader
79+
/// struct at the beginning of the buffer. Data is copied to the vectors.
80+
TruthElement const* const getLabelStart() const
81+
{
82+
auto* source = &(*this)[0];
83+
auto flatheader = getHeader();
84+
source += sizeof(FlatHeader);
85+
const size_t headerSize = flatheader.sizeofHeaderElement * flatheader.nofHeaderElements;
86+
source += headerSize;
87+
return (TruthElement const* const)source;
88+
}
89+
90+
FlatHeader const& getHeader() const
91+
{
92+
const auto* source = &(*this)[0];
93+
const auto& flatheader = *reinterpret_cast<FlatHeader const*>(source);
94+
return flatheader;
95+
}
96+
97+
MCTruthHeaderElement const* const getHeaderStart() const
98+
{
99+
auto* source = &(*this)[0];
100+
source += sizeof(FlatHeader);
101+
return (MCTruthHeaderElement const* const)source;
102+
}
103+
};
104+
} // namespace dataformats
105+
} // namespace o2
106+
107+
// Register ConstMCTruthContainer<T> as a specialization of std::vector.
// This is done so that DPL treats this container as a vector.
108+
// In particular it enables
109+
// a) --> snapshot without ROOT dictionary (as a flat buffer)
110+
// b) --> requesting the resource in shared mem using make<T>
111+
namespace o2::framework
112+
{
113+
template <typename T>
114+
struct is_specialization<o2::dataformats::ConstMCTruthContainer<T>, std::vector> : std::true_type {
115+
};
116+
} // namespace o2::framework
117+
118+
#endif //O2_CONSTMCTRUTHCONTAINER_H
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
// Copyright CERN and copyright holders of ALICE O2. This software is
2+
// distributed under the terms of the GNU General Public License v3 (GPL
3+
// Version 3), copied verbatim in the file "COPYING".
4+
//
5+
// See http://alice-o2.web.cern.ch/license for full licensing information.
6+
//
7+
// In applying this license CERN does not waive the privileges and immunities
8+
// granted to it by virtue of its status as an Intergovernmental Organization
9+
// or submit itself to any jurisdiction.
10+
11+
/// \file IOMCTruthContainerView.h
12+
/// \brief A special IO container - splitting a given vector to enable ROOT IO
13+
/// \author Sandro Wenzel - August 2020
14+
15+
#ifndef ALICEO2_DATAFORMATS_IOMCTRUTHVIEW_H_
16+
#define ALICEO2_DATAFORMATS_IOMCTRUTHVIEW_H_
17+
18+
#include "GPUCommonRtypes.h" // to have the ClassDef macros
19+
#include <vector>
20+
#include <gsl/span>
21+
22+
namespace o2
23+
{
24+
namespace dataformats
25+
{
26+
27+
///
28+
/// A specially constructed class allowing to stream a very large
29+
/// vector buffer to a ROOT file. This is needed since ROOT currently has a size
30+
/// limitation of ~1GB for data that it can stream per entry in a branch.
31+
/// The solution is based on the ability of ROOT to split entries per data member, so
32+
/// some input buffer gets divided into multiple parts.
33+
///
34+
/// TODO: We could template this class to encode original type information (for the input buffer).
35+
class IOMCTruthContainerView
36+
{
37+
public:
38+
IOMCTruthContainerView() = default;
39+
40+
/// Constructor taking an existing flat vector as input; No copy is done - the
41+
/// container is just a split view on the original buffer.
42+
IOMCTruthContainerView(std::vector<char> const& input)
43+
{
44+
const auto delta = input.size() / N;
45+
N2 = input.size() - (N - 1) * delta;
46+
N1 = delta;
47+
// TODO: this could benefit from a loop expansion
48+
part1 = &input[0];
49+
part2 = &input[delta];
50+
part3 = &input[2 * delta];
51+
part4 = &input[3 * delta];
52+
part5 = &input[4 * delta];
53+
part6 = &input[5 * delta];
54+
part7 = &input[6 * delta];
55+
part8 = &input[7 * delta];
56+
part9 = &input[8 * delta];
57+
part10 = &input[9 * delta];
58+
}
59+
60+
/// A function to recreate a flat output vector from this buffer. This
61+
/// function is copying the data.
62+
template <typename Alloc>
63+
void copyandflatten(std::vector<char, Alloc>& output) const
64+
{
65+
// TODO: this could benefit from a loop expansion
66+
copyhelper(part1, N1, output);
67+
copyhelper(part2, N1, output);
68+
copyhelper(part3, N1, output);
69+
copyhelper(part4, N1, output);
70+
copyhelper(part5, N1, output);
71+
copyhelper(part6, N1, output);
72+
copyhelper(part7, N1, output);
73+
copyhelper(part8, N1, output);
74+
copyhelper(part9, N1, output);
75+
copyhelper(part10, N2, output);
76+
}
77+
78+
private:
79+
static constexpr int N = 10;
80+
int N1 = 0;
81+
int N2 = 0;
82+
const char* part1 = nullptr; //[N1]
83+
const char* part2 = nullptr; //[N1]
84+
const char* part3 = nullptr; //[N1]
85+
const char* part4 = nullptr; //[N1]
86+
const char* part5 = nullptr; //[N1]
87+
const char* part6 = nullptr; //[N1]
88+
const char* part7 = nullptr; //[N1]
89+
const char* part8 = nullptr; //[N1]
90+
const char* part9 = nullptr; //[N1]
91+
const char* part10 = nullptr; //[N2]
92+
93+
template <typename Alloc>
94+
void copyhelper(const char* input, int size, std::vector<char, Alloc>& output) const
95+
{
96+
gsl::span<const char> tmp(input, size);
97+
std::copy(tmp.begin(), tmp.end(), std::back_inserter(output));
98+
}
99+
100+
ClassDefNV(IOMCTruthContainerView, 1);
101+
};
102+
} // namespace dataformats
103+
} // namespace o2
104+
105+
#endif

DataFormats/simulation/include/SimulationDataFormat/MCTruthContainer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ class MCTruthContainer
310310
/// The flattened data starts with a specific header @ref FlatHeader describing
311311
/// size and content of the two vectors within the raw buffer.
312312
template <typename ContainerType>
313-
size_t flatten_to(ContainerType& container)
313+
size_t flatten_to(ContainerType& container) const
314314
{
315315
size_t bufferSize = sizeof(FlatHeader) + sizeof(MCTruthHeaderElement) * mHeaderArray.size() + sizeof(TruthElement) * mTruthArray.size();
316316
container.resize((bufferSize / sizeof(typename ContainerType::value_type)) + ((bufferSize % sizeof(typename ContainerType::value_type)) > 0 ? 1 : 0));

DataFormats/simulation/src/SimulationDataLinkDef.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,6 @@
6868
#pragma link C++ class o2::dataformats::MCEventStats + ;
6969
#pragma link C++ class o2::dataformats::MCEventHeader + ;
7070

71+
#pragma link C++ class o2::dataformats::IOMCTruthContainerView + ;
72+
7173
#endif

DataFormats/simulation/test/testMCTruthContainer.cxx

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,14 @@
1313
#define BOOST_TEST_DYN_LINK
1414
#include <boost/test/unit_test.hpp>
1515
#include "SimulationDataFormat/MCTruthContainer.h"
16+
#include "SimulationDataFormat/MCCompLabel.h"
17+
#include "SimulationDataFormat/ConstMCTruthContainer.h"
1618
#include "SimulationDataFormat/LabelContainer.h"
19+
#include "SimulationDataFormat/IOMCTruthContainerView.h"
1720
#include <algorithm>
1821
#include <iostream>
22+
#include <TFile.h>
23+
#include <TTree.h>
1924

2025
namespace o2
2126
{
@@ -180,6 +185,19 @@ BOOST_AUTO_TEST_CASE(MCTruthContainer_flatten)
180185
BOOST_CHECK(restoredContainer.getElement(1) == 2);
181186
BOOST_CHECK(restoredContainer.getElement(2) == 1);
182187
BOOST_CHECK(restoredContainer.getElement(3) == 10);
188+
189+
// check the special version ConstMCTruthContainer
190+
using ConstMCTruthContainer = dataformats::ConstMCTruthContainer<TruthElement>;
191+
ConstMCTruthContainer cc;
192+
container.flatten_to(cc);
193+
194+
BOOST_CHECK(cc.getIndexedSize() == container.getIndexedSize());
195+
BOOST_CHECK(cc.getNElements() == container.getNElements());
196+
BOOST_CHECK(cc.getLabels(0).size() == container.getLabels(0).size());
197+
BOOST_CHECK(cc.getLabels(1).size() == container.getLabels(1).size());
198+
BOOST_CHECK(cc.getLabels(2).size() == container.getLabels(2).size());
199+
BOOST_CHECK(cc.getLabels(2)[0] == container.getLabels(2)[0]);
200+
BOOST_CHECK(cc.getLabels(2)[0] == 10);
183201
}
184202

185203
BOOST_AUTO_TEST_CASE(LabelContainer_noncont)
@@ -305,4 +323,52 @@ BOOST_AUTO_TEST_CASE(MCTruthContainer_move)
305323
BOOST_CHECK(container.getNElements() == 4);
306324
}
307325

326+
// Round trip of a large label container through ROOT IO:
// MCTruthContainer -> flat buffer -> IOMCTruthContainerView -> TTree on disk
// -> IOMCTruthContainerView -> ConstMCTruthContainer.
BOOST_AUTO_TEST_CASE(MCTruthContainer_ROOTIO)
{
  using TruthElement = o2::MCCompLabel;
  using Container = dataformats::MCTruthContainer<TruthElement>;
  Container container;
  const size_t BIGSIZE{1000000};
  // two labels per data index
  for (int i = 0; i < static_cast<int>(BIGSIZE); ++i) {
    container.addElement(i, TruthElement(i, i, i));
    container.addElement(i, TruthElement(i + 1, i, i));
  }
  std::vector<char> buffer;
  container.flatten_to(buffer);

  // We use the special IO split container to stream to a file and back
  dataformats::IOMCTruthContainerView io(buffer);
  {
    TFile f("tmp2.root", "RECREATE");
    TTree tree("o2sim", "o2sim");
    auto br = tree.Branch("Labels", &io, 32000, 2);
    BOOST_REQUIRE(br != nullptr);
    tree.Fill();
    tree.Write();
    f.Close();
  }

  // read back
  // (pointers are checked with BOOST_REQUIRE since they are dereferenced
  //  right afterwards; a failure must abort the test case instead of crashing)
  TFile f2("tmp2.root", "OPEN");
  auto tree2 = dynamic_cast<TTree*>(f2.Get("o2sim"));
  BOOST_REQUIRE(tree2 != nullptr);
  dataformats::IOMCTruthContainerView* io2 = nullptr;
  auto br2 = tree2->GetBranch("Labels");
  BOOST_REQUIRE(br2 != nullptr);
  br2->SetAddress(&io2);
  br2->GetEntry(0);
  BOOST_REQUIRE(io2 != nullptr);

  // make a const MC label container out of it
  using ConstMCTruthContainer = dataformats::ConstMCTruthContainer<TruthElement>;
  ConstMCTruthContainer cc;
  io2->copyandflatten(cc);

  BOOST_CHECK(cc.getNElements() == BIGSIZE * 2);
  BOOST_CHECK(cc.getIndexedSize() == BIGSIZE);
  BOOST_CHECK(cc.getLabels(0).size() == 2);
  BOOST_CHECK(cc.getLabels(0)[0] == TruthElement(0, 0, 0));
  BOOST_CHECK(cc.getLabels(0)[1] == TruthElement(1, 0, 0));
  BOOST_CHECK(cc.getLabels(BIGSIZE - 1).size() == 2);
  BOOST_CHECK(cc.getLabels(BIGSIZE - 1)[0] == TruthElement(BIGSIZE - 1, BIGSIZE - 1, BIGSIZE - 1));
  BOOST_CHECK(cc.getLabels(BIGSIZE - 1)[1] == TruthElement(BIGSIZE, BIGSIZE - 1, BIGSIZE - 1));
}
373+
308374
} // namespace o2

Steer/DigitizerWorkflow/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,12 @@ o2_add_executable(digitizer-workflow
5454
O2::TRDWorkflow
5555
O2::DataFormatsTRD
5656
O2::ZDCSimulation)
57+
58+
59+
# Test workflow exchanging MC labels between DPL processes
# (a source spec and a writer spec, see the sources listed below).
o2_add_executable(mctruth-testworkflow
60+
COMPONENT_NAME sim
61+
SOURCES src/MCTruthTestWorkflow.cxx
62+
src/MCTruthSourceSpec.cxx
63+
src/MCTruthWriterSpec.cxx
64+
PUBLIC_LINK_LIBRARIES O2::Framework
65+
O2::SimulationDataFormat)

0 commit comments

Comments
 (0)