Skip to content

Commit 885a5bd

Browse files
Using smart pointers to correctly cleanup nodes from Huffman tree
Fixes a memory leak caused by the allocated new nodes in the binary tree sorting. Algorithm is based on combining pointers to nodes in a new node which had so far been allocated using new operator. The cleanup has been ignored so far, and needs a bit bookkeeping since the original leave nodes are owned by the vector of leave nodes, while all tree nodes are owned by their parent nodes. Introducing shared_ptr solves this problem. TODO: Performance still needs to be checked, maybe some counts of the shared pointer can be avoided using references. Further changes: - using compiler default for copy ctor and assignment operator. - checking for empty node list at write out
1 parent f85454f commit 885a5bd

File tree

1 file changed

+77
-45
lines changed

1 file changed

+77
-45
lines changed

Utilities/DataCompression/include/DataCompression/HuffmanCodec.h

Lines changed: 77 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <set>
2525
#include <map>
2626
#include <vector>
27+
#include <memory>
2728
#include <exception>
2829
#include <stdexcept>
2930
#include <iostream>
@@ -44,19 +45,19 @@ template<typename _CodeType>
4445
class HuffmanNode {
4546
public:
4647
typedef HuffmanNode self_type;
47-
typedef HuffmanNode* pointer;
48+
typedef self_type* pointer;
49+
typedef std::shared_ptr<self_type> shared_pointer;
4850
typedef _CodeType CodeType;
4951

50-
HuffmanNode() : mLeft(nullptr), mRight(nullptr), mWeight(0.), mIndex(~uint16_t(0)), mCode(), mCodeLen(0) {}
51-
HuffmanNode(const HuffmanNode& other) : mLeft(other.mLeft), mRight(other.mRight), mWeight(other.mWeight), mCode(other.mCode), mIndex(other.mIndex), mCodeLen(other.mCodeLen) {}
52-
HuffmanNode& operator=(const HuffmanNode& other) {
53-
if (this != &other) new (this) HuffmanNode(other);
54-
return *this;
55-
}
52+
HuffmanNode() : mLeft(), mRight(), mWeight(0.), mIndex(~uint16_t(0)), mCode(), mCodeLen(0) {}
53+
HuffmanNode(const HuffmanNode& other) = default;
54+
HuffmanNode& operator=(const HuffmanNode& other) = default;
5655
~HuffmanNode() {}
5756

58-
HuffmanNode(double weight, uint16_t index = ~uint16_t(0)) : mLeft(nullptr), mRight(nullptr), mWeight(weight), mIndex(index), mCode(), mCodeLen(0) {}
59-
HuffmanNode(pointer left, pointer right) : mLeft(left), mRight(right), mWeight((mLeft!=nullptr?mLeft->mWeight:0.)+(mRight!=nullptr?mRight->mWeight:0.)), mIndex(~uint16_t(0)), mCode(), mCodeLen(0) {}
57+
HuffmanNode(double weight, uint16_t index = ~uint16_t(0)) : mLeft(), mRight(), mWeight(weight), mIndex(index), mCode(), mCodeLen(0) {}
58+
// TODO: check if the shared pointers can be passed by reference
59+
// quick attempt lead to compilation error
60+
HuffmanNode(shared_pointer left, shared_pointer right) : mLeft(left), mRight(right), mWeight((mLeft?mLeft->mWeight:0.)+(mRight?mRight->mWeight:0.)), mIndex(~uint16_t(0)), mCode(), mCodeLen(0) {}
6061

6162
bool operator<(const HuffmanNode& other) const {
6263
return mWeight < other.mWeight;
@@ -67,8 +68,8 @@ class HuffmanNode {
6768
uint16_t getIndex() const {return mIndex;}
6869

6970
// TODO: can be combined to one function with templated index
70-
pointer getLeftChild() const {return mLeft;}
71-
pointer getRightChild() const {return mRight;}
71+
pointer getLeftChild() const {return mLeft.get();}
72+
pointer getRightChild() const {return mRight.get();}
7273
void setBinaryCode(uint16_t codeLen, CodeType code) {mCode = code; mCodeLen = codeLen;}
7374

7475
self_type& operator<<=(bool bit) {
@@ -89,20 +90,20 @@ class HuffmanNode {
8990
void print(std::ostream& stream = std::cout) const {
9091
static int level=1;
9192
stream << "node weight: " << std::setw(9) << mWeight;
92-
if (mLeft == nullptr) stream << " leave ";
93-
else stream << " tree ";
93+
if (!mLeft) stream << " leave ";
94+
else stream << " tree ";
9495
stream << " code length: " << mCodeLen;
9596
stream << " code " << mCode;
9697
stream << std::endl;
9798
level++;
98-
if (mLeft!=nullptr) {stream << std::setw(level) << level << ": left: "; mLeft->print(stream);}
99-
if (mRight!=nullptr) {stream << std::setw(level) << level << ": right: "; mRight->print(stream);}
99+
if (mLeft) {stream << std::setw(level) << level << ": left: "; mLeft->print(stream);}
100+
if (mRight) {stream << std::setw(level) << level << ": right: "; mRight->print(stream);}
100101
level--;
101102
}
102103

103104
private:
104-
pointer mLeft;
105-
pointer mRight;
105+
shared_pointer mLeft;
106+
shared_pointer mRight;
106107
double mWeight;
107108
uint16_t mIndex;
108109
CodeType mCode;
@@ -188,8 +189,8 @@ class HuffmanModel : public _BASE {
188189
auto nodeIndex = _BASE::alphabet_type::getIndex(symbol);
189190
if (nodeIndex < mLeaveNodes.size()) {
190191
// valid symbol/value
191-
codeLength = mLeaveNodes[nodeIndex].getBinaryCodeLength();
192-
return mLeaveNodes[nodeIndex].getBinaryCode();
192+
codeLength = mLeaveNodes[nodeIndex]->getBinaryCodeLength();
193+
return mLeaveNodes[nodeIndex]->getBinaryCode();
193194
} else {
194195
std::string msg = "symbol "; msg += symbol;
195196
msg += " not found in alphapet "; msg += _BASE::getName();
@@ -210,9 +211,17 @@ class HuffmanModel : public _BASE {
210211
* @return value, valid if codeLength > 0
211212
*/
212213
value_type Decode(code_type code, uint16_t& codeLength) const {
214+
// TODO: need to check if there is a loaded tree, but don't
215+
// want to check this every time when calling. Maybe its enough
216+
// to let the dereferencing below throw an exception
213217
codeLength = 0;
214218
typename _BASE::value_type v = 0;
215-
const _NodeType* node = *mTreeNodes.begin();
219+
// dereference the iterator and get raw pointer from shared pointer
220+
// TODO: work on shared pointers here as well
221+
// the top node is the only element in the multiset after using the
222+
// weighted sort algorithm to build the tree, all nodes are referenced
223+
// from their parents in the tree.
224+
const _NodeType* node = (*mTreeNodes.begin()).get();
216225
uint16_t codeMSB = code.size() - 1;
217226
while (node) {
218227
// N.B.: nodes have either both child nodes or none of them
@@ -239,18 +248,34 @@ class HuffmanModel : public _BASE {
239248
}
240249

241250
/**
242-
* 'less' functor for pointer type arguments
243-
* used in the multiset for sorting in the order less probable to
244-
* more probable
251+
* 'less' functor used in the multiset for sorting in the order less
252+
* probable to more probable
245253
*/
246254
template<typename T>
247-
class pless {
255+
class isless {
248256
public:
249257
bool operator()(const T a, const T b) {
258+
return a < b;
259+
}
260+
};
261+
/// specialization for pointer types
262+
template<typename T>
263+
class isless<T*> {
264+
public:
265+
bool operator()(const T* a, const T* b) {
250266
if (a == nullptr || b == nullptr) return false;
251267
return *a < *b;
252268
}
253269
};
270+
/// specialization for shared pointer
271+
template<typename T>
272+
class isless<std::shared_ptr<T>> {
273+
public:
274+
bool operator()(const std::shared_ptr<T>& a, const std::shared_ptr<T>& b) {
275+
if (!a || !b) return false;
276+
return *a < *b;
277+
}
278+
};
254279

255280
/**
256281
* Combine and sort nodes to build a binary tree
@@ -264,26 +289,28 @@ class HuffmanModel : public _BASE {
264289
_BASE& model = *this;
265290
for ( auto i : model) {
266291
// create nodes knowing about their index and the symbol weight
267-
mLeaveNodes.push_back(_NodeType(i.second, _BASE::alphabet_type::getIndex(i.first)));
292+
mLeaveNodes.push_back(std::make_shared<_NodeType>(i.second, _BASE::alphabet_type::getIndex(i.first)));
268293
}
269294

270295
// insert pointer to nodes into ordered structure to build tree
271296
// since the type is a pointer, a specific 'less' functor needs to
272297
// be provided to dereference before applying operator<
273298
for ( auto &i : mLeaveNodes) {
274-
mTreeNodes.insert(&(i));
299+
mTreeNodes.insert(i);
275300
}
276301
while (mTreeNodes.size() > 1) {
277302
// create new node combining the two with lowest probability
278-
_NodeType* combinedNode=new _NodeType(*mTreeNodes.begin(), *++mTreeNodes.begin());
303+
std::shared_ptr<_NodeType> combinedNode = std::make_shared<_NodeType>(*mTreeNodes.begin(), *++mTreeNodes.begin());
279304
// remove those two nodes from the list
280305
mTreeNodes.erase(mTreeNodes.begin());
281306
mTreeNodes.erase(mTreeNodes.begin());
282307
// insert the new node according to the less functor
283308
mTreeNodes.insert(combinedNode);
284309
}
285-
//assign value
286-
assignCode(*mTreeNodes.begin());
310+
//assign value, method works on pointer
311+
// dereference iterator and shared_ptr to get the raw pointer
312+
// TODO: change method to work on shared instead of raw pointers
313+
assignCode((*mTreeNodes.begin()).get());
287314
return true;
288315
}
289316

@@ -338,7 +365,8 @@ class HuffmanModel : public _BASE {
338365
* @brief Write Huffman table in self-consistent format.
339366
*/
340367
int write(std::ostream& out) {
341-
return write(out, *mTreeNodes.begin(), 0);
368+
if (mTreeNodes.size() == 0) return 0;
369+
return write(out, (*mTreeNodes.begin()).get(), 0);
342370
}
343371

344372
/**
@@ -398,49 +426,49 @@ class HuffmanModel : public _BASE {
398426
int symbolIndex = _BASE::alphabet_type::getIndex(symbol);
399427
// grow the vector as operator[] always expects index within range
400428
if (mLeaveNodes.size() < symbolIndex + 1) mLeaveNodes.resize(symbolIndex + 1);
401-
mLeaveNodes[symbolIndex] = _NodeType(weight, symbolIndex);
429+
mLeaveNodes[symbolIndex] = std::make_shared<_NodeType>(weight, symbolIndex);
402430
std::stringstream ps(parameters);
403431
uint16_t codeLen = 0; ps >> codeLen;
404432
code_type code = 0; ps >> code;
405-
mLeaveNodes[symbolIndex].setBinaryCode(codeLen, code);
433+
mLeaveNodes[symbolIndex]->setBinaryCode(codeLen, code);
406434
leaveNodeMap[lineNo] = symbolIndex;
407435
_BASE::addWeight(symbol, weight);
408436
//std::cout << "leave node " << lineNo << " " << " value=" << value << " weight=" << weight << " " << codeLen << " " << code << std::endl;
409437
}
410438
nodeIndices.insert(lineNo);
411439
}
412-
std::map<int, _NodeType*> treeNodes;
440+
std::map<int, std::shared_ptr<_NodeType>> treeNodes;
413441
for (auto conf : treeNodeConfigurations) {
414-
_NodeType* left = NULL;
442+
std::shared_ptr<_NodeType> left;
415443
auto ln = leaveNodeMap.find(conf.left);
416444
if ( ln != leaveNodeMap.end()) {
417-
left = &mLeaveNodes[ln->second];
445+
left = mLeaveNodes[ln->second];
418446
leaveNodeMap.erase(ln);
419447
} else {
420448
auto tn = treeNodes.find(conf.left);
421449
if (tn == treeNodes.end()) {
422450
std::cerr << "Internal error: can not find left child node with index " << conf.left << std::endl;
423451
return -1;
424452
}
425-
treeNodes.erase(tn);
426453
left = tn->second;
454+
treeNodes.erase(tn);
427455
}
428-
_NodeType* right = NULL;
456+
std::shared_ptr<_NodeType> right;
429457
auto rn = leaveNodeMap.find(conf.right);
430458
if (rn != leaveNodeMap.end()) {
431-
right = &mLeaveNodes[rn->second];
459+
right = mLeaveNodes[rn->second];
432460
leaveNodeMap.erase(rn);
433461
} else {
434462
auto tn = treeNodes.find(conf.right);
435463
if (tn == treeNodes.end()) {
436464
std::cerr << "Internal error: can not find right child node with index " << conf.right << std::endl;
437465
return -1;
438466
}
439-
treeNodes.erase(tn);
440467
right = tn->second;
468+
treeNodes.erase(tn);
441469
}
442-
_NodeType* combinedNode = new _NodeType(left, right);
443-
treeNodes[conf.index] = combinedNode;
470+
// make combined node shared ptr and add to map
471+
treeNodes[conf.index] = std::make_shared<_NodeType>(left, right);
444472
}
445473
if (leaveNodeMap.size() != 0 || treeNodes.size() != 1) {
446474
std::cerr << "error: " << leaveNodeMap.size() << " unhandled leave node(s)"
@@ -453,8 +481,12 @@ class HuffmanModel : public _BASE {
453481

454482
void print() const {
455483
if (mTreeNodes.size() > 0) {
456-
_NodeType* topNode = *mTreeNodes.begin();
457-
topNode->print();
484+
_NodeType* topNode = (*mTreeNodes.begin()).get();
485+
if (topNode) {
486+
topNode->print();
487+
} else {
488+
// TODO: handle this error condition
489+
}
458490
}
459491
};
460492

@@ -486,9 +518,9 @@ class HuffmanModel : public _BASE {
486518
// the alphabet, determined by template parameter
487519
typename _BASE::alphabet_type mAlphabet;
488520
// Huffman leave nodes containing symbol index to code mapping
489-
std::vector<_NodeType> mLeaveNodes;
521+
std::vector<std::shared_ptr<_NodeType>> mLeaveNodes;
490522
// multiset, order determined by less functor working on pointers
491-
std::multiset<_NodeType*, pless<_NodeType*> > mTreeNodes;
523+
std::multiset<std::shared_ptr<_NodeType>, isless<std::shared_ptr<_NodeType>>> mTreeNodes;
492524
};
493525

494526
}; // namespace AliceO2

0 commit comments

Comments
 (0)