Skip to content

Commit abcfbd7

Browse files
authored
ARROW-17071: [C++][Compute] Fixing off-by-one error in hash join node (apache#13616)
Fix an off-by-one error in the hash join node. Zero the allocated bit vectors in the hash join node to fix another Valgrind error.
Authored-by: michalursa <michal@ursacomputing.com>
Signed-off-by: Weston Pace <weston.pace@gmail.com>
1 parent ffd31d8 commit abcfbd7

2 files changed

Lines changed: 14 additions & 1 deletion

File tree

cpp/src/arrow/compute/exec/util.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -335,7 +335,7 @@ class TailSkipForSIMD {
335335
static int FixSelection(int64_t num_rows_safe, int num_selected,
336336
const uint16_t* selection) {
337337
int num_selected_safe = num_selected;
338-
while (num_selected_safe > 0 && selection[num_selected_safe] >= num_rows_safe) {
338+
while (num_selected_safe > 0 && selection[num_selected_safe - 1] >= num_rows_safe) {
339339
--num_selected_safe;
340340
}
341341
return num_selected_safe;

cpp/src/arrow/compute/light_array.cc

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -237,13 +237,17 @@ Status ResizableArrayData::ResizeFixedLengthBuffers(int num_rows_new) {
237237
buffers_[kValidityBuffer],
238238
AllocateResizableBuffer(
239239
bit_util::BytesForBits(num_rows_allocated_new) + kNumPaddingBytes, pool_));
240+
memset(mutable_data(kValidityBuffer), 0,
241+
bit_util::BytesForBits(num_rows_allocated_new) + kNumPaddingBytes);
240242
if (column_metadata.is_fixed_length) {
241243
if (column_metadata.fixed_length == 0) {
242244
ARROW_ASSIGN_OR_RAISE(
243245
buffers_[kFixedLengthBuffer],
244246
AllocateResizableBuffer(
245247
bit_util::BytesForBits(num_rows_allocated_new) + kNumPaddingBytes,
246248
pool_));
249+
memset(mutable_data(kFixedLengthBuffer), 0,
250+
bit_util::BytesForBits(num_rows_allocated_new) + kNumPaddingBytes);
247251
} else {
248252
ARROW_ASSIGN_OR_RAISE(
249253
buffers_[kFixedLengthBuffer],
@@ -267,13 +271,22 @@ Status ResizableArrayData::ResizeFixedLengthBuffers(int num_rows_new) {
267271
ARROW_DCHECK(buffers_[kValidityBuffer] != NULLPTR &&
268272
buffers_[kVariableLengthBuffer] != NULLPTR);
269273

274+
int64_t bytes_for_bits_before =
275+
bit_util::BytesForBits(num_rows_allocated_) + kNumPaddingBytes;
276+
int64_t bytes_for_bits_after =
277+
bit_util::BytesForBits(num_rows_allocated_new) + kNumPaddingBytes;
278+
270279
RETURN_NOT_OK(buffers_[kValidityBuffer]->Resize(
271280
bit_util::BytesForBits(num_rows_allocated_new) + kNumPaddingBytes));
281+
memset(mutable_data(kValidityBuffer) + bytes_for_bits_before, 0,
282+
bytes_for_bits_after - bytes_for_bits_before);
272283

273284
if (column_metadata.is_fixed_length) {
274285
if (column_metadata.fixed_length == 0) {
275286
RETURN_NOT_OK(buffers_[kFixedLengthBuffer]->Resize(
276287
bit_util::BytesForBits(num_rows_allocated_new) + kNumPaddingBytes));
288+
memset(mutable_data(kFixedLengthBuffer) + bytes_for_bits_before, 0,
289+
bytes_for_bits_after - bytes_for_bits_before);
277290
} else {
278291
RETURN_NOT_OK(buffers_[kFixedLengthBuffer]->Resize(
279292
num_rows_allocated_new * column_metadata.fixed_length + kNumPaddingBytes));

0 commit comments

Comments
 (0)