forked from alibaba/AliSQL
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathNdbQueryOperation.cpp
More file actions
5212 lines (4534 loc) · 152 KB
/
NdbQueryOperation.cpp
File metadata and controls
5212 lines (4534 loc) · 152 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <ndb_global.h>
#include <new>
#include <NdbDictionary.hpp>
#include <NdbIndexScanOperation.hpp>
#include "NdbQueryBuilder.hpp"
#include "NdbQueryOperation.hpp"
#include "API.hpp"
#include "NdbQueryBuilderImpl.hpp"
#include "NdbQueryOperationImpl.hpp"
#include "NdbInterpretedCode.hpp"
#include <signaldata/TcKeyReq.hpp>
#include <signaldata/TcKeyRef.hpp>
#include <signaldata/ScanTab.hpp>
#include <signaldata/QueryTree.hpp>
#include <signaldata/DbspjErr.hpp>
#include "AttributeHeader.hpp"
#include <Bitmask.hpp>
#if 0
#define DEBUG_CRASH() assert(false)
#else
#define DEBUG_CRASH()
#endif
/** To prevent compiler warnings about variables that are only used in asserts
* (when building optimized version).
*/
#define UNUSED(x) ((void)(x))
// To force usage of SCAN_NEXTREQ even for small scans resultsets
static const bool testNextReq = false;
/* Various error codes that are not specific to NdbQuery. */
static const int Err_TupleNotFound = 626;
static const int Err_MemoryAlloc = 4000;
static const int Err_SendFailed = 4002;
static const int Err_FunctionNotImplemented = 4003;
static const int Err_UnknownColumn = 4004;
static const int Err_ReceiveTimedOut = 4008;
static const int Err_NodeFailCausedAbort = 4028;
static const int Err_ParameterError = 4118;
static const int Err_SimpleDirtyReadFailed = 4119;
static const int Err_WrongFieldLength = 4209;
static const int Err_ReadTooMuch = 4257;
static const int Err_InvalidRangeNo = 4286;
static const int Err_DifferentTabForKeyRecAndAttrRec = 4287;
static const int Err_KeyIsNULL = 4316;
static const int Err_FinaliseNotCalled = 4519;
static const int Err_InterpretedCodeWrongTab = 4524;
/* A 'void' index for a tuple in internal parent / child correlation structs .*/
static const Uint16 tupleNotFound = 0xffff;
/** Set to true to trace incomming signals.*/
const bool traceSignals = false;
enum
{
/**
* Set NdbQueryOperationImpl::m_parallelism to this value to indicate that
* scan parallelism should be adaptive.
*/
Parallelism_adaptive = 0xffff0000,
/**
* Set NdbQueryOperationImpl::m_parallelism to this value to indicate that
* all fragments should be scanned in parallel.
*/
Parallelism_max = 0xffff0001
};
/**
* A class for accessing the correlation data at the end of a tuple (for
* scan queries). These data have the following layout:
*
* Word 0: AttributeHeader
* Word 1, upper halfword: tuple id of parent tuple.
* Word 1, lower halfword: tuple id of this tuple.
* Word 2: Id of receiver for root operation (where the ancestor tuple of this
* tuple will go).
*
* Both tuple identifiers are unique within this batch and root fragment.
* With these identifiers, it is possible to relate a tuple to its parent and
* children. That way, results for child operations can be updated correctly
* when the application iterates over the results of the root scan operation.
*/
class TupleCorrelation
{
public:
static const Uint32 wordCount = 1;
explicit TupleCorrelation()
: m_correlation((tupleNotFound<<16) | tupleNotFound)
{}
/** Conversion to/from Uint32 to store/fetch from buffers */
explicit TupleCorrelation(Uint32 val)
: m_correlation(val)
{}
Uint32 toUint32() const
{ return m_correlation; }
Uint16 getTupleId() const
{ return m_correlation & 0xffff;}
Uint16 getParentTupleId() const
{ return m_correlation >> 16;}
private:
Uint32 m_correlation;
}; // class TupleCorrelation
class CorrelationData
{
public:
static const Uint32 wordCount = 3;
explicit CorrelationData(const Uint32* tupleData, Uint32 tupleLength):
m_corrPart(tupleData + tupleLength - wordCount)
{
assert(tupleLength >= wordCount);
assert(AttributeHeader(m_corrPart[0]).getAttributeId()
== AttributeHeader::CORR_FACTOR64);
assert(AttributeHeader(m_corrPart[0]).getByteSize() == 2*sizeof(Uint32));
assert(getTupleCorrelation().getTupleId()<tupleNotFound);
assert(getTupleCorrelation().getParentTupleId()<tupleNotFound);
}
Uint32 getRootReceiverId() const
{ return m_corrPart[2];}
const TupleCorrelation getTupleCorrelation() const
{ return TupleCorrelation(m_corrPart[1]); }
private:
const Uint32* const m_corrPart;
}; // class CorrelationData
/**
* If a query has a scan operation as its root, then that scan will normally
* read from several fragments of its target table. Each such root fragment
* scan, along with any child lookup operations that are spawned from it,
* runs independently, in the sense that:
* - The API will know when it has received all data from a fragment for a
* given batch and all child operations spawned from it.
* - When one fragment is complete (for a batch) the API will make these data
* available to the application, even if other fragments are not yet complete.
* - The tuple identifiers that are used for matching children with parents are
* only guaranteed to be unique within one batch, operation, and root
* operation fragment. Tuples derived from different root fragments must
* thus be kept apart.
*
* This class manages the state of one such read operation, from one particular
* fragment of the target table of the root operation. If the root operation
* is a lookup, then there will be only one instance of this class.
*/
class NdbRootFragment {
public:
/** Build hash map for mapping from root receiver id to NdbRootFragment
* instance.*/
static void buildReciverIdMap(NdbRootFragment* frags,
Uint32 noOfFrags);
/** Find NdbRootFragment instance corresponding to a given root receiver id.*/
static NdbRootFragment* receiverIdLookup(NdbRootFragment* frags,
Uint32 noOfFrags,
Uint32 receiverId);
explicit NdbRootFragment();
~NdbRootFragment();
/**
* Initialize object.
* @param query Enclosing query.
* @param fragNo This object manages state for reading from the fragNo'th
* fragment that the root operation accesses.
*/
void init(NdbQueryImpl& query, Uint32 fragNo);
static void clear(NdbRootFragment* frags, Uint32 noOfFrags);
Uint32 getFragNo() const
{ return m_fragNo; }
/**
* Prepare for receiving another batch of results.
*/
void prepareNextReceiveSet();
/**
* Prepare for reading another batch of results.
*/
void grabNextResultSet(); // Need mutex lock
bool hasReceivedMore() const; // Need mutex lock
void setReceivedMore(); // Need mutex lock
void incrOutstandingResults(Int32 delta)
{
m_outstandingResults += delta;
}
void clearOutstandingResults()
{
m_outstandingResults = 0;
}
void setConfReceived(Uint32 tcPtrI);
/**
* The root operation will read from a number of fragments of a table.
* This method checks if all results for the current batch has been
* received for a given fragment. This includes both results for the root
* operation and any child operations. Note that child operations may access
* other fragments; the fragment number only refers to what
* the root operation does.
*
* @return True if current batch is complete for this fragment.
*/
bool isFragBatchComplete() const
{
assert(m_fragNo!=voidFragNo);
return m_confReceived && m_outstandingResults==0;
}
/**
* Get the result stream that handles results derived from this root
* fragment for a particular operation.
* @param operationNo The id of the operation.
* @return The result stream for this root fragment.
*/
NdbResultStream& getResultStream(Uint32 operationNo) const;
NdbResultStream& getResultStream(const NdbQueryOperationImpl& op) const
{ return getResultStream(op.getQueryOperationDef().getQueryOperationIx()); }
Uint32 getReceiverId() const;
Uint32 getReceiverTcPtrI() const;
/**
* @return True if there are no more batches to be received for this fragment.
*/
bool finalBatchReceived() const;
/**
* @return True if there are no more results from this root fragment (for
* the current batch).
*/
bool isEmpty() const;
/**
* This method is used for marking which streams belonging to this
* NdbRootFragment which has remaining batches for a sub scan
* instantiated from the current batch of its parent operation.
*/
void setRemainingSubScans(Uint32 nodeMask)
{
m_remainingScans = nodeMask;
}
/** Release resources after last row has been returned */
void postFetchRelease();
private:
/** No copying.*/
NdbRootFragment(const NdbRootFragment&);
NdbRootFragment& operator=(const NdbRootFragment&);
STATIC_CONST( voidFragNo = 0xffffffff);
/** Enclosing query.*/
NdbQueryImpl* m_query;
/** Number of the root operation fragment.*/
Uint32 m_fragNo;
/** For processing results originating from this root fragment (Array of).*/
NdbResultStream* m_resultStreams;
/**
* Number of available prefetched ResultSets which are completely
* received. Will be made available for reading by calling
* ::grabNextResultSet()
*/
Uint32 m_availResultSets; // Need mutex
/**
* The number of outstanding TCKEYREF or TRANSID_AI messages to receive
* for the fragment. This includes both messages related to the
* root operation and any descendant operation that was instantiated as
* a consequence of tuples found by the root operation.
* This number may temporarily be negative if e.g. TRANSID_AI arrives
* before SCAN_TABCONF.
*/
Int32 m_outstandingResults;
/**
* This is an array with one element for each fragment that the root
* operation accesses (i.e. one for a lookup, all for a table scan).
*
* Each element is true iff a SCAN_TABCONF (for that fragment) or
* TCKEYCONF message has been received
*/
bool m_confReceived;
/**
* A bitmask of operation id's for which we will receive more
* ResultSets in a NEXTREQ.
*/
Uint32 m_remainingScans;
/**
* Used for implementing a hash map from root receiver ids to a
* NdbRootFragment instance. m_idMapHead is the index of the first
* NdbRootFragment in the m_fragNo'th hash bucket.
*/
int m_idMapHead;
/**
* Used for implementing a hash map from root receiver ids to a
* NdbRootFragment instance. m_idMapNext is the index of the next
* NdbRootFragment in the same hash bucket as this one.
*/
int m_idMapNext;
}; //NdbRootFragment
/**
* 'class NdbResultSet' is a helper for 'class NdbResultStream'.
* It manages the buffers which rows are received into and
* read from.
*/
class NdbResultSet
{
friend class NdbResultStream;
public:
explicit NdbResultSet();
void init(NdbQueryImpl& query,
Uint32 maxRows, Uint32 rowSize);
void prepareReceive(NdbReceiver& receiver)
{
m_rowCount = 0;
receiver.prepareReceive(m_buffer);
}
void prepareRead(NdbReceiver& receiver)
{
receiver.prepareRead(m_buffer,m_rowCount);
}
Uint32 getRowCount() const
{ return m_rowCount; }
private:
/** No copying.*/
NdbResultSet(const NdbResultSet&);
NdbResultSet& operator=(const NdbResultSet&);
/** The buffers which we receive the results into */
char* m_buffer;
/** Used for checking if buffer overrun occurred. */
Uint32* m_batchOverflowCheck;
/** Array of TupleCorrelations for all rows in m_buffer */
TupleCorrelation* m_correlations;
Uint32 m_rowSize;
/** The current #rows in 'm_buffer'.*/
Uint32 m_rowCount;
}; // class NdbResultSet
/**
* This class manages the subset of result data for one operation that is
* derived from one fragment of the root operation. Note that the result tuples
* may come from any fragment, but they all have initial ancestors from the
* same fragment of the root operation.
* For each operation there will thus be one NdbResultStream for each fragment
* that the root operation reads from (one in the case of lookups.)
* This class has an NdbReceiver object for processing tuples as well as
* structures for correlating child and parent tuples.
*/
class NdbResultStream {
public:
/**
* @param operation The operation for which we will receive results.
* @param rootFragNo 0..n-1 when the root operation reads from n fragments.
*/
explicit NdbResultStream(NdbQueryOperationImpl& operation,
NdbRootFragment& rootFrag);
~NdbResultStream();
/**
* Prepare for receiving first results.
*/
void prepare();
/** Prepare for receiving next batch of scan results. */
void prepareNextReceiveSet();
NdbReceiver& getReceiver()
{ return m_receiver; }
const NdbReceiver& getReceiver() const
{ return m_receiver; }
const char* getCurrentRow()
{ return m_receiver.get_row(); }
/**
* Process an incomming tuple for this stream. Extract parent and own tuple
* ids and pass it on to m_receiver.
*
* @param ptr buffer holding tuple.
* @param len buffer length.
*/
void execTRANSID_AI(const Uint32 *ptr, Uint32 len,
TupleCorrelation correlation);
/**
* A complete batch has been received for a fragment on this NdbResultStream,
* Update whatever required before the appl. are allowed to navigate the result.
* @return true if node and all its siblings have returned all rows.
*/
bool prepareResultSet(Uint32 remainingScans);
/**
* Navigate within the current ResultSet to resp. first and next row.
* For non-parent operations in the pushed query, navigation is with respect
* to any preceding parents which results in this ResultSet depends on.
* Returns either the tupleNo within TupleSet[] which we navigated to, or
* tupleNotFound().
*/
Uint16 firstResult();
Uint16 nextResult();
/**
* Returns true if last row matching the current parent tuple has been
* consumed.
*/
bool isEmpty() const
{ return m_iterState == Iter_finished; }
/**
* This method
* returns true if this result stream holds the last batch of a sub scan.
* This means that it is the last batch of the scan that was instantiated
* from the current batch of its parent operation.
*/
bool isSubScanComplete(Uint32 remainingScans) const
{
/**
* Find the node number seen by the SPJ block. Since a unique index
* operation will have two distincts nodes in the tree used by the
* SPJ block, this number may be different from 'opNo'.
*/
const Uint32 internalOpNo = m_operation.getQueryOperationDef().getQueryOperationId();
const bool complete = !((remainingScans >> internalOpNo) & 1);
assert(complete || isScanResult()); // Lookups should always be 'complete'
return complete;
}
bool isScanQuery() const
{ return (m_properties & Is_Scan_Query); }
bool isScanResult() const
{ return (m_properties & Is_Scan_Result); }
bool isInnerJoin() const
{ return (m_properties & Is_Inner_Join); }
/** For debugging.*/
friend NdbOut& operator<<(NdbOut& out, const NdbResultStream&);
/**
* TupleSet contain two logically distinct set of information:
*
* - Child/Parent correlation set required to correlate
* child tuples with its parents. Child/Tuple pairs are indexed
* by tuple number which is the same as the order in which tuples
* appear in the NdbReceiver buffers.
*
* - A HashMap on 'm_parentId' used to locate tuples correlated
* to a parent tuple. Indexes by hashing the parentId such that:
* - [hash(parentId)].m_hash_head will then index the first
* TupleSet entry potential containing the parentId to locate.
* - .m_hash_next in the indexed TupleSet may index the next TupleSet
* to considder.
*
* Both the child/parent correlation set and the parentId HashMap has been
* folded into the same structure on order to reduce number of objects
* being dynamically allocated.
* As an advantage this results in an autoscaling of the hash bucket size .
*
* Structure is only present if 'isScanQuery'
*/
class TupleSet {
public:
// Tuple ids are unique within this batch and stream
Uint16 m_parentId; // Id of parent tuple which this tuple is correlated with
Uint16 m_tupleId; // Id of this tuple
Uint16 m_hash_head; // Index of first item in TupleSet[] matching a hashed parentId.
Uint16 m_hash_next; // 'next' index matching
bool m_skip; // Skip this tuple in result processing for now
/** If the n'th bit is set, then a matching tuple for the n,th child has been seen.
* This information is needed when generating left join tuples for those tuples
* that had no matching children.*/
Bitmask<(NDB_SPJ_MAX_TREE_NODES+31)/32> m_hasMatchingChild;
explicit TupleSet() : m_hash_head(tupleNotFound)
{}
private:
/** No copying.*/
TupleSet(const TupleSet&);
TupleSet& operator=(const TupleSet&);
};
private:
/**
* This stream handles results derived from specified
* m_rootFrag of the root operation.
*/
const NdbRootFragment& m_rootFrag;
/** Operation to which this resultStream belong.*/
NdbQueryOperationImpl& m_operation;
/** ResultStream for my parent operation, or NULL if I am root */
NdbResultStream* const m_parent;
const enum properties
{
Is_Scan_Query = 0x01,
Is_Scan_Result = 0x02,
Is_Inner_Join = 0x10
} m_properties;
/** The receiver object that unpacks transid_AI messages.*/
NdbReceiver m_receiver;
/**
* ResultSets are received into and read from this stream,
* intended to be extended into double buffered ResultSet later.
*/
NdbResultSet m_resultSets[1];
Uint32 m_read; // We read from m_resultSets[m_read]
Uint32 m_recv; // We receive into m_resultSets[m_recv]
/** This is the state of the iterator used by firstResult(), nextResult().*/
enum
{
/** The first row has not been fetched yet.*/
Iter_notStarted,
/** Is iterating the ResultSet, (implies 'm_currentRow!=tupleNotFound').*/
Iter_started,
/** Last row for current ResultSet has been returned.*/
Iter_finished
} m_iterState;
/**
* Tuple id of the current tuple, or 'tupleNotFound'
* if Iter_notStarted or Iter_finished.
*/
Uint16 m_currentRow;
/** Max #rows which this stream may recieve in its TupleSet structures */
Uint32 m_maxRows;
/** TupleSet contains the correlation between parent/childs */
TupleSet* m_tupleSet;
void buildResultCorrelations();
Uint16 getTupleId(Uint16 tupleNo) const
{ return (m_tupleSet) ? m_tupleSet[tupleNo].m_tupleId : 0; }
Uint16 getCurrentTupleId() const
{ return (m_currentRow==tupleNotFound) ? tupleNotFound : getTupleId(m_currentRow); }
Uint16 findTupleWithParentId(Uint16 parentId) const;
Uint16 findNextTuple(Uint16 tupleNo) const;
/** No copying.*/
NdbResultStream(const NdbResultStream&);
NdbResultStream& operator=(const NdbResultStream&);
}; //class NdbResultStream
//////////////////////////////////////////////
///////// NdbBulkAllocator methods ///////////
//////////////////////////////////////////////
NdbBulkAllocator::NdbBulkAllocator(size_t objSize)
:m_objSize(objSize),
m_maxObjs(0),
m_buffer(NULL),
m_nextObjNo(0)
{}
int NdbBulkAllocator::init(Uint32 maxObjs)
{
assert(m_buffer == NULL);
m_maxObjs = maxObjs;
// Add check for buffer overrun.
m_buffer = new char[m_objSize*m_maxObjs+1];
if (unlikely(m_buffer == NULL))
{
return Err_MemoryAlloc;
}
m_buffer[m_maxObjs * m_objSize] = endMarker;
return 0;
}
void NdbBulkAllocator::reset(){
// Overrun check.
assert(m_buffer == NULL || m_buffer[m_maxObjs * m_objSize] == endMarker);
// Overwrite with 0xff bytes to detect accidental use of released memory.
assert(m_buffer == NULL ||
memset(m_buffer, 0xff, m_maxObjs * m_objSize) != NULL);
delete [] m_buffer;
m_buffer = NULL;
m_nextObjNo = 0;
m_maxObjs = 0;
}
void* NdbBulkAllocator::allocObjMem(Uint32 noOfObjs)
{
assert(m_nextObjNo + noOfObjs <= m_maxObjs);
void * const result = m_buffer+m_objSize*m_nextObjNo;
m_nextObjNo += noOfObjs;
return m_nextObjNo > m_maxObjs ? NULL : result;
}
///////////////////////////////////////////
///////// NdbResultSet methods ///////////
///////////////////////////////////////////
NdbResultSet::NdbResultSet() :
m_buffer(NULL),
m_batchOverflowCheck(NULL),
m_correlations(NULL),
m_rowSize(0),
m_rowCount(0)
{}
void
NdbResultSet::init(NdbQueryImpl& query,
Uint32 maxRows,
Uint32 rowSize)
{
m_rowSize = rowSize;
{
const int bufferSize = rowSize * maxRows;
NdbBulkAllocator& bufferAlloc = query.getRowBufferAlloc();
m_buffer = reinterpret_cast<char*>(bufferAlloc.allocObjMem(bufferSize));
// So that we can test for buffer overrun.
m_batchOverflowCheck =
reinterpret_cast<Uint32*>(bufferAlloc.allocObjMem(sizeof(Uint32)));
*m_batchOverflowCheck = 0xacbd1234;
if (query.getQueryDef().isScanQuery())
{
m_correlations = reinterpret_cast<TupleCorrelation*>
(bufferAlloc.allocObjMem(maxRows*sizeof(TupleCorrelation)));
}
}
}
//////////////////////////////////////////////
///////// NdbResultStream methods ///////////
//////////////////////////////////////////////
NdbResultStream::NdbResultStream(NdbQueryOperationImpl& operation,
NdbRootFragment& rootFrag)
:
m_rootFrag(rootFrag),
m_operation(operation),
m_parent(operation.getParentOperation()
? &rootFrag.getResultStream(*operation.getParentOperation())
: NULL),
m_properties(
(enum properties)
((operation.getQueryDef().isScanQuery()
? Is_Scan_Query : 0)
| (operation.getQueryOperationDef().isScanOperation()
? Is_Scan_Result : 0)
| (operation.getQueryOperationDef().getMatchType() != NdbQueryOptions::MatchAll
? Is_Inner_Join : 0))),
m_receiver(operation.getQuery().getNdbTransaction().getNdb()),
m_resultSets(), m_read(0xffffffff), m_recv(0),
m_iterState(Iter_notStarted),
m_currentRow(tupleNotFound),
m_maxRows(0),
m_tupleSet(NULL)
{};
NdbResultStream::~NdbResultStream()
{
for (int i = static_cast<int>(m_maxRows)-1; i >= 0; i--)
{
m_tupleSet[i].~TupleSet();
}
}
void
NdbResultStream::prepare()
{
const Uint32 rowSize = m_operation.getRowSize();
NdbQueryImpl &query = m_operation.getQuery();
/* Parent / child correlation is only relevant for scan type queries
* Don't create a m_tupleSet with these correlation id's for lookups!
*/
if (isScanQuery())
{
m_maxRows = m_operation.getMaxBatchRows();
m_tupleSet =
new (query.getTupleSetAlloc().allocObjMem(m_maxRows))
TupleSet[m_maxRows];
}
else
m_maxRows = 1;
m_resultSets[0].init(query, m_maxRows, rowSize);
m_receiver.init(NdbReceiver::NDB_QUERY_OPERATION, false, &m_operation);
m_receiver.do_setup_ndbrecord(
m_operation.getNdbRecord(),
m_maxRows,
0 /*key_size*/,
0 /*read_range_no*/,
rowSize,
m_resultSets[m_recv].m_buffer);
} //NdbResultStream::prepare
/** Locate, and return 'tupleNo', of first tuple with specified parentId.
* parentId == tupleNotFound is used as a special value for iterating results
* from the root operation in the order in which they were inserted by
* ::buildResultCorrelations()
*
* Position of 'currentRow' is *not* updated and should be modified by the
* caller if it wants to keep the new position.
*/
Uint16
NdbResultStream::findTupleWithParentId(Uint16 parentId) const
{
assert ((parentId==tupleNotFound) == (m_parent==NULL));
if (likely(m_resultSets[m_read].m_rowCount>0))
{
if (m_tupleSet==NULL)
{
assert (m_resultSets[m_read].m_rowCount <= 1);
return 0;
}
const Uint16 hash = (parentId % m_maxRows);
Uint16 currentRow = m_tupleSet[hash].m_hash_head;
while (currentRow != tupleNotFound)
{
assert(currentRow < m_maxRows);
if (m_tupleSet[currentRow].m_skip == false &&
m_tupleSet[currentRow].m_parentId == parentId)
{
return currentRow;
}
currentRow = m_tupleSet[currentRow].m_hash_next;
}
}
return tupleNotFound;
} //NdbResultStream::findTupleWithParentId()
/** Locate, and return 'tupleNo', of next tuple with same parentId as currentRow.
* Position of 'currentRow' is *not* updated and should be modified by the
* caller if it wants to keep the new position.
*/
Uint16
NdbResultStream::findNextTuple(Uint16 tupleNo) const
{
if (tupleNo!=tupleNotFound && m_tupleSet!=NULL)
{
assert(tupleNo < m_maxRows);
Uint16 parentId = m_tupleSet[tupleNo].m_parentId;
Uint16 nextRow = m_tupleSet[tupleNo].m_hash_next;
while (nextRow != tupleNotFound)
{
assert(nextRow < m_maxRows);
if (m_tupleSet[nextRow].m_skip == false &&
m_tupleSet[nextRow].m_parentId == parentId)
{
return nextRow;
}
nextRow = m_tupleSet[nextRow].m_hash_next;
}
}
return tupleNotFound;
} //NdbResultStream::findNextTuple()
Uint16
NdbResultStream::firstResult()
{
Uint16 parentId = tupleNotFound;
if (m_parent!=NULL)
{
parentId = m_parent->getCurrentTupleId();
if (parentId == tupleNotFound)
{
m_currentRow = tupleNotFound;
m_iterState = Iter_finished;
return tupleNotFound;
}
}
if ((m_currentRow=findTupleWithParentId(parentId)) != tupleNotFound)
{
m_iterState = Iter_started;
m_receiver.setCurrentRow(m_resultSets[m_read].m_buffer, m_currentRow);
return m_currentRow;
}
m_iterState = Iter_finished;
return tupleNotFound;
} //NdbResultStream::firstResult()
Uint16
NdbResultStream::nextResult()
{
// Fetch next row for this stream
if (m_currentRow != tupleNotFound &&
(m_currentRow=findNextTuple(m_currentRow)) != tupleNotFound)
{
m_iterState = Iter_started;
m_receiver.setCurrentRow(m_resultSets[m_read].m_buffer, m_currentRow);
return m_currentRow;
}
m_iterState = Iter_finished;
return tupleNotFound;
} //NdbResultStream::nextResult()
/**
* Callback when a TRANSID_AI signal (receive row) is processed.
*/
void
NdbResultStream::execTRANSID_AI(const Uint32 *ptr, Uint32 len,
TupleCorrelation correlation)
{
NdbResultSet& receiveSet = m_resultSets[m_recv];
if (isScanQuery())
{
/**
* Store TupleCorrelation.
*/
receiveSet.m_correlations[receiveSet.m_rowCount] = correlation;
}
m_receiver.execTRANSID_AI(ptr, len);
receiveSet.m_rowCount++;
} // NdbResultStream::execTRANSID_AI()
/**
* Make preparation for another batch of results to be received.
* This NdbResultStream, and all its sibling will receive a batch
* of results from the datanodes.
*/
void
NdbResultStream::prepareNextReceiveSet()
{
assert (isScanQuery());
m_iterState = Iter_notStarted;
m_currentRow = tupleNotFound;
m_resultSets[m_recv].prepareReceive(m_receiver);
/**
* If this stream will get new rows in the next batch, then so will
* all of its descendants.
*/
for (Uint32 childNo = 0; childNo < m_operation.getNoOfChildOperations();
childNo++)
{
NdbQueryOperationImpl& child = m_operation.getChildOperation(childNo);
m_rootFrag.getResultStream(child).prepareNextReceiveSet();
}
} //NdbResultStream::prepareNextReceiveSet
/**
* Make preparations for another batch of result to be read:
* - Fill in parent/child result correlations in m_tupleSet[]
* - ... or reset m_tupleSet[] if we reuse the previous.
* - Apply inner/outer join filtering to remove non qualifying
* rows.
*/
bool
NdbResultStream::prepareResultSet(Uint32 remainingScans)
{
bool isComplete = isSubScanComplete(remainingScans); //Childs with more rows
assert(isComplete || isScanResult()); //Lookups always 'complete'
m_read = m_recv;
NdbResultSet& readResult = m_resultSets[m_read];
// Set correct buffer and #rows received by this ResultSet.
readResult.prepareRead(m_receiver);
/**
* Prepare NdbResultSet for reading - either the next received
* from datanodes or reuse the last as has been determined by
* ::prepareNextReceiveSet()
*/
if (m_tupleSet!=NULL)
{
const bool newResults = (m_iterState!=Iter_finished);
if (newResults)
{
buildResultCorrelations();
}
else
{
// Makes all rows in 'TupleSet' available (clear 'm_skip' flag)
for (Uint32 tupleNo=0; tupleNo<readResult.getRowCount(); tupleNo++)
{
m_tupleSet[tupleNo].m_skip = false;
}
}
}
/**
* Recursively iterate all child results depth first.
* Filter away any result rows which should not be visible (yet) -
* Either due to incomplete child batches, or the join being an 'inner join'.
* Set result itterator state to 'before first' resultrow.
*/
for (Uint32 childNo=0; childNo < m_operation.getNoOfChildOperations(); childNo++)
{
const NdbQueryOperationImpl& child = m_operation.getChildOperation(childNo);
NdbResultStream& childStream = m_rootFrag.getResultStream(child);
const bool allSubScansComplete = childStream.prepareResultSet(remainingScans);
Uint32 childId = child.getQueryOperationDef().getQueryOperationIx();
/* Condition 1) & 2) calc'ed outside loop, see comments further below: */
const bool skipNonMatches = !allSubScansComplete || // 1)
childStream.isInnerJoin(); // 2)
if (m_tupleSet!=NULL)
{
for (Uint32 tupleNo=0; tupleNo<readResult.getRowCount(); tupleNo++)
{
if (!m_tupleSet[tupleNo].m_skip)
{
Uint16 tupleId = getTupleId(tupleNo);
if (childStream.findTupleWithParentId(tupleId)!=tupleNotFound)
m_tupleSet[tupleNo].m_hasMatchingChild.set(childId);
/////////////////////////////////
// No child matched for this row. Making parent row visible
// will cause a NULL (outer join) row to be produced.
// Skip NULL row production when:
// 1) Some child batches are not complete; they may contain later matches.
// 2) Join type is 'inner join', skip as no child are matching.
// 3) A match was found in a previous batch.
// Condition 1) & 2) above is precalculated in 'bool skipNonMatches'
//
else if (skipNonMatches // 1 & 2)