Skip to content

Commit d74ae2a

Browse files
authored
[6.7][ML] Improve autodetect logic for persistence (elastic#437) (elastic#440)
Changed the logic surrounding persistence of both state and quantiles on graceful shutdown so that persistence only occurs if and only if at least one input record has been processed or time has been advanced. Backport elastic#437
1 parent 9d03ecd commit d74ae2a

17 files changed

+183
-5
lines changed

docs/CHANGELOG.asciidoc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ Adjust seccomp filter for Fedora 29. {ml-pull}354[#354]
4242
4343
=== Bug Fixes
4444
45-
=== Regressions
45+
* Improve autodetect logic for persistence. {ml-pull}437[#437]
4646
4747
== {es} version 6.6.2
4848

include/api/CAnomalyJob.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,9 @@ class API_EXPORT CAnomalyJob : public CDataProcessor {
182182
//! How many records did we handle?
183183
virtual uint64_t numRecordsHandled() const;
184184

185+
//! Is persistence needed?
186+
virtual bool isPersistenceNeeded(const std::string& description) const;
187+
185188
//! Log a list of the detectors and keys
186189
void description() const;
187190

@@ -454,6 +457,9 @@ class API_EXPORT CAnomalyJob : public CDataProcessor {
454457
//! The hierarchical results normalizer.
455458
model::CHierarchicalResultsNormalizer m_Normalizer;
456459

460+
//! Flag indicating whether or not time has been advanced.
461+
bool m_TimeAdvanced{false};
462+
457463
friend class ::CBackgroundPersisterTest;
458464
friend class ::CAnomalyJobTest;
459465
};

include/api/CDataProcessor.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ class API_EXPORT CDataProcessor : private core::CNonCopyable {
8282
//! Access the output handler
8383
virtual COutputHandler& outputHandler() = 0;
8484

85+
//! Is persistence needed?
86+
virtual bool isPersistenceNeeded(const std::string& description) const = 0;
87+
8588
//! Create debug for a record. This is expensive so should NOT be
8689
//! called for every record as a matter of course.
8790
static std::string debugPrintRecord(const TStrStrUMap& dataRowFields);

include/api/CFieldDataTyper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ class API_EXPORT CFieldDataTyper : public CDataProcessor {
9898
virtual bool restoreState(core::CDataSearcher& restoreSearcher,
9999
core_t::TTime& completeToTime);
100100

101+
//! Is persistence needed?
102+
virtual bool isPersistenceNeeded(const std::string& description) const;
103+
101104
//! Persist current state
102105
virtual bool persistState(core::CDataAdder& persister);
103106

include/api/COutputChainer.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ class API_EXPORT COutputChainer : public COutputHandler {
8181
//! Persist current state due to the periodic persistence being triggered.
8282
virtual bool periodicPersistState(CBackgroundPersister& persister);
8383

84+
//! Is persistence needed?
85+
virtual bool isPersistenceNeeded(const std::string& description) const;
86+
8487
//! The chainer does consume control messages, because it passes them on
8588
//! to whatever processor it's chained to.
8689
virtual bool consumesControlMessages();

include/api/COutputHandler.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ class API_EXPORT COutputHandler : private core::CNonCopyable {
9797
//! Persist current state due to the periodic persistence being triggered.
9898
virtual bool periodicPersistState(CBackgroundPersister& persister);
9999

100+
//! Is persistence needed?
101+
virtual bool isPersistenceNeeded(const std::string& description) const;
102+
100103
//! Does this handler deal with control messages?
101104
virtual bool consumesControlMessages();
102105

include/config/CAutoconfigurer.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ class CONFIG_EXPORT CAutoconfigurer : public api::CDataProcessor {
4646
//! Generate the report.
4747
virtual void finalise();
4848

49+
//! Is persistence needed?
50+
virtual bool isPersistenceNeeded(const std::string& description) const;
51+
4952
//! No-op.
5053
virtual bool restoreState(core::CDataSearcher& restoreSearcher,
5154
core_t::TTime& completeToTime);

lib/api/CAnomalyJob.cc

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,8 +216,10 @@ bool CAnomalyJob::handleRecord(const TStrStrUMap& dataRowFields) {
216216
}
217217

218218
void CAnomalyJob::finalise() {
219-
// Persist final state of normalizer
220-
m_JsonOutputWriter.persistNormalizer(m_Normalizer, m_LastNormalizerPersistTime);
219+
// Persist final state of normalizer iff an input record has been handled or time has been advanced.
220+
if (this->isPersistenceNeeded("quantiles state")) {
221+
m_JsonOutputWriter.persistNormalizer(m_Normalizer, m_LastNormalizerPersistTime);
222+
}
221223

222224
// Prune the models so that the final persisted state is as neat as possible
223225
this->pruneAllModels();
@@ -392,11 +394,23 @@ void CAnomalyJob::advanceTime(const std::string& time_) {
392394
LOG_TRACE(<< "Received request to advance time to " << time);
393395
}
394396

397+
m_TimeAdvanced = true;
398+
395399
this->outputBucketResultsUntil(time);
396400

397401
this->timeNow(time);
398402
}
399403

404+
bool CAnomalyJob::isPersistenceNeeded(const std::string& description) const {
405+
if ((m_NumRecordsHandled == 0) && (m_TimeAdvanced == false)) {
406+
LOG_DEBUG(<< "Will not attempt to persist " << description
407+
<< ". Zero records were handled and time has not been advanced.");
408+
return false;
409+
}
410+
411+
return true;
412+
}
413+
400414
void CAnomalyJob::outputBucketResultsUntil(core_t::TTime time) {
401415
// If the bucket time has increased, output results for all field names
402416
core_t::TTime bucketLength = m_ModelConfig.bucketLength();

lib/api/CCmdSkeleton.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,7 @@ bool CCmdSkeleton::persistState() {
5656
return true;
5757
}
5858

59-
if (m_Processor.numRecordsHandled() == 0) {
60-
LOG_DEBUG(<< "Zero records were handled - will not attempt to persist state");
59+
if (m_Processor.isPersistenceNeeded("state") == false) {
6160
return true;
6261
}
6362

lib/api/CFieldDataTyper.cc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,20 @@ bool CFieldDataTyper::persistState(core::CDataAdder& persister) {
335335
return this->doPersistState(m_DataTyper->makePersistFunc(), m_ExamplesCollector, persister);
336336
}
337337

338+
bool CFieldDataTyper::isPersistenceNeeded(const std::string& description) const {
339+
// Pass on the request in case we're chained
340+
if (m_OutputHandler.isPersistenceNeeded(description)) {
341+
return true;
342+
}
343+
344+
if (m_NumRecordsHandled == 0) {
345+
LOG_DEBUG(<< "Zero records were handled - will not attempt to persist "
346+
<< description << ".");
347+
return false;
348+
}
349+
return true;
350+
}
351+
338352
bool CFieldDataTyper::doPersistState(const CDataTyper::TPersistFunc& dataTyperPersistFunc,
339353
const CCategoryExamplesCollector& examplesCollector,
340354
core::CDataAdder& persister) {

0 commit comments

Comments
 (0)