Skip to content

Commit be78652

Browse files
committed
In ProvDBpruneCore/ProvDBprune, algorithm initialization is now performed in the constructor (the factory function was changed to support this).
Fixed PSshardProvenanceDBclient not defining the handshake functions (they now have empty bodies, as no handshake is needed). Added a unit test for ProvDBprune that tests pruning and stats rebuild for multiple shards.
1 parent 0687766 commit be78652

File tree

9 files changed

+163
-11
lines changed

9 files changed

+163
-11
lines changed

include/chimbuko/core/provdb/ProvDBpruneCore.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ namespace chimbuko {
1010

1111
class ProvDBpruneCore{
1212
public:
13-
void prune(const std::string &algorithm, const ADOutlier::AlgoParams &algo_params, const std::string &params_ser, sonata::Database &db);
13+
ProvDBpruneCore(const std::string &algorithm, const ADOutlier::AlgoParams &algo_params, const std::string &model_ser);
14+
15+
void prune(sonata::Database &db);
1416

1517
/**
1618
* @brief Module implementation of pruning given the database shard. This same instance of ProvDBpruneCore will be called in turn for all shards
@@ -23,6 +25,8 @@ namespace chimbuko {
2325
*/
2426
virtual void finalize(sonata::Database &global_db){}
2527

28+
public:
29+
std::unique_ptr<ADOutlier> m_outlier;
2630
};
2731

2832
}

include/chimbuko/core/pserver/PSshardProvenanceDBclient.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,17 @@ namespace chimbuko{
2222
* @param addr_file_dir The directory containing the address files created by the provDB
2323
*/
2424
void connectShard(const std::string &addr_file_dir, int shard, int nshards, int ninstances);
25+
26+
/**
27+
* @brief No handshake is needed
28+
*/
29+
void handshakeHello(thallium::engine &eng, thallium::endpoint &server) override{};
30+
31+
/**
32+
* @brief No handshake is needed.
33+
*/
34+
void handshakeGoodbye(thallium::engine &eng, thallium::endpoint &server) override{};
35+
2536
};
2637

2738
}

include/chimbuko/modules/factory.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,12 @@ namespace chimbuko{
1919

2020
/**
2121
*@brief A factory function for ProvDBpruneCore instances
22+
*@param algorithm The AD algorithm
23+
*@param algo_params Parameters for the algorithm
24+
*@param model_ser The serialized model
2225
*/
23-
std::unique_ptr<ProvDBpruneCore> factoryInstantiateProvDBprune(const std::string &module);
26+
std::unique_ptr<ProvDBpruneCore> factoryInstantiateProvDBprune(const std::string &module,
27+
const std::string &algorithm, const ADOutlier::AlgoParams &algo_params, const std::string &model_ser);
2428

2529
/**
2630
*@brief A factory function for PSmoduleDataManager instances

include/chimbuko/modules/performance_analysis/provdb/ProvDBprune.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ namespace chimbuko {
1414

1515
class ProvDBprune: public ProvDBpruneCore{
1616
public:
17+
ProvDBprune(const std::string &algorithm, const ADOutlier::AlgoParams &algo_params, const std::string &model_ser): ProvDBpruneCore(algorithm,algo_params,model_ser){}
18+
1719
/**
1820
* @brief Prune the database shard. Both the anomalies and normalexecs will be updated
1921
*/

src/core/provdb/ProvDBpruneCore.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44

55
using namespace chimbuko;
66

7-
void ProvDBpruneCore::prune(const std::string &algorithm, const ADOutlier::AlgoParams &algo_params, const std::string &params_ser, sonata::Database &db){
8-
std::unique_ptr<ADOutlier> ad(ADOutlier::set_algorithm(0,algorithm,algo_params));
9-
ad->setGlobalParameters(params_ser); //input model
10-
ad->setGlobalModelSyncFrequency(0); //fix model
11-
12-
this->pruneImplementation(*ad, db);
7+
ProvDBpruneCore::ProvDBpruneCore(const std::string &algorithm, const ADOutlier::AlgoParams &algo_params, const std::string &model_ser): m_outlier(ADOutlier::set_algorithm(0,algorithm,algo_params)){
8+
m_outlier->setGlobalParameters(model_ser); //input model
9+
m_outlier->setGlobalModelSyncFrequency(0); //fix model
10+
}
11+
12+
void ProvDBpruneCore::prune(sonata::Database &db){
13+
this->pruneImplementation(*m_outlier, db);
1314
}

src/modules/factory.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,10 @@ std::unique_ptr<chimbuko::PSmoduleDataManagerCore> chimbuko::modules::factoryIns
2929
}
3030
}
3131

32-
std::unique_ptr<chimbuko::ProvDBpruneCore> chimbuko::modules::factoryInstantiateProvDBprune(const std::string &module){
32+
std::unique_ptr<chimbuko::ProvDBpruneCore> chimbuko::modules::factoryInstantiateProvDBprune(const std::string &module,
33+
const std::string &algorithm, const ADOutlier::AlgoParams &algo_params, const std::string &model_ser){
3334
if(module == "performance_analysis"){
34-
return std::unique_ptr<ProvDBpruneCore>(new performance_analysis::ProvDBprune);
35+
return std::unique_ptr<ProvDBpruneCore>(new performance_analysis::ProvDBprune(algorithm,algo_params,model_ser) );
3536
}else{
3637
fatal_error("Unknown module");
3738
}

test/unit_tests/modules/performance_analysis/provdb/Makefile.am

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@ AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/3rdparty @PS_FLAGS@
22
LDADD = $(top_builddir)/src/libchimbuko.la -lgtest -lstdc++fs
33

44
testdir = $(prefix)/test/unit_tests/modules/performance_analysis/provdb
5-
test_PROGRAMS = ProvDBpruneInterface
5+
test_PROGRAMS = ProvDBpruneInterface ProvDBprune
66

77
ProvDBpruneInterface_SOURCES = ProvDBpruneInterface.cpp ../../../unit_test_main_mpi.cpp ProvDBtester.cpp
88
ProvDBpruneInterface_LDADD = $(LDADD)
9+
10+
ProvDBprune_SOURCES = ProvDBprune.cpp ../../../unit_test_main_mpi.cpp ProvDBtester.cpp
11+
ProvDBprune_LDADD = $(LDADD)
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
#include<chimbuko/modules/performance_analysis/provdb/ProvDBprune.hpp>
2+
#include<chimbuko/modules/performance_analysis/provdb/ProvDBmoduleSetup.hpp>
3+
#include<chimbuko/core/pserver/PSshardProvenanceDBclient.hpp>
4+
#include<chimbuko/core/pserver/PSglobalProvenanceDBclient.hpp>
5+
6+
#include "gtest/gtest.h"
7+
#include "../../../unit_test_common.hpp"
8+
#include "ProvDBtester.hpp"
9+
10+
using namespace chimbuko;
11+
using namespace chimbuko::modules::performance_analysis;
12+
13+
TEST(TestProvDBprune, works){
14+
int nshards = 2;
15+
ProvDBmoduleSetup setup;
16+
ProvDBtester pdb(nshards, setup);
17+
18+
{
19+
std::ofstream of("/tmp/provider.address.0");
20+
of << pdb.getAddr();
21+
}
22+
std::vector<std::unique_ptr<PSshardProvenanceDBclient> > shard_clients(nshards);
23+
for(int i=0;i<nshards;i++){
24+
shard_clients[i].reset(new PSshardProvenanceDBclient(setup.getMainDBcollections()));
25+
shard_clients[i]->connectShard("/tmp",i,nshards,1);
26+
}
27+
28+
PSglobalProvenanceDBclient glob_client(setup.getGlobalDBcollections());
29+
glob_client.connectServer(pdb.getAddr());
30+
31+
//Put some anomaly data on the shards. Use both shards to check aggregation
32+
nlohmann::json anom, norm;
33+
34+
//event that should be removed
35+
norm["runtime_exclusive"] = 100;
36+
norm["fid"] = 1234;
37+
norm["blah"] = "norm";
38+
39+
shard_clients[0]->sendData(norm, "anomalies");
40+
41+
//event that should be kept
42+
anom["runtime_exclusive"] = 1000;
43+
anom["fid"] = 1234;
44+
anom["blah"] = "real_anom1";
45+
anom["entry"] = 33; //need this info to gather anomaly metrics on kept anomalies
46+
anom["exit"] = 1033;
47+
anom["io_step"] = 13;
48+
anom["outlier_severity"] = 1000;
49+
anom["rid"] = 88;
50+
51+
shard_clients[0]->sendData(anom, "anomalies");
52+
53+
//event that should be removed
54+
norm["runtime_exclusive"] = 103;
55+
norm["fid"] = 1234;
56+
norm["blah"] = "norm2";
57+
58+
shard_clients[1]->sendData(norm, "anomalies");
59+
60+
//event that should be kept
61+
anom["runtime_exclusive"] = 1200;
62+
anom["fid"] = 1234;
63+
anom["blah"] = "real_anom2";
64+
anom["entry"] = 44;
65+
anom["exit"] = 1244;
66+
anom["io_step"] = 17;
67+
anom["outlier_severity"] = 1200;
68+
anom["rid"] = 88;
69+
70+
shard_clients[1]->sendData(anom, "anomalies");
71+
72+
//populate the global database
73+
nlohmann::json fstats;
74+
fstats["fid"] = 1234;
75+
glob_client.sendData(fstats, "func_stats");
76+
77+
double mean = 100;
78+
double stddev = 10;
79+
int count = 1000;
80+
81+
SstdParam param;
82+
param[1234].set_eta(mean);
83+
param[1234].set_rho(pow(stddev,2) * (count-1) );
84+
param[1234].set_count(count);
85+
ADOutlier::AlgoParams ap; ap.sstd_sigma = 5;
86+
87+
//Do the business
88+
ProvDBprune pruner("sstd", ap, param.serialize());
89+
for(int i=0;i<nshards;i++) pruner.prune(shard_clients[i]->getDatabase());
90+
pruner.finalize(glob_client.getDatabase());
91+
92+
//check the shards
93+
{
94+
auto sdata = shard_clients[0]->retrieveAllData("anomalies");
95+
EXPECT_EQ(sdata.size(),1);
96+
auto s = nlohmann::json::parse(sdata[0]);
97+
EXPECT_NEAR(s["outlier_score"].template get<double>(), 90, 1e-5);
98+
}
99+
{
100+
auto sdata = shard_clients[1]->retrieveAllData("anomalies");
101+
EXPECT_EQ(sdata.size(),1);
102+
auto s = nlohmann::json::parse(sdata[0]);
103+
EXPECT_NEAR(s["outlier_score"].template get<double>(), 110, 1e-5);
104+
}
105+
106+
//check the global database
107+
auto glob_data = glob_client.retrieveAllData("func_stats");
108+
EXPECT_EQ(glob_data.size(), 1);
109+
for(auto const &e : glob_data){
110+
nlohmann::json je = nlohmann::json::parse(e);
111+
std::cout << je.dump(4);
112+
113+
EXPECT_EQ(je["anomaly_metrics"]["first_io_step"].template get<int>(), 13);
114+
EXPECT_EQ(je["anomaly_metrics"]["last_io_step"].template get<int>(), 17);
115+
EXPECT_EQ(je["anomaly_metrics"]["min_timestamp"].template get<unsigned long>(), 33);
116+
EXPECT_EQ(je["anomaly_metrics"]["max_timestamp"].template get<unsigned long>(), 1244);
117+
EXPECT_EQ(je["anomaly_metrics"]["score"]["count"].template get<int>(), 2);
118+
EXPECT_EQ(je["anomaly_metrics"]["severity"]["count"].template get<int>(), 2);
119+
EXPECT_EQ(je["anomaly_metrics"]["severity"]["accumulate"].template get<unsigned long>(), 2200);
120+
EXPECT_EQ(je["anomaly_metrics"]["anomaly_count"]["count"].template get<int>(), 2); //2 different timesteps
121+
EXPECT_EQ(je["anomaly_metrics"]["anomaly_count"]["accumulate"].template get<unsigned long>(), 2);
122+
}
123+
}
124+
125+

test/unit_tests/run_all.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ set -o pipefail
4646
./modules/performance_analysis/ad/COPODOutlier
4747
./modules/performance_analysis/ad/FuncAnomalyMetrics
4848
./modules/performance_analysis/provdb/ProvDBpruneInterface
49+
./modules/performance_analysis/provdb/ProvDBprune
4950
./modules/performance_analysis/pserver/GlobalCounterStats
5051
./modules/performance_analysis/pserver/GlobalAnomalyStats
5152
./modules/performance_analysis/pserver/GlobalAnomalyMetrics

0 commit comments

Comments
 (0)