Skip to content

Commit 2f8d658

Browse files
yf225 authored and ezyang committed
Store perf numbers in S3 (#5951)
* Store perf numbers in S3

Previously the perf numbers were stored in https://github.com/yf225/perf-tests/tree/cpu, but we couldn't figure out a way to push the perf numbers only from master builds. This PR moves the perf number storage to S3, which gives us finer control over when to push the new numbers. This replaces #5844: storing numbers in RDS has its own problems with schema migration and backward compatibility, and using a NoSQL database might be overkill at this point.

* Fixed issues
1 parent 332d5ff commit 2f8d658

File tree

3 files changed

+64
-42
lines changed

3 files changed

+64
-42
lines changed

.jenkins/perf_test/compare_with_baseline.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,18 @@
1919
elif 'gpu' in test_name:
2020
backend = 'gpu'
2121

22-
data_file_path = '../perf_test_numbers_{}.json'.format(backend)
22+
data_file_path = '../{}_runtime.json'.format(backend)
2323

2424
with open(data_file_path) as data_file:
2525
data = json.load(data_file)
2626

27-
mean = float(data[test_name]['mean'])
28-
sigma = float(data[test_name]['sigma'])
27+
if test_name in data:
28+
mean = float(data[test_name]['mean'])
29+
sigma = float(data[test_name]['sigma'])
30+
else:
31+
# Let the test pass if baseline number doesn't exist
32+
mean = sys.maxsize
33+
sigma = 0.001
2934

3035
print("population mean: ", mean)
3136
print("population sigma: ", sigma)
@@ -51,9 +56,10 @@
5156
print("z-value < 2, no perf regression detected.")
5257
if args.update:
5358
print("We will use these numbers as new baseline.")
54-
new_data_file_path = '../new_perf_test_numbers_{}.json'.format(backend)
59+
new_data_file_path = '../new_{}_runtime.json'.format(backend)
5560
with open(new_data_file_path) as new_data_file:
5661
new_data = json.load(new_data_file)
62+
new_data[test_name] = {}
5763
new_data[test_name]['mean'] = sample_mean
5864
new_data[test_name]['sigma'] = max(sample_sigma, sample_mean * 0.01)
5965
with open(new_data_file_path, 'w') as new_data_file:

.jenkins/short-perf-test-cpu.sh

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,41 +7,49 @@ cd .jenkins/perf_test
77

88
echo "Running CPU perf test for PyTorch..."
99

10-
# Get last master commit hash
11-
export PYTORCH_COMMIT_ID=$(git log --format="%H" -n 1)
10+
pip install awscli
1211

13-
# Get baseline file from https://github.com/yf225/perf-tests
14-
if [ -f /var/lib/jenkins/host-workspace/perf_test_numbers_cpu.json ]; then
15-
cp /var/lib/jenkins/host-workspace/perf_test_numbers_cpu.json perf_test_numbers_cpu.json
16-
else
17-
curl https://raw.githubusercontent.com/yf225/perf-tests/master/perf_test_numbers_cpu.json -O
12+
# Set multipart_threshold to be sufficiently high, so that `aws s3 cp` is not a multipart read
13+
# More info at https://github.com/aws/aws-cli/issues/2321
14+
aws configure set default.s3.multipart_threshold 5GB
15+
16+
if [[ "$COMMIT_SOURCE" == master ]]; then
17+
# Get current master commit hash
18+
export MASTER_COMMIT_ID=$(git log --format="%H" -n 1)
1819
fi
1920

20-
if [[ "$GIT_COMMIT" == *origin/master* ]]; then
21+
# Find the master commit to test against
22+
IFS=$'\n'
23+
master_commit_ids=($(git rev-list HEAD))
24+
for commit_id in "${master_commit_ids[@]}"; do
25+
if aws s3 ls s3://ossci-perf-test/pytorch/cpu_runtime/${commit_id}.json; then
26+
LATEST_TESTED_COMMIT=${commit_id}
27+
break
28+
fi
29+
done
30+
aws s3 cp s3://ossci-perf-test/pytorch/cpu_runtime/${LATEST_TESTED_COMMIT}.json cpu_runtime.json
31+
32+
if [[ "$COMMIT_SOURCE" == master ]]; then
2133
# Prepare new baseline file
22-
cp perf_test_numbers_cpu.json new_perf_test_numbers_cpu.json
23-
python update_commit_hash.py new_perf_test_numbers_cpu.json ${PYTORCH_COMMIT_ID}
34+
cp cpu_runtime.json new_cpu_runtime.json
35+
python update_commit_hash.py new_cpu_runtime.json ${MASTER_COMMIT_ID}
2436
fi
2537

2638
# Include tests
2739
. ./test_cpu_speed_mini_sequence_labeler.sh
2840
. ./test_cpu_speed_mnist.sh
2941

3042
# Run tests
31-
if [[ "$GIT_COMMIT" == *origin/master* ]]; then
43+
if [[ "$COMMIT_SOURCE" == master ]]; then
3244
run_test test_cpu_speed_mini_sequence_labeler 20 compare_and_update
3345
run_test test_cpu_speed_mnist 20 compare_and_update
3446
else
3547
run_test test_cpu_speed_mini_sequence_labeler 20 compare_with_baseline
3648
run_test test_cpu_speed_mnist 20 compare_with_baseline
3749
fi
3850

39-
if [[ "$GIT_COMMIT" == *origin/master* ]]; then
40-
# Push new baseline file
41-
cp new_perf_test_numbers_cpu.json /var/lib/jenkins/host-workspace/perf_test_numbers_cpu.json
42-
cd /var/lib/jenkins/host-workspace
43-
git config --global user.email jenkins@ci.pytorch.org
44-
git config --global user.name Jenkins
45-
git add perf_test_numbers_cpu.json
46-
git commit -m "New CPU perf test baseline from ${PYTORCH_COMMIT_ID}"
51+
if [[ "$COMMIT_SOURCE" == master ]]; then
52+
# This could cause a race condition if we are testing the same master commit twice,
53+
# but the chance of them executing this line at the same time is low.
54+
aws s3 cp new_cpu_runtime.json s3://ossci-perf-test/pytorch/cpu_runtime/${MASTER_COMMIT_ID}.json --acl public-read
4755
fi

.jenkins/short-perf-test-gpu.sh

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,32 @@ cd .jenkins/perf_test
77

88
echo "Running GPU perf test for PyTorch..."
99

10-
# Get last master commit hash
11-
export PYTORCH_COMMIT_ID=$(git log --format="%H" -n 1)
10+
pip install awscli
1211

13-
# Get baseline file from https://github.com/yf225/perf-tests
14-
if [ -f /var/lib/jenkins/host-workspace/perf_test_numbers_gpu.json ]; then
15-
cp /var/lib/jenkins/host-workspace/perf_test_numbers_gpu.json perf_test_numbers_gpu.json
16-
else
17-
curl https://raw.githubusercontent.com/yf225/perf-tests/master/perf_test_numbers_gpu.json -O
12+
# Set multipart_threshold to be sufficiently high, so that `aws s3 cp` is not a multipart read
13+
# More info at https://github.com/aws/aws-cli/issues/2321
14+
aws configure set default.s3.multipart_threshold 5GB
15+
16+
if [[ "$COMMIT_SOURCE" == master ]]; then
17+
# Get current master commit hash
18+
export MASTER_COMMIT_ID=$(git log --format="%H" -n 1)
1819
fi
1920

20-
if [[ "$GIT_COMMIT" == *origin/master* ]]; then
21+
# Find the master commit to test against
22+
IFS=$'\n'
23+
master_commit_ids=($(git rev-list HEAD))
24+
for commit_id in "${master_commit_ids[@]}"; do
25+
if aws s3 ls s3://ossci-perf-test/pytorch/gpu_runtime/${commit_id}.json; then
26+
LATEST_TESTED_COMMIT=${commit_id}
27+
break
28+
fi
29+
done
30+
aws s3 cp s3://ossci-perf-test/pytorch/gpu_runtime/${LATEST_TESTED_COMMIT}.json gpu_runtime.json
31+
32+
if [[ "$COMMIT_SOURCE" == master ]]; then
2133
# Prepare new baseline file
22-
cp perf_test_numbers_gpu.json new_perf_test_numbers_gpu.json
23-
python update_commit_hash.py new_perf_test_numbers_gpu.json ${PYTORCH_COMMIT_ID}
34+
cp gpu_runtime.json new_gpu_runtime.json
35+
python update_commit_hash.py new_gpu_runtime.json ${MASTER_COMMIT_ID}
2436
fi
2537

2638
# Include tests
@@ -31,7 +43,7 @@ fi
3143
. ./test_gpu_speed_mlstm.sh
3244

3345
# Run tests
34-
if [[ "$GIT_COMMIT" == *origin/master* ]]; then
46+
if [[ "$COMMIT_SOURCE" == master ]]; then
3547
run_test test_gpu_speed_mnist 20 compare_and_update
3648
run_test test_gpu_speed_word_language_model 20 compare_and_update
3749
run_test test_gpu_speed_cudnn_lstm 20 compare_and_update
@@ -45,12 +57,8 @@ else
4557
run_test test_gpu_speed_mlstm 20 compare_with_baseline
4658
fi
4759

48-
if [[ "$GIT_COMMIT" == *origin/master* ]]; then
49-
# Push new baseline file
50-
cp new_perf_test_numbers_gpu.json /var/lib/jenkins/host-workspace/perf_test_numbers_gpu.json
51-
cd /var/lib/jenkins/host-workspace
52-
git config --global user.email jenkins@ci.pytorch.org
53-
git config --global user.name Jenkins
54-
git add perf_test_numbers_gpu.json
55-
git commit -m "New GPU perf test baseline from ${PYTORCH_COMMIT_ID}"
60+
if [[ "$COMMIT_SOURCE" == master ]]; then
61+
# This could cause a race condition if we are testing the same master commit twice,
62+
# but the chance of them executing this line at the same time is low.
63+
aws s3 cp new_gpu_runtime.json s3://ossci-perf-test/pytorch/gpu_runtime/${MASTER_COMMIT_ID}.json --acl public-read
5664
fi

0 commit comments

Comments
 (0)