Skip to content

Commit 75acaba

Browse files
committed
ARROW-2577: [Plasma] Add asv benchmarks for plasma
This adds some initial ASV benchmarks for plasma:

- Put latency
- Get latency
- Put throughput for 1KB, 10KB, 100KB, 1MB, 10MB, 100MB

It also includes some minor code restructuring to expose the start_plasma_store method.

Author: Philipp Moritz <pcmoritz@gmail.com>

Closes apache#2038 from pcmoritz/plasma-asv and squashes the following commits:

34a0684 <Philipp Moritz> measure wallclock time instead of process cpu time
c89256f <Philipp Moritz> parametrize tests
3567ddc <Philipp Moritz> fix windows build
eca1767 <Philipp Moritz> build plasma in asv
47671b3 <Philipp Moritz> fix
1261177 <Philipp Moritz> fix linting errors
7d4d685 <Philipp Moritz> Add asv benchmarks for plasma
1 parent 63da09a commit 75acaba

7 files changed

Lines changed: 179 additions & 82 deletions

File tree

python/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ if (PYARROW_BUILD_PLASMA)
404404
${LINK_LIBS}
405405
libplasma_shared)
406406

407-
set(CYTHON_EXTENSIONS ${CYTHON_EXTENSIONS} plasma)
407+
set(CYTHON_EXTENSIONS ${CYTHON_EXTENSIONS} _plasma)
408408
file(COPY ${PLASMA_EXECUTABLE} DESTINATION ${BUILD_OUTPUT_ROOT_DIRECTORY})
409409
endif()
410410

python/asv-build.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ cmake -GNinja \
3737
-DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
3838
-DARROW_CXXFLAGS=$CXXFLAGS \
3939
-DARROW_PYTHON=ON \
40+
-DARROW_PLASMA=ON \
4041
-DARROW_BUILD_TESTS=OFF \
4142
..
4243
cmake --build . --target install
@@ -47,6 +48,7 @@ popd
4748
# Build pyarrow wrappers
4849
export SETUPTOOLS_SCM_PRETEND_VERSION=0.0.1
4950
export PYARROW_BUILD_TYPE=release
51+
export PYARROW_WITH_PLASMA=1
5052

5153
python setup.py clean
5254
find pyarrow -name "*.so" -delete

python/benchmarks/plasma.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
import numpy as np
19+
import timeit
20+
21+
import pyarrow as pa
22+
import pyarrow.plasma as plasma
23+
24+
from . import common
25+
26+
27+
class SimplePlasmaThroughput(object):
    """Time how long one client takes to put a NumPy array into plasma.

    Each entry of ``params`` is handed to ``setup``, ``teardown`` and the
    ``time_*`` method as ``size``. The payload is built from ``size // 8``
    random values, so ``size`` is roughly the payload size in bytes
    (assuming NumPy's default 8-byte float64 elements).
    """

    params = [1000, 100000, 10000000]

    # Wall-clock timer rather than process CPU time.
    timer = timeit.default_timer

    def setup(self, size):
        # The store is a context manager; it is entered by hand here and
        # left again in teardown() so that it stays alive in between.
        store_ctx = plasma.start_plasma_store(plasma_store_memory=10**9)
        self.plasma_store_ctx = store_ctx
        socket_name, _ = store_ctx.__enter__()
        self.plasma_client = plasma.connect(socket_name, "", 64)

        self.data = np.random.randn(size // 8)

    def teardown(self, size):
        # Leaving the context stops the store and removes its temp dir.
        self.plasma_store_ctx.__exit__(None, None, None)

    def time_plasma_put_data(self, size):
        self.plasma_client.put(self.data)
46+
47+
48+
class SimplePlasmaLatency(object):
    """Time small put / put+get round trips against a plasma store.

    Every timed method performs 1000 operations on the integer ``1`` using
    a single client, so the reported time covers 1000 round trips.
    """

    # Wall-clock timer rather than process CPU time.
    timer = timeit.default_timer

    def setup(self):
        # The store is a context manager; it is entered by hand here and
        # left again in teardown() so that it stays alive in between.
        store_ctx = plasma.start_plasma_store(plasma_store_memory=10**9)
        self.plasma_store_ctx = store_ctx
        socket_name, _ = store_ctx.__enter__()
        self.plasma_client = plasma.connect(socket_name, "", 64)

    def teardown(self):
        # Leaving the context stops the store and removes its temp dir.
        self.plasma_store_ctx.__exit__(None, None, None)

    def time_plasma_put(self):
        for _ in range(1000):
            self.plasma_client.put(1)

    def time_plasma_putget(self):
        for _ in range(1000):
            # Store the value, then immediately fetch it back by its id.
            self.plasma_client.get(self.plasma_client.put(1))

python/pyarrow/plasma.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
import contextlib
19+
import os
20+
import pyarrow as pa
21+
import subprocess
22+
import shutil
23+
import tempfile
24+
import time
25+
26+
from pyarrow._plasma import (ObjectID, ObjectNotAvailable, # noqa
27+
PlasmaBuffer, PlasmaClient, connect)
28+
29+
30+
@contextlib.contextmanager
def start_plasma_store(plasma_store_memory,
                       use_valgrind=False, use_profiler=False,
                       use_one_memory_mapped_file=False,
                       plasma_directory=None, use_hugepages=False):
    """Start a plasma store process.

    This is a context manager: the store runs for the duration of the
    ``with`` block. On exit the process is killed if it is still running
    and the temporary directory holding its socket is removed.

    Args:
        plasma_store_memory (int): Capacity of the plasma store in bytes.
        use_valgrind (bool): True if the plasma store should be started inside
            of valgrind. If this is True, use_profiler must be False.
        use_profiler (bool): True if the plasma store should be started inside
            a profiler. If this is True, use_valgrind must be False.
        use_one_memory_mapped_file: If True, then the store will use only a
            single memory-mapped file.
        plasma_directory (str): Directory where plasma memory mapped files
            will be stored.
        use_hugepages (bool): True if the plasma store should use huge pages.
    Yields:
        A tuple of the name of the plasma store socket and the
        ``subprocess.Popen`` object of the plasma store process.
    Raises:
        Exception: If both use_valgrind and use_profiler are True.
        RuntimeError: If the store process has already exited when it is
            checked shortly after launch.
    """
    if use_valgrind and use_profiler:
        raise Exception("Cannot use valgrind and profiler at the same time.")

    tmpdir = tempfile.mkdtemp(prefix='test_plasma-')
    # Bound before the try block so the cleanup below stays valid even when
    # launching the process fails (e.g. the executable is missing).
    proc = None
    try:
        plasma_store_name = os.path.join(tmpdir, 'plasma.sock')
        plasma_store_executable = os.path.join(pa.__path__[0], "plasma_store")
        command = [plasma_store_executable,
                   "-s", plasma_store_name,
                   "-m", str(plasma_store_memory)]
        if use_one_memory_mapped_file:
            command += ["-f"]
        if plasma_directory:
            command += ["-d", plasma_directory]
        if use_hugepages:
            command += ["-h"]

        # The store is given more time to come up when wrapped in valgrind
        # or callgrind than when it is launched directly.
        if use_valgrind:
            command = ["valgrind",
                       "--track-origins=yes",
                       "--leak-check=full",
                       "--show-leak-kinds=all",
                       "--leak-check-heuristics=stdstring",
                       "--error-exitcode=1"] + command
            startup_delay = 1.0
        elif use_profiler:
            command = ["valgrind", "--tool=callgrind"] + command
            startup_delay = 1.0
        else:
            startup_delay = 0.1

        # stdout/stderr are not redirected: the child inherits them.
        proc = subprocess.Popen(command, stdout=None, stderr=None)
        time.sleep(startup_delay)
        rc = proc.poll()
        if rc is not None:
            raise RuntimeError("plasma_store exited unexpectedly with "
                               "code %d" % (rc,))

        yield plasma_store_name, proc
    finally:
        if proc is not None and proc.poll() is None:
            proc.kill()
            # Reap the killed child so it does not linger as a zombie.
            proc.wait()
        shutil.rmtree(tmpdir)

python/pyarrow/tests/test_plasma.py

Lines changed: 9 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,11 @@
1919
from __future__ import division
2020
from __future__ import print_function
2121

22-
import contextlib
2322
import os
2423
import pytest
2524
import random
26-
import shutil
2725
import signal
2826
import sys
29-
import subprocess
30-
import tempfile
31-
import time
3227

3328
import numpy as np
3429
import pyarrow as pa
@@ -106,76 +101,6 @@ def assert_get_object_equal(unit_test, client1, client2, object_id,
106101
assert plasma.buffers_equal(metadata, client1_metadata)
107102

108103

109-
@contextlib.contextmanager
110-
def start_plasma_store(plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY,
111-
use_valgrind=False, use_profiler=False,
112-
use_one_memory_mapped_file=False,
113-
plasma_directory=None, use_hugepages=False):
114-
"""Start a plasma store process.
115-
Args:
116-
use_valgrind (bool): True if the plasma store should be started inside
117-
of valgrind. If this is True, use_profiler must be False.
118-
use_profiler (bool): True if the plasma store should be started inside
119-
a profiler. If this is True, use_valgrind must be False.
120-
stdout_file: A file handle opened for writing to redirect stdout to. If
121-
no redirection should happen, then this should be None.
122-
stderr_file: A file handle opened for writing to redirect stderr to. If
123-
no redirection should happen, then this should be None.
124-
use_one_memory_mapped_file: If True, then the store will use only a
125-
single memory-mapped file.
126-
Return:
127-
A tuple of the name of the plasma store socket and the process ID of
128-
the plasma store process.
129-
"""
130-
if use_valgrind and use_profiler:
131-
raise Exception("Cannot use valgrind and profiler at the same time.")
132-
133-
tmpdir = tempfile.mkdtemp(prefix='test_plasma-')
134-
try:
135-
plasma_store_name = os.path.join(tmpdir, 'plasma.sock')
136-
plasma_store_executable = os.path.join(pa.__path__[0], "plasma_store")
137-
command = [plasma_store_executable,
138-
"-s", plasma_store_name,
139-
"-m", str(plasma_store_memory)]
140-
if use_one_memory_mapped_file:
141-
command += ["-f"]
142-
if plasma_directory:
143-
command += ["-d", plasma_directory]
144-
if use_hugepages:
145-
command += ["-h"]
146-
stdout_file = None
147-
stderr_file = None
148-
if use_valgrind:
149-
command = ["valgrind",
150-
"--track-origins=yes",
151-
"--leak-check=full",
152-
"--show-leak-kinds=all",
153-
"--leak-check-heuristics=stdstring",
154-
"--error-exitcode=1"] + command
155-
proc = subprocess.Popen(command, stdout=stdout_file,
156-
stderr=stderr_file)
157-
time.sleep(1.0)
158-
elif use_profiler:
159-
command = ["valgrind", "--tool=callgrind"] + command
160-
proc = subprocess.Popen(command, stdout=stdout_file,
161-
stderr=stderr_file)
162-
time.sleep(1.0)
163-
else:
164-
proc = subprocess.Popen(command, stdout=stdout_file,
165-
stderr=stderr_file)
166-
time.sleep(0.1)
167-
rc = proc.poll()
168-
if rc is not None:
169-
raise RuntimeError("plasma_store exited unexpectedly with "
170-
"code %d" % (rc,))
171-
172-
yield plasma_store_name, proc
173-
finally:
174-
if proc.poll() is None:
175-
proc.kill()
176-
shutil.rmtree(tmpdir)
177-
178-
179104
@pytest.mark.plasma
180105
class TestPlasmaClient(object):
181106

@@ -185,7 +110,8 @@ def setup_method(self, test_method):
185110

186111
import pyarrow.plasma as plasma
187112
# Start Plasma store.
188-
self.plasma_store_ctx = start_plasma_store(
113+
self.plasma_store_ctx = plasma.start_plasma_store(
114+
plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY,
189115
use_valgrind=USE_VALGRIND,
190116
use_one_memory_mapped_file=use_one_memory_mapped_file)
191117
plasma_store_name, self.p = self.plasma_store_ctx.__enter__()
@@ -839,8 +765,10 @@ def test_object_id_equality_operators():
839765
reason="requires hugepage support")
840766
def test_use_huge_pages():
841767
import pyarrow.plasma as plasma
842-
with start_plasma_store(plasma_directory="/mnt/hugepages",
843-
use_hugepages=True) as (plasma_store_name, p):
768+
with plasma.start_plasma_store(
769+
plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY,
770+
plasma_directory="/mnt/hugepages",
771+
use_hugepages=True) as (plasma_store_name, p):
844772
plasma_client = plasma.connect(plasma_store_name, "", 64)
845773
create_object(plasma_client, 100000000)
846774

@@ -852,7 +780,9 @@ def test_use_huge_pages():
852780
def test_plasma_client_sharing():
853781
import pyarrow.plasma as plasma
854782

855-
with start_plasma_store() as (plasma_store_name, p):
783+
with plasma.start_plasma_store(
784+
plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY) \
785+
as (plasma_store_name, p):
856786
plasma_client = plasma.connect(plasma_store_name, "", 64)
857787
object_id = plasma_client.put(np.zeros(3))
858788
buf = plasma_client.get(object_id)

python/setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ def initialize_options(self):
144144
'lib',
145145
'_parquet',
146146
'_orc',
147-
'plasma']
147+
'_plasma']
148148

149149
def _run_cmake(self):
150150
# The directory containing this setup.py
@@ -328,7 +328,7 @@ def _run_cmake(self):
328328
def _failure_permitted(self, name):
329329
if name == '_parquet' and not self.with_parquet:
330330
return True
331-
if name == 'plasma' and not self.with_plasma:
331+
if name == '_plasma' and not self.with_plasma:
332332
return True
333333
if name == '_orc' and not self.with_orc:
334334
return True

0 commit comments

Comments
 (0)