Skip to content

Commit 263ac8f

Browse files
committed
Prevent concurrent calls to Thread.start() and refactor RS client, PYTHON-560.
1 parent db1ec12 commit 263ac8f

File tree

4 files changed

+128
-61
lines changed

4 files changed

+128
-61
lines changed

pymongo/mongo_replica_set_client.py

Lines changed: 45 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
InvalidDocument,
5959
OperationFailure,
6060
InvalidOperation)
61+
from pymongo.thread_util import DummyLock
6162

6263
EMPTY = b("")
6364
MAX_RETRY = 3
@@ -277,6 +278,10 @@ def __init__(self, rsc, event_class):
277278
def start_sync(self):
278279
"""Start the Monitor and block until it's really started.
279280
"""
281+
# start() can return before the thread is fully bootstrapped,
282+
# so a fork can leave the thread thinking it's alive in a child
283+
# process when it's really dead:
284+
# http://bugs.python.org/issue18418.
280285
self.start() # Implemented in subclasses.
281286
self.started_event.wait(5)
282287

@@ -337,13 +342,6 @@ def __init__(self, rsc):
337342
self.setName("ReplicaSetMonitorThread")
338343
self.setDaemon(True)
339344

340-
# Track whether the thread has started. (Greenlets track this already.)
341-
self.started = False
342-
343-
def start(self):
344-
self.started = True
345-
super(MonitorThread, self).start()
346-
347345
def run(self):
348346
"""Override Thread's run method.
349347
"""
@@ -363,9 +361,16 @@ class MonitorGreenlet(Monitor, Greenlet):
363361
"""Greenlet based replica set monitor.
364362
"""
365363
def __init__(self, rsc):
364+
self.monitor_greenlet_alive = False
366365
Monitor.__init__(self, rsc, Event)
367366
Greenlet.__init__(self)
368367

368+
def start_sync(self):
369+
self.monitor_greenlet_alive = True
370+
371+
# Call superclass.
372+
Monitor.start_sync(self)
373+
369374
# Don't override `run` in a Greenlet. Add _run instead.
370375
# Refer to gevent's Greenlet docs and source for more
371376
# information.
@@ -375,8 +380,11 @@ def _run(self):
375380
self.monitor()
376381

377382
def isAlive(self):
378-
# Gevent defines bool(Greenlet) as True if it's alive.
379-
return bool(self)
383+
# bool(self) isn't immediately True after someone calls start(),
384+
# but isAlive() is. Thus it's safe for greenlets to do:
385+
# "if not monitor.isAlive(): monitor.start()"
386+
# ... and be guaranteed only one greenlet starts the monitor.
387+
return self.monitor_greenlet_alive
380388

381389
except ImportError:
382390
pass
@@ -641,6 +649,7 @@ def __init__(self, hosts_or_uri=None, max_pool_size=100,
641649
self.__tz_aware = common.validate_boolean('tz_aware', tz_aware)
642650
self.__document_class = document_class
643651
self.__monitor = None
652+
self.__closed = False
644653

645654
# Compatibility with mongo_client.MongoClient
646655
host = kwargs.pop('host', hosts_or_uri)
@@ -765,17 +774,18 @@ def __init__(self, hosts_or_uri=None, max_pool_size=100,
765774
# Common case: monitor RS with a background thread.
766775
self.__monitor_class = MonitorThread
767776

768-
self.__monitor = self.__monitor_class(self)
769-
register_monitor(self.__monitor)
770-
771-
self.__monitor_lock = threading.Lock()
777+
if self.__use_greenlets:
778+
# Greenlets don't need to lock around access to the monitor.
779+
# A Greenlet can safely do:
780+
# "if not self.__monitor: self.__monitor = monitor_class()"
781+
# because it won't be interrupted between the check and the
782+
# assignment.
783+
self.__monitor_lock = DummyLock()
784+
else:
785+
self.__monitor_lock = threading.Lock()
772786

773787
if _connect:
774-
# Wait for the monitor to really start. Otherwise if we return to
775-
# caller and caller forks immediately, the monitor could think it's
776-
# still alive in the child process when it really isn't.
777-
# See http://bugs.python.org/issue18418.
778-
self.__monitor.start_sync()
788+
self.__ensure_monitor()
779789

780790
def _cached(self, dbname, coll, index):
781791
"""Test if `index` is cached.
@@ -1074,28 +1084,26 @@ def __schedule_refresh(self, sync=False):
10741084
is in progress, the work of refreshing the state is only performed
10751085
once.
10761086
"""
1077-
if not self.__monitor:
1087+
if self.__closed:
10781088
raise InvalidOperation('MongoReplicaSetClient has been closed')
10791089

1080-
if not self.__monitor.isAlive():
1081-
# We've forked since monitor was created.
1082-
self.__restart_monitor()
1083-
1084-
self.__monitor.schedule_refresh()
1090+
monitor = self.__ensure_monitor()
1091+
monitor.schedule_refresh()
10851092
if sync:
1086-
self.__monitor.wait_for_refresh(timeout_seconds=5)
1093+
monitor.wait_for_refresh(timeout_seconds=5)
10871094

1088-
def __restart_monitor(self):
1095+
def __ensure_monitor(self):
1096+
"""Ensure the monitor is started, and return it."""
10891097
self.__monitor_lock.acquire()
10901098
try:
1091-
# Another thread may have restarted the monitor while we were
1092-
# waiting for the lock.
1093-
if self.__monitor.isAlive():
1094-
return
1095-
1096-
self.__monitor = self.__monitor_class(self)
1097-
register_monitor(self.__monitor)
1098-
self.__monitor.start_sync()
1099+
# Another thread can start the monitor while we wait for the lock.
1100+
if self.__monitor is not None and self.__monitor.isAlive():
1101+
return self.__monitor
1102+
1103+
monitor = self.__monitor = self.__monitor_class(self)
1104+
register_monitor(monitor)
1105+
monitor.start_sync()
1106+
return monitor
10991107
finally:
11001108
self.__monitor_lock.release()
11011109

@@ -1261,13 +1269,8 @@ def _ensure_connected(self, sync=False):
12611269
"""Ensure this client instance is connected to a primary.
12621270
"""
12631271
# This may be the first time we're connecting to the set.
1264-
if self.__monitor and not self.__monitor.started:
1265-
try:
1266-
self.__monitor.start()
1267-
# Minor race condition. It's possible that two (or more)
1268-
# threads could call monitor.start() consecutively. Just pass.
1269-
except RuntimeError:
1270-
pass
1272+
self.__ensure_monitor()
1273+
12711274
if sync:
12721275
rs_state = self.__rs_state
12731276
if not rs_state.primary_member:
@@ -1301,6 +1304,7 @@ def close(self):
13011304
.. versionchanged:: 2.2.1
13021305
The :meth:`close` method now terminates the replica set monitor.
13031306
"""
1307+
self.__closed = True
13041308
self.__rs_state = RSState(self.__make_threadlocal())
13051309

13061310
monitor, self.__monitor = self.__monitor, None

pymongo/thread_util.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,14 @@
4444
issue1868 = (sys.version_info[:3] <= (2, 7, 0))
4545

4646

47+
class DummyLock(object):
48+
def acquire(self):
49+
pass
50+
51+
def release(self):
52+
pass
53+
54+
4755
class Ident(object):
4856
def __init__(self):
4957
self._refs = {}
@@ -67,20 +75,13 @@ def watch(self, callback):
6775

6876

6977
class ThreadIdent(Ident):
70-
class _DummyLock(object):
71-
def acquire(self):
72-
pass
73-
74-
def release(self):
75-
pass
76-
7778
def __init__(self):
7879
super(ThreadIdent, self).__init__()
7980
self._local = threading.local()
8081
if issue1868:
8182
self._lock = threading.Lock()
8283
else:
83-
self._lock = ThreadIdent._DummyLock()
84+
self._lock = DummyLock()
8485

8586
# We watch for thread-death using a weakref callback to a thread local.
8687
# Weakrefs are permitted on subclasses of object but not object() itself.

test/test_replica_set_client.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1179,5 +1179,12 @@ class TestReplicaSetClientLazyConnect(
11791179
pass
11801180

11811181

1182+
# Test concurrent access to a lazily-connecting RS client, with Gevent.
1183+
class TestReplicaSetClientLazyConnectGevent(
1184+
TestReplicaSetClientBase,
1185+
_TestLazyConnectMixin):
1186+
use_greenlets = True
1187+
1188+
11821189
if __name__ == "__main__":
11831190
unittest.main()

test/utils.py

Lines changed: 67 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,23 @@
1515
"""Utilities for testing pymongo
1616
"""
1717

18+
import sys
1819
import threading
1920

21+
from nose.plugins.skip import SkipTest
2022
from pymongo import MongoClient, MongoReplicaSetClient
2123
from pymongo.errors import AutoReconnect
2224
from pymongo.pool import NO_REQUEST, NO_SOCKET_YET, SocketInfo
2325
from test import host, port, version
2426

27+
PY3 = sys.version_info[0] == 3
28+
29+
try:
30+
import gevent
31+
has_gevent = True
32+
except ImportError:
33+
has_gevent = False
34+
2535

2636
# No functools in Python 2.4
2737
def my_partial(f, *args, **kwargs):
@@ -352,37 +362,82 @@ def assertNotInRequestAndDifferentSock(self, client, pools):
352362

353363

354364
class _TestLazyConnectMixin(object):
355-
"""Inherit from this class and from unittest.TestCase, and override
356-
_get_client(self, **kwargs), for testing clients with _connect=False.
365+
"""Test concurrent operations on a lazily-connecting client.
366+
367+
Inherit from this class and from unittest.TestCase, and override
368+
_get_client(self, **kwargs), for testing a lazily-connecting
369+
client, i.e. a client initialized with _connect=False.
370+
371+
Set use_greenlets = True to test with Gevent.
357372
"""
373+
use_greenlets = False
358374
ntrials = 10
359375
nthreads = 10
376+
interval = None
360377

361378
def run_threads(self, collection, target):
362379
"""Run a target function in many threads.
363380
364381
target is a function taking a Collection and an integer.
365382
"""
366-
threads = [
367-
threading.Thread(target=my_partial(target, collection, i))
368-
for i in range(self.nthreads)]
383+
threads = []
384+
for i in range(self.nthreads):
385+
bound_target = my_partial(target, collection, i)
386+
if self.use_greenlets:
387+
threads.append(gevent.Greenlet(run=bound_target))
388+
else:
389+
threads.append(threading.Thread(target=bound_target))
369390

370391
for t in threads:
371392
t.start()
372393

373394
for t in threads:
374395
t.join(30)
375-
assert not t.isAlive()
396+
if self.use_greenlets:
397+
# bool(Greenlet) is True if it's alive.
398+
assert not t
399+
else:
400+
assert not t.isAlive()
376401

377402
def trial(self, reset, target, test):
403+
"""Test concurrent operations on a lazily-connecting client.
404+
405+
`reset` takes a collection and resets it for the next trial.
406+
407+
`target` takes a lazily-connecting collection and an index from
408+
0 to nthreads, and performs some operation, e.g. an insert.
409+
410+
`test` takes a collection and asserts a post-condition to prove
411+
`target` succeeded.
412+
"""
413+
if self.use_greenlets and not has_gevent:
414+
raise SkipTest('Gevent not installed')
415+
378416
collection = self._get_client().pymongo_test.test
379417

380-
for i in range(self.ntrials):
381-
reset(collection)
382-
lazy_client = self._get_client(_connect=False)
383-
lazy_collection = lazy_client.pymongo_test.test
384-
self.run_threads(lazy_collection, target)
385-
test(collection)
418+
# Make concurrency bugs more likely to manifest.
419+
if PY3:
420+
self.interval = sys.getswitchinterval()
421+
sys.setswitchinterval(1e-6)
422+
else:
423+
self.interval = sys.getcheckinterval()
424+
sys.setcheckinterval(1)
425+
426+
try:
427+
for i in range(self.ntrials):
428+
reset(collection)
429+
lazy_client = self._get_client(
430+
_connect=False, use_greenlets=self.use_greenlets)
431+
432+
lazy_collection = lazy_client.pymongo_test.test
433+
self.run_threads(lazy_collection, target)
434+
test(collection)
435+
436+
finally:
437+
if PY3:
438+
sys.setswitchinterval(self.interval)
439+
else:
440+
sys.setcheckinterval(self.interval)
386441

387442
def test_insert(self):
388443
def reset(collection):

0 commit comments

Comments
 (0)