Skip to content

Commit 06fc09c

Browse files
committed
much cleaner replica set management and fixed some bugs in failover
1 parent f277649 commit 06fc09c

3 files changed

Lines changed: 116 additions & 97 deletions

File tree

src/main/com/mongodb/DBTCPConnector.java

Lines changed: 110 additions & 93 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ public DBTCPConnector( Mongo m , ServerAddress addr )
4040

4141
if ( addr.isPaired() ){
4242
_allHosts = new ArrayList<ServerAddress>( addr.explode() );
43-
_createLogger.info( "switching to replica set mode : " + _allHosts + " -> " + _curAddress );
43+
_createLogger.info( "switching to replica set mode : " + _allHosts + " -> " + _curMaster );
4444
}
4545
else {
4646
_set( addr );
@@ -62,7 +62,7 @@ public DBTCPConnector( Mongo m , List<ServerAddress> all )
6262

6363
_allHosts = new ArrayList<ServerAddress>( all ); // make a copy so it can't be modified
6464

65-
_createLogger.info( all + " -> " + _curAddress );
65+
_createLogger.info( all + " -> " + _curMaster );
6666
}
6767

6868
private static ServerAddress _checkAddress( ServerAddress addr ){
@@ -204,7 +204,7 @@ public Response call( DB db , DBCollection coll , OutMessage m , int retries )
204204
ServerError err = res.getError();
205205

206206
if ( err != null && err.isNotMasterError() ){
207-
_pickCurrent();
207+
_findMaster( true );
208208
if ( retries <= 0 ){
209209
throw new MongoException( "not talking to master and retries used up" );
210210
}
@@ -216,33 +216,41 @@ public Response call( DB db , DBCollection coll , OutMessage m , int retries )
216216
}
217217

218218
public ServerAddress getAddress(){
219-
return _curAddress;
219+
return _curMaster;
220220
}
221221

222222
public List<ServerAddress> getAllAddress() {
223223
return _allHosts;
224224
}
225225

226226
public String getConnectPoint(){
227-
return _curAddress.toString();
227+
return _curMaster.toString();
228228
}
229229

230230
boolean _error( Throwable t )
231231
throws MongoException {
232232
if ( _allHosts != null ){
233233
_logger.log( Level.WARNING , "replica set mode, switching master" , t );
234-
_pickCurrent();
234+
_findMaster( true );
235235
}
236236
return true;
237237
}
238238

239239
class MyPort {
240240

241241
DBPort get( boolean keep ){
242-
if ( _port != null )
243-
return _port;
242+
_findMaster( false );
243+
244+
if ( _port != null ){
245+
if ( _pool == _curPortPool )
246+
return _port;
247+
_pool.done( _port );
248+
_port = null;
249+
_pool = null;
250+
}
244251

245-
DBPort p = _curPortPool.get();
252+
_pool = _curPortPool;
253+
DBPort p = _pool.get();
246254
if ( keep && _inRequest )
247255
_port = p;
248256

@@ -251,11 +259,11 @@ DBPort get( boolean keep ){
251259

252260
void done( DBPort p ){
253261
if ( p != _port )
254-
_curPortPool.done( p );
262+
_pool.done( p );
255263
}
256264

257265
void error( DBPort p , Exception e ){
258-
_curPortPool.done( p );
266+
_pool.done( p );
259267
p.close();
260268

261269
_port = null;
@@ -270,7 +278,7 @@ void requestEnsureConnection(){
270278
if ( _port != null )
271279
return;
272280

273-
_port = _curPortPool.get();
281+
_port = _pool.get();
274282
}
275283

276284
void requestStart(){
@@ -279,24 +287,28 @@ void requestStart(){
279287

280288
void requestDone(){
281289
if ( _port != null )
282-
_curPortPool.done( _port );
290+
_pool.done( _port );
283291
_port = null;
284292
_inRequest = false;
285293
}
286294

287295
DBPort _port;
296+
DBPortPool _pool;
288297
boolean _inRequest;
289298
}
290299

291300
/**
292301
* @return next to try
293302
*/
294-
ServerAddress _addAllFromSet( DBObject o ){
295-
Object foo = o.get( "hosts" );
303+
ServerAddress _addAllFromSet( CommandResult res ){
304+
if ( res == null )
305+
return null;
306+
307+
Object foo = res.get( "hosts" );
296308
if ( ! ( foo instanceof List ) )
297309
return null;
298310

299-
String primary = (String)o.get("primary");
311+
String primary = (String)res.get("primary");
300312

301313
ServerAddress primaryAddress = null;
302314

@@ -322,89 +334,86 @@ ServerAddress _addAllFromSet( DBObject o ){
322334

323335
return primaryAddress;
324336
}
325-
326-
void _pickInitial()
327-
throws MongoException {
328-
if ( _curAddress != null )
329-
return;
330337

331-
// we need to just get a server to query for ismaster
332-
_pickCurrent();
333338

339+
/**
340+
* this will update set meta data
341+
* @return if addr should be the new master
342+
*/
343+
boolean _findMasterLoop( ServerAddress addr , ServerAddress[] outPrimary ){
334344
try {
335-
_logger.info( "current address beginning of _pickInitial: " + _curAddress );
336-
337-
DBObject im = isMasterCmd();
345+
CommandResult res = _isMasterCmd( addr );
346+
if ( res == null )
347+
return false;
338348

339-
ServerAddress other = _addAllFromSet( im );
340-
341-
if ( _isMaster( im ) )
342-
return;
349+
ServerAddress prim = _addAllFromSet( res );
350+
if ( outPrimary != null )
351+
outPrimary[0] = prim;
343352

344-
if ( other != null ){
345-
_set( other );
346-
im = isMasterCmd();
347-
_addAllFromSet( im );
348-
if ( _isMaster( im ) )
353+
return _isMaster( res );
354+
}
355+
catch ( Exception e ){
356+
_logger.warning( "can't do _findMasterLoop " + addr + "\t" + e );
357+
return false;
358+
}
359+
360+
}
361+
362+
void _findMaster( boolean reset )
363+
throws MongoException {
364+
365+
if ( _allHosts == null )
366+
return;
367+
368+
if ( ! reset && _curMaster != null )
369+
return;
370+
371+
synchronized ( _allHosts ){
372+
if ( _curMaster != null ){
373+
if ( _findMasterLoop( _curMaster , null ) )
349374
return;
350-
351-
_logger.severe( "primary given was wrong: " + other + " going to scan" );
375+
376+
_curMaster = null;
377+
_curPortPool = null;
352378
}
353379

354-
synchronized ( _allHosts ){
380+
for ( int loopNumber=0; loopNumber<3; loopNumber++ ){
381+
355382
Collections.shuffle( _allHosts );
356-
for ( ServerAddress a : _allHosts ){
357-
if ( _curAddress == a )
358-
continue;
359383

360-
_logger.info( "remote [" + _curAddress + "] -> [" + a + "]" );
361-
_set( a );
384+
ServerAddress newPrimary[] = new ServerAddress[1];
385+
386+
for ( int i=0; i<_allHosts.size(); i++ ){
387+
ServerAddress addr = _allHosts.get(i);
388+
_logger.config( "_findMaster looking at : " + addr );
389+
newPrimary[0] = null;
362390

363-
im = isMasterCmd();
364-
_addAllFromSet( im );
365-
if ( _isMaster( im ) )
391+
if ( _findMasterLoop( addr , newPrimary ) ){
392+
_set( addr );
366393
return;
394+
}
367395

368-
if ( _allHosts.size() == 2 )
369-
_logger.severe( "switched to: " + a + " but isn't master" );
370-
}
371-
372-
throw new MongoException( "can't find master" );
373-
}
374-
}
375-
catch ( Exception e ){
376-
_logger.log( Level.SEVERE , "can't pick initial master, using random one" , e );
377-
}
378-
}
379-
380-
private void _pickCurrent()
381-
throws MongoException {
382-
if ( _allHosts == null )
383-
throw new MongoException( "got master/slave issue but not in master/slave mode on the client side" );
384-
385-
synchronized ( _allHosts ){
386-
Collections.shuffle( _allHosts );
387-
for ( int i=0; i<_allHosts.size(); i++ ){
388-
ServerAddress a = _allHosts.get( i );
389-
if ( a == _curAddress )
390-
continue;
391-
392-
if ( _curAddress != null ){
393-
_logger.info( "switching from [" + _curAddress + "] to [" + a + "]" );
396+
if ( newPrimary[0] != null ){
397+
addr = newPrimary[0];
398+
if ( _findMasterLoop( addr , null ) ){
399+
_set( addr );
400+
return;
401+
}
402+
}
394403
}
395404

396-
_set( a );
397-
return;
398405
}
406+
// sleep - waiting for system to recover
407+
try { Thread.sleep( 1000 ); } catch ( Exception e ){}
399408
}
400-
401-
throw new MongoException( "couldn't find a new host to swtich too" );
409+
410+
throw new MongoException( "can't find master" );
402411
}
403412

404413
private boolean _set( ServerAddress addr ){
405-
if ( _curAddress == addr )
414+
if ( _curMaster == addr )
406415
return false;
407-
_curAddress = addr;
416+
_curMaster = addr;
408417
_curPortPool = _portHolder.get( addr );
409418
return true;
410419
}
@@ -414,26 +423,34 @@ public String debugString(){
414423
if ( _allHosts != null )
415424
buf.append( "replica set : " ).append( _allHosts );
416425
else
417-
buf.append( _curAddress ).append( " " ).append( _curAddress._addr );
426+
buf.append( _curMaster ).append( " " ).append( _curMaster._addr );
418427

419428
return buf.toString();
420429
}
421430

422-
DBObject isMasterCmd(){
423-
DBCollection collection = _mongo.getDB( "admin" ).getCollection( "$cmd" );
424-
425-
Iterator<DBObject> i = collection.__find( _isMaster , null , 0 , 1 , 0 );
426-
if ( i == null || ! i.hasNext() )
427-
throw new MongoException( "no result for ismaster query?" );
428-
429-
DBObject res = i.next();
430-
if ( i.hasNext() )
431-
throw new MongoException( "what's going on" );
432-
433-
return res;
431+
CommandResult _isMasterCmd( ServerAddress addr ){
432+
DBPortPool pool = _portHolder.get( addr );
433+
DBPort p = null;
434+
try {
435+
p = pool.get();
436+
CommandResult res = p.runCommand( _mongo.getDB( "admin" ) , _isMaster );
437+
return res;
438+
}
439+
catch ( Exception e ){
440+
_logger.log( Level.INFO , "can't run ismaster on : " + addr + "\t" + e );
441+
return null;
442+
}
443+
finally {
444+
if ( p != null ){
445+
pool.done( p );
446+
}
447+
}
434448
}
435449

436-
boolean _isMaster( DBObject res ){
450+
boolean _isMaster( CommandResult res ){
451+
if ( res == null )
452+
return false;
453+
437454
Object x = res.get( "ismaster" );
438455
if ( x == null )
439456
throw new IllegalStateException( "ismaster shouldn't be null: " + res );
@@ -452,7 +469,7 @@ public void close(){
452469
}
453470

454471
final Mongo _mongo;
455-
private ServerAddress _curAddress;
472+
private ServerAddress _curMaster;
456473
private DBPortPool _curPortPool;
457474
private DBPortPool.Holder _portHolder;
458475
private final List<ServerAddress> _allHosts;

src/main/com/mongodb/Mongo.java

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -163,7 +163,7 @@ public Mongo( ServerAddress addr , MongoOptions options )
163163
_addrs = null;
164164
_options = options;
165165
_connector = new DBTCPConnector( this , _addr );
166-
_connector._pickInitial();
166+
_connector._findMaster( false );
167167
}
168168

169169
/**
@@ -187,7 +187,7 @@ public Mongo( ServerAddress left , ServerAddress right , MongoOptions options )
187187
_addrs = Arrays.asList( left , right );
188188
_options = options;
189189
_connector = new DBTCPConnector( this , _addrs );
190-
_connector._pickInitial();
190+
_connector._findMaster( false );
191191
}
192192

193193
/**
@@ -212,8 +212,7 @@ public Mongo( List<ServerAddress> replicaSetSeeds , MongoOptions options )
212212
_addrs = replicaSetSeeds;
213213
_options = options;
214214
_connector = new DBTCPConnector( this , _addrs );
215-
_connector._pickInitial();
216-
215+
_connector._findMaster( false );
217216
}
218217

219218
public DB getDB( String dbname ){

src/test/com/mongodb/ReplSetTest.java

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,9 @@ public void run(){
2222
Thread.sleep( 500 );
2323
_coll.findOne();
2424
}
25+
catch ( NullPointerException n ){
26+
n.printStackTrace();
27+
}
2528
catch ( Exception e ){
2629
System.out.println( _a + "\t" + e );
2730
}

0 commit comments

Comments
 (0)