Skip to content

Commit 76c7acc

Browse files
Add heartbeat / automatic server cleanup code
This replaces the previous method of attempting to clean up servers when an unexpected exit occurs in the client (e.g. SIGINT or panic) by a heartbeat protocol. If the server does not hear from the connecting client within a certain interval of time (500ms in this commit), it will de-activate itself. This prevents dangling Docker Machine server processes from accumulating. Signed-off-by: Nathan LeClaire <nathan.leclaire@gmail.com>
1 parent a1e610b commit 76c7acc

File tree

8 files changed

+83
-104
lines changed

8 files changed

+83
-104
lines changed

cmd/machine.go

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package main
33
import (
44
"fmt"
55
"os"
6-
"os/signal"
76
"path"
87
"strconv"
98

@@ -141,31 +140,10 @@ func main() {
141140
},
142141
}
143142

144-
go commands.DeferClosePluginServers()
145-
146-
// Cleanup to run in case the user sends an interrupt (CTRL+C) to the
147-
// Machine program. Ensure that we do not leave dangling OS processes.
148-
signalCh := make(chan os.Signal, 1)
149-
signal.Notify(signalCh, os.Interrupt)
150-
151-
// Atypical exit condition -- write to the cleanup done channel after
152-
// ensuring that we have closed all exec-ed plugin servers.
153-
go func() {
154-
for range signalCh {
155-
log.Info("\nReceieved an interrupt, performing cleanup work...")
156-
close(commands.RpcClientDriversCh)
157-
<-commands.RpcDriversClosedCh
158-
os.Exit(1)
159-
}
160-
}()
161-
162143
// TODO: Close plugin servers in case of client panic.
163144
if err := app.Run(os.Args); err != nil {
164145
log.Error(err)
165146
}
166-
167-
close(commands.RpcClientDriversCh)
168-
<-commands.RpcDriversClosedCh
169147
}
170148

171149
func cmdNotFound(c *cli.Context, command string) {

commands/commands.go

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,6 @@ var (
2121
ErrUnknownShell = errors.New("Error: Unknown shell")
2222
ErrNoMachineSpecified = errors.New("Error: Expected to get one or more machine names as arguments.")
2323
ErrExpectedOneMachine = errors.New("Error: Expected one machine name as an argument.")
24-
25-
RpcClientDriversCh = make(chan *rpcdriver.RpcClientDriver)
26-
RpcDriversClosedCh = make(chan bool)
2724
)
2825

2926
func newPluginDriver(driverName string, rawContent []byte) (*rpcdriver.RpcClientDriver, error) {
@@ -32,47 +29,14 @@ func newPluginDriver(driverName string, rawContent []byte) (*rpcdriver.RpcClient
3229
return nil, err
3330
}
3431

35-
RpcClientDriversCh <- d
36-
3732
return d, nil
3833
}
3934

40-
func DeferClosePluginServers() {
41-
rpcClientDrivers := []*rpcdriver.RpcClientDriver{}
42-
43-
for d := range RpcClientDriversCh {
44-
rpcClientDrivers = append(rpcClientDrivers, d)
45-
}
46-
47-
doneCh := make(chan bool)
48-
49-
for _, d := range rpcClientDrivers {
50-
d := d
51-
go func() {
52-
if err := d.Close(); err != nil {
53-
log.Debugf("Error closing connection to plugin server: %s", err)
54-
}
55-
56-
doneCh <- true
57-
}()
58-
}
59-
60-
for range rpcClientDrivers {
61-
<-doneCh
62-
}
63-
64-
RpcDriversClosedCh <- true
65-
}
66-
6735
func fatal(args ...interface{}) {
68-
close(RpcClientDriversCh)
69-
<-RpcDriversClosedCh
7036
log.Fatal(args...)
7137
}
7238

7339
func fatalf(fmtString string, args ...interface{}) {
74-
close(RpcClientDriversCh)
75-
<-RpcDriversClosedCh
7640
log.Fatalf(fmtString, args...)
7741
}
7842

commands/create.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,10 @@ func cmdCreateOuter(c *cli.Context) {
305305
}
306306
}
307307

308+
if err := driver.Close(); err != nil {
309+
fatal(err)
310+
}
311+
308312
if err := c.App.Run(os.Args); err != nil {
309313
fatal(err)
310314
}

libmachine/drivers/plugin/localbinary/plugin.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ type LocalBinaryExecutor struct {
7777

7878
func NewLocalBinaryPlugin(driverName string) *LocalBinaryPlugin {
7979
return &LocalBinaryPlugin{
80-
stopCh: make(chan bool, 1),
80+
stopCh: make(chan bool),
8181
addrCh: make(chan string, 1),
8282
Executor: &LocalBinaryExecutor{
8383
DriverName: driverName,
@@ -132,13 +132,19 @@ func (lbe *LocalBinaryExecutor) Close() error {
132132
}
133133

134134
func stream(scanner *bufio.Scanner, streamOutCh chan<- string, stopCh <-chan bool) {
135-
for scanner.Scan() {
135+
lines := make(chan string)
136+
go func() {
137+
for scanner.Scan() {
138+
lines <- scanner.Text()
139+
}
140+
}()
141+
for {
136142
select {
137143
case <-stopCh:
138144
close(streamOutCh)
139145
return
140-
default:
141-
streamOutCh <- strings.Trim(scanner.Text(), "\n")
146+
case line := <-lines:
147+
streamOutCh <- strings.Trim(line, "\n")
142148
if err := scanner.Err(); err != nil {
143149
log.Warnf("Scanning stream: %s", err)
144150
}
@@ -148,7 +154,7 @@ func stream(scanner *bufio.Scanner, streamOutCh chan<- string, stopCh <-chan boo
148154

149155
func (lbp *LocalBinaryPlugin) AttachStream(scanner *bufio.Scanner) (<-chan string, chan<- bool) {
150156
streamOutCh := make(chan string)
151-
stopCh := make(chan bool, 1)
157+
stopCh := make(chan bool)
152158
go stream(scanner, streamOutCh, stopCh)
153159
return streamOutCh, stopCh
154160
}

libmachine/drivers/plugin/localbinary/plugin_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ func TestExecServer(t *testing.T) {
106106
stopCh: make(chan bool, 1),
107107
}
108108

109-
finalErr := make(chan error, 1)
109+
finalErr := make(chan error)
110110

111111
// Start the docker-machine-foo plugin server
112112
go func() {

libmachine/drivers/plugin/register_driver.go

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,18 @@ import (
66
"net/http"
77
"net/rpc"
88
"os"
9+
"time"
910

1011
"github.com/docker/machine/libmachine"
1112
"github.com/docker/machine/libmachine/drivers"
1213
"github.com/docker/machine/libmachine/drivers/plugin/localbinary"
1314
"github.com/docker/machine/libmachine/drivers/rpc"
1415
)
1516

17+
var (
18+
heartbeatTimeout = 500 * time.Millisecond
19+
)
20+
1621
func RegisterDriver(d drivers.Driver) {
1722
if os.Getenv(localbinary.PluginEnvKey) != localbinary.PluginEnvVal {
1823
fmt.Fprintln(os.Stderr, `This is a Docker Machine plugin binary.
@@ -23,11 +28,8 @@ Please use this plugin through the main 'docker-machine' binary.`)
2328

2429
libmachine.SetDebug(true)
2530

26-
rpcd := new(rpcdriver.RpcServerDriver)
27-
rpcd.ActualDriver = d
28-
rpcd.CloseCh = make(chan bool)
31+
rpcd := rpcdriver.NewRpcServerDriver(d)
2932
rpc.Register(rpcd)
30-
3133
rpc.HandleHTTP()
3234

3335
listener, err := net.Listen("tcp", "127.0.0.1:0")
@@ -41,5 +43,14 @@ Please use this plugin through the main 'docker-machine' binary.`)
4143

4244
go http.Serve(listener, nil)
4345

44-
<-rpcd.CloseCh
46+
for {
47+
select {
48+
case <-rpcd.CloseCh:
49+
os.Exit(0)
50+
case <-rpcd.HeartbeatCh:
51+
continue
52+
case <-time.After(heartbeatTimeout):
53+
os.Exit(1)
54+
}
55+
}
4556
}

libmachine/drivers/rpc/client_driver.go

Lines changed: 37 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package rpcdriver
33
import (
44
"fmt"
55
"net/rpc"
6+
"time"
67

78
"github.com/docker/machine/libmachine/drivers"
89
"github.com/docker/machine/libmachine/drivers/plugin/localbinary"
@@ -11,8 +12,14 @@ import (
1112
"github.com/docker/machine/libmachine/state"
1213
)
1314

15+
var (
16+
heartbeatInterval = 200 * time.Millisecond
17+
)
18+
1419
type RpcClientDriver struct {
15-
Client *InternalClient
20+
plugin localbinary.DriverPlugin
21+
heartbeatDoneCh chan bool
22+
Client *InternalClient
1623
}
1724

1825
type RpcCall struct {
@@ -22,36 +29,27 @@ type RpcCall struct {
2229
}
2330

2431
type InternalClient struct {
25-
RpcClient *rpc.Client
26-
Calls chan RpcCall
27-
CallErrs chan error
32+
MachineName string
33+
RpcClient *rpc.Client
2834
}
2935

3036
func (ic *InternalClient) Call(serviceMethod string, args interface{}, reply interface{}) error {
31-
ic.Calls <- RpcCall{
32-
ServiceMethod: serviceMethod,
33-
Args: args,
34-
Reply: reply,
37+
if serviceMethod != "RpcServerDriver.Heartbeat" {
38+
log.Debugf("(%s) Calling %+v", ic.MachineName, serviceMethod)
3539
}
36-
37-
return <-ic.CallErrs
40+
return ic.RpcClient.Call(serviceMethod, args, reply)
3841
}
3942

4043
func NewInternalClient(rpcclient *rpc.Client) *InternalClient {
4144
return &InternalClient{
4245
RpcClient: rpcclient,
43-
Calls: make(chan RpcCall),
44-
CallErrs: make(chan error),
4546
}
4647
}
47-
4848
func NewRpcClientDriver(rawDriverData []byte, driverName string) (*RpcClientDriver, error) {
4949
mcnName := ""
5050

5151
p := localbinary.NewLocalBinaryPlugin(driverName)
5252

53-
c := &RpcClientDriver{}
54-
5553
go func() {
5654
if err := p.Serve(); err != nil {
5755
// If we can't safely load the server, best to just
@@ -70,31 +68,24 @@ func NewRpcClientDriver(rawDriverData []byte, driverName string) (*RpcClientDriv
7068
return nil, err
7169
}
7270

73-
c.Client = NewInternalClient(rpcclient)
71+
c := &RpcClientDriver{
72+
Client: NewInternalClient(rpcclient),
73+
heartbeatDoneCh: make(chan bool),
74+
}
7475

75-
go func() {
76+
go func(heartbeatDoneCh <-chan bool) {
7677
for {
77-
call := <-c.Client.Calls
78-
79-
log.Debugf("(%s) Got msg %+v", mcnName, call)
80-
81-
if call.ServiceMethod == "RpcServerDriver.Close" {
82-
p.Close()
83-
}
84-
85-
c.Client.CallErrs <- c.Client.RpcClient.Call(call.ServiceMethod, call.Args, call.Reply)
86-
87-
if call.ServiceMethod == "RpcServerDriver.Close" {
88-
// If we're messaging the server to close,
89-
// we're not accepting any more RPC calls at
90-
// all, so return from this function
91-
// (subsequent "requests" to make a call by
92-
// sending on the Calls channel will simply
93-
// block and never go through)
78+
select {
79+
case <-heartbeatDoneCh:
9480
return
81+
default:
82+
if err := c.Client.Call("RpcServerDriver.Heartbeat", struct{}{}, nil); err != nil {
83+
log.Warnf("Error attempting heartbeat call to plugin server: %s", err)
84+
}
85+
time.Sleep(heartbeatInterval)
9586
}
9687
}
97-
}()
88+
}(c.heartbeatDoneCh)
9889

9990
var version int
10091
if err := c.Client.Call("RpcServerDriver.GetVersion", struct{}{}, &version); err != nil {
@@ -108,6 +99,8 @@ func NewRpcClientDriver(rawDriverData []byte, driverName string) (*RpcClientDriv
10899

109100
mcnName = c.GetMachineName()
110101
p.MachineName = mcnName
102+
c.Client.MachineName = mcnName
103+
c.plugin = p
111104

112105
return c, nil
113106
}
@@ -121,6 +114,15 @@ func (c *RpcClientDriver) UnmarshalJSON(data []byte) error {
121114
}
122115

123116
func (c *RpcClientDriver) Close() error {
117+
c.heartbeatDoneCh <- true
118+
close(c.heartbeatDoneCh)
119+
120+
log.Debug("Making call to close connection to plugin binary")
121+
122+
if err := c.plugin.Close(); err != nil {
123+
return err
124+
}
125+
124126
log.Debug("Making call to close driver server")
125127

126128
if err := c.Client.Call("RpcServerDriver.Close", struct{}{}, nil); err != nil {

libmachine/drivers/rpc/server_driver.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,15 @@ func (r RpcFlags) Bool(key string) bool {
6767
type RpcServerDriver struct {
6868
ActualDriver drivers.Driver
6969
CloseCh chan bool
70+
HeartbeatCh chan bool
71+
}
72+
73+
func NewRpcServerDriver(d drivers.Driver) *RpcServerDriver {
74+
return &RpcServerDriver{
75+
ActualDriver: d,
76+
CloseCh: make(chan bool),
77+
HeartbeatCh: make(chan bool),
78+
}
7079
}
7180

7281
func (r *RpcServerDriver) Close(_, _ *struct{}) error {
@@ -181,3 +190,8 @@ func (r *RpcServerDriver) Start(_ *struct{}, _ *struct{}) error {
181190
func (r *RpcServerDriver) Stop(_ *struct{}, _ *struct{}) error {
182191
return r.ActualDriver.Stop()
183192
}
193+
194+
func (r *RpcServerDriver) Heartbeat(_ *struct{}, _ *struct{}) error {
195+
r.HeartbeatCh <- true
196+
return nil
197+
}

0 commit comments

Comments
 (0)