Skip to content

Commit 307cfec

Browse files
committed
Sync exit events
Signed-off-by: Kenfe-Mickael Laventure <mickael.laventure@gmail.com>
1 parent 7f6fe43 commit 307cfec

File tree

7 files changed

+73
-22
lines changed

7 files changed

+73
-22
lines changed

runtime/container.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -249,14 +249,21 @@ func (c *container) readSpec() (*specs.Spec, error) {
249249
}
250250

251251
func (c *container) Delete() error {
252-
err := os.RemoveAll(filepath.Join(c.root, c.id))
252+
var err error
253253
args := c.runtimeArgs
254254
args = append(args, "delete", c.id)
255255
if b, derr := exec.Command(c.runtime, args...).CombinedOutput(); err != nil {
256256
err = fmt.Errorf("%s: %q", derr, string(b))
257257
} else if len(b) > 0 {
258258
logrus.Debugf("%v %v: %q", c.runtime, args, string(b))
259259
}
260+
if rerr := os.RemoveAll(filepath.Join(c.root, c.id)); rerr != nil {
261+
if err != nil {
262+
err = fmt.Errorf("%s; failed to remove %s: %s", err, filepath.Join(c.root, c.id), rerr)
263+
} else {
264+
err = rerr
265+
}
266+
}
260267
return err
261268
}
262269

@@ -274,7 +281,7 @@ func (c *container) RemoveProcess(pid string) error {
274281
}
275282

276283
func (c *container) State() State {
277-
proc := c.processes["init"]
284+
proc := c.processes[InitProcessID]
278285
if proc == nil {
279286
return Stopped
280287
}

supervisor/add_process.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ func (s *Supervisor) addProcess(t *AddProcessTask) error {
3636
return err
3737
}
3838
ExecProcessTimer.UpdateSince(start)
39+
s.newExecSyncChannel(t.ID, t.PID)
3940
t.StartResponse <- StartResponse{ExecPid: process.SystemPid()}
4041
s.notifySubscribers(Event{
4142
Timestamp: time.Now(),

supervisor/delete.go

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,21 @@ func (s *Supervisor) delete(t *DeleteTask) error {
2727
t.Process.Wait()
2828
}
2929
if !t.NoEvent {
30-
s.notifySubscribers(Event{
31-
Type: StateExit,
32-
Timestamp: time.Now(),
33-
ID: t.ID,
34-
Status: t.Status,
35-
PID: t.PID,
36-
})
30+
execMap := s.getDeleteExecSyncMap(t.ID)
31+
go func() {
32+
// Wait for all exec process events to be sent (we seem
33+
// to sometimes receive them after the init event)
34+
for _, ch := range execMap {
35+
<-ch
36+
}
37+
s.notifySubscribers(Event{
38+
Type: StateExit,
39+
Timestamp: time.Now(),
40+
ID: t.ID,
41+
Status: t.Status,
42+
PID: t.PID,
43+
})
44+
}()
3745
}
3846
ContainersCounter.Dec(1)
3947
ContainerDeleteTimer.UpdateSince(start)

supervisor/exit.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,11 @@ func (s *Supervisor) execExit(t *ExecExitTask) error {
7373
if err := container.RemoveProcess(t.PID); err != nil {
7474
logrus.WithField("error", err).Error("containerd: find container for pid")
7575
}
76+
synCh := s.getExecSyncChannel(t.ID, t.PID)
7677
// If the exec spawned children which are still using its IO
7778
// waiting here will block until they die or close their IO
7879
// descriptors.
79-
// Hence, we use a go routine to avoid block all other operations
80+
// Hence, we use a go routine to avoid blocking all other operations
8081
go func() {
8182
t.Process.Wait()
8283
s.notifySubscribers(Event{
@@ -86,6 +87,7 @@ func (s *Supervisor) execExit(t *ExecExitTask) error {
8687
PID: t.PID,
8788
Status: t.Status,
8889
})
90+
close(synCh)
8991
}()
9092
return nil
9193
}

supervisor/sort.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,5 @@ func (s *processSorter) Swap(i, j int) {
2323
}
2424

2525
// Less reports true exactly when the process at index j carries the init
// process ID; the process at index i is never consulted.
// NOTE(review): presumably this makes a sort place the init process after
// every other process — confirm against the callers that sort with it.
func (s *processSorter) Less(i, j int) bool {
26-
return s.processes[j].ID() == "init"
26+
return s.processes[j].ID() == runtime.InitProcessID
2727
}

supervisor/supervisor.go

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,18 @@ func New(stateDir string, runtimeName, shimName string, runtimeArgs []string, ti
3232
return nil, err
3333
}
3434
s := &Supervisor{
35-
stateDir: stateDir,
36-
containers: make(map[string]*containerInfo),
37-
startTasks: startTasks,
38-
machine: machine,
39-
subscribers: make(map[chan Event]struct{}),
40-
tasks: make(chan Task, defaultBufferSize),
41-
monitor: monitor,
42-
runtime: runtimeName,
43-
runtimeArgs: runtimeArgs,
44-
shim: shimName,
45-
timeout: timeout,
35+
stateDir: stateDir,
36+
containers: make(map[string]*containerInfo),
37+
startTasks: startTasks,
38+
machine: machine,
39+
subscribers: make(map[chan Event]struct{}),
40+
tasks: make(chan Task, defaultBufferSize),
41+
monitor: monitor,
42+
runtime: runtimeName,
43+
runtimeArgs: runtimeArgs,
44+
shim: shimName,
45+
timeout: timeout,
46+
containerExecSync: make(map[string]map[string]chan struct{}),
4647
}
4748
if err := setupEventLog(s, retainCount); err != nil {
4849
return nil, err
@@ -171,6 +172,10 @@ type Supervisor struct {
171172
eventLog []Event
172173
eventLock sync.Mutex
173174
timeout time.Duration
175+
// This is used to ensure that exec process death events are sent
176+
// before the init process death
177+
containerExecSyncLock sync.Mutex
178+
containerExecSync map[string]map[string]chan struct{}
174179
}
175180

176181
// Stop closes all startTasks and sends a SIGTERM to each container's pid1 then waits for them to
@@ -401,3 +406,30 @@ func (s *Supervisor) handleTask(i Task) {
401406
close(i.ErrorCh())
402407
}
403408
}
409+
410+
// newExecSyncMap stores a new, empty pid-to-channel map under containerID in
// s.containerExecSync while holding containerExecSyncLock. Any map already
// stored under the same container ID is replaced.
func (s *Supervisor) newExecSyncMap(containerID string) {
411+
s.containerExecSyncLock.Lock()
412+
s.containerExecSync[containerID] = make(map[string]chan struct{})
413+
s.containerExecSyncLock.Unlock()
414+
}
415+
416+
func (s *Supervisor) newExecSyncChannel(containerID, pid string) {
417+
s.containerExecSyncLock.Lock()
418+
s.containerExecSync[containerID][pid] = make(chan struct{})
419+
s.containerExecSyncLock.Unlock()
420+
}
421+
422+
// getExecSyncChannel returns the channel stored in s.containerExecSync for
// the given container ID and pid, reading it under containerExecSyncLock.
// When no entry exists the lookup yields a nil channel; note that calling
// close on a nil channel panics.
func (s *Supervisor) getExecSyncChannel(containerID, pid string) chan struct{} {
423+
s.containerExecSyncLock.Lock()
424+
ch := s.containerExecSync[containerID][pid]
425+
s.containerExecSyncLock.Unlock()
426+
return ch
427+
}
428+
429+
// getDeleteExecSyncMap removes the pid-to-channel map stored for containerID
// from s.containerExecSync and returns it, doing both under
// containerExecSyncLock. It returns a nil map when nothing is stored for
// that container ID.
func (s *Supervisor) getDeleteExecSyncMap(containerID string) map[string]chan struct{} {
430+
s.containerExecSyncLock.Lock()
431+
chs := s.containerExecSync[containerID]
432+
delete(s.containerExecSync, containerID)
433+
s.containerExecSyncLock.Unlock()
434+
return chs
435+
}

supervisor/worker.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ func (w *worker) Start() {
9090
}
9191
}
9292
ContainerStartTimer.UpdateSince(started)
93+
w.s.newExecSyncMap(t.Container.ID())
9394
t.Err <- nil
9495
t.StartResponse <- StartResponse{
9596
Container: t.Container,

0 commit comments

Comments
 (0)