Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 28 additions & 24 deletions go/worker/lambda/lambdaFunction.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package lambda

import (
"bufio"
"container/list"
"errors"
"fmt"
"log/slog"
Expand Down Expand Up @@ -35,14 +34,17 @@ type LambdaFunc struct {
Meta *FunctionMeta

// lambda execution
funcChan chan *Invocation // server to func
instChan chan *Invocation // func to instances
doneChan chan *Invocation // instances to func
instances *list.List
funcChan chan *Invocation // server to func
instChan chan *Invocation // func to instances
doneChan chan *Invocation // instances to func
nInstances int

// to shut this function down, send a reply chan on killChan (all
// instances are signaled to die), then receive on that reply chan to block until it is done
killChan chan chan bool

// kill channel shared by every instance of this function; a reply chan sent on it is received by exactly one instance.
invocationKillChan chan chan bool
}

// Invoke handles the invocation of the lambda function.
Expand Down Expand Up @@ -280,13 +282,11 @@ func (f *LambdaFunc) Task() {
}

if oldCodeDir != "" && oldCodeDir != f.codeDir {
el := f.instances.Front()
for el != nil {
waitChan := el.Value.(*LambdaInstance).AsyncKill()
for i := 0; i < f.nInstances; i++ {
waitChan := f.AsyncKillOneInvocation()
cleanupChan <- waitChan
el = el.Next()
}
f.instances = list.New()
f.nInstances = 0

// cleanupChan is a FIFO, so this will
// happen after the cleanup task waits
Expand Down Expand Up @@ -318,11 +318,9 @@ func (f *LambdaFunc) Task() {
case done := <-f.killChan:
// signal all instances to die, then wait for
// cleanup task to finish and exit
el := f.instances.Front()
for el != nil {
waitChan := el.Value.(*LambdaInstance).AsyncKill()
for i := 0; i < f.nInstances; i++ {
waitChan := f.AsyncKillOneInvocation()
cleanupChan <- waitChan
el = el.Next()
}
if f.codeDir != "" {
// cleanupChan <- f.codeDir
Expand Down Expand Up @@ -362,7 +360,7 @@ func (f *LambdaFunc) Task() {
if lastScaling != nil {
elapsed := now.Sub(*lastScaling)
if elapsed < adjustFreq {
if desiredInstances != f.instances.Len() {
if desiredInstances != f.nInstances {
timeout = time.NewTimer(adjustFreq - elapsed)
}
continue
Expand All @@ -371,19 +369,19 @@ func (f *LambdaFunc) Task() {

// kill or start at most one instance to get closer to
// desired number
if f.instances.Len() < desiredInstances {
f.printf("increase instances to %d", f.instances.Len()+1)
if f.nInstances < desiredInstances {
f.printf("increase instances to %d", f.nInstances+1)
f.newInstance()
lastScaling = &now
} else if f.instances.Len() > desiredInstances {
f.printf("reduce instances to %d", f.instances.Len()-1)
waitChan := f.instances.Back().Value.(*LambdaInstance).AsyncKill()
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was the problematic line: it always picked the most recently started LambdaInstance, which is not necessarily an idle one.

f.instances.Remove(f.instances.Back())
} else if f.nInstances > desiredInstances {
f.printf("reduce instances to %d", f.nInstances-1)
waitChan := f.AsyncKillOneInvocation()
f.nInstances--
cleanupChan <- waitChan
lastScaling = &now
}

if f.instances.Len() != desiredInstances {
if f.nInstances != desiredInstances {
// we can only adjust quickly, so we want to
// run through this loop again as soon as
// possible, even if there are no requests to
Expand All @@ -403,10 +401,10 @@ func (f *LambdaFunc) newInstance() {
lfunc: f,
codeDir: f.codeDir,
meta: f.Meta,
killChan: make(chan chan bool, 1),
killChan: f.invocationKillChan,
}

f.instances.PushBack(linst)
f.nInstances++

go linst.Task()
}
Expand All @@ -417,3 +415,9 @@ func (f *LambdaFunc) Kill() {
f.killChan <- done
<-done
}

// AsyncKillOneInvocation requests that one instance of this function be
// destroyed. It creates a fresh, unbuffered reply channel, sends it on
// f.invocationKillChan, and returns the reply channel to the caller.
//
// The call returns once the send on f.invocationKillChan has completed; it
// does not itself receive from the reply channel.
// NOTE(review): presumably whichever instance receives the request signals on
// the returned channel once its teardown is finished -- confirm against the
// instance's Task loop, which is not visible here.
func (f *LambdaFunc) AsyncKillOneInvocation() chan bool {
	finished := make(chan bool)
	f.invocationKillChan <- finished
	return finished
}
8 changes: 0 additions & 8 deletions go/worker/lambda/lambdaInstance.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,11 +240,3 @@ func (linst *LambdaInstance) TrySendError(req *Invocation, statusCode int, msg s
linst.lfunc.printf("TrySendError failed: %s\n", err.Error())
}
}

// AsyncKill asks this instance to die without waiting for it to finish. A new
// reply channel is sent on linst.killChan and then returned; the caller can
// receive from the returned channel to block until the kill is done.
func (linst *LambdaInstance) AsyncKill() chan bool {
	reply := make(chan bool)
	linst.killChan <- reply
	return reply
}
12 changes: 6 additions & 6 deletions go/worker/lambda/lambdaManager.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package lambda

import (
"container/list"
"fmt"
"log/slog"
"net/http"
Expand Down Expand Up @@ -134,11 +133,12 @@ func (mgr *LambdaMgr) Get(name string) (f *LambdaFunc) {
lmgr: mgr,
name: name,
// TODO make these configurable
funcChan: make(chan *Invocation, 1024),
instChan: make(chan *Invocation, 1024),
doneChan: make(chan *Invocation, 1024),
instances: list.New(),
killChan: make(chan chan bool, 1),
funcChan: make(chan *Invocation, 1024),
instChan: make(chan *Invocation, 1024),
doneChan: make(chan *Invocation, 1024),
nInstances: 0,
killChan: make(chan chan bool, 1),
invocationKillChan: make(chan chan bool),
}

go f.Task()
Expand Down