-
Notifications
You must be signed in to change notification settings - Fork 148
Expand file tree
/
Copy pathlambdaInstance.go
More file actions
248 lines (207 loc) · 6.57 KB
/
lambdaInstance.go
File metadata and controls
248 lines (207 loc) · 6.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
package lambda
import (
"io"
"log"
"net/http"
"strings"
"github.com/open-lambda/open-lambda/ol/common"
"github.com/open-lambda/open-lambda/ol/worker/sandbox"
)
// This is essentially a virtual sandbox. It is backed by a real
// Sandbox (when it is allowed to allocate one). It pauses/unpauses
// based on usage, and starts fresh instances when they die.
type LambdaInstance struct {
	// lfunc is the function this instance serves; Task() pulls
	// Invocations from its instChan and acks them on its doneChan.
	lfunc *LambdaFunc

	// snapshot of LambdaFunc, at the time the LambdaInstance is created
	codeDir string
	meta    *sandbox.SandboxMeta

	// send chan to the kill chan to destroy the instance, then
	// wait for msg on sent chan to block until it is done
	killChan chan chan bool
}
// this Task manages a single Sandbox (at any given time), and
// forwards requests from the function queue to that Sandbox.
// when there are no requests, the Sandbox is paused.
//
// These errors are handled as follows by Task:
//
// 1. Sandbox.Pause/Unpause: discard Sandbox, create new one to handle request
// 2. Sandbox.Create/Channel: discard Sandbox, propagate HTTP 500 to client
// 3. Error inside Sandbox: simply propagate whatever occurred to the client (TODO: restart Sandbox)
func (linst *LambdaInstance) Task() {
	f := linst.lfunc

	// sb is the one real Sandbox backing this virtual instance; nil
	// means a fresh one must be created before serving the next request
	var sb sandbox.Sandbox
	var err error

	for {
		// wait for a request (blocking) before making the
		// Sandbox ready, or kill if we receive that signal
		var req *Invocation
		select {
		case req = <-f.instChan:
		case killed := <-linst.killChan:
			if sb != nil {
				// capture logs before Destroy so they can still be
				// dumped after the sandbox is torn down
				rtLog := sb.GetRuntimeLog()
				proxyLog := sb.GetProxyLog()
				sb.Destroy("Lambda instance kill signal received")
				log.Printf("Stopped sandbox")
				if common.Conf.Log_output {
					if rtLog != "" {
						log.Printf("Runtime output is:")
						for _, line := range strings.Split(rtLog, "\n") {
							log.Printf(" %s", line)
						}
					}
					if proxyLog != "" {
						log.Printf("Proxy output is:")
						for _, line := range strings.Split(proxyLog, "\n") {
							log.Printf(" %s", line)
						}
					}
				}
			}
			// ack so the caller blocked on AsyncKill's chan can proceed
			killed <- true
			return
		}

		t := common.T0("LambdaInstance-WaitSandbox")

		// if we have a sandbox, try unpausing it to see if it is still alive
		if sb != nil {
			// Unpause will often fail, because evictors
			// are likely to prefer to evict paused
			// sandboxes rather than inactive sandboxes.
			// Thus, if this fails, we'll try to handle it
			// by just creating a new sandbox.
			t2 := common.T0("LambdaInstance-WaitSandbox-Unpause")
			if err := sb.Unpause(); err != nil {
				f.printf("discard sandbox %s due to Unpause error: %v", sb.ID(), err)
				sb = nil
			}
			t2.T1()
		}

		// if we don't already have a Sandbox, create one, and
		// HTTP proxy over the channel
		if sb == nil {
			sb = nil

			// try the Zygote/import-cache path first (Python only)
			if f.lmgr.ZygoteProvider != nil && f.rtType == common.RT_PYTHON {
				scratchDir := f.lmgr.scratchDirs.Make(f.name)
				// we don't specify parent SB, because ImportCache.Create chooses it for us
				sb, err = f.lmgr.ZygoteProvider.Create(f.lmgr.sbPool, true, linst.codeDir, scratchDir, linst.meta, f.rtType)
				if err != nil {
					// fall through to the non-cache path below
					f.printf("failed to get Sandbox from import cache")
					sb = nil
				}
			}

			log.Printf("Creating new sandbox")

			// import cache is either disabled or it failed
			if sb == nil {
				t2 := common.T0("LambdaInstance-WaitSandbox-NoImportCache")
				scratchDir := f.lmgr.scratchDirs.Make(f.name)
				sb, err = f.lmgr.sbPool.Create(nil, true, linst.codeDir, scratchDir, linst.meta, f.rtType)
				t2.T1()
			}

			// err here reflects whichever Create ran last; on failure we
			// report 500, ack the request, and wait for the next one
			if err != nil {
				linst.TrySendError(req, http.StatusInternalServerError, "could not create Sandbox: "+err.Error()+"\n", nil)
				f.doneChan <- req
				continue // wait for another request before retrying
			}
		}

		t.T1()

		// below here, we're guaranteed (1) sb != nil, (2) proxy != nil, (3) sb is unpaused

		// serve until the incoming queue is empty
		t = common.T0("LambdaInstance-ServeRequests")
		for req != nil {
			//f.printf("Forwarding request to sandbox")
			t2 := common.T0("LambdaInstance-RoundTrip")

			// get response from sandbox; the "http://root" host is a
			// placeholder since sb.Client() routes to the sandbox itself
			url := "http://root" + req.r.RequestURI
			httpReq, err := http.NewRequest(req.r.Method, url, req.r.Body)
			if err != nil {
				linst.TrySendError(req, http.StatusInternalServerError, "Could not create NewRequest: "+err.Error(), sb)
			} else {
				resp, err := sb.Client().Do(httpReq)

				// copy response out
				if err != nil {
					linst.TrySendError(req, http.StatusBadGateway, "RoundTrip failed: "+err.Error()+"\n", sb)
				} else {
					// copy headers
					// (adapted from copyHeaders: https://go.dev/src/net/http/httputil/reverseproxy.go)
					for k, vv := range resp.Header {
						for _, v := range vv {
							req.w.Header().Add(k, v)
						}
					}
					req.w.WriteHeader(resp.StatusCode)

					// copy body
					if _, err := io.Copy(req.w, resp.Body); err != nil {
						// already used WriteHeader, so can't use that to surface on error anymore
						msg := "reading lambda response failed: " + err.Error() + "\n"
						f.printf("error: " + msg)
						linst.TrySendError(req, 0, msg, sb)
					}
					resp.Body.Close()
				}
			}

			// notify instance that we're done
			t2.T1()

			// Record at least 1 ms of elapsed time
			v := int(t2.Milliseconds)
			if v == 0 {
				req.execMs = 1
			} else {
				req.execMs = v
			}
			f.doneChan <- req

			// check whether we should shutdown (non-blocking)
			select {
			case killed := <-linst.killChan:
				// NOTE(review): unlike the blocking kill path above, this
				// path dumps only the runtime log, not the proxy log
				rtLog := sb.GetRuntimeLog()
				sb.Destroy("Lambda instance kill signal received")
				log.Printf("Stopped sandbox")
				if common.Conf.Log_output {
					if rtLog != "" {
						log.Printf("Runtime output is:")
						for _, line := range strings.Split(rtLog, "\n") {
							log.Printf(" %s", line)
						}
					}
				}
				killed <- true
				return
			default:
			}

			// grab another request (non-blocking)
			select {
			case req = <-f.instChan:
			default:
				req = nil
			}
		}

		// queue drained: pause the sandbox until the next request burst
		if sb != nil {
			if err := sb.Pause(); err != nil {
				f.printf("discard sandbox %s due to Pause error: %v", sb.ID(), err)
				sb = nil
			}
		}
		t.T1()
	}
}
// TrySendError reports an error back to the waiting HTTP client. A
// positive statusCode is written as the response status (pass 0 when a
// status has already been sent). When sb is non-nil, the sandbox's
// debug state is appended to the message. Write failures are only
// logged, since there is nothing further to do for this client.
func (linst *LambdaInstance) TrySendError(req *Invocation, statusCode int, msg string, sb sandbox.Sandbox) {
	if statusCode > 0 {
		req.w.WriteHeader(statusCode)
	}

	// build the full body up front, then do a single Write
	body := msg + "\n"
	if sb != nil {
		body = msg + "\nSandbox State: " + sb.DebugString() + "\n"
	}

	if _, err := req.w.Write([]byte(body)); err != nil {
		linst.lfunc.printf("TrySendError failed: %s\n", err.Error())
	}
}
// AsyncKill asks the instance's Task loop to shut down and destroy its
// sandbox. It does not wait: the caller may receive from the returned
// channel to block until teardown has finished.
func (linst *LambdaInstance) AsyncKill() chan bool {
	ack := make(chan bool)
	linst.killChan <- ack
	return ack
}