Skip to content

Commit 3b665f8

Browse files
akamai-purger: Queue and response handling improvements (letsencrypt#5955)
- Make maximum queue size configurable via a new configuration key: 'MaxQueueSize'.
- Default 'MaxQueueSize' to the previous value (1M) when 'MaxQueueSize' isn't specified.
- akamaiPurger.purge() will now only re-queue the URLs starting at the first entry of the batch where a failure was encountered, instead of the entire set that was originally passed.
- Add a test to ensure that these changes are working as intended.
- Make the purge batching easier to understand with some minor changes to variable names.
- Responses whose HTTP status code is not 201 will no longer be unmarshaled.
- Logs will explicitly call out if a response indicates that we've exceeded any rate limits imposed by Akamai.

Fixes letsencrypt#5917
1 parent b19b791 commit 3b665f8

File tree

3 files changed

+111
-48
lines changed

3 files changed

+111
-48
lines changed

akamai/cache-client.go

Lines changed: 56 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,17 @@ const (
3131
v3PurgeTagPath = "/ccu/v3/delete/tag/"
3232
)
3333

34+
var (
35+
// ErrAllRetriesFailed indicates that all purge submission attempts have
36+
// failed.
37+
ErrAllRetriesFailed = errors.New("all attempts to submit purge request failed")
38+
39+
// errFatal is returned by the purge method of CachePurgeClient to indicate
40+
// that it failed for a reason that cannot be remediated by retrying the
41+
// request.
42+
errFatal = errors.New("fatal error")
43+
)
44+
3445
type v3PurgeRequest struct {
3546
Objects []string `json:"objects"`
3647
}
@@ -61,13 +72,6 @@ type CachePurgeClient struct {
6172
clk clock.Clock
6273
}
6374

64-
// ErrAllRetriesFailed indicates that all purge submission attempts have failed.
65-
var ErrAllRetriesFailed = errors.New("all attempts to submit purge request failed")
66-
67-
// errFatal is returned by the purge method of CachePurgeClient to indicate that
68-
// it failed for a reason that cannot be remediated by retrying the request.
69-
var errFatal = errors.New("fatal error")
70-
7175
// NewCachePurgeClient performs some basic validation of supplied configuration
7276
// and returns a newly constructed CachePurgeClient.
7377
func NewCachePurgeClient(
@@ -228,18 +232,44 @@ func (cpc *CachePurgeClient) authedRequest(endpoint string, body v3PurgeRequest)
228232
return err
229233
}
230234

231-
// Ensure that the purge request was successful.
235+
// Success for a request to purge a URL or Cache tag is 'HTTP 201'.
236+
// https://techdocs.akamai.com/purge-cache/reference/delete-url
237+
// https://techdocs.akamai.com/purge-cache/reference/delete-tag
238+
if resp.StatusCode != http.StatusCreated {
239+
switch resp.StatusCode {
240+
// https://techdocs.akamai.com/purge-cache/reference/403
241+
case http.StatusForbidden:
242+
return fmt.Errorf("client not authorized to make requests for URL %q: %w", resp.Request.URL, errFatal)
243+
244+
// https://techdocs.akamai.com/purge-cache/reference/504
245+
case http.StatusGatewayTimeout:
246+
return fmt.Errorf("server timed out, got HTTP %d (body %q) for URL %q", resp.StatusCode, respBody, resp.Request.URL)
247+
248+
// https://techdocs.akamai.com/purge-cache/reference/429
249+
case http.StatusTooManyRequests:
250+
return fmt.Errorf("exceeded request count rate limit, got HTTP %d (body %q) for URL %q", resp.StatusCode, respBody, resp.Request.URL)
251+
252+
// https://techdocs.akamai.com/purge-cache/reference/413
253+
case http.StatusRequestEntityTooLarge:
254+
return fmt.Errorf("exceeded request size rate limit, got HTTP %d (body %q) for URL %q", resp.StatusCode, respBody, resp.Request.URL)
255+
default:
256+
return fmt.Errorf("received HTTP %d (body %q) for URL %q", resp.StatusCode, respBody, resp.Request.URL)
257+
}
258+
}
259+
232260
var purgeInfo purgeResponse
233261
err = json.Unmarshal(respBody, &purgeInfo)
234262
if err != nil {
235263
return fmt.Errorf("while unmarshalling body %q from URL %q as JSON: %w", respBody, resp.Request.URL, err)
236264
}
237265

238-
if purgeInfo.HTTPStatus != http.StatusCreated || resp.StatusCode != http.StatusCreated {
266+
// Ensure the unmarshaled body concurs with the status of the response
267+
// received.
268+
if purgeInfo.HTTPStatus != http.StatusCreated {
239269
if purgeInfo.HTTPStatus == http.StatusForbidden {
240270
return fmt.Errorf("client not authorized to make requests to URL %q: %w", resp.Request.URL, errFatal)
241271
}
242-
return fmt.Errorf("received HTTP %d (body %q) from URL %q", resp.StatusCode, respBody, resp.Request.URL)
272+
return fmt.Errorf("unmarshaled HTTP %d (body %q) from URL %q", purgeInfo.HTTPStatus, respBody, resp.Request.URL)
243273
}
244274

245275
cpc.log.AuditInfof("Purge request sent successfully (ID %s) (body %s). Purge expected in %ds",
@@ -275,21 +305,26 @@ func (cpc *CachePurgeClient) purgeBatch(urls []string) error {
275305
return nil
276306
}
277307

278-
// Purge attempts to send a purge request to the Akamai CCU API cpc.retries
279-
// number of times before giving up and returning ErrAllRetriesFailed.
280-
func (cpc *CachePurgeClient) Purge(urls []string) error {
281-
for i := 0; i < len(urls); {
282-
sliceEnd := i + akamaiBatchSize
283-
if sliceEnd > len(urls) {
284-
sliceEnd = len(urls)
308+
// Purge dispatches the provided urls in batched requests to the Akamai CCU API.
309+
// Requests will be attempted cpc.retries number of times before giving up and
310+
// returning ErrAllRetriesFailed and the beginning index position of the batch
311+
// where the failure was encountered.
312+
func (cpc *CachePurgeClient) Purge(urls []string) (int, error) {
313+
totalURLs := len(urls)
314+
for batchBegin := 0; batchBegin < totalURLs; {
315+
batchEnd := batchBegin + akamaiBatchSize
316+
if batchEnd > totalURLs {
317+
// Avoid index out of range error.
318+
batchEnd = totalURLs
285319
}
286-
err := cpc.purgeBatch(urls[i:sliceEnd])
320+
321+
err := cpc.purgeBatch(urls[batchBegin:batchEnd])
287322
if err != nil {
288-
return err
323+
return batchBegin, err
289324
}
290-
i += akamaiBatchSize
325+
batchBegin += akamaiBatchSize
291326
}
292-
return nil
327+
return totalURLs, nil
293328
}
294329

295330
// CheckSignature is exported for use in tests and akamai-test-srv.

akamai/cache-client_test.go

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,18 +138,18 @@ func TestV3Purge(t *testing.T) {
138138
fc := clock.NewFake()
139139
client.clk = fc
140140

141-
err = client.Purge([]string{"http://test.com"})
141+
_, err = client.Purge([]string{"http://test.com"})
142142
test.AssertNotError(t, err, "Purge failed; expected 201 response")
143143

144144
started := client.clk.Now()
145145
as.responseCode = http.StatusInternalServerError
146-
err = client.Purge([]string{"http://test.com"})
146+
_, err = client.Purge([]string{"http://test.com"})
147147
test.AssertError(t, err, "Purge succeeded; expected 500 response")
148148
test.Assert(t, client.clk.Since(started) > (time.Second*4), "Retries should've taken at least 4.4 seconds")
149149

150150
started = client.clk.Now()
151151
as.responseCode = http.StatusCreated
152-
err = client.Purge([]string{"http:/test.com"})
152+
_, err = client.Purge([]string{"http:/test.com"})
153153
test.AssertError(t, err, "Purge succeeded; expected a 403 response from malformed URL")
154154
test.Assert(t, client.clk.Since(started) < time.Second, "Purge should've failed out immediately")
155155
}
@@ -249,8 +249,21 @@ func TestBigBatchPurge(t *testing.T) {
249249
urls = append(urls, fmt.Sprintf("http://test.com/%d", i))
250250
}
251251

252-
err = client.Purge(urls)
252+
stoppedAt, err := client.Purge(urls)
253253
test.AssertNotError(t, err, "Purge failed with 201 response")
254+
test.AssertEquals(t, stoppedAt, 250)
255+
256+
// Add a malformed URL.
257+
urls = append(urls, "http:/test.com")
258+
259+
// Add 10 more valid entries.
260+
for i := 0; i < 10; i++ {
261+
urls = append(urls, fmt.Sprintf("http://test.com/%d", i))
262+
}
263+
264+
stoppedAt, err = client.Purge(urls)
265+
test.AssertError(t, err, "Purge succeeded with a malformed URL")
266+
test.AssertEquals(t, stoppedAt, 200)
254267
}
255268

256269
func TestReverseBytes(t *testing.T) {

cmd/akamai-purger/main.go

Lines changed: 38 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,20 @@ import (
2424
blog "github.com/letsencrypt/boulder/log"
2525
)
2626

27+
// defaultQueueSize is the default akamai-purger queue size.
28+
const defaultQueueSize = 1000000
29+
2730
type Config struct {
2831
AkamaiPurger struct {
2932
cmd.ServiceConfig
3033

3134
// PurgeInterval is the duration waited between purge requests.
3235
PurgeInterval cmd.ConfigDuration
3336

37+
// MaxQueueSize is the maximum size of the purger queue. If this value
38+
// isn't provided it will default to `defaultQueueSize`.
39+
MaxQueueSize int
40+
3441
BaseURL string
3542
ClientToken string
3643
ClientSecret string
@@ -43,50 +50,53 @@ type Config struct {
4350
Beeline cmd.BeelineConfig
4451
}
4552

53+
// akamaiPurger is a mutex protected container for a gRPC server which receives
54+
// requests to purge the URLs of OCSP responses cached by Akamai, stores these
55+
// URLs in an inner slice, and dispatches them to Akamai's Fast Purge API in
56+
// batches.
4657
type akamaiPurger struct {
58+
sync.Mutex
4759
akamaipb.UnimplementedAkamaiPurgerServer
48-
mu sync.Mutex
49-
toPurge []string
50-
51-
client *akamai.CachePurgeClient
52-
log blog.Logger
60+
toPurge []string
61+
maxQueueSize int
62+
client *akamai.CachePurgeClient
63+
log blog.Logger
5364
}
5465

5566
func (ap *akamaiPurger) len() int {
56-
ap.mu.Lock()
57-
defer ap.mu.Unlock()
67+
ap.Lock()
68+
defer ap.Unlock()
5869
return len(ap.toPurge)
5970
}
6071

6172
func (ap *akamaiPurger) purge() error {
62-
ap.mu.Lock()
73+
ap.Lock()
6374
urls := ap.toPurge[:]
6475
ap.toPurge = []string{}
65-
ap.mu.Unlock()
76+
ap.Unlock()
6677
if len(urls) == 0 {
6778
return nil
6879
}
6980

70-
err := ap.client.Purge(urls)
81+
stoppedAt, err := ap.client.Purge(urls)
7182
if err != nil {
72-
// Add the URLs back to the queue.
73-
ap.mu.Lock()
74-
ap.toPurge = append(urls, ap.toPurge...)
75-
ap.mu.Unlock()
83+
ap.Lock()
84+
85+
// Add the remaining URLs back, but at the end of the queue. If somehow
86+
// there's a URL which repeatedly results in error, it won't block the
87+
// entire queue, only a single batch.
88+
ap.toPurge = append(ap.toPurge, urls[stoppedAt:]...)
89+
ap.Unlock()
7690
ap.log.Errf("Failed to purge %d URLs: %s", len(urls), err)
7791
return err
7892
}
7993
return nil
8094
}
8195

82-
// maxQueueSize is used to reject Purge requests if the queue contains >= the
83-
// number of URLs to purge so that it can catch up.
84-
var maxQueueSize = 1000000
85-
8696
func (ap *akamaiPurger) Purge(ctx context.Context, req *akamaipb.PurgeRequest) (*emptypb.Empty, error) {
87-
ap.mu.Lock()
88-
defer ap.mu.Unlock()
89-
if len(ap.toPurge) >= maxQueueSize {
97+
ap.Lock()
98+
defer ap.Unlock()
99+
if len(ap.toPurge) >= ap.maxQueueSize {
90100
return nil, errors.New("akamai-purger queue too large")
91101
}
92102
ap.toPurge = append(ap.toPurge, req.Urls...)
@@ -160,6 +170,10 @@ func main() {
160170
cmd.Fail("'PurgeInterval' must be > 0")
161171
}
162172

173+
if c.AkamaiPurger.MaxQueueSize == 0 {
174+
c.AkamaiPurger.MaxQueueSize = defaultQueueSize
175+
}
176+
163177
ccu, err := akamai.NewCachePurgeClient(
164178
c.AkamaiPurger.BaseURL,
165179
c.AkamaiPurger.ClientToken,
@@ -174,8 +188,9 @@ func main() {
174188
cmd.FailOnError(err, "Failed to setup Akamai CCU client")
175189

176190
ap := &akamaiPurger{
177-
client: ccu,
178-
log: logger,
191+
maxQueueSize: c.AkamaiPurger.MaxQueueSize,
192+
client: ccu,
193+
log: logger,
179194
}
180195

181196
var gaugePurgeQueueLength = prometheus.NewGaugeFunc(

0 commit comments

Comments
 (0)