Skip to content

Commit fe92563

Browse files
committed
ROX-32316: Add generic rate limiter for VM index reports
Introduce a reusable token bucket rate limiter in pkg/rate that provides per-sensor fair rate limiting. Each sensor gets an equal share (1/N) of the global rate, with automatic rebalancing on connect/disconnect.

Key features:
- Float64 rate support for sub-1 req/sec limits (e.g., 0.5)
- Configurable via ROX_VM_INDEX_REPORT_RATE_LIMIT (default: 2.0 req/s)
- Per-sensor bucket capacity via ROX_VM_INDEX_REPORT_BUCKET_CAPACITY
- Prometheus metrics for accepted/rejected requests
- Global registry for multiple workload types

Integrated into the virtualmachineindex pipeline, with ACK/NACK responses sent back to Sensor when the rate limit is exceeded.

AI-assisted: Initial implementation and tests generated by AI.
User-reviewed: Architecture decisions, naming conventions, float64 rate type, error handling (return errors vs. panic), and test robustness.
1 parent 922a230 commit fe92563

File tree

7 files changed

+940
-5
lines changed

7 files changed

+940
-5
lines changed

central/sensor/service/pipeline/virtualmachineindex/pipeline.go

Lines changed: 80 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package virtualmachineindex
22

33
import (
44
"context"
5+
"strconv"
56

67
"github.com/pkg/errors"
78
countMetrics "github.com/stackrox/rox/central/metrics"
@@ -12,12 +13,19 @@ import (
1213
"github.com/stackrox/rox/generated/internalapi/central"
1314
"github.com/stackrox/rox/generated/storage"
1415
"github.com/stackrox/rox/pkg/centralsensor"
16+
"github.com/stackrox/rox/pkg/env"
1517
"github.com/stackrox/rox/pkg/features"
1618
"github.com/stackrox/rox/pkg/logging"
1719
"github.com/stackrox/rox/pkg/metrics"
20+
"github.com/stackrox/rox/pkg/rate"
1821
vmEnricher "github.com/stackrox/rox/pkg/virtualmachine/enricher"
1922
)
2023

24+
const (
	// rateLimiterWorkload is the workload name used for rate limiting VM index reports.
	rateLimiterWorkload = "vm_index_report"
)
28+
2129
var (
2230
log = logging.LoggerForModule()
2331

@@ -26,26 +34,43 @@ var (
2634

2735
// GetPipeline returns an instantiation of this particular pipeline
2836
func GetPipeline() pipeline.Fragment {
37+
rateLimit, err := strconv.ParseFloat(env.VMIndexReportRateLimit.Setting(), 64)
38+
if err != nil {
39+
log.Panicf("Invalid %s value: %v", env.VMIndexReportRateLimit.EnvVar(), err)
40+
}
41+
rateLimiter, err := rate.RegisterLimiter(
42+
rateLimiterWorkload,
43+
rateLimit,
44+
env.VMIndexReportBucketCapacity.IntegerSetting(),
45+
)
46+
if err != nil {
47+
log.Panicf("Failed to create rate limiter for %s: %v", rateLimiterWorkload, err)
48+
}
2949
return newPipeline(
3050
vmDatastore.Singleton(),
3151
vmEnricher.Singleton(),
52+
rateLimiter,
3253
)
3354
}
3455

3556
// newPipeline returns a new instance of Pipeline.
36-
func newPipeline(vms vmDatastore.DataStore, enricher vmEnricher.VirtualMachineEnricher) pipeline.Fragment {
57+
func newPipeline(vms vmDatastore.DataStore, enricher vmEnricher.VirtualMachineEnricher, rateLimiter *rate.Limiter) pipeline.Fragment {
3758
return &pipelineImpl{
3859
vmDatastore: vms,
3960
enricher: enricher,
61+
rateLimiter: rateLimiter,
4062
}
4163
}
4264

4365
type pipelineImpl struct {
4466
vmDatastore vmDatastore.DataStore
4567
enricher vmEnricher.VirtualMachineEnricher
68+
rateLimiter *rate.Limiter
4669
}
4770

48-
func (p *pipelineImpl) OnFinish(_ string) {
71+
func (p *pipelineImpl) OnFinish(clusterID string) {
72+
// Notify rate limiter that this sensor disconnected so it can rebalance
73+
p.rateLimiter.OnSensorDisconnect(clusterID)
4974
}
5075

5176
func (p *pipelineImpl) Capabilities() []centralsensor.CentralCapability {
@@ -60,7 +85,7 @@ func (p *pipelineImpl) Match(msg *central.MsgFromSensor) bool {
6085
return msg.GetEvent().GetVirtualMachineIndexReport() != nil
6186
}
6287

63-
func (p *pipelineImpl) Run(ctx context.Context, _ string, msg *central.MsgFromSensor, _ common.MessageInjector) error {
88+
func (p *pipelineImpl) Run(ctx context.Context, _ string, msg *central.MsgFromSensor, injector common.MessageInjector) error {
6489
defer countMetrics.IncrementResourceProcessedCounter(pipeline.ActionToOperation(msg.GetEvent().GetAction()), metrics.VirtualMachineIndex)
6590

6691
if !features.VirtualMachines.Enabled() {
@@ -82,6 +107,18 @@ func (p *pipelineImpl) Run(ctx context.Context, _ string, msg *central.MsgFromSe
82107

83108
log.Debugf("Received virtual machine index report: %s", index.GetId())
84109

110+
// Extract cluster ID from injector (connection metadata)
111+
clusterID := extractClusterID(injector)
112+
113+
// Rate limit check
114+
allowed, reason := p.rateLimiter.TryConsume(clusterID)
115+
if !allowed {
116+
log.Infof("Rate limit exceeded for VM %s from cluster %s: %s",
117+
index.GetId(), clusterID, reason)
118+
sendVMIndexReportResponse(ctx, index.GetId(), central.SensorACK_NACK, reason, injector)
119+
return nil // Don't return error - would cause pipeline retry
120+
}
121+
85122
// Get or create VM
86123
vm := &storage.VirtualMachine{Id: index.GetId()}
87124

@@ -105,5 +142,45 @@ func (p *pipelineImpl) Run(ctx context.Context, _ string, msg *central.MsgFromSe
105142
log.Infof("Successfully enriched and stored VM %s with %d components",
106143
vm.GetId(), len(vm.GetScan().GetComponents()))
107144

145+
// Send ACK to Sensor
146+
sendVMIndexReportResponse(ctx, index.GetId(), central.SensorACK_ACK, "", injector)
147+
108148
return nil
109149
}
150+
151+
// extractClusterID extracts the cluster ID from the message injector (connection metadata).
152+
func extractClusterID(injector common.MessageInjector) string {
153+
// MessageInjector is actually a SensorConnection which has ClusterID()
154+
if conn, ok := injector.(interface{ ClusterID() string }); ok {
155+
return conn.ClusterID()
156+
}
157+
log.Warn("Unable to extract cluster ID from injector, using unknown")
158+
return "unknown"
159+
}
160+
161+
// sendVMIndexReportResponse sends an ACK or NACK for a VM index report.
162+
func sendVMIndexReportResponse(ctx context.Context, vmID string, action central.SensorACK_Action, reason string, injector common.MessageInjector) {
163+
conn, ok := injector.(interface {
164+
HasCapability(centralsensor.SensorCapability) bool
165+
})
166+
if !ok || !conn.HasCapability(centralsensor.SensorACKSupport) {
167+
log.Debugf("Sensor does not support SensorACK, skipping VM index report response for %s", vmID)
168+
return
169+
}
170+
171+
msg := &central.MsgToSensor{
172+
Msg: &central.MsgToSensor_SensorAck{
173+
SensorAck: &central.SensorACK{
174+
Action: action,
175+
MessageType: central.SensorACK_VM_INDEX_REPORT,
176+
ResourceId: vmID,
177+
Reason: reason,
178+
},
179+
},
180+
}
181+
if err := injector.InjectMessage(ctx, msg); err != nil {
182+
log.Warnf("Failed sending VM index report %s for %s: %v", action.String(), vmID, err)
183+
} else {
184+
log.Debugf("Sent VM index report %s for %s (reason=%q)", action.String(), vmID, reason)
185+
}
186+
}

central/sensor/service/pipeline/virtualmachineindex/pipeline_test.go

Lines changed: 94 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@ import (
1313
"github.com/stackrox/rox/generated/storage"
1414
"github.com/stackrox/rox/pkg/centralsensor"
1515
"github.com/stackrox/rox/pkg/features"
16+
"github.com/stackrox/rox/pkg/rate"
1617
vmEnricherMocks "github.com/stackrox/rox/pkg/virtualmachine/enricher/mocks"
1718
"github.com/stretchr/testify/assert"
19+
"github.com/stretchr/testify/require"
1820
"github.com/stretchr/testify/suite"
1921
"go.uber.org/mock/gomock"
2022
)
@@ -25,6 +27,13 @@ const (
2527

2628
var ctx = context.Background()
2729

30+
// mustNewLimiter creates a rate limiter or fails the test.
31+
func mustNewLimiter(t require.TestingT, workloadName string, globalRate float64, bucketCapacity int) *rate.Limiter {
32+
limiter, err := rate.NewLimiter(workloadName, globalRate, bucketCapacity)
33+
require.NoError(t, err)
34+
return limiter
35+
}
36+
2837
func TestPipeline(t *testing.T) {
2938
suite.Run(t, new(PipelineTestSuite))
3039
}
@@ -43,9 +52,12 @@ func (suite *PipelineTestSuite) SetupTest() {
4352
suite.mockCtrl = gomock.NewController(suite.T())
4453
suite.vmDatastore = vmDatastoreMocks.NewMockDataStore(suite.mockCtrl)
4554
suite.enricher = vmEnricherMocks.NewMockVirtualMachineEnricher(suite.mockCtrl)
55+
// Use unlimited rate limiter for tests (rate=0)
56+
rateLimiter := mustNewLimiter(suite.T(), "test", 0, 50)
4657
suite.pipeline = &pipelineImpl{
4758
vmDatastore: suite.vmDatastore,
4859
enricher: suite.enricher,
60+
rateLimiter: rateLimiter,
4961
}
5062
}
5163

@@ -176,13 +188,15 @@ func (suite *PipelineTestSuite) TestGetPipeline() {
176188
// TestNewPipeline verifies that newPipeline wires every dependency —
// datastore, enricher, and rate limiter — into the returned pipelineImpl.
func (suite *PipelineTestSuite) TestNewPipeline() {
	ds := vmDatastoreMocks.NewMockDataStore(suite.mockCtrl)
	enr := vmEnricherMocks.NewMockVirtualMachineEnricher(suite.mockCtrl)
	limiter := mustNewLimiter(suite.T(), "test", 0, 50)

	frag := newPipeline(ds, enr, limiter)
	suite.NotNil(frag)

	impl, ok := frag.(*pipelineImpl)
	suite.True(ok, "Should return pipelineImpl instance")
	suite.Equal(ds, impl.vmDatastore)
	suite.Equal(enr, impl.enricher)
	suite.Equal(limiter, impl.rateLimiter)
}
187201

188202
// Test table-driven approach for different actions
@@ -229,9 +243,11 @@ func TestPipelineRun_DifferentActions(t *testing.T) {
229243

230244
vmDatastore := vmDatastoreMocks.NewMockDataStore(ctrl)
231245
enricher := vmEnricherMocks.NewMockVirtualMachineEnricher(ctrl)
246+
rateLimiter := mustNewLimiter(t, "test", 0, 50)
232247
pipeline := &pipelineImpl{
233248
vmDatastore: vmDatastore,
234249
enricher: enricher,
250+
rateLimiter: rateLimiter,
235251
}
236252

237253
vmID := "vm-1"
@@ -272,7 +288,11 @@ func TestPipelineEdgeCases(t *testing.T) {
272288
defer ctrl.Finish()
273289

274290
vmDatastore := vmDatastoreMocks.NewMockDataStore(ctrl)
275-
pipeline := &pipelineImpl{vmDatastore: vmDatastore}
291+
rateLimiter := mustNewLimiter(t, "test", 0, 50)
292+
pipeline := &pipelineImpl{
293+
vmDatastore: vmDatastore,
294+
rateLimiter: rateLimiter,
295+
}
276296

277297
t.Run("nil message", func(t *testing.T) {
278298
result := pipeline.Match(nil)
@@ -328,9 +348,11 @@ func TestPipelineRun_DisabledFeature(t *testing.T) {
328348

329349
vmDatastore := vmDatastoreMocks.NewMockDataStore(ctrl)
330350
enricher := vmEnricherMocks.NewMockVirtualMachineEnricher(ctrl)
351+
rateLimiter := mustNewLimiter(t, "test", 0, 50)
331352
pipeline := &pipelineImpl{
332353
vmDatastore: vmDatastore,
333354
enricher: enricher,
355+
rateLimiter: rateLimiter,
334356
}
335357

336358
vmID := "vm-1"
@@ -340,3 +362,73 @@ func TestPipelineRun_DisabledFeature(t *testing.T) {
340362

341363
assert.NoError(t, err)
342364
}
365+
366+
// TestPipelineRun_RateLimitDisabled tests that rate limiting is disabled when configured with 0
367+
func TestPipelineRun_RateLimitDisabled(t *testing.T) {
368+
t.Setenv(features.VirtualMachines.EnvVar(), "true")
369+
ctrl := gomock.NewController(t)
370+
defer ctrl.Finish()
371+
372+
vmDatastore := vmDatastoreMocks.NewMockDataStore(ctrl)
373+
enricher := vmEnricherMocks.NewMockVirtualMachineEnricher(ctrl)
374+
rateLimiter := mustNewLimiter(t, "test", 0, 50) // Disabled
375+
376+
pipeline := &pipelineImpl{
377+
vmDatastore: vmDatastore,
378+
enricher: enricher,
379+
rateLimiter: rateLimiter,
380+
}
381+
382+
vmID := "vm-1"
383+
msg := createVMIndexMessage(vmID, central.ResourceAction_SYNC_RESOURCE)
384+
385+
// Should process all 100 requests without rate limiting
386+
for i := 0; i < 100; i++ {
387+
enricher.EXPECT().
388+
EnrichVirtualMachineWithVulnerabilities(gomock.Any(), gomock.Any()).
389+
Return(nil)
390+
vmDatastore.EXPECT().
391+
UpdateVirtualMachineScan(ctx, vmID, gomock.Any()).
392+
Return(nil)
393+
394+
err := pipeline.Run(ctx, testClusterID, msg, nil)
395+
assert.NoError(t, err, "request %d should succeed with rate limiting disabled", i)
396+
}
397+
}
398+
399+
// TestPipelineRun_RateLimitEnabled tests that rate limiting rejects requests when enabled
400+
func TestPipelineRun_RateLimitEnabled(t *testing.T) {
401+
t.Setenv(features.VirtualMachines.EnvVar(), "true")
402+
ctrl := gomock.NewController(t)
403+
defer ctrl.Finish()
404+
405+
vmDatastore := vmDatastoreMocks.NewMockDataStore(ctrl)
406+
enricher := vmEnricherMocks.NewMockVirtualMachineEnricher(ctrl)
407+
rateLimiter := mustNewLimiter(t, "test", 5, 5) // 5 req/s, bucket capacity=5
408+
409+
pipeline := &pipelineImpl{
410+
vmDatastore: vmDatastore,
411+
enricher: enricher,
412+
rateLimiter: rateLimiter,
413+
}
414+
415+
vmID := "vm-1"
416+
msg := createVMIndexMessage(vmID, central.ResourceAction_SYNC_RESOURCE)
417+
418+
// First 5 requests should succeed (burst capacity)
419+
for i := 0; i < 5; i++ {
420+
enricher.EXPECT().
421+
EnrichVirtualMachineWithVulnerabilities(gomock.Any(), gomock.Any()).
422+
Return(nil)
423+
vmDatastore.EXPECT().
424+
UpdateVirtualMachineScan(ctx, vmID, gomock.Any()).
425+
Return(nil)
426+
427+
err := pipeline.Run(ctx, testClusterID, msg, nil)
428+
assert.NoError(t, err, "request %d should succeed within burst", i)
429+
}
430+
431+
// 6th request should be rejected (no NACK sent since injector is nil in test)
432+
err := pipeline.Run(ctx, testClusterID, msg, nil)
433+
assert.NoError(t, err, "rate limited request should not return error (NACK sent instead)")
434+
}

0 commit comments

Comments
 (0)