Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
d8063f6
Empty commit
vikin91 Dec 15, 2025
49a294d
ROX-32316: Add generic rate limiter for VM index reports
vikin91 Dec 15, 2025
d9586df
Readd rate limiting env vars
vikin91 Jan 6, 2026
1bcad9c
Use HasCapability as in the other pipelines
vikin91 Jan 6, 2026
e4d8949
Inject clock to rate limiter to avoid test flakiness
vikin91 Jan 7, 2026
8ac9222
Fix style: use the correct sync pkg
vikin91 Jan 7, 2026
66cca03
Fix: Skip sending on nil injector
vikin91 Jan 7, 2026
2eddfa5
Make rate limiter more generic
vikin91 Jan 9, 2026
7c47644
Improve metrics coverage
vikin91 Jan 9, 2026
1a4024f
Improve logs
vikin91 Jan 9, 2026
926eac1
Add defensive checks for rate limiter
vikin91 Jan 12, 2026
72effbb
Rephrase log message
vikin91 Jan 12, 2026
0cd7004
Fix race in getOrCreateLimiter
vikin91 Jan 12, 2026
fda13c6
Add debug log
vikin91 Jan 12, 2026
0b6dcf3
Change default ROX_VM_INDEX_REPORT_RATE_LIMIT to 1.0
vikin91 Jan 12, 2026
e8eab5f
Add comment on default rate limiter setting
vikin91 Jan 12, 2026
205ab76
Record metrics in rate-unlimited mode
vikin91 Jan 12, 2026
7b95652
Remove metrics: RequestsAccepted, RequestsRejected
vikin91 Jan 12, 2026
e9f7e19
Remove rate-limiter registry
vikin91 Jan 12, 2026
6933634
Add test coverage
vikin91 Jan 13, 2026
eda61fb
More limiter tests. Modify log statements
vikin91 Jan 13, 2026
2339b91
Drop sync.Map in favor of map+RWmutex (benchmarked)
vikin91 Jan 13, 2026
b3dc279
Address review: logging & test coverage
vikin91 Jan 13, 2026
90dd180
Change default rate limit for VM to 0.3
vikin91 Jan 15, 2026
1bbd36e
Add comment to pkg limiter
vikin91 Jan 15, 2026
0a344e1
Rephrase comment and reason string for nil rate limiter
vikin91 Jan 19, 2026
0136d77
Use rate-limited logger if the workload rate-limiter is nil
vikin91 Jan 19, 2026
34057eb
Update also test expectation for the changed reason string
vikin91 Jan 19, 2026
b99627d
Update comment to the rate limit env setting
vikin91 Jan 19, 2026
ab9279e
Change the fallback-default rate-limit to 0.3
vikin91 Jan 19, 2026
776693a
Remove the additional validation for ROX_VM_INDEX_REPORT_BUCKET_CAPACITY
vikin91 Jan 19, 2026
b2bcd0d
Use error instead of warning. Fix log text 1.0->0.3
vikin91 Jan 20, 2026
149a807
Mladen: Don't panic if cannot create rate limiter
vikin91 Jan 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion central/sensor/service/connection/connection_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ func (c *sensorConnection) Scrapes() scrape.Controller {
}

// InjectMessageIntoQueue hands msg to multiplexedPush using a fresh background
// context that has this connection attached and is wrapped with
// sac.WithAllAccess. The third argument to multiplexedPush is nil.
//
// NOTE(review): the two calls below appear to be the before/after lines of the
// withConnection -> WithConnection rename as rendered by a diff view; presumably
// only the second call exists in the real file - confirm against the repository.
func (c *sensorConnection) InjectMessageIntoQueue(msg *central.MsgFromSensor) {
c.multiplexedPush(sac.WithAllAccess(withConnection(context.Background(), c)), msg, nil)
c.multiplexedPush(sac.WithAllAccess(WithConnection(context.Background(), c)), msg, nil)
}

func (c *sensorConnection) NetworkEntities() networkentities.Controller {
Expand Down
3 changes: 2 additions & 1 deletion central/sensor/service/connection/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ func FromContext(ctx context.Context) SensorConnection {
return conn
}

func withConnection(ctx context.Context, conn SensorConnection) context.Context {
// WithConnection derives a new context from ctx that carries conn. The value
// is stored under a key of this package's unexported contextKey type.
func WithConnection(ctx context.Context, conn SensorConnection) context.Context {
	// The zero value of contextKey is the lookup key.
	var key contextKey
	return context.WithValue(ctx, key, conn)
}
2 changes: 1 addition & 1 deletion central/sensor/service/connection/manager_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ func (m *manager) HandleConnection(ctx context.Context, sensorHello *central.Sen
m.complianceOperatorMgr,
m.initSyncMgr,
)
ctx = withConnection(ctx, conn)
ctx = WithConnection(ctx, conn)

oldConnection, err := m.replaceConnection(ctx, cluster, conn)
if err != nil {
Expand Down
99 changes: 95 additions & 4 deletions central/sensor/service/pipeline/virtualmachineindex/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,38 @@ package virtualmachineindex

import (
"context"
"strconv"

"github.com/pkg/errors"
countMetrics "github.com/stackrox/rox/central/metrics"
"github.com/stackrox/rox/central/sensor/service/common"
"github.com/stackrox/rox/central/sensor/service/connection"
"github.com/stackrox/rox/central/sensor/service/pipeline"
"github.com/stackrox/rox/central/sensor/service/pipeline/reconciliation"
vmDatastore "github.com/stackrox/rox/central/virtualmachine/datastore"
"github.com/stackrox/rox/generated/internalapi/central"
"github.com/stackrox/rox/generated/storage"
"github.com/stackrox/rox/pkg/centralsensor"
"github.com/stackrox/rox/pkg/env"
"github.com/stackrox/rox/pkg/features"
"github.com/stackrox/rox/pkg/logging"
"github.com/stackrox/rox/pkg/metrics"
"github.com/stackrox/rox/pkg/rate"
vmEnricher "github.com/stackrox/rox/pkg/virtualmachine/enricher"
)

const (
// rateLimiterWorkload is the workload name used for rate limiting VM index reports.
// It is the name handed to rate.NewLimiter in GetPipeline and is also included
// in the log messages emitted when the limiter is missing or cannot be created.
rateLimiterWorkload = "vm_index_report"
)

// rateLimiter defines the interface for rate limiting operations used by this pipeline.
// This interface is satisfied by *rate.Limiter and allows for easier testing.
type rateLimiter interface {
// TryConsume is called once per incoming VM index report with the cluster ID
// as clientID. When allowed is false the pipeline drops the report and uses
// reason in its log line and in the NACK sent back to Sensor.
TryConsume(clientID string) (allowed bool, reason string)
// OnClientDisconnect is called from OnFinish with the cluster ID of the
// Sensor whose connection has ended.
OnClientDisconnect(clientID string)
}

var (
log = logging.LoggerForModule()

Expand All @@ -26,26 +42,43 @@ var (

// GetPipeline returns an instantiation of this particular pipeline.
//
// The rate limiter is configured from two settings:
//   - env.VMIndexReportRateLimit, parsed as a float64; if it cannot be parsed a
//     warning is logged and 0.3 is used instead.
//   - env.VMIndexReportBucketCapacity, read as an integer.
//
// If the limiter cannot be created, the error is logged and the pipeline is
// built without a limiter. In that state Run drops every VM index report
// instead of processing it without a limit.
func GetPipeline() pipeline.Fragment {
	rateLimit, err := strconv.ParseFloat(env.VMIndexReportRateLimit.Setting(), 64)
	if err != nil {
		log.Warnf("Invalid %s value: %v. Using fallback value of 0.3", env.VMIndexReportRateLimit.EnvVar(), err)
		rateLimit = 0.3 // Keep in sync with the default value in env.VMIndexReportRateLimit.
	}
	bucketCapacity := env.VMIndexReportBucketCapacity.IntegerSetting()

	limiter, err := rate.NewLimiter(rateLimiterWorkload, rateLimit, bucketCapacity)
	if err != nil {
		log.Errorf("Failed to create rate limiter for %s: %v", rateLimiterWorkload, err)
		// Pass a literal nil rather than the returned limiter value. Assuming
		// rate.NewLimiter returns a nil pointer alongside the error (TODO
		// confirm), forwarding that pointer would wrap it in a non-nil interface
		// value, and the `p.rateLimiter == nil` guards in this pipeline would
		// never trigger.
		return newPipeline(
			vmDatastore.Singleton(),
			vmEnricher.Singleton(),
			nil,
		)
	}
	return newPipeline(
		vmDatastore.Singleton(),
		vmEnricher.Singleton(),
		limiter,
	)
}

// newPipeline returns a new instance of Pipeline.
func newPipeline(vms vmDatastore.DataStore, enricher vmEnricher.VirtualMachineEnricher) pipeline.Fragment {
func newPipeline(vms vmDatastore.DataStore, enricher vmEnricher.VirtualMachineEnricher, rl rateLimiter) pipeline.Fragment {
	// Pure dependency wiring: each argument is stored as given. rl may be nil;
	// OnFinish and Run both check for that before using it.
	impl := new(pipelineImpl)
	impl.vmDatastore = vms
	impl.enricher = enricher
	impl.rateLimiter = rl
	return impl
}

// pipelineImpl is the pipeline fragment that handles virtual machine index
// reports received from Sensor. It is constructed by newPipeline.
type pipelineImpl struct {
// vmDatastore is the virtual machine datastore supplied at construction.
vmDatastore vmDatastore.DataStore
// enricher is the virtual machine enricher supplied at construction.
enricher vmEnricher.VirtualMachineEnricher
// rateLimiter gates incoming reports per cluster ID. It may be nil, in which
// case Run drops reports and OnFinish skips the disconnect notification.
rateLimiter rateLimiter
}

func (p *pipelineImpl) OnFinish(_ string) {
func (p *pipelineImpl) OnFinish(clusterID string) {
	// Without a limiter there is nobody to notify.
	if p.rateLimiter == nil {
		return
	}
	// Tell the rate limiter that this client (Sensor) is gone so that it can
	// rebalance the limiters.
	p.rateLimiter.OnClientDisconnect(clusterID)
}

func (p *pipelineImpl) Capabilities() []centralsensor.CentralCapability {
Expand All @@ -60,7 +93,7 @@ func (p *pipelineImpl) Match(msg *central.MsgFromSensor) bool {
return msg.GetEvent().GetVirtualMachineIndexReport() != nil
}

func (p *pipelineImpl) Run(ctx context.Context, _ string, msg *central.MsgFromSensor, _ common.MessageInjector) error {
func (p *pipelineImpl) Run(ctx context.Context, clusterID string, msg *central.MsgFromSensor, injector common.MessageInjector) error {
defer countMetrics.IncrementResourceProcessedCounter(pipeline.ActionToOperation(msg.GetEvent().GetAction()), metrics.VirtualMachineIndex)

if !features.VirtualMachines.Enabled() {
Expand All @@ -82,6 +115,37 @@ func (p *pipelineImpl) Run(ctx context.Context, _ string, msg *central.MsgFromSe

log.Debugf("Received virtual machine index report: %s", index.GetId())

if clusterID == "" {
return errors.New("missing cluster ID in pipeline context")
}

// Extract connection for capability checks; cluster ID is taken from the pipeline argument.
conn := connection.FromContext(ctx)

// Rate limit check. Drop message if rate limiter is misconfigured (defensive behavior against misconfiguration)
// or rate limit exceeded. Afterwards, send NACK to Sensor if Sensor supports it.
if p.rateLimiter == nil {
logging.GetRateLimitedLogger().ErrorL(
"vm_index_report_nil_rate_limiter",
"No rate limiter found for workload %q. Dropping VM index report from cluster %s",
rateLimiterWorkload,
clusterID,
)
if conn != nil && conn.HasCapability(centralsensor.SensorACKSupport) {
sendVMIndexReportResponse(ctx, clusterID, index.GetId(), central.SensorACK_NACK, "rate limiter not configured", injector)
}
return nil // Don't return error - would cause pipeline retry
}

allowed, reason := p.rateLimiter.TryConsume(clusterID)
if !allowed {
log.Infof("Dropping VM index report %s from cluster %s: %s", index.GetId(), clusterID, reason)
if conn != nil && conn.HasCapability(centralsensor.SensorACKSupport) {
sendVMIndexReportResponse(ctx, clusterID, index.GetId(), central.SensorACK_NACK, reason, injector)
}
return nil // Don't return error - would cause pipeline retry
}

// Get or create VM
vm := &storage.VirtualMachine{Id: index.GetId()}

Expand All @@ -102,8 +166,35 @@ func (p *pipelineImpl) Run(ctx context.Context, _ string, msg *central.MsgFromSe
return errors.Wrapf(err, "failed to upsert VM %s to datastore", index.GetId())
}

log.Infof("Successfully enriched and stored VM %s with %d components",
log.Debugf("Successfully enriched and stored VM %s with %d components",
vm.GetId(), len(vm.GetScan().GetComponents()))

// Send ACK to Sensor if Sensor supports it
if conn != nil && conn.HasCapability(centralsensor.SensorACKSupport) {
sendVMIndexReportResponse(ctx, clusterID, index.GetId(), central.SensorACK_ACK, "", injector)
}
return nil
}

// sendVMIndexReportResponse reports the outcome of handling a VM index report
// back to Sensor as a SensorACK of message type VM_INDEX_REPORT.
//
// action selects ACK or NACK, vmID is sent as the resource ID, and reason is
// forwarded verbatim (callers pass "" for ACKs). clusterID is used for logging
// only. When injector is nil nothing is sent and a debug line is logged. A
// failed injection is logged as a warning and is not returned to the caller.
func sendVMIndexReportResponse(ctx context.Context, clusterID, vmID string, action central.SensorACK_Action, reason string, injector common.MessageInjector) {
	if injector == nil {
		log.Debugf("Cannot send %s to Sensor for cluster %s - no injector", action.String(), clusterID)
		return
	}

	// Build the acknowledgement payload first, then wrap it in the envelope.
	ack := &central.SensorACK{
		Action:      action,
		MessageType: central.SensorACK_VM_INDEX_REPORT,
		ResourceId:  vmID,
		Reason:      reason,
	}
	envelope := &central.MsgToSensor{
		Msg: &central.MsgToSensor_SensorAck{SensorAck: ack},
	}

	err := injector.InjectMessage(ctx, envelope)
	if err != nil {
		log.Warnf("Failed sending VM index report %s for VM %s in cluster %s: %v", action.String(), vmID, clusterID, err)
		return
	}
	log.Debugf("Sent VM index report %s for VM %s in cluster %s (reason=%q)", action.String(), vmID, clusterID, reason)
}
Loading
Loading