Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion compliance/cmd/compliance/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ func Run() {
defer cancel()
umhNodeInv := handler.NewUnconfirmedMessageHandler(ctx, "node-inventory", env.NodeScanningAckDeadlineBase.DurationSetting())
umhNodeIndex := handler.NewUnconfirmedMessageHandler(ctx, "node-index", env.NodeScanningAckDeadlineBase.DurationSetting())
c := compliance.NewComplianceApp(np, scanner, cachedNodeIndexer, umhNodeInv, umhNodeIndex)
umhVMIndex := handler.NewUnconfirmedMessageHandler(ctx, "vm-index", env.NodeScanningAckDeadlineBase.DurationSetting())
c := compliance.NewComplianceApp(np, scanner, cachedNodeIndexer, umhNodeInv, umhNodeIndex, umhVMIndex)
c.Start()
}
40 changes: 37 additions & 3 deletions compliance/compliance.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"os"
"os/signal"
"strconv"
"syscall"
"time"

Expand All @@ -14,6 +15,7 @@ import (
cmetrics "github.com/stackrox/rox/compliance/collection/metrics"
"github.com/stackrox/rox/compliance/node"
"github.com/stackrox/rox/compliance/virtualmachines/relay"
vmmetrics "github.com/stackrox/rox/compliance/virtualmachines/relay/metrics"
"github.com/stackrox/rox/compliance/virtualmachines/relay/sender"
"github.com/stackrox/rox/compliance/virtualmachines/relay/stream"
v4 "github.com/stackrox/rox/generated/internalapi/scanner/v4"
Expand Down Expand Up @@ -48,18 +50,20 @@ type Compliance struct {
nodeIndexer node.NodeIndexer
umhNodeInventory node.UnconfirmedMessageHandler
umhNodeIndex node.UnconfirmedMessageHandler
umhVMIndex node.UnconfirmedMessageHandler
cache *sensor.MsgFromCompliance
}

// NewComplianceApp constructs the Compliance app object.
//
// The returned value stores the node name provider, node scanner and node
// indexer, plus one UnconfirmedMessageHandler per message stream (node
// inventory, node index, VM index). The message cache starts out nil.
//
// NOTE(review): two consecutive parameter-list lines follow the first line of
// the signature; they look like the before/after sides of a diff. Only the
// second one (which includes umhVMIndex) matches the struct literal below —
// confirm which line belongs in the real file.
func NewComplianceApp(nnp node.NodeNameProvider, scanner node.NodeScanner, nodeIndexer node.NodeIndexer,
umhNodeInv, umhNodeIndex node.UnconfirmedMessageHandler) *Compliance {
umhNodeInv, umhNodeIndex, umhVMIndex node.UnconfirmedMessageHandler) *Compliance {
return &Compliance{
nodeNameProvider: nnp,
nodeScanner: scanner,
nodeIndexer: nodeIndexer,
umhNodeInventory: umhNodeInv,
umhNodeIndex: umhNodeIndex,
umhVMIndex: umhVMIndex,
cache: nil,
}
}
Expand Down Expand Up @@ -145,7 +149,20 @@ func (c *Compliance) Start() {
sensorClient := sensor.NewVirtualMachineIndexReportServiceClient(conn)
reportSender := sender.New(sensorClient)

vmRelay := relay.New(reportStream, reportSender)
maxPerMinuteStr := env.VMRelayMaxReportsPerMinute.Setting()
maxPerMinute, err := strconv.ParseFloat(maxPerMinuteStr, 64)
if err != nil {
log.Panicf("Failed to parse %s value '%s' as float: %v",
env.VMRelayMaxReportsPerMinute.EnvVar(), maxPerMinuteStr, err)
}

vmRelay := relay.New(
reportStream,
reportSender,
c.umhVMIndex,
maxPerMinute,
env.VMRelayStaleAckThreshold.DurationSetting(),
)
if err := vmRelay.Run(ctx); err != nil {
log.Errorf("Error running virtual machine relay: %v", err)
}
Expand Down Expand Up @@ -382,7 +399,7 @@ func (c *Compliance) handleComplianceACK(ack *sensor.MsgToCompliance_ComplianceA
case sensor.MsgToCompliance_ComplianceACK_NODE_INDEX_REPORT:
c.handleNodeIndexACK(ack.GetAction(), ack.GetReason())
case sensor.MsgToCompliance_ComplianceACK_VM_INDEX_REPORT:
// TODO: Implement basic handling of VM_INDEX_REPORT ACK/NACK messages in ROX-33555.
c.handleVMIndexACK(ack.GetResourceId(), ack.GetAction(), ack.GetReason())
default:
log.Errorf("Unknown ComplianceACK message type: %s", ack.GetMessageType())
}
Expand Down Expand Up @@ -418,6 +435,23 @@ func (c *Compliance) handleNodeIndexACK(action sensor.MsgToCompliance_Compliance
}
}

// handleVMIndexACK dispatches a Sensor acknowledgement for a VM index report.
//
// For an ACK it bumps the VMIndexACKsFromSensor counter with label "ACK" and
// forwards resourceID to the VM index handler's HandleACK. For a NACK it bumps
// the counter with label "NACK", logs the reason at info level when one was
// supplied, and forwards resourceID to HandleNACK. Any other action is only
// logged as an error; neither the counter nor the handler is touched.
func (c *Compliance) handleVMIndexACK(resourceID string, action sensor.MsgToCompliance_ComplianceACK_Action, reason string) {
	// Positive acknowledgement: record it and confirm the message.
	if action == sensor.MsgToCompliance_ComplianceACK_ACK {
		vmmetrics.VMIndexACKsFromSensor.WithLabelValues("ACK").Inc()
		c.umhVMIndex.HandleACK(resourceID)
		return
	}

	// Anything that is neither ACK nor NACK is unexpected.
	if action != sensor.MsgToCompliance_ComplianceACK_NACK {
		log.Errorf("Unknown ComplianceACK action for VM index: %s", action)
		return
	}

	// Negative acknowledgement: record it, surface the reason if present,
	// then hand the resource over to the NACK path of the handler.
	vmmetrics.VMIndexACKsFromSensor.WithLabelValues("NACK").Inc()
	if reason != "" {
		log.Infof("VM index NACK received for %s: %s", resourceID, reason)
	}
	c.umhVMIndex.HandleNACK(resourceID)
}

func (c *Compliance) startAuditLogCollection(ctx context.Context, client sensor.ComplianceService_CommunicateClient, request *sensor.MsgToCompliance_AuditLogCollectionRequest_StartRequest) auditlog.Reader {
if request.GetCollectStartState() == nil {
log.Infof("Starting audit log reader on node %s in cluster %s with no saved state", c.nodeNameProvider.GetNodeName(), request.GetClusterId())
Expand Down
27 changes: 27 additions & 0 deletions compliance/compliance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ func (s *ComplianceTestSuite) TestHandleComplianceACK() {
expectedInventoryNACKs int
expectedIndexACKs int
expectedIndexNACKs int
expectedVMIndexACKs int
expectedVMIndexNACKs int
}{
"should handle NODE_INVENTORY ACK": {
ack: &sensor.MsgToCompliance_ComplianceACK{
Expand Down Expand Up @@ -96,16 +98,35 @@ func (s *ComplianceTestSuite) TestHandleComplianceACK() {
},
expectedIndexNACKs: 1,
},
"should handle VM_INDEX_REPORT ACK": {
ack: &sensor.MsgToCompliance_ComplianceACK{
Action: sensor.MsgToCompliance_ComplianceACK_ACK,
MessageType: sensor.MsgToCompliance_ComplianceACK_VM_INDEX_REPORT,
ResourceId: "vm-1",
},
expectedVMIndexACKs: 1,
},
"should handle VM_INDEX_REPORT NACK": {
ack: &sensor.MsgToCompliance_ComplianceACK{
Action: sensor.MsgToCompliance_ComplianceACK_NACK,
MessageType: sensor.MsgToCompliance_ComplianceACK_VM_INDEX_REPORT,
ResourceId: "vm-1",
Reason: "rate limit exceeded",
},
expectedVMIndexNACKs: 1,
},
}

for name, tc := range cases {
s.Run(name, func() {
mockInventory := newMockUnconfirmedMessageHandler()
mockIndex := newMockUnconfirmedMessageHandler()
mockVMIndex := newMockUnconfirmedMessageHandler()

c := &Compliance{
umhNodeInventory: mockInventory,
umhNodeIndex: mockIndex,
umhVMIndex: mockVMIndex,
}

c.handleComplianceACK(tc.ack)
Expand All @@ -114,17 +135,21 @@ func (s *ComplianceTestSuite) TestHandleComplianceACK() {
s.Equal(tc.expectedInventoryNACKs, mockInventory.nackCount, "inventory NACK count")
s.Equal(tc.expectedIndexACKs, mockIndex.ackCount, "index ACK count")
s.Equal(tc.expectedIndexNACKs, mockIndex.nackCount, "index NACK count")
s.Equal(tc.expectedVMIndexACKs, mockVMIndex.ackCount, "VM index ACK count")
s.Equal(tc.expectedVMIndexNACKs, mockVMIndex.nackCount, "VM index NACK count")
})
}
}

func (s *ComplianceTestSuite) TestHandleComplianceACK_NilACK() {
mockInventory := newMockUnconfirmedMessageHandler()
mockIndex := newMockUnconfirmedMessageHandler()
mockVMIndex := newMockUnconfirmedMessageHandler()

c := &Compliance{
umhNodeInventory: mockInventory,
umhNodeIndex: mockIndex,
umhVMIndex: mockVMIndex,
}

// Should not panic and should not call any handlers
Expand All @@ -134,4 +159,6 @@ func (s *ComplianceTestSuite) TestHandleComplianceACK_NilACK() {
s.Equal(0, mockInventory.nackCount)
s.Equal(0, mockIndex.ackCount)
s.Equal(0, mockIndex.nackCount)
s.Equal(0, mockVMIndex.ackCount)
s.Equal(0, mockVMIndex.nackCount)
}
62 changes: 62 additions & 0 deletions compliance/virtualmachines/relay/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,63 @@ var SemaphoreQueueSize = prometheus.NewGauge(
Help: "Number of connections waiting to be handled",
})

// VMIndexReportSendAttempts is a counter vector of VM index report send
// attempts towards Sensor, partitioned by the "result" label.
var VMIndexReportSendAttempts = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "virtual_machine_relay_index_report_send_attempts_total",
		Help:      "Send attempts of VM index reports to Sensor partitioned by result",
		Namespace: metrics.PrometheusNamespace,
		Subsystem: metrics.ComplianceSubsystem.String(),
	},
	// Intended label values: success|retry.
	[]string{"result"},
)

// VMIndexReportSendDurationSeconds is a histogram vector for the latency of
// individual VM index report send attempts to Sensor, partitioned by the
// "result" label. Buckets are exponential: 10 buckets starting at 0.1 with a
// growth factor of 2.
var VMIndexReportSendDurationSeconds = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:      "virtual_machine_relay_index_report_send_duration_seconds",
		Help:      "Duration of VM index report send attempts to Sensor",
		Buckets:   prometheus.ExponentialBuckets(0.1, 2, 10),
		Namespace: metrics.PrometheusNamespace,
		Subsystem: metrics.ComplianceSubsystem.String(),
	},
	// Intended label values: success|retry.
	[]string{"result"},
)

// ReportsRateLimited is a counter vector of reports dropped because of
// relay-side rate limiting, partitioned by the "reason" label.
var ReportsRateLimited = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "virtual_machine_relay_reports_rate_limited_total",
		Help:      "Reports dropped due to relay-side rate limiting",
		Namespace: metrics.PrometheusNamespace,
		Subsystem: metrics.ComplianceSubsystem.String(),
	},
	// Intended label values: "normal", "stale_ack".
	[]string{"reason"},
)

// AcksReceived is a plain (unlabelled) counter of ACK confirmations received
// from Sensor for VM index reports.
// NACKs are not counted here; they are tracked separately in the main
// compliance component where they're handled.
var AcksReceived = prometheus.NewCounter(
	prometheus.CounterOpts{
		Name:      "virtual_machine_relay_acks_received_total",
		Help:      "ACK confirmations received from Sensor for VM index reports",
		Namespace: metrics.PrometheusNamespace,
		Subsystem: metrics.ComplianceSubsystem.String(),
	},
)

// VMIndexACKsFromSensor is a counter vector of ACK/NACK responses received
// from Sensor for VM index reports, partitioned by the "action" label.
// It is incremented when compliance.go handles ComplianceACK messages.
var VMIndexACKsFromSensor = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "virtual_machine_index_acks_from_sensor_total",
		Help:      "ACK/NACK responses received from Sensor for VM index reports",
		Namespace: metrics.PrometheusNamespace,
		Subsystem: metrics.ComplianceSubsystem.String(),
	},
	// Intended label values: "ACK", "NACK".
	[]string{"action"},
)

func init() {
prometheus.MustRegister(
IndexReportsMismatchingVsockCID,
Expand All @@ -86,5 +143,10 @@ func init() {
SemaphoreAcquisitionFailures,
SemaphoreHoldingSize,
SemaphoreQueueSize,
VMIndexReportSendAttempts,
VMIndexReportSendDurationSeconds,
ReportsRateLimited,
AcksReceived,
VMIndexACKsFromSensor,
)
}
Loading
Loading