Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 33 additions & 15 deletions sensor/common/networkflow/manager/manager_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ package manager
import (
"context"
"fmt"
"math"
"sync/atomic"
"time"

"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/stackrox/rox/generated/internalapi/central"
"github.com/stackrox/rox/generated/internalapi/sensor"
"github.com/stackrox/rox/generated/storage"
Expand Down Expand Up @@ -408,6 +408,14 @@ func (m *networkFlowManager) enrichAndSend() {
updatedEndpoints := m.updateComputer.ComputeUpdatedEndpoints(currentEndpoints)
updatedProcesses := m.updateComputer.ComputeUpdatedProcesses(currentProcesses)

flowMetrics.NumUpdatesSentToCentralCounter.WithLabelValues("connections").Add(float64(len(updatedConns)))
flowMetrics.NumUpdatesSentToCentralCounter.WithLabelValues("endpoints").Add(float64(len(updatedEndpoints)))
flowMetrics.NumUpdatesSentToCentralCounter.WithLabelValues("processes").Add(float64(len(updatedProcesses)))

flowMetrics.NumUpdatesSentToCentralGauge.WithLabelValues("connections").Set(float64(len(updatedConns)))
flowMetrics.NumUpdatesSentToCentralGauge.WithLabelValues("endpoints").Set(float64(len(updatedEndpoints)))
flowMetrics.NumUpdatesSentToCentralGauge.WithLabelValues("processes").Set(float64(len(updatedProcesses)))

if len(updatedConns)+len(updatedEndpoints) > 0 {
if sent := m.sendConnsEps(updatedConns, updatedEndpoints); sent {
// Update the UpdateComputer's internal state after sending updates to Central.
Expand Down Expand Up @@ -570,13 +578,24 @@ func (m *networkFlowManager) UnregisterCollector(hostname string, sequenceID int
}

func (h *hostConnections) Process(networkInfo *sensor.NetworkConnectionInfo, nowTimestamp timestamp.MicroTS, sequenceID int64) error {
flowMetrics.NetworkConnectionInfoMessagesRcvd.With(prometheus.Labels{"Hostname": h.hostname}).Inc()
flowMetrics.NetworkConnectionInfoMessagesRcvd.WithLabelValues(h.hostname).Inc()

updatedConnections := getUpdatedConnections(networkInfo)
updatedEndpoints := getUpdatedContainerEndpoints(networkInfo)
updatedConnections, numClosedConn := getUpdatedConnections(networkInfo)
// Use max to prevent numOpenConn going negative (that would panic).
numOpenConn := math.Max(float64(len(updatedConnections)-numClosedConn), 0)
flowMetrics.IncomingConnectionsEndpointsCounter.WithLabelValues("connections", "closed").Add(float64(numClosedConn))
flowMetrics.IncomingConnectionsEndpointsCounter.WithLabelValues("connections", "open").Add(numOpenConn)

flowMetrics.NumUpdated.With(prometheus.Labels{"Hostname": h.hostname, "Type": "Connection"}).Set(float64(len(updatedConnections)))
flowMetrics.NumUpdated.With(prometheus.Labels{"Hostname": h.hostname, "Type": "Endpoint"}).Set(float64(len(updatedEndpoints)))
updatedEndpoints, numClosedEp := getUpdatedContainerEndpoints(networkInfo)
// Use max to prevent numOpenEp going negative (that would panic).
numOpenEp := math.Max(float64(len(updatedEndpoints)-numClosedEp), 0)
flowMetrics.IncomingConnectionsEndpointsCounter.WithLabelValues("endpoints", "closed").Add(float64(numClosedEp))
flowMetrics.IncomingConnectionsEndpointsCounter.WithLabelValues("endpoints", "open").Add(numOpenEp)

flowMetrics.IncomingConnectionsEndpointsGauge.WithLabelValues(h.hostname, "Connection", "closed").Set(float64(numClosedConn))
flowMetrics.IncomingConnectionsEndpointsGauge.WithLabelValues(h.hostname, "Connection", "open").Set(numOpenConn)
flowMetrics.IncomingConnectionsEndpointsGauge.WithLabelValues(h.hostname, "Endpoint", "closed").Set(float64(numClosedEp))
flowMetrics.IncomingConnectionsEndpointsGauge.WithLabelValues(h.hostname, "Endpoint", "open").Set(numOpenEp)

collectorTS := timestamp.FromProtobuf(networkInfo.GetTime())
tsOffset := nowTimestamp - collectorTS
Expand Down Expand Up @@ -694,8 +713,9 @@ func processConnection(conn *sensor.NetworkConnection) (*connection, error) {
// getUpdatedConnections returns a map of connections to their close timestamps,
// together with the number of processed connections that had a close timestamp set.
// A timestamp set to +infinity means that the connection is open;
// any other value >0 means that the connection is closed.
func getUpdatedConnections(networkInfo *sensor.NetworkConnectionInfo) map[connection]timestamp.MicroTS {
func getUpdatedConnections(networkInfo *sensor.NetworkConnectionInfo) (map[connection]timestamp.MicroTS, int) {
updatedConnections := make(map[connection]timestamp.MicroTS)
numClosed := 0

for _, conn := range networkInfo.GetUpdatedConnections() {
c, err := processConnection(conn)
Expand All @@ -708,19 +728,18 @@ func getUpdatedConnections(networkInfo *sensor.NetworkConnectionInfo) map[connec
ts := timestamp.FromProtobuf(conn.CloseTimestamp)
if ts == 0 {
ts = timestamp.InfiniteFuture
flowMetrics.IncomingConnectionsEndpoints.With(prometheus.Labels{"object": "connections", "closedTS": "unset"}).Inc()
} else {
flowMetrics.IncomingConnectionsEndpoints.With(prometheus.Labels{"object": "connections", "closedTS": "set"}).Inc()
numClosed++
}
updatedConnections[*c] = ts
}

return updatedConnections
return updatedConnections, numClosed
}

func getUpdatedContainerEndpoints(networkInfo *sensor.NetworkConnectionInfo) map[containerEndpoint]timestamp.MicroTS {
func getUpdatedContainerEndpoints(networkInfo *sensor.NetworkConnectionInfo) (map[containerEndpoint]timestamp.MicroTS, int) {
updatedEndpoints := make(map[containerEndpoint]timestamp.MicroTS)

numClosed := 0
for _, endpoint := range networkInfo.GetUpdatedEndpoints() {
normalize.NetworkEndpoint(endpoint)
ep := containerEndpoint{
Expand All @@ -736,14 +755,13 @@ func getUpdatedContainerEndpoints(networkInfo *sensor.NetworkConnectionInfo) map
ts := timestamp.FromProtobuf(endpoint.GetCloseTimestamp())
if ts == 0 {
ts = timestamp.InfiniteFuture
flowMetrics.IncomingConnectionsEndpoints.With(prometheus.Labels{"object": "endpoints", "closedTS": "unset"}).Inc()
} else {
flowMetrics.IncomingConnectionsEndpoints.With(prometheus.Labels{"object": "endpoints", "closedTS": "set"}).Inc()
numClosed++
}
updatedEndpoints[ep] = ts
}

return updatedEndpoints
return updatedEndpoints, numClosed
}

func (m *networkFlowManager) PublicIPsValueStream() concurrency.ReadOnlyValueStream[*sensor.IPAddressList] {
Expand Down
33 changes: 20 additions & 13 deletions sensor/common/networkflow/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ func init() {

// Host Connections
NetworkConnectionInfoMessagesRcvd,
NumUpdated,
IncomingConnectionsEndpointsGauge,
HostConnectionsOperations,
IncomingConnectionsEndpoints,
IncomingConnectionsEndpointsCounter,

// Network Flows Manager
FlowEnrichmentEventsEndpoint,
Expand All @@ -25,7 +25,8 @@ func init() {
activeEndpointsCurrent,
PurgerEvents,
PurgerRunDuration,
NumUpdatesSentToCentral,
NumUpdatesSentToCentralCounter,
NumUpdatesSentToCentralGauge,

// Other
NetworkEntityFlowCounter, // flow directions and graph entities
Expand Down Expand Up @@ -62,28 +63,28 @@ var (
Name: hostConnectionsPrefix + "msgs_received_per_node_total",
Help: "Total number of messages containing network flows received from Collector for a specific node",
}, []string{"Hostname"})
// NumUpdated - 2. Out of newly arrived endpoints and connections, only selected need an update
NumUpdated = prometheus.NewGaugeVec(prometheus.GaugeOpts{
// IncomingConnectionsEndpointsGauge - 2. Out of newly arrived endpoints and connections, only selected need an update
IncomingConnectionsEndpointsGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: metrics.PrometheusNamespace,
Subsystem: metrics.SensorSubsystem.String(),
Name: hostConnectionsPrefix + "num_updates",
Name: hostConnectionsPrefix + "incoming_objects_current",
Help: "Current number of network endpoints or connections being updated in the message from Collector received for a specific node",
}, []string{"Hostname", "Type"})
}, []string{"Hostname", "Type", "status"})
// HostConnectionsOperations - 3a. Out of the updates, only some result in adding the connection/endpoint to the map.
// It is a counter vector partitioned by two labels: "op" and "object". Per the Help text, these
// distinguish additions from removals and flows from endpoints, respectively.
// NOTE(review): the exact label values are set by the callers, which are not visible here - confirm.
HostConnectionsOperations = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: metrics.PrometheusNamespace,
Subsystem: metrics.SensorSubsystem.String(),
Name: hostConnectionsPrefix + "operations_total",
Help: "Total number of flows/endpoints added/removed in the host connections maps",
}, []string{"op", "object"})
// IncomingConnectionsEndpoints - 3b. how many Collector updates have the closeTS set and how many are unclosed
// IncomingConnectionsEndpointsCounter - 3b. how many Collector updates have the closeTS set and how many are unclosed
// This is useful to investigate the behavior of Sensor with fake workloads when manipulating the `generateUnclosedEndpoints` param.
IncomingConnectionsEndpoints = prometheus.NewCounterVec(prometheus.CounterOpts{
IncomingConnectionsEndpointsCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: metrics.PrometheusNamespace,
Subsystem: metrics.SensorSubsystem.String(),
Name: hostConnectionsPrefix + "incoming_objects_total",
Help: "Total number of incoming connections/endpoints received from Collector with their close TS set or unset",
}, []string{"object", "closedTS"})
Help: "Total number of incoming connections/endpoints received from Collector with their close status",
}, []string{"object", "status"})
// End of processing of the networkConnectionInfo message

// FlowEnrichmentEventsEndpoint - 4a. Enrichment can have various outcomes. This metric stores the details about the outcomes for endpoints.
Expand Down Expand Up @@ -115,14 +116,20 @@ var (
Help: "Total number of internal flows observed by Sensor enrichment",
}, []string{"direction", "namespace"})

// NumUpdatesSentToCentral - 5. An update is calculated between the states in consecutive enrichment ticks and the
// NumUpdatesSentToCentralCounter - 5. An update is calculated between the states in consecutive enrichment ticks and the
// difference is treated as new updates. Those updates are sent to Central.
NumUpdatesSentToCentral = prometheus.NewCounterVec(prometheus.CounterOpts{
NumUpdatesSentToCentralCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: metrics.PrometheusNamespace,
Subsystem: metrics.SensorSubsystem.String(),
Name: netFlowManagerPrefix + "num_sent_to_central_total",
Help: "A counter that tracks the total number of connections and endpoints being updated (i.e., sent to Central)",
}, []string{"object"})
// NumUpdatesSentToCentralGauge - 5. Gauge holding the size of the most recently computed update,
// partitioned by the "object" label. enrichAndSend sets it for "connections", "endpoints" and
// "processes" to the number of updated items computed in that run.
// Note that the value is recorded before the send to Central is attempted, so it reflects
// the computed update size rather than a confirmed delivery.
NumUpdatesSentToCentralGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: metrics.PrometheusNamespace,
Subsystem: metrics.SensorSubsystem.String(),
Name: netFlowManagerPrefix + "num_sent_to_central_current",
Help: "A gauge that tracks the current number of connections and endpoints being updated (i.e., sent to Central)",
}, []string{"object"})
activeFlowsCurrent = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: metrics.PrometheusNamespace,
Subsystem: metrics.SensorSubsystem.String(),
Expand Down
Loading