Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sensor/common/networkflow/manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ var (
// Manager processes network connections coming in from collector, enriches them and sends them to Central
type Manager interface {
UnregisterCollector(hostname string, sequenceID int64)
RegisterCollector(hostname string) (HostNetworkInfo, int64)
RegisterCollector(hostname string) (*hostConnections, int64)

PublicIPsValueStream() concurrency.ReadOnlyValueStream[*sensor.IPAddressList]
ExternalSrcsValueStream() concurrency.ReadOnlyValueStream[*sensor.IPNetworkList]
Expand Down
207 changes: 207 additions & 0 deletions sensor/common/networkflow/manager/manager_combined.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
package manager

import (
"time"

"github.com/stackrox/rox/generated/internalapi/sensor"
"github.com/stackrox/rox/pkg/centralsensor"
"github.com/stackrox/rox/pkg/concurrency"
"github.com/stackrox/rox/pkg/sync"
"github.com/stackrox/rox/sensor/common"
"github.com/stackrox/rox/sensor/common/message"
"github.com/stackrox/rox/sensor/common/unimplemented"
"golang.org/x/exp/maps"
)

// NewCombinedManager creates a new instance of network flow manager that
// fans collector input out to both a legacy and a current manager so their
// behavior can be compared side by side.
func NewCombinedManager(
	managerLegacy Manager,
	managerCurrent Manager,
) Manager {
	mgr := &combinedNetworkFlowManager{
		manL:    managerLegacy,
		manC:    managerCurrent,
		stopper: concurrency.NewStopper(),
		// Per-manager destination maps that doCopy writes into.
		hostConnectionsL: map[string]*hostConnections{},
		hostConnectionsC: map[string]*hostConnections{},
		// Shared queue that collectors write into; doCopy drains it on each tick.
		enrichmentQueue: map[string]*hostConnections{},
		enricherTickerC: make(chan time.Time),
	}
	return mgr
}

// Compile-time check that combinedNetworkFlowManager satisfies Manager.
var _ Manager = (*combinedNetworkFlowManager)(nil)

// combinedNetworkFlowManager duplicates collector input into two underlying
// managers (legacy and current) so that their outputs can be compared.
// Collectors write into the shared enrichmentQueue; a periodic copy loop
// (see runCopy/doCopy) fans the data out into per-manager hostConnections.
type combinedNetworkFlowManager struct {
unimplemented.Receiver

// enricherTickerC drives the copy loop; replaced with a real ticker channel in Start().
enricherTickerC <-chan time.Time

// manL is the legacy manager; hostConnectionsL holds its per-host copy targets.
manL Manager
hostConnectionsL map[string]*hostConnections
// manC is the current manager; hostConnectionsC holds its per-host copy targets.
manC Manager
hostConnectionsC map[string]*hostConnections

// stopper coordinates shutdown of the background goroutines started in Start().
stopper concurrency.Stopper

// Common enrichment queue
// Keyed by hostname; guarded by enrichmentQueueMutex (which also guards
// hostConnectionsL/hostConnectionsC — see RegisterCollector and doCopy).
enrichmentQueue map[string]*hostConnections
enrichmentQueueMutex sync.RWMutex
}

// UnregisterCollector forwards the collector deregistration to both
// underlying managers (legacy first, then current).
func (c *combinedNetworkFlowManager) UnregisterCollector(hostname string, sequenceID int64) {
	for _, mgr := range []Manager{c.manL, c.manC} {
		mgr.UnregisterCollector(hostname, sequenceID)
	}
}

// RegisterCollector registers a collector for the given hostname with both
// underlying managers and returns the shared hostConnections that the
// collector will write into, together with the new sequence ID.
func (c *combinedNetworkFlowManager) RegisterCollector(hostname string) (*hostConnections, int64) {
	c.enrichmentQueueMutex.Lock()
	defer c.enrichmentQueueMutex.Unlock()
	log.Infof("Registering collector for %s", hostname)

	conns := c.enrichmentQueue[hostname] // Collector will write to this
	if conns == nil {
		conns = &hostConnections{
			hostname:    hostname,
			connections: make(map[connection]*connStatus),
			endpoints:   make(map[containerEndpoint]*connStatus),
		}
		c.enrichmentQueue[hostname] = conns
	}

	var seqID int64
	concurrency.WithLock(&conns.mutex, func() {
		if conns.pendingDeletion != nil {
			// Cancel any scheduled deletion of this host's entry so a
			// re-registering collector keeps its queued data.
			// NOTE(review): the original comment here referenced
			// m.connectionsByHostMutex from manager_impl.go; in this type the
			// queue is guarded by enrichmentQueueMutex instead — confirm the
			// deletion path takes that mutex too.
			conns.pendingDeletion.Stop()
			conns.pendingDeletion = nil
		}

		conns.currentSequenceID++
		// Capture the sequence ID while still holding conns.mutex; reading
		// conns.currentSequenceID again after the lock is released would race
		// with concurrent writers.
		seqID = conns.currentSequenceID
	})

	// Register with both managers and remember their per-host copy targets
	// for doCopy. The managers' own sequence IDs are superseded by ours.
	hcL, _ := c.manL.RegisterCollector(hostname)
	hcC, _ := c.manC.RegisterCollector(hostname)
	c.hostConnectionsL[hostname] = hcL
	c.hostConnectionsC[hostname] = hcC

	return conns, seqID
}

// PublicIPsValueStream exposes the public-IP value stream of the current
// (non-legacy) manager; the legacy manager's stream is not surfaced.
func (c *combinedNetworkFlowManager) PublicIPsValueStream() concurrency.ReadOnlyValueStream[*sensor.IPAddressList] {
	stream := c.manC.PublicIPsValueStream()
	return stream
}

// ExternalSrcsValueStream exposes the external-sources value stream of the
// current (non-legacy) manager; the legacy manager's stream is not surfaced.
func (c *combinedNetworkFlowManager) ExternalSrcsValueStream() concurrency.ReadOnlyValueStream[*sensor.IPNetworkList] {
	stream := c.manC.ExternalSrcsValueStream()
	return stream
}

// Notify logs the component event and forwards it to both underlying
// managers.
func (c *combinedNetworkFlowManager) Notify(e common.SensorComponentEvent) {
	log.Info(common.LogSensorComponentEvent(e, c.Name()))
	// Preserve the original notification order: current manager first, then legacy.
	for _, mgr := range []Manager{c.manC, c.manL} {
		mgr.Notify(e)
	}
}

// ResponsesC returns the response channel of the current (non-legacy)
// manager; the legacy manager's responses are drained and discarded in Start().
func (c *combinedNetworkFlowManager) ResponsesC() <-chan *message.ExpiringMessage {
	responses := c.manC.ResponsesC()
	return responses
}

// Start starts both underlying managers, wires up the ticker that drives the
// copy loop, and spawns a goroutine that discards the legacy manager's
// responses (only the current manager's responses are forwarded to Central,
// see ResponsesC). Always returns nil.
func (c *combinedNetworkFlowManager) Start() error {
	// Sub-manager start errors are intentionally discarded; keep parity with
	// the original behavior of always reporting successful startup.
	_ = c.manL.Start()
	_ = c.manC.Start()
	ticker := time.NewTicker(7 * time.Second)
	c.enricherTickerC = ticker.C

	go c.runCopy()

	go func() {
		// Stop the ticker when shutdown is requested so its resources are
		// released; previously it was never stopped and leaked.
		defer ticker.Stop()
		for {
			select {
			case <-c.manL.ResponsesC():
				// discard message
			case <-c.stopper.Flow().StopRequested():
				return
			}
		}
	}()
	log.Infof("%s has started", c.Name())
	return nil
}

// runCopy is the copy loop: on every tick it moves collector data from the
// shared enrichment queue into the two managers' per-host queues. Data may be
// delayed by up to one tick. Runs until the stopper requests shutdown.
func (c *combinedNetworkFlowManager) runCopy() {
	for {
		select {
		case <-c.stopper.Flow().StopRequested():
			log.Infof("%s stops the copy loop", c.Name())
			return
		case <-c.enricherTickerC:
			c.doCopy()
		}
	}
}

// doCopy drains the shared enrichment queue: for every host it copies all
// pending connections and endpoints into both the legacy and the current
// manager's hostConnections, then clears the source maps. Copies are deep
// (per-entry connStatus values are duplicated) so the two managers never
// share mutable status objects with each other or the queue.
func (c *combinedNetworkFlowManager) doCopy() {
	c.enrichmentQueueMutex.Lock()
	defer c.enrichmentQueueMutex.Unlock()
	for hostName, conns := range c.enrichmentQueue {
		if conns == nil {
			continue
		}
		copyHostConnections(c.hostConnectionsL[hostName], conns)
		copyHostConnections(c.hostConnectionsC[hostName], conns)
		maps.Clear(conns.connections)
		maps.Clear(conns.endpoints)
	}
}

// copyHostConnections copies every connection and endpoint from src into dst
// without overwriting entries that already exist in dst. Status values are
// copied by value so dst owns its own instances. A nil dst is a no-op —
// RegisterCollector populates both per-manager maps under the same mutex, so
// this should not happen; the guard turns a would-be nil-pointer panic into a
// skipped copy.
func copyHostConnections(dst, src *hostConnections) {
	if dst == nil {
		return
	}
	dst.mutex.Lock()
	defer dst.mutex.Unlock()
	for conn, status := range src.connections {
		// Do not overwrite existing
		if _, found := dst.connections[conn]; !found {
			statusCopy := *status
			dst.connections[conn] = &statusCopy
		}
	}
	for ep, status := range src.endpoints {
		if _, found := dst.endpoints[ep]; !found {
			statusCopy := *status
			dst.endpoints[ep] = &statusCopy
		}
	}
}

// Stop shuts down both underlying managers and then the combined manager's
// own background goroutines, waiting for them to finish if they had not
// already stopped.
func (c *combinedNetworkFlowManager) Stop() {
	c.manL.Stop()
	c.manC.Stop()
	alreadyStopped := c.stopper.Client().Stopped().IsDone()
	if !alreadyStopped {
		// Wait (after requesting the stop below) until the goroutines confirm
		// they have terminated.
		defer func() {
			_ = c.stopper.Client().Stopped().Wait()
		}()
	}
	c.stopper.Client().Stop()
}

// Capabilities reports the sensor capabilities of the current (non-legacy)
// manager.
func (c *combinedNetworkFlowManager) Capabilities() []centralsensor.SensorCapability {
	caps := c.manC.Capabilities()
	return caps
}

// Name returns the component name used in logs and metrics.
func (c *combinedNetworkFlowManager) Name() string {
	const componentName = "combinedNetworkFlowManager"
	return componentName
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/stackrox/rox/pkg/centralsensor"
"github.com/stackrox/rox/pkg/concurrency"
"github.com/stackrox/rox/pkg/env"
"github.com/stackrox/rox/pkg/features"
"github.com/stackrox/rox/pkg/net"
"github.com/stackrox/rox/pkg/networkgraph"
Expand Down Expand Up @@ -40,7 +39,7 @@ func (m *networkFlowManager) executeConnectionAction(
case PostEnrichmentActionRetry:
// noop, retry happens through not removing from `hostConns.connections`
case PostEnrichmentActionCheckRemove:
if status.checkRemoveCondition(env.NetworkFlowUseLegacyUpdateComputer.BooleanSetting(), status.enrichmentConsumption.consumedNetworkGraph) {
if status.checkRemoveCondition(m.legacyBehavior, status.enrichmentConsumption.consumedNetworkGraph) {
delete(hostConns.connections, *conn)
flowMetrics.HostConnectionsOperations.WithLabelValues("remove", "connections").Inc()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ func (m *networkFlowManager) executeEndpointAction(
case PostEnrichmentActionRetry:
// noop, retry happens through not removing from `hostConns.endpoints`
case PostEnrichmentActionCheckRemove:
if status.checkRemoveCondition(env.NetworkFlowUseLegacyUpdateComputer.BooleanSetting(), status.enrichmentConsumption.IsConsumed()) {
// TODO: EXPERIMENTAL: KEEP OLD BEHAVIOR FOR COMPARING LEGACY AGAINST TRANSITION_BASED
if status.checkRemoveCondition(true, status.enrichmentConsumption.IsConsumed()) {
delete(hostConns.endpoints, *ep)
flowMetrics.HostConnectionsOperations.WithLabelValues("remove", "endpoints").Inc()
flowMetrics.HostProcessesEvents.WithLabelValues("remove").Inc()
Expand Down
21 changes: 14 additions & 7 deletions sensor/common/networkflow/manager/manager_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,15 +139,19 @@ func WithEnrichTicker(ticker <-chan time.Time) Option {

// NewManager creates a new instance of network flow manager
func NewManager(
name string,
clusterEntities EntityStore,
externalSrcs externalsrcs.Store,
policyDetector detector.Detector,
pubSub *internalmessage.MessageSubscriber,
updateComputer updatecomputer.UpdateComputer,
legacyBehavior bool,
opts ...Option,
) Manager {
enricherTicker := time.NewTicker(enricherCycle)
mgr := &networkFlowManager{
name: name,
legacyBehavior: legacyBehavior,
connectionsByHost: make(map[string]*hostConnections),
clusterEntities: clusterEntities,
publicIPs: newPublicIPsManager(),
Expand Down Expand Up @@ -203,6 +207,9 @@ type networkFlowComponent interface {
type networkFlowManager struct {
unimplemented.Receiver

name string
legacyBehavior bool

connectionsByHost map[string]*hostConnections
connectionsByHostMutex sync.RWMutex

Expand Down Expand Up @@ -274,7 +281,7 @@ func (m *networkFlowManager) Capabilities() []centralsensor.SensorCapability {
}

func (m *networkFlowManager) Notify(e common.SensorComponentEvent) {
log.Info(common.LogSensorComponentEvent(e, "NetworkFlowManager"))
log.Info(common.LogSensorComponentEvent(e, m.Name()))
// Ensure that the sub-components are notified after this manager processes the notification.
defer func() {
if m.purger != nil {
Expand Down Expand Up @@ -382,20 +389,20 @@ func (m *networkFlowManager) updateEnrichmentCollectionsSize() {
})
}
})
flowMetrics.EnrichmentCollectionsSize.WithLabelValues("connectionsInEnrichQueue", "connections").Set(float64(numConnections))
flowMetrics.EnrichmentCollectionsSize.WithLabelValues("endpointsInEnrichQueue", "endpoints").Set(float64(numEndpoints))
flowMetrics.EnrichmentCollectionsSize.WithLabelValues(m.name, "connectionsInEnrichQueue", "connection").Set(float64(numConnections))
flowMetrics.EnrichmentCollectionsSize.WithLabelValues(m.name, "endpointsInEnrichQueue", "endpoint").Set(float64(numEndpoints))

// Number of entities (connections, endpoints) stored in memory for the purposes of not losing data while offline.
concurrency.WithRLock(&m.activeConnectionsMutex, func() {
flowMetrics.EnrichmentCollectionsSize.WithLabelValues("activeConnections", "connections").Set(float64(len(m.activeConnections)))
flowMetrics.EnrichmentCollectionsSize.WithLabelValues(m.name, "activeConnections", "connection").Set(float64(len(m.activeConnections)))
})
concurrency.WithRLock(&m.activeEndpointsMutex, func() {
flowMetrics.EnrichmentCollectionsSize.WithLabelValues("activeEndpoints", "endpoints").Set(float64(len(m.activeEndpoints)))
flowMetrics.EnrichmentCollectionsSize.WithLabelValues(m.name, "activeEndpoints", "endpoint").Set(float64(len(m.activeEndpoints)))
})

// Length and byte sizes of collections used internally by updatecomputer
if m.updateComputer != nil {
m.updateComputer.RecordSizeMetrics(flowMetrics.EnrichmentCollectionsSize, flowMetrics.EnrichmentCollectionsSizeBytes)
m.updateComputer.RecordSizeMetrics(m.name, flowMetrics.EnrichmentCollectionsSize, flowMetrics.EnrichmentCollectionsSizeBytes)
}
}

Expand Down Expand Up @@ -511,7 +518,7 @@ func (m *networkFlowManager) getAllHostConnections() []*hostConnections {
return allHostConns
}

func (m *networkFlowManager) RegisterCollector(hostname string) (HostNetworkInfo, int64) {
func (m *networkFlowManager) RegisterCollector(hostname string) (*hostConnections, int64) {
m.connectionsByHostMutex.Lock()
defer m.connectionsByHostMutex.Unlock()

Expand Down
4 changes: 2 additions & 2 deletions sensor/common/networkflow/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,13 @@ var (
Subsystem: metrics.SensorSubsystem.String(),
Name: hostConnectionsPrefix + "collections_size_current",
Help: "Current size (number of elements) of given collection involved in enrichment",
}, []string{"Name", "Type"})
}, []string{"uc", "Name", "Type"})
EnrichmentCollectionsSizeBytes = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: metrics.PrometheusNamespace,
Subsystem: metrics.SensorSubsystem.String(),
Name: hostConnectionsPrefix + "collections_size_current_bytes",
Help: "Current size in bytes of given collection involved in enrichment",
}, []string{"Name", "Type"})
}, []string{"uc", "Name", "Type"})
// A networkConnectionInfo message arrives from collector

// NetworkConnectionInfoMessagesRcvd - 1. Collector sends NetworkConnection Info messages where each contains endpoints and connections
Expand Down
2 changes: 1 addition & 1 deletion sensor/common/networkflow/updatecomputer/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ type UpdateComputer interface {
PeriodicCleanup(now time.Time, cleanupInterval time.Duration)

// RecordSizeMetrics records metrics for length and byte-size of the collections used in updateComputer.
RecordSizeMetrics(gv1, gv2 *prometheus.GaugeVec)
RecordSizeMetrics(name string, gv1, gv2 *prometheus.GaugeVec)
}

func New() UpdateComputer {
Expand Down
14 changes: 7 additions & 7 deletions sensor/common/networkflow/updatecomputer/legacy.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ func (l *Legacy) ResetState() {
l.enrichedProcessesLastSentState = nil
}

func (l *Legacy) RecordSizeMetrics(lenSize, byteSize *prometheus.GaugeVec) {
func (l *Legacy) RecordSizeMetrics(name string, lenSize, byteSize *prometheus.GaugeVec) {
lenConn := concurrency.WithRLock1(&l.lastSentStateMutex, func() int {
return len(l.enrichedConnsLastSentState)
})
Expand All @@ -116,19 +116,19 @@ func (l *Legacy) RecordSizeMetrics(lenSize, byteSize *prometheus.GaugeVec) {
lenProc := concurrency.WithRLock1(&l.lastSentStateMutex, func() int {
return len(l.enrichedProcessesLastSentState)
})
lenSize.WithLabelValues("lastSent", "conns").Set(float64(lenConn))
lenSize.WithLabelValues("lastSent", "endpoints").Set(float64(lenEp))
lenSize.WithLabelValues("lastSent", "processes").Set(float64(lenProc))
lenSize.WithLabelValues(name, "lastSent", "connection").Set(float64(lenConn))
lenSize.WithLabelValues(name, "lastSent", "endpoint").Set(float64(lenEp))
lenSize.WithLabelValues(name, "lastSent", "process").Set(float64(lenProc))

// Avg. byte-size of single element including go map overhead.
// Estimated with by creating a map with 100k elements, measuring memory consumption (including map overhead)
// and dividing again by 100k.
connsSize := 480 * lenConn
epSize := 330 * lenEp
procSize := 406 * lenProc
byteSize.WithLabelValues("lastSent", "conns").Set(float64(connsSize))
byteSize.WithLabelValues("lastSent", "endpoints").Set(float64(epSize))
byteSize.WithLabelValues("lastSent", "processes").Set(float64(procSize))
byteSize.WithLabelValues(name, "lastSent", "connection").Set(float64(connsSize))
byteSize.WithLabelValues(name, "lastSent", "endpoint").Set(float64(epSize))
byteSize.WithLabelValues(name, "lastSent", "process").Set(float64(procSize))
}

// computeUpdates is a generic helper for computing updates using the legacy LastSentState approach
Expand Down
2 changes: 1 addition & 1 deletion sensor/common/networkflow/updatecomputer/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ var (
Help: "Counts the internal update events for the categorizeUpdate method in TransitionBased updateComputer. " +
"The 'transition' allows counting the transitions of connections between states 'open' and 'closed'." +
"Action stores the decision whether a given update was sent to Central.",
}, []string{"transition", "entity", "action", "reason"})
}, []string{"uc", "transition", "entity", "action", "reason"})
periodicCleanupDurationSeconds = prometheus.NewHistogram(prometheus.HistogramOpts{
Namespace: metrics.PrometheusNamespace,
Subsystem: metrics.SensorSubsystem.String(),
Expand Down
Loading
Loading