Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions sensor/common/networkflow/manager/indicator/indicator.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ func (i *NetworkConn) Key(h hash.Hash64) string {
return i.keyHash(h)
}

// BinaryKey generates a binary hash for memory-efficient storage in dedupers.
// Unlike Key, which renders the 64-bit hash as a hex string, BinaryKey returns
// the raw uint64 as a BinaryHash, so deduper sets/maps store fixed 8-byte
// values with no per-key string allocation.
// The caller-supplied hasher h is reset internally, so a single hasher may be
// reused across calls (see binaryKeyHash).
func (i *NetworkConn) BinaryKey(h hash.Hash64) BinaryHash {
return i.binaryKeyHash(h)
}

// ContainerEndpoint is a key in Sensor's maps that track active endpoints. It's set of fields should be minimal.
// Fields are sorted by their size to optimize for memory padding.
type ContainerEndpoint struct {
Expand Down
9 changes: 9 additions & 0 deletions sensor/common/networkflow/manager/indicator/key.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,15 @@ func (i *NetworkConn) keyHash(h hash.Hash64) string {
return hashToHexString(h.Sum64())
}

// binaryKeyHash produces a binary hash that uniquely identifies a given NetworkConn indicator.
// This is a memory-optimized implementation using direct hash generation without string conversion:
// it returns h.Sum64() directly as a BinaryHash instead of hex-encoding it as keyHash does.
func (i *NetworkConn) binaryKeyHash(h hash.Hash64) BinaryHash {
// Reset first: the hasher is shared/reused across calls, so leftover state
// from a previous Key/BinaryKey computation must not leak into this one.
h.Reset()
// Fold in the identifying fields in a fixed order; the order is part of the
// key's identity. NOTE(review): this appears to mirror keyHash's field order
// (src entity, dst entity, then dst port + protocol) — keyHash's body is not
// fully visible here, so confirm the two stay in sync.
hashStrings(h, i.SrcEntity.ID, i.DstEntity.ID)
hashPortAndProtocol(h, i.DstPort, i.Protocol)
// BinaryHash wraps the raw 64-bit digest (8 bytes per key).
return BinaryHash(h.Sum64())
}

// Common hash computation utilities
func hashPortAndProtocol(h hash.Hash64, port uint16, protocol storage.L4Protocol) {
buf := [6]byte{
Expand Down
2 changes: 2 additions & 0 deletions sensor/common/networkflow/manager/indicator/key_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ func TestKey_UniquenessForDifferentObjects(t *testing.T) {
}
assert.NotEqual(t, conn1.Key(h), conn2.Key(h),
"Different NetworkConn objects should have different keys")
assert.NotEqual(t, conn1.BinaryKey(h), conn2.BinaryKey(h),
"Different NetworkConn objects should have different binary keys")
})

t.Run("endpoints", func(t *testing.T) {
Expand Down
42 changes: 18 additions & 24 deletions sensor/common/networkflow/updatecomputer/transition_based.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ type TransitionBased struct {

// State tracking for conditional updates - moved from networkFlowManager
connectionsDeduperMutex sync.RWMutex
connectionsDeduper *set.StringSet
connectionsDeduper *set.Set[indicator.BinaryHash]

endpointsDeduperMutex sync.RWMutex
endpointsDeduper map[indicator.BinaryHash]indicator.BinaryHash
Expand All @@ -113,29 +113,29 @@ type TransitionBased struct {

// Closed connection timestamp tracking for handling late-arriving updates
closedConnMutex sync.RWMutex
closedConnTimestamps map[string]closedConnEntry
closedConnTimestamps map[indicator.BinaryHash]closedConnEntry
closedConnRememberDuration time.Duration

lastCleanupMutex sync.RWMutex
lastCleanup time.Time
}

// newStringSetPtr returns a pointer to a new string set, which is originally a value type.
// newBinaryHashSetPtr returns a pointer to a new set of BinaryHash values, which is originally a value type.
// This avoids copying the set when it is used in the deduper.
func newStringSetPtr() *set.StringSet {
s := set.NewStringSet()
func newBinaryHashSetPtr() *set.Set[indicator.BinaryHash] {
s := set.NewSet[indicator.BinaryHash]()
return &s
}

// NewTransitionBased creates a new instance of the transition-based update computer.
func NewTransitionBased() *TransitionBased {
return &TransitionBased{
connectionsDeduper: newStringSetPtr(),
connectionsDeduper: newBinaryHashSetPtr(),
endpointsDeduper: make(map[indicator.BinaryHash]indicator.BinaryHash),
cachedUpdatesConn: make([]*storage.NetworkFlow, 0),
cachedUpdatesEp: make([]*storage.NetworkEndpoint, 0),
cachedUpdatesProc: make([]*storage.ProcessListeningOnPortFromSensor, 0),
closedConnTimestamps: make(map[string]closedConnEntry),
closedConnTimestamps: make(map[indicator.BinaryHash]closedConnEntry),
closedConnRememberDuration: env.NetworkFlowClosedConnRememberDuration.DurationSetting(),
lastCleanup: time.Now(),
}
Expand All @@ -156,7 +156,7 @@ func (c *TransitionBased) ComputeUpdatedConns(current map[indicator.NetworkConn]
// Process each enriched connection individually, categorize the transition, and generate an update if needed.
h := xxhash.New()
for conn, currTS := range current {
key := conn.Key(h)
key := conn.BinaryKey(h)

// Check if this connection has been closed recently.
prevTsFound, prevTS := c.lookupPrevTimestamp(key)
Expand Down Expand Up @@ -190,8 +190,8 @@ func (c *TransitionBased) ComputeUpdatedConns(current map[indicator.NetworkConn]
// Note that enriched entities for which enrichment should be retried never reach this function.
func categorizeUpdate(
prevTS, currTS timestamp.MicroTS, prevTsFound bool,
connKey string,
deduper *set.StringSet, mutex *sync.RWMutex) (bool, TransitionType) {
connKey indicator.BinaryHash,
deduper *set.Set[indicator.BinaryHash], mutex *sync.RWMutex) (bool, TransitionType) {

// Variables for ease of reading
isClosed := currTS != timestamp.InfiniteFuture
Expand Down Expand Up @@ -407,7 +407,7 @@ func (c *TransitionBased) updateLastCleanup(now time.Time) {
// ResetState clears the transition-based computer's firstTimeSeen tracking
func (c *TransitionBased) ResetState() {
concurrency.WithLock(&c.connectionsDeduperMutex, func() {
c.connectionsDeduper = newStringSetPtr()
c.connectionsDeduper = newBinaryHashSetPtr()
})
concurrency.WithLock(&c.endpointsDeduperMutex, func() {
c.endpointsDeduper = make(map[indicator.BinaryHash]indicator.BinaryHash)
Expand All @@ -416,7 +416,7 @@ func (c *TransitionBased) ResetState() {
c.updateLastCleanup(time.Now())

concurrency.WithLock(&c.closedConnMutex, func() {
c.closedConnTimestamps = make(map[string]closedConnEntry)
c.closedConnTimestamps = make(map[indicator.BinaryHash]closedConnEntry)
})
}

Expand Down Expand Up @@ -449,7 +449,7 @@ func (c *TransitionBased) RecordSizeMetrics(lenSize, byteSize *prometheus.GaugeV
// lookupPrevTimestamp retrieves the previous close-timestamp for a connection.
// For open connections, returns found==false.
// For recently closed connections, returns the stored timestamp and found==true.
func (c *TransitionBased) lookupPrevTimestamp(connKey string) (found bool, prevTS timestamp.MicroTS) {
func (c *TransitionBased) lookupPrevTimestamp(connKey indicator.BinaryHash) (found bool, prevTS timestamp.MicroTS) {
// For closed connections, check if we have stored previous timestamp
c.closedConnMutex.RLock()
defer c.closedConnMutex.RUnlock()
Expand All @@ -459,7 +459,7 @@ func (c *TransitionBased) lookupPrevTimestamp(connKey string) (found bool, prevT

// storeClosedConnectionTimestamp stores the timestamp of a closed connection for future reference
func (c *TransitionBased) storeClosedConnectionTimestamp(
connKey string, closedTS timestamp.MicroTS, closedConnRememberDuration time.Duration) {
connKey indicator.BinaryHash, closedTS timestamp.MicroTS, closedConnRememberDuration time.Duration) {
// Do not store open connections.
if closedTS == timestamp.InfiniteFuture {
return
Expand All @@ -475,21 +475,15 @@ func (c *TransitionBased) storeClosedConnectionTimestamp(
}

// calculateConnectionsDeduperByteSize calculates the memory usage of the connections deduper.
// The calculation includes: map reference (8 bytes) + string references (16 bytes per entry) + actual string content.
// The calculation includes: map reference (8 bytes) + BinaryHash entries (8 bytes per uint64 entry).
func (c *TransitionBased) calculateConnectionsDeduperByteSize() uintptr {
baseSize := concurrency.WithRLock1(&c.connectionsDeduperMutex, func() uintptr {
var totalStringBytes uintptr
for _, s := range c.connectionsDeduper.AsSlice() {
totalStringBytes += uintptr(len(s))
}
return uintptr(8) + // map reference
uintptr(c.connectionsDeduper.Cardinality())*16 + // string references (16 bytes each)
totalStringBytes // actual string content
uintptr(c.connectionsDeduper.Cardinality())*8 // 8 bytes per BinaryHash (uint64) entry
})

// Conservative 2x multiplier for set.StringSet overhead (buckets, hash table structure, etc.)
// The benchmarked overhead was 199/104 = 1.91x, but we use a slightly higher multiplier to be safe.
return baseSize * 2
// Conservative 1.8x multiplier for Go map overhead (same as endpoints deduper)
return baseSize * 18 / 10
}

// calculateEndpointsDeduperByteSize calculates the memory usage of the endpoints deduper.
Expand Down
38 changes: 19 additions & 19 deletions sensor/common/networkflow/updatecomputer/update_computer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,53 +191,53 @@ func Test_lookupPrevTimestamp(t *testing.T) {
past := nowTS - 1000

testCases := map[string]struct {
connKey string
setupStore func(name string)
connKey indicator.BinaryHash
setupStore func(key indicator.BinaryHash)
expectedFound bool
expectedPrevTS timestamp.MicroTS
}{
"Unknown connections should not be found and return 0": {
connKey: "unknown-connection",
setupStore: func(name string) {
transitionBased.storeClosedConnectionTimestamp("foo-bar", past, closedConnRememberDuration)
connKey: indicator.BinaryHash(0x1234567890ABCDEF),
setupStore: func(key indicator.BinaryHash) {
transitionBased.storeClosedConnectionTimestamp(indicator.BinaryHash(0xFEDCBA0987654321), past, closedConnRememberDuration)
},
expectedFound: false,
expectedPrevTS: 0,
},
"Open connections should not be found in closed connection tracking": {
connKey: "open-connection",
setupStore: func(_ string) {},
connKey: indicator.BinaryHash(0x1111111111111111),
setupStore: func(_ indicator.BinaryHash) {},
expectedFound: false,
expectedPrevTS: 0,
},
"Stored closed connection should be found with correct timestamp": {
connKey: "closed-connection-1",
setupStore: func(name string) {
transitionBased.storeClosedConnectionTimestamp(name, past, closedConnRememberDuration)
connKey: indicator.BinaryHash(0x2222222222222222),
setupStore: func(key indicator.BinaryHash) {
transitionBased.storeClosedConnectionTimestamp(key, past, closedConnRememberDuration)
},
expectedFound: true,
expectedPrevTS: past,
},
"Stored closed connection should be found regardless of current timestamp": {
connKey: "closed-connection-2",
setupStore: func(name string) {
transitionBased.storeClosedConnectionTimestamp(name, past, closedConnRememberDuration)
connKey: indicator.BinaryHash(0x3333333333333333),
setupStore: func(key indicator.BinaryHash) {
transitionBased.storeClosedConnectionTimestamp(key, past, closedConnRememberDuration)
},
expectedFound: true,
expectedPrevTS: past,
},
"Stored closed connection should be found even with same timestamp": {
connKey: "closed-connection-3",
setupStore: func(name string) {
transitionBased.storeClosedConnectionTimestamp(name, past, closedConnRememberDuration)
connKey: indicator.BinaryHash(0x4444444444444444),
setupStore: func(key indicator.BinaryHash) {
transitionBased.storeClosedConnectionTimestamp(key, past, closedConnRememberDuration)
},
expectedFound: true,
expectedPrevTS: past,
},
"Stored closed connection should still be found after cleanup": {
connKey: "closed-connection-4",
setupStore: func(name string) {
transitionBased.storeClosedConnectionTimestamp(name, past, closedConnRememberDuration)
connKey: indicator.BinaryHash(0x5555555555555555),
setupStore: func(key indicator.BinaryHash) {
transitionBased.storeClosedConnectionTimestamp(key, past, closedConnRememberDuration)
},
expectedFound: true,
expectedPrevTS: past,
Expand Down
Loading