Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 48 additions & 12 deletions pkg/process/filter/filter.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
package filter

import (
"hash"
"strings"
"unsafe"

"github.com/cespare/xxhash"
"github.com/stackrox/rox/generated/storage"
"github.com/stackrox/rox/pkg/containerid"
"github.com/stackrox/rox/pkg/set"
"github.com/stackrox/rox/pkg/stringutils"
"github.com/stackrox/rox/pkg/sync"
)

// BinaryHash is a 64-bit hash used as a map key in place of the original
// string, so the filter's trie stores fixed-size integers rather than copies
// of (or references to) the hashed strings.
// Using uint64 directly avoids conversion overhead and provides faster map operations.
// This follows the pattern from network flow dedupers (PR #17040).
//
// NOTE(review): keying maps by a 64-bit hash means two distinct strings whose
// hashes collide are treated as the same entry. Presumably acceptable for a
// best-effort spam filter — confirm this trade-off is intended.
type BinaryHash uint64

// This filter is a rudimentary filter that prevents a container from spamming Central
//
// Parameters:
Expand Down Expand Up @@ -43,12 +51,12 @@ type Filter interface {

// level is one node of the per-container trie built by the filter. hits
// counts how many times this exact node has been reached; children fans out
// to deeper nodes, keyed by the BinaryHash of the next string component.
//
// (The pasted diff rendering carried both the removed map[string]*level line
// and the added map[BinaryHash]*level line; this is the merged state.)
type level struct {
	hits     int
	children map[BinaryHash]*level
}

// newLevel allocates an empty trie node with an initialized (non-nil)
// children map, so callers can insert into it without a nil-map panic.
//
// (The pasted diff rendering carried both the removed map[string]*level
// literal and the added map[BinaryHash]*level literal; this is the merged
// state.)
func newLevel() *level {
	return &level{
		children: make(map[BinaryHash]*level),
	}
}

Expand All @@ -59,6 +67,10 @@ type filterImpl struct {

containersInDeployment map[string]map[string]*level
rootLock sync.Mutex

// Hash instance for computing BinaryHash keys
// Reused across Add() calls to avoid allocations
h hash.Hash64
}

func (f *filterImpl) siftNoLock(level *level, args []string, levelNum int) bool {
Expand All @@ -72,16 +84,20 @@ func (f *filterImpl) siftNoLock(level *level, args []string, levelNum int) bool
return true
}
// Truncate the current argument to the max size to avoid large arguments taking up a lot of space
// Clone to avoid retaining references to the ProcessIndicator protobuf object
currentArg := strings.Clone(stringutils.Truncate(args[0], maxArgSize))
nextLevel := level.children[currentArg]
// NO LONGER NEED strings.Clone() - we're hashing the string, not storing it
truncated := stringutils.Truncate(args[0], maxArgSize)

// Hash the argument string
argHash := hashString(f.h, truncated)

nextLevel := level.children[argHash]
if nextLevel == nil {
// If this level has already hit its max fan out then return false
if len(level.children) >= f.maxFanOut[levelNum] {
return false
}
nextLevel = newLevel()
level.children[currentArg] = nextLevel
level.children[argHash] = nextLevel
}

return f.siftNoLock(nextLevel, args[1:], levelNum+1)
Expand All @@ -95,6 +111,7 @@ func NewFilter(maxExactPathMatches, maxUniqueProcesses int, fanOut []int) Filter
maxFanOut: fanOut,

containersInDeployment: make(map[string]map[string]*level),
h: xxhash.New(),
}
}

Expand All @@ -120,19 +137,20 @@ func (f *filterImpl) Add(indicator *storage.ProcessIndicator) bool {

rootLevel := f.getOrAddRootLevelNoLock(indicator)

// Clone the exec file path to avoid retaining a reference to the ProcessIndicator
// protobuf object. Without this copy, the map key would hold a reference to the
// string within the protobuf, preventing garbage collection of the entire protobuf object.
execFilePath := strings.Clone(indicator.GetSignal().GetExecFilePath())
// NO LONGER NEED strings.Clone() - we're hashing the string, not storing it
execFilePath := indicator.GetSignal().GetExecFilePath()

// Hash the exec file path
execFilePathHash := hashString(f.h, execFilePath)

// Handle the process level independently as we will never reject a new process
processLevel := rootLevel.children[execFilePath]
processLevel := rootLevel.children[execFilePathHash]
if processLevel == nil {
if len(rootLevel.children) >= f.maxUniqueProcesses {
return false
}
processLevel = newLevel()
rootLevel.children[execFilePath] = processLevel
rootLevel.children[execFilePathHash] = processLevel
}

return f.siftNoLock(processLevel, strings.Fields(indicator.GetSignal().GetArgs()), 0)
Expand Down Expand Up @@ -189,3 +207,21 @@ func (f *filterImpl) DeleteByPod(pod *storage.Pod) {
}
}
}

// hashString returns the 64-bit digest of s computed with h, as a BinaryHash.
// The empty string is special-cased to 0 and never touches h.
//
// NOTE(review): an earlier comment referenced a hashStrings multi-argument
// variant; that helper is not visible in this diff — confirm it still exists.
func hashString(h hash.Hash64, s string) BinaryHash {
	if s == "" {
		// unsafe.StringData on an empty string is unspecified, so bail
		// out before the zero-copy conversion below.
		return 0
	}

	h.Reset()
	// View s's bytes without copying. Safe here: Write does not mutate its
	// argument (io.Writer contract), the hasher does not retain the slice,
	// and s stays alive for the duration of the call.
	//#nosec G103 -- Audited: zero-copy string-to-bytes conversion for performance
	data := unsafe.Slice(unsafe.StringData(s), len(s))
	_, _ = h.Write(data)
	return BinaryHash(h.Sum64())
}
88 changes: 88 additions & 0 deletions pkg/process/filter/filter_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package filter

import (
"fmt"
"runtime"
"testing"

"github.com/stackrox/rox/generated/storage"
)

// BenchmarkAdd measures the throughput of Filter.Add over a rotating set of
// 1000 indicators (10 deployments/containers, 100 exec paths, unique args).
func BenchmarkAdd(b *testing.B) {
	filter := NewFilter(100, 1000, []int{100, 100, 100})

	// Build the fixtures up front so their allocation is excluded from the
	// measured region.
	indicators := make([]*storage.ProcessIndicator, 1000)
	for i := range indicators {
		indicators[i] = &storage.ProcessIndicator{
			DeploymentId:  fmt.Sprintf("dep%d", i%10),
			ContainerName: "container",
			Signal: &storage.ProcessSignal{
				ContainerId:  fmt.Sprintf("id%d", i%10),
				ExecFilePath: fmt.Sprintf("/usr/bin/process%d", i%100),
				Args:         fmt.Sprintf("arg1 arg2 arg3 iteration%d", i),
			},
		}
	}

	b.ReportAllocs()
	// b.Loop() starts timing at its first call, so the setup above is not
	// measured; this replaces the b.ResetTimer()/b.N pattern and matches the
	// other benchmarks in this file.
	i := 0
	for b.Loop() {
		filter.Add(indicators[i%len(indicators)])
		i++
	}
}

// BenchmarkAddMemory measures per-call allocations when the same indicator is
// added repeatedly, i.e. the steady-state path where every trie level already
// exists.
func BenchmarkAddMemory(b *testing.B) {
	filter := NewFilter(100, 1000, []int{100, 100, 100})

	pi := &storage.ProcessIndicator{
		DeploymentId:  "deployment",
		ContainerName: "container",
		Signal: &storage.ProcessSignal{
			ContainerId:  "containerid",
			ExecFilePath: "/usr/bin/process",
			Args:         "arg1 arg2 arg3",
		},
	}

	// Allocation measurement is the whole point of this benchmark, so report
	// allocs/op unconditionally instead of relying on the -benchmem flag.
	b.ReportAllocs()
	for b.Loop() {
		filter.Add(pi)
	}
}

// BenchmarkBuildIndicatorFilterMemory measures cost (time and allocations) of
// building a fresh filter populated with a large number of processes:
// 100 deployments x 10 containers x 10 processes per iteration.
func BenchmarkBuildIndicatorFilterMemory(b *testing.B) {
	const (
		NumDeployments       = 100
		NumPodsPerDeployment = 10
		NumProcessesPerPod   = 10
	)

	// This benchmark is about memory; surface allocs/op even without the
	// -benchmem flag.
	b.ReportAllocs()
	for b.Loop() {
		filter := NewFilter(1000, 10000, []int{100, 50, 25, 10, 5})

		for i := 0; i < NumDeployments; i++ {
			deploymentID := fmt.Sprintf("deployment-%d", i)
			for j := 0; j < NumPodsPerDeployment; j++ {
				containerID := fmt.Sprintf("container-%d-%d", i, j)
				for k := 0; k < NumProcessesPerPod; k++ {
					pi := &storage.ProcessIndicator{
						DeploymentId:  deploymentID,
						ContainerName: "container",
						Signal: &storage.ProcessSignal{
							ContainerId:  containerID,
							ExecFilePath: fmt.Sprintf("/usr/bin/process%d", k),
							Args:         fmt.Sprintf("arg1 arg2 arg3 iteration%d", k),
						},
					}
					filter.Add(pi)
				}
			}
		}

		// Force GC inside the timed loop (intentional) so retained memory,
		// not transient garbage, dominates the measurement.
		runtime.GC()
	}
}
Loading