Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 48 additions & 12 deletions pkg/process/filter/filter.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
package filter

import (
"hash"
"strings"
"unsafe"

"github.com/cespare/xxhash"
"github.com/stackrox/rox/generated/storage"
"github.com/stackrox/rox/pkg/containerid"
"github.com/stackrox/rox/pkg/set"
"github.com/stackrox/rox/pkg/stringutils"
"github.com/stackrox/rox/pkg/sync"
)

// BinaryHash is a 64-bit hash used as a map key in place of the original
// string, so the filter's trie stores fixed-size integers rather than copies
// of (or references to) the hashed strings.
// Using uint64 directly avoids conversion overhead and provides faster map operations.
// This follows the pattern from network flow dedupers (PR #17040).
//
// NOTE(review): keying maps by a 64-bit hash means two distinct strings whose
// hashes collide are treated as the same entry. Presumably acceptable for a
// best-effort spam filter — confirm this trade-off is intended.
type BinaryHash uint64

// This filter is a rudimentary filter that prevents a container from spamming Central
//
// Parameters:
Expand Down Expand Up @@ -43,12 +51,12 @@ type Filter interface {

// level is one node of the per-container trie built by the filter. hits
// counts how many times this exact node has been reached; children fans out
// to deeper nodes, keyed by the BinaryHash of the next string component.
//
// (The pasted diff rendering carried both the removed map[string]*level line
// and the added map[BinaryHash]*level line; this is the merged state.)
type level struct {
	hits     int
	children map[BinaryHash]*level
}

// newLevel allocates an empty trie node with an initialized (non-nil)
// children map, so callers can insert into it without a nil-map panic.
//
// (The pasted diff rendering carried both the removed map[string]*level
// literal and the added map[BinaryHash]*level literal; this is the merged
// state.)
func newLevel() *level {
	return &level{
		children: make(map[BinaryHash]*level),
	}
}

Expand All @@ -59,6 +67,10 @@ type filterImpl struct {

containersInDeployment map[string]map[string]*level
rootLock sync.Mutex

// Hash instance for computing BinaryHash keys
// Reused across Add() calls to avoid allocations
h hash.Hash64
}

func (f *filterImpl) siftNoLock(level *level, args []string, levelNum int) bool {
Expand All @@ -72,16 +84,20 @@ func (f *filterImpl) siftNoLock(level *level, args []string, levelNum int) bool
return true
}
// Truncate the current argument to the max size to avoid large arguments taking up a lot of space
// Clone to avoid retaining references to the ProcessIndicator protobuf object
currentArg := strings.Clone(stringutils.Truncate(args[0], maxArgSize))
nextLevel := level.children[currentArg]
// NO LONGER NEED strings.Clone() - we're hashing the string, not storing it
truncated := stringutils.Truncate(args[0], maxArgSize)

// Hash the argument string
argHash := hashString(f.h, truncated)

nextLevel := level.children[argHash]
if nextLevel == nil {
// If this level has already hit its max fan out then return false
if len(level.children) >= f.maxFanOut[levelNum] {
return false
}
nextLevel = newLevel()
level.children[currentArg] = nextLevel
level.children[argHash] = nextLevel
}

return f.siftNoLock(nextLevel, args[1:], levelNum+1)
Expand All @@ -95,6 +111,7 @@ func NewFilter(maxExactPathMatches, maxUniqueProcesses int, fanOut []int) Filter
maxFanOut: fanOut,

containersInDeployment: make(map[string]map[string]*level),
h: xxhash.New(),
}
}

Expand All @@ -120,19 +137,20 @@ func (f *filterImpl) Add(indicator *storage.ProcessIndicator) bool {

rootLevel := f.getOrAddRootLevelNoLock(indicator)

// Clone the exec file path to avoid retaining a reference to the ProcessIndicator
// protobuf object. Without this copy, the map key would hold a reference to the
// string within the protobuf, preventing garbage collection of the entire protobuf object.
execFilePath := strings.Clone(indicator.GetSignal().GetExecFilePath())
// NO LONGER NEED strings.Clone() - we're hashing the string, not storing it
execFilePath := indicator.GetSignal().GetExecFilePath()

// Hash the exec file path
execFilePathHash := hashString(f.h, execFilePath)

// Handle the process level independently as we will never reject a new process
processLevel := rootLevel.children[execFilePath]
processLevel := rootLevel.children[execFilePathHash]
if processLevel == nil {
if len(rootLevel.children) >= f.maxUniqueProcesses {
return false
}
processLevel = newLevel()
rootLevel.children[execFilePath] = processLevel
rootLevel.children[execFilePathHash] = processLevel
}

return f.siftNoLock(processLevel, strings.Fields(indicator.GetSignal().GetArgs()), 0)
Expand Down Expand Up @@ -189,3 +207,21 @@ func (f *filterImpl) DeleteByPod(pod *storage.Pod) {
}
}
}

// hashString returns the 64-bit digest of s computed with h, as a BinaryHash.
// The empty string is special-cased to 0 and never touches h.
//
// NOTE(review): an earlier comment referenced a hashStrings multi-argument
// variant; that helper is not visible in this diff — confirm it still exists.
func hashString(h hash.Hash64, s string) BinaryHash {
	if s == "" {
		// unsafe.StringData on an empty string is unspecified, so bail
		// out before the zero-copy conversion below.
		return 0
	}

	h.Reset()
	// View s's bytes without copying. Safe here: Write does not mutate its
	// argument (io.Writer contract), the hasher does not retain the slice,
	// and s stays alive for the duration of the call.
	//#nosec G103 -- Audited: zero-copy string-to-bytes conversion for performance
	data := unsafe.Slice(unsafe.StringData(s), len(s))
	_, _ = h.Write(data)
	return BinaryHash(h.Sum64())
}
88 changes: 88 additions & 0 deletions pkg/process/filter/filter_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package filter

import (
"fmt"
"runtime"
"testing"

"github.com/stackrox/rox/generated/storage"
)

// BenchmarkAdd measures the throughput of Filter.Add over a rotating set of
// 1000 indicators (10 deployments/containers, 100 exec paths, unique args).
func BenchmarkAdd(b *testing.B) {
	filter := NewFilter(100, 1000, []int{100, 100, 100})

	// Build the fixtures up front so their allocation is excluded from the
	// measured region.
	indicators := make([]*storage.ProcessIndicator, 1000)
	for i := range indicators {
		indicators[i] = &storage.ProcessIndicator{
			DeploymentId:  fmt.Sprintf("dep%d", i%10),
			ContainerName: "container",
			Signal: &storage.ProcessSignal{
				ContainerId:  fmt.Sprintf("id%d", i%10),
				ExecFilePath: fmt.Sprintf("/usr/bin/process%d", i%100),
				Args:         fmt.Sprintf("arg1 arg2 arg3 iteration%d", i),
			},
		}
	}

	b.ReportAllocs()
	// b.Loop() starts timing at its first call, so the setup above is not
	// measured; this replaces the b.ResetTimer()/b.N pattern and matches the
	// other benchmarks in this file.
	i := 0
	for b.Loop() {
		filter.Add(indicators[i%len(indicators)])
		i++
	}
}

// BenchmarkAddMemory measures per-call allocations when the same indicator is
// added repeatedly, i.e. the steady-state path where every trie level already
// exists.
func BenchmarkAddMemory(b *testing.B) {
	filter := NewFilter(100, 1000, []int{100, 100, 100})

	pi := &storage.ProcessIndicator{
		DeploymentId:  "deployment",
		ContainerName: "container",
		Signal: &storage.ProcessSignal{
			ContainerId:  "containerid",
			ExecFilePath: "/usr/bin/process",
			Args:         "arg1 arg2 arg3",
		},
	}

	// Allocation measurement is the whole point of this benchmark, so report
	// allocs/op unconditionally instead of relying on the -benchmem flag.
	b.ReportAllocs()
	for b.Loop() {
		filter.Add(pi)
	}
}

// BenchmarkBuildIndicatorFilterMemory measures cost (time and allocations) of
// building a fresh filter populated with a large number of processes:
// 100 deployments x 10 containers x 10 processes per iteration.
func BenchmarkBuildIndicatorFilterMemory(b *testing.B) {
	const (
		NumDeployments       = 100
		NumPodsPerDeployment = 10
		NumProcessesPerPod   = 10
	)

	// This benchmark is about memory; surface allocs/op even without the
	// -benchmem flag.
	b.ReportAllocs()
	for b.Loop() {
		filter := NewFilter(1000, 10000, []int{100, 50, 25, 10, 5})

		for i := 0; i < NumDeployments; i++ {
			deploymentID := fmt.Sprintf("deployment-%d", i)
			for j := 0; j < NumPodsPerDeployment; j++ {
				containerID := fmt.Sprintf("container-%d-%d", i, j)
				for k := 0; k < NumProcessesPerPod; k++ {
					pi := &storage.ProcessIndicator{
						DeploymentId:  deploymentID,
						ContainerName: "container",
						Signal: &storage.ProcessSignal{
							ContainerId:  containerID,
							ExecFilePath: fmt.Sprintf("/usr/bin/process%d", k),
							Args:         fmt.Sprintf("arg1 arg2 arg3 iteration%d", k),
						},
					}
					filter.Add(pi)
				}
			}
		}

		// Force GC inside the timed loop (intentional) so retained memory,
		// not transient garbage, dominates the measurement.
		runtime.GC()
	}
}
Loading