Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 47 additions & 8 deletions central/metrics/custom/tracker/tracker_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import (
"github.com/stackrox/rox/pkg/sync"
)

const inactiveGathererTTL = 2 * 24 * time.Hour

var (
log = logging.CreateLogger(logging.ModuleForName("central_metrics"), 1)
ErrStopIterator = errors.New("stopped")
Expand Down Expand Up @@ -101,7 +103,8 @@ type TrackerBase[F Finding] struct {
config *Configuration
metricsConfigMux sync.RWMutex

gatherers sync.Map // map[user ID]*tokenGatherer
gatherers sync.Map // map[user ID]*gatherer
cleanupWG sync.WaitGroup // for sync in testing.

registryFactory func(userID string) metrics.CustomRegistry // for mocking in tests.
}
Expand Down Expand Up @@ -258,10 +261,10 @@ func (tracker *TrackerBase[Finding]) Gather(ctx context.Context) {
if cfg == nil {
return
}
// Pass the cfg so that the same configuration is used there and here.
gatherer := tracker.getGatherer(id.UID(), cfg)

// Return if is still running.
if !gatherer.running.CompareAndSwap(false, true) {
// getGatherer() returns nil if the gatherer is still running.
if gatherer == nil {
return
}
defer gatherer.running.Store(false)
Expand All @@ -276,21 +279,57 @@ func (tracker *TrackerBase[Finding]) Gather(ctx context.Context) {
}

// getGatherer returns the existing or a new gatherer for the given userID.
// When creating a new gatherer, it also registers all known metrics on the
// gatherer registry.
// The returned gatherer will be set to a running state for synchronization
// purposes. When creating a new gatherer, it also registers all known metrics
// on the gatherer registry.
// Returns nil on error, or if the gatherer for this userID is still running.
func (tracker *TrackerBase[Finding]) getGatherer(userID string, cfg *Configuration) *gatherer {
// TODO(PR #16176): limit the number of different tokens accessing the metrics?
defer tracker.cleanupInactiveGatherers()
var gr *gatherer
if g, ok := tracker.gatherers.Load(userID); !ok {
r := tracker.registryFactory(userID)
gr = &gatherer{
registry: tracker.registryFactory(userID),
registry: r,
}
gr.running.Store(true)
tracker.gatherers.Store(userID, gr)
for metricName := range cfg.metrics {
tracker.registerMetric(gr, cfg, metricName)
}
} else {
gr = g.(*gatherer)
// Return nil if this gatherer is still running.
// Otherwise mark it running.
if !gr.running.CompareAndSwap(false, true) {
return nil
}
}
return gr
}

// cleanupInactiveGatherers frees the registries for the userIDs, that haven't
// shown up for inactiveGathererTTL.
func (tracker *TrackerBase[Finding]) cleanupInactiveGatherers() {
tracker.cleanupWG.Add(1)
go func() {
defer tracker.cleanupWG.Done()
tracker.gatherers.Range(func(userID, gv any) bool {
g := gv.(*gatherer)
// Make it running to block normal gathering.
if !g.running.CompareAndSwap(false, true) {
return true
}
if time.Since(g.lastGather) >= inactiveGathererTTL &&
// Do not delete a just created gatherer in test.
// lastGather should never be zero for a non-running gatherer
// in production run.
!g.lastGather.IsZero() {
metrics.DeleteCustomRegistry(userID.(string))
tracker.gatherers.Delete(userID)
} else {
g.running.Store(false)
}
return true
})
}()
}
89 changes: 89 additions & 0 deletions central/metrics/custom/tracker/tracker_base_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/stackrox/rox/pkg/grpc/authn/basic"
"github.com/stackrox/rox/pkg/uuid"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/mock/gomock"
)

Expand Down Expand Up @@ -296,6 +297,94 @@ func TestTrackerBase_Gather(t *testing.T) {
result = make(map[string][]*aggregatedRecord)
tracker.Gather(ctx)
assert.Empty(t, result)
tracker.cleanupWG.Wait()
}

func TestTrackerBase_getGatherer(t *testing.T) {
ctrl := gomock.NewController(t)
rf := mocks.NewMockCustomRegistry(ctrl)
tracker := MakeTrackerBase("test", "test",
testLabelGetters,
makeTestGatherFunc(testData),
)
tracker.registryFactory = func(string) metrics.CustomRegistry { return rf }

md := makeTestMetricDescriptors(t)
tracker.Reconfigure(&Configuration{
metrics: md,
toAdd: slices.Collect(maps.Keys(md)),
period: time.Hour,
})

cfg := tracker.getConfiguration()
rf.EXPECT().RegisterMetric(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).
// each new gatherer, created by getGatherer, calls RegisterMetric
// for every metric.
Times(2 * len(cfg.metrics))

g := tracker.getGatherer("Admin", cfg)
require.NotNil(t, g)
tracker.cleanupWG.Wait()

g.lastGather = time.Now().Add(-inactiveGathererTTL)
g.running.Store(false)
_, ok := tracker.gatherers.Load("Admin")
assert.True(t, ok)
// This call should delete the "Admin" gatherer:
tracker.getGatherer("Donkey", cfg).running.Store(false)
tracker.cleanupWG.Wait()
_, ok = tracker.gatherers.Load("Admin")
assert.False(t, ok)
}

func TestTrackerBase_cleanup(t *testing.T) {
ctrl := gomock.NewController(t)
rf := mocks.NewMockCustomRegistry(ctrl)
rf.EXPECT().RegisterMetric(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).
Times(4).Return(nil)

// Test that cleanupInactiveGatherers removes gatherers that have been inactive for longer than inactiveGathererTTL.
tracker := MakeTrackerBase("test", "test",
testLabelGetters,
makeTestGatherFunc(testData),
)
tracker.registryFactory = func(string) metrics.CustomRegistry { return rf }

cfg := &Configuration{
metrics: makeTestMetricDescriptors(t),
toAdd: []MetricName{"test_metric"},
period: time.Hour,
}
tracker.Reconfigure(cfg)

// Add two gatherers: one active, one inactive.
activeID := "active"
inactiveID := "inactive"

activeGatherer := tracker.getGatherer(activeID, cfg)
inactiveGatherer := tracker.getGatherer(inactiveID, cfg)
tracker.cleanupWG.Wait()

// Set lastGather times.
activeGatherer.lastGather = time.Now()
activeGatherer.running.Store(false)
inactiveGatherer.lastGather = time.Now().Add(-inactiveGathererTTL)
inactiveGatherer.running.Store(false)

// Sanity: both gatherers present.
_, ok1 := tracker.gatherers.Load(activeID)
_, ok2 := tracker.gatherers.Load(inactiveID)
assert.True(t, ok1)
assert.True(t, ok2)

tracker.cleanupInactiveGatherers()
tracker.cleanupWG.Wait()

// Active gatherer should remain, inactive should be removed.
_, ok1 = tracker.gatherers.Load(activeID)
_, ok2 = tracker.gatherers.Load(inactiveID)
assert.True(t, ok1, "active gatherer should not be removed")
assert.False(t, ok2, "inactive gatherer should be removed")
}

func makeAdminContext(t *testing.T) context.Context {
Expand Down
17 changes: 17 additions & 0 deletions central/metrics/custom_registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,23 @@ func GetCustomRegistry(userID string) CustomRegistry {
return registry
}

// DeleteCustomRegistry unregisters all metrics and deletes a registry for the
// given userID.
func DeleteCustomRegistry(userID string) {
registriesMux.Lock()
defer registriesMux.Unlock()
registry, ok := userRegistries[userID]
if !ok {
return
}
registry.gauges.Range(func(metric, vec any) bool {
_ = registry.UnregisterMetric(metric.(string))
return true
})
registry.gauges.Clear()
delete(userRegistries, userID)
}

var _ CustomRegistry = (*customRegistry)(nil)

// UnregisterMetric unregister the given metric by name.
Expand Down
30 changes: 30 additions & 0 deletions central/metrics/custom_registry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,33 @@ func TestCustomRegistry_Reset(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, float64(24), value)
}

func TestDeleteCustomRegistry(t *testing.T) {
cr1 := GetCustomRegistry("user1")
cr2 := GetCustomRegistry("user2")
_ = cr1.RegisterMetric("test", "test", time.Hour, []string{"Test1", "Test2"})
_ = cr2.RegisterMetric("test", "test", time.Hour, []string{"Test1", "Test2"})
cr1.SetTotal("test", map[string]string{"Test1": "value1", "Test2": "value2"}, 42)
cr2.SetTotal("test", map[string]string{"Test1": "value1", "Test2": "value2"}, 24)

DeleteCustomRegistry("user1")

value, err := getMetricValue(t, cr1, "test")
assert.Error(t, err)
assert.Equal(t, float64(0), value)

cr1 = GetCustomRegistry("user1")
value, err = getMetricValue(t, cr1, "test")
assert.Error(t, err)
assert.Equal(t, float64(0), value)

value, err = getMetricValue(t, cr2, "test")
assert.NoError(t, err)
assert.Equal(t, float64(24), value)

assert.NotPanics(t, func() {
DeleteCustomRegistry("user1")
DeleteCustomRegistry("user1")
DeleteCustomRegistry("user100")
})
}
Loading