Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion central/metrics/custom/expiry/tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
)

func New(s service.Service) *tracker.TrackerBase[*finding] {
return tracker.MakeTrackerBase(
return tracker.MakeGlobalTrackerBase(
"cert_exp",
"certificate expiry",
LazyLabels,
Expand Down
2 changes: 1 addition & 1 deletion central/metrics/custom/policies/tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
)

func New(ds policyDS.DataStore) *tracker.TrackerBase[*finding] {
return tracker.MakeTrackerBase(
return tracker.MakeGlobalTrackerBase(
"cfg",
"policies",
LazyLabels,
Expand Down
24 changes: 20 additions & 4 deletions central/metrics/custom/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"context"
"net/http"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
alertDS "github.com/stackrox/rox/central/alert/datastore"
clusterDS "github.com/stackrox/rox/central/cluster/datastore"
configDS "github.com/stackrox/rox/central/config/datastore"
Expand Down Expand Up @@ -162,14 +164,28 @@ func (tr trackerRunner) ServeHTTP(w http.ResponseWriter, req *http.Request) {
go tracker.Gather(newCtx)
}
}
registry, err := metrics.GetCustomRegistry(userID)

userRegistry, err := metrics.GetCustomRegistry(userID)
if err != nil {
httputil.WriteError(w, err)
return
}
registry.Lock()
defer registry.Unlock()
registry.ServeHTTP(w, req)

globalRegistry, err := metrics.GetGlobalRegistry()
if err != nil {
httputil.WriteError(w, err)
return
}

userRegistry.Lock()
defer userRegistry.Unlock()
globalRegistry.Lock()
defer globalRegistry.Unlock()

promhttp.HandlerFor(
prometheus.Gatherers{userRegistry, globalRegistry},
promhttp.HandlerOpts{}).ServeHTTP(w, req)

go phonehome()
}

Expand Down
46 changes: 39 additions & 7 deletions central/metrics/custom/tracker/tracker_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
)

// inactiveGathererTTL is a duration of two days.
// NOTE(review): presumably the idle time after which a gatherer is considered
// inactive and cleaned up; its use is outside this view, confirm.
const inactiveGathererTTL = 2 * 24 * time.Hour
// globalScopeID is the gatherer ID that Gather uses for non-scoped (global)
// trackers instead of a user ID.
const globalScopeID = ""

var (
log = logging.CreateLogger(logging.ModuleForName("central_metrics"), 1)
Expand Down Expand Up @@ -100,11 +101,16 @@ func (g *gatherer[F]) trySetRunning() bool {
// TrackerBase implements a generic finding tracker.
// Configured with a finding generator and other arguments, it runs a goroutine
// that periodically aggregates gathered values and updates the gauge values.
//
// A tracker can be scoped, in which case it creates a separate registry for
// each scrape user ID, or global, where it reuses the same global registry for
// all users.
type TrackerBase[F Finding] struct {
metricPrefix string
description string
getters LazyLabelGetters[F]
generator FindingGenerator[F]
scoped bool

// metricsConfig can be changed with an API call.
config *Configuration
Expand All @@ -116,17 +122,39 @@ type TrackerBase[F Finding] struct {
registryFactory func(userID string) (metrics.CustomRegistry, error) // for mocking in tests.
}

// MakeTrackerBase creates a scoped tracker that has neither a gathering period
// nor a metrics configuration yet. Call Reconfigure to supply both.
func MakeTrackerBase[F Finding](metricPrefix, description string,
	getters LazyLabelGetters[F], generator FindingGenerator[F],
) *TrackerBase[F] {
	// Scoped trackers keep a separate registry per scrape user.
	const scoped = true
	return makeTrackerBase(metricPrefix, description, scoped, getters, generator)
}

// MakeGlobalTrackerBase creates a global (non-scoped) tracker: one that is
// built with scoped set to false, so that it uses the shared global registry
// instead of per-user registries.
func MakeGlobalTrackerBase[F Finding](metricPrefix, description string,
	getters LazyLabelGetters[F], generator FindingGenerator[F],
) *TrackerBase[F] {
	const scoped = false
	return makeTrackerBase(metricPrefix, description, scoped, getters, generator)
}

// globalRegistryFactory adapts metrics.GetGlobalRegistry to the registry
// factory signature that takes a user ID. The user ID is ignored: every caller
// receives whatever metrics.GetGlobalRegistry returns.
func globalRegistryFactory(_ string) (metrics.CustomRegistry, error) {
	registry, err := metrics.GetGlobalRegistry()
	return registry, err
}

// makeTrackerBase builds a TrackerBase from the given metric prefix,
// description, label getters and finding generator. No configuration is set on
// the returned tracker.
//
// When scoped is true the tracker resolves registries with
// metrics.GetCustomRegistry (one per user ID); otherwise it uses
// globalRegistryFactory, which ignores the user ID.
func makeTrackerBase[F Finding](metricPrefix, description string, scoped bool,
	getters LazyLabelGetters[F], generator FindingGenerator[F],
) *TrackerBase[F] {
	// Start from the global factory and switch to the per-user one only for
	// scoped trackers.
	registryFactory := globalRegistryFactory
	if scoped {
		registryFactory = metrics.GetCustomRegistry
	}
	return &TrackerBase[F]{
		metricPrefix:    metricPrefix,
		description:     description,
		getters:         getters,
		generator:       generator,
		scoped:          scoped,
		registryFactory: registryFactory,
	}
}

Expand Down Expand Up @@ -276,13 +304,17 @@ func (tracker *TrackerBase[F]) Gather(ctx context.Context) {
if !cfg.isEnabled() {
return
}
id, err := authn.IdentityFromContext(ctx)
if err != nil {
utils.Should(err)
return
id := globalScopeID
if tracker.scoped {
userID, err := authn.IdentityFromContext(ctx)
if err != nil {
utils.Should(err)
return
}
id = userID.UID()
}
// Pass the cfg so that the same configuration is used there and here.
gatherer := tracker.getGatherer(id.UID(), cfg)
gatherer := tracker.getGatherer(id, cfg)
// getGatherer() returns nil if the gatherer is still running.
if gatherer == nil {
return
Expand Down
181 changes: 181 additions & 0 deletions central/metrics/custom/tracker/tracker_base_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package tracker
import (
"context"
"maps"
"net/http"
"net/http/httptest"
"regexp"
"slices"
"strings"
Expand All @@ -13,6 +15,7 @@ import (
"github.com/stackrox/rox/central/metrics"
"github.com/stackrox/rox/central/metrics/mocks"
"github.com/stackrox/rox/pkg/auth/authproviders"
"github.com/stackrox/rox/pkg/defaults/accesscontrol"
"github.com/stackrox/rox/pkg/errox"
"github.com/stackrox/rox/pkg/grpc/authn"
"github.com/stackrox/rox/pkg/grpc/authn/basic"
Expand Down Expand Up @@ -44,6 +47,14 @@ func TestMakeTrackerBase(t *testing.T) {
tracker := MakeTrackerBase("test", "Test", testLabelGetters, nilGatherFunc)
assert.NotNil(t, tracker)
assert.Nil(t, tracker.getConfiguration())
assert.True(t, tracker.scoped)
}

// Test_makeTrackerBase verifies that makeTrackerBase called with scoped=false
// returns a non-nil tracker that has no configuration and is not scoped.
func Test_makeTrackerBase(t *testing.T) {
	const scoped = false
	tb := makeTrackerBase("test", "Test", scoped, testLabelGetters, nilGatherFunc)
	assert.NotNil(t, tb)
	assert.Nil(t, tb.getConfiguration())
	assert.False(t, tb.scoped)
}

func TestTrackerBase_Reconfigure(t *testing.T) {
Expand Down Expand Up @@ -588,3 +599,173 @@ func Test_formatMetricsHelp(t *testing.T) {
period: time.Hour,
}, "metric1"))
}

// makeScopedGatherFunc is a finding generator for tests that filters testData
// by the identity found in ctx:
//   - accesscontrol.Admin receives a finding for every testData entry;
//   - the "No Access" user receives only entries whose Cluster is "cluster 1";
//   - any other user, including accesscontrol.None, receives nothing.
//
// Nothing is yielded when ctx carries no identity. The yielded finding value
// is the position of the entry in the iteration over testData.
func makeScopedGatherFunc(ctx context.Context, _ MetricDescriptors) FindingErrorSequence[testFinding] {
	return func(yield func(testFinding, error) bool) {
		identity, err := authn.IdentityFromContext(ctx)
		if err != nil {
			return
		}
		uid := identity.UID()

		var next testFinding
		for i := range testData {
			// The counter advances for every entry, visible or not.
			current := next
			next++

			visible := false
			switch {
			case uid == accesscontrol.Admin:
				visible = true
			case uid == "No Access": // see basic.ContextWithNoAccessIdentity code.
				visible = testData[i]["Cluster"] == "cluster 1"
			}

			if visible && !yield(current, nil) {
				return
			}
		}
	}
}

// makeTestCtxIdentity creates a context with accessCtxFunc and returns it
// together with the identity extracted from that context.
//
// The test fails immediately if the context carries no identity, so callers
// never receive a nil identity that would panic on first use.
func makeTestCtxIdentity(t *testing.T, provider authproviders.Provider, accessCtxFunc func(*testing.T, authproviders.Provider) context.Context) (context.Context, authn.Identity) {
	t.Helper()
	ctx := accessCtxFunc(t, provider)
	id, err := authn.IdentityFromContext(ctx)
	require.NoError(t, err)
	return ctx, id
}

// Test_scope checks where a tracker publishes its metrics depending on scope.
//
// "scoped access": a scoped tracker gathers separately for three identities
// and each user's registry must expose only what makeScopedGatherFunc yields
// for that user.
//
// "global access": a global tracker gathers for the same three identities and
// the single global registry must expose the full data set.
func Test_scope(t *testing.T) {
	t.Run("scoped access", func(t *testing.T) {
		tracker := MakeTrackerBase("test", "Test",
			testLabelGetters,
			makeScopedGatherFunc)

		md := makeTestMetricDescriptors(t)
		tracker.Reconfigure(&Configuration{
			metrics: md,
			toAdd:   slices.Collect(maps.Keys(md)),
			period:  time.Hour,
		})

		provider, err := authproviders.NewProvider(
			authproviders.WithEnabled(true),
			authproviders.WithID(uuid.NewV4().String()),
			authproviders.WithName("test"),
		)
		require.NoError(t, err)

		adminCtx, adminIdentity := makeTestCtxIdentity(t, provider,
			basic.ContextWithAdminIdentity)
		cluster1Ctx, cluster1Identity := makeTestCtxIdentity(t, provider,
			basic.ContextWithNoAccessIdentity)
		noAccessCtx, noAccessIdentity := makeTestCtxIdentity(t, provider,
			basic.ContextWithNoneIdentity)

		// Register the cleanup before gathering so that the per-user
		// registries are deleted even if the test aborts below.
		t.Cleanup(func() {
			metrics.DeleteCustomRegistry(adminIdentity.UID())
			metrics.DeleteCustomRegistry(cluster1Identity.UID())
			metrics.DeleteCustomRegistry(noAccessIdentity.UID())
		})

		tracker.Gather(adminCtx)
		tracker.Gather(cluster1Ctx)
		tracker.Gather(noAccessCtx)
		tracker.cleanupWG.Wait()

		// registryOf returns the registry of the gatherer stored for uid. It
		// fails the test with a clear message when no gatherer exists instead
		// of panicking on the type assertion.
		registryOf := func(uid string) metrics.CustomRegistry {
			value, ok := tracker.gatherers.Load(uid)
			require.True(t, ok, "no gatherer for user %q", uid)
			return value.(*gatherer[testFinding]).registry
		}

		adminRegistry := registryOf(adminIdentity.UID())
		cluster1Registry := registryOf(cluster1Identity.UID())
		noAccessRegistry := registryOf(noAccessIdentity.UID())

		adminMetrics := readMetrics(adminRegistry)
		cluster1Metrics := readMetrics(cluster1Registry)
		noAccessMetrics := readMetrics(noAccessRegistry)

		const expectedAdminMetrics = `# HELP rox_central_test_Test_scope_scoped_access_metric1 The total number of Test aggregated by Cluster, Severity, and gathered every 1h0m0s
# TYPE rox_central_test_Test_scope_scoped_access_metric1 gauge
rox_central_test_Test_scope_scoped_access_metric1{Cluster="cluster 1",Severity="CRITICAL"} 2
rox_central_test_Test_scope_scoped_access_metric1{Cluster="cluster 2",Severity="HIGH"} 1
rox_central_test_Test_scope_scoped_access_metric1{Cluster="cluster 3",Severity="LOW"} 1
rox_central_test_Test_scope_scoped_access_metric1{Cluster="cluster 5",Severity="LOW"} 1
# HELP rox_central_test_Test_scope_scoped_access_metric2 The total number of Test aggregated by Namespace, and gathered every 1h0m0s
# TYPE rox_central_test_Test_scope_scoped_access_metric2 gauge
rox_central_test_Test_scope_scoped_access_metric2{Namespace="ns 1"} 1
rox_central_test_Test_scope_scoped_access_metric2{Namespace="ns 2"} 1
rox_central_test_Test_scope_scoped_access_metric2{Namespace="ns 3"} 3
`

		const expectedCluster1Metrics = `# HELP rox_central_test_Test_scope_scoped_access_metric1 The total number of Test aggregated by Cluster, Severity, and gathered every 1h0m0s
# TYPE rox_central_test_Test_scope_scoped_access_metric1 gauge
rox_central_test_Test_scope_scoped_access_metric1{Cluster="cluster 1",Severity="CRITICAL"} 2
# HELP rox_central_test_Test_scope_scoped_access_metric2 The total number of Test aggregated by Namespace, and gathered every 1h0m0s
# TYPE rox_central_test_Test_scope_scoped_access_metric2 gauge
rox_central_test_Test_scope_scoped_access_metric2{Namespace="ns 1"} 1
rox_central_test_Test_scope_scoped_access_metric2{Namespace="ns 3"} 1
`

		assert.Equal(t, expectedAdminMetrics, adminMetrics)
		assert.Equal(t, expectedCluster1Metrics, cluster1Metrics)
		assert.Empty(t, noAccessMetrics)
	})

	t.Run("global access", func(t *testing.T) {
		tracker := MakeGlobalTrackerBase("test", "Test",
			testLabelGetters,
			makeTestGatherFunc(testData))

		md := makeTestMetricDescriptors(t)
		tracker.Reconfigure(&Configuration{
			metrics: md,
			toAdd:   slices.Collect(maps.Keys(md)),
			period:  time.Hour,
		})

		provider, err := authproviders.NewProvider(
			authproviders.WithEnabled(true),
			authproviders.WithID(uuid.NewV4().String()),
			authproviders.WithName("test"),
		)
		require.NoError(t, err)

		adminCtx, _ := makeTestCtxIdentity(t, provider,
			basic.ContextWithAdminIdentity)
		cluster1Ctx, _ := makeTestCtxIdentity(t, provider,
			basic.ContextWithNoAccessIdentity)
		noAccessCtx, _ := makeTestCtxIdentity(t, provider,
			basic.ContextWithNoneIdentity)

		tracker.Gather(adminCtx)
		tracker.Gather(cluster1Ctx)
		tracker.Gather(noAccessCtx)
		tracker.cleanupWG.Wait()

		globalRegistry, err := metrics.GetGlobalRegistry()
		require.NoError(t, err)

		const expectedMetrics = `# HELP rox_central_test_Test_scope_global_access_metric1 The total number of Test aggregated by Cluster, Severity, and gathered every 1h0m0s
# TYPE rox_central_test_Test_scope_global_access_metric1 gauge
rox_central_test_Test_scope_global_access_metric1{Cluster="cluster 1",Severity="CRITICAL"} 2
rox_central_test_Test_scope_global_access_metric1{Cluster="cluster 2",Severity="HIGH"} 1
rox_central_test_Test_scope_global_access_metric1{Cluster="cluster 3",Severity="LOW"} 1
rox_central_test_Test_scope_global_access_metric1{Cluster="cluster 5",Severity="LOW"} 1
# HELP rox_central_test_Test_scope_global_access_metric2 The total number of Test aggregated by Namespace, and gathered every 1h0m0s
# TYPE rox_central_test_Test_scope_global_access_metric2 gauge
rox_central_test_Test_scope_global_access_metric2{Namespace="ns 1"} 1
rox_central_test_Test_scope_global_access_metric2{Namespace="ns 2"} 1
rox_central_test_Test_scope_global_access_metric2{Namespace="ns 3"} 3
`

		// All users should see the same global metrics.
		globalMetrics := readMetrics(globalRegistry)
		assert.Equal(t, expectedMetrics, globalMetrics)
	})
}

// readMetrics issues an in-memory GET /metrics request against the registry's
// HTTP handler and returns the response body as a string.
func readMetrics(registry metrics.CustomRegistry) string {
	recorder := httptest.NewRecorder()
	request := httptest.NewRequest(http.MethodGet, "/metrics", nil)
	registry.ServeHTTP(recorder, request)
	return recorder.Body.String()
}
18 changes: 17 additions & 1 deletion central/metrics/custom_registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,13 @@ type customRegistry struct {

var (
// userRegistries stores custom registries by string key.
// NOTE(review): presumably keyed by user ID, as GetCustomRegistry takes a
// userID; its body is not fully visible here, confirm.
userRegistries map[string]*customRegistry = make(map[string]*customRegistry)
// globalRegistry is the shared registry for non-scoped metrics. It stays nil
// until the first call to GetGlobalRegistry creates it.
globalRegistry *customRegistry
// registriesMux is locked by GetCustomRegistry and GetGlobalRegistry for the
// duration of each call.
registriesMux sync.Mutex
// ErrTooMany is a ResourceExhausted error with the message
// "too many custom registries".
// NOTE(review): presumably returned when a registry limit is reached; the
// code that returns it is not visible here, confirm.
ErrTooMany = errox.ResourceExhausted.New("too many custom registries")
)

// GetCustomRegistry is a CustomRegistry factory that returns the existing or
// a new registry for the user.
// a new registry for the user. This is used for scoped (per-user) metrics.
func GetCustomRegistry(userID string) (CustomRegistry, error) {
registriesMux.Lock()
defer registriesMux.Unlock()
Expand All @@ -68,6 +69,21 @@ func GetCustomRegistry(userID string) (CustomRegistry, error) {
return registry, nil
}

// GetGlobalRegistry returns the shared global registry used for non-scoped
// metrics, i.e. metrics that are the same for every user (e.g., API request
// counters). The registry and its HTTP handler are created on the first call,
// under registriesMux; later calls return the same instance. The returned
// error is always nil.
func GetGlobalRegistry() (CustomRegistry, error) {
	registriesMux.Lock()
	defer registriesMux.Unlock()

	if globalRegistry != nil {
		return globalRegistry, nil
	}

	registry := &customRegistry{Registry: prometheus.NewRegistry()}
	registry.Handler = promhttp.HandlerFor(registry, promhttp.HandlerOpts{})
	globalRegistry = registry
	return globalRegistry, nil
}

// DeleteCustomRegistry unregisters all metrics and deletes a registry for the
// given userID.
func DeleteCustomRegistry(userID string) {
Expand Down
Loading
Loading