14 changes: 11 additions & 3 deletions go/common/config.go
@@ -62,6 +62,11 @@ type Config struct {
// pass through to sandbox environment variable
Sandbox_config any `json:"sandbox_config"`

// KAFKA CACHE OPTIONS
Kafka_cache_size_mb int `json:"kafka_cache_size_mb"` // max message cache size in MB (default: 256)
Kafka_max_concurrent_fetches int `json:"kafka_max_concurrent_fetches"` // max simultaneous Kafka consumers (default: 10)
Kafka_prefetch_count int `json:"kafka_prefetch_count"` // messages to prefetch on cache miss (default: 5)

Docker DockerConfig `json:"docker"`
Limits LimitsConfig `json:"limits"`
InstallerLimits LimitsConfig `json:"installer_limits"` // limits profile for installers
@@ -291,9 +296,12 @@ func getDefaultConfigForPatching(olPath string) (*Config, error) {
Pkgs_dir: packagesDir,
Sandbox_config: map[string]any{},
SOCK_base_path: baseImgDir,
Registry_cache_ms: 5000, // 5 seconds
Mem_pool_mb: memPoolMb,
Import_cache_tree: zygoteTreePath,
Registry_cache_ms: 5000, // 5 seconds
Mem_pool_mb: memPoolMb,
Import_cache_tree: zygoteTreePath,
Kafka_cache_size_mb: 256,
Kafka_max_concurrent_fetches: 10,
Kafka_prefetch_count: 5,
Docker: DockerConfig{
Base_image: "ol-min",
},
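For reference, a minimal sketch of what these options could look like in a worker's config file, assuming the usual JSON layout implied by the struct tags; the values shown are just the defaults set above, and all other fields are omitted:

```json
{
  "kafka_cache_size_mb": 256,
  "kafka_max_concurrent_fetches": 10,
  "kafka_prefetch_count": 5
}
```

Raising `kafka_prefetch_count` tends to trade extra cache memory for fewer short-lived consumers when offsets are read sequentially.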
145 changes: 145 additions & 0 deletions go/worker/event/kafkaFetcher.go
@@ -0,0 +1,145 @@
package event

import (
"context"
"fmt"
"log/slog"
"time"

"github.com/twmb/franz-go/pkg/kgo"
)

// KafkaFetcher retrieves Kafka messages, using a shared MessageCache as a
// read-through layer. On cache hit the message is returned immediately.
// On miss it prefetches multiple messages from Kafka, caches them all,
// and returns the requested one.
type KafkaFetcher struct {
cache *MessageCache
sem chan struct{}
prefetchCount int
}

// NewKafkaFetcher creates a KafkaFetcher backed by the given cache.
func NewKafkaFetcher(cache *MessageCache, maxConcurrent int, prefetchCount int) *KafkaFetcher {
if maxConcurrent <= 0 {
maxConcurrent = 10
}
if prefetchCount <= 0 {
prefetchCount = 5
}
return &KafkaFetcher{
cache: cache,
sem: make(chan struct{}, maxConcurrent),
prefetchCount: prefetchCount,
}
}

// Get returns the message at the given topic/partition/offset.
// It checks the cache first; on miss it fetches prefetchCount messages
// starting at offset, caches them all, and returns the requested one.
// Returns nil if no message is available.
func (kf *KafkaFetcher) Get(ctx context.Context, brokers []string, topic string, partition int32, offset int64) (*CachedMessage, error) {
key := CacheKey{Topic: topic, Partition: partition, Offset: offset}

if msg, hit := kf.cache.Get(key); hit {
return msg, nil
}

// Cache miss — prefetch from Kafka
records, err := kf.fetchFromKafka(ctx, brokers, topic, partition, offset, kf.prefetchCount)
if err != nil {
return nil, err
}

// Cache all fetched records
var result *CachedMessage
for _, r := range records {
headers := make(map[string]string)
for _, h := range r.Headers {
headers[h.Key] = string(h.Value)
}
msg := &CachedMessage{
Key: r.Key,
Value: r.Value,
Headers: headers,
Timestamp: r.Timestamp,
size: int64(len(r.Key) + len(r.Value) + 64),
}
kf.cache.Put(CacheKey{
Topic: r.Topic,
Partition: r.Partition,
Offset: r.Offset,
}, msg)

if r.Offset == offset {
result = msg
}
}

return result, nil
}

// fetchFromKafka creates a short-lived consumer, reads up to count records
// starting at offset, and closes the consumer. Blocks if the semaphore is full.
func (kf *KafkaFetcher) fetchFromKafka(ctx context.Context, brokers []string, topic string, partition int32, offset int64, count int) ([]*kgo.Record, error) {
select {
case kf.sem <- struct{}{}:
case <-ctx.Done():
return nil, ctx.Err()
}
defer func() { <-kf.sem }()

client, err := kgo.NewClient(
kgo.SeedBrokers(brokers...),
kgo.ConsumePartitions(map[string]map[int32]kgo.Offset{
topic: {
partition: kgo.NewOffset().At(offset),
},
}),
)
if err != nil {
return nil, fmt.Errorf("failed to create consumer for %s partition %d: %w", topic, partition, err)
}
defer client.Close()

var records []*kgo.Record
deadline := time.After(10 * time.Second)

for len(records) < count {
pollCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
fetches := client.PollFetches(pollCtx)
cancel()

if errs := fetches.Errors(); len(errs) > 0 {
for _, fe := range errs {
if fe.Err == context.DeadlineExceeded {
continue
}
slog.Warn("KafkaFetcher fetch error",
"topic", topic,
"partition", partition,
"error", fe.Err)
}
}

fetches.EachRecord(func(r *kgo.Record) {
if r.Partition == partition && r.Offset >= offset {
records = append(records, r)
}
})

if len(records) >= count {
break
}

select {
case <-deadline:
return records, nil
case <-ctx.Done():
return records, ctx.Err()
default:
}
}

return records, nil
}
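A minimal usage sketch (not part of this change) of how the fetcher might be wired up with the new config defaults. It assumes `NewMessageCache` takes a byte budget (as the tests suggest); the import path, broker address, topic, and offset are placeholders:

```go
package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"example.com/ol/worker/event" // placeholder import path for this package
)

func main() {
	// Mirror the config defaults: 256 MB cache, 10 concurrent fetches,
	// 5-message prefetch on a cache miss.
	cache := event.NewMessageCache(256 * 1024 * 1024)
	fetcher := event.NewKafkaFetcher(cache, 10, 5)

	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()

	// The first Get for an uncached offset creates a short-lived consumer
	// and caches up to 5 records starting at that offset.
	msg, err := fetcher.Get(ctx, []string{"localhost:9092"}, "jobs", 0, 42)
	if err != nil {
		log.Fatal(err)
	}
	if msg == nil {
		fmt.Println("no message at that offset")
		return
	}
	fmt.Printf("key=%s value=%s headers=%v\n", msg.Key, msg.Value, msg.Headers)

	// A follow-up Get for a nearby offset (e.g. 43) is typically served
	// straight from the cache, with no new Kafka client.
}
```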
90 changes: 90 additions & 0 deletions go/worker/event/kafkaFetcher_test.go
@@ -0,0 +1,90 @@
package event

import (
"context"
"testing"
"time"
)

func newTestFetcher(maxConcurrent int) *KafkaFetcher {
cache := NewMessageCache(1024 * 1024)
return NewKafkaFetcher(cache, maxConcurrent, 5)
}

func TestNewKafkaFetcher_DefaultConcurrency(t *testing.T) {
kf := newTestFetcher(0)
if cap(kf.sem) != 10 {
t.Fatalf("expected default capacity 10, got %d", cap(kf.sem))
}
}

func TestNewKafkaFetcher_CustomConcurrency(t *testing.T) {
kf := newTestFetcher(5)
if cap(kf.sem) != 5 {
t.Fatalf("expected capacity 5, got %d", cap(kf.sem))
}
}

func TestKafkaFetcher_CacheHit(t *testing.T) {
cache := NewMessageCache(1024 * 1024)
kf := NewKafkaFetcher(cache, 1, 5)

// Pre-populate the cache
key := CacheKey{Topic: "t", Partition: 0, Offset: 42}
cache.Put(key, &CachedMessage{
Value: []byte("cached-value"),
size: 100,
})

// Get should return the cached message without hitting Kafka
msg, err := kf.Get(context.Background(), []string{"localhost:9092"}, "t", 0, 42)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if msg == nil {
t.Fatal("expected cache hit, got nil")
}
if string(msg.Value) != "cached-value" {
t.Fatalf("expected 'cached-value', got %q", string(msg.Value))
}
}

func TestKafkaFetcher_SemaphoreBlocksAtCapacity(t *testing.T) {
kf := newTestFetcher(1)

// Fill the semaphore slot
kf.sem <- struct{}{}

ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
defer cancel()

// Get should block on semaphore and then fail with context deadline
_, err := kf.Get(ctx, []string{"localhost:9092"}, "test-topic", 0, 0)
if err == nil {
t.Fatal("expected error when semaphore is full and context expires")
}
if err != context.DeadlineExceeded {
t.Fatalf("expected DeadlineExceeded, got %v", err)
}

// Release the slot
<-kf.sem
}

func TestKafkaFetcher_SemaphoreReleasedAfterFetch(t *testing.T) {
kf := newTestFetcher(1)

// Get will fail (no real broker) but should still release the semaphore slot
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel()

kf.Get(ctx, []string{"localhost:19092"}, "nonexistent", 0, 0)

// Verify the semaphore slot was released by acquiring it without blocking
select {
case kf.sem <- struct{}{}:
<-kf.sem
default:
t.Fatal("semaphore slot was not released after Get")
}
}
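The tests above all run without a broker. As a hedged sketch only (not part of this PR), an integration-style test along these lines could verify that a miss at one offset also warms the cache for the next one; the broker address and topic are placeholders read from an environment variable, "os" would need to be added to the test file's imports, and the topic must already hold at least two messages on partition 0:

```go
func TestKafkaFetcher_PrefetchWarmsCache_Integration(t *testing.T) {
	brokers := os.Getenv("KAFKA_TEST_BROKERS") // e.g. "localhost:9092"; skipped if unset
	if brokers == "" {
		t.Skip("set KAFKA_TEST_BROKERS to run this integration test")
	}

	cache := NewMessageCache(64 * 1024 * 1024) // assumed to be a byte budget
	kf := NewKafkaFetcher(cache, 2, 5)

	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()

	// Miss at offset 0: fetches and caches up to 5 records starting there.
	if _, err := kf.Get(ctx, []string{brokers}, "test-topic", 0, 0); err != nil {
		t.Fatalf("initial fetch failed: %v", err)
	}

	// Offset 1 should now be served from the cache with no new consumer.
	if _, hit := cache.Get(CacheKey{Topic: "test-topic", Partition: 0, Offset: 1}); !hit {
		t.Fatal("expected offset 1 to be prefetched into the cache")
	}
}
```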