14 changes: 11 additions & 3 deletions go/common/config.go
@@ -62,6 +62,11 @@ type Config struct {
// pass through to sandbox environment variable
Sandbox_config any `json:"sandbox_config"`

// KAFKA CACHE OPTIONS
Kafka_cache_size_mb int `json:"kafka_cache_size_mb"` // max message cache size in MB (default: 256)
Kafka_max_concurrent_fetches int `json:"kafka_max_concurrent_fetches"` // max simultaneous Kafka consumers (default: 10)
Kafka_prefetch_count int `json:"kafka_prefetch_count"` // messages to prefetch on cache miss (default: 5)

Docker DockerConfig `json:"docker"`
Limits LimitsConfig `json:"limits"`
InstallerLimits LimitsConfig `json:"installer_limits"` // limits profile for installers
@@ -291,9 +296,12 @@ func getDefaultConfigForPatching(olPath string) (*Config, error) {
Pkgs_dir: packagesDir,
Sandbox_config: map[string]any{},
SOCK_base_path: baseImgDir,
Registry_cache_ms: 5000, // 5 seconds
Mem_pool_mb: memPoolMb,
Import_cache_tree: zygoteTreePath,
Registry_cache_ms: 5000, // 5 seconds
Mem_pool_mb: memPoolMb,
Import_cache_tree: zygoteTreePath,
Kafka_cache_size_mb: 256,
Kafka_max_concurrent_fetches: 10,
Kafka_prefetch_count: 5,
Docker: DockerConfig{
Base_image: "ol-min",
},
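For reference, a minimal sketch of what these options could look like in a worker's config file, assuming the usual JSON layout implied by the struct tags; the values shown are just the defaults set above, and all other fields are omitted:

```json
{
  "kafka_cache_size_mb": 256,
  "kafka_max_concurrent_fetches": 10,
  "kafka_prefetch_count": 5
}
```

Raising `kafka_prefetch_count` tends to trade extra cache memory for fewer short-lived consumers when offsets are read sequentially.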
145 changes: 145 additions & 0 deletions go/worker/event/kafkaFetcher.go
@@ -0,0 +1,145 @@
package event

import (
"context"
"fmt"
"log/slog"
"time"

"github.com/twmb/franz-go/pkg/kgo"
)

// KafkaFetcher retrieves Kafka messages, using a shared MessageCache as a
// read-through layer. On cache hit the message is returned immediately.
// On miss it prefetches multiple messages from Kafka, caches them all,
// and returns the requested one.
type KafkaFetcher struct {
cache *MessageCache
sem chan struct{}
prefetchCount int
}

// NewKafkaFetcher creates a KafkaFetcher backed by the given cache.
func NewKafkaFetcher(cache *MessageCache, maxConcurrent int, prefetchCount int) *KafkaFetcher {
if maxConcurrent <= 0 {
maxConcurrent = 10
}
if prefetchCount <= 0 {
prefetchCount = 5
}
return &KafkaFetcher{
cache: cache,
sem: make(chan struct{}, maxConcurrent),
prefetchCount: prefetchCount,
}
}

// Get returns the message at the given topic/partition/offset.
// It checks the cache first; on miss it fetches prefetchCount messages
// starting at offset, caches them all, and returns the requested one.
// Returns nil if no message is available.
func (kf *KafkaFetcher) Get(ctx context.Context, brokers []string, topic string, partition int32, offset int64) (*CachedMessage, error) {
key := CacheKey{Topic: topic, Partition: partition, Offset: offset}

if msg, hit := kf.cache.Get(key); hit {
return msg, nil
}

// Cache miss — prefetch from Kafka
records, err := kf.fetchFromKafka(ctx, brokers, topic, partition, offset, kf.prefetchCount)
if err != nil {
return nil, err
}

// Cache all fetched records
var result *CachedMessage
for _, r := range records {
headers := make(map[string]string)
for _, h := range r.Headers {
headers[h.Key] = string(h.Value)
}
msg := &CachedMessage{
Key: r.Key,
Value: r.Value,
Headers: headers,
Timestamp: r.Timestamp,
size: int64(len(r.Key) + len(r.Value) + 64),
}
kf.cache.Put(CacheKey{
Topic: r.Topic,
Partition: r.Partition,
Offset: r.Offset,
}, msg)

if r.Offset == offset {
result = msg
}
}

return result, nil
}

// fetchFromKafka creates a short-lived consumer, reads up to count records
// starting at offset, and closes the consumer. Blocks if the semaphore is full.
func (kf *KafkaFetcher) fetchFromKafka(ctx context.Context, brokers []string, topic string, partition int32, offset int64, count int) ([]*kgo.Record, error) {
select {
case kf.sem <- struct{}{}:
case <-ctx.Done():
return nil, ctx.Err()
}
defer func() { <-kf.sem }()

client, err := kgo.NewClient(
kgo.SeedBrokers(brokers...),
kgo.ConsumePartitions(map[string]map[int32]kgo.Offset{
topic: {
partition: kgo.NewOffset().At(offset),
},
}),
)
if err != nil {
return nil, fmt.Errorf("failed to create consumer for %s partition %d: %w", topic, partition, err)
}
defer client.Close()

var records []*kgo.Record
deadline := time.After(10 * time.Second)

for len(records) < count {
pollCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
fetches := client.PollFetches(pollCtx)
cancel()

if errs := fetches.Errors(); len(errs) > 0 {
for _, fe := range errs {
if fe.Err == context.DeadlineExceeded {
continue
}
slog.Warn("KafkaFetcher fetch error",
"topic", topic,
"partition", partition,
"error", fe.Err)
}
}

fetches.EachRecord(func(r *kgo.Record) {
if r.Partition == partition && r.Offset >= offset {
records = append(records, r)
}
})

if len(records) >= count {
break
}

select {
case <-deadline:
return records, nil
case <-ctx.Done():
return records, ctx.Err()
default:
}
}

return records, nil
}
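A minimal usage sketch (not part of this change) of how the fetcher might be wired up with the new config defaults. It assumes `NewMessageCache` takes a byte budget (as the tests suggest); the import path, broker address, topic, and offset are placeholders:

```go
package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"example.com/ol/worker/event" // placeholder import path for this package
)

func main() {
	// Mirror the config defaults: 256 MB cache, 10 concurrent fetches,
	// 5-message prefetch on a cache miss.
	cache := event.NewMessageCache(256 * 1024 * 1024)
	fetcher := event.NewKafkaFetcher(cache, 10, 5)

	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()

	// The first Get for an uncached offset creates a short-lived consumer
	// and caches up to 5 records starting at that offset.
	msg, err := fetcher.Get(ctx, []string{"localhost:9092"}, "jobs", 0, 42)
	if err != nil {
		log.Fatal(err)
	}
	if msg == nil {
		fmt.Println("no message at that offset")
		return
	}
	fmt.Printf("key=%s value=%s headers=%v\n", msg.Key, msg.Value, msg.Headers)

	// A follow-up Get for a nearby offset (e.g. 43) is typically served
	// straight from the cache, with no new Kafka client.
}
```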
90 changes: 90 additions & 0 deletions go/worker/event/kafkaFetcher_test.go
@@ -0,0 +1,90 @@
package event

import (
"context"
"testing"
"time"
)

func newTestFetcher(maxConcurrent int) *KafkaFetcher {
cache := NewMessageCache(1024 * 1024)
return NewKafkaFetcher(cache, maxConcurrent, 5)
}

func TestNewKafkaFetcher_DefaultConcurrency(t *testing.T) {
kf := newTestFetcher(0)
if cap(kf.sem) != 10 {
t.Fatalf("expected default capacity 10, got %d", cap(kf.sem))
}
}

func TestNewKafkaFetcher_CustomConcurrency(t *testing.T) {
kf := newTestFetcher(5)
if cap(kf.sem) != 5 {
t.Fatalf("expected capacity 5, got %d", cap(kf.sem))
}
}

func TestKafkaFetcher_CacheHit(t *testing.T) {
cache := NewMessageCache(1024 * 1024)
kf := NewKafkaFetcher(cache, 1, 5)

// Pre-populate the cache
key := CacheKey{Topic: "t", Partition: 0, Offset: 42}
cache.Put(key, &CachedMessage{
Value: []byte("cached-value"),
size: 100,
})

// Get should return the cached message without hitting Kafka
msg, err := kf.Get(context.Background(), []string{"localhost:9092"}, "t", 0, 42)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if msg == nil {
t.Fatal("expected cache hit, got nil")
}
if string(msg.Value) != "cached-value" {
t.Fatalf("expected 'cached-value', got %q", string(msg.Value))
}
}

func TestKafkaFetcher_SemaphoreBlocksAtCapacity(t *testing.T) {
kf := newTestFetcher(1)

// Fill the semaphore slot
kf.sem <- struct{}{}

ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
defer cancel()

// Get should block on semaphore and then fail with context deadline
_, err := kf.Get(ctx, []string{"localhost:9092"}, "test-topic", 0, 0)
if err == nil {
t.Fatal("expected error when semaphore is full and context expires")
}
if err != context.DeadlineExceeded {
t.Fatalf("expected DeadlineExceeded, got %v", err)
}

// Release the slot
<-kf.sem
}

func TestKafkaFetcher_SemaphoreReleasedAfterFetch(t *testing.T) {
kf := newTestFetcher(1)

// Get will fail (no real broker) but should still release the semaphore slot
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel()

kf.Get(ctx, []string{"localhost:19092"}, "nonexistent", 0, 0)

// Verify the semaphore slot was released by acquiring it without blocking
select {
case kf.sem <- struct{}{}:
<-kf.sem
default:
t.Fatal("semaphore slot was not released after Get")
}
}
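The tests above all run without a broker. As a hedged sketch only (not part of this PR), an integration-style test along these lines could verify that a miss at one offset also warms the cache for the next one; the broker address and topic are placeholders read from an environment variable, "os" would need to be added to the test file's imports, and the topic must already hold at least two messages on partition 0:

```go
func TestKafkaFetcher_PrefetchWarmsCache_Integration(t *testing.T) {
	brokers := os.Getenv("KAFKA_TEST_BROKERS") // e.g. "localhost:9092"; skipped if unset
	if brokers == "" {
		t.Skip("set KAFKA_TEST_BROKERS to run this integration test")
	}

	cache := NewMessageCache(64 * 1024 * 1024) // assumed to be a byte budget
	kf := NewKafkaFetcher(cache, 2, 5)

	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()

	// Miss at offset 0: fetches and caches up to 5 records starting there.
	if _, err := kf.Get(ctx, []string{brokers}, "test-topic", 0, 0); err != nil {
		t.Fatalf("initial fetch failed: %v", err)
	}

	// Offset 1 should now be served from the cache with no new consumer.
	if _, hit := cache.Get(CacheKey{Topic: "test-topic", Partition: 0, Offset: 1}); !hit {
		t.Fatal("expected offset 1 to be prefetched into the cache")
	}
}
```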