🔥 Hardest Problem
Cache invalidation is famously difficult. Choose strategy based on consistency needs.
“There are only two hard things in Computer Science: cache invalidation and naming things.”
— Phil Karlton
Cache invalidation is deciding when and how to remove or update cached data when the underlying data changes.
Why is it hard?
Stale data = cached data that doesn’t match the database anymore.
Impact:
Update cache when data is written. Cache and database stay in sync.
How it works:
When to use:
Trade-offs:
class WriteThroughCache:
    """Write-through cache: every write updates the cache AND the database.

    Keeps cache and database in sync at the cost of higher write latency
    (two writes per update). Annotations are forward references so the
    snippet can run without the CacheClient/Database definitions.
    """

    def __init__(self, cache: "CacheClient", db: "Database"):
        self.cache = cache
        self.db = db

    def update_user(self, user_id: int, data: dict) -> bool:
        """Write *data* for *user_id* to both cache and database."""
        # Write to cache
        cache_key = f"user:{user_id}"
        self.cache.set(cache_key, data, ttl=3600)

        # Write to database
        self.db.update_user(user_id, data)

        # Both complete - return success
        return True
5 public WriteThroughCache(CacheClient cache, Database db) {6 this.cache = cache;7 this.db = db;8 }9
10 public boolean updateUser(int userId, UserData data) {11 // Write to cache12 String cacheKey = "user:" + userId;13 cache.set(cacheKey, serialize(data), 3600);14
15 // Write to database16 db.updateUser(userId, data);17
18 // Both complete - return success19 return true;20 }21}Delete cache on write. Next read fetches fresh data.
How it works:
When to use:
Trade-offs:
class WriteInvalidateCache:
    """Write-invalidate cache: delete the cache entry on write.

    The next read misses and re-populates the cache from the database
    (cache-aside). Eventual consistency, low write latency. Annotations
    are forward references so the snippet runs standalone.
    """

    def __init__(self, cache: "CacheClient", db: "Database"):
        self.cache = cache
        self.db = db

    def update_user(self, user_id: int, data: dict) -> bool:
        """Persist *data* for *user_id*, then drop the stale cache entry."""
        # Update database
        self.db.update_user(user_id, data)

        # Invalidate cache
        cache_key = f"user:{user_id}"
        self.cache.delete(cache_key)

        return True

    def get_user(self, user_id: int):
        """Return the user, serving from cache when possible (cache-aside)."""
        # Cache-aside pattern
        cache_key = f"user:{user_id}"
        cached = self.cache.get(cache_key)

        if cached:
            return cached

        # Cache miss - fetch from DB
        user = self.db.get_user(user_id)

        # Cache it
        if user:
            self.cache.set(cache_key, user, ttl=3600)

        return user
5 public WriteInvalidateCache(CacheClient cache, Database db) {6 this.cache = cache;7 this.db = db;8 }9
10 public boolean updateUser(int userId, UserData data) {11 // Update database12 db.updateUser(userId, data);13
14 // Invalidate cache15 String cacheKey = "user:" + userId;16 cache.delete(cacheKey);17
18 return true;19 }20
21 public Optional<User> getUser(int userId) {22 // Cache-aside pattern23 String cacheKey = "user:" + userId;24 Optional<String> cached = cache.get(cacheKey);25
26 if (cached.isPresent()) {27 return Optional.of(deserialize(cached.get()));28 }29
30 // Cache miss - fetch from DB31 Optional<User> user = db.getUser(userId);32
33 // Cache it34 user.ifPresent(u -> cache.set(cacheKey, serialize(u), 3600));35
36 return user;37 }38}Time-based expiration. Items expire after fixed time.
How it works:
When to use:
Trade-offs:
Invalidate on data change events. Most sophisticated approach.
How it works:
When to use:
Trade-offs:
Cache stampede (thundering herd) happens when cache expires and many requests simultaneously try to fetch from database.
Impact:
Only one request fetches; others wait.
How it works:
import redis
import time
import uuid


class CacheWithLock:
    """Stampede protection via a Redis distributed lock.

    On a cache miss, only the request that wins the lock fetches from the
    database; everyone else briefly waits and re-reads the cache.
    """

    def __init__(self, redis_client: redis.Redis):
        self.redis = redis_client
        self.lock_ttl = 10  # seconds; lock auto-expires if the holder dies

    def get_with_lock(self, key: str, fetch_func):
        """Return the cached value for *key*, using *fetch_func* on a miss."""
        # Try cache first
        cached = self.redis.get(key)
        if cached:
            return cached

        # Cache miss - try to acquire lock
        lock_key = f"lock:{key}"
        lock_value = str(uuid.uuid4())

        # Try to acquire lock (set if not exists)
        acquired = self.redis.set(
            lock_key, lock_value,
            nx=True, ex=self.lock_ttl
        )

        if acquired:
            # We got the lock - fetch from DB
            try:
                data = fetch_func()
                self.redis.set(key, data, ex=300)
                return data
            finally:
                # Release lock (only if we still own it)
                # NOTE(review): this get-then-delete is not atomic, and with
                # a default client GET returns bytes while lock_value is str
                # (use decode_responses=True). Production code should release
                # via a Lua script - confirm against your Redis setup.
                if self.redis.get(lock_key) == lock_value:
                    self.redis.delete(lock_key)
        else:
            # Someone else has lock - wait and retry
            time.sleep(0.1)
            # Retry cache (winner should have cached it)
            return self.redis.get(key) or fetch_func()
4class CacheWithLock {5 private final Jedis redis;6 private final int lockTtl = 10;7
8 public CacheWithLock(Jedis redis) {9 this.redis = redis;10 }11
12 public String getWithLock(String key, Supplier<String> fetchFunc) {13 // Try cache first14 String cached = redis.get(key);15 if (cached != null) {16 return cached;17 }18
19 // Cache miss - try to acquire lock20 String lockKey = "lock:" + key;21 String lockValue = UUID.randomUUID().toString();22
23 // Try to acquire lock (set if not exists)24 boolean acquired = "OK".equals(25 redis.set(lockKey, lockValue, "NX", "EX", lockTtl)26 );27
28 if (acquired) {29 // We got the lock - fetch from DB30 try {31 String data = fetchFunc.get();32 redis.setex(key, 300, data);33 return data;34 } finally {35 // Release lock (only if we still own it)36 if (lockValue.equals(redis.get(lockKey))) {37 redis.del(lockKey);38 }39 }40 } else {41 // Someone else has lock - wait and retry42 try {43 Thread.sleep(100);44 } catch (InterruptedException e) {45 Thread.currentThread().interrupt();46 }47 // Retry cache (winner should have cached it)48 String retry = redis.get(key);49 return retry != null ? retry : fetchFunc.get();50 }51 }52}Refresh cache randomly before expiration. Spreads load over time.
How it works:
import random
import time


class ProbabilisticRefreshCache:
    """Probabilistic early refresh to spread cache-refresh load over time.

    On each cache hit there is a small chance of refreshing the entry
    before it expires, so expiry never hits all requests at once.
    Annotations are forward references so the snippet runs standalone.
    """

    def __init__(self, cache: "CacheClient", db: "Database"):
        self.cache = cache
        self.db = db

    def get_with_refresh(self, key: str, ttl: int = 300):
        """Return the value for *key*, occasionally refreshing it early."""
        cached_data = self.cache.get(key)

        if cached_data:
            # Check if we should refresh (last 10% of TTL)
            # This is simplified - in practice, store timestamp
            if random.random() < 0.1:  # 10% chance
                # Refresh in background
                self._refresh_async(key, ttl)

            return cached_data

        # Cache miss - fetch and cache
        data = self.db.fetch(key)
        self.cache.set(key, data, ttl=ttl)
        return data

    def _refresh_async(self, key: str, ttl: int):
        # Background refresh (simplified - runs inline here, not truly async)
        data = self.db.fetch(key)
        self.cache.set(key, data, ttl=ttl)
3class ProbabilisticRefreshCache {4 private final CacheClient cache;5 private final Database db;6 private final Random random = new Random();7
8 public ProbabilisticRefreshCache(CacheClient cache, Database db) {9 this.cache = cache;10 this.db = db;11 }12
13 public String getWithRefresh(String key, int ttl) {14 String cached = cache.get(key);15
16 if (cached != null) {17 // Check if we should refresh (last 10% of TTL)18 if (random.nextDouble() < 0.1) { // 10% chance19 // Refresh in background20 refreshAsync(key, ttl);21 }22
23 return cached;24 }25
26 // Cache miss - fetch and cache27 String data = db.fetch(key);28 cache.set(key, data, ttl);29 return data;30 }31
32 private void refreshAsync(String key, int ttl) {33 // Background refresh (simplified)34 String data = db.fetch(key);35 cache.set(key, data, ttl);36 }37}| Strategy | Consistency | Complexity | Latency | Use Case |
|---|---|---|---|---|
| Write-Through | Strong | Low | High (write) | Critical data |
| Write-Invalidate | Eventual | Low | Low (write) | General purpose |
| TTL | Eventual | Low | Low | Read-heavy |
| Event-Driven | Strong | High | Low | Complex systems |
🔥 Hardest Problem
Cache invalidation is famously difficult. Choose strategy based on consistency needs.
⚡ Write-Invalidate
Most common: delete cache on write. Next read fetches fresh. Simple and effective.
🐘 Prevent Stampede
Use distributed locks or probabilistic refresh to prevent cache stampede.
🔄 Event-Driven
For complex systems, use event-driven invalidation for precise control.