🪣 Token Bucket: Most Popular
The token bucket allows short bursts while smoothing sustained traffic to an average rate. It is the most widely used rate-limiting algorithm.
A bucket holds tokens that refill at a fixed rate; each request consumes one token. When the bucket is empty, requests are rejected.
Characteristics:

- Allows bursts up to the bucket capacity
- Smooths sustained traffic to the refill rate
- Low memory: one token count and timestamp per key
- Medium implementation complexity
Algorithm:
- Bucket holds up to `capacity` tokens
- Tokens are added at `refill_rate` per second

Python:

```python
import time
from threading import Lock


class TokenBucket:
    """Token bucket rate limiter"""

    def __init__(self, capacity: int, refill_rate: float):
        """
        Args:
            capacity: Maximum tokens in bucket
            refill_rate: Tokens added per second
        """
        self.capacity = capacity
        self.refill_rate = refill_rate
        self.tokens = capacity
        self.last_refill = time.time()
        self.lock = Lock()

    def acquire(self, tokens: int = 1) -> bool:
        """Try to acquire tokens. Returns True if successful."""
        with self.lock:
            self._refill()

            if self.tokens >= tokens:
                self.tokens -= tokens
                return True
            else:
                return False

    def _refill(self):
        """Refill tokens based on elapsed time"""
        now = time.time()
        elapsed = now - self.last_refill

        # Calculate tokens to add
        tokens_to_add = elapsed * self.refill_rate

        # Add tokens (don't exceed capacity)
        self.tokens = min(self.capacity, self.tokens + tokens_to_add)
        self.last_refill = now

    def get_available_tokens(self) -> int:
        """Get current number of available tokens"""
        with self.lock:
            self._refill()
            return int(self.tokens)


# Usage
rate_limiter = TokenBucket(capacity=10, refill_rate=2.0)  # 10 tokens, refill 2/sec


def handle_request():
    if rate_limiter.acquire():
        # Process request
        return "Request processed"
    else:
        return "Rate limit exceeded", 429
```

Java:

```java
import java.util.concurrent.locks.ReentrantLock;

public class TokenBucket {
    private final int capacity;
    private final double refillRate;
    private double tokens;
    private long lastRefill;
    private final ReentrantLock lock = new ReentrantLock();

    public TokenBucket(int capacity, double refillRate) {
        this.capacity = capacity;
        this.refillRate = refillRate;
        this.tokens = capacity;
        this.lastRefill = System.currentTimeMillis();
    }

    public boolean acquire(int tokensToConsume) {
        lock.lock();
        try {
            refill();

            if (tokens >= tokensToConsume) {
                tokens -= tokensToConsume;
                return true;
            } else {
                return false;
            }
        } finally {
            lock.unlock();
        }
    }

    private void refill() {
        long now = System.currentTimeMillis();
        double elapsed = (now - lastRefill) / 1000.0; // Convert to seconds

        // Calculate tokens to add
        double tokensToAdd = elapsed * refillRate;

        // Add tokens (don't exceed capacity)
        tokens = Math.min(capacity, tokens + tokensToAdd);
        lastRefill = now;
    }

    public int getAvailableTokens() {
        lock.lock();
        try {
            refill();
            return (int) tokens;
        } finally {
            lock.unlock();
        }
    }
}

// Usage
TokenBucket rateLimiter = new TokenBucket(10, 2.0); // 10 tokens, refill 2/sec

public String handleRequest() {
    if (rateLimiter.acquire(1)) {
        // Process request
        return "Request processed";
    } else {
        return "Rate limit exceeded"; // Return 429
    }
}
```

Leaky Bucket

The bucket holds queued requests that leak out (are processed) at a fixed rate; if the bucket is full, new requests are rejected.
Characteristics:

- No bursts: output is smoothed to a constant rate
- Requests queue up to the bucket capacity; excess requests are rejected
- Extra memory for the queued requests
- Medium implementation complexity
Algorithm:
- Bucket (queue) holds up to `capacity` requests
- Requests are processed at `leak_rate` per second

Python:

```python
import time
import queue
from threading import Lock, Thread


class LeakyBucket:
    """Leaky bucket rate limiter"""

    def __init__(self, capacity: int, leak_rate: float):
        """
        Args:
            capacity: Maximum requests in bucket
            leak_rate: Requests processed per second
        """
        self.capacity = capacity
        self.leak_rate = leak_rate
        self.bucket = queue.Queue(maxsize=capacity)
        self.lock = Lock()
        self.processing = False

    def start_processing(self):
        """Start the background worker that drains the bucket"""
        if not self.processing:
            self.processing = True
            Thread(target=self._process_requests, daemon=True).start()

    def add_request(self, request) -> bool:
        """Try to add request to bucket. Returns True if successful."""
        try:
            self.bucket.put_nowait(request)
            return True
        except queue.Full:
            return False

    def _process_requests(self):
        """Process requests at the leak rate"""
        interval = 1.0 / self.leak_rate  # Time between processed requests

        while self.processing:
            try:
                request = self.bucket.get(timeout=interval)
            except queue.Empty:
                continue
            self._handle_request(request)
            # Sleep so requests leak out at the configured rate
            time.sleep(interval)

    def _handle_request(self, request):
        """Handle processed request"""
        # Override in subclass or pass a handler
        pass
```

Java:

```java
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class LeakyBucket {
    private final int capacity;
    private final double leakRate;
    private final BlockingQueue<Request> bucket; // Request = your application's request type
    private final ScheduledExecutorService scheduler;

    public LeakyBucket(int capacity, double leakRate) {
        this.capacity = capacity;
        this.leakRate = leakRate;
        this.bucket = new LinkedBlockingQueue<>(capacity);
        this.scheduler = Executors.newScheduledThreadPool(1);

        // Start processing
        startProcessing();
    }

    public boolean addRequest(Request request) {
        // Try to add request to bucket; false means the bucket is full
        return bucket.offer(request);
    }

    private void startProcessing() {
        long interval = (long) (1000 / leakRate); // Milliseconds between requests

        scheduler.scheduleAtFixedRate(() -> {
            Request request = bucket.poll();
            if (request != null) {
                handleRequest(request);
            }
        }, 0, interval, TimeUnit.MILLISECONDS);
    }

    private void handleRequest(Request request) {
        // Process request
    }
}
```
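A quick usage sketch for the Python `LeakyBucket` above; the subclass, handler, and request payloads are made up for illustration (overriding `_handle_request` is the hook the class provides).

```python
import time

class PrintingLeakyBucket(LeakyBucket):
    def _handle_request(self, request):
        print("processing", request)

bucket = PrintingLeakyBucket(capacity=10, leak_rate=2.0)  # queue up to 10, drain 2/sec
bucket.start_processing()

for i in range(15):
    if not bucket.add_request(f"req-{i}"):
        print("rejected", f"req-{i}")  # bucket full: excess requests are dropped

time.sleep(3)  # give the background worker time to drain part of the queue
```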
Sliding Window

Tracks the timestamp of each request in a sliding time window, which makes it more accurate than a fixed window.

Characteristics:

- Very high accuracy: the limit is enforced over a true rolling window
- No bursts at window boundaries
- Higher memory use: one timestamp is stored per tracked request
- Higher implementation complexity
Python:

```python
import time
from collections import deque
from threading import Lock


class SlidingWindowRateLimiter:
    """Sliding window rate limiter"""

    def __init__(self, limit: int, window_seconds: int):
        """
        Args:
            limit: Maximum requests allowed
            window_seconds: Time window in seconds
        """
        self.limit = limit
        self.window_seconds = window_seconds
        self.requests = deque()  # Store request timestamps
        self.lock = Lock()

    def is_allowed(self) -> bool:
        """Check if request is allowed"""
        with self.lock:
            now = time.time()

            # Remove old requests outside window
            while self.requests and self.requests[0] < now - self.window_seconds:
                self.requests.popleft()

            # Check if under limit
            if len(self.requests) < self.limit:
                self.requests.append(now)
                return True
            else:
                return False

    def get_remaining_requests(self) -> int:
        """Get remaining requests in current window"""
        with self.lock:
            now = time.time()

            # Remove old requests
            while self.requests and self.requests[0] < now - self.window_seconds:
                self.requests.popleft()

            return max(0, self.limit - len(self.requests))
```

Java:

```java
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.locks.ReentrantLock;

public class SlidingWindowRateLimiter {
    private final int limit;
    private final long windowMillis;
    private final ConcurrentLinkedDeque<Long> requests;
    private final ReentrantLock lock = new ReentrantLock();

    public SlidingWindowRateLimiter(int limit, int windowSeconds) {
        this.limit = limit;
        this.windowMillis = windowSeconds * 1000L;
        this.requests = new ConcurrentLinkedDeque<>();
    }

    public boolean isAllowed() {
        lock.lock();
        try {
            long now = System.currentTimeMillis();

            // Remove old requests outside window
            while (!requests.isEmpty() && requests.peekFirst() < now - windowMillis) {
                requests.pollFirst();
            }

            // Check if under limit
            if (requests.size() < limit) {
                requests.addLast(now);
                return true;
            } else {
                return false;
            }
        } finally {
            lock.unlock();
        }
    }

    public int getRemainingRequests() {
        lock.lock();
        try {
            long now = System.currentTimeMillis();

            // Remove old requests
            while (!requests.isEmpty() && requests.peekFirst() < now - windowMillis) {
                requests.pollFirst();
            }

            return Math.max(0, limit - requests.size());
        } finally {
            lock.unlock();
        }
    }
}
```

Fixed Window

Divides time into fixed windows. Simple but allows bursts.
Characteristics:

- Simple and cheap: one counter and window start time per key
- Medium accuracy: a burst straddling a window boundary can briefly let through up to twice the limit
- Low memory use
- Low implementation complexity
Python:

```python
import time
from threading import Lock


class FixedWindowRateLimiter:
    """Fixed window rate limiter"""

    def __init__(self, limit: int, window_seconds: int):
        """
        Args:
            limit: Maximum requests per window
            window_seconds: Window size in seconds
        """
        self.limit = limit
        self.window_seconds = window_seconds
        self.count = 0
        self.window_start = time.time()
        self.lock = Lock()

    def is_allowed(self) -> bool:
        """Check if request is allowed"""
        with self.lock:
            now = time.time()

            # Check if window expired
            if now - self.window_start >= self.window_seconds:
                # Reset window
                self.count = 0
                self.window_start = now

            # Check limit
            if self.count < self.limit:
                self.count += 1
                return True
            else:
                return False
```

Java:

```java
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantLock;

public class FixedWindowRateLimiter {
    private final int limit;
    private final long windowMillis;
    private final AtomicInteger count = new AtomicInteger(0);
    private final AtomicLong windowStart = new AtomicLong(System.currentTimeMillis());
    private final ReentrantLock lock = new ReentrantLock();

    public FixedWindowRateLimiter(int limit, int windowSeconds) {
        this.limit = limit;
        this.windowMillis = windowSeconds * 1000L;
    }

    public boolean isAllowed() {
        lock.lock();
        try {
            long now = System.currentTimeMillis();

            // Check if window expired
            if (now - windowStart.get() >= windowMillis) {
                // Reset window
                count.set(0);
                windowStart.set(now);
            }

            // Check limit
            if (count.get() < limit) {
                count.incrementAndGet();
                return true;
            } else {
                return false;
            }
        } finally {
            lock.unlock();
        }
    }
}
```
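To see the boundary burst in practice, here is a small sketch using the Python `FixedWindowRateLimiter` above with deliberately tiny numbers: the limiter is configured for 5 requests per second, yet about 10 requests are accepted within roughly one second spanning a window boundary.

```python
import time

limiter = FixedWindowRateLimiter(limit=5, window_seconds=1)

# 5 requests in the first window are all allowed ...
allowed_first = sum(limiter.is_allowed() for _ in range(5))

time.sleep(1)  # cross into the next window, which resets the counter

# ... and 5 more immediately after the boundary are allowed too,
# so ~10 requests pass in roughly one second despite the 5/sec limit.
allowed_second = sum(limiter.is_allowed() for _ in range(5))
print(allowed_first + allowed_second)  # 10
```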
Distributed Rate Limiting

For multiple servers, use Redis:

Python:

```python
import redis
import time


class DistributedTokenBucket:
    """Distributed token bucket using Redis"""

    def __init__(self, redis_client: redis.Redis, key_prefix: str,
                 capacity: int, refill_rate: float):
        self.redis = redis_client
        self.key_prefix = key_prefix
        self.capacity = capacity
        self.refill_rate = refill_rate

    def is_allowed(self, identifier: str) -> bool:
        """Check if request is allowed for identifier"""
        key = f"{self.key_prefix}:{identifier}"
        now = time.time()

        # Use a Lua script so read-refill-consume happens atomically
        lua_script = """
        local key = KEYS[1]
        local capacity = tonumber(ARGV[1])
        local refill_rate = tonumber(ARGV[2])
        local now = tonumber(ARGV[3])

        local bucket = redis.call('HMGET', key, 'tokens', 'last_refill')
        local tokens = tonumber(bucket[1]) or capacity
        local last_refill = tonumber(bucket[2]) or now

        -- Refill tokens
        local elapsed = now - last_refill
        local tokens_to_add = elapsed * refill_rate
        tokens = math.min(capacity, tokens + tokens_to_add)

        -- Check if we can consume a token
        if tokens >= 1 then
            tokens = tokens - 1
            redis.call('HMSET', key, 'tokens', tokens, 'last_refill', now)
            redis.call('EXPIRE', key, 3600)  -- Expire after 1 hour
            return 1
        else
            redis.call('HMSET', key, 'tokens', tokens, 'last_refill', now)
            redis.call('EXPIRE', key, 3600)
            return 0
        end
        """

        result = self.redis.eval(lua_script, 1, key,
                                 self.capacity, self.refill_rate, now)
        return bool(result)
```

Java:

```java
import java.util.Arrays;
import java.util.Collections;

import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;

public class DistributedTokenBucket {
    private final JedisPool jedisPool;
    private final String keyPrefix;
    private final int capacity;
    private final double refillRate;

    private static final String LUA_SCRIPT =
        "local key = KEYS[1]\n" +
        "local capacity = tonumber(ARGV[1])\n" +
        "local refill_rate = tonumber(ARGV[2])\n" +
        "local now = tonumber(ARGV[3])\n" +
        "local bucket = redis.call('HMGET', key, 'tokens', 'last_refill')\n" +
        "local tokens = tonumber(bucket[1]) or capacity\n" +
        "local last_refill = tonumber(bucket[2]) or now\n" +
        "local elapsed = now - last_refill\n" +
        "local tokens_to_add = elapsed * refill_rate\n" +
        "tokens = math.min(capacity, tokens + tokens_to_add)\n" +
        "if tokens >= 1 then\n" +
        "  tokens = tokens - 1\n" +
        "  redis.call('HMSET', key, 'tokens', tokens, 'last_refill', now)\n" +
        "  redis.call('EXPIRE', key, 3600)\n" +
        "  return 1\n" +
        "else\n" +
        "  redis.call('HMSET', key, 'tokens', tokens, 'last_refill', now)\n" +
        "  redis.call('EXPIRE', key, 3600)\n" +
        "  return 0\n" +
        "end";

    public DistributedTokenBucket(JedisPool jedisPool, String keyPrefix,
                                  int capacity, double refillRate) {
        this.jedisPool = jedisPool;
        this.keyPrefix = keyPrefix;
        this.capacity = capacity;
        this.refillRate = refillRate;
    }

    public boolean isAllowed(String identifier) {
        try (Jedis jedis = jedisPool.getResource()) {
            String key = keyPrefix + ":" + identifier;
            long now = System.currentTimeMillis() / 1000; // Seconds, matching refillRate

            Object result = jedis.eval(LUA_SCRIPT,
                Collections.singletonList(key),
                Arrays.asList(
                    String.valueOf(capacity),
                    String.valueOf(refillRate),
                    String.valueOf(now)
                ));

            return ((Long) result) == 1L;
        }
    }
}
```

The limiter is typically keyed by client IP, user ID, or API key, with separate limits per pricing tier:

```python
# These snippets assume a Flask-style `request` object, application helpers
# get_user_id / get_user_tier, an existing `redis_client`, and a per-key
# limiter such as the Redis-backed DistributedTokenBucket above
# (the in-process TokenBucket has no per-key tracking).
rate_limiter = DistributedTokenBucket(redis_client, "ratelimit",  # prefix is arbitrary
                                      capacity=100, refill_rate=10)

# By client IP
client_ip = request.remote_addr
if not rate_limiter.is_allowed(client_ip):
    return "Rate limit exceeded", 429

# By user ID
user_id = get_user_id(request)
if not rate_limiter.is_allowed(f"user:{user_id}"):
    return "Rate limit exceeded", 429

# By API key
api_key = request.headers.get('X-API-Key')
if not rate_limiter.is_allowed(f"key:{api_key}"):
    return "Rate limit exceeded", 429

# Different limits for different tiers
limits = {
    'free': DistributedTokenBucket(redis_client, 'free', 100, 10),
    'premium': DistributedTokenBucket(redis_client, 'premium', 1000, 100),
    'enterprise': DistributedTokenBucket(redis_client, 'enterprise', 10000, 1000),
}

tier = get_user_tier(user_id)
if not limits[tier].is_allowed(user_id):
    return "Rate limit exceeded", 429
```

Inform clients about rate limits:
```http
HTTP/1.1 200 OK
X-RateLimit-Limit: 100
X-RateLimit-Remaining: 95
X-RateLimit-Reset: 1640995200
```

When the limit is exceeded:

```http
HTTP/1.1 429 Too Many Requests
X-RateLimit-Limit: 100
X-RateLimit-Remaining: 0
X-RateLimit-Reset: 1640995200
Retry-After: 60
```
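As an illustration, here is how a handler might attach these headers. This is a minimal sketch that assumes Flask and the `SlidingWindowRateLimiter` defined above, with a single global limiter for brevity; in practice you would key the limiter per client as shown earlier, and `X-RateLimit-Reset` would come from the limiter's window state (omitted here).

```python
from flask import Flask, jsonify

app = Flask(__name__)
limiter = SlidingWindowRateLimiter(limit=100, window_seconds=60)  # class from above

@app.route("/api/resource")
def resource():
    if limiter.is_allowed():
        response = jsonify({"status": "ok"})
    else:
        response = jsonify({"error": "Rate limit exceeded"})
        response.status_code = 429
        response.headers["Retry-After"] = "60"  # hint: length of the window

    # Always report the limit and what is left in the current window
    response.headers["X-RateLimit-Limit"] = "100"
    response.headers["X-RateLimit-Remaining"] = str(limiter.get_remaining_requests())
    return response
```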
Algorithm comparison:

| Algorithm | Bursts | Accuracy | Memory | Complexity |
|---|---|---|---|---|
| Token Bucket | ✅ Yes | High | Low | Medium |
| Leaky Bucket | ❌ No | High | Medium | Medium |
| Sliding Window | ❌ No | Very High | High | High |
| Fixed Window | ✅ Yes | Medium | Low | Low |
Recommendation: Use Token Bucket for most cases. It’s simple, accurate, and allows bursts.
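For example, a quick sketch of the burst-then-smooth behavior using the `TokenBucket` class from earlier (the numbers are arbitrary):

```python
import time

limiter = TokenBucket(capacity=5, refill_rate=1.0)  # burst of 5, then ~1 request/sec

# A burst of 5 requests is allowed immediately (the bucket starts full) ...
print([limiter.acquire() for _ in range(5)])  # [True, True, True, True, True]

# ... the next request is rejected because the bucket is empty ...
print(limiter.acquire())  # False

# ... and after waiting, tokens refill at the steady rate.
time.sleep(2)
print(limiter.get_available_tokens())  # roughly 2 tokens available again
```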
🪣 Token Bucket: Most Popular
The token bucket allows bursts while smoothing traffic to an average rate. It is the most widely used algorithm.
📊 Sliding Window: Most Accurate
The sliding window is the most accurate but uses more memory. Use it when accuracy is critical.
🌐 Distributed: Use Redis
For multiple servers, use Redis with Lua scripts for atomic operations.
🔢 Return 429
When the rate limit is exceeded, return HTTP 429 with a Retry-After header, and inform clients about their limits in the response headers.