6 changed files with 3493 additions and 115 deletions
@@ -0,0 +1,315 @@
from typing import NamedTuple, Dict, Tuple
from enum import Enum


class LoadBalancerSpec(NamedTuple):
    capacity: float  # e.g. float('inf')
    baseLatency: int  # ms
    cost: int


class WebServerSmall(NamedTuple):
    capacity: int
    baseLatency: int
    penaltyPerRPS: float
    cost: int


class WebServerMedium(NamedTuple):
    capacity: int
    baseLatency: int
    penaltyPerRPS: float
    cost: int


class CacheStandard(NamedTuple):
    capacity: int
    baseLatency: int
    penaltyPer10RPS: float
    hitRates: Dict[str, float]
    cost: int


class CacheLarge(NamedTuple):
    capacity: int
    baseLatency: int
    penaltyPer10RPS: float
    hitRates: Dict[str, float]
    cost: int


class DbReadReplica(NamedTuple):
    readCapacity: int  # RPS
    baseReadLatency: int  # ms
    penaltyPer10RPS: float
    cost: int


class ComponentSpec(NamedTuple):
    loadBalancer: LoadBalancerSpec
    webServerSmall: WebServerSmall
    webServerMedium: WebServerMedium
    cacheStandard: CacheStandard
    cacheLarge: CacheLarge
    dbReadReplica: DbReadReplica


class Design(NamedTuple):
    numWebServerSmall: int
    numWebServerMedium: int
    cacheType: str  # either "cacheStandard" or "cacheLarge"
    cacheTTL: str
    numDbReplicas: int
    promotionDelaySeconds: int


class Level(NamedTuple):
    id: int
    description: str
    targetRPS: int
    maxP95Latency: int
    maxMonthlyCost: int
    requiredAvailability: int
    failureEvents: list
    componentSpec: ComponentSpec
    simulatedDurationSeconds: int


class CacheType(Enum):
    STANDARD = "cacheStandard"
    LARGE = "cacheLarge"


class LevelSimulator:
    def __init__(self, level: Level, design: Design):
        self.level = level
        self.design = design
        self.specs = self.level.componentSpec

    def compute_cost(self) -> int:
        s = self.specs
        d = self.design

        cost_lb = s.loadBalancer.cost
        cost_ws_small = d.numWebServerSmall * s.webServerSmall.cost
        cost_ws_medium = d.numWebServerMedium * s.webServerMedium.cost

        if d.cacheType == CacheType.STANDARD.value:
            cost_cache = s.cacheStandard.cost
        else:
            cost_cache = s.cacheLarge.cost

        # "1" here stands for the master; add d.numDbReplicas for replicas
        cost_db = s.dbReadReplica.cost * (1 + d.numDbReplicas)

        return cost_lb + cost_ws_small + cost_ws_medium + cost_cache + cost_db
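
    # Worked cost example (hypothetical spec prices, not from any real level):
    # lb $20, small WS $10, medium WS $25, standard cache $15, DB node $30.
    # A design with 2 small servers, 1 medium server, a standard cache, and
    # 1 replica then costs 20 + 2*10 + 1*25 + 15 + 30*(1 + 1) = 140 per month.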

    def compute_rps(self) -> Tuple[float, float]:
        """
        Returns (hits_rps, misses_rps) for a read workload of size level.targetRPS.
        """
        s = self.specs
        d = self.design

        total_rps = self.level.targetRPS

        if d.cacheType == CacheType.STANDARD.value:
            hit_rate = s.cacheStandard.hitRates[d.cacheTTL]
        else:
            hit_rate = s.cacheLarge.hitRates[d.cacheTTL]

        hits_rps = total_rps * hit_rate
        misses_rps = total_rps * (1 - hit_rate)
        return hits_rps, misses_rps
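
    # Hit/miss split example (illustrative numbers): at targetRPS=1000 and a
    # hit rate of 0.8 for the chosen TTL, compute_rps() returns (800.0, 200.0):
    # 800 reads are absorbed by the cache, 200 fall through to the web/DB tier.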

    def compute_latencies(self) -> Dict[str, float]:
        """
        Computes:
          - L95_ws (worst P95 among small/medium, given misses_rps)
          - L95_cache (baseLatency)
          - L95_db_read (based on misses_rps and replicas)
          - L95_total_read = miss_path (since misses are slower)
        """
        s = self.specs
        d = self.design

        # 1) First compute hits/misses
        _, misses_rps = self.compute_rps()

        # 2) Web server P95: medium servers count double when splitting load
        cap_small = s.webServerSmall.capacity
        cap_medium = s.webServerMedium.capacity

        weighted_count = d.numWebServerSmall + (2 * d.numWebServerMedium)

        if weighted_count == 0:
            L95_ws = float("inf")
        else:
            load_per_weighted = misses_rps / weighted_count

            L95_ws_small = 0.0
            if d.numWebServerSmall > 0:
                if load_per_weighted <= cap_small:
                    L95_ws_small = s.webServerSmall.baseLatency
                else:
                    L95_ws_small = (
                        s.webServerSmall.baseLatency
                        + s.webServerSmall.penaltyPerRPS
                        * (load_per_weighted - cap_small)
                    )

            L95_ws_medium = 0.0
            if d.numWebServerMedium > 0:
                if load_per_weighted <= cap_medium:
                    L95_ws_medium = s.webServerMedium.baseLatency
                else:
                    L95_ws_medium = (
                        s.webServerMedium.baseLatency
                        + s.webServerMedium.penaltyPerRPS
                        * (load_per_weighted - cap_medium)
                    )

            L95_ws = max(L95_ws_small, L95_ws_medium)

        # 3) Cache P95
        if d.cacheType == CacheType.STANDARD.value:
            L95_cache = s.cacheStandard.baseLatency
        else:
            L95_cache = s.cacheLarge.baseLatency

        # 4) DB read P95
        read_cap = s.dbReadReplica.readCapacity
        base_read_lat = s.dbReadReplica.baseReadLatency
        pen_per10 = s.dbReadReplica.penaltyPer10RPS

        num_reps = d.numDbReplicas
        if num_reps == 0:
            if misses_rps <= read_cap:
                L95_db_read = base_read_lat
            else:
                excess = misses_rps - read_cap
                L95_db_read = base_read_lat + pen_per10 * (excess / 10.0)
        else:
            load_per_rep = misses_rps / num_reps
            if load_per_rep <= read_cap:
                L95_db_read = base_read_lat
            else:
                excess = load_per_rep - read_cap
                L95_db_read = base_read_lat + pen_per10 * (excess / 10.0)

        # 5) End-to-end P95 read = miss_path
        L_lb = s.loadBalancer.baseLatency
        miss_path = L_lb + L95_ws + L95_db_read
        L95_total_read = miss_path

        return {
            "L95_ws": L95_ws,
            "L95_cache": L95_cache,
            "L95_db_read": L95_db_read,
            "L95_total_read": L95_total_read,
        }
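
    # Latency example (illustrative numbers): misses_rps=200 with 2 small and
    # 1 medium server gives weighted_count = 2 + 2*1 = 4, so each weighted
    # unit carries 50 RPS. If cap_small=40, baseLatency=30 ms, and
    # penaltyPerRPS=0.5, the small-server P95 is 30 + 0.5*(50-40) = 35 ms;
    # L95_ws is the worse of the small and medium figures.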

    def compute_availability(self) -> float:
        """
        If failureEvents == [], just return 100.0.
        Otherwise, for each failure (e.g. DB master crash at t_crash):
          - if numDbReplicas == 0, downtime = sim_duration - t_crash
          - else a replica is promoted, so downtime = promotionDelaySeconds
        availability = (sim_duration - total_downtime) / sim_duration * 100
        """
        sim_duration = self.level.simulatedDurationSeconds
        total_downtime = 0
        for event in self.level.failureEvents:
            t_crash = event["time"]
            if event["type"] == "DB_MASTER_CRASH":
                if self.design.numDbReplicas == 0:
                    total_downtime += (sim_duration - t_crash)
                else:
                    # a replica is promoted after the configured delay
                    delay = self.design.promotionDelaySeconds
                    total_downtime += delay
            # (handle other event types if needed)
        return (sim_duration - total_downtime) / sim_duration * 100
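
    # Availability example: sim_duration=3600 s and a DB_MASTER_CRASH at
    # t=600 s. With no replicas the DB stays down, so downtime=3000 s and
    # availability = (3600-3000)/3600*100 ~= 16.7%. With a replica and a
    # 5 s promotion delay, downtime=5 s and availability ~= 99.86%.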

    def validate(self) -> dict:
        """
        1) Cost check
        2) Throughput checks (cache, DB, WS)
        3) Latency check
        4) Availability check (if there are failureEvents)
        Return {"pass": True, "metrics": {...}} or {"pass": False, "reason": "..."}.
        """
        total_cost = self.compute_cost()
        if total_cost > self.level.maxMonthlyCost:
            return {"pass": False, "reason": f"Budget ${total_cost} > ${self.level.maxMonthlyCost}"}

        hits_rps, misses_rps = self.compute_rps()

        # Cache capacity
        cache_cap = (
            self.specs.cacheStandard.capacity
            if self.design.cacheType == CacheType.STANDARD.value
            else self.specs.cacheLarge.capacity
        )
        if hits_rps > cache_cap:
            return {"pass": False, "reason": f"Cache overloaded ({hits_rps:.1f} RPS > {cache_cap})"}

        # DB capacity
        db_cap = self.specs.dbReadReplica.readCapacity
        if self.design.numDbReplicas == 0:
            if misses_rps > db_cap:
                return {"pass": False, "reason": f"DB overloaded ({misses_rps:.1f} RPS > {db_cap})"}
        else:
            per_rep = misses_rps / self.design.numDbReplicas
            if per_rep > db_cap:
                return {
                    "pass": False,
                    "reason": f"DB replicas overloaded ({per_rep:.1f} RPS/replica > {db_cap})"
                }

        # WS capacity
        total_ws_cap = (
            self.design.numWebServerSmall * self.specs.webServerSmall.capacity
            + self.design.numWebServerMedium * self.specs.webServerMedium.capacity
        )
        if misses_rps > total_ws_cap:
            return {
                "pass": False,
                "reason": f"Web servers overloaded ({misses_rps:.1f} RPS > {total_ws_cap})"
            }

        # Latency
        lat = self.compute_latencies()
        if lat["L95_total_read"] > self.level.maxP95Latency:
            return {
                "pass": False,
                "reason": f"P95 too high ({lat['L95_total_read']:.1f} ms > {self.level.maxP95Latency} ms)"
            }

        # Availability (only if failureEvents is nonempty; defaults to 100.0)
        availability = 100.0
        if self.level.failureEvents:
            availability = self.compute_availability()
            if availability < self.level.requiredAvailability:
                return {
                    "pass": False,
                    "reason": f"Availability too low ({availability:.1f}% < "
                              f"{self.level.requiredAvailability}%)"
                }

        # If we reach here, all checks passed
        return {
            "pass": True,
            "metrics": {
                "cost": total_cost,
                "p95": lat["L95_total_read"],
                "achievedRPS": self.level.targetRPS,
                "availability": availability,
            }
        }
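

# Usage sketch (all numbers hypothetical, for illustration only):
#
#   spec = ComponentSpec(
#       loadBalancer=LoadBalancerSpec(float("inf"), 5, 20),
#       webServerSmall=WebServerSmall(50, 30, 0.5, 10),
#       webServerMedium=WebServerMedium(100, 25, 0.3, 25),
#       cacheStandard=CacheStandard(800, 2, 0.1, {"60s": 0.8}, 15),
#       cacheLarge=CacheLarge(2000, 2, 0.05, {"60s": 0.9}, 40),
#       dbReadReplica=DbReadReplica(150, 20, 1.0, 30),
#   )
#   level = Level(1, "demo", 1000, 120, 200, 99, [], spec, 3600)
#   design = Design(2, 1, "cacheStandard", "60s", 2, 5)
#   print(LevelSimulator(level, design).validate())
#
# With these numbers the design passes: hits 800 <= 800 cache RPS, 100
# misses/replica <= 150, P95 = 5 + 30 + 20 = 55 ms <= 120 ms, cost 170 <= 200.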
@@ -0,0 +1,286 @@
package simulation

import (
	"fmt"
	"math"
)

type LoadBalancerSpec struct {
	Capacity    float64
	BaseLatency int
	Cost        int
}

type WebServerSmall struct {
	Capacity      int
	BaseLatency   int
	PenaltyPerRPS float64
	Cost          int
}

type WebServerMedium struct {
	Capacity      int
	BaseLatency   int
	PenaltyPerRPS float64
	Cost          int
}

type CacheStandard struct {
	Capacity        int
	BaseLatency     int
	PenaltyPer10RPS float64
	HitRates        map[string]float64
	Cost            int
}

type CacheLarge struct {
	Capacity        int
	BaseLatency     int
	PenaltyPer10RPS float64
	HitRates        map[string]float64
	Cost            int
}

type DbReadReplica struct {
	ReadCapacity    int // RPS
	BaseReadLatency int // ms
	PenaltyPer10RPS float64
	Cost            int
}

type ComponentSpec struct {
	LoadBalancer    LoadBalancerSpec
	WebServerSmall  WebServerSmall
	WebServerMedium WebServerMedium
	CacheStandard   CacheStandard
	CacheLarge      CacheLarge
	DbReadReplica   DbReadReplica
}

type Design struct {
	NumWebServerSmall     int
	NumWebServerMedium    int
	CacheType             CacheType // enum, see below
	CacheTTL              string
	NumDbReplicas         int
	PromotionDelaySeconds int
}

type FailureEvent struct {
	Type string
	Time int
}

type Level struct {
	ID                       int
	Description              string
	TargetRPS                int
	MaxP95Latency            int
	MaxMonthlyCost           int
	RequiredAvailability     int
	FailureEvents            []FailureEvent
	ComponentSpec            ComponentSpec
	SimulatedDurationSeconds int
}

type Metrics struct {
	Cost         int     `json:"cost"`
	P95          float64 `json:"p95"`
	AchievedRPS  int     `json:"achievedRPS"`
	Availability float64 `json:"availability"`
}

type EvaluationResult struct {
	Pass    bool    `json:"pass"`
	Metrics Metrics `json:"metrics"`
}

type Latencies struct {
	L95WS        float64
	L95Cache     float64
	L95DBRead    float64
	L95TotalRead float64
}

type ValidationMetrics struct {
	Cost         int
	P95          float64
	AchievedRPS  int
	Availability float64
}

type ValidationResults struct {
	Pass    bool
	Reason  *string
	Metrics *ValidationMetrics
}

type CacheType string

const (
	CacheStandardType CacheType = "cacheStandard"
	CacheLargeType    CacheType = "cacheLarge"
)

type LevelSimulator struct {
	Level  Level
	Design Design
	Specs  ComponentSpec
}

func (ls *LevelSimulator) ComputeCost() int {
	s := ls.Specs
	d := ls.Design

	costLb := s.LoadBalancer.Cost
	costWSSmall := d.NumWebServerSmall * s.WebServerSmall.Cost
	costWSMedium := d.NumWebServerMedium * s.WebServerMedium.Cost

	var costCache int
	if d.CacheType == CacheStandardType {
		costCache = s.CacheStandard.Cost
	} else {
		costCache = s.CacheLarge.Cost
	}

	// the "1" is the master; replicas are added on top
	costDB := s.DbReadReplica.Cost * (1 + d.NumDbReplicas)

	return costLb + costWSSmall + costWSMedium + costCache + costDB
}

func (ls *LevelSimulator) ComputeRPS() (float64, float64) {
	s := ls.Specs
	d := ls.Design
	l := ls.Level

	totalRPS := l.TargetRPS

	var hitRate float64
	if d.CacheType == CacheStandardType {
		hitRate = s.CacheStandard.HitRates[d.CacheTTL]
	} else if d.CacheType == CacheLargeType {
		hitRate = s.CacheLarge.HitRates[d.CacheTTL]
	} else {
		hitRate = 0.0
	}

	hitRPS := float64(totalRPS) * hitRate
	missesRPS := float64(totalRPS) * (1 - hitRate)
	return hitRPS, missesRPS
}

func (ls *LevelSimulator) ComputeLatencies() Latencies {
	s := ls.Specs
	d := ls.Design

	_, missesRPS := ls.ComputeRPS()

	capSmall := s.WebServerSmall.Capacity
	capMedium := s.WebServerMedium.Capacity

	// medium servers count double, matching the Python implementation
	weightedCount := d.NumWebServerSmall + (2 * d.NumWebServerMedium)

	var L95WS float64
	if weightedCount == 0 {
		L95WS = math.Inf(1)
	} else {
		loadPerWeighted := missesRPS / float64(weightedCount)

		L95WSSmall := 0.0
		if d.NumWebServerSmall > 0 {
			if loadPerWeighted <= float64(capSmall) {
				L95WSSmall = float64(s.WebServerSmall.BaseLatency)
			} else {
				L95WSSmall = float64(s.WebServerSmall.BaseLatency) + s.WebServerSmall.PenaltyPerRPS*(loadPerWeighted-float64(capSmall))
			}
		}

		L95WSMedium := 0.0
		if d.NumWebServerMedium > 0 {
			if loadPerWeighted <= float64(capMedium) {
				L95WSMedium = float64(s.WebServerMedium.BaseLatency)
			} else {
				L95WSMedium = float64(s.WebServerMedium.BaseLatency) + s.WebServerMedium.PenaltyPerRPS*(loadPerWeighted-float64(capMedium))
			}
		}

		L95WS = math.Max(L95WSSmall, L95WSMedium)
	}

	var L95Cache float64
	if d.CacheType == CacheStandardType {
		L95Cache = float64(s.CacheStandard.BaseLatency)
	} else if d.CacheType == CacheLargeType {
		L95Cache = float64(s.CacheLarge.BaseLatency)
	} else {
		L95Cache = 0.0
	}

	readCap := s.DbReadReplica.ReadCapacity
	baseReadLat := s.DbReadReplica.BaseReadLatency
	penPer10 := s.DbReadReplica.PenaltyPer10RPS

	numReps := d.NumDbReplicas
	var L95DbRead float64

	if numReps == 0 {
		if missesRPS <= float64(readCap) {
			L95DbRead = float64(baseReadLat)
		} else {
			excess := missesRPS - float64(readCap)
			L95DbRead = float64(baseReadLat) + penPer10*(excess/10.0)
		}
	} else {
		loadPerRep := missesRPS / float64(numReps)
		if loadPerRep <= float64(readCap) {
			L95DbRead = float64(baseReadLat)
		} else {
			excess := loadPerRep - float64(readCap)
			L95DbRead = float64(baseReadLat) + penPer10*(excess/10.0)
		}
	}

	// keep the whole path in float64 so an infinite L95WS propagates correctly
	LLb := float64(s.LoadBalancer.BaseLatency)
	missPath := LLb + L95WS + L95DbRead
	L95TotalRead := missPath

	return Latencies{
		L95WS:        L95WS,
		L95Cache:     L95Cache,
		L95DBRead:    L95DbRead,
		L95TotalRead: L95TotalRead,
	}
}
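
// Note the degenerate case: with no web servers at all, weightedCount is 0,
// L95WS becomes +Inf, and the +Inf flows through missPath so the latency
// check in Validate necessarily fails.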

func (ls *LevelSimulator) ComputeAvailability() float64 {
	simDuration := ls.Level.SimulatedDurationSeconds
	totalDownTime := 0
	for _, event := range ls.Level.FailureEvents {
		tCrash := event.Time
		if event.Type == "DB_MASTER_CRASH" {
			if ls.Design.NumDbReplicas == 0 {
				totalDownTime += (simDuration - tCrash)
			} else {
				delay := ls.Design.PromotionDelaySeconds
				totalDownTime += delay
			}
		}
	}

	return (float64(simDuration) - float64(totalDownTime)) / float64(simDuration) * 100
}

func (ls *LevelSimulator) Validate() ValidationResults {
	fail := func(reason string) ValidationResults {
		return ValidationResults{Pass: false, Reason: &reason}
	}

	totalCost := ls.ComputeCost()
	if totalCost > ls.Level.MaxMonthlyCost {
		return fail(fmt.Sprintf("Budget $%d > $%d", totalCost, ls.Level.MaxMonthlyCost))
	}

	// (The per-component throughput checks from the Python validate()
	// would slot in here.)

	lat := ls.ComputeLatencies()
	if lat.L95TotalRead > float64(ls.Level.MaxP95Latency) {
		return fail(fmt.Sprintf("P95 too high (%.1f ms > %d ms)", lat.L95TotalRead, ls.Level.MaxP95Latency))
	}

	availability := 100.0
	if len(ls.Level.FailureEvents) > 0 {
		availability = ls.ComputeAvailability()
		if availability < float64(ls.Level.RequiredAvailability) {
			return fail(fmt.Sprintf("Availability too low (%.1f%% < %d%%)", availability, ls.Level.RequiredAvailability))
		}
	}

	return ValidationResults{
		Pass: true,
		Metrics: &ValidationMetrics{
			Cost:         totalCost,
			P95:          lat.L95TotalRead,
			AchievedRPS:  ls.Level.TargetRPS,
			Availability: availability,
		},
	}
}
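
// Usage sketch (hypothetical values, mirroring the Python example above):
//
//	sim := LevelSimulator{
//		Level:  Level{TargetRPS: 1000, MaxP95Latency: 120, MaxMonthlyCost: 200, ...},
//		Design: Design{NumWebServerSmall: 2, NumWebServerMedium: 1, CacheType: CacheStandardType, CacheTTL: "60s", NumDbReplicas: 2, PromotionDelaySeconds: 5},
//		Specs:  spec, // a ComponentSpec populated with the level's component prices
//	}
//	result := sim.Validate()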