diff --git a/.gitignore b/.gitignore index c215b29..843372b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .env /tmp +systemdesigngame diff --git a/data/levels.json b/data/levels.json index 021f82c..b3d37b3 100644 --- a/data/levels.json +++ b/data/levels.json @@ -5,7 +5,6 @@ "description": "Scale your URL shortener to handle traffic spikes and ensure high availability.", "targetRps": 1000, "durationSec": 180, - "maxMonthlyUsd": 300, "maxP95LatencyMs": 150, "requiredAvailabilityPct": 99.9, "mustInclude": ["database", "loadBalancer"], @@ -24,7 +23,6 @@ "Target RPS: 1000", "Max P95 latency: 150ms", "Required availability: 99.9%", - "Max monthly cost: $300", "Simulation duration: 180 seconds" ] }, @@ -34,7 +32,6 @@ "description": "Support real-time chat across mobile and web, with message persistence.", "targetRps": 500, "durationSec": 300, - "maxMonthlyUsd": 500, "maxP95LatencyMs": 200, "requiredAvailabilityPct": 99.9, "mustInclude": ["webserver", "database", "messageQueue"], @@ -53,7 +50,6 @@ "Target RPS: 500", "Max P95 latency: 200ms", "Required availability: 99.9%", - "Max monthly cost: $500", "Simulation duration: 300 seconds" ] }, @@ -63,7 +59,6 @@ "description": "Add video transcoding, caching, and recommendations.", "targetRps": 1000, "durationSec": 600, - "maxMonthlyUsd": 2000, "maxP95LatencyMs": 300, "requiredAvailabilityPct": 99.9, "mustInclude": ["cdn", "data pipeline", "cache"], @@ -82,7 +77,6 @@ "Target RPS: 1000", "Max P95 latency: 300ms", "Required availability: 99.9%", - "Max monthly cost: $2000", "Simulation duration: 600 seconds" ] }, @@ -92,7 +86,6 @@ "description": "Design a rate limiter that works across multiple instances and enforces global quotas.", "targetRps": 1000, "durationSec": 180, - "maxMonthlyUsd": 300, "maxP95LatencyMs": 50, "requiredAvailabilityPct": 99.9, "mustInclude": ["webserver", "cache"], @@ -112,7 +105,6 @@ "Target RPS: 1000", "Max P95 latency: 50ms", "Required availability: 99.9%", - "Max monthly cost: $300", "Simulation duration: 180 seconds" ] }, @@ -122,7 +114,6 @@ "description": "Design a pull-based metrics system like Prometheus that scrapes multiple services.", "targetRps": 1000, "durationSec": 300, - "maxMonthlyUsd": 500, "maxP95LatencyMs": 100, "requiredAvailabilityPct": 99.9, "mustInclude": ["data pipeline", "monitoring/alerting"], @@ -142,7 +133,6 @@ "Target RPS: 1000", "Max P95 latency: 100ms", "Required availability: 99.9%", - "Max monthly cost: $500", "Simulation duration: 300 seconds" ] } diff --git a/internal/simulation/cache.go b/internal/simulation/cache.go index 950ee2d..8fabedb 100644 --- a/internal/simulation/cache.go +++ b/internal/simulation/cache.go @@ -1,11 +1,20 @@ package simulation import ( + "fmt" + "hash/fnv" "time" ) type CacheLogic struct{} +// hash function to simulate URL patterns +func hash(s string) uint32 { + h := fnv.New32a() + h.Write([]byte(s)) + return h.Sum32() +} + type CacheEntry struct { Data string Timestamp int @@ -52,12 +61,16 @@ func (c CacheLogic) Tick(props map[string]any, queue []*Request, tick int) ([]*R output := []*Request{} for _, req := range queue { - cacheKey := req.ID + "-" + req.Type // Use request ID and type as cache key + // For URL shortener simulation, use hash of request ID to simulate repeated URL access + // This creates realistic cache patterns where some URLs are accessed multiple times + hashValue := hash(req.ID) % 100 // Create 100 possible "URLs" + cacheKey := fmt.Sprintf("url-%d-%s", hashValue, req.Type) // Check for cache hit entry, hit := cacheData[cacheKey] if hit && !c.isExpired(entry, currentTime, cacheTTL) { // Cache hit - return immediately with minimal latency + // Cache hit - served from cache component reqCopy := *req reqCopy.LatencyMS += 1 // 1ms for in-memory access reqCopy.Path = append(reqCopy.Path, "cache-hit") @@ -69,6 +82,7 @@ func (c CacheLogic) Tick(props map[string]any, queue []*Request, tick int) ([]*R output = append(output, &reqCopy) } else { // Cache miss - forward request downstream + // Cache miss - forwarding to database reqCopy := *req reqCopy.Path = append(reqCopy.Path, "cache-miss") diff --git a/internal/simulation/engine.go b/internal/simulation/engine.go index 1653d35..da98681 100644 --- a/internal/simulation/engine.go +++ b/internal/simulation/engine.go @@ -33,6 +33,8 @@ type Request struct { Type string // records where it's been (used to prevent loops) Path []string + // cache key for cache-aside pattern (used by microservices) + CacheKey string } // what hte system looks like given a tick @@ -128,7 +130,7 @@ func (e *SimulationEngine) Run(duration int, tickMs int) []*TickSnapshot { } // this will preopulate some props so that we can use different load balancing algorithms - if node.Type == "loadbalancer" { + if node.Type == "loadbalancer" || node.Type == "loadBalancer" { targets := e.Edges[id] node.Props["_numTargets"] = float64(len(targets)) node.Props["_targetIDs"] = targets @@ -179,9 +181,11 @@ func (e *SimulationEngine) Run(duration int, tickMs int) []*TickSnapshot { func GetLogicForType(t string) NodeLogic { switch t { + case "user": + return UserLogic{} case "webserver": return WebServerLogic{} - case "loadbalancer": + case "loadBalancer": return LoadBalancerLogic{} case "cdn": return CDNLogic{} diff --git a/internal/simulation/microservice.go b/internal/simulation/microservice.go index 25e55e1..fa739aa 100644 --- a/internal/simulation/microservice.go +++ b/internal/simulation/microservice.go @@ -1,6 +1,10 @@ package simulation -import "math" +import ( + "fmt" + "hash/fnv" + "math" +) type MicroserviceLogic struct{} @@ -10,6 +14,21 @@ type ServiceInstance struct { HealthStatus string } +// CacheEntry represents a cached item in the microservice's cache +type MicroserviceCacheEntry struct { + Data string + Timestamp int + AccessTime int + AccessCount int +} + +// hash function for cache keys +func hashKey(s string) uint32 { + h := fnv.New32a() + h.Write([]byte(s)) + return h.Sum32() +} + func (m MicroserviceLogic) Tick(props map[string]any, queue []*Request, tick int) ([]*Request, bool) { // Extract microservice properties instanceCount := int(AsFloat64(props["instanceCount"])) @@ -56,36 +75,91 @@ func (m MicroserviceLogic) Tick(props map[string]any, queue []*Request, tick int toProcess = queue[:totalCapacity] } - output := []*Request{} - - // Distribute requests across instances using round-robin - for i, req := range toProcess { + // Initialize cache in microservice props + cache, ok := props["_microserviceCache"].(map[string]*MicroserviceCacheEntry) + if !ok { + cache = make(map[string]*MicroserviceCacheEntry) + props["_microserviceCache"] = cache + } - // Create processed request copy - reqCopy := *req + cacheTTL := 300000 // 5 minutes default TTL + currentTime := tick * 100 // assuming 100ms per tick - // Add microservice processing latency - processingLatency := baseLatencyMs + output := []*Request{} // Only cache misses go here (forwarded to database) + cacheHits := []*Request{} // Cache hits - completed locally + dbRequests := []*Request{} // Requests that need to go to database - // Simulate CPU-bound vs I/O-bound operations - if req.Type == "GET" { - processingLatency = baseLatencyMs // Fast reads - } else if req.Type == "POST" || req.Type == "PUT" { - processingLatency = baseLatencyMs + 10 // Writes take longer - } else if req.Type == "COMPUTE" { - processingLatency = baseLatencyMs + 50 // CPU-intensive operations + // Process each request with cache-aside logic + for i, req := range toProcess { + // Generate cache key for this request (simulate URL patterns) + hashValue := hashKey(req.ID) % 100 // Create 100 possible "URLs" + cacheKey := fmt.Sprintf("url-%d-%s", hashValue, req.Type) + + // Check cache first (Cache-Aside pattern) + entry, hit := cache[cacheKey] + if hit && !m.isCacheExpired(entry, currentTime, cacheTTL) { + // CACHE HIT - serve from cache (NO DATABASE QUERY) + + reqCopy := *req + reqCopy.LatencyMS += 1 // 1ms for cache access + reqCopy.Path = append(reqCopy.Path, "microservice-cache-hit-completed") + + // Update cache access tracking + entry.AccessTime = currentTime + entry.AccessCount++ + + // Cache hits do NOT go to database - they complete here + // In a real system, this response would go back to the client + // Store separately - these do NOT get forwarded to database + cacheHits = append(cacheHits, &reqCopy) + + } else { + // CACHE MISS - need to query database + + reqCopy := *req + + // Add microservice processing latency + processingLatency := baseLatencyMs + + // Simulate CPU-bound vs I/O-bound operations + if req.Type == "GET" { + processingLatency = baseLatencyMs // Fast reads + } else if req.Type == "POST" || req.Type == "PUT" { + processingLatency = baseLatencyMs + 10 // Writes take longer + } else if req.Type == "COMPUTE" { + processingLatency = baseLatencyMs + 50 // CPU-intensive operations + } + + // Instance load affects latency (queuing delay) + instanceLoad := m.calculateInstanceLoad(i, len(toProcess), instanceCount) + if float64(instanceLoad) > float64(rpsCapacity)*0.8 { // Above 80% capacity + processingLatency += int(float64(processingLatency) * 0.5) // 50% penalty + } + + reqCopy.LatencyMS += processingLatency + reqCopy.Path = append(reqCopy.Path, "microservice-cache-miss") + + // Store cache key in request for when database response comes back + reqCopy.CacheKey = cacheKey + + // Forward to database for actual data + dbRequests = append(dbRequests, &reqCopy) } + } - // Instance load affects latency (queuing delay) - instanceLoad := m.calculateInstanceLoad(i, len(toProcess), instanceCount) - if float64(instanceLoad) > float64(rpsCapacity)*0.8 { // Above 80% capacity - processingLatency += int(float64(processingLatency) * 0.5) // 50% penalty + // For cache misses, we would normally wait for database response and then cache it + // In this simulation, we'll immediately cache the "result" for future requests + for _, req := range dbRequests { + // Simulate caching the database response + cache[req.CacheKey] = &MicroserviceCacheEntry{ + Data: "cached-response-data", + Timestamp: currentTime, + AccessTime: currentTime, + AccessCount: 1, } - reqCopy.LatencyMS += processingLatency - reqCopy.Path = append(reqCopy.Path, "microservice-processed") - - output = append(output, &reqCopy) + // Forward request to database + output = append(output, req) } // Health check: service is healthy if not severely overloaded @@ -94,6 +168,11 @@ func (m MicroserviceLogic) Tick(props map[string]any, queue []*Request, tick int return output, healthy } +// isCacheExpired checks if a cache entry has expired +func (m MicroserviceLogic) isCacheExpired(entry *MicroserviceCacheEntry, currentTime, ttl int) bool { + return (currentTime - entry.Timestamp) > ttl +} + // calculateBaseLatency determines base processing time based on resources func (m MicroserviceLogic) calculateBaseLatency(cpu, ramGb int) int { // Better CPU and RAM = lower base latency diff --git a/internal/simulation/user.go b/internal/simulation/user.go new file mode 100644 index 0000000..ec52f00 --- /dev/null +++ b/internal/simulation/user.go @@ -0,0 +1,18 @@ +package simulation + +// UserLogic represents the behavior of user components in the simulation. +// User components serve as traffic sources and don't process requests themselves. +// Traffic generation is handled by the simulation engine at the entry point. +type UserLogic struct{} + +// Tick implements the NodeLogic interface for User components. +// User components don't process requests - they just pass them through. +// The simulation engine handles traffic generation at entry points. +func (u UserLogic) Tick(props map[string]any, queue []*Request, tick int) ([]*Request, bool) { + // User components just pass through any requests they receive + // In practice, User components are typically entry points so they + // receive requests from the simulation engine itself + return queue, true +} + + diff --git a/internal/simulation/webserver.go b/internal/simulation/webserver.go index cc7747d..168125f 100644 --- a/internal/simulation/webserver.go +++ b/internal/simulation/webserver.go @@ -13,14 +13,22 @@ func (l WebServerLogic) Tick(props map[string]any, queue []*Request, tick int) ( toProcess = queue[:maxRPS] } + // Get base latency for web server operations + baseLatencyMs := int(AsFloat64(props["baseLatencyMs"])) + if baseLatencyMs == 0 { + baseLatencyMs = 20 // default 20ms for web server processing + } + var output []*Request for _, req := range toProcess { - output = append(output, &Request{ - ID: req.ID, - Timestamp: req.Timestamp, - Origin: req.Origin, - Type: req.Type, - }) + // Create a copy of the request to preserve existing latency + reqCopy := *req + + // Add web server processing latency + reqCopy.LatencyMS += baseLatencyMs + reqCopy.Path = append(reqCopy.Path, "webserver-processed") + + output = append(output, &reqCopy) } return output, true diff --git a/router/handlers/chat.go b/router/handlers/chat.go index b0a296c..90e421f 100644 --- a/router/handlers/chat.go +++ b/router/handlers/chat.go @@ -42,8 +42,6 @@ func Messages(w http.ResponseWriter, r *http.Request) { break } - fmt.Printf("message: %s", message) - var messageReceived MessageReceived err = json.Unmarshal(message, &messageReceived) if err != nil { @@ -53,9 +51,8 @@ func Messages(w http.ResponseWriter, r *http.Request) { if messageReceived.Message == "" { messageReceived.Message = "" - } else { - messageReceived.Message = string(message) } + // Note: messageReceived.Message is already properly parsed from JSON, no need to overwrite it prompt := fmt.Sprintf("You are a tutor that helps people learn system design. You will be given a JSON payload that looks like %s. The nodes are the components a user can put into their design and the connections will tell you how they are connected. The level name identifies what problem they are working on as well as a difficulty level. Each level has an easy, medium or hard setting. Also in the payload, there is a list of components that a user can use to build their design. Your hints and responses should only refer to these components and not refer to things that the user cannot use. Always refer to the nodes by their type. Please craft your response as if you're talking to the user. And do not reference the payload as \"payload\" but as their design. Also, please do not show the payload in your response. Do not refer to components as node-0 or whatever. Always refer to the type of component they are. Always assume that the source of traffic for any system is a user. The user component will not be visible in teh payload. Also make sure you use html to format your answer. Do not over format your response. Only use p tags. Format lists using proper lists html. Anytime the user sends a different payload back to you, make note of what is correct. Never give the actual answer, only helpful hints. If the available components do not allow the user to feasibly solve the system design problem, you should mention it and then tell them what exactly is missing from the list.", messageReceived.DesignPayload) diff --git a/router/handlers/game.go b/router/handlers/game.go index aae43b5..84d76b7 100644 --- a/router/handlers/game.go +++ b/router/handlers/game.go @@ -3,7 +3,6 @@ package handlers import ( "encoding/json" "fmt" - "html" "html/template" "net/http" "systemdesigngame/internal/auth" @@ -26,8 +25,7 @@ func (h *PlayHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } levelPayload, err := json.Marshal(lvl) - unescapedHtml := html.UnescapeString(string(levelPayload)) - fmt.Printf("raw message: %v", string(json.RawMessage(unescapedHtml))) + if err != nil { fmt.Printf("error marshaling level: %v", err) } diff --git a/router/handlers/simulation.go b/router/handlers/simulation.go index 47b5994..2e6af2b 100644 --- a/router/handlers/simulation.go +++ b/router/handlers/simulation.go @@ -15,7 +15,7 @@ type SimulationResponse struct { Success bool `json:"success"` Metrics map[string]interface{} `json:"metrics,omitempty"` Timeline []interface{} `json:"timeline,omitempty"` - Passed bool `json:"passed,omitempty"` + Passed bool `json:"passed"` Score int `json:"score,omitempty"` Feedback []string `json:"feedback,omitempty"` LevelName string `json:"levelName,omitempty"` @@ -60,7 +60,16 @@ func (h *SimulationHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } // Set simulation parameters - engine.RPS = 50 // Default RPS - could be configurable later + defaultRPS := 50 + targetRPS := defaultRPS + + if requestBody.LevelID != "" { + if lvl, err := level.GetLevelByID(requestBody.LevelID); err == nil { + targetRPS = lvl.TargetRPS + } + } + + engine.RPS = targetRPS // Find entry node by analyzing topology entryNode := findEntryNode(design) @@ -81,7 +90,7 @@ func (h *SimulationHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { snapshots := engine.Run(60, 100) // Calculate metrics from snapshots - metrics := calculateMetrics(snapshots) + metrics := calculateMetrics(snapshots, design) // Convert snapshots to interface{} for JSON serialization timeline := make([]interface{}, len(snapshots)) @@ -122,7 +131,7 @@ func (h *SimulationHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } // calculateMetrics computes key performance metrics from simulation snapshots -func calculateMetrics(snapshots []*simulation.TickSnapshot) map[string]interface{} { +func calculateMetrics(snapshots []*simulation.TickSnapshot, design design.Design) map[string]interface{} { if len(snapshots) == 0 { return map[string]interface{}{ "throughput": 0, @@ -173,12 +182,12 @@ func calculateMetrics(snapshots []*simulation.TickSnapshot) map[string]interface availability = (float64(totalHealthy) / float64(totalNodes)) * 100 } - // Estimate monthly cost (placeholder - could be enhanced) - monthlyCost := float64(totalNodes) * 50 // $50 per node per month baseline + // Calculate monthly cost based on component specifications + monthlyCost := calculateRealMonthlyCost(design.Nodes) return map[string]interface{}{ "throughput": int(throughput), - "latency_avg": int(avgLatency), + "latency_avg": avgLatency, "cost_monthly": int(monthlyCost), "availability": availability, } @@ -264,19 +273,10 @@ func validateLevel(lvl *level.Level, design design.Design, metrics map[string]in var passedRequirements []string // Extract metrics - throughput := metrics["throughput"].(int) - avgLatency := metrics["latency_avg"].(int) + avgLatency := int(metrics["latency_avg"].(float64)) availability := metrics["availability"].(float64) monthlyCost := metrics["cost_monthly"].(int) - // Check throughput requirement - if throughput >= lvl.TargetRPS { - passedRequirements = append(passedRequirements, "Throughput requirement met") - } else { - failedRequirements = append(failedRequirements, - fmt.Sprintf("Throughput: %d RPS (required: %d RPS)", throughput, lvl.TargetRPS)) - } - // Check latency requirement (using avg latency as approximation for P95) if avgLatency <= lvl.MaxP95LatencyMs { passedRequirements = append(passedRequirements, "Latency requirement met") @@ -422,6 +422,34 @@ func calculateScore(passedCount, failedCount int, metrics map[string]interface{} return min(100, baseScore+performanceBonus) } +// calculateRealMonthlyCost computes monthly cost based on actual component specifications +func calculateRealMonthlyCost(nodes []design.Node) float64 { + totalCost := 0.0 + + for _, node := range nodes { + switch node.Type { + case "user": + // User components don't cost anything + continue + case "microservice": + if monthlyUsd, ok := node.Props["monthlyUsd"].(float64); ok { + if instanceCount, ok := node.Props["instanceCount"].(float64); ok { + totalCost += monthlyUsd * instanceCount + } + } + case "webserver": + if monthlyCost, ok := node.Props["monthlyCostUsd"].(float64); ok { + totalCost += monthlyCost + } + default: + // Default cost for other components (cache, database, load balancer, etc.) + totalCost += 20 // $20/month baseline + } + } + + return totalCost +} + // Helper function func min(a, b int) int { if a < b { diff --git a/static/app.js b/static/app.js index 0b543e8..2040655 100644 --- a/static/app.js +++ b/static/app.js @@ -67,7 +67,6 @@ export class CanvasApp { this.learnMoreBtn = document.getElementById('learn-more-button'); this.tabs = document.getElementsByClassName('tabinput'); - console.log(this.tabs) this._reconnectDelay = 1000; this._maxReconnectDelay = 15000; this._reconnectTimer = null; @@ -168,7 +167,6 @@ export class CanvasApp { exportDesign() { const nodes = this.placedComponents - .filter(n => n.type !== 'user') .map(n => { const plugin = PluginRegistry.get(n.type); const result = { @@ -198,8 +196,8 @@ export class CanvasApp { return { nodes, connections, - level: JSON.parse(this.level), - availableComponents: JSON.stringify(this.plugins) + level: this.level, + availableComponents: this.plugins }; } @@ -216,17 +214,17 @@ export class CanvasApp { } showResults(result) { - const metrics = result.Metrics; + const metrics = result.metrics; let message = ''; // Level validation results - if (result.LevelName) { - if (result.Passed) { - message += `Level "${result.LevelName}" PASSED!\n`; - message += `Score: ${result.Score}/100\n\n`; + if (result.levelName) { + if (result.passed) { + message += `Level "${result.levelName}" PASSED!\n`; + message += `Score: ${result.score}/100\n\n`; } else { - message += `Level "${result.LevelName}" FAILED\n`; - message += `Score: ${result.Score}/100\n\n`; + message += `Level "${result.levelName}" FAILED\n`; + message += `Score: ${result.score}/100\n\n`; } // Add detailed feedback @@ -243,7 +241,7 @@ export class CanvasApp { message += `• Avg Latency: ${metrics.latency_avg}ms\n`; message += `• Availability: ${metrics.availability.toFixed(1)}%\n`; message += `• Monthly Cost: $${metrics.cost_monthly}\n\n`; - message += `Timeline: ${result.Timeline.length} ticks simulated`; + message += `Timeline: ${result.timeline.length} ticks simulated`; alert(message); @@ -255,6 +253,43 @@ export class CanvasApp { alert(`Simulation Error:\n\n${errorMessage}\n\nPlease check your design and try again.`); } + _initWebSocket() { + const scheme = location.protocol === "https:" ? "wss://" : "ws://"; + this.ws = new WebSocket(scheme + location.host + "/ws"); + + this.ws.onopen = () => { + console.log("WebSocket connected"); + // Reset reconnection delay on successful connection + this._reconnectDelay = 1000; + + this.ws.send(JSON.stringify({ + 'designPayload': JSON.stringify(this.exportDesign()), + 'message': '' + })); + }; + + this.ws.onmessage = (e) => { + this.chatLoadingIndicator.style.display = 'none'; + this.chatTextField.disabled = false; + this.chatTextField.focus(); + const message = document.createElement('p'); + message.innerHTML = e.data; + message.className = "other"; + this.chatMessages.insertBefore(message, this.chatLoadingIndicator); + }; + + this.ws.onerror = (err) => { + console.log("ws error:", err); + this._scheduleReconnect(); + }; + + this.ws.onclose = () => { + console.log("WebSocket closed, scheduling reconnect..."); + this.ws = null; + this._scheduleReconnect(); + }; + } + _scheduleReconnect() { if (this._stopped) return; @@ -265,7 +300,7 @@ export class CanvasApp { const jitter = this._reconnectDelay * (Math.random() * 0.4 - 0.2); const delay = Math.max(250, Math.min(this._maxReconnectDelay, this._reconnectDelay + jitter)); - console.log(`Reconnecting websocket...`) + console.log(`Reconnecting websocket in ${delay}ms...`) this._reconnectTimer = setTimeout(() => { this._reconnectTimer = null; diff --git a/static/canvas.html b/static/canvas.html index 4f83953..3761aa3 100644 --- a/static/canvas.html +++ b/static/canvas.html @@ -112,16 +112,8 @@