package checker import ( "context" "crypto/tls" "fmt" "net" "net/http" "regexp" "strconv" "time" ) // Status represents the lifecycle state of a single test. type Status string // Result statuses emitted on the channel. const ( StatusRunning Status = "running" StatusPassed Status = "passed" StatusFailed Status = "failed" StatusSkipped Status = "skipped" // StatusWarn is a "soft pass" — the test technically succeeded but // the user should know about a degradation (e.g. voice quality at the // upper end of acceptable, or all Discord voice domains resolve but // the proxy filters their TCP). Frontend renders it like StatusPassed // but keeps the Hint visible. StatusWarn Status = "warn" ) // Result is one event in the diagnostic stream. Multiple Results may be // emitted per test (one per attempt: running + passed/failed; on retry, // running again then passed/failed). type Result struct { ID string `json:"id"` Status Status `json:"status"` Metric string `json:"metric,omitempty"` Error string `json:"error,omitempty"` Hint string `json:"hint,omitempty"` RawHex string `json:"raw_hex,omitempty"` Duration time.Duration `json:"duration_ms"` Attempt int `json:"attempt"` } // Config drives Run. Zero-value fields receive defaults via applyDefaults. type Config struct { ProxyHost string ProxyPort int UseAuth bool ProxyLogin string ProxyPassword string PerTestTimeout time.Duration MaxRetries int RetryBackoff time.Duration DiscordGateway string DiscordAPI string StunServer string // Voice-quality burst tuning (see runVoiceQuality). Defaults: 30 // packets, 20ms between sends. VoiceBurstCount int VoiceBurstInterval time.Duration } // applyDefaults returns a copy of cfg with zero-valued knobs filled in. func applyDefaults(cfg Config) Config { if cfg.PerTestTimeout <= 0 { cfg.PerTestTimeout = 5 * time.Second } if cfg.MaxRetries < 0 { cfg.MaxRetries = 0 } if cfg.MaxRetries == 0 { // Distinguish "explicit 0" from "unset" — spec says default is 1. // applyDefaults runs on a copy of the caller's Config; we treat // a literal zero as "use default" so a fresh `Config{}` works. cfg.MaxRetries = 1 } if cfg.RetryBackoff < 0 { cfg.RetryBackoff = 500 * time.Millisecond } if cfg.RetryBackoff == 0 { cfg.RetryBackoff = 500 * time.Millisecond } if cfg.DiscordGateway == "" { cfg.DiscordGateway = "gateway.discord.gg:443" } if cfg.DiscordAPI == "" { cfg.DiscordAPI = "https://discord.com/api/v9/gateway" } if cfg.StunServer == "" { cfg.StunServer = "stun.l.google.com:19302" } if cfg.VoiceBurstCount <= 0 { cfg.VoiceBurstCount = 30 } if cfg.VoiceBurstInterval <= 0 { cfg.VoiceBurstInterval = 20 * time.Millisecond } return cfg } // Run executes the 7-step diagnostic and streams Results on the returned // channel. The channel is closed when the run finishes (or is cancelled). // // Cancel ctx to abort: the in-flight test emits a Failed Result with // Error="cancelled", and remaining tests each emit a single Skipped Result. func Run(ctx context.Context, cfg Config) <-chan Result { cfg = applyDefaults(cfg) ch := make(chan Result, 16) go func() { defer close(ch) e := &executor{ctx: ctx, cfg: cfg, ch: ch} defer e.cleanup() e.runTCP() e.runGreet() if cfg.UseAuth { e.runAuth() } e.runConnect() e.runUDP() e.runVoiceQuality() e.runAPI() }() return ch } // executor carries shared state across the 7 test methods. type executor struct { ctx context.Context cfg Config ch chan<- Result // tcpConn is opened in runTCP and reused by greet/auth/connect. tcpConn net.Conn // udpConn2 is the SECOND TCP control channel opened in runUDP. // Must stay alive until stun finishes — the SOCKS5 spec requires // the control TCP connection to remain up for the relay to be // valid. udpConn2 net.Conn // udpRelay is the UDP relay endpoint announced by the proxy in // the UDP ASSOCIATE reply. udpRelay *net.UDPAddr // udpClient is our local UDP socket used to talk to the relay. udpClient net.PacketConn // Step gating: each xOK is set true on success (or "soft pass" // warn for voice-quality). tcpOK, greetOK, authOK, connectOK, udpOK, voiceQualityOK bool // Cancellation latch. Once any test emits a "cancelled" failure, // remaining tests emit a single Skipped result with the same reason. cancelled bool } // cleanup closes any state opened during the run. func (e *executor) cleanup() { if e.tcpConn != nil { _ = e.tcpConn.Close() } if e.udpConn2 != nil { _ = e.udpConn2.Close() } if e.udpClient != nil { _ = e.udpClient.Close() } } // emit sends a Result on the channel, respecting ctx so a stalled consumer // doesn't block us forever. func (e *executor) emit(r Result) { select { case e.ch <- r: case <-e.ctx.Done(): // Best-effort: try once more so we don't drop user-visible // information just because cancel raced the send. select { case e.ch <- r: default: } } } // emitSkipped pushes a single skipped Result with a constant reason. func (e *executor) emitSkipped(id, reason string) { e.emit(Result{ID: id, Status: StatusSkipped, Error: reason}) } // emitCancelled pushes a single failed Result with Error="cancelled". func (e *executor) emitCancelled(id string, attempt int, dur time.Duration) { e.cancelled = true e.emit(Result{ ID: id, Status: StatusFailed, Error: "cancelled", Hint: hintFor(id, context.Canceled), Attempt: attempt, Duration: dur, }) } // shouldSkip checks high-level guard conditions and emits the appropriate // pre-test Result if we shouldn't run. Returns true if the caller should // abort the test. func (e *executor) shouldSkip(id string, depOK bool) bool { if e.cancelled { e.emitSkipped(id, "cancelled") return true } if !depOK { e.emitSkipped(id, skipReason) return true } if err := e.ctx.Err(); err != nil { e.emitCancelled(id, 1, 0) return true } return false } const skipReason = "depends on previous failed step" // rawHexRE pulls "...(raw=DEADBEEF)" out of a wrapped error string. var rawHexRE = regexp.MustCompile(`\(raw=([0-9a-fA-F]+)\)`) // extractRawHex pulls the hex payload out of our `(raw=XX...)` error // wrapping convention. Returns "" if absent. func extractRawHex(s string) string { m := rawHexRE.FindStringSubmatch(s) if len(m) == 2 { return m[1] } return "" } // runAttempt is the inner loop shared by all tests. It handles emitting // running/passed/failed results, retry classification and backoff. // // run does the actual work for one attempt and returns metric + err. func (e *executor) runAttempt(id string, run func(ctx context.Context) (string, error)) (ok bool) { maxAttempts := 1 + e.cfg.MaxRetries for attempt := 1; attempt <= maxAttempts; attempt++ { if err := e.ctx.Err(); err != nil { e.emitCancelled(id, attempt, 0) return false } // Emit running for this attempt. e.emit(Result{ID: id, Status: StatusRunning, Attempt: attempt}) attemptCtx, cancel := context.WithTimeout(e.ctx, e.cfg.PerTestTimeout) start := time.Now() metric, err := run(attemptCtx) dur := time.Since(start) cancel() if err == nil { e.emit(Result{ ID: id, Status: StatusPassed, Metric: metric, Attempt: attempt, Duration: dur, }) return true } // Parent-ctx cancelled? Emit cancelled and stop (no retry // into a cancelled context). We check the PARENT ctx, not // attemptCtx (which always expires after PerTestTimeout). if e.ctx.Err() != nil { e.emitCancelled(id, attempt, dur) return false } // Per-attempt deadline expired (PerTestTimeout fired) — // treat as a transient timeout. We need to override // classifyError here because err's chain contains // context.DeadlineExceeded (joinCtxErr embeds attemptCtx.Err) // which classifyError treats as permanent. The semantic // distinction is "our per-test budget vs caller cancel" — // the former is exactly what retries are for. var class Classification if isContextErr(err) { // Parent ctx is fine (checked above), so this is a // per-attempt deadline = transient. class = ClassificationTransient } else { class = classifyError(err) } canRetry := class == ClassificationTransient && attempt < maxAttempts if canRetry { // Failed-but-will-retry: still emit Failed for the // observer (so they see the attempt happened), but // loop. Some consumers only show the LAST failure; // emitting every attempt is the more transparent // option. Spec says "emit running + passed/failed // per attempt". e.emit(Result{ ID: id, Status: StatusFailed, Error: err.Error(), Hint: hintFor(id, err), RawHex: extractRawHex(err.Error()), Attempt: attempt, Duration: dur, }) // Sleep with cancel awareness. select { case <-time.After(e.cfg.RetryBackoff): case <-e.ctx.Done(): // Caller cancelled during backoff — stop without retry. return false } continue } // Final failure (permanent or out of retries). e.emit(Result{ ID: id, Status: StatusFailed, Error: err.Error(), Hint: hintFor(id, err), RawHex: extractRawHex(err.Error()), Attempt: attempt, Duration: dur, }) return false } return false } // proxyAddr returns the SOCKS5 proxy host:port string. func (e *executor) proxyAddr() string { return net.JoinHostPort(e.cfg.ProxyHost, strconv.Itoa(e.cfg.ProxyPort)) } // runTCP — Test 1: dial the proxy. func (e *executor) runTCP() { if e.cancelled { e.emitSkipped("tcp", "cancelled") return } if err := e.ctx.Err(); err != nil { e.emitCancelled("tcp", 1, 0) return } ok := e.runAttempt("tcp", func(ctx context.Context) (string, error) { // Close any prior conn from a previous attempt. if e.tcpConn != nil { _ = e.tcpConn.Close() e.tcpConn = nil } var d net.Dialer start := time.Now() conn, err := d.DialContext(ctx, "tcp", e.proxyAddr()) if err != nil { return "", err } e.tcpConn = conn ms := time.Since(start).Milliseconds() return fmt.Sprintf("%dms", ms), nil }) e.tcpOK = ok } // runGreet — Test 2: SOCKS5 method negotiation. func (e *executor) runGreet() { if e.shouldSkip("greet", e.tcpOK) { return } ok := e.runAttempt("greet", func(ctx context.Context) (string, error) { // Each attempt needs a fresh conn — the previous attempt // may have written bytes that left the proxy mid-handshake. if err := e.redialTCPIfNeeded(ctx); err != nil { return "", err } method, _, err := socks5Greeting(ctx, e.tcpConn, e.cfg.UseAuth) if err != nil { // Force redial on next attempt. _ = e.tcpConn.Close() e.tcpConn = nil return "", err } switch method { case 0x00: return "no auth", nil case 0x02: return "auth required", nil default: return fmt.Sprintf("method=0x%02X", method), nil } }) e.greetOK = ok } // redialTCPIfNeeded drops and re-opens tcpConn. This is called at the // start of each greet/auth/connect attempt after the first to give every // attempt a fresh connection — the proxy may have advanced state on the // previous attempt that we can't roll back. // // On the FIRST attempt for greet, we expect tcpConn to already be open // (from runTCP). The simple rule: if tcpConn==nil, redial; otherwise // keep it. The retry path closes tcpConn before re-running this loop. func (e *executor) redialTCPIfNeeded(ctx context.Context) error { if e.tcpConn != nil { return nil } var d net.Dialer conn, err := d.DialContext(ctx, "tcp", e.proxyAddr()) if err != nil { return err } e.tcpConn = conn return nil } // runAuth — Test 3: user/pass sub-negotiation. Only emitted when UseAuth. func (e *executor) runAuth() { if e.shouldSkip("auth", e.greetOK) { return } ok := e.runAttempt("auth", func(ctx context.Context) (string, error) { // On retry: drop the conn and start fresh from greet+auth. // (We can't replay only auth — the proxy has already moved // past method negotiation.) // retry detection: if we have nil tcpConn here, we lost it // in a prior failed attempt and need to redial+regreet. if e.tcpConn == nil { var d net.Dialer conn, derr := d.DialContext(ctx, "tcp", e.proxyAddr()) if derr != nil { return "", derr } e.tcpConn = conn if _, _, gerr := socks5Greeting(ctx, e.tcpConn, true); gerr != nil { return "", gerr } } _, err := socks5Auth(ctx, e.tcpConn, e.cfg.ProxyLogin, e.cfg.ProxyPassword) if err != nil { // Force redial+regreet on next attempt. _ = e.tcpConn.Close() e.tcpConn = nil return "", err } return "ok", nil }) e.authOK = ok } // runConnect — Test 4: SOCKS5 CONNECT to Discord gateway. func (e *executor) runConnect() { dep := e.greetOK && (!e.cfg.UseAuth || e.authOK) if e.shouldSkip("connect", dep) { return } host, portStr, splitErr := net.SplitHostPort(e.cfg.DiscordGateway) if splitErr != nil { e.emit(Result{ ID: "connect", Status: StatusFailed, Error: fmt.Sprintf("bad DiscordGateway %q: %s", e.cfg.DiscordGateway, splitErr.Error()), Hint: hintFor("connect", splitErr), Attempt: 1, }) return } port64, perr := strconv.ParseUint(portStr, 10, 16) if perr != nil { e.emit(Result{ ID: "connect", Status: StatusFailed, Error: fmt.Sprintf("bad DiscordGateway port %q: %s", portStr, perr.Error()), Hint: hintFor("connect", perr), Attempt: 1, }) return } port := uint16(port64) ok := e.runAttempt("connect", func(ctx context.Context) (string, error) { // On retry: redial+greet+(auth) before re-CONNECT. if e.tcpConn == nil { var d net.Dialer conn, derr := d.DialContext(ctx, "tcp", e.proxyAddr()) if derr != nil { return "", derr } e.tcpConn = conn if _, _, gerr := socks5Greeting(ctx, e.tcpConn, e.cfg.UseAuth); gerr != nil { return "", gerr } if e.cfg.UseAuth { if _, aerr := socks5Auth(ctx, e.tcpConn, e.cfg.ProxyLogin, e.cfg.ProxyPassword); aerr != nil { return "", aerr } } } _, err := socks5Connect(ctx, e.tcpConn, host, port) if err != nil { _ = e.tcpConn.Close() e.tcpConn = nil return "", err } return "REP=00", nil }) e.connectOK = ok } // runUDP — Test 5: open second TCP control channel and UDP ASSOCIATE. func (e *executor) runUDP() { dep := e.greetOK && (!e.cfg.UseAuth || e.authOK) if e.shouldSkip("udp", dep) { return } ok := e.runAttempt("udp", func(ctx context.Context) (string, error) { // Always use a fresh control channel for UDP ASSOCIATE. if e.udpConn2 != nil { _ = e.udpConn2.Close() e.udpConn2 = nil } var d net.Dialer conn, err := d.DialContext(ctx, "tcp", e.proxyAddr()) if err != nil { return "", err } e.udpConn2 = conn if _, _, gerr := socks5Greeting(ctx, conn, e.cfg.UseAuth); gerr != nil { return "", gerr } if e.cfg.UseAuth { if _, aerr := socks5Auth(ctx, conn, e.cfg.ProxyLogin, e.cfg.ProxyPassword); aerr != nil { return "", aerr } } relay, _, uerr := socks5UDPAssociate(ctx, conn) if uerr != nil { return "", uerr } e.udpRelay = relay return fmt.Sprintf("relay %s:%d", relay.IP.String(), relay.Port), nil }) e.udpOK = ok } // runVoiceQuality — Test 6: 30-packet STUN burst through the SOCKS5 UDP // relay. Computes loss, jitter, p50/p95 RTT and gates on thresholds: // // - StatusPassed: loss ≤ 5%, jitter ≤ 30ms, p50 ≤ 250ms. // - StatusWarn: loss ≤ 15%, jitter ≤ 60ms, p50 ≤ 400ms — voice will // work but with audible glitches. // - StatusFailed: anything worse, OR no replies at all. // // On warn/pass, voiceQualityOK is true (downstream tests proceed). On // failure it stays false. func (e *executor) runVoiceQuality() { if e.shouldSkip("voice-quality", e.udpOK) { return } host, portStr, splitErr := net.SplitHostPort(e.cfg.StunServer) if splitErr != nil { e.emit(Result{ ID: "voice-quality", Status: StatusFailed, Error: fmt.Sprintf("bad StunServer %q: %s", e.cfg.StunServer, splitErr.Error()), Hint: hintFor("voice-quality", splitErr), Attempt: 1, }) return } port64, perr := strconv.ParseUint(portStr, 10, 16) if perr != nil { e.emit(Result{ ID: "voice-quality", Status: StatusFailed, Error: fmt.Sprintf("bad StunServer port %q: %s", portStr, perr.Error()), Hint: hintFor("voice-quality", perr), Attempt: 1, }) return } stunPort := uint16(port64) maxAttempts := 1 + e.cfg.MaxRetries for attempt := 1; attempt <= maxAttempts; attempt++ { if err := e.ctx.Err(); err != nil { e.emitCancelled("voice-quality", attempt, 0) return } e.emit(Result{ID: "voice-quality", Status: StatusRunning, Attempt: attempt}) // Per-test budget: cap burst+listen at PerTestTimeout. attemptCtx, cancel := context.WithTimeout(e.ctx, e.cfg.PerTestTimeout) start := time.Now() // Open a fresh local UDP socket per attempt. if e.udpClient != nil { _ = e.udpClient.Close() e.udpClient = nil } pc, perr := net.ListenPacket("udp", ":0") if perr != nil { cancel() dur := time.Since(start) class := classifyError(perr) canRetry := class == ClassificationTransient && attempt < maxAttempts e.emit(Result{ ID: "voice-quality", Status: StatusFailed, Error: fmt.Sprintf("voice-quality: listen udp: %s", perr.Error()), Hint: hintFor("voice-quality", perr), Attempt: attempt, Duration: dur, }) if canRetry { select { case <-time.After(e.cfg.RetryBackoff): continue case <-e.ctx.Done(): return } } return } e.udpClient = pc res, berr := runVoiceQualityBurst( attemptCtx, pc, e.udpRelay, host, stunPort, e.cfg.VoiceBurstCount, e.cfg.VoiceBurstInterval, ) dur := time.Since(start) cancel() if berr != nil { // Resolution / cancellation. Treat ctx-cancel separately. if e.ctx.Err() != nil { e.emitCancelled("voice-quality", attempt, dur) return } class := classifyError(berr) canRetry := class == ClassificationTransient && attempt < maxAttempts e.emit(Result{ ID: "voice-quality", Status: StatusFailed, Error: berr.Error(), Hint: hintFor("voice-quality", berr), Attempt: attempt, Duration: dur, }) if canRetry { select { case <-time.After(e.cfg.RetryBackoff): continue case <-e.ctx.Done(): return } } return } // 100% loss with no underlying error → the relay accepted UDP // (per test 5) but nothing came back. Treat as transient on // the first attempt; permanent on the second. if res.Received == 0 { canRetry := attempt < maxAttempts e.emit(Result{ ID: "voice-quality", Status: StatusFailed, Error: "no replies received", Hint: voiceQualityFailHint(100.0, 0, 0, 0), Metric: "loss=100%", Attempt: attempt, Duration: dur, }) if canRetry { select { case <-time.After(e.cfg.RetryBackoff): continue case <-e.ctx.Done(): return } } return } metric := fmt.Sprintf("loss=%.0f%% jitter=%.1fms p50=%.1fms", res.LossPct, res.JitterMS, res.P50RTTMS) switch { case res.LossPct <= 5.0 && res.JitterMS <= 30.0 && res.P50RTTMS <= 250.0: e.emit(Result{ ID: "voice-quality", Status: StatusPassed, Metric: metric, Attempt: attempt, Duration: dur, }) e.voiceQualityOK = true return case res.LossPct <= 15.0 && res.JitterMS <= 60.0 && res.P50RTTMS <= 400.0: e.emit(Result{ ID: "voice-quality", Status: StatusWarn, Metric: metric, Hint: voiceQualityWarnHint(res.LossPct, res.JitterMS, res.P50RTTMS), Attempt: attempt, Duration: dur, }) e.voiceQualityOK = true return default: canRetry := attempt < maxAttempts e.emit(Result{ ID: "voice-quality", Status: StatusFailed, Error: metric, Metric: metric, Hint: voiceQualityFailHint(res.LossPct, res.JitterMS, res.P50RTTMS, res.P95RTTMS), Attempt: attempt, Duration: dur, }) if canRetry { select { case <-time.After(e.cfg.RetryBackoff): continue case <-e.ctx.Done(): return } } return } } } // runAPI — Test 7: HTTP GET Discord API gateway URL through the proxy. func (e *executor) runAPI() { if e.shouldSkip("api", e.connectOK) { return } e.runAttempt("api", func(ctx context.Context) (string, error) { transport := &http.Transport{ DialContext: func(ctx context.Context, _network, addr string) (net.Conn, error) { return e.dialThroughProxy(ctx, addr) }, TLSClientConfig: &tls.Config{}, DisableKeepAlives: true, ResponseHeaderTimeout: e.cfg.PerTestTimeout, } client := &http.Client{ Transport: transport, Timeout: e.cfg.PerTestTimeout, } req, err := http.NewRequestWithContext(ctx, "GET", e.cfg.DiscordAPI, nil) if err != nil { return "", err } resp, err := client.Do(req) if err != nil { return "", err } defer resp.Body.Close() if resp.StatusCode == 200 || resp.StatusCode == 401 { return fmt.Sprintf("HTTP %d", resp.StatusCode), nil } return "", fmt.Errorf("api: HTTP %d", resp.StatusCode) }) } // dialThroughProxy is the http.Transport.DialContext used by runAPI. It // opens a TCP connection to the SOCKS5 proxy, performs greet+(auth)+CONNECT // to addr, then returns the established conn. func (e *executor) dialThroughProxy(ctx context.Context, addr string) (net.Conn, error) { host, portStr, err := net.SplitHostPort(addr) if err != nil { return nil, fmt.Errorf("api: split %q: %w", addr, err) } port64, err := strconv.ParseUint(portStr, 10, 16) if err != nil { return nil, fmt.Errorf("api: bad port %q: %w", portStr, err) } port := uint16(port64) var d net.Dialer conn, err := d.DialContext(ctx, "tcp", e.proxyAddr()) if err != nil { return nil, err } if _, _, gerr := socks5Greeting(ctx, conn, e.cfg.UseAuth); gerr != nil { _ = conn.Close() return nil, gerr } if e.cfg.UseAuth { if _, aerr := socks5Auth(ctx, conn, e.cfg.ProxyLogin, e.cfg.ProxyPassword); aerr != nil { _ = conn.Close() return nil, aerr } } if _, cerr := socks5Connect(ctx, conn, host, port); cerr != nil { _ = conn.Close() return nil, cerr } // Clear the deadline socks5* primitives applied — http.Transport // manages timing past this point. _ = conn.SetDeadline(time.Time{}) return conn, nil }