experimental/windivert: P2.1+P2.2 with WinDivert NETWORK+SOCKET layers

WIP snapshot before the pivot to sing-box + TUN.

Reached:
- TCP redirect via the streamdump pattern (swap + Outbound=0 + reinject)
- SOCKET layer for SYN-stage flow detection (avoids the race where the FLOW Established event arrives too late)
- Lazy PID→name resolution (catches Update.exe inside the procscan tick)
- UDP forward via SOCKS5 UDP ASSOCIATE relay + manual reinject
- Result: chat works, voice times out (Discord IP discovery / RTC handshake fails)

Reason for the pivot: the WinDivert NAT-reinject pattern has subtle layer-3 semantics issues that DLL-injection / TUN-based proxies sidestep entirely. Going with embedded sing-box + wintun as the engine — a proven path for Discord voice through SOCKS5.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 22:27:54 +03:00
parent 8ceb7775d7
commit 4074e68715
19 changed files with 2666 additions and 62 deletions
@@ -5,10 +5,83 @@ package divert
 import (
 	"errors"
 	"fmt"
+	"unsafe"

 	idivert "github.com/imgk/divert-go"
 )

+// idivertAddrLayout mirrors the imgk/divert-go private Address fields
+// so we can read the raw 64-byte union without going through their
+// (mis-aligned for FLOW events) accessor.
+//
+// RecvFlow and RecvSocket reinterpret a *idivert.Address as this type
+// via unsafe.Pointer, so field order and sizes here must stay in
+// lock-step with the library's struct.
+// NOTE(review): re-verify this layout whenever imgk/divert-go is bumped.
+type idivertAddrLayout struct {
+	Timestamp int64
+	Layer     uint8
+	Event     uint8
+	// Flags is the per-packet flag byte (see SendInjectInbound for the
+	// bit meanings used by this package).
+	Flags     uint8
+	_         uint8
+	Length    uint32
+	// Union is the layer-specific payload; parseFlowUnion and
+	// parseSocketUnion decode it for FLOW and SOCKET events.
+	Union     [64]byte
+}
+
+// parseFlowUnion decodes a FLOW-layer event from the raw 64-byte union
+// of a WinDivert address. The offsets read here (all little-endian) are:
+//
+//	offset 16..19  ProcessId   uint32
+//	offset 20..35  LocalAddr   16-byte address slot
+//	offset 36..51  RemoteAddr  16-byte address slot
+//	offset 52..53  LocalPort   uint16
+//	offset 54..55  RemotePort  uint16
+//	offset 56      Protocol    uint8
+//
+// Bytes 0..15 are not decoded. LocalAddr starts immediately after
+// ProcessId, i.e. there is no padding between the two.
+//
+// The IPv4 address sits in the first 4 bytes of each slot in reversed
+// (little-endian uint32) order, so it is byte-swapped into A.B.C.D
+// form for SrcAddr/DstAddr. NOTE(review): the original notes say bytes
+// 4..7 of the slot carry the 0xFFFF mapped-IPv6 prefix on WinDivert
+// v2.2.2; nothing here checks that, so IPv6 flows would be mis-decoded.
+//
+// A slice shorter than 64 bytes yields a zero-valued event.
+func parseFlowUnion(b []byte) *FlowEvent {
+	if len(b) < 64 {
+		return &FlowEvent{}
+	}
+	local, remote := toAddr16(b[20:36]), toAddr16(b[36:52])
+	localPort, remotePort := leU16(b[52:54]), leU16(b[54:56])
+	return &FlowEvent{
+		ProcessID: leU32(b[16:20]),
+		Protocol:  b[56],
+		// Reverse the first four slot bytes to obtain A.B.C.D order.
+		SrcAddr: [4]byte{local[3], local[2], local[1], local[0]},
+		DstAddr: [4]byte{remote[3], remote[2], remote[1], remote[0]},
+		SrcPort: localPort,
+		DstPort: remotePort,
+		// Diagnostic copies of the undecoded values.
+		LocalRaw:   local,
+		RemoteRaw:  remote,
+		LocalPort:  localPort,
+		RemotePort: remotePort,
+	}
+}
+
+// leU32 reads the first four bytes of b as a little-endian uint32.
+// It panics if b holds fewer than four bytes.
+func leU32(b []byte) uint32 {
+	b0, b1, b2, b3 := uint32(b[0]), uint32(b[1]), uint32(b[2]), uint32(b[3])
+	return b3<<24 | b2<<16 | b1<<8 | b0
+}
+// leU16 reads the first two bytes of b as a little-endian uint16.
+// It panics if b holds fewer than two bytes.
+func leU16(b []byte) uint16 {
+	lo, hi := uint16(b[0]), uint16(b[1])
+	return hi<<8 | lo
+}
+// toAddr16 copies up to 16 leading bytes of b into a fixed-size array;
+// any bytes b does not supply stay zero.
+func toAddr16(b []byte) [16]byte {
+	var out [16]byte
+	for i := 0; i < len(out) && i < len(b); i++ {
+		out[i] = b[i]
+	}
+	return out
+}
+
 // Handle wraps a WinDivert handle.
 type Handle struct {
 	h *idivert.Handle
@@ -29,6 +102,187 @@ func Open(filter string) (*Handle, error) {
 	return &Handle{h: h}, nil
 }

+// OpenFlow opens a WinDivert handle on the FLOW layer with the given
+// filter. FLOW events report flow establish/delete together with the
+// owning processId, which is how the engine learns which 5-tuples
+// belong to target processes (the processId field is not valid in the
+// NETWORK-layer filter language). Such handles are read-only: they
+// cannot Send packets.
+//
+// The handle is opened with SNIFF (observe only, no interception) and
+// RECV_ONLY (no Send); per the WinDivert reference, WinDivertOpen
+// rejects FLOW handles that lack either flag.
+//
+// Errors from the underlying open call are translated by
+// mapWinDivertErr.
+func OpenFlow(filter string) (*Handle, error) {
+	raw, err := idivert.Open(filter, idivert.LayerFlow, 0, idivert.FlagSniff|idivert.FlagRecvOnly)
+	if err == nil {
+		return &Handle{h: raw}, nil
+	}
+	return nil, mapWinDivertErr(err)
+}
+
+// OpenSocket opens a WinDivert handle on the SOCKET layer with the
+// given filter. SOCKET events fire synchronously with the socket
+// syscalls (bind/connect/listen/accept/close). Connect in particular
+// fires BEFORE the SYN leaves the box, leaving a window in which the
+// redirect tables can be populated ahead of the NETWORK-layer SYN.
+//
+// The flag rules match OpenFlow: SNIFF + RECV_ONLY are both required.
+// Errors from the underlying open call are translated by
+// mapWinDivertErr.
+func OpenSocket(filter string) (*Handle, error) {
+	raw, err := idivert.Open(filter, idivert.LayerSocket, 0, idivert.FlagSniff|idivert.FlagRecvOnly)
+	if err == nil {
+		return &Handle{h: raw}, nil
+	}
+	return nil, mapWinDivertErr(err)
+}
+
+// SocketEvent represents a socket-layer event (Connect/Close/etc).
+// It is produced by RecvSocket: the address fields are decoded from
+// the raw event union by parseSocketUnion and Kind is derived from the
+// WinDivert event code.
+type SocketEvent struct {
+	ProcessID uint32
+	Protocol  uint8 // 6=TCP, 17=UDP
+	// SrcAddr/DstAddr hold the IPv4 address in A.B.C.D order (the first
+	// four bytes of LocalRaw/RemoteRaw, reversed). SrcPort/DstPort are
+	// the local and remote ports respectively.
+	SrcAddr   [4]byte
+	SrcPort   uint16
+	DstAddr   [4]byte
+	DstPort   uint16
+	Kind      SocketEventKind
+	LocalRaw  [16]byte // raw 16-byte slot for diagnostic
+	RemoteRaw [16]byte
+}
+
+// SocketEventKind enumerates the socket-layer events we care about.
+// RecvSocket maps each idivert.EventSocket* code onto one of these.
+type SocketEventKind int
+
+// The zero value is SocketKindUnknown, so an uninitialised SocketEvent
+// never masquerades as a real event kind.
+const (
+	SocketKindUnknown SocketEventKind = iota
+	SocketKindBind
+	SocketKindConnect
+	SocketKindListen
+	SocketKindAccept
+	SocketKindClose
+)
+
+// RecvSocket blocks until the next event arrives on a SOCKET-layer
+// handle and returns it decoded. SOCKET events carry no packet payload;
+// only the address metadata is of interest.
+//
+// It returns an error when the handle is nil/closed, when the
+// underlying Recv fails (translated by mapWinDivertErr), or when the
+// event code is not one of the five socket events this package knows.
+func (h *Handle) RecvSocket() (*SocketEvent, error) {
+	if h == nil || h.h == nil {
+		return nil, errors.New("handle closed")
+	}
+	// A small scratch buffer is still handed to Recv even though no
+	// payload is expected.
+	var scratch [4]byte
+	addr := new(idivert.Address)
+	if _, err := h.h.Recv(scratch[:], addr); err != nil {
+		return nil, mapWinDivertErr(err)
+	}
+
+	var kind SocketEventKind
+	switch code := addr.Event(); code {
+	case idivert.EventSocketBind:
+		kind = SocketKindBind
+	case idivert.EventSocketConnect:
+		kind = SocketKindConnect
+	case idivert.EventSocketListen:
+		kind = SocketKindListen
+	case idivert.EventSocketAccept:
+		kind = SocketKindAccept
+	case idivert.EventSocketClose:
+		kind = SocketKindClose
+	default:
+		return nil, fmt.Errorf("unexpected socket event %d", code)
+	}
+
+	// The union is decoded from its raw bytes rather than through the
+	// imgk/divert-go accessor, for the same reason as in RecvFlow.
+	layout := (*idivertAddrLayout)(unsafe.Pointer(addr))
+	ev := parseSocketUnion(layout.Union[:])
+	ev.Kind = kind
+	return ev, nil
+}
+
+// parseSocketUnion decodes a SOCKET-layer event from the raw 64-byte
+// union, using the same offsets as parseFlowUnion (the original author
+// notes that WINDIVERT_DATA_SOCKET is byte-identical to
+// WINDIVERT_DATA_FLOW in windivert.h v2.2.2):
+//
+//	offset 16..19  ProcessId   uint32 (little-endian)
+//	offset 20..35  LocalAddr   16-byte address slot
+//	offset 36..51  RemoteAddr  16-byte address slot
+//	offset 52..53  LocalPort   uint16 (little-endian)
+//	offset 54..55  RemotePort  uint16 (little-endian)
+//	offset 56      Protocol    uint8
+//
+// The IPv4 address occupies the first 4 bytes of each slot in reversed
+// order and is byte-swapped into A.B.C.D form. Kind is left at its zero
+// value for the caller to fill in. A slice shorter than 64 bytes yields
+// a zero-valued event.
+func parseSocketUnion(b []byte) *SocketEvent {
+	if len(b) < 64 {
+		return &SocketEvent{}
+	}
+	local, remote := toAddr16(b[20:36]), toAddr16(b[36:52])
+	return &SocketEvent{
+		ProcessID: leU32(b[16:20]),
+		Protocol:  b[56],
+		SrcAddr:   [4]byte{local[3], local[2], local[1], local[0]},
+		SrcPort:   leU16(b[52:54]),
+		DstAddr:   [4]byte{remote[3], remote[2], remote[1], remote[0]},
+		DstPort:   leU16(b[54:56]),
+		LocalRaw:  local,
+		RemoteRaw: remote,
+	}
+}
+
+// FlowEvent represents a flow-establish/delete event from a FLOW
+// handle. SrcAddr/DstAddr are the IPv4 addresses (4 bytes, network
+// byte order: A.B.C.D = SrcAddr[0..3]). LocalRaw/RemoteRaw are the
+// raw 16-byte slots from WinDivert for diagnostic dumps.
+//
+// Established=true on EventFlowEstablished; false on EventFlowDeleted.
+// It is produced by RecvFlow, which decodes the address fields with
+// parseFlowUnion and then sets Established from the event code.
+type FlowEvent struct {
+	ProcessID   uint32
+	Protocol    uint8 // 6=TCP, 17=UDP
+	SrcAddr     [4]byte
+	SrcPort     uint16
+	DstAddr     [4]byte
+	DstPort     uint16
+	Established bool
+
+	// Diagnostic fields populated by parseFlowUnion. Used by
+	// debug-flow logging; production code should consume the
+	// SrcAddr/DstAddr/SrcPort/DstPort fields above.
+	// LocalPort/RemotePort carry the same values as SrcPort/DstPort.
+	LocalRaw   [16]byte
+	RemoteRaw  [16]byte
+	LocalPort  uint16
+	RemotePort uint16
+}
+
+// RecvFlow blocks until the next event arrives on a FLOW-layer handle
+// and returns it decoded. FLOW events carry no packet payload; only
+// the address metadata is of interest.
+//
+// It returns an error when the handle is nil/closed, when the
+// underlying Recv fails (e.g. shutdown during close; translated by
+// mapWinDivertErr), or when the event code is neither "established"
+// nor "deleted".
+func (h *Handle) RecvFlow() (*FlowEvent, error) {
+	if h == nil || h.h == nil {
+		return nil, errors.New("handle closed")
+	}
+	// The event has a zero-byte packet, but a non-nil buffer is still
+	// handed to the API.
+	var scratch [4]byte
+	addr := new(idivert.Address)
+	if _, err := h.h.Recv(scratch[:], addr); err != nil {
+		return nil, mapWinDivertErr(err)
+	}
+
+	var established bool
+	switch code := addr.Event(); code {
+	case idivert.EventFlowEstablished:
+		established = true
+	case idivert.EventFlowDeleted:
+		established = false
+	default:
+		return nil, fmt.Errorf("unexpected flow event %d", code)
+	}
+
+	// The union is parsed from its raw bytes instead of through
+	// imgk/divert-go's Flow accessor, which was observed to yield
+	// zeroed addresses for FLOW events.
+	// NOTE(review): the root cause is not established here. An earlier
+	// note blamed 8-byte padding after ProcessId, but parseFlowUnion
+	// reads LocalAddr at offset 20, i.e. with no such padding.
+	layout := (*idivertAddrLayout)(unsafe.Pointer(addr))
+	ev := parseFlowUnion(layout.Union[:])
+	ev.Established = established
+	return ev, nil
+}
+
 // Close closes the handle. Safe to call multiple times.
 func (h *Handle) Close() error {
 	if h == nil || h.h == nil {
@@ -72,6 +326,45 @@ func (h *Handle) Send(buf []byte, addr *idivert.Address) (int, error) {
 	return int(n), nil
 }

+// SendInjectInbound reinjects a fabricated IPv4 packet as inbound (i.e.
+// the kernel delivers it via the receive path of whatever interface
+// owns the destination IP). Used by the UDPProxy to deliver SOCKS5
+// relay responses back to a target process: the caller synthesizes an
+// IPv4+UDP packet with src=remote_endpoint, dst=local_LAN_IP and hands
+// it to this method.
+//
+// buf is the complete IPv4 packet, with checksums already valid.
+// isUDP selects which transport-checksum flag is asserted: UDPChecksum
+// when true, TCPChecksum when false. The method returns the byte count
+// reported by the underlying Send, or an error translated by
+// mapWinDivertErr (or "handle closed" for a nil/closed handle).
+//
+// Internally it builds a fresh *idivert.Address with the NETWORK layer
+// and the flags below; every other field keeps its zero value.
+// NOTE(review): the original notes say a zero interface index makes
+// WinDivert route via the default interface — confirm.
+//
+// Flags semantics (per WinDivert v2.2.2 windivert.h):
+//
+//	bit 0 (0x01) = Sniffed     — left clear
+//	bit 1 (0x02) = Outbound    — left clear, i.e. inbound
+//	bit 5 (0x20) = IPChecksum  — packet has valid IPv4 header checksum
+//	bit 6 (0x40) = TCPChecksum — packet has valid TCP checksum
+//	bit 7 (0x80) = UDPChecksum — packet has valid UDP checksum
+func (h *Handle) SendInjectInbound(buf []byte, isUDP bool) (int, error) {
+	if h == nil || h.h == nil {
+		return 0, errors.New("handle closed")
+	}
+	const (
+		flagIPChecksum  uint8 = 0x20
+		flagTCPChecksum uint8 = 0x40
+		flagUDPChecksum uint8 = 0x80
+	)
+
+	addr := new(idivert.Address)
+	addr.SetLayer(idivert.LayerNetwork)
+	addr.SetEvent(idivert.EventNetworkPacket)
+
+	// Outbound and Sniffed stay clear; only the "checksum is valid"
+	// bits are asserted.
+	flags := flagIPChecksum
+	if isUDP {
+		flags |= flagUDPChecksum
+	} else {
+		flags |= flagTCPChecksum
+	}
+	addr.Flags = flags
+
+	n, err := h.h.Send(buf, addr)
+	if err != nil {
+		return 0, mapWinDivertErr(err)
+	}
+	return int(n), nil
+}
+
 // Sentinel errors mapped from raw Windows errors so the engine layer
 // can pattern-match without importing windows package.
 var (
@@ -24,22 +24,25 @@ type FilterParams struct {
 	// self-loops. If unparseable, "0.0.0.0" is substituted (caller
 	// should validate before calling).
 	UpstreamIP string
+
+	// LocalIP is the machine's LAN IP — listener binds here, so
+	// reinjected NAT'd packets (which still bear the original src)
+	// reach it. Must be excluded from the filter to prevent infinite
+	// recapture of NAT'd packets (we'd see them outbound again).
+	LocalIP string
 }

-// BuildFilter returns a WinDivert filter expression string suitable
-// for WinDivertOpen. The expression captures only outbound IPv4 TCP/UDP
-// from the listed PIDs, excluding our own process and the upstream
-// proxy's IP.
-func BuildFilter(p FilterParams) string {
+// BuildFlowFilter returns a filter expression for the FLOW layer handle.
+// processId is ONLY available at FLOW/SOCKET layers, not NETWORK — that's
+// why we run two handles in parallel: this FLOW handle observes which
+// 5-tuples belong to target PIDs, and the NETWORK handle (BuildNetworkFilter)
+// captures actual packets.
+//
+// Empty PID list → "false" (matches no flows).
+func BuildFlowFilter(p FilterParams) string {
 	if len(p.TargetPIDs) == 0 {
 		return "false"
 	}
-
-	upstream := p.UpstreamIP
-	if net.ParseIP(upstream).To4() == nil {
-		upstream = "0.0.0.0"
-	}
-
 	pidClauses := make([]string, len(p.TargetPIDs))
 	for i, pid := range p.TargetPIDs {
 		pidClauses[i] = fmt.Sprintf("processId == %d", pid)
@@ -47,15 +50,61 @@ func BuildFilter(p FilterParams) string {
 	pidClause := "(" + strings.Join(pidClauses, " or ") + ")"

 	parts := []string{
-		"outbound",
 		"(tcp or udp)",
 		"ip",
 		pidClause,
 		fmt.Sprintf("processId != %d", p.OwnPID),
-		fmt.Sprintf("ip.DstAddr != %s", upstream),
-		"not (ip.DstAddr >= 224.0.0.0 and ip.DstAddr <= 239.255.255.255)",
-		"not (ip.DstAddr >= 127.0.0.0 and ip.DstAddr <= 127.255.255.255)",
-		"not (ip.DstAddr >= 169.254.0.0 and ip.DstAddr <= 169.254.255.255)",
 	}
 	return strings.Join(parts, " and ")
 }
+
+// BuildNetworkFilter returns a filter expression for the NETWORK layer
+// handle. It captures all outbound IPv4 TCP/UDP except loopback,
+// multicast, link-local, and the upstream proxy. The engine then
+// narrows by consulting the flow tracker fed by the FLOW handle.
+//
+// We don't (can't) filter by processId here — see BuildFlowFilter.
+// Self-loop protection: ip.DstAddr != upstream blocks our own SOCKS5
+// uplink, and 127.0.0.0/8 exclusion blocks our loopback redirector.
+//
+// p.UpstreamIP that does not parse as IPv4 is replaced by "0.0.0.0".
+// p.LocalIP is excluded only when it parses as IPv4. Both addresses
+// are emitted in canonical dotted-quad form, so a spelling such as
+// "::ffff:10.0.0.5" (which parses as IPv4) cannot leak an IPv6 literal
+// into an ip.DstAddr clause.
+//
+// Range exclusions are spelled with explicit `<`/`>` rather than
+// `not (a and b)` because some WinDivert versions reject the latter
+// at filter compile time.
+func BuildNetworkFilter(p FilterParams) string {
+	upstream := "0.0.0.0"
+	if ip4 := net.ParseIP(p.UpstreamIP).To4(); ip4 != nil {
+		upstream = ip4.String()
+	}
+	parts := []string{
+		"outbound",
+		"ip",
+		"(tcp or udp)",
+		fmt.Sprintf("ip.DstAddr != %s", upstream),
+		// Loopback 127.0.0.0/8
+		"(ip.DstAddr < 127.0.0.0 or ip.DstAddr > 127.255.255.255)",
+		// Multicast 224.0.0.0/4
+		"(ip.DstAddr < 224.0.0.0 or ip.DstAddr > 239.255.255.255)",
+		// Link-local 169.254.0.0/16
+		"(ip.DstAddr < 169.254.0.0 or ip.DstAddr > 169.254.255.255)",
+	}
+	// Exclude packets DESTINED to our own LAN IP — they're either
+	// intra-machine traffic we don't care about OR our own NAT'd
+	// reinjects coming back around. Without this we infinite-loop.
+	if ip4 := net.ParseIP(p.LocalIP).To4(); ip4 != nil {
+		parts = append(parts, fmt.Sprintf("ip.DstAddr != %s", ip4.String()))
+	}
+	return strings.Join(parts, " and ")
+}
+
+// BuildFilter is the legacy single-filter API, kept for callers that
+// have not moved to the dual-handle architecture yet. With an empty
+// TargetPIDs list it returns "false" (match nothing); otherwise it
+// returns exactly what BuildNetworkFilter returns. No processId clause
+// is included, since that field is invalid at the NETWORK layer.
+//
+// Deprecated: use BuildFlowFilter + BuildNetworkFilter together.
+func BuildFilter(p FilterParams) string {
+	if len(p.TargetPIDs) > 0 {
+		return BuildNetworkFilter(p)
+	}
+	return "false"
+}
@@ -10,6 +10,8 @@ import (
 	"path/filepath"
 	"runtime"
 	"strings"
+	"syscall"
+	"unsafe"
 )

 // DriverPaths records where the WinDivert binaries landed after install.
@@ -56,9 +58,34 @@ func installDriverInto(dst string) (*DriverPaths, error) {
 	if err := writeIfDifferent(dllPath, winDivertDll, WinDivertDllSHA256); err != nil {
 		return nil, fmt.Errorf("install WinDivert.dll: %w", err)
 	}
+	// imgk/divert-go's LazyDLL("WinDivert.dll") relies on the standard
+	// Windows DLL search path. Our extracted binaries live in
+	// %PROGRAMDATA%\Drover\windivert\ which isn't on that path by
+	// default. SetDllDirectoryW prepends our directory so the lazy
+	// load resolves it. Must be called BEFORE the first divert.Open.
+	if err := setDllDirectory(dst); err != nil {
+		return nil, fmt.Errorf("SetDllDirectory %q: %w", dst, err)
+	}
 	return &DriverPaths{SysPath: sysPath, DllPath: dllPath}, nil
 }

+// Lazily-resolved kernel32 entry point used by setDllDirectory.
+var (
+	kernel32             = syscall.NewLazyDLL("kernel32.dll")
+	procSetDllDirectoryW = kernel32.NewProc("SetDllDirectoryW")
+)
+
+// setDllDirectory calls SetDllDirectoryW(path) so that a later lazy
+// load of WinDivert.dll can be resolved from that directory.
+//
+// It returns an error when path cannot be converted to UTF-16 (e.g. it
+// contains a NUL byte), when SetDllDirectoryW cannot be resolved in
+// kernel32.dll, or when the call itself reports failure.
+func setDllDirectory(path string) error {
+	p, err := syscall.UTF16PtrFromString(path)
+	if err != nil {
+		return err
+	}
+	// Resolve explicitly: LazyProc.Addr panics when the procedure (or
+	// the DLL) cannot be loaded, and library code should not panic.
+	if err := procSetDllDirectoryW.Find(); err != nil {
+		return err
+	}
+	r1, _, e1 := syscall.SyscallN(procSetDllDirectoryW.Addr(), uintptr(unsafe.Pointer(p)))
+	if r1 == 0 {
+		// A failing call with errno 0 would otherwise surface as the
+		// misleading "operation completed successfully" error.
+		if e1 != 0 {
+			return e1
+		}
+		return syscall.EINVAL
+	}
+	return nil
+}
+
 // writeIfDifferent compares the existing file's SHA256 to the expected
 // hash; if it matches, no-op. Otherwise overwrite atomically and verify
 // the resulting on-disk SHA matches expected.
@@ -81,6 +81,84 @@ func RewriteDst(b []byte, ip net.IP, port uint16) error {
 	return nil
 }

+// SwapAndSetDstPort performs the streamdump-style NAT-redirect rewrite
+// in place on an IPv4+TCP packet: the IPv4 source and destination
+// addresses are exchanged and the TCP destination port becomes
+// newDstPort. The TCP source port is left alone, so the listener sees
+// a unique RemoteAddr it can use to look the flow up. Both the IPv4
+// header checksum and the TCP checksum are recomputed.
+//
+// Intended for the FORWARD path (target process → remote): after the
+// rewrite, set addr.Outbound=0 and reinject so the packet appears as
+// remote → local on the inbound path and lands at the listener.
+//
+// Returns whatever error ParseIPv4TCP reports for b; b is untouched in
+// that case.
+func SwapAndSetDstPort(b []byte, newDstPort uint16) error {
+	if _, err := ParseIPv4TCP(b); err != nil {
+		return err
+	}
+	hdrLen := int(b[0]&0x0f) * 4
+
+	// Exchange source (bytes 12..15) and destination (bytes 16..19).
+	for i := 12; i < 16; i++ {
+		b[i], b[i+4] = b[i+4], b[i]
+	}
+
+	// Destination port lives at offset 2 of the TCP header.
+	binary.BigEndian.PutUint16(b[hdrLen+2:hdrLen+4], newDstPort)
+
+	// IPv4 header checksum: zero the field, then recompute.
+	binary.BigEndian.PutUint16(b[10:12], 0)
+	binary.BigEndian.PutUint16(b[10:12], ipChecksum(b[:hdrLen]))
+
+	// TCP checksum (offset 16 of the TCP header): same procedure.
+	binary.BigEndian.PutUint16(b[hdrLen+16:hdrLen+18], 0)
+	binary.BigEndian.PutUint16(b[hdrLen+16:hdrLen+18], tcpChecksum(b[:hdrLen], b[hdrLen:]))
+	return nil
+}
+
+// SwapAndSetSrcPort performs the streamdump-style return-path rewrite
+// in place on an IPv4+TCP packet: the IPv4 source and destination
+// addresses are exchanged and the TCP source port becomes newSrcPort
+// (the original target port the client expects to see, e.g. 443). The
+// TCP destination port — the client's ephemeral port — is left alone.
+// Both the IPv4 header checksum and the TCP checksum are recomputed.
+//
+// Intended for the RETURN path (listener → client): after the rewrite,
+// set addr.Outbound=0 and reinject so the packet appears as remote →
+// local on the inbound path, matches the client's connect() pair, and
+// is accepted by the client socket as if it came from the real target.
+//
+// Returns whatever error ParseIPv4TCP reports for b; b is untouched in
+// that case.
+func SwapAndSetSrcPort(b []byte, newSrcPort uint16) error {
+	if _, err := ParseIPv4TCP(b); err != nil {
+		return err
+	}
+	hdrLen := int(b[0]&0x0f) * 4
+
+	// Exchange source (bytes 12..15) and destination (bytes 16..19).
+	for i := 12; i < 16; i++ {
+		b[i], b[i+4] = b[i+4], b[i]
+	}
+
+	// Source port lives at offset 0 of the TCP header.
+	binary.BigEndian.PutUint16(b[hdrLen:hdrLen+2], newSrcPort)
+
+	// IPv4 header checksum: zero the field, then recompute.
+	binary.BigEndian.PutUint16(b[10:12], 0)
+	binary.BigEndian.PutUint16(b[10:12], ipChecksum(b[:hdrLen]))
+
+	// TCP checksum (offset 16 of the TCP header): same procedure.
+	binary.BigEndian.PutUint16(b[hdrLen+16:hdrLen+18], 0)
+	binary.BigEndian.PutUint16(b[hdrLen+16:hdrLen+18], tcpChecksum(b[:hdrLen], b[hdrLen:]))
+	return nil
+}
+
 // ipChecksum is the standard 16-bit one's-complement sum over the IP
 // header (RFC 791). The "checksum field" must be zeroed before calling.
 func ipChecksum(hdr []byte) uint16 {
@@ -121,3 +199,233 @@ func tcpChecksum(ipHdr, tcpSeg []byte) uint16 {
 	}
 	return ^uint16(sum)
 }
+
+// IPv4UDPInfo is what we extract from a raw IPv4+UDP packet for our
+// per-flow mapping table. It is filled in by ParseIPv4UDP.
+type IPv4UDPInfo struct {
+	SrcIP, DstIP     net.IP
+	SrcPort, DstPort uint16
+	IHL              int // IPv4 header length in bytes
+	// UDPLen is the raw value of the UDP header's length field; the
+	// parser does not check it against the buffer size.
+	UDPLen           uint16
+}
+
+// ParseIPv4UDP extracts the addressing information from the IPv4 and
+// UDP headers at the start of b. The buffer is only read, never
+// modified.
+//
+// It fails when:
+//   - b is shorter than a minimal IPv4+UDP header pair (28 bytes)
+//   - the IP version nibble is not 4
+//   - the IHL is below 20 bytes, or b does not hold IHL+8 bytes
+//   - the IP protocol is not 17 (UDP)
+//
+// The UDP length field is returned as-is; it is not validated against
+// len(b).
+func ParseIPv4UDP(b []byte) (*IPv4UDPInfo, error) {
+	const minLen = 20 + 8 // option-less IPv4 header + UDP header
+	if len(b) < minLen {
+		return nil, errors.New("packet shorter than IPv4+UDP minimum")
+	}
+	if version := b[0] >> 4; version != 4 {
+		return nil, errors.New("not IPv4")
+	}
+	hdrLen := int(b[0]&0x0f) * 4
+	if hdrLen < 20 || hdrLen+8 > len(b) {
+		return nil, errors.New("IPv4 IHL invalid or buffer truncated")
+	}
+	if proto := b[9]; proto != 17 {
+		return nil, errors.New("not UDP")
+	}
+
+	udp := b[hdrLen : hdrLen+8]
+	info := &IPv4UDPInfo{
+		SrcIP:   net.IPv4(b[12], b[13], b[14], b[15]),
+		DstIP:   net.IPv4(b[16], b[17], b[18], b[19]),
+		SrcPort: binary.BigEndian.Uint16(udp[0:2]),
+		DstPort: binary.BigEndian.Uint16(udp[2:4]),
+		IHL:     hdrLen,
+		UDPLen:  binary.BigEndian.Uint16(udp[4:6]),
+	}
+	return info, nil
+}
+
+// SwapUDPAndSetDstPort performs the streamdump-style swap in place on
+// an IPv4+UDP packet: the IPv4 source and destination addresses are
+// exchanged and the UDP destination port becomes newDstPort. The UDP
+// source port is left alone. The IPv4 header checksum and the UDP
+// checksum are both recomputed.
+//
+// (drover generally does NOT use swap+reinject for UDP — the engine's
+// diverterLoop "consumes" target UDP packets and forwards them through
+// the SOCKS5 UDP relay directly. This helper exists for symmetry with
+// the TCP swap helpers and for tests.)
+//
+// Returns whatever error ParseIPv4UDP reports for b; b is untouched in
+// that case.
+func SwapUDPAndSetDstPort(b []byte, newDstPort uint16) error {
+	if _, err := ParseIPv4UDP(b); err != nil {
+		return err
+	}
+	hdrLen := int(b[0]&0x0f) * 4
+
+	// Exchange source (bytes 12..15) and destination (bytes 16..19).
+	for i := 12; i < 16; i++ {
+		b[i], b[i+4] = b[i+4], b[i]
+	}
+
+	// Destination port lives at offset 2 of the UDP header.
+	binary.BigEndian.PutUint16(b[hdrLen+2:hdrLen+4], newDstPort)
+
+	// IPv4 header checksum: zero the field, then recompute.
+	binary.BigEndian.PutUint16(b[10:12], 0)
+	binary.BigEndian.PutUint16(b[10:12], ipChecksum(b[:hdrLen]))
+
+	// The UDP segment ends where the UDP length field says it does,
+	// clamped to the end of the buffer.
+	segEnd := hdrLen + int(binary.BigEndian.Uint16(b[hdrLen+4:hdrLen+6]))
+	if segEnd > len(b) {
+		segEnd = len(b)
+	}
+
+	// UDP checksum (offset 6 of the UDP header): zero, then recompute.
+	binary.BigEndian.PutUint16(b[hdrLen+6:hdrLen+8], 0)
+	sum := udpChecksum(b[:hdrLen], b[hdrLen:segEnd])
+	if sum == 0 {
+		// A transmitted zero means "no checksum" for IPv4 UDP, so
+		// RFC 768 has a computed zero sent as 0xFFFF instead.
+		sum = 0xFFFF
+	}
+	binary.BigEndian.PutUint16(b[hdrLen+6:hdrLen+8], sum)
+	return nil
+}
+
+// SwapUDPAndSetSrcPort performs the streamdump-style return-path swap
+// in place on an IPv4+UDP packet: the IPv4 source and destination
+// addresses are exchanged and the UDP source port becomes newSrcPort
+// (the original target port the client expects to see). The UDP
+// destination port is left alone. The IPv4 header checksum and the UDP
+// checksum are both recomputed.
+//
+// (Symmetric counterpart to the TCP helper; like SwapUDPAndSetDstPort
+// it is not currently used by the engine and exists for tests/parity.)
+//
+// Returns whatever error ParseIPv4UDP reports for b; b is untouched in
+// that case.
+func SwapUDPAndSetSrcPort(b []byte, newSrcPort uint16) error {
+	if _, err := ParseIPv4UDP(b); err != nil {
+		return err
+	}
+	hdrLen := int(b[0]&0x0f) * 4
+
+	// Exchange source (bytes 12..15) and destination (bytes 16..19).
+	for i := 12; i < 16; i++ {
+		b[i], b[i+4] = b[i+4], b[i]
+	}
+
+	// Source port lives at offset 0 of the UDP header.
+	binary.BigEndian.PutUint16(b[hdrLen:hdrLen+2], newSrcPort)
+
+	// IPv4 header checksum: zero the field, then recompute.
+	binary.BigEndian.PutUint16(b[10:12], 0)
+	binary.BigEndian.PutUint16(b[10:12], ipChecksum(b[:hdrLen]))
+
+	// The UDP segment ends where the UDP length field says it does,
+	// clamped to the end of the buffer.
+	segEnd := hdrLen + int(binary.BigEndian.Uint16(b[hdrLen+4:hdrLen+6]))
+	if segEnd > len(b) {
+		segEnd = len(b)
+	}
+
+	// UDP checksum (offset 6 of the UDP header): zero, then recompute.
+	// A computed zero is sent as 0xFFFF (RFC 768).
+	binary.BigEndian.PutUint16(b[hdrLen+6:hdrLen+8], 0)
+	sum := udpChecksum(b[:hdrLen], b[hdrLen:segEnd])
+	if sum == 0 {
+		sum = 0xFFFF
+	}
+	binary.BigEndian.PutUint16(b[hdrLen+6:hdrLen+8], sum)
+	return nil
+}
+
+// BuildIPv4UDPInbound assembles a complete IPv4+UDP packet meant to be
+// reinjected as inbound (return path from the upstream relay back to
+// Discord). The UDPProxy uses it once the SOCKS5 relay has answered:
+// the synthetic packet looks like remote_endpoint → local_IP and is
+// reinjected through WinDivert with addr.Outbound=0.
+//
+//	srcIP    → original Discord destination (the UDP server)
+//	dstIP    → local LAN IP we bound on
+//	srcPort  → original destination port (e.g. 50007)
+//	dstPort  → Discord's ephemeral src port (so the kernel matches the
+//	           connect()-bound socket)
+//	payload  → UDP payload, copied into the packet
+//
+// The header is a 20-byte option-less IPv4 header (ID 0, no flags or
+// fragment offset, TTL 64) followed by an 8-byte UDP header; both
+// checksums are filled in. An error is returned when either address is
+// not IPv4 or when the datagram would exceed 65535 bytes.
+//
+// The returned slice is freshly allocated, so callers may pass it
+// straight to (*Handle).Send.
+func BuildIPv4UDPInbound(srcIP, dstIP net.IP, srcPort, dstPort uint16, payload []byte) ([]byte, error) {
+	src4, dst4 := srcIP.To4(), dstIP.To4()
+	if src4 == nil || dst4 == nil {
+		return nil, errors.New("BuildIPv4UDPInbound: src/dst must be IPv4")
+	}
+
+	const (
+		ipHdrLen  = 20
+		udpHdrLen = 8
+	)
+	total := ipHdrLen + udpHdrLen + len(payload)
+	if total > 0xFFFF {
+		return nil, errors.New("BuildIPv4UDPInbound: payload too large for IPv4 datagram")
+	}
+
+	pkt := make([]byte, total)
+	ip, udp := pkt[:ipHdrLen], pkt[ipHdrLen:]
+
+	// IPv4 header. DSCP/ECN, ID, flags/fragment offset and (for now)
+	// the checksum keep the zero value make() gave them.
+	ip[0] = 0x45 // version=4, IHL=5
+	binary.BigEndian.PutUint16(ip[2:4], uint16(total))
+	ip[8] = 64 // TTL
+	ip[9] = 17 // protocol = UDP
+	copy(ip[12:16], src4)
+	copy(ip[16:20], dst4)
+
+	// UDP header followed by the payload; checksum still zero here.
+	binary.BigEndian.PutUint16(udp[0:2], srcPort)
+	binary.BigEndian.PutUint16(udp[2:4], dstPort)
+	binary.BigEndian.PutUint16(udp[4:6], uint16(udpHdrLen+len(payload)))
+	copy(udp[udpHdrLen:], payload)
+
+	// Checksums last, once every other byte is final.
+	binary.BigEndian.PutUint16(ip[10:12], ipChecksum(ip))
+	sum := udpChecksum(ip, udp)
+	if sum == 0 {
+		sum = 0xFFFF // RFC 768: 0 means "checksum disabled", send 0xFFFF instead
+	}
+	binary.BigEndian.PutUint16(udp[6:8], sum)
+
+	return pkt, nil
+}
+
+// udpChecksum computes the RFC 768 checksum of an IPv4 UDP datagram:
+// the one's-complement of the one's-complement sum over the
+// pseudo-header (source address, destination address, protocol 17,
+// UDP length) and the UDP segment.
+//
+// ipHdr supplies the source/destination addresses at bytes 12..19.
+// udpSeg is the whole UDP header plus payload with its checksum field
+// zeroed; len(udpSeg) is used as the pseudo-header length. A segment
+// of odd length is padded with a trailing zero byte for the sum.
+//
+// The result may be 0; callers substitute 0xFFFF in that case, since a
+// transmitted 0 means "no checksum" for IPv4 UDP.
+func udpChecksum(ipHdr, udpSeg []byte) uint16 {
+	// Pseudo-header: src(4) dst(4) zero(1) proto(1) udp_len(2).
+	var acc uint32
+	for off := 12; off < 20; off += 2 {
+		acc += uint32(ipHdr[off])<<8 | uint32(ipHdr[off+1])
+	}
+	acc += 17 // UDP protocol number
+	acc += uint32(len(udpSeg))
+
+	// UDP header + payload as big-endian 16-bit words.
+	even := len(udpSeg) &^ 1
+	for off := 0; off < even; off += 2 {
+		acc += uint32(udpSeg[off])<<8 | uint32(udpSeg[off+1])
+	}
+	if even != len(udpSeg) {
+		acc += uint32(udpSeg[even]) << 8
+	}
+
+	// Fold the carries back into the low 16 bits.
+	for acc > 0xffff {
+		acc = acc&0xffff + acc>>16
+	}
+	return ^uint16(acc)
+}
@@ -1,6 +1,7 @@
 package divert

 import (
+	"encoding/binary"
 	"net"
 	"testing"

@@ -112,3 +113,148 @@ func TestParseIPv4TCP_Errors(t *testing.T) {
 		})
 	}
 }
+
+// helloUDP is a minimum well-formed IPv4 + UDP datagram:
+//
+//	src=10.0.0.1:54321 dst=1.2.3.4:443 payload=4 bytes ABCD
+//
+// Total length: 20(IP) + 8(UDP) + 4(payload) = 32 bytes.
+//
+// Both checksum fields are zero placeholders; tests copy the slice and
+// call fillUDPTestChecksums on the copy before using it.
+var helloUDP = []byte{
+	// IPv4 header (20 bytes, IHL=5)
+	0x45, 0x00, 0x00, 0x20, 0xab, 0xcd, 0x40, 0x00, 0x40, 0x11, // proto=17 (UDP)
+	0x00, 0x00, // checksum placeholder
+	0x0a, 0x00, 0x00, 0x01, // src 10.0.0.1
+	0x01, 0x02, 0x03, 0x04, // dst 1.2.3.4
+	// UDP header (8 bytes)
+	0xd4, 0x31, 0x01, 0xbb, // src=54321 dst=443
+	0x00, 0x0c,             // length=12 (UDP header + 4 payload)
+	0x00, 0x00,             // checksum placeholder
+	// Payload (4 bytes)
+	'A', 'B', 'C', 'D',
+}
+
+// fillUDPTestChecksums writes valid IPv4-header and UDP checksums into
+// b in place. It assumes a 20-byte IPv4 header (IHL=5) directly
+// followed by the UDP header, and trusts the UDP length field at bytes
+// 24..25 to delimit the UDP segment.
+func fillUDPTestChecksums(b []byte) {
+	ipHdr := b[:20]
+
+	// IPv4 header checksum: zero the field, then recompute.
+	binary.BigEndian.PutUint16(b[10:12], 0)
+	binary.BigEndian.PutUint16(b[10:12], ipChecksum(ipHdr))
+
+	// UDP checksum over pseudo-header + UDP header + payload.
+	segEnd := 20 + int(binary.BigEndian.Uint16(b[24:26]))
+	binary.BigEndian.PutUint16(b[26:28], 0)
+	sum := udpChecksum(ipHdr, b[20:segEnd])
+	if sum == 0 {
+		sum = 0xFFFF
+	}
+	binary.BigEndian.PutUint16(b[26:28], sum)
+}
+
+// TestParseIPv4UDP_Roundtrip checks that every field of the helloUDP
+// fixture comes back out of ParseIPv4UDP unchanged.
+func TestParseIPv4UDP_Roundtrip(t *testing.T) {
+	pkt := append([]byte(nil), helloUDP...)
+	fillUDPTestChecksums(pkt)
+
+	info, err := ParseIPv4UDP(pkt)
+	require.NoError(t, err)
+
+	assert.Equal(t, "10.0.0.1", info.SrcIP.String())
+	assert.Equal(t, uint16(54321), info.SrcPort)
+	assert.Equal(t, "1.2.3.4", info.DstIP.String())
+	assert.Equal(t, uint16(443), info.DstPort)
+	assert.Equal(t, 20, info.IHL)
+	assert.Equal(t, uint16(12), info.UDPLen)
+}
+
+// TestParseIPv4UDP_Errors feeds ParseIPv4UDP one malformed input per
+// rejection branch and expects an error from each.
+func TestParseIPv4UDP_Errors(t *testing.T) {
+	// hdr builds a zeroed 28-byte buffer (the minimum ParseIPv4UDP
+	// accepts) with the version/IHL byte and the protocol byte set.
+	hdr := func(verIHL, proto byte) []byte {
+		b := make([]byte, 28)
+		b[0] = verIHL
+		b[9] = proto
+		return b
+	}
+	cases := []struct {
+		name string
+		b    []byte
+	}{
+		{"empty", nil},
+		{"too_short", []byte{0x45}},
+		{"not_ipv4", hdr(0x60, 0)},
+		{"not_udp", hdr(0x45, 6 /* TCP */)},
+		// IHL=4 → 16 bytes, below the 20-byte minimum header.
+		{"ihl_too_small", hdr(0x44, 17)},
+		// IHL=15 → 60 bytes of header, but only 28 bytes are present.
+		{"ihl_exceeds_buffer", hdr(0x4f, 17)},
+	}
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			_, err := ParseIPv4UDP(c.b)
+			assert.Error(t, err)
+		})
+	}
+}
+
+// TestSwapUDPAndSetDstPort checks the forward-path rewrite: addresses
+// swapped, destination port replaced, source port untouched, and both
+// checksums left valid.
+func TestSwapUDPAndSetDstPort(t *testing.T) {
+	pkt := make([]byte, len(helloUDP))
+	copy(pkt, helloUDP)
+	fillUDPTestChecksums(pkt)
+
+	require.NoError(t, SwapUDPAndSetDstPort(pkt, 8080))
+
+	p, err := ParseIPv4UDP(pkt)
+	require.NoError(t, err)
+	assert.Equal(t, "1.2.3.4", p.SrcIP.String(), "src should be original dst after swap")
+	assert.Equal(t, "10.0.0.1", p.DstIP.String(), "dst should be original src after swap")
+	assert.Equal(t, uint16(54321), p.SrcPort, "src port unchanged")
+	assert.Equal(t, uint16(8080), p.DstPort, "dst port set to new value")
+
+	// A segment that carries its own valid checksum sums to zero. Only
+	// the addresses are read from the IP header, so this is unaffected
+	// by the IP-checksum probing below.
+	assert.Equal(t, uint16(0), udpChecksum(pkt[:20], pkt[20:]), "UDP checksum mismatch")
+
+	// Validate IP checksum recomputed
+	ipCs := uint16(pkt[10])<<8 | uint16(pkt[11])
+	pkt[10], pkt[11] = 0, 0
+	expIP := ipChecksum(pkt[:20])
+	assert.Equal(t, expIP, ipCs, "IP checksum mismatch")
+}
+
+// TestSwapUDPAndSetSrcPort checks the return-path rewrite: addresses
+// swapped, source port replaced, destination port untouched, and both
+// checksums left valid.
+func TestSwapUDPAndSetSrcPort(t *testing.T) {
+	pkt := make([]byte, len(helloUDP))
+	copy(pkt, helloUDP)
+	fillUDPTestChecksums(pkt)
+
+	require.NoError(t, SwapUDPAndSetSrcPort(pkt, 50007))
+
+	p, err := ParseIPv4UDP(pkt)
+	require.NoError(t, err)
+	assert.Equal(t, "1.2.3.4", p.SrcIP.String())
+	assert.Equal(t, "10.0.0.1", p.DstIP.String())
+	assert.Equal(t, uint16(50007), p.SrcPort, "src port set to new value")
+	assert.Equal(t, uint16(443), p.DstPort, "dst port unchanged")
+
+	// A segment that carries its own valid checksum sums to zero.
+	assert.Equal(t, uint16(0), udpChecksum(pkt[:20], pkt[20:]), "UDP checksum mismatch")
+
+	// IP checksum: clearing + recomputing should reproduce it.
+	ipCs := uint16(pkt[10])<<8 | uint16(pkt[11])
+	pkt[10], pkt[11] = 0, 0
+	assert.Equal(t, ipChecksum(pkt[:20]), ipCs, "IP checksum mismatch")
+}
+
+// TestBuildIPv4UDPInbound builds a packet and checks its length, the
+// parsed header fields, the payload, and that both checksums are
+// actually valid.
+func TestBuildIPv4UDPInbound(t *testing.T) {
+	src := net.IPv4(140, 82, 121, 4) // arbitrary public address standing in for the remote endpoint
+	dst := net.IPv4(192, 168, 1, 50) // local LAN
+	payload := []byte("hello voice")
+
+	pkt, err := BuildIPv4UDPInbound(src, dst, 50007, 50100, payload)
+	require.NoError(t, err)
+
+	// Total length: 20+8+11 = 39. require (not assert) so the slicing
+	// below cannot panic on a short packet.
+	require.Len(t, pkt, 39)
+	assert.Equal(t, uint16(39), binary.BigEndian.Uint16(pkt[2:4]), "IPv4 total-length field")
+
+	// Re-parse and verify fields
+	p, err := ParseIPv4UDP(pkt)
+	require.NoError(t, err)
+	assert.Equal(t, "140.82.121.4", p.SrcIP.String())
+	assert.Equal(t, "192.168.1.50", p.DstIP.String())
+	assert.Equal(t, uint16(50007), p.SrcPort)
+	assert.Equal(t, uint16(50100), p.DstPort)
+	assert.Equal(t, uint16(8+len(payload)), p.UDPLen)
+
+	// Payload after headers
+	assert.Equal(t, payload, pkt[28:])
+
+	// UDP checksum: non-zero on the wire (RFC 768), and a segment that
+	// carries its own valid checksum sums to zero.
+	udpCs := uint16(pkt[26])<<8 | uint16(pkt[27])
+	assert.NotEqual(t, uint16(0), udpCs, "UDP checksum should be non-zero (RFC 768 trick)")
+	assert.Equal(t, uint16(0), udpChecksum(pkt[:20], pkt[20:]), "UDP checksum should be valid")
+
+	// IP checksum valid: clearing + recomputing should match
+	ipCs := uint16(pkt[10])<<8 | uint16(pkt[11])
+	pkt[10], pkt[11] = 0, 0
+	expIP := ipChecksum(pkt[:20])
+	assert.Equal(t, expIP, ipCs, "IP checksum should be valid")
+}
+
+// TestBuildIPv4UDPInbound_NotIPv4 checks that an IPv6 source address
+// is rejected.
+func TestBuildIPv4UDPInbound_NotIPv4(t *testing.T) {
+	src6 := net.ParseIP("::1")
+	dst4 := net.IPv4(1, 2, 3, 4)
+
+	_, err := BuildIPv4UDPInbound(src6, dst4, 1, 2, []byte("x"))
+	assert.Error(t, err)
+}