From f7656beb1aa4fc4ed504876252ac4ecc5dae4c1e Mon Sep 17 00:00:00 2001
From: devthejo
Date: Sat, 17 Jan 2026 22:58:40 +0100
Subject: [PATCH] fix(ws): stabilization try 4

---
Review notes (this section is ignored by `git am`):

* src/network/wsLink.js: `retryWait` now receives the retry count as a named
  parameter. The previous `arguments[0]` was evaluated inside an arrow
  function, which has no `arguments` object of its own, so it resolved to the
  options object passed to the enclosing `createWsLink(...)`. The computed
  backoff was therefore NaN and retries were scheduled with no delay.
* This patch text was re-laid-out from a whitespace-collapsed copy: indentation
  and the position of blank context lines are best-effort (TODO confirm against
  the tree, or apply with `git apply --ignore-whitespace`). The `index` line of
  wsLink.js still carries the original blob ids.

 src/hooks/useWsWatchdog.js    |  65 ++++++++++++++++++-
 src/network/graphqlWs.js      |  22 +++----
 src/network/wsLink.js         | 121 +++++++++++----------------------
 src/scenes/Params/index.js    |   6 ++
 src/scenes/Profile/index.js   |   5 ++
 src/scenes/Relatives/index.js |   4 ++
 6 files changed, 130 insertions(+), 93 deletions(-)

diff --git a/src/hooks/useWsWatchdog.js b/src/hooks/useWsWatchdog.js
index 9beeb01..e75a125 100644
--- a/src/hooks/useWsWatchdog.js
+++ b/src/hooks/useWsWatchdog.js
@@ -14,6 +14,7 @@ const watchdogLogger = createLogger({
 const HEARTBEAT_STALE_MS = 45_000;
 const CHECK_EVERY_MS = 10_000;
 const MIN_RESTART_INTERVAL_MS = 30_000;
+const CONNECT_STALE_MS = 20_000;
 
 export default function useWsWatchdog({ enabled = true } = {}) {
   const {
@@ -32,6 +33,7 @@ export default function useWsWatchdog({ enabled = true } = {}) {
   const wsLastHeartbeatDateRef = useRef(wsLastHeartbeatDate);
   const appStateRef = useRef(AppState.currentState);
   const wsLastRecoveryDateRef = useRef(wsLastRecoveryDate);
+  const connectBeganAtRef = useRef(null);
 
   useEffect(() => {
     wsLastHeartbeatDateRef.current = wsLastHeartbeatDate;
@@ -58,7 +60,68 @@ export default function useWsWatchdog({ enabled = true } = {}) {
     const interval = setInterval(() => {
       if (appStateRef.current !== "active") return;
       if (!hasInternetConnection) return;
-      if (!wsConnected) return;
+
+      // If the app has internet but WS is not connected for too long,
+      // proactively restart the WS transport.
+      if (!wsConnected) {
+        if (!connectBeganAtRef.current) {
+          connectBeganAtRef.current = Date.now();
+        }
+
+        const age = Date.now() - connectBeganAtRef.current;
+        if (age < CONNECT_STALE_MS) return;
+
+        const now = Date.now();
+        if (now - lastRestartRef.current < MIN_RESTART_INTERVAL_MS) return;
+
+        // Global recovery throttle: avoid double restarts from multiple sources.
+        const lastRecovery = wsLastRecoveryDateRef.current
+          ? Date.parse(wsLastRecoveryDateRef.current)
+          : NaN;
+        if (Number.isFinite(lastRecovery)) {
+          const recoveryAge = now - lastRecovery;
+          if (recoveryAge < MIN_RESTART_INTERVAL_MS) return;
+        }
+
+        lastRestartRef.current = now;
+        networkActions.WSRecoveryTouch();
+
+        watchdogLogger.warn(
+          "WS not connected while internet is up, restarting",
+          {
+            ageMs: age,
+          },
+        );
+
+        try {
+          Sentry.addBreadcrumb({
+            category: "websocket",
+            level: "warning",
+            message: "ws watchdog not connected",
+            data: { ageMs: age },
+          });
+        } catch (_e) {
+          // ignore
+        }
+
+        try {
+          network.apolloClient?.restartWS?.();
+        } catch (error) {
+          watchdogLogger.error("WS restart failed", { error });
+          try {
+            Sentry.captureException(error, {
+              tags: { context: "ws-watchdog-restart-failed" },
+            });
+          } catch (_e) {
+            // ignore
+          }
+        }
+
+        return;
+      }
+
+      // Reset connect timer once connected.
+      connectBeganAtRef.current = null;
 
       if (!wsLastHeartbeatDateRef.current) return;
       const last = Date.parse(wsLastHeartbeatDateRef.current);
diff --git a/src/network/graphqlWs.js b/src/network/graphqlWs.js
index 63498f1..44dc5b2 100644
--- a/src/network/graphqlWs.js
+++ b/src/network/graphqlWs.js
@@ -38,16 +38,16 @@ export function createRestartableClient(options) {
     },
   });
 
-  return {
-    ...client,
-    restart: () => {
-      const now = Date.now();
-      if (now - lastRestartTime < 2000) {
-        // Ignore restart request if less than 2 seconds since last restart
-        return;
-      }
-      lastRestartTime = now;
-      restart();
-    },
+  // Important: keep the original `graphql-ws` client object identity.
+  client.restart = () => {
+    const now = Date.now();
+    if (now - lastRestartTime < 2000) {
+      // Ignore restart request if less than 2 seconds since last restart
+      return;
+    }
+    lastRestartTime = now;
+    restart();
   };
+
+  return client;
 }
diff --git a/src/network/wsLink.js b/src/network/wsLink.js
index bc7abc5..24c7172 100644
--- a/src/network/wsLink.js
+++ b/src/network/wsLink.js
@@ -15,80 +15,59 @@ export default function createWsLink({ store, GRAPHQL_WS_URL }) {
 
   const PING_INTERVAL = 10_000;
   const PING_TIMEOUT = 5_000;
-  const MAX_RECONNECT_DELAY = 30000; // 30 seconds max delay
-  // const MAX_RECONNECT_ATTEMPTS = 5; // Limit reconnection attempts
-  const MAX_RECONNECT_ATTEMPTS = Infinity; // Limit reconnection attempts
-  // Graceful degradation: after prolonged WS reconnecting, surface app-level recovery
-  // via the existing reload mechanism (NetworkProviders will recreate Apollo).
-  const MAX_RECONNECT_TIME_MS = 5 * 60 * 1000;
-  let firstFailureAt = null;
-
-  let reconnectAttempts = 0;
-  function getReconnectDelay() {
-    // Exponential backoff with max delay
-    const delay = Math.min(
-      1000 * Math.pow(2, reconnectAttempts),
-      MAX_RECONNECT_DELAY,
-    );
-    return delay * (0.5 + Math.random()); // Add jitter
-  }
-
-  let reconnectTimeout;
-  function scheduleReconnect() {
-    // Clear any existing reconnect attempts
-    clearTimeout(reconnectTimeout);
-
-    // Schedule a single reconnect attempt with exponential backoff
-    reconnectTimeout = setTimeout(() => {
-      try {
-        wsLogger.debug("Attempting scheduled reconnect", {
-          attempt: reconnectAttempts + 1,
-          delay: getReconnectDelay(),
-        });
-        wsLink.client.restart();
-      } catch (error) {
-        wsLogger.error("Failed to reconnect", { error });
-      }
-      reconnectTimeout = null;
-    }, getReconnectDelay());
-  }
-
-  function cancelReconnect() {
-    if (reconnectTimeout) {
-      wsLogger.debug("Canceling scheduled reconnect");
-      clearTimeout(reconnectTimeout);
-      reconnectTimeout = null;
-    }
-  }
+  // Let `graphql-ws` manage reconnection.
+  // Our own reconnect scheduling was causing overlapping connection attempts
+  // and intermittent RN Android `client is null` (send called on already-closed native socket).
+  const MAX_RECONNECT_ATTEMPTS = Infinity;
 
   const wsLink = new WebSocketLink({
     url: GRAPHQL_WS_URL,
     connectionParams: () => {
       const { userToken } = getAuthState();
 
-      const headers = {
-        "Sec-WebSocket-Protocol": "graphql-transport-ws",
-      };
-      setBearerHeader(headers, userToken);
+      const headers = {};
+
+      // Important: only attach Authorization when we have a real token.
+      // Sending `Authorization: Bearer undefined` breaks WS auth on some backends.
+      if (userToken) {
+        setBearerHeader(headers, userToken);
+      } else {
+        wsLogger.warn("WS connectionParams without userToken", {
+          url: GRAPHQL_WS_URL,
+        });
+      }
+
+      // Note: Sec-WebSocket-Protocol is negotiated at the handshake level.
+      // Putting it in `connection_init.payload.headers` is ineffective and can
+      // confuse server-side auth header parsing.
       return { headers };
     },
+    // Do not use lazy sockets: some RN Android builds intermittently hit
+    // WebSocketModule send() with null client when the socket is created/
+    // torn down rapidly around app-state transitions.
+    lazy: false,
     keepAlive: PING_INTERVAL,
     retryAttempts: MAX_RECONNECT_ATTEMPTS,
-    retryWait: async () => {
-      const delay = getReconnectDelay();
+    retryWait: async (retries = 0) => {
+      // `graphql-ws` passes the retry count to `retryWait(retries)`.
+      // Take it as a parameter: arrow functions have no own `arguments`.
+      // Use a jittered exponential backoff, capped.
+      const base = Math.min(1000 * Math.pow(2, retries), 30_000);
+      const delay = base * (0.5 + Math.random());
       await new Promise((resolve) => setTimeout(resolve, delay));
     },
     shouldRetry: () => true,
-    lazy: true,
     on: {
+      opened: () => {
+        wsLogger.info("WebSocket opened", {
+          url: GRAPHQL_WS_URL,
+        });
+      },
       connected: (socket) => {
         wsLogger.info("WebSocket connected");
         activeSocket = socket;
-        reconnectAttempts = 0; // Reset attempts on successful connection
-        firstFailureAt = null;
         networkActions.WSConnected();
         networkActions.WSTouch();
-        cancelReconnect(); // Cancel any pending reconnects
 
         // Clear any lingering ping timeouts
         if (pingTimeout) {
@@ -104,38 +83,12 @@ export default function createWsLink({ store, GRAPHQL_WS_URL }) {
         });
         networkActions.WSClosed();
 
-        if (!firstFailureAt) {
-          firstFailureAt = Date.now();
-        }
-
         // Clear socket and timeouts
         activeSocket = undefined;
         if (pingTimeout) {
           clearTimeout(pingTimeout);
           pingTimeout = null;
         }
-
-        // Schedule reconnect unless explicitly closed (1000) or going away (1001)
-        if (event.code !== 1000 && event.code !== 1001) {
-          const reconnectAge = Date.now() - firstFailureAt;
-          if (reconnectAge >= MAX_RECONNECT_TIME_MS) {
-            wsLogger.warn(
-              "WebSocket reconnecting too long, triggering app reload",
-              {
-                reconnectAgeMs: reconnectAge,
-                reconnectAttempts,
-                lastCloseCode: event.code,
-              },
-            );
-            networkActions.triggerReload();
-            return;
-          }
-
-          reconnectAttempts++;
-          scheduleReconnect();
-        } else {
-          wsLogger.debug("Clean WebSocket closure - not reconnecting");
-        }
       },
       ping: (received) => {
         // wsLogger.debug("WebSocket ping", { received });
@@ -169,6 +122,12 @@ export default function createWsLink({ store, GRAPHQL_WS_URL }) {
           clearTimeout(pingTimeout); // pong is received, clear connection close timeout
         }
       },
+      error: (error) => {
+        wsLogger.error("WebSocket error", {
+          message: error?.message,
+          url: GRAPHQL_WS_URL,
+        });
+      },
     },
   });
 
diff --git a/src/scenes/Params/index.js b/src/scenes/Params/index.js
index 976f105..d3c96be 100644
--- a/src/scenes/Params/index.js
+++ b/src/scenes/Params/index.js
@@ -16,7 +16,13 @@ export default withConnectivity(function Params() {
     variables: {
       deviceId,
     },
+    skip: !deviceId,
   });
+
+  if (!deviceId) {
+    return ;
+  }
+
   if (loading) {
     return ;
   }
diff --git a/src/scenes/Profile/index.js b/src/scenes/Profile/index.js
index da31fe4..bca57d4 100644
--- a/src/scenes/Profile/index.js
+++ b/src/scenes/Profile/index.js
@@ -41,6 +41,7 @@ export default withConnectivity(function Profile({ navigation, route }) {
       variables: {
         userId,
       },
+      skip: !userId,
     },
   );
 
@@ -113,6 +114,10 @@ export default withConnectivity(function Profile({ navigation, route }) {
     });
   }, [navigation]);
 
+  if (!userId) {
+    return ;
+  }
+
   if (loading) {
     return ;
   }
diff --git a/src/scenes/Relatives/index.js b/src/scenes/Relatives/index.js
index 491b337..0d3f1c6 100644
--- a/src/scenes/Relatives/index.js
+++ b/src/scenes/Relatives/index.js
@@ -31,9 +31,13 @@ export default withConnectivity(function Relatives({ navigation, route }) {
     variables: {
       userId,
     },
+    skip: !userId,
   });
 
   const { loading, error, data } = relativesSubscription;
+  if (!userId) {
+    return ;
+  }
   if (loading) {
     return ;
   }