fix(ws): stabilization try 4

This commit is contained in:
devthejo 2026-01-17 22:58:40 +01:00
parent 6e717077f9
commit f7656beb1a
No known key found for this signature in database
GPG key ID: 00CCA7A92B1D5351
6 changed files with 129 additions and 92 deletions

View file

@ -14,6 +14,7 @@ const watchdogLogger = createLogger({
// Heartbeat staleness threshold, in ms (45s).
// NOTE(review): the comparison that uses it is outside the visible part of this file — confirm.
const HEARTBEAT_STALE_MS = 45_000;
// NOTE(review): presumably the watchdog tick period in ms (10s); the setInterval
// delay argument is not visible here — confirm.
const CHECK_EVERY_MS = 10_000;
// Minimum gap, in ms (30s), between two watchdog-initiated WS restarts. The same
// value is also used as the minimum age of the last recorded recovery.
const MIN_RESTART_INTERVAL_MS = 30_000;
// How long, in ms (20s), the WS may stay disconnected — measured from the first
// watchdog tick that observed it down — before the watchdog attempts a restart.
const CONNECT_STALE_MS = 20_000;
export default function useWsWatchdog({ enabled = true } = {}) {
const {
@ -32,6 +33,7 @@ export default function useWsWatchdog({ enabled = true } = {}) {
const wsLastHeartbeatDateRef = useRef(wsLastHeartbeatDate);
const appStateRef = useRef(AppState.currentState);
const wsLastRecoveryDateRef = useRef(wsLastRecoveryDate);
const connectBeganAtRef = useRef(null);
useEffect(() => {
wsLastHeartbeatDateRef.current = wsLastHeartbeatDate;
@ -58,7 +60,68 @@ export default function useWsWatchdog({ enabled = true } = {}) {
const interval = setInterval(() => {
if (appStateRef.current !== "active") return;
if (!hasInternetConnection) return;
if (!wsConnected) return;
// If the app has internet but WS is not connected for too long,
// proactively restart the WS transport.
// This branch always ends the tick: the heartbeat checks further down only
// run when the WS is reported as connected.
if (!wsConnected) {
// Start the "disconnected since" timer on the first tick that sees the WS down.
// It is cleared again just after this branch once the WS is connected.
if (!connectBeganAtRef.current) {
connectBeganAtRef.current = Date.now();
}
const age = Date.now() - connectBeganAtRef.current;
// Not disconnected long enough yet: wait for a later tick.
if (age < CONNECT_STALE_MS) return;
const now = Date.now();
// Local throttle: this watchdog restarted the WS too recently.
if (now - lastRestartRef.current < MIN_RESTART_INTERVAL_MS) return;
// Global recovery throttle: avoid double restarts from multiple sources.
// A missing recovery date yields NaN, which skips this throttle entirely.
const lastRecovery = wsLastRecoveryDateRef.current
? Date.parse(wsLastRecoveryDateRef.current)
: NaN;
if (Number.isFinite(lastRecovery)) {
const recoveryAge = now - lastRecovery;
if (recoveryAge < MIN_RESTART_INTERVAL_MS) return;
}
// Record the restart time before attempting it, so a failing restart is
// still throttled on the next ticks.
lastRestartRef.current = now;
// NOTE(review): presumably refreshes the shared recovery date read through
// wsLastRecoveryDateRef above — confirm against the network store.
networkActions.WSRecoveryTouch();
watchdogLogger.warn(
"WS not connected while internet is up, restarting",
{
ageMs: age,
},
);
try {
Sentry.addBreadcrumb({
category: "websocket",
level: "warning",
message: "ws watchdog not connected",
data: { ageMs: age },
});
} catch (_e) {
// Ignored on purpose: a failing breadcrumb must not prevent the restart below.
}
try {
// Optional chaining: silently a no-op when the Apollo client or its
// restartWS method is not available.
network.apolloClient?.restartWS?.();
} catch (error) {
watchdogLogger.error("WS restart failed", { error });
try {
Sentry.captureException(error, {
tags: { context: "ws-watchdog-restart-failed" },
});
} catch (_e) {
// Ignored on purpose: the failure has already been logged above.
}
}
return;
}
// Reset connect timer once connected.
connectBeganAtRef.current = null;
if (!wsLastHeartbeatDateRef.current) return;
const last = Date.parse(wsLastHeartbeatDateRef.current);

View file

@ -38,9 +38,8 @@ export function createRestartableClient(options) {
},
});
return {
...client,
restart: () => {
// Important: keep the original `graphql-ws` client object identity.
client.restart = () => {
const now = Date.now();
if (now - lastRestartTime < 2000) {
// Ignore restart request if less than 2 seconds since last restart
@ -48,6 +47,7 @@ export function createRestartableClient(options) {
}
lastRestartTime = now;
restart();
},
};
return client;
}

View file

@ -15,80 +15,59 @@ export default function createWsLink({ store, GRAPHQL_WS_URL }) {
const PING_INTERVAL = 10_000;
const PING_TIMEOUT = 5_000;
const MAX_RECONNECT_DELAY = 30000; // 30 seconds max delay
// const MAX_RECONNECT_ATTEMPTS = 5; // Limit reconnection attempts
const MAX_RECONNECT_ATTEMPTS = Infinity; // Limit reconnection attempts
// Graceful degradation: after prolonged WS reconnecting, surface app-level recovery
// via the existing reload mechanism (NetworkProviders will recreate Apollo).
const MAX_RECONNECT_TIME_MS = 5 * 60 * 1000;
let firstFailureAt = null;
let reconnectAttempts = 0;
function getReconnectDelay() {
// Exponential backoff with max delay
const delay = Math.min(
1000 * Math.pow(2, reconnectAttempts),
MAX_RECONNECT_DELAY,
);
return delay * (0.5 + Math.random()); // Add jitter
}
let reconnectTimeout;
function scheduleReconnect() {
// Clear any existing reconnect attempts
clearTimeout(reconnectTimeout);
// Schedule a single reconnect attempt with exponential backoff
reconnectTimeout = setTimeout(() => {
try {
wsLogger.debug("Attempting scheduled reconnect", {
attempt: reconnectAttempts + 1,
delay: getReconnectDelay(),
});
wsLink.client.restart();
} catch (error) {
wsLogger.error("Failed to reconnect", { error });
}
reconnectTimeout = null;
}, getReconnectDelay());
}
function cancelReconnect() {
if (reconnectTimeout) {
wsLogger.debug("Canceling scheduled reconnect");
clearTimeout(reconnectTimeout);
reconnectTimeout = null;
}
}
// Let `graphql-ws` manage reconnection.
// Our own reconnect scheduling was causing overlapping connection attempts
// and intermittent RN Android `client is null` (send called on already-closed native socket).
const MAX_RECONNECT_ATTEMPTS = Infinity;
const wsLink = new WebSocketLink({
url: GRAPHQL_WS_URL,
connectionParams: () => {
const { userToken } = getAuthState();
const headers = {
"Sec-WebSocket-Protocol": "graphql-transport-ws",
};
const headers = {};
// Important: only attach Authorization when we have a real token.
// Sending `Authorization: Bearer undefined` breaks WS auth on some backends.
if (userToken) {
setBearerHeader(headers, userToken);
} else {
wsLogger.warn("WS connectionParams without userToken", {
url: GRAPHQL_WS_URL,
});
}
// Note: Sec-WebSocket-Protocol is negotiated at the handshake level.
// Putting it in `connection_init.payload.headers` is ineffective and can
// confuse server-side auth header parsing.
return { headers };
},
// Do not use lazy sockets: some RN Android builds intermittently hit
// WebSocketModule send() with null client when the socket is created/
// torn down rapidly around app-state transitions.
lazy: false,
keepAlive: PING_INTERVAL,
retryAttempts: MAX_RECONNECT_ATTEMPTS,
retryWait: async () => {
const delay = getReconnectDelay();
// `graphql-ws` passes the retry count to `retryWait(retries)`.
// Use a jittered exponential backoff, capped.
const retries = arguments[0] ?? 0;
const base = Math.min(1000 * Math.pow(2, retries), 30_000);
const delay = base * (0.5 + Math.random());
await new Promise((resolve) => setTimeout(resolve, delay));
},
shouldRetry: () => true,
lazy: true,
on: {
opened: () => {
wsLogger.info("WebSocket opened", {
url: GRAPHQL_WS_URL,
});
},
connected: (socket) => {
wsLogger.info("WebSocket connected");
activeSocket = socket;
reconnectAttempts = 0; // Reset attempts on successful connection
firstFailureAt = null;
networkActions.WSConnected();
networkActions.WSTouch();
cancelReconnect(); // Cancel any pending reconnects
// Clear any lingering ping timeouts
if (pingTimeout) {
@ -104,38 +83,12 @@ export default function createWsLink({ store, GRAPHQL_WS_URL }) {
});
networkActions.WSClosed();
if (!firstFailureAt) {
firstFailureAt = Date.now();
}
// Clear socket and timeouts
activeSocket = undefined;
if (pingTimeout) {
clearTimeout(pingTimeout);
pingTimeout = null;
}
// Schedule reconnect unless explicitly closed (1000) or going away (1001)
if (event.code !== 1000 && event.code !== 1001) {
const reconnectAge = Date.now() - firstFailureAt;
if (reconnectAge >= MAX_RECONNECT_TIME_MS) {
wsLogger.warn(
"WebSocket reconnecting too long, triggering app reload",
{
reconnectAgeMs: reconnectAge,
reconnectAttempts,
lastCloseCode: event.code,
},
);
networkActions.triggerReload();
return;
}
reconnectAttempts++;
scheduleReconnect();
} else {
wsLogger.debug("Clean WebSocket closure - not reconnecting");
}
},
ping: (received) => {
// wsLogger.debug("WebSocket ping", { received });
@ -169,6 +122,12 @@ export default function createWsLink({ store, GRAPHQL_WS_URL }) {
clearTimeout(pingTimeout); // pong is received, clear connection close timeout
}
},
error: (error) => {
wsLogger.error("WebSocket error", {
message: error?.message,
url: GRAPHQL_WS_URL,
});
},
},
});

View file

@ -16,7 +16,13 @@ export default withConnectivity(function Params() {
variables: {
deviceId,
},
skip: !deviceId,
});
if (!deviceId) {
return <Loader />;
}
if (loading) {
return <Loader />;
}

View file

@ -41,6 +41,7 @@ export default withConnectivity(function Profile({ navigation, route }) {
variables: {
userId,
},
skip: !userId,
},
);
@ -113,6 +114,10 @@ export default withConnectivity(function Profile({ navigation, route }) {
});
}, [navigation]);
if (!userId) {
return <Loader />;
}
if (loading) {
return <Loader />;
}

View file

@ -31,9 +31,13 @@ export default withConnectivity(function Relatives({ navigation, route }) {
variables: {
userId,
},
skip: !userId,
});
const { loading, error, data } = relativesSubscription;
if (!userId) {
return <Loader />;
}
if (loading) {
return <Loader />;
}