fix(ws): stabilization try 4
This commit is contained in:
parent
6e717077f9
commit
f7656beb1a
6 changed files with 129 additions and 92 deletions
|
|
@ -14,6 +14,7 @@ const watchdogLogger = createLogger({
|
|||
const HEARTBEAT_STALE_MS = 45_000;
|
||||
const CHECK_EVERY_MS = 10_000;
|
||||
const MIN_RESTART_INTERVAL_MS = 30_000;
|
||||
const CONNECT_STALE_MS = 20_000;
|
||||
|
||||
export default function useWsWatchdog({ enabled = true } = {}) {
|
||||
const {
|
||||
|
|
@ -32,6 +33,7 @@ export default function useWsWatchdog({ enabled = true } = {}) {
|
|||
const wsLastHeartbeatDateRef = useRef(wsLastHeartbeatDate);
|
||||
const appStateRef = useRef(AppState.currentState);
|
||||
const wsLastRecoveryDateRef = useRef(wsLastRecoveryDate);
|
||||
const connectBeganAtRef = useRef(null);
|
||||
|
||||
useEffect(() => {
|
||||
wsLastHeartbeatDateRef.current = wsLastHeartbeatDate;
|
||||
|
|
@ -58,7 +60,68 @@ export default function useWsWatchdog({ enabled = true } = {}) {
|
|||
const interval = setInterval(() => {
|
||||
if (appStateRef.current !== "active") return;
|
||||
if (!hasInternetConnection) return;
|
||||
if (!wsConnected) return;
|
||||
|
||||
// If the app has internet but WS is not connected for too long,
|
||||
// proactively restart the WS transport.
|
||||
if (!wsConnected) {
|
||||
if (!connectBeganAtRef.current) {
|
||||
connectBeganAtRef.current = Date.now();
|
||||
}
|
||||
|
||||
const age = Date.now() - connectBeganAtRef.current;
|
||||
if (age < CONNECT_STALE_MS) return;
|
||||
|
||||
const now = Date.now();
|
||||
if (now - lastRestartRef.current < MIN_RESTART_INTERVAL_MS) return;
|
||||
|
||||
// Global recovery throttle: avoid double restarts from multiple sources.
|
||||
const lastRecovery = wsLastRecoveryDateRef.current
|
||||
? Date.parse(wsLastRecoveryDateRef.current)
|
||||
: NaN;
|
||||
if (Number.isFinite(lastRecovery)) {
|
||||
const recoveryAge = now - lastRecovery;
|
||||
if (recoveryAge < MIN_RESTART_INTERVAL_MS) return;
|
||||
}
|
||||
|
||||
lastRestartRef.current = now;
|
||||
networkActions.WSRecoveryTouch();
|
||||
|
||||
watchdogLogger.warn(
|
||||
"WS not connected while internet is up, restarting",
|
||||
{
|
||||
ageMs: age,
|
||||
},
|
||||
);
|
||||
|
||||
try {
|
||||
Sentry.addBreadcrumb({
|
||||
category: "websocket",
|
||||
level: "warning",
|
||||
message: "ws watchdog not connected",
|
||||
data: { ageMs: age },
|
||||
});
|
||||
} catch (_e) {
|
||||
// ignore
|
||||
}
|
||||
|
||||
try {
|
||||
network.apolloClient?.restartWS?.();
|
||||
} catch (error) {
|
||||
watchdogLogger.error("WS restart failed", { error });
|
||||
try {
|
||||
Sentry.captureException(error, {
|
||||
tags: { context: "ws-watchdog-restart-failed" },
|
||||
});
|
||||
} catch (_e) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Reset connect timer once connected.
|
||||
connectBeganAtRef.current = null;
|
||||
if (!wsLastHeartbeatDateRef.current) return;
|
||||
|
||||
const last = Date.parse(wsLastHeartbeatDateRef.current);
|
||||
|
|
|
|||
|
|
@ -38,9 +38,8 @@ export function createRestartableClient(options) {
|
|||
},
|
||||
});
|
||||
|
||||
return {
|
||||
...client,
|
||||
restart: () => {
|
||||
// Important: keep the original `graphql-ws` client object identity.
|
||||
client.restart = () => {
|
||||
const now = Date.now();
|
||||
if (now - lastRestartTime < 2000) {
|
||||
// Ignore restart request if less than 2 seconds since last restart
|
||||
|
|
@ -48,6 +47,7 @@ export function createRestartableClient(options) {
|
|||
}
|
||||
lastRestartTime = now;
|
||||
restart();
|
||||
},
|
||||
};
|
||||
|
||||
return client;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,80 +15,59 @@ export default function createWsLink({ store, GRAPHQL_WS_URL }) {
|
|||
|
||||
const PING_INTERVAL = 10_000;
|
||||
const PING_TIMEOUT = 5_000;
|
||||
const MAX_RECONNECT_DELAY = 30000; // 30 seconds max delay
|
||||
// const MAX_RECONNECT_ATTEMPTS = 5; // Limit reconnection attempts
|
||||
const MAX_RECONNECT_ATTEMPTS = Infinity; // Limit reconnection attempts
|
||||
|
||||
// Graceful degradation: after prolonged WS reconnecting, surface app-level recovery
|
||||
// via the existing reload mechanism (NetworkProviders will recreate Apollo).
|
||||
const MAX_RECONNECT_TIME_MS = 5 * 60 * 1000;
|
||||
let firstFailureAt = null;
|
||||
|
||||
let reconnectAttempts = 0;
|
||||
function getReconnectDelay() {
|
||||
// Exponential backoff with max delay
|
||||
const delay = Math.min(
|
||||
1000 * Math.pow(2, reconnectAttempts),
|
||||
MAX_RECONNECT_DELAY,
|
||||
);
|
||||
return delay * (0.5 + Math.random()); // Add jitter
|
||||
}
|
||||
|
||||
let reconnectTimeout;
|
||||
function scheduleReconnect() {
|
||||
// Clear any existing reconnect attempts
|
||||
clearTimeout(reconnectTimeout);
|
||||
|
||||
// Schedule a single reconnect attempt with exponential backoff
|
||||
reconnectTimeout = setTimeout(() => {
|
||||
try {
|
||||
wsLogger.debug("Attempting scheduled reconnect", {
|
||||
attempt: reconnectAttempts + 1,
|
||||
delay: getReconnectDelay(),
|
||||
});
|
||||
wsLink.client.restart();
|
||||
} catch (error) {
|
||||
wsLogger.error("Failed to reconnect", { error });
|
||||
}
|
||||
reconnectTimeout = null;
|
||||
}, getReconnectDelay());
|
||||
}
|
||||
|
||||
function cancelReconnect() {
|
||||
if (reconnectTimeout) {
|
||||
wsLogger.debug("Canceling scheduled reconnect");
|
||||
clearTimeout(reconnectTimeout);
|
||||
reconnectTimeout = null;
|
||||
}
|
||||
}
|
||||
// Let `graphql-ws` manage reconnection.
|
||||
// Our own reconnect scheduling was causing overlapping connection attempts
|
||||
// and intermittent RN Android `client is null` (send called on already-closed native socket).
|
||||
const MAX_RECONNECT_ATTEMPTS = Infinity;
|
||||
|
||||
const wsLink = new WebSocketLink({
|
||||
url: GRAPHQL_WS_URL,
|
||||
connectionParams: () => {
|
||||
const { userToken } = getAuthState();
|
||||
const headers = {
|
||||
"Sec-WebSocket-Protocol": "graphql-transport-ws",
|
||||
};
|
||||
const headers = {};
|
||||
|
||||
// Important: only attach Authorization when we have a real token.
|
||||
// Sending `Authorization: Bearer undefined` breaks WS auth on some backends.
|
||||
if (userToken) {
|
||||
setBearerHeader(headers, userToken);
|
||||
} else {
|
||||
wsLogger.warn("WS connectionParams without userToken", {
|
||||
url: GRAPHQL_WS_URL,
|
||||
});
|
||||
}
|
||||
|
||||
// Note: Sec-WebSocket-Protocol is negotiated at the handshake level.
|
||||
// Putting it in `connection_init.payload.headers` is ineffective and can
|
||||
// confuse server-side auth header parsing.
|
||||
return { headers };
|
||||
},
|
||||
// Do not use lazy sockets: some RN Android builds intermittently hit
|
||||
// WebSocketModule send() with null client when the socket is created/
|
||||
// torn down rapidly around app-state transitions.
|
||||
lazy: false,
|
||||
keepAlive: PING_INTERVAL,
|
||||
retryAttempts: MAX_RECONNECT_ATTEMPTS,
|
||||
retryWait: async () => {
|
||||
const delay = getReconnectDelay();
|
||||
// `graphql-ws` passes the retry count to `retryWait(retries)`.
|
||||
// Use a jittered exponential backoff, capped.
|
||||
const retries = arguments[0] ?? 0;
|
||||
const base = Math.min(1000 * Math.pow(2, retries), 30_000);
|
||||
const delay = base * (0.5 + Math.random());
|
||||
await new Promise((resolve) => setTimeout(resolve, delay));
|
||||
},
|
||||
shouldRetry: () => true,
|
||||
lazy: true,
|
||||
on: {
|
||||
opened: () => {
|
||||
wsLogger.info("WebSocket opened", {
|
||||
url: GRAPHQL_WS_URL,
|
||||
});
|
||||
},
|
||||
connected: (socket) => {
|
||||
wsLogger.info("WebSocket connected");
|
||||
activeSocket = socket;
|
||||
reconnectAttempts = 0; // Reset attempts on successful connection
|
||||
firstFailureAt = null;
|
||||
networkActions.WSConnected();
|
||||
networkActions.WSTouch();
|
||||
cancelReconnect(); // Cancel any pending reconnects
|
||||
|
||||
// Clear any lingering ping timeouts
|
||||
if (pingTimeout) {
|
||||
|
|
@ -104,38 +83,12 @@ export default function createWsLink({ store, GRAPHQL_WS_URL }) {
|
|||
});
|
||||
networkActions.WSClosed();
|
||||
|
||||
if (!firstFailureAt) {
|
||||
firstFailureAt = Date.now();
|
||||
}
|
||||
|
||||
// Clear socket and timeouts
|
||||
activeSocket = undefined;
|
||||
if (pingTimeout) {
|
||||
clearTimeout(pingTimeout);
|
||||
pingTimeout = null;
|
||||
}
|
||||
|
||||
// Schedule reconnect unless explicitly closed (1000) or going away (1001)
|
||||
if (event.code !== 1000 && event.code !== 1001) {
|
||||
const reconnectAge = Date.now() - firstFailureAt;
|
||||
if (reconnectAge >= MAX_RECONNECT_TIME_MS) {
|
||||
wsLogger.warn(
|
||||
"WebSocket reconnecting too long, triggering app reload",
|
||||
{
|
||||
reconnectAgeMs: reconnectAge,
|
||||
reconnectAttempts,
|
||||
lastCloseCode: event.code,
|
||||
},
|
||||
);
|
||||
networkActions.triggerReload();
|
||||
return;
|
||||
}
|
||||
|
||||
reconnectAttempts++;
|
||||
scheduleReconnect();
|
||||
} else {
|
||||
wsLogger.debug("Clean WebSocket closure - not reconnecting");
|
||||
}
|
||||
},
|
||||
ping: (received) => {
|
||||
// wsLogger.debug("WebSocket ping", { received });
|
||||
|
|
@ -169,6 +122,12 @@ export default function createWsLink({ store, GRAPHQL_WS_URL }) {
|
|||
clearTimeout(pingTimeout); // pong is received, clear connection close timeout
|
||||
}
|
||||
},
|
||||
error: (error) => {
|
||||
wsLogger.error("WebSocket error", {
|
||||
message: error?.message,
|
||||
url: GRAPHQL_WS_URL,
|
||||
});
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -16,7 +16,13 @@ export default withConnectivity(function Params() {
|
|||
variables: {
|
||||
deviceId,
|
||||
},
|
||||
skip: !deviceId,
|
||||
});
|
||||
|
||||
if (!deviceId) {
|
||||
return <Loader />;
|
||||
}
|
||||
|
||||
if (loading) {
|
||||
return <Loader />;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ export default withConnectivity(function Profile({ navigation, route }) {
|
|||
variables: {
|
||||
userId,
|
||||
},
|
||||
skip: !userId,
|
||||
},
|
||||
);
|
||||
|
||||
|
|
@ -113,6 +114,10 @@ export default withConnectivity(function Profile({ navigation, route }) {
|
|||
});
|
||||
}, [navigation]);
|
||||
|
||||
if (!userId) {
|
||||
return <Loader />;
|
||||
}
|
||||
|
||||
if (loading) {
|
||||
return <Loader />;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,9 +31,13 @@ export default withConnectivity(function Relatives({ navigation, route }) {
|
|||
variables: {
|
||||
userId,
|
||||
},
|
||||
skip: !userId,
|
||||
});
|
||||
|
||||
const { loading, error, data } = relativesSubscription;
|
||||
if (!userId) {
|
||||
return <Loader />;
|
||||
}
|
||||
if (loading) {
|
||||
return <Loader />;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue