fix(ws): stabilization try 4
This commit is contained in:
parent
6e717077f9
commit
f7656beb1a
6 changed files with 129 additions and 92 deletions
|
|
@ -14,6 +14,7 @@ const watchdogLogger = createLogger({
|
||||||
const HEARTBEAT_STALE_MS = 45_000;
|
const HEARTBEAT_STALE_MS = 45_000;
|
||||||
const CHECK_EVERY_MS = 10_000;
|
const CHECK_EVERY_MS = 10_000;
|
||||||
const MIN_RESTART_INTERVAL_MS = 30_000;
|
const MIN_RESTART_INTERVAL_MS = 30_000;
|
||||||
|
const CONNECT_STALE_MS = 20_000;
|
||||||
|
|
||||||
export default function useWsWatchdog({ enabled = true } = {}) {
|
export default function useWsWatchdog({ enabled = true } = {}) {
|
||||||
const {
|
const {
|
||||||
|
|
@ -32,6 +33,7 @@ export default function useWsWatchdog({ enabled = true } = {}) {
|
||||||
const wsLastHeartbeatDateRef = useRef(wsLastHeartbeatDate);
|
const wsLastHeartbeatDateRef = useRef(wsLastHeartbeatDate);
|
||||||
const appStateRef = useRef(AppState.currentState);
|
const appStateRef = useRef(AppState.currentState);
|
||||||
const wsLastRecoveryDateRef = useRef(wsLastRecoveryDate);
|
const wsLastRecoveryDateRef = useRef(wsLastRecoveryDate);
|
||||||
|
const connectBeganAtRef = useRef(null);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
wsLastHeartbeatDateRef.current = wsLastHeartbeatDate;
|
wsLastHeartbeatDateRef.current = wsLastHeartbeatDate;
|
||||||
|
|
@ -58,7 +60,68 @@ export default function useWsWatchdog({ enabled = true } = {}) {
|
||||||
const interval = setInterval(() => {
|
const interval = setInterval(() => {
|
||||||
if (appStateRef.current !== "active") return;
|
if (appStateRef.current !== "active") return;
|
||||||
if (!hasInternetConnection) return;
|
if (!hasInternetConnection) return;
|
||||||
if (!wsConnected) return;
|
|
||||||
|
// If the app has internet but WS is not connected for too long,
|
||||||
|
// proactively restart the WS transport.
|
||||||
|
if (!wsConnected) {
|
||||||
|
if (!connectBeganAtRef.current) {
|
||||||
|
connectBeganAtRef.current = Date.now();
|
||||||
|
}
|
||||||
|
|
||||||
|
const age = Date.now() - connectBeganAtRef.current;
|
||||||
|
if (age < CONNECT_STALE_MS) return;
|
||||||
|
|
||||||
|
const now = Date.now();
|
||||||
|
if (now - lastRestartRef.current < MIN_RESTART_INTERVAL_MS) return;
|
||||||
|
|
||||||
|
// Global recovery throttle: avoid double restarts from multiple sources.
|
||||||
|
const lastRecovery = wsLastRecoveryDateRef.current
|
||||||
|
? Date.parse(wsLastRecoveryDateRef.current)
|
||||||
|
: NaN;
|
||||||
|
if (Number.isFinite(lastRecovery)) {
|
||||||
|
const recoveryAge = now - lastRecovery;
|
||||||
|
if (recoveryAge < MIN_RESTART_INTERVAL_MS) return;
|
||||||
|
}
|
||||||
|
|
||||||
|
lastRestartRef.current = now;
|
||||||
|
networkActions.WSRecoveryTouch();
|
||||||
|
|
||||||
|
watchdogLogger.warn(
|
||||||
|
"WS not connected while internet is up, restarting",
|
||||||
|
{
|
||||||
|
ageMs: age,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
try {
|
||||||
|
Sentry.addBreadcrumb({
|
||||||
|
category: "websocket",
|
||||||
|
level: "warning",
|
||||||
|
message: "ws watchdog not connected",
|
||||||
|
data: { ageMs: age },
|
||||||
|
});
|
||||||
|
} catch (_e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
network.apolloClient?.restartWS?.();
|
||||||
|
} catch (error) {
|
||||||
|
watchdogLogger.error("WS restart failed", { error });
|
||||||
|
try {
|
||||||
|
Sentry.captureException(error, {
|
||||||
|
tags: { context: "ws-watchdog-restart-failed" },
|
||||||
|
});
|
||||||
|
} catch (_e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset connect timer once connected.
|
||||||
|
connectBeganAtRef.current = null;
|
||||||
if (!wsLastHeartbeatDateRef.current) return;
|
if (!wsLastHeartbeatDateRef.current) return;
|
||||||
|
|
||||||
const last = Date.parse(wsLastHeartbeatDateRef.current);
|
const last = Date.parse(wsLastHeartbeatDateRef.current);
|
||||||
|
|
|
||||||
|
|
@ -38,9 +38,8 @@ export function createRestartableClient(options) {
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
return {
|
// Important: keep the original `graphql-ws` client object identity.
|
||||||
...client,
|
client.restart = () => {
|
||||||
restart: () => {
|
|
||||||
const now = Date.now();
|
const now = Date.now();
|
||||||
if (now - lastRestartTime < 2000) {
|
if (now - lastRestartTime < 2000) {
|
||||||
// Ignore restart request if less than 2 seconds since last restart
|
// Ignore restart request if less than 2 seconds since last restart
|
||||||
|
|
@ -48,6 +47,7 @@ export function createRestartableClient(options) {
|
||||||
}
|
}
|
||||||
lastRestartTime = now;
|
lastRestartTime = now;
|
||||||
restart();
|
restart();
|
||||||
},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
return client;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -15,80 +15,59 @@ export default function createWsLink({ store, GRAPHQL_WS_URL }) {
|
||||||
|
|
||||||
const PING_INTERVAL = 10_000;
|
const PING_INTERVAL = 10_000;
|
||||||
const PING_TIMEOUT = 5_000;
|
const PING_TIMEOUT = 5_000;
|
||||||
const MAX_RECONNECT_DELAY = 30000; // 30 seconds max delay
|
|
||||||
// const MAX_RECONNECT_ATTEMPTS = 5; // Limit reconnection attempts
|
|
||||||
const MAX_RECONNECT_ATTEMPTS = Infinity; // Limit reconnection attempts
|
|
||||||
|
|
||||||
// Graceful degradation: after prolonged WS reconnecting, surface app-level recovery
|
// Let `graphql-ws` manage reconnection.
|
||||||
// via the existing reload mechanism (NetworkProviders will recreate Apollo).
|
// Our own reconnect scheduling was causing overlapping connection attempts
|
||||||
const MAX_RECONNECT_TIME_MS = 5 * 60 * 1000;
|
// and intermittent RN Android `client is null` (send called on already-closed native socket).
|
||||||
let firstFailureAt = null;
|
const MAX_RECONNECT_ATTEMPTS = Infinity;
|
||||||
|
|
||||||
let reconnectAttempts = 0;
|
|
||||||
function getReconnectDelay() {
|
|
||||||
// Exponential backoff with max delay
|
|
||||||
const delay = Math.min(
|
|
||||||
1000 * Math.pow(2, reconnectAttempts),
|
|
||||||
MAX_RECONNECT_DELAY,
|
|
||||||
);
|
|
||||||
return delay * (0.5 + Math.random()); // Add jitter
|
|
||||||
}
|
|
||||||
|
|
||||||
let reconnectTimeout;
|
|
||||||
function scheduleReconnect() {
|
|
||||||
// Clear any existing reconnect attempts
|
|
||||||
clearTimeout(reconnectTimeout);
|
|
||||||
|
|
||||||
// Schedule a single reconnect attempt with exponential backoff
|
|
||||||
reconnectTimeout = setTimeout(() => {
|
|
||||||
try {
|
|
||||||
wsLogger.debug("Attempting scheduled reconnect", {
|
|
||||||
attempt: reconnectAttempts + 1,
|
|
||||||
delay: getReconnectDelay(),
|
|
||||||
});
|
|
||||||
wsLink.client.restart();
|
|
||||||
} catch (error) {
|
|
||||||
wsLogger.error("Failed to reconnect", { error });
|
|
||||||
}
|
|
||||||
reconnectTimeout = null;
|
|
||||||
}, getReconnectDelay());
|
|
||||||
}
|
|
||||||
|
|
||||||
function cancelReconnect() {
|
|
||||||
if (reconnectTimeout) {
|
|
||||||
wsLogger.debug("Canceling scheduled reconnect");
|
|
||||||
clearTimeout(reconnectTimeout);
|
|
||||||
reconnectTimeout = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const wsLink = new WebSocketLink({
|
const wsLink = new WebSocketLink({
|
||||||
url: GRAPHQL_WS_URL,
|
url: GRAPHQL_WS_URL,
|
||||||
connectionParams: () => {
|
connectionParams: () => {
|
||||||
const { userToken } = getAuthState();
|
const { userToken } = getAuthState();
|
||||||
const headers = {
|
const headers = {};
|
||||||
"Sec-WebSocket-Protocol": "graphql-transport-ws",
|
|
||||||
};
|
// Important: only attach Authorization when we have a real token.
|
||||||
|
// Sending `Authorization: Bearer undefined` breaks WS auth on some backends.
|
||||||
|
if (userToken) {
|
||||||
setBearerHeader(headers, userToken);
|
setBearerHeader(headers, userToken);
|
||||||
|
} else {
|
||||||
|
wsLogger.warn("WS connectionParams without userToken", {
|
||||||
|
url: GRAPHQL_WS_URL,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note: Sec-WebSocket-Protocol is negotiated at the handshake level.
|
||||||
|
// Putting it in `connection_init.payload.headers` is ineffective and can
|
||||||
|
// confuse server-side auth header parsing.
|
||||||
return { headers };
|
return { headers };
|
||||||
},
|
},
|
||||||
|
// Do not use lazy sockets: some RN Android builds intermittently hit
|
||||||
|
// WebSocketModule send() with null client when the socket is created/
|
||||||
|
// torn down rapidly around app-state transitions.
|
||||||
|
lazy: false,
|
||||||
keepAlive: PING_INTERVAL,
|
keepAlive: PING_INTERVAL,
|
||||||
retryAttempts: MAX_RECONNECT_ATTEMPTS,
|
retryAttempts: MAX_RECONNECT_ATTEMPTS,
|
||||||
retryWait: async () => {
|
retryWait: async () => {
|
||||||
const delay = getReconnectDelay();
|
// `graphql-ws` passes the retry count to `retryWait(retries)`.
|
||||||
|
// Use a jittered exponential backoff, capped.
|
||||||
|
const retries = arguments[0] ?? 0;
|
||||||
|
const base = Math.min(1000 * Math.pow(2, retries), 30_000);
|
||||||
|
const delay = base * (0.5 + Math.random());
|
||||||
await new Promise((resolve) => setTimeout(resolve, delay));
|
await new Promise((resolve) => setTimeout(resolve, delay));
|
||||||
},
|
},
|
||||||
shouldRetry: () => true,
|
shouldRetry: () => true,
|
||||||
lazy: true,
|
|
||||||
on: {
|
on: {
|
||||||
|
opened: () => {
|
||||||
|
wsLogger.info("WebSocket opened", {
|
||||||
|
url: GRAPHQL_WS_URL,
|
||||||
|
});
|
||||||
|
},
|
||||||
connected: (socket) => {
|
connected: (socket) => {
|
||||||
wsLogger.info("WebSocket connected");
|
wsLogger.info("WebSocket connected");
|
||||||
activeSocket = socket;
|
activeSocket = socket;
|
||||||
reconnectAttempts = 0; // Reset attempts on successful connection
|
|
||||||
firstFailureAt = null;
|
|
||||||
networkActions.WSConnected();
|
networkActions.WSConnected();
|
||||||
networkActions.WSTouch();
|
networkActions.WSTouch();
|
||||||
cancelReconnect(); // Cancel any pending reconnects
|
|
||||||
|
|
||||||
// Clear any lingering ping timeouts
|
// Clear any lingering ping timeouts
|
||||||
if (pingTimeout) {
|
if (pingTimeout) {
|
||||||
|
|
@ -104,38 +83,12 @@ export default function createWsLink({ store, GRAPHQL_WS_URL }) {
|
||||||
});
|
});
|
||||||
networkActions.WSClosed();
|
networkActions.WSClosed();
|
||||||
|
|
||||||
if (!firstFailureAt) {
|
|
||||||
firstFailureAt = Date.now();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clear socket and timeouts
|
// Clear socket and timeouts
|
||||||
activeSocket = undefined;
|
activeSocket = undefined;
|
||||||
if (pingTimeout) {
|
if (pingTimeout) {
|
||||||
clearTimeout(pingTimeout);
|
clearTimeout(pingTimeout);
|
||||||
pingTimeout = null;
|
pingTimeout = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Schedule reconnect unless explicitly closed (1000) or going away (1001)
|
|
||||||
if (event.code !== 1000 && event.code !== 1001) {
|
|
||||||
const reconnectAge = Date.now() - firstFailureAt;
|
|
||||||
if (reconnectAge >= MAX_RECONNECT_TIME_MS) {
|
|
||||||
wsLogger.warn(
|
|
||||||
"WebSocket reconnecting too long, triggering app reload",
|
|
||||||
{
|
|
||||||
reconnectAgeMs: reconnectAge,
|
|
||||||
reconnectAttempts,
|
|
||||||
lastCloseCode: event.code,
|
|
||||||
},
|
|
||||||
);
|
|
||||||
networkActions.triggerReload();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
reconnectAttempts++;
|
|
||||||
scheduleReconnect();
|
|
||||||
} else {
|
|
||||||
wsLogger.debug("Clean WebSocket closure - not reconnecting");
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
ping: (received) => {
|
ping: (received) => {
|
||||||
// wsLogger.debug("WebSocket ping", { received });
|
// wsLogger.debug("WebSocket ping", { received });
|
||||||
|
|
@ -169,6 +122,12 @@ export default function createWsLink({ store, GRAPHQL_WS_URL }) {
|
||||||
clearTimeout(pingTimeout); // pong is received, clear connection close timeout
|
clearTimeout(pingTimeout); // pong is received, clear connection close timeout
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
error: (error) => {
|
||||||
|
wsLogger.error("WebSocket error", {
|
||||||
|
message: error?.message,
|
||||||
|
url: GRAPHQL_WS_URL,
|
||||||
|
});
|
||||||
|
},
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,13 @@ export default withConnectivity(function Params() {
|
||||||
variables: {
|
variables: {
|
||||||
deviceId,
|
deviceId,
|
||||||
},
|
},
|
||||||
|
skip: !deviceId,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (!deviceId) {
|
||||||
|
return <Loader />;
|
||||||
|
}
|
||||||
|
|
||||||
if (loading) {
|
if (loading) {
|
||||||
return <Loader />;
|
return <Loader />;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,7 @@ export default withConnectivity(function Profile({ navigation, route }) {
|
||||||
variables: {
|
variables: {
|
||||||
userId,
|
userId,
|
||||||
},
|
},
|
||||||
|
skip: !userId,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
@ -113,6 +114,10 @@ export default withConnectivity(function Profile({ navigation, route }) {
|
||||||
});
|
});
|
||||||
}, [navigation]);
|
}, [navigation]);
|
||||||
|
|
||||||
|
if (!userId) {
|
||||||
|
return <Loader />;
|
||||||
|
}
|
||||||
|
|
||||||
if (loading) {
|
if (loading) {
|
||||||
return <Loader />;
|
return <Loader />;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -31,9 +31,13 @@ export default withConnectivity(function Relatives({ navigation, route }) {
|
||||||
variables: {
|
variables: {
|
||||||
userId,
|
userId,
|
||||||
},
|
},
|
||||||
|
skip: !userId,
|
||||||
});
|
});
|
||||||
|
|
||||||
const { loading, error, data } = relativesSubscription;
|
const { loading, error, data } = relativesSubscription;
|
||||||
|
if (!userId) {
|
||||||
|
return <Loader />;
|
||||||
|
}
|
||||||
if (loading) {
|
if (loading) {
|
||||||
return <Loader />;
|
return <Loader />;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue