fix(msteams): surface network errors blocking bot JWT validation and outbound replies (#77674) (#78081)

* fix(msteams): surface network errors blocking Teams bot JWT validation and outbound replies (#77674)

When login.botframework.com or smba.trafficmanager.net egress is blocked,
errors previously disappeared completely. JWT validator swallowed network
errors and returned false (401 looked identical to a bad credential), and
outbound send failures with transport-level codes had no hint pointing to
the Connector endpoint.

- sdk.ts: rethrow ECONNREFUSED/ENOTFOUND/EHOSTUNREACH/ETIMEDOUT/ECONNRESET
  from the JWKS key fetch so callers can distinguish firewall blocks from bad
  credentials; add isJwksNetworkError() helper
- monitor.ts: catch rethrown network errors in JWT middleware and log at
  runtime.error level with an actionable message pointing to
  login.botframework.com:443; upgrade allowlist resolution failures from
  runtime.log (optional/silent) to runtime.error
- errors.ts: add "network" kind to classifyMSTeamsSendError for transport-level
  errors (ECONNREFUSED, ENOTFOUND, etc.); add formatMSTeamsSendErrorHint for
  "network" kind pointing to smba.trafficmanager.net and egress rules
- monitor-handler.ts, message-handler.ts: remove spurious ?. from runtime.error
  calls (RuntimeEnv.error is a required non-optional field)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(msteams): surface blocked botframework egress

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-authored-by: Brad Groux <3053586+BradGroux@users.noreply.github.com>
This commit is contained in:
Brandon
2026-05-06 00:11:06 -04:00
committed by GitHub
parent 5d7262c410
commit eecda912ee
8 changed files with 141 additions and 9 deletions

View File

@@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai
### Changes
- MS Teams: surface blocked Bot Framework egress by logging JWKS fetch network failures and adding a Bot Connector send hint for transport-level reply failures. Fixes #77674. (#78081) Thanks @Beandon13.
- PR triage: mark external pull requests with `proof: supplied` when Barnacle finds structured real behavior proof, keep stale negative proof labels in sync across CRLF-edited PR bodies, and let ClawSweeper own the stronger `proof: sufficient` judgement.
- Sessions CLI: show the selected agent runtime in the `openclaw sessions` table so terminal output matches the runtime visibility already present in JSON/status surfaces. Thanks @vincentkoc.
- Talk/voice: unify realtime relay, transcription relay, managed-room handoff, Voice Call, Google Meet, VoiceClaw, and native clients around a shared Talk session controller and add the Gateway-managed `talk.session.*` RPC surface.

View File

@@ -70,6 +70,36 @@ describe("msteams errors", () => {
).toContain("expired the content stream");
});
it("classifies transport-level network errors and provides smba egress hint (#77674)", () => {
const econnrefused = Object.assign(new Error("connect ECONNREFUSED"), { code: "ECONNREFUSED" });
const enotfound = Object.assign(new Error("getaddrinfo ENOTFOUND smba.trafficmanager.net"), {
code: "ENOTFOUND",
});
const etimedout = Object.assign(new Error("ETIMEDOUT"), { code: "ETIMEDOUT" });
expect(classifyMSTeamsSendError(econnrefused)).toMatchObject({
kind: "network",
errorCode: "ECONNREFUSED",
});
expect(classifyMSTeamsSendError(enotfound)).toMatchObject({
kind: "network",
errorCode: "ENOTFOUND",
});
expect(classifyMSTeamsSendError(etimedout)).toMatchObject({
kind: "network",
errorCode: "ETIMEDOUT",
});
// Hints for network errors must mention smba (Connector endpoint) and egress
expect(formatMSTeamsSendErrorHint({ kind: "network" })).toContain("smba");
expect(formatMSTeamsSendErrorHint({ kind: "network" })).toContain("egress");
});
it("still classifies HTTP errors as unknown when no status code and no network code", () => {
expect(classifyMSTeamsSendError(new Error("unexpected error")).kind).toBe("unknown");
expect(classifyMSTeamsSendError(null)).toMatchObject({ kind: "unknown" });
});
describe("isRevokedProxyError", () => {
it("returns true for revoked proxy TypeError", () => {
expect(

View File

@@ -149,7 +149,13 @@ function extractRetryAfterMs(err: unknown): number | null {
return null;
}
type MSTeamsSendErrorKind = "auth" | "throttled" | "transient" | "permanent" | "unknown";
type MSTeamsSendErrorKind =
| "auth"
| "throttled"
| "transient"
| "permanent"
| "network"
| "unknown";
type MSTeamsSendErrorClassification = {
kind: MSTeamsSendErrorKind;
@@ -204,6 +210,21 @@ export function classifyMSTeamsSendError(err: unknown): MSTeamsSendErrorClassifi
return { kind: "permanent", statusCode, errorCode };
}
// Transport-level errors (no HTTP status code) — check for well-known
// network error codes that indicate egress is blocked (#77674).
if (statusCode == null) {
const networkCode = isRecord(err) && typeof err.code === "string" ? err.code : null;
if (
networkCode === "ECONNREFUSED" ||
networkCode === "ENOTFOUND" ||
networkCode === "EHOSTUNREACH" ||
networkCode === "ETIMEDOUT" ||
networkCode === "ECONNRESET"
) {
return { kind: "network", errorCode: networkCode };
}
}
return {
kind: "unknown",
statusCode: statusCode ?? undefined,
@@ -242,5 +263,8 @@ export function formatMSTeamsSendErrorHint(
if (classification.kind === "transient") {
return "transient Teams/Bot Framework error; retry may succeed";
}
if (classification.kind === "network") {
return "transport-level failure sending reply to Teams Bot Connector (smba.trafficmanager.net) — check egress firewall rules allow outbound HTTPS to smba.trafficmanager.net";
}
return undefined;
}

View File

@@ -459,7 +459,7 @@ export function registerMSTeamsHandlers<T extends MSTeamsActivityHandler>(
try {
await handleTeamsMessage(context as MSTeamsTurnContext);
} catch (err) {
deps.runtime.error?.(`msteams handler failed: ${formatUnknownError(err)}`);
deps.runtime.error(`msteams handler failed: ${formatUnknownError(err)}`);
}
await next();
});
@@ -520,7 +520,7 @@ export function registerMSTeamsHandlers<T extends MSTeamsActivityHandler>(
try {
await handleReaction(context as MSTeamsTurnContext, "added");
} catch (err) {
deps.runtime.error?.(`msteams reaction handler failed: ${String(err)}`);
deps.runtime.error(`msteams reaction handler failed: ${String(err)}`);
}
await next();
});
@@ -529,7 +529,7 @@ export function registerMSTeamsHandlers<T extends MSTeamsActivityHandler>(
try {
await handleReaction(context as MSTeamsTurnContext, "removed");
} catch (err) {
deps.runtime.error?.(`msteams reaction handler failed: ${String(err)}`);
deps.runtime.error(`msteams reaction handler failed: ${String(err)}`);
}
await next();
});

View File

@@ -908,7 +908,7 @@ export function createMSTeamsMessageHandler(deps: MSTeamsMessageHandlerDeps) {
);
} catch (err) {
log.error("dispatch failed", { error: formatUnknownError(err) });
runtime.error?.(`msteams dispatch failed: ${formatUnknownError(err)}`);
runtime.error(`msteams dispatch failed: ${formatUnknownError(err)}`);
try {
await context.sendActivity("⚠️ Something went wrong. Please try again.");
} catch {
@@ -971,7 +971,7 @@ export function createMSTeamsMessageHandler(deps: MSTeamsMessageHandlerDeps) {
});
},
onError: (err) => {
runtime.error?.(`msteams debounce flush failed: ${formatUnknownError(err)}`);
runtime.error(`msteams debounce flush failed: ${formatUnknownError(err)}`);
},
});

View File

@@ -194,7 +194,11 @@ export async function monitorMSTeamsProvider(
}
}
} catch (err) {
runtime.log?.(`msteams resolve failed; using config entries. ${formatUnknownError(err)}`);
// Log at error (not log) — allowlist resolution failures leave the bot in a
// degraded state where Graph-resolved IDs are missing (#77674).
runtime?.error(
`msteams resolve failed; falling back to raw config entries — allowlist members resolved via Graph may be missing. ${formatUnknownError(err)}`,
);
}
msteamsCfg = {
@@ -300,7 +304,23 @@ export async function monitorMSTeamsProvider(
next();
})
.catch((err) => {
log.debug?.(`JWT validation error: ${formatUnknownError(err)}`);
// Network-level failures (DNS, firewall, TLS toward login.botframework.com)
// are rethrown by the validator so we can log them visibly. Without this,
// they look identical to a bad credential at default log levels (#77674).
const isNetworkFailure =
err instanceof Error &&
/ECONNREFUSED|ENOTFOUND|EHOSTUNREACH|ETIMEDOUT|ECONNRESET/i.test(
(err as NodeJS.ErrnoException).code ?? err.message,
);
if (isNetworkFailure) {
// Network failure fetching JWKS keys — log visibly so operators can
// identify egress blocks to login.botframework.com (#77674).
runtime?.error(
`msteams: JWKS key fetch failed — check egress to login.botframework.com:443 (firewall or DNS may be blocking it). Bot will 401 all inbound requests until this is resolved. Error: ${formatUnknownError(err)}`,
);
} else {
log.debug?.(`JWT validation error: ${formatUnknownError(err)}`);
}
res.status(401).json({ error: "Unauthorized" });
});
});

View File

@@ -392,6 +392,31 @@ describe("createBotFrameworkJwtValidator", () => {
await expect(validator.validate("Bearer no-iss")).resolves.toBe(false);
expect(jwtState.verifyCalls).toHaveLength(0);
});
it("rethrows JWKS network errors (ECONNREFUSED) instead of silently returning false (#77674)", async () => {
// Simulate a firewall blocking egress to login.botframework.com.
// The top-level vi.mock("jwks-rsa") sets up a class-level mock, so we spy
// on the prototype to override getSigningKey for this test only.
const networkErr = Object.assign(new Error("connect ECONNREFUSED 40.126.25.32:443"), {
code: "ECONNREFUSED",
});
const { JwksClient } = await import("jwks-rsa");
vi.spyOn(JwksClient.prototype, "getSigningKey").mockRejectedValueOnce(networkErr);
jwtState.decodedPayload = { iss: "https://api.botframework.com" };
const validator = await createBotFrameworkJwtValidator(creds);
// Network errors must bubble out — callers can then log them at warn/error
// level rather than silently returning 401 that looks like a bad credential.
await expect(validator.validate("Bearer token-firewall")).rejects.toThrow("ECONNREFUSED");
});
it("returns false (not throws) for non-network JWKS errors like bad signature (#77674)", async () => {
// Auth errors (bad signature, expired token) should still return false.
jwtState.decodedPayload = { iss: "https://api.botframework.com" };
jwtState.verifyBehavior = "throw";
const validator = await createBotFrameworkJwtValidator(creds);
await expect(validator.validate("Bearer token-bad-sig")).resolves.toBe(false);
});
});
function makeFakeSdk() {

View File

@@ -876,9 +876,41 @@ export async function createBotFrameworkJwtValidator(creds: MSTeamsCredentials):
return false;
}
return true;
} catch {
} catch (err) {
// Network-level failures (DNS, firewall, TLS) must be distinguished from
// invalid tokens so callers can log them at an appropriate severity.
// Rethrow so the JWT middleware can emit an actionable warning instead of
// silently returning 401 (which looks identical to a bad credential).
if (isJwksNetworkError(err)) {
throw err;
}
return false;
}
},
};
}
/**
* Return true when the error originated from a network-level failure fetching
* the JWKS endpoint (DNS resolution, connection refused, TLS handshake, etc.)
* rather than from token verification logic.
*/
function isJwksNetworkError(err: unknown): boolean {
if (!(err instanceof Error)) {
return false;
}
const code = (err as NodeJS.ErrnoException).code;
if (
code === "ECONNREFUSED" ||
code === "ENOTFOUND" ||
code === "EHOSTUNREACH" ||
code === "ETIMEDOUT" ||
code === "ECONNRESET"
) {
return true;
}
// jwks-rsa wraps fetch failures with a message containing the URL or "key fetching"
return (
/jwks|key fetch|getSigningKey/i.test(err.message) && /network|fetch|connect/i.test(err.message)
);
}