mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-27 09:02:15 +00:00
feat: implement rate limiting and error handling in chat API
- Added rate limiting to the chat API, allowing a maximum number of requests per IP within a configurable time window.
- Implemented handling for rate-limit-exceeded responses, including the appropriate headers and retry instructions.
- Enhanced error handling for other API errors, providing user-friendly messages for various failure scenarios.
- Updated the README to document the new environment variables for rate limiting configuration.
This commit is contained in:
@@ -23,7 +23,15 @@ This generates embeddings for all doc chunks and stores them in
|
||||
OPENAI_API_KEY=sk-... pnpm docs:chat:serve:vector
|
||||
```
|
||||
|
||||
Defaults to `http://localhost:3001`. Health check:
|
||||
Defaults to `http://localhost:3001`. Optional environment variables:
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `PORT` | `3001` | Server port |
|
||||
| `RATE_LIMIT` | `20` | Max requests per window per IP |
|
||||
| `RATE_WINDOW_MS` | `60000` | Rate limit window in milliseconds |
|
||||
|
||||
Health check:
|
||||
|
||||
```bash
|
||||
curl http://localhost:3001/health
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* Docs-chat API with RAG (vector search).
|
||||
* Env: OPENAI_API_KEY, DOCS_CHAT_DB, PORT
|
||||
* Env: OPENAI_API_KEY, DOCS_CHAT_DB, PORT, RATE_LIMIT, RATE_WINDOW_MS
|
||||
*/
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
@@ -15,6 +15,62 @@ const defaultDbPath = path.join(__dirname, ".lance-db");
|
||||
const dbPath = process.env.DOCS_CHAT_DB || defaultDbPath;
|
||||
const port = Number(process.env.PORT || 3001);
|
||||
|
||||
// Rate limiting configuration

/**
 * Read a positive integer setting from the environment.
 *
 * An unset or blank variable silently yields `fallback`. A value that is not
 * a finite number >= 1 (e.g. "abc", "0", "-5") also yields `fallback`, with a
 * warning on stderr: comparisons against NaN are always false, so a NaN limit
 * would never block anyone and a NaN window would never expire. Fractional
 * values are floored.
 *
 * @param name - Environment variable to read.
 * @param fallback - Value used when the variable is unset or invalid.
 * @returns The parsed integer, or `fallback`.
 */
function readPositiveIntEnv(name: string, fallback: number): number {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === "") {
    return fallback;
  }
  const parsed = Number(raw);
  if (!Number.isFinite(parsed) || parsed < 1) {
    console.error(`Ignoring invalid ${name}=${JSON.stringify(raw)}; using default ${fallback}`);
    return fallback;
  }
  return Math.floor(parsed);
}

const RATE_LIMIT = readPositiveIntEnv("RATE_LIMIT", 20); // requests per window
const RATE_WINDOW_MS = readPositiveIntEnv("RATE_WINDOW_MS", 60_000); // 1 minute
const MAX_MESSAGE_LENGTH = 2000; // characters
const MAX_BODY_SIZE = 8192; // bytes

// In-memory rate limit store (IP -> { count, resetAt })
const rateLimitStore = new Map<string, { count: number; resetAt: number }>();

// Periodically clean up expired entries to prevent memory leaks
const rateLimitSweeper = setInterval(() => {
  const now = Date.now();
  for (const [ip, record] of rateLimitStore) {
    if (now > record.resetAt) {
      rateLimitStore.delete(ip);
    }
  }
}, RATE_WINDOW_MS);

// The sweeper is housekeeping only; it must not keep the process alive by itself.
rateLimitSweeper.unref();
|
||||
|
||||
/**
 * Count one request from `ip` against its fixed window and report the outcome.
 *
 * A rejected request is not counted; an accepted one increments the stored
 * counter for that key.
 *
 * @param ip - Key identifying the client in the rate limit store.
 * @returns `allowed` - whether the request fits within RATE_LIMIT;
 *   `remaining` - requests left in the current window (0 when blocked);
 *   `resetAt` - epoch milliseconds at which the current window ends.
 */
function checkRateLimit(ip: string): { allowed: boolean; remaining: number; resetAt: number } {
  const now = Date.now();
  const record = rateLimitStore.get(ip);

  // The window is [start, resetAt): a request arriving exactly at resetAt
  // opens a new window instead of being judged against the one that has ended.
  if (!record || now >= record.resetAt) {
    const resetAt = now + RATE_WINDOW_MS;
    rateLimitStore.set(ip, { count: 1, resetAt });
    return { allowed: true, remaining: RATE_LIMIT - 1, resetAt };
  }

  if (record.count >= RATE_LIMIT) {
    return { allowed: false, remaining: 0, resetAt: record.resetAt };
  }

  record.count++;
  return { allowed: true, remaining: RATE_LIMIT - record.count, resetAt: record.resetAt };
}
|
||||
|
||||
/**
 * Determine the address used to key rate limiting for a request.
 *
 * Sources are tried in order: the first entry of `X-Forwarded-For`, then
 * `X-Real-IP`, then the socket's remote address. A header that is present but
 * blank (or whose first entry is blank) is skipped rather than returned, so
 * the empty string is never used as a client key.
 *
 * NOTE(security): both headers are supplied by the client unless a trusted
 * reverse proxy overwrites them. If this server is reachable directly, a
 * caller can send a different header value on every request and sidestep the
 * per-IP limit. Only rely on them behind a known proxy.
 *
 * @param req - Incoming HTTP request.
 * @returns The client address, or "unknown" when none is available.
 */
function getClientIP(req: http.IncomingMessage): string {
  const forwarded = req.headers["x-forwarded-for"];
  if (typeof forwarded === "string") {
    // The header is a comma-separated chain; the left-most entry is the original client.
    const firstHop = forwarded.split(",", 1)[0]?.trim();
    if (firstHop) {
      return firstHop;
    }
  }

  const realIp = req.headers["x-real-ip"];
  if (typeof realIp === "string") {
    const trimmed = realIp.trim();
    if (trimmed) {
      return trimmed;
    }
  }

  return req.socket.remoteAddress || "unknown";
}
|
||||
|
||||
// Validate API key
|
||||
const apiKey = process.env.OPENAI_API_KEY;
|
||||
if (!apiKey) {
|
||||
@@ -91,8 +147,17 @@ async function streamOpenAI(
|
||||
}
|
||||
|
||||
async function handleChat(req: http.IncomingMessage, res: http.ServerResponse) {
|
||||
// Read body with size limit to prevent memory exhaustion
|
||||
let body = "";
|
||||
for await (const chunk of req) body += chunk;
|
||||
let bodySize = 0;
|
||||
for await (const chunk of req) {
|
||||
bodySize += chunk.length;
|
||||
if (bodySize > MAX_BODY_SIZE) {
|
||||
sendJson(res, 413, { error: "Request too large" });
|
||||
return;
|
||||
}
|
||||
body += chunk;
|
||||
}
|
||||
|
||||
let message = "";
|
||||
try {
|
||||
@@ -107,6 +172,20 @@ async function handleChat(req: http.IncomingMessage, res: http.ServerResponse) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Validate message length to prevent token stuffing
|
||||
const trimmedMessage = message.trim();
|
||||
if (!trimmedMessage) {
|
||||
sendJson(res, 400, { error: "message required" });
|
||||
return;
|
||||
}
|
||||
if (trimmedMessage.length > MAX_MESSAGE_LENGTH) {
|
||||
sendJson(res, 400, {
|
||||
error: `Message too long (max ${MAX_MESSAGE_LENGTH} characters)`,
|
||||
});
|
||||
return;
|
||||
}
|
||||
message = trimmedMessage;
|
||||
|
||||
// Use RAG retriever instead of keyword matching
|
||||
const results = await retriever.retrieve(message, 8);
|
||||
|
||||
@@ -167,6 +246,25 @@ const server = http.createServer(async (req, res) => {
|
||||
}
|
||||
|
||||
if (req.method === "POST" && req.url === "/chat") {
|
||||
// Apply rate limiting
|
||||
const clientIP = getClientIP(req);
|
||||
const rateCheck = checkRateLimit(clientIP);
|
||||
|
||||
// Add rate limit headers
|
||||
res.setHeader("X-RateLimit-Limit", RATE_LIMIT);
|
||||
res.setHeader("X-RateLimit-Remaining", Math.max(0, rateCheck.remaining));
|
||||
res.setHeader("X-RateLimit-Reset", Math.ceil(rateCheck.resetAt / 1000));
|
||||
|
||||
if (!rateCheck.allowed) {
|
||||
const retryAfter = Math.ceil((rateCheck.resetAt - Date.now()) / 1000);
|
||||
res.setHeader("Retry-After", retryAfter);
|
||||
sendJson(res, 429, {
|
||||
error: "Too many requests. Please wait before trying again.",
|
||||
retryAfter,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
await handleChat(req, res);
|
||||
return;
|
||||
}
|
||||
@@ -179,4 +277,7 @@ server.listen(port, async () => {
|
||||
console.error(
|
||||
`docs-chat API (RAG) running at http://localhost:${port} (chunks: ${count})`,
|
||||
);
|
||||
console.error(
|
||||
`Rate limit: ${RATE_LIMIT} requests per ${RATE_WINDOW_MS / 1000}s window`,
|
||||
);
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user