feat: add LLM retry and fallback
Made-with: Cursor
This commit is contained in:
@@ -14,9 +14,11 @@ const envSchema = z.object({
|
|||||||
|
|
||||||
LLM_BASE_URL: z.string().url().default('http://localhost:11434/v1'),
|
LLM_BASE_URL: z.string().url().default('http://localhost:11434/v1'),
|
||||||
LLM_MODEL: z.string().default('qwen2.5:14b'),
|
LLM_MODEL: z.string().default('qwen2.5:14b'),
|
||||||
|
LLM_FALLBACK_MODEL: z.string().optional(),
|
||||||
LLM_API_KEY: z.string().optional(),
|
LLM_API_KEY: z.string().optional(),
|
||||||
LLM_TIMEOUT_MS: z.coerce.number().default(15000),
|
LLM_TIMEOUT_MS: z.coerce.number().default(15000),
|
||||||
LLM_MAX_RETRIES: z.coerce.number().min(0).default(1),
|
LLM_MAX_RETRIES: z.coerce.number().min(0).default(1),
|
||||||
|
LLM_RETRY_DELAY_MS: z.coerce.number().min(0).default(1000),
|
||||||
LLM_TEMPERATURE: z.coerce.number().min(0).max(2).default(0.7),
|
LLM_TEMPERATURE: z.coerce.number().min(0).max(2).default(0.7),
|
||||||
LLM_MAX_TOKENS: z.coerce.number().default(2048),
|
LLM_MAX_TOKENS: z.coerce.number().default(2048),
|
||||||
|
|
||||||
|
|||||||
@@ -5,10 +5,13 @@ import type { Stack, Level, QuestionType } from '../../db/schema/enums.js';
|
|||||||
/**
 * Connection and generation settings for {@link LlmService}.
 *
 * Every field can be supplied by the caller; the service constructor falls
 * back to the matching `LLM_*` environment value for any field left unset.
 */
export interface LlmConfig {
  /** Base URL of the API; `/chat/completions` is appended when a request is built. */
  baseUrl: string;
  /** Primary model name, tried first on every chat call. */
  model: string;
  /** Optional secondary model, tried after the primary model's attempts are exhausted. */
  fallbackModel?: string;
  /** Optional API key (how it is attached to the request is not visible in this view). */
  apiKey?: string;
  /** Request timeout in milliseconds (enforcement is not visible in this view). */
  timeoutMs: number;
  /** Sampling temperature, sent as `temperature` in the request body. */
  temperature: number;
  /** Generation limit, sent as `max_tokens` in the request body. */
  maxTokens: number;
  /** Number of retries per model after the first attempt (0 means a single attempt). */
  maxRetries: number;
  /** Base retry delay in milliseconds; doubled after each failed attempt. */
  retryDelayMs: number;
}
|
||||||
|
|
||||||
export interface ChatMessage {
|
export interface ChatMessage {
|
||||||
@@ -61,14 +64,42 @@ export class LlmService {
|
|||||||
this.config = {
|
this.config = {
|
||||||
baseUrl: config?.baseUrl ?? env.LLM_BASE_URL,
|
baseUrl: config?.baseUrl ?? env.LLM_BASE_URL,
|
||||||
model: config?.model ?? env.LLM_MODEL,
|
model: config?.model ?? env.LLM_MODEL,
|
||||||
|
fallbackModel: config?.fallbackModel ?? env.LLM_FALLBACK_MODEL,
|
||||||
apiKey: config?.apiKey ?? env.LLM_API_KEY,
|
apiKey: config?.apiKey ?? env.LLM_API_KEY,
|
||||||
timeoutMs: config?.timeoutMs ?? env.LLM_TIMEOUT_MS,
|
timeoutMs: config?.timeoutMs ?? env.LLM_TIMEOUT_MS,
|
||||||
temperature: config?.temperature ?? env.LLM_TEMPERATURE,
|
temperature: config?.temperature ?? env.LLM_TEMPERATURE,
|
||||||
maxTokens: config?.maxTokens ?? env.LLM_MAX_TOKENS,
|
maxTokens: config?.maxTokens ?? env.LLM_MAX_TOKENS,
|
||||||
|
maxRetries: config?.maxRetries ?? env.LLM_MAX_RETRIES,
|
||||||
|
retryDelayMs: config?.retryDelayMs ?? env.LLM_RETRY_DELAY_MS,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Sends a chat request, retrying with exponential backoff and then falling
 * back to the optional secondary model.
 *
 * Each model gets one initial attempt plus up to `maxRetries` retries. After
 * a failed attempt that is not the last one for that model, the call waits
 * `retryDelayMs * 2^attempt` milliseconds before trying again. The first
 * successful response is returned immediately.
 *
 * @param messages - Conversation passed unchanged to `executeChat`.
 * @returns The string returned by the first successful `executeChat` call.
 * @throws The error from the last failed attempt; a thrown value that is not
 *   an `Error` is replaced by `Error('LLM request failed')`.
 */
async chat(messages: ChatMessage[]): Promise<string> {
  const { model, fallbackModel, retryDelayMs } = this.config;

  // A fallback identical to the primary model would only repeat the same
  // failing attempts, so it is skipped.
  const modelsToTry =
    fallbackModel && fallbackModel !== model ? [model, fallbackModel] : [model];

  // Constructor overrides are not validated like the env value, so normalise
  // here: negative/NaN becomes 0 (still one attempt), fractions are floored.
  const maxRetries = Math.max(0, Math.floor(this.config.maxRetries) || 0);

  let lastError: Error | null = null;

  for (const candidate of modelsToTry) {
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        return await this.executeChat(messages, candidate);
      } catch (err) {
        lastError = err instanceof Error ? err : new Error('LLM request failed');

        // No wait after the final attempt for this model: either the next
        // model is tried straight away or the error is surfaced.
        if (attempt < maxRetries) {
          await sleep(retryDelayMs * 2 ** attempt);
        }
      }
    }
  }

  throw lastError ?? new Error('LLM request failed');
}
|
||||||
|
|
||||||
|
private async executeChat(messages: ChatMessage[], model: string): Promise<string> {
|
||||||
const url = `${this.config.baseUrl.replace(/\/$/, '')}/chat/completions`;
|
const url = `${this.config.baseUrl.replace(/\/$/, '')}/chat/completions`;
|
||||||
|
|
||||||
const headers: Record<string, string> = {
|
const headers: Record<string, string> = {
|
||||||
@@ -79,7 +110,7 @@ export class LlmService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const body = {
|
const body = {
|
||||||
model: this.config.model,
|
model,
|
||||||
messages: messages.map((m) => ({ role: m.role, content: m.content })),
|
messages: messages.map((m) => ({ role: m.role, content: m.content })),
|
||||||
temperature: this.config.temperature,
|
temperature: this.config.temperature,
|
||||||
max_tokens: this.config.maxTokens,
|
max_tokens: this.config.maxTokens,
|
||||||
@@ -161,6 +192,10 @@ Rules: type must be one of: ${typeList}. For single_choice/multiple_select: opti
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Resolves after roughly `ms` milliseconds.
 *
 * The delay is clamped to the range timers can represent: NaN and negative
 * values become 0, and anything above 2147483647 (the signed 32-bit maximum)
 * is capped there. Without the cap, Node.js resets an oversized delay to 1 ms,
 * which would turn a very long backoff into an immediate retry.
 *
 * @param ms - Requested delay in milliseconds.
 * @returns A promise that resolves with no value once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  const maxTimerDelayMs = 2147483647;
  const delayMs = Number.isNaN(ms) ? 0 : Math.min(Math.max(ms, 0), maxTimerDelayMs);
  return new Promise<void>((resolve) => {
    setTimeout(resolve, delayMs);
  });
}
|
||||||
|
|
||||||
function extractJson(text: string): string {
|
function extractJson(text: string): string {
|
||||||
const trimmed = text.trim();
|
const trimmed = text.trim();
|
||||||
const match = trimmed.match(/\{[\s\S]*\}/);
|
const match = trimmed.match(/\{[\s\S]*\}/);
|
||||||
|
|||||||
Reference in New Issue
Block a user