From 0baeb1104acf6b8e5478b05d7bb4c47c8fb61f64 Mon Sep 17 00:00:00 2001
From: Anton
Date: Wed, 4 Mar 2026 14:37:18 +0300
Subject: [PATCH] feat: add LLM retry and fallback

Made-with: Cursor
---
 src/config/env.ts               |  2 ++
 src/services/llm/llm.service.ts | 37 ++++++++++++++++++++++++++++++++-
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/src/config/env.ts b/src/config/env.ts
index 2d2e4b4..fb0c5bd 100644
--- a/src/config/env.ts
+++ b/src/config/env.ts
@@ -14,9 +14,11 @@ const envSchema = z.object({
 
   LLM_BASE_URL: z.string().url().default('http://localhost:11434/v1'),
   LLM_MODEL: z.string().default('qwen2.5:14b'),
+  LLM_FALLBACK_MODEL: z.string().optional(),
   LLM_API_KEY: z.string().optional(),
   LLM_TIMEOUT_MS: z.coerce.number().default(15000),
   LLM_MAX_RETRIES: z.coerce.number().min(0).default(1),
+  LLM_RETRY_DELAY_MS: z.coerce.number().min(0).default(1000),
   LLM_TEMPERATURE: z.coerce.number().min(0).max(2).default(0.7),
   LLM_MAX_TOKENS: z.coerce.number().default(2048),
 
diff --git a/src/services/llm/llm.service.ts b/src/services/llm/llm.service.ts
index debafa6..457b4f9 100644
--- a/src/services/llm/llm.service.ts
+++ b/src/services/llm/llm.service.ts
@@ -5,10 +5,13 @@ import type { Stack, Level, QuestionType } from '../../db/schema/enums.js';
 export interface LlmConfig {
   baseUrl: string;
   model: string;
+  fallbackModel?: string;
   apiKey?: string;
   timeoutMs: number;
   temperature: number;
   maxTokens: number;
+  maxRetries: number;
+  retryDelayMs: number;
 }
 
 export interface ChatMessage {
@@ -61,14 +64,42 @@ export class LlmService {
     this.config = {
       baseUrl: config?.baseUrl ?? env.LLM_BASE_URL,
       model: config?.model ?? env.LLM_MODEL,
+      fallbackModel: config?.fallbackModel ?? env.LLM_FALLBACK_MODEL,
       apiKey: config?.apiKey ?? env.LLM_API_KEY,
       timeoutMs: config?.timeoutMs ?? env.LLM_TIMEOUT_MS,
       temperature: config?.temperature ?? env.LLM_TEMPERATURE,
       maxTokens: config?.maxTokens ?? env.LLM_MAX_TOKENS,
+      maxRetries: config?.maxRetries ?? env.LLM_MAX_RETRIES,
+      retryDelayMs: config?.retryDelayMs ?? env.LLM_RETRY_DELAY_MS,
     };
   }
 
   async chat(messages: ChatMessage[]): Promise<ChatResponse> {
+    let lastError: Error | null = null;
+
+    const modelsToTry = [this.config.model];
+    if (this.config.fallbackModel) {
+      modelsToTry.push(this.config.fallbackModel);
+    }
+
+    for (const model of modelsToTry) {
+      for (let attempt = 0; attempt <= this.config.maxRetries; attempt++) {
+        try {
+          return await this.executeChat(messages, model);
+        } catch (err) {
+          lastError = err instanceof Error ? err : new Error('LLM request failed');
+          if (attempt < this.config.maxRetries) {
+            const delayMs = this.config.retryDelayMs * Math.pow(2, attempt);
+            await sleep(delayMs);
+          }
+        }
+      }
+    }
+
+    throw lastError ?? new Error('LLM request failed');
+  }
+
+  private async executeChat(messages: ChatMessage[], model: string): Promise<ChatResponse> {
     const url = `${this.config.baseUrl.replace(/\/$/, '')}/chat/completions`;
 
     const headers: Record<string, string> = {
@@ -79,7 +110,7 @@ export class LlmService {
     }
 
     const body = {
-      model: this.config.model,
+      model,
       messages: messages.map((m) => ({ role: m.role, content: m.content })),
       temperature: this.config.temperature,
       max_tokens: this.config.maxTokens,
@@ -161,6 +192,10 @@ Rules: type must be one of: ${typeList}. For single_choice/multiple_select: opti
   }
 }
 
+function sleep(ms: number): Promise<void> {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
 function extractJson(text: string): string {
   const trimmed = text.trim();
   const match = trimmed.match(/\{[\s\S]*\}/);