From 8b57dd987e13cfb3cda9744e9a7e147974be1b35 Mon Sep 17 00:00:00 2001 From: vakabunga Date: Sat, 14 Mar 2026 20:12:27 +0300 Subject: [PATCH] Revert SSE streaming for PDF import, use synchronous flow SSE streaming added unnecessary complexity and latency due to buffering issues across Node.js event loop, Nginx proxy, and Docker layers. Reverted to a simple synchronous request/response for PDF conversion. Kept extractLlmErrorMessage for user-friendly LLM errors, lazy-loaded pdf-parse, and extended Nginx timeout. --- backend/src/routes/import.ts | 88 ++++--------- backend/src/services/pdfToStatement.ts | 159 +++--------------------- frontend/nginx.conf | 4 +- frontend/src/App.tsx | 21 ++-- frontend/src/api/import.ts | 71 ----------- frontend/src/components/ImportModal.tsx | 87 +++---------- frontend/src/components/Layout.tsx | 68 +--------- frontend/src/context/ImportContext.tsx | 114 ----------------- frontend/src/pages/HistoryPage.tsx | 10 +- frontend/src/styles/index.css | 146 ---------------------- 10 files changed, 73 insertions(+), 695 deletions(-) delete mode 100644 frontend/src/context/ImportContext.tsx diff --git a/backend/src/routes/import.ts b/backend/src/routes/import.ts index cc38c89..d1188b1 100644 --- a/backend/src/routes/import.ts +++ b/backend/src/routes/import.ts @@ -3,7 +3,7 @@ import multer from 'multer'; import { asyncHandler } from '../utils'; import { importStatement, isValidationError } from '../services/import'; import { - convertPdfToStatementStreaming, + convertPdfToStatement, isPdfConversionError, } from '../services/pdfToStatement'; @@ -28,10 +28,6 @@ function isJsonFile(file: { mimetype: string; originalname: string }): boolean { ); } -function sseWrite(res: import('express').Response, data: Record) { - res.write(`data: ${JSON.stringify(data)}\n\n`); -} - const router = Router(); router.post( @@ -55,68 +51,28 @@ router.post( return; } - if (isPdfFile(file)) { - res.setHeader('Content-Type', 'text/event-stream'); - res.setHeader('Cache-Control', 'no-cache'); - res.setHeader('Connection', 'keep-alive'); - res.setHeader('X-Accel-Buffering', 'no'); - res.socket?.setNoDelay(true); - res.flushHeaders(); - - try { - const converted = await convertPdfToStatementStreaming( - file.buffer, - (stage, progress, message) => { - sseWrite(res, { stage, progress, message }); - }, - ); - - if (isPdfConversionError(converted)) { - sseWrite(res, { - stage: 'error', - message: converted.message, - }); - res.end(); - return; - } - - const result = await importStatement(converted); - if (isValidationError(result)) { - sseWrite(res, { - stage: 'error', - message: (result as { message: string }).message, - }); - res.end(); - return; - } - - sseWrite(res, { - stage: 'done', - progress: 100, - result, - }); - } catch (err) { - console.error('SSE import error:', err); - sseWrite(res, { - stage: 'error', - message: 'Внутренняя ошибка сервера', - }); - } - - res.end(); - return; - } - - // JSON files — synchronous response as before let body: unknown; - try { - body = JSON.parse(file.buffer.toString('utf-8')); - } catch { - res.status(400).json({ - error: 'BAD_REQUEST', - message: 'Некорректный JSON-файл', - }); - return; + + if (isPdfFile(file)) { + const converted = await convertPdfToStatement(file.buffer); + if (isPdfConversionError(converted)) { + res.status(converted.status).json({ + error: converted.error, + message: converted.message, + }); + return; + } + body = converted; + } else { + try { + body = JSON.parse(file.buffer.toString('utf-8')); + } catch { + res.status(400).json({ + error: 'BAD_REQUEST', + message: 'Некорректный JSON-файл', + }); + return; + } } const result = await importStatement(body); diff --git a/backend/src/services/pdfToStatement.ts b/backend/src/services/pdfToStatement.ts index 2e66b32..1205061 100644 --- a/backend/src/services/pdfToStatement.ts +++ b/backend/src/services/pdfToStatement.ts @@ -131,128 +131,31 @@ export async function convertPdfToStatement( }; } - return parseConversionResult(content); - } catch (err) { - console.error('LLM conversion error:', err); - return { - status: 502, - error: 'BAD_GATEWAY', - message: extractLlmErrorMessage(err), - }; - } -} - -export type ProgressStage = 'pdf' | 'llm' | 'import'; -export type OnProgress = (stage: ProgressStage, progress: number, message: string) => void; - -const LLM_PROGRESS_MIN = 10; -const LLM_PROGRESS_MAX = 98; -const LLM_PROGRESS_RANGE = LLM_PROGRESS_MAX - LLM_PROGRESS_MIN; -const THROTTLE_MS = 300; - -function yieldToEventLoop(): Promise { - return new Promise(resolve => setImmediate(resolve)); -} - -export async function convertPdfToStatementStreaming( - buffer: Buffer, - onProgress: OnProgress, -): Promise { - if (!config.llmApiKey || config.llmApiKey.trim() === '') { - return { - status: 503, - error: 'SERVICE_UNAVAILABLE', - message: 'Конвертация PDF недоступна: не задан LLM_API_KEY', - }; - } - - onProgress('pdf', 2, 'Извлечение текста из PDF...'); - await yieldToEventLoop(); - - let text: string; - try { - const result = await getPdfParse()(buffer); - text = result.text || ''; - } catch (err) { - console.error('PDF extraction error:', err); - return { - status: 400, - error: 'BAD_REQUEST', - message: 'Не удалось обработать PDF-файл', - }; - } - - if (!text || text.trim().length === 0) { - return { - status: 400, - error: 'BAD_REQUEST', - message: 'Не удалось извлечь текст из PDF', - }; - } - - onProgress('pdf', 8, 'Текст извлечён, отправка в LLM...'); - await yieldToEventLoop(); - - const openai = new OpenAI({ - apiKey: config.llmApiKey, - ...(config.llmApiBaseUrl && { baseURL: config.llmApiBaseUrl }), - timeout: 5 * 60 * 1000, - }); - - try { - const stream = await openai.chat.completions.create({ - model: config.llmModel, - messages: [ - { role: 'system', content: PDF2JSON_PROMPT }, - { role: 'user', content: `Текст выписки:\n\n${text}` }, - ], - temperature: 0, - max_tokens: 32768, - stream: true, - }); - - const expectedChars = Math.max(2_000, Math.min(text.length * 2, 30_000)); - - let accumulated = ''; - let charsReceived = 0; - let lastEmitTime = 0; - - for await (const chunk of stream) { - const delta = chunk.choices[0]?.delta?.content; - if (delta) { - accumulated += delta; - charsReceived += delta.length; - - const now = Date.now(); - if (now - lastEmitTime >= THROTTLE_MS) { - const ratio = Math.min(1, charsReceived / expectedChars); - const llmProgress = Math.min( - LLM_PROGRESS_MAX, - Math.round(ratio * LLM_PROGRESS_RANGE + LLM_PROGRESS_MIN), - ); - onProgress('llm', llmProgress, 'Конвертация через LLM...'); - lastEmitTime = now; - // Let the event loop flush socket writes to the network - await yieldToEventLoop(); - } - } - } - - onProgress('llm', LLM_PROGRESS_MAX, 'LLM завершил, обработка результата...'); - await yieldToEventLoop(); - - const content = accumulated.trim(); - if (!content) { + const jsonMatch = content.match(/\{[\s\S]*\}/); + const jsonStr = jsonMatch ? jsonMatch[0] : content; + let parsed: unknown; + try { + parsed = JSON.parse(jsonStr); + } catch { return { status: 422, error: 'VALIDATION_ERROR', - message: 'Результат конвертации пуст', + message: 'Результат конвертации не является валидным JSON', }; } - return parseConversionResult(content); + const data = parsed as Record; + if (data.schemaVersion !== '1.0') { + return { + status: 422, + error: 'VALIDATION_ERROR', + message: 'Результат конвертации не соответствует схеме 1.0', + }; + } + + return parsed as StatementFile; } catch (err) { - console.error('LLM streaming error:', err); + console.error('LLM conversion error:', err); return { status: 502, error: 'BAD_GATEWAY', @@ -274,29 +177,3 @@ function extractLlmErrorMessage(err: unknown): string { } return 'Временная ошибка конвертации'; } - -function parseConversionResult(content: string): StatementFile | PdfConversionError { - const jsonMatch = content.match(/\{[\s\S]*\}/); - const jsonStr = jsonMatch ? jsonMatch[0] : content; - let parsed: unknown; - try { - parsed = JSON.parse(jsonStr); - } catch { - return { - status: 422, - error: 'VALIDATION_ERROR', - message: 'Результат конвертации не является валидным JSON', - }; - } - - const data = parsed as Record; - if (data.schemaVersion !== '1.0') { - return { - status: 422, - error: 'VALIDATION_ERROR', - message: 'Результат конвертации не соответствует схеме 1.0', - }; - } - - return parsed as StatementFile; -} diff --git a/frontend/nginx.conf b/frontend/nginx.conf index 9d758cd..841229d 100644 --- a/frontend/nginx.conf +++ b/frontend/nginx.conf @@ -3,7 +3,7 @@ server { root /usr/share/nginx/html; index index.html; - # Import endpoint — SSE streaming, long timeout, no buffering + # Import endpoint — long timeout for LLM processing location /api/import { proxy_pass http://family-budget-backend:3000; proxy_http_version 1.1; @@ -14,8 +14,6 @@ server { proxy_cookie_path / /; proxy_connect_timeout 5s; proxy_read_timeout 600s; - proxy_buffering off; - gzip off; client_max_body_size 15m; } diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index e37f15b..135747f 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -1,6 +1,5 @@ import { Routes, Route, Navigate } from 'react-router-dom'; import { useAuth } from './context/AuthContext'; -import { ImportProvider } from './context/ImportContext'; import { Layout } from './components/Layout'; import { LoginPage } from './pages/LoginPage'; import { HistoryPage } from './pages/HistoryPage'; @@ -19,16 +18,14 @@ export function App() { } return ( - - - - } /> - } /> - } /> - } /> - } /> - - - + + + } /> + } /> + } /> + } /> + } /> + + ); } diff --git a/frontend/src/api/import.ts b/frontend/src/api/import.ts index 293eb34..af936b5 100644 --- a/frontend/src/api/import.ts +++ b/frontend/src/api/import.ts @@ -11,74 +11,3 @@ export async function importStatement( formData, ); } - -export interface SseProgressEvent { - stage: 'pdf' | 'llm' | 'import'; - progress: number; - message: string; -} - -export interface SseDoneEvent { - stage: 'done'; - progress: 100; - result: ImportStatementResponse; -} - -export interface SseErrorEvent { - stage: 'error'; - message: string; -} - -export type SseEvent = SseProgressEvent | SseDoneEvent | SseErrorEvent; - -export async function importStatementStream( - file: File, - onEvent: (event: SseEvent) => void, -): Promise { - const formData = new FormData(); - formData.append('file', file); - - const res = await fetch('/api/import/statement', { - method: 'POST', - body: formData, - credentials: 'include', - }); - - if (!res.ok) { - let msg = 'Ошибка импорта'; - try { - const body = await res.json(); - if (body.message) msg = body.message; - } catch { /* use default */ } - onEvent({ stage: 'error', message: msg }); - return; - } - - const reader = res.body?.getReader(); - if (!reader) { - onEvent({ stage: 'error', message: 'Streaming не поддерживается' }); - return; - } - - const decoder = new TextDecoder(); - let buffer = ''; - - while (true) { - const { done, value } = await reader.read(); - if (done) break; - - buffer += decoder.decode(value, { stream: true }); - - const lines = buffer.split('\n'); - buffer = lines.pop() ?? ''; - - for (const line of lines) { - const trimmed = line.trim(); - if (!trimmed.startsWith('data: ')) continue; - try { - const parsed = JSON.parse(trimmed.slice(6)) as SseEvent; - onEvent(parsed); - } catch { /* skip malformed lines */ } - } - } -} diff --git a/frontend/src/components/ImportModal.tsx b/frontend/src/components/ImportModal.tsx index fe98df6..107ffda 100644 --- a/frontend/src/components/ImportModal.tsx +++ b/frontend/src/components/ImportModal.tsx @@ -2,7 +2,6 @@ import { useState, useRef } from 'react'; import type { ImportStatementResponse } from '@family-budget/shared'; import { importStatement } from '../api/import'; import { updateAccount } from '../api/accounts'; -import { useImport } from '../context/ImportContext'; interface Props { onClose: () => void; @@ -10,19 +9,13 @@ interface Props { } export function ImportModal({ onClose, onDone }: Props) { - const { importState, startImport, clearImport } = useImport(); - - const [jsonResult, setJsonResult] = useState(null); - const [jsonError, setJsonError] = useState(''); - const [jsonLoading, setJsonLoading] = useState(false); + const [result, setResult] = useState(null); + const [error, setError] = useState(''); + const [loading, setLoading] = useState(false); const [alias, setAlias] = useState(''); const [aliasSaved, setAliasSaved] = useState(false); const fileRef = useRef(null); - const result = importState.result ?? jsonResult; - const error = importState.error || jsonError; - const loading = importState.active || jsonLoading; - const handleFileChange = async ( e: React.ChangeEvent, ) => { @@ -35,45 +28,26 @@ export function ImportModal({ onClose, onDone }: Props) { const isJson = type === 'application/json' || name.endsWith('.json'); if (!isPdf && !isJson) { - setJsonError('Допустимы только файлы PDF или JSON'); + setError('Допустимы только файлы PDF или JSON'); return; } - setJsonError(''); - setJsonResult(null); + setLoading(true); + setError(''); + setResult(null); - if (isPdf) { - startImport(file); - return; - } - - setJsonLoading(true); try { const resp = await importStatement(file); - setJsonResult(resp); + setResult(resp); } catch (err: unknown) { const msg = err instanceof Error ? err.message : 'Ошибка импорта'; - setJsonError(msg); + setError(msg); } finally { - setJsonLoading(false); + setLoading(false); } }; - const handleClose = () => { - if (importState.active) { - if (window.confirm('Импорт продолжится в фоне. Закрыть окно?')) { - onClose(); - } - } else { - onClose(); - } - }; - - const handleDone = () => { - onDone(); - }; - const handleSaveAlias = async () => { if (!result || !alias.trim()) return; try { @@ -84,21 +58,12 @@ export function ImportModal({ onClose, onDone }: Props) { } }; - const stageLabel = (stage: string) => { - switch (stage) { - case 'pdf': return 'Извлечение текста...'; - case 'llm': return 'Конвертация через LLM...'; - case 'import': return 'Сохранение в базу...'; - default: return 'Обработка...'; - } - }; - return ( -
+
e.stopPropagation()}>

Импорт выписки

-
@@ -106,7 +71,7 @@ export function ImportModal({ onClose, onDone }: Props) {
{error &&
{error}
} - {!result && !importState.active && ( + {!result && (

Выберите файл выписки (PDF или JSON, формат 1.0)

- {jsonLoading && ( + {loading && (
Импорт...
)}
)} - {importState.active && ( -
-
-
-
-
-

- {stageLabel(importState.stage)} {importState.progress}% -

-
-
- )} - {result && (
@@ -201,15 +149,16 @@ export function ImportModal({ onClose, onDone }: Props) {
{result ? ( - ) : ( )}
diff --git a/frontend/src/components/Layout.tsx b/frontend/src/components/Layout.tsx index a1e7b08..fa38ca7 100644 --- a/frontend/src/components/Layout.tsx +++ b/frontend/src/components/Layout.tsx @@ -1,70 +1,6 @@ -import { useState, useEffect, useRef, useCallback, type ReactNode } from 'react'; +import { useState, type ReactNode } from 'react'; import { NavLink } from 'react-router-dom'; import { useAuth } from '../context/AuthContext'; -import { useImport } from '../context/ImportContext'; - -function ImportProgressBar() { - const { importState, clearImport, openModal } = useImport(); - const [visible, setVisible] = useState(false); - const hideTimerRef = useRef | undefined>(undefined); - - const isActive = importState.active; - const isDone = importState.stage === 'done'; - const isError = importState.stage === 'error'; - const showBar = isActive || isDone || isError; - - useEffect(() => { - if (showBar) { - setVisible(true); - if (hideTimerRef.current) clearTimeout(hideTimerRef.current); - } - if (isDone || isError) { - hideTimerRef.current = setTimeout(() => { - setVisible(false); - clearImport(); - }, 10_000); - } - return () => { - if (hideTimerRef.current) clearTimeout(hideTimerRef.current); - }; - }, [showBar, isDone, isError, clearImport]); - - const handleClick = useCallback(() => { - if (hideTimerRef.current) clearTimeout(hideTimerRef.current); - openModal(); - setVisible(false); - }, [openModal]); - - if (!visible) return null; - - const barClass = isError - ? 'import-progress-bar import-progress-bar--error' - : isDone - ? 'import-progress-bar import-progress-bar--done' - : 'import-progress-bar'; - - const labelText = isError - ? `Ошибка импорта: ${importState.message}` - : isDone && importState.result - ? `Импорт завершён — ${importState.result.imported} операций` - : `${importState.message} ${importState.progress}%`; - - return ( -
-
- -
- ); -} export function Layout({ children }: { children: ReactNode }) { const { user, logout } = useAuth(); @@ -74,8 +10,6 @@ export function Layout({ children }: { children: ReactNode }) { return (
- - @@ -264,7 +262,7 @@ export function HistoryPage() { {showImport && ( setShowImport(false)} onDone={handleImportDone} /> )} diff --git a/frontend/src/styles/index.css b/frontend/src/styles/index.css index fdca8f7..9f3d3d5 100644 --- a/frontend/src/styles/index.css +++ b/frontend/src/styles/index.css @@ -1058,152 +1058,6 @@ textarea { font-weight: 500; } -/* Import progress bar in modal */ -.import-progress-modal { - padding: 20px 0; -} - -.import-progress-modal-bar { - height: 8px; - background: var(--color-border); - border-radius: 4px; - overflow: hidden; - margin-bottom: 10px; -} - -.import-progress-modal-fill { - height: 100%; - background: linear-gradient(90deg, var(--color-primary), #6366f1); - border-radius: 4px; - transition: width 0.3s ease; - position: relative; -} - -.import-progress-modal-fill::after { - content: ''; - position: absolute; - inset: 0; - background: linear-gradient( - 90deg, - transparent 0%, - rgba(255, 255, 255, 0.3) 50%, - transparent 100% - ); - animation: shimmer 1.5s infinite; -} - -.import-progress-modal-label { - text-align: center; - color: var(--color-text-secondary); - font-size: 14px; -} - -/* ================================================================ - Import progress bar (fixed top, Layout) - ================================================================ */ - -.import-progress-bar { - position: fixed; - top: 0; - left: 0; - right: 0; - z-index: 300; - height: 4px; - background: var(--color-border); -} - -.import-progress-bar--done { - background: var(--color-success-light); -} - -.import-progress-bar--error { - background: var(--color-danger-light); -} - -.import-progress-bar__fill { - height: 100%; - border-radius: 0 2px 2px 0; - transition: width 0.3s ease; - position: relative; - background: linear-gradient(90deg, var(--color-primary), #6366f1); -} - -.import-progress-bar__fill::after { - content: ''; - position: absolute; - inset: 0; - background: linear-gradient( - 90deg, - transparent 0%, - rgba(255, 255, 255, 0.4) 50%, - transparent 100% - ); - animation: shimmer 1.5s infinite; -} - -.import-progress-bar--done .import-progress-bar__fill { - background: var(--color-success); -} - -.import-progress-bar--done .import-progress-bar__fill::after { - animation: none; -} - -.import-progress-bar--error .import-progress-bar__fill { - background: var(--color-danger); - width: 100% !important; -} - -.import-progress-bar--error .import-progress-bar__fill::after { - animation: none; -} - -@keyframes shimmer { - 0% { - transform: translateX(-100%); - } - 100% { - transform: translateX(100%); - } -} - -.import-progress-label { - position: fixed; - top: 6px; - right: 16px; - z-index: 301; - background: var(--color-surface); - border: 1px solid var(--color-border); - border-radius: 16px; - padding: 4px 14px; - font-size: 12px; - font-weight: 500; - color: var(--color-text-secondary); - box-shadow: var(--shadow-sm); - white-space: nowrap; - cursor: default; - font-family: inherit; -} - -.import-progress-label--clickable { - cursor: pointer; -} - -.import-progress-label--clickable:hover { - background: var(--color-bg); - border-color: var(--color-border-hover); -} - -.import-progress-bar--done .import-progress-label { - color: var(--color-success); - border-color: var(--color-success); -} - -.import-progress-bar--error .import-progress-label { - color: var(--color-danger); - border-color: var(--color-danger); -} - /* ================================================================ Tabs ================================================================ */ -- 2.49.1