feat: stream PDF import progress via SSE with global progress bar

Replace the synchronous PDF import with Server-Sent Events streaming
between the backend (LLM) and the browser. The user can now close the
import modal and continue working while the conversion runs — a fixed
progress bar in the Layout shows real-time stage and percentage.
This commit is contained in:
vakabunga
2026-03-14 16:18:31 +03:00
parent 627706228b
commit 1d7fbea9ef
9 changed files with 680 additions and 70 deletions

View File

@@ -108,6 +108,7 @@ export async function convertPdfToStatement(
const openai = new OpenAI({
apiKey: config.llmApiKey,
...(config.llmApiBaseUrl && { baseURL: config.llmApiBaseUrl }),
timeout: 5 * 60 * 1000,
});
try {
@@ -130,29 +131,7 @@ export async function convertPdfToStatement(
};
}
const jsonMatch = content.match(/\{[\s\S]*\}/);
const jsonStr = jsonMatch ? jsonMatch[0] : content;
let parsed: unknown;
try {
parsed = JSON.parse(jsonStr);
} catch {
return {
status: 422,
error: 'VALIDATION_ERROR',
message: 'Результат конвертации не является валидным JSON',
};
}
const data = parsed as Record<string, unknown>;
if (data.schemaVersion !== '1.0') {
return {
status: 422,
error: 'VALIDATION_ERROR',
message: 'Результат конвертации не соответствует схеме 1.0',
};
}
return parsed as StatementFile;
return parseConversionResult(content);
} catch (err) {
console.error('LLM conversion error:', err);
return {
@@ -162,3 +141,125 @@ export async function convertPdfToStatement(
};
}
}
/**
 * Phase that a progress update belongs to. In the visible code 'pdf' is reported
 * around PDF text extraction and 'llm' while the LLM response is streaming;
 * 'import' is presumably emitted by a later import step outside this chunk — TODO confirm.
 */
export type ProgressStage = 'pdf' | 'llm' | 'import';
/**
 * Callback invoked with the current stage, a numeric progress value and a
 * human-readable status message. The streaming converter passes values between
 * 2 and 85, so `progress` looks like a percentage — confirm against the consumer.
 */
export type OnProgress = (stage: ProgressStage, progress: number, message: string) => void;
// Divisor used to turn the number of characters received from the LLM into a
// progress estimate; presumably the typical size of a full response — TODO confirm.
const EXPECTED_CHARS = 15_000;
/**
 * Converts a PDF statement to a `StatementFile` through a streamed LLM
 * completion, reporting progress through `onProgress` as the work advances.
 *
 * Progress reported by this function:
 * - 'pdf' 2  — before text extraction starts;
 * - 'pdf' 8  — after text was extracted, before the LLM request;
 * - 'llm' 10..85 — after every non-empty streamed chunk, estimated from the
 *   number of characters received relative to `EXPECTED_CHARS` and capped at 85.
 *
 * @param buffer - Raw bytes of the PDF file.
 * @param onProgress - Callback that receives stage, progress value and message.
 * @returns The parsed statement, or an error descriptor:
 *   503 when no LLM API key is configured, 400 when the PDF cannot be read or
 *   contains no text, 422 when the LLM output is empty or rejected by
 *   `parseConversionResult`, 502 when anything throws while requesting or
 *   consuming the stream.
 */
export async function convertPdfToStatementStreaming(
  buffer: Buffer,
  onProgress: OnProgress,
): Promise<StatementFile | PdfConversionError> {
  // Guard: without a usable (non-blank) API key there is nothing to call.
  if (!config.llmApiKey?.trim()) {
    return {
      status: 503,
      error: 'SERVICE_UNAVAILABLE',
      message: 'Конвертация PDF недоступна: не задан LLM_API_KEY',
    };
  }

  onProgress('pdf', 2, 'Извлечение текста из PDF...');

  let extracted = '';
  try {
    const pdf = await getPdfParse()(buffer);
    extracted = pdf.text || '';
  } catch (err) {
    console.error('PDF extraction error:', err);
    return {
      status: 400,
      error: 'BAD_REQUEST',
      message: 'Не удалось обработать PDF-файл',
    };
  }

  // Guard: a PDF that yields only whitespace gives the LLM nothing to convert.
  if (extracted.trim() === '') {
    return {
      status: 400,
      error: 'BAD_REQUEST',
      message: 'Не удалось извлечь текст из PDF',
    };
  }

  onProgress('pdf', 8, 'Текст извлечён, отправка в LLM...');

  const requestTimeoutMs = 5 * 60 * 1000;
  const client = new OpenAI({
    apiKey: config.llmApiKey,
    // Only override the endpoint when a custom base URL is configured.
    ...(config.llmApiBaseUrl ? { baseURL: config.llmApiBaseUrl } : {}),
    timeout: requestTimeoutMs,
  });

  try {
    const completion = await client.chat.completions.create({
      model: config.llmModel,
      messages: [
        { role: 'system', content: PDF2JSON_PROMPT },
        { role: 'user', content: `Текст выписки:\n\n${extracted}` },
      ],
      temperature: 0,
      max_tokens: 32768,
      stream: true,
    });

    const pieces: string[] = [];
    let received = 0;

    for await (const part of completion) {
      const piece = part.choices[0]?.delta?.content;
      if (!piece) {
        continue;
      }

      pieces.push(piece);
      received += piece.length;

      // Linear estimate starting at 10 and capped at 85.
      const estimate = Math.round((received / EXPECTED_CHARS) * 75 + 10);
      onProgress('llm', Math.min(85, estimate), 'Конвертация через LLM...');
    }

    const output = pieces.join('').trim();
    if (output === '') {
      return {
        status: 422,
        error: 'VALIDATION_ERROR',
        message: 'Результат конвертации пуст',
      };
    }

    return parseConversionResult(output);
  } catch (err) {
    console.error('LLM streaming error:', err);
    return {
      status: 502,
      error: 'BAD_GATEWAY',
      message: 'Временная ошибка конвертации',
    };
  }
}
/**
 * Extracts and validates the statement JSON contained in raw LLM output.
 *
 * Text surrounding the JSON is tolerated: when the content contains a `{ ... }`
 * span (first `{` to last `}`), only that span is parsed; otherwise the whole
 * string is parsed.
 *
 * @param content - Raw text returned by the LLM.
 * @returns The parsed value when it is a JSON object whose `schemaVersion` is
 *   `'1.0'`; otherwise a 422 `VALIDATION_ERROR` descriptor (invalid JSON, a
 *   non-object value such as `null`, or a different schema version).
 */
function parseConversionResult(content: string): StatementFile | PdfConversionError {
  const jsonMatch = content.match(/\{[\s\S]*\}/);
  const jsonStr = jsonMatch ? jsonMatch[0] : content;

  let parsed: unknown;
  try {
    parsed = JSON.parse(jsonStr);
  } catch {
    return {
      status: 422,
      error: 'VALIDATION_ERROR',
      message: 'Результат конвертации не является валидным JSON',
    };
  }

  // JSON.parse can yield null or a primitive (e.g. for the input "null").
  // Check for a real object before reading a property so such output is
  // reported as a validation error instead of throwing a TypeError.
  const isObject = typeof parsed === 'object' && parsed !== null;
  if (!isObject || (parsed as Record<string, unknown>).schemaVersion !== '1.0') {
    return {
      status: 422,
      error: 'VALIDATION_ERROR',
      message: 'Результат конвертации не соответствует схеме 1.0',
    };
  }

  return parsed as StatementFile;
}