From 975f2c4fd2d7c26d5c65e6188554fc3e44f433c1 Mon Sep 17 00:00:00 2001 From: Anton Date: Fri, 13 Mar 2026 13:38:02 +0300 Subject: [PATCH] feat: adds PDF import with conversion to JSON 1.0 - Accept only PDF and JSON files in import modal and API - Convert PDF statements to JSON 1.0 via LLM (OpenAI-compatible) - Use multipart/form-data for file upload (multer, 15 MB limit) - Add LLM_API_KEY and LLM_API_BASE_URL for configurable LLM endpoint - Update ImportModal to validate type and send FormData - Add postFormData to API client for file upload --- .gitignore | 1 + backend/.env.example | 8 + backend/README.md | 5 +- backend/package.json | 4 + backend/src/config.ts | 6 + backend/src/routes/import.ts | 70 +++- backend/src/services/pdfToStatement.ts | 156 ++++++++ docs/backlog/api_import.md | 18 +- frontend/src/api/client.ts | 37 ++ frontend/src/api/import.ts | 9 +- frontend/src/components/ImportModal.tsx | 28 +- package-lock.json | 449 ++++++++++++++++++++++-- pdf2json.md | 2 - 13 files changed, 745 insertions(+), 48 deletions(-) create mode 100644 backend/src/services/pdfToStatement.ts diff --git a/.gitignore b/.gitignore index d4e705c..062a7a7 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,4 @@ jan-feb.json history.xlsx match_analysis.py match_report.txt +statements/ diff --git a/backend/.env.example b/backend/.env.example index 49a3e5e..9157af9 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -10,3 +10,11 @@ APP_USER_PASSWORD=changeme SESSION_TIMEOUT_MS=10800000 PORT=3000 + +# Ключ OpenAI API для конвертации PDF-выписок в JSON (опционально) +# Без него импорт PDF будет недоступен (503) +LLM_API_KEY= + +# Базовый URL API LLM (опционально). По умолчанию https://api.openai.com +# Примеры: Ollama — http://localhost:11434/v1, Azure — https://YOUR_RESOURCE.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT +LLM_API_BASE_URL= diff --git a/backend/README.md b/backend/README.md index bf638b2..2d273c1 100644 --- a/backend/README.md +++ b/backend/README.md @@ -45,6 +45,8 @@ npm run dev -w backend | `APP_USER_PASSWORD` | `changeme` | Пароль для входа в приложение | | `SESSION_TIMEOUT_MS` | `10800000` | Таймаут сессии по бездействию (3 часа) | | `PORT` | `3000` | Порт HTTP-сервера | +| `LLM_API_KEY` | — | Ключ OpenAI API для конвертации PDF в JSON; без него импорт PDF возвращает 503 | +| `LLM_API_BASE_URL` | `https://api.openai.com/v1` | Адрес LLM API (OpenAI-совместимый); для локальной модели, напр. Ollama: `http://localhost:11434/v1` | ## Структура проекта @@ -61,6 +63,7 @@ backend/src/ ├── services/ │ ├── auth.ts — login / logout / me │ ├── import.ts — валидация, fingerprint, direction, атомарный импорт +│ ├── pdfToStatement.ts — конвертация PDF → JSON 1.0 через LLM (OpenAI) │ ├── transactions.ts — список с фильтрами + обновление (categoryId, comment) │ ├── accounts.ts — список счетов, обновление алиаса │ ├── categories.ts — список категорий (фильтр isActive) @@ -90,7 +93,7 @@ backend/src/ | Метод | URL | Описание | |--------|----------------------------|-----------------------------------------| -| POST | `/api/import/statement` | Импорт банковской выписки (JSON 1.0) | +| POST | `/api/import/statement` | Импорт банковской выписки (PDF или JSON 1.0; PDF конвертируется через LLM) | ### Транзакции diff --git a/backend/package.json b/backend/package.json index bc6def6..3492225 100644 --- a/backend/package.json +++ b/backend/package.json @@ -14,6 +14,9 @@ "cors": "^2.8.6", "dotenv": "^17.3.1", "express": "^5.2.1", + "multer": "^2.1.1", + "openai": "^6.27.0", + "pdf-parse": "^2.4.5", "pg": "^8.19.0", "uuid": "^13.0.0" }, @@ -21,6 +24,7 @@ "@types/cookie-parser": "^1.4.10", "@types/cors": "^2.8.19", "@types/express": "^5.0.6", + "@types/multer": "^2.1.0", "@types/node": "^25.3.3", "@types/pg": "^8.18.0", "@types/uuid": "^10.0.0", diff --git a/backend/src/config.ts b/backend/src/config.ts index bacb89f..376ba6a 100644 --- a/backend/src/config.ts +++ b/backend/src/config.ts @@ -18,4 +18,10 @@ export const config = { appUserPassword: process.env.APP_USER_PASSWORD || 'changeme', sessionTimeoutMs: parseInt(process.env.SESSION_TIMEOUT_MS || '10800000', 10), + + /** API-ключ для LLM (OpenAI или совместимый). Обязателен для конвертации PDF. */ + llmApiKey: process.env.LLM_API_KEY || '', + + /** Базовый URL API LLM. По умолчанию https://api.openai.com. Для Ollama: http://localhost:11434/v1 */ + llmApiBaseUrl: process.env.LLM_API_BASE_URL || undefined, }; diff --git a/backend/src/routes/import.ts b/backend/src/routes/import.ts index d92d615..d1188b1 100644 --- a/backend/src/routes/import.ts +++ b/backend/src/routes/import.ts @@ -1,13 +1,81 @@ import { Router } from 'express'; +import multer from 'multer'; import { asyncHandler } from '../utils'; import { importStatement, isValidationError } from '../services/import'; +import { + convertPdfToStatement, + isPdfConversionError, +} from '../services/pdfToStatement'; + +const upload = multer({ + storage: multer.memoryStorage(), + limits: { fileSize: 15 * 1024 * 1024 }, +}); + +function isPdfFile(file: { mimetype: string; originalname: string }): boolean { + const name = file.originalname.toLowerCase(); + return ( + file.mimetype === 'application/pdf' || + name.endsWith('.pdf') + ); +} + +function isJsonFile(file: { mimetype: string; originalname: string }): boolean { + const name = file.originalname.toLowerCase(); + return ( + file.mimetype === 'application/json' || + name.endsWith('.json') + ); +} const router = Router(); router.post( '/statement', + upload.single('file'), asyncHandler(async (req, res) => { - const result = await importStatement(req.body); + const file = req.file; + if (!file) { + res.status(400).json({ + error: 'BAD_REQUEST', + message: 'Файл не загружен', + }); + return; + } + + if (!isPdfFile(file) && !isJsonFile(file)) { + res.status(400).json({ + error: 'BAD_REQUEST', + message: 'Допустимы только файлы PDF или JSON', + }); + return; + } + + let body: unknown; + + if (isPdfFile(file)) { + const converted = await convertPdfToStatement(file.buffer); + if (isPdfConversionError(converted)) { + res.status(converted.status).json({ + error: converted.error, + message: converted.message, + }); + return; + } + body = converted; + } else { + try { + body = JSON.parse(file.buffer.toString('utf-8')); + } catch { + res.status(400).json({ + error: 'BAD_REQUEST', + message: 'Некорректный JSON-файл', + }); + return; + } + } + + const result = await importStatement(body); if (isValidationError(result)) { res.status((result as { status: number }).status).json({ error: (result as { error: string }).error, diff --git a/backend/src/services/pdfToStatement.ts b/backend/src/services/pdfToStatement.ts new file mode 100644 index 0000000..120f160 --- /dev/null +++ b/backend/src/services/pdfToStatement.ts @@ -0,0 +1,156 @@ +import { PDFParse } from 'pdf-parse'; +import OpenAI from 'openai'; +import { config } from '../config'; +import type { StatementFile } from '@family-budget/shared'; + +const PDF2JSON_PROMPT = `Ты — конвертер банковских выписок. Твоя задача: извлечь данные из текста банковской выписки ниже и вернуть строго один валидный JSON-объект в формате ниже. Никакого текста до или после JSON, только сам объект. + +## Структура выходного JSON + +{ + "schemaVersion": "1.0", + "bank": "<название банка из выписки>", + "statement": { + "accountNumber": "<номер счёта, только цифры, без пробелов>", + "currency": "RUB", + "openingBalance": <число в копейках, целое>, + "closingBalance": <число в копейках, целое>, + "exportedAt": "<дата экспорта в формате ISO 8601 с offset, например 2026-02-27T13:23:00+03:00>" + }, + "transactions": [ + { + "operationAt": "<дата и время операции в формате ISO 8601 с offset>", + "amountSigned": <число: положительное для прихода, отрицательное для расхода; в копейках>, + "commission": <число, целое, >= 0, в копейках>, + "description": "<полное описание операции из выписки>" + } + ] +} + +## Правила конвертации + +1. Суммы — всегда в копейках (рубли × 100). Пример: 500,00 ₽ → 50000, -1234,56 ₽ → -123456. +2. amountSigned: приход — положительное, расход — отрицательное. +3. operationAt — дата и время, если не указано — 00:00:00, offset +03:00 для МСК. +4. commission — если не указана — 0. +5. description — полный текст операции как в выписке. +6. accountNumber — только цифры, без пробелов и дефисов. +7. openingBalance / closingBalance — в копейках. +8. bank — краткое название (VTB, Sberbank, Тинькофф). +9. exportedAt — дата формирования выписки. +10. transactions — хронологический порядок. + +## Требования + +- transactions не должен быть пустым. +- Все числа — целые. +- Даты — ISO 8601 с offset. +- currency всегда "RUB". +- schemaVersion всегда "1.0".`; + +export interface PdfConversionError { + status: number; + error: string; + message: string; +} + +export function isPdfConversionError(r: unknown): r is PdfConversionError { + return ( + typeof r === 'object' && + r !== null && + 'status' in r && + 'error' in r && + 'message' in r + ); +} + +export async function convertPdfToStatement( + buffer: Buffer, +): Promise { + if (!config.llmApiKey || config.llmApiKey.trim() === '') { + return { + status: 503, + error: 'SERVICE_UNAVAILABLE', + message: 'Конвертация PDF недоступна: не задан LLM_API_KEY', + }; + } + + let text: string; + try { + const parser = new PDFParse({ data: buffer }); + const result = await parser.getText(); + text = result.text || ''; + await parser.destroy(); + } catch (err) { + console.error('PDF extraction error:', err); + return { + status: 400, + error: 'BAD_REQUEST', + message: 'Не удалось обработать PDF-файл', + }; + } + + if (!text || text.trim().length === 0) { + return { + status: 400, + error: 'BAD_REQUEST', + message: 'Не удалось извлечь текст из PDF', + }; + } + + const openai = new OpenAI({ + apiKey: config.llmApiKey, + ...(config.llmApiBaseUrl && { baseURL: config.llmApiBaseUrl }), + }); + + try { + const completion = await openai.chat.completions.create({ + model: 'gpt-4o-mini', + messages: [ + { role: 'system', content: PDF2JSON_PROMPT }, + { role: 'user', content: `Текст выписки:\n\n${text}` }, + ], + temperature: 0, + }); + + const content = completion.choices[0]?.message?.content?.trim(); + if (!content) { + return { + status: 422, + error: 'VALIDATION_ERROR', + message: 'Результат конвертации пуст', + }; + } + + const jsonMatch = content.match(/\{[\s\S]*\}/); + const jsonStr = jsonMatch ? jsonMatch[0] : content; + let parsed: unknown; + try { + parsed = JSON.parse(jsonStr); + } catch { + return { + status: 422, + error: 'VALIDATION_ERROR', + message: 'Результат конвертации не является валидным JSON', + }; + } + + const data = parsed as Record; + if (data.schemaVersion !== '1.0') { + return { + status: 422, + error: 'VALIDATION_ERROR', + message: 'Результат конвертации не соответствует схеме 1.0', + }; + } + + return parsed as StatementFile; + } catch (err) { + console.error('LLM conversion error:', err); + return { + status: 502, + error: 'BAD_GATEWAY', + message: 'Временная ошибка конвертации', + }; + } +} diff --git a/docs/backlog/api_import.md b/docs/backlog/api_import.md index 991a2a5..47acd41 100644 --- a/docs/backlog/api_import.md +++ b/docs/backlog/api_import.md @@ -2,7 +2,7 @@ ## Назначение -Эндпоинт принимает банковскую выписку в формате JSON 1.0 (см. `format.md`) и атомарно импортирует транзакции в БД. +Эндпоинт принимает банковскую выписку (PDF или JSON) и атомарно импортирует транзакции в БД. ## Метод и URL @@ -16,7 +16,15 @@ ## Тело запроса -JSON строго по схеме 1.0 (`format.md`). Content-Type: `application/json`. +**Content-Type:** `multipart/form-data`. Поле: `file`. + +Допустимые типы файлов: +- **PDF** — банковская выписка. Конвертируется в JSON 1.0 через LLM (требуется `LLM_API_KEY`). +- **JSON** — файл по схеме 1.0 (см. `format.md`). + +При другом типе файла — `400 Bad Request`: «Допустимы только файлы PDF или JSON». + +Пример структуры JSON 1.0: ```json { @@ -207,6 +215,8 @@ accountNumber|operationAt|amountSigned|commission|normalizedDescription | Код | Ситуация | |-----|--------------------------------------------------------------------------| | 200 | Импорт выполнен успешно | -| 400 | Невалидный JSON или нарушение структуры схемы 1.0 | +| 400 | Файл не загружен; неверный тип (не PDF и не JSON); некорректный JSON; ошибка извлечения текста из PDF | | 401 | Нет действующей сессии | -| 422 | Семантическая ошибка валидации (некорректные данные, ошибка при вставке) | +| 422 | Семантическая ошибка валидации; результат конвертации PDF не соответствует схеме 1.0 | +| 502 | Ошибка LLM при конвертации PDF | +| 503 | Конвертация PDF недоступна (не задан LLM_API_KEY) | diff --git a/frontend/src/api/client.ts b/frontend/src/api/client.ts index 4c4a3ed..98be616 100644 --- a/frontend/src/api/client.ts +++ b/frontend/src/api/client.ts @@ -53,6 +53,40 @@ async function request(url: string, options: RequestInit = {}): Promise { return res.json(); } +async function requestFormData(url: string, formData: FormData): Promise { + const res = await fetch(url, { + method: 'POST', + body: formData, + credentials: 'include', + // Do not set Content-Type — browser sets multipart/form-data with boundary + }); + + if (res.status === 401) { + let body: ApiError; + try { + body = await res.json(); + } catch { + body = { error: 'UNAUTHORIZED', message: 'Сессия истекла' }; + } + if (!url.includes('/api/auth/login')) { + onUnauthorized?.(); + } + throw new ApiException(401, body); + } + + if (!res.ok) { + let body: ApiError; + try { + body = await res.json(); + } catch { + body = { error: 'UNKNOWN', message: res.statusText }; + } + throw new ApiException(res.status, body); + } + + return res.json(); +} + export const api = { get: (url: string) => request(url), @@ -62,6 +96,9 @@ export const api = { body: body != null ? JSON.stringify(body) : undefined, }), + postFormData: (url: string, formData: FormData) => + requestFormData(url, formData), + put: (url: string, body: unknown) => request(url, { method: 'PUT', body: JSON.stringify(body) }), diff --git a/frontend/src/api/import.ts b/frontend/src/api/import.ts index 64bf447..af936b5 100644 --- a/frontend/src/api/import.ts +++ b/frontend/src/api/import.ts @@ -2,7 +2,12 @@ import type { ImportStatementResponse } from '@family-budget/shared'; import { api } from './client'; export async function importStatement( - file: unknown, + file: File, ): Promise { - return api.post('/api/import/statement', file); + const formData = new FormData(); + formData.append('file', file); + return api.postFormData( + '/api/import/statement', + formData, + ); } diff --git a/frontend/src/components/ImportModal.tsx b/frontend/src/components/ImportModal.tsx index 95ef739..8bc7208 100644 --- a/frontend/src/components/ImportModal.tsx +++ b/frontend/src/components/ImportModal.tsx @@ -22,23 +22,27 @@ export function ImportModal({ onClose, onDone }: Props) { const file = e.target.files?.[0]; if (!file) return; + const name = file.name.toLowerCase(); + const type = file.type; + const isPdf = type === 'application/pdf' || name.endsWith('.pdf'); + const isJson = type === 'application/json' || name.endsWith('.json'); + + if (!isPdf && !isJson) { + setError('Допустимы только файлы PDF или JSON'); + return; + } + setLoading(true); setError(''); setResult(null); try { - const text = await file.text(); - const json = JSON.parse(text); - const resp = await importStatement(json); + const resp = await importStatement(file); setResult(resp); } catch (err: unknown) { - if (err instanceof SyntaxError) { - setError('Некорректный JSON-файл'); - } else { - const msg = - err instanceof Error ? err.message : 'Ошибка импорта'; - setError(msg); - } + const msg = + err instanceof Error ? err.message : 'Ошибка импорта'; + setError(msg); } finally { setLoading(false); } @@ -69,11 +73,11 @@ export function ImportModal({ onClose, onDone }: Props) { {!result && (
-

Выберите JSON-файл выписки (формат 1.0)

+

Выберите файл выписки (PDF или JSON, формат 1.0)

diff --git a/package-lock.json b/package-lock.json index b159813..c6b16d1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -22,6 +22,9 @@ "cors": "^2.8.6", "dotenv": "^17.3.1", "express": "^5.2.1", + "multer": "^2.1.1", + "openai": "^6.27.0", + "pdf-parse": "^2.4.5", "pg": "^8.19.0", "uuid": "^13.0.0" }, @@ -29,6 +32,7 @@ "@types/cookie-parser": "^1.4.10", "@types/cors": "^2.8.19", "@types/express": "^5.0.6", + "@types/multer": "^2.1.0", "@types/node": "^25.3.3", "@types/pg": "^8.18.0", "@types/uuid": "^10.0.0", @@ -85,6 +89,7 @@ "integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.29.0", "@babel/generator": "^7.29.0", @@ -849,6 +854,190 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@napi-rs/canvas": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas/-/canvas-0.1.80.tgz", + "integrity": "sha512-DxuT1ClnIPts1kQx8FBmkk4BQDTfI5kIzywAaMjQSXfNnra5UFU9PwurXrl+Je3bJ6BGsp/zmshVVFbCmyI+ww==", + "license": "MIT", + "workspaces": [ + "e2e/*" + ], + "engines": { + "node": ">= 10" + }, + "optionalDependencies": { + "@napi-rs/canvas-android-arm64": "0.1.80", + "@napi-rs/canvas-darwin-arm64": "0.1.80", + "@napi-rs/canvas-darwin-x64": "0.1.80", + "@napi-rs/canvas-linux-arm-gnueabihf": "0.1.80", + "@napi-rs/canvas-linux-arm64-gnu": "0.1.80", + "@napi-rs/canvas-linux-arm64-musl": "0.1.80", + "@napi-rs/canvas-linux-riscv64-gnu": "0.1.80", + "@napi-rs/canvas-linux-x64-gnu": "0.1.80", + "@napi-rs/canvas-linux-x64-musl": "0.1.80", + "@napi-rs/canvas-win32-x64-msvc": "0.1.80" + } + }, + "node_modules/@napi-rs/canvas-android-arm64": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-android-arm64/-/canvas-android-arm64-0.1.80.tgz", + "integrity": "sha512-sk7xhN/MoXeuExlggf91pNziBxLPVUqF2CAVnB57KLG/pz7+U5TKG8eXdc3pm0d7Od0WreB6ZKLj37sX9muGOQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-darwin-arm64": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-arm64/-/canvas-darwin-arm64-0.1.80.tgz", + "integrity": "sha512-O64APRTXRUiAz0P8gErkfEr3lipLJgM6pjATwavZ22ebhjYl/SUbpgM0xcWPQBNMP1n29afAC/Us5PX1vg+JNQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-darwin-x64": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-x64/-/canvas-darwin-x64-0.1.80.tgz", + "integrity": "sha512-FqqSU7qFce0Cp3pwnTjVkKjjOtxMqRe6lmINxpIZYaZNnVI0H5FtsaraZJ36SiTHNjZlUB69/HhxNDT1Aaa9vA==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm-gnueabihf": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm-gnueabihf/-/canvas-linux-arm-gnueabihf-0.1.80.tgz", + "integrity": "sha512-eyWz0ddBDQc7/JbAtY4OtZ5SpK8tR4JsCYEZjCE3dI8pqoWUC8oMwYSBGCYfsx2w47cQgQCgMVRVTFiiO38hHQ==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm64-gnu": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-gnu/-/canvas-linux-arm64-gnu-0.1.80.tgz", + "integrity": "sha512-qwA63t8A86bnxhuA/GwOkK3jvb+XTQaTiVML0vAWoHyoZYTjNs7BzoOONDgTnNtr8/yHrq64XXzUoLqDzU+Uuw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm64-musl": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-musl/-/canvas-linux-arm64-musl-0.1.80.tgz", + "integrity": "sha512-1XbCOz/ymhj24lFaIXtWnwv/6eFHXDrjP0jYkc6iHQ9q8oXKzUX1Lc6bu+wuGiLhGh2GS/2JlfORC5ZcXimRcg==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-riscv64-gnu": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-riscv64-gnu/-/canvas-linux-riscv64-gnu-0.1.80.tgz", + "integrity": "sha512-XTzR125w5ZMs0lJcxRlS1K3P5RaZ9RmUsPtd1uGt+EfDyYMu4c6SEROYsxyatbbu/2+lPe7MPHOO/0a0x7L/gw==", + "cpu": [ + "riscv64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-x64-gnu": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-gnu/-/canvas-linux-x64-gnu-0.1.80.tgz", + "integrity": "sha512-BeXAmhKg1kX3UCrJsYbdQd3hIMDH/K6HnP/pG2LuITaXhXBiNdh//TVVVVCBbJzVQaV5gK/4ZOCMrQW9mvuTqA==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-x64-musl": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-musl/-/canvas-linux-x64-musl-0.1.80.tgz", + "integrity": "sha512-x0XvZWdHbkgdgucJsRxprX/4o4sEed7qo9rCQA9ugiS9qE2QvP0RIiEugtZhfLH3cyI+jIRFJHV4Fuz+1BHHMg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-win32-x64-msvc": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-x64-msvc/-/canvas-win32-x64-msvc-0.1.80.tgz", + "integrity": "sha512-Z8jPsM6df5V8B1HrCHB05+bDiCxjE9QA//3YrkKIdVDEwn5RKaqOxCJDRJkl48cJbylcrJbW4HxZbTte8juuPg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, "node_modules/@rolldown/pluginutils": { "version": "1.0.0-beta.27", "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.27.tgz", @@ -1368,6 +1557,7 @@ "integrity": "sha512-sKYVuV7Sv9fbPIt/442koC7+IIwK5olP1KWeD88e/idgoJqDm3JV/YUiPwkoKK92ylff2MGxSz1CSjsXelx0YA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@types/body-parser": "*", "@types/express-serve-static-core": "^5.0.0", @@ -1394,6 +1584,16 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/multer": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/@types/multer/-/multer-2.1.0.tgz", + "integrity": "sha512-zYZb0+nJhOHtPpGDb3vqPjwpdeGlGC157VpkqNQL+UU2qwoacoQ7MpsAmUptI/0Oa127X32JzWDqQVEXp2RcIA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/express": "*" + } + }, "node_modules/@types/node": { "version": "25.3.3", "resolved": "https://registry.npmjs.org/@types/node/-/node-25.3.3.tgz", @@ -1436,6 +1636,7 @@ "integrity": "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.2.2" } @@ -1512,6 +1713,12 @@ "node": ">= 0.6" } }, + "node_modules/append-field": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/append-field/-/append-field-1.0.0.tgz", + "integrity": "sha512-klpgFSWLW1ZEs8svjfb7g4qWY0YS5imI82dTg+QahUvJ8YqAY0P10Uk8tTyh9ZGuYEZEMaeJYCF5BFuX552hsw==", + "license": "MIT" + }, "node_modules/baseline-browser-mapping": { "version": "2.10.0", "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.0.tgz", @@ -1569,6 +1776,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.9.0", "caniuse-lite": "^1.0.30001759", @@ -1583,6 +1791,23 @@ "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" } }, + "node_modules/buffer-from": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", + "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", + "license": "MIT" + }, + "node_modules/busboy": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/busboy/-/busboy-1.6.0.tgz", + "integrity": "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==", + "dependencies": { + "streamsearch": "^1.1.0" + }, + "engines": { + "node": ">=10.16.0" + } + }, "node_modules/bytes": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", @@ -1651,6 +1876,21 @@ "node": ">=6" } }, + "node_modules/concat-stream": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-2.0.0.tgz", + "integrity": "sha512-MWufYdFw53ccGjCA+Ol7XJYpAlW6/prSMzuPOTRnJGcGzuhLn4Scrz7qf6o8bROZ514ltazcIFJZevcfbo0x7A==", + "engines": [ + "node >= 6.0" + ], + "license": "MIT", + "dependencies": { + "buffer-from": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^3.0.2", + "typedarray": "^0.0.6" + } + }, "node_modules/content-disposition": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-1.0.1.tgz", @@ -2470,6 +2710,68 @@ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "license": "MIT" }, + "node_modules/multer": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/multer/-/multer-2.1.1.tgz", + "integrity": "sha512-mo+QTzKlx8R7E5ylSXxWzGoXoZbOsRMpyitcht8By2KHvMbf3tjwosZ/Mu/XYU6UuJ3VZnODIrak5ZrPiPyB6A==", + "license": "MIT", + "dependencies": { + "append-field": "^1.0.0", + "busboy": "^1.6.0", + "concat-stream": "^2.0.0", + "type-is": "^1.6.18" + }, + "engines": { + "node": ">= 10.16.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/multer/node_modules/media-typer": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", + "integrity": "sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/multer/node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/multer/node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/multer/node_modules/type-is": { + "version": "1.6.18", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", + "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==", + "license": "MIT", + "dependencies": { + "media-typer": "0.3.0", + "mime-types": "~2.1.24" + }, + "engines": { + "node": ">= 0.6" + } + }, "node_modules/nanoid": { "version": "3.3.11", "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", @@ -2547,6 +2849,27 @@ "wrappy": "1" } }, + "node_modules/openai": { + "version": "6.27.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-6.27.0.tgz", + "integrity": "sha512-osTKySlrdYrLYTt0zjhY8yp0JUBmWDCN+Q+QxsV4xMQnnoVFpylgKGgxwN8sSdTNw0G4y+WUXs4eCMWpyDNWZQ==", + "license": "Apache-2.0", + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.25 || ^4.0" + }, + "peerDependenciesMeta": { + "ws": { + "optional": true + }, + "zod": { + "optional": true + } + } + }, "node_modules/parseurl": { "version": "1.3.3", "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", @@ -2566,11 +2889,44 @@ "url": "https://opencollective.com/express" } }, + "node_modules/pdf-parse": { + "version": "2.4.5", + "resolved": "https://registry.npmjs.org/pdf-parse/-/pdf-parse-2.4.5.tgz", + "integrity": "sha512-mHU89HGh7v+4u2ubfnevJ03lmPgQ5WU4CxAVmTSh/sxVTEDYd1er/dKS/A6vg77NX47KTEoihq8jZBLr8Cxuwg==", + "license": "Apache-2.0", + "dependencies": { + "@napi-rs/canvas": "0.1.80", + "pdfjs-dist": "5.4.296" + }, + "bin": { + "pdf-parse": "bin/cli.mjs" + }, + "engines": { + "node": ">=20.16.0 <21 || >=22.3.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/mehmet-kozan" + } + }, + "node_modules/pdfjs-dist": { + "version": "5.4.296", + "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-5.4.296.tgz", + "integrity": "sha512-DlOzet0HO7OEnmUmB6wWGJrrdvbyJKftI1bhMitK7O2N8W2gc757yyYBbINy9IDafXAV9wmKr9t7xsTaNKRG5Q==", + "license": "Apache-2.0", + "engines": { + "node": ">=20.16.0 || >=22.3.0" + }, + "optionalDependencies": { + "@napi-rs/canvas": "^0.1.80" + } + }, "node_modules/pg": { "version": "8.19.0", "resolved": "https://registry.npmjs.org/pg/-/pg-8.19.0.tgz", "integrity": "sha512-QIcLGi508BAHkQ3pJNptsFz5WQMlpGbuBGBaIaXsWK8mel2kQ/rThYI+DbgjUvZrIr7MiuEuc9LcChJoEZK1xQ==", "license": "MIT", + "peer": true, "dependencies": { "pg-connection-string": "^2.11.0", "pg-pool": "^3.12.0", @@ -2668,6 +3024,7 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -2817,6 +3174,7 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -2826,6 +3184,7 @@ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.4.tgz", "integrity": "sha512-AXJdLo8kgMbimY95O2aKQqsz2iWi9jMgKJhRBAxECE4IFxfcazB2LmzloIoibJI3C12IlY20+KFaLv+71bUJeQ==", "license": "MIT", + "peer": true, "dependencies": { "scheduler": "^0.27.0" }, @@ -2931,6 +3290,20 @@ "react-dom": ">=16.6.0" } }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "license": "MIT", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/recharts": { "version": "2.15.4", "resolved": "https://registry.npmjs.org/recharts/-/recharts-2.15.4.tgz", @@ -3034,6 +3407,26 @@ "node": ">= 18" } }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, "node_modules/safer-buffer": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", @@ -3213,6 +3606,23 @@ "node": ">= 0.8" } }, + "node_modules/streamsearch": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz", + "integrity": "sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==", + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, "node_modules/tiny-invariant": { "version": "1.3.3", "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz", @@ -3278,7 +3688,6 @@ "os": [ "aix" ], - "peer": true, "engines": { "node": ">=18" } @@ -3296,7 +3705,6 @@ "os": [ "android" ], - "peer": true, "engines": { "node": ">=18" } @@ -3314,7 +3722,6 @@ "os": [ "android" ], - "peer": true, "engines": { "node": ">=18" } @@ -3332,7 +3739,6 @@ "os": [ "android" ], - "peer": true, "engines": { "node": ">=18" } @@ -3350,7 +3756,6 @@ "os": [ "darwin" ], - "peer": true, "engines": { "node": ">=18" } @@ -3368,7 +3773,6 @@ "os": [ "darwin" ], - "peer": true, "engines": { "node": ">=18" } @@ -3386,7 +3790,6 @@ "os": [ "freebsd" ], - "peer": true, "engines": { "node": ">=18" } @@ -3404,7 +3807,6 @@ "os": [ "freebsd" ], - "peer": true, "engines": { "node": ">=18" } @@ -3422,7 +3824,6 @@ "os": [ "linux" ], - "peer": true, "engines": { "node": ">=18" } @@ -3440,7 +3841,6 @@ "os": [ "linux" ], - "peer": true, "engines": { "node": ">=18" } @@ -3458,7 +3858,6 @@ "os": [ "linux" ], - "peer": true, "engines": { "node": ">=18" } @@ -3476,7 +3875,6 @@ "os": [ "linux" ], - "peer": true, "engines": { "node": ">=18" } @@ -3494,7 +3892,6 @@ "os": [ "linux" ], - "peer": true, "engines": { "node": ">=18" } @@ -3512,7 +3909,6 @@ "os": [ "linux" ], - "peer": true, "engines": { "node": ">=18" } @@ -3530,7 +3926,6 @@ "os": [ "linux" ], - "peer": true, "engines": { "node": ">=18" } @@ -3548,7 +3943,6 @@ "os": [ "linux" ], - "peer": true, "engines": { "node": ">=18" } @@ -3566,7 +3960,6 @@ "os": [ "linux" ], - "peer": true, "engines": { "node": ">=18" } @@ -3584,7 +3977,6 @@ "os": [ "netbsd" ], - "peer": true, "engines": { "node": ">=18" } @@ -3602,7 +3994,6 @@ "os": [ "netbsd" ], - "peer": true, "engines": { "node": ">=18" } @@ -3620,7 +4011,6 @@ "os": [ "openbsd" ], - "peer": true, "engines": { "node": ">=18" } @@ -3638,7 +4028,6 @@ "os": [ "openbsd" ], - "peer": true, "engines": { "node": ">=18" } @@ -3656,7 +4045,6 @@ "os": [ "openharmony" ], - "peer": true, "engines": { "node": ">=18" } @@ -3674,7 +4062,6 @@ "os": [ "sunos" ], - "peer": true, "engines": { "node": ">=18" } @@ -3692,7 +4079,6 @@ "os": [ "win32" ], - "peer": true, "engines": { "node": ">=18" } @@ -3710,7 +4096,6 @@ "os": [ "win32" ], - "peer": true, "engines": { "node": ">=18" } @@ -3728,7 +4113,6 @@ "os": [ "win32" ], - "peer": true, "engines": { "node": ">=18" } @@ -3789,6 +4173,12 @@ "node": ">= 0.6" } }, + "node_modules/typedarray": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz", + "integrity": "sha512-/aCDEGatGvZ2BIk+HmLf4ifCJFwvKFNb9/JeZPMulfgFracn9QFcAf5GO8B/mweUjSoblS5In0cWhqpfs/5PQA==", + "license": "MIT" + }, "node_modules/typescript": { "version": "5.9.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", @@ -3850,6 +4240,12 @@ "browserslist": ">= 4.21.0" } }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "license": "MIT" + }, "node_modules/uuid": { "version": "13.0.0", "resolved": "https://registry.npmjs.org/uuid/-/uuid-13.0.0.tgz", @@ -3900,6 +4296,7 @@ "integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.4.4", diff --git a/pdf2json.md b/pdf2json.md index 026e164..648cd55 100644 --- a/pdf2json.md +++ b/pdf2json.md @@ -1,5 +1,3 @@ -# Промпт для конвертации PDF банковской выписки в JSON - Ты — конвертер банковских выписок. Твоя задача: извлечь данные из прикреплённого PDF банковской выписки и вернуть строго один валидный JSON-объект в формате ниже. Никакого текста до или после JSON, только сам объект. ## Структура выходного JSON -- 2.49.1