From 21ac3deff4c35eb73205d487a5e4e8ce9ae1da1e Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Tue, 20 Jan 2026 16:31:59 +0100 Subject: [PATCH 1/3] :sparkles: add better error support --- src/errors/mindeeError.ts | 10 ++++ src/input/sources/inputSource.ts | 4 +- src/input/sources/localInputSource.ts | 9 +-- src/input/sources/streamInput.ts | 37 +++++++++--- src/input/sources/urlInput.ts | 81 +++++++++++++++++++++------ tests/input/sources.spec.ts | 10 +++- 6 files changed, 118 insertions(+), 33 deletions(-) diff --git a/src/errors/mindeeError.ts b/src/errors/mindeeError.ts index 53c90ff6..1494d8b9 100644 --- a/src/errors/mindeeError.ts +++ b/src/errors/mindeeError.ts @@ -10,6 +10,16 @@ export class MindeeError extends Error { } } +/** + * Custom Mindee error relating to improper inputs. + */ +export class MindeeInputError extends MindeeError { + constructor(message: string) { + super(message); + this.name = "MindeeInputError"; + } +} + /** * Custom Mindee error relating to improper mimetypes in inputs. */ diff --git a/src/input/sources/inputSource.ts b/src/input/sources/inputSource.ts index 5513e38d..2ff07934 100644 --- a/src/input/sources/inputSource.ts +++ b/src/input/sources/inputSource.ts @@ -1,3 +1,5 @@ +import { MindeeInputError } from "../../errors/mindeeError"; + /** * @param {string} inputType - the type of input used in file ("base64", "path", "dummy"). * NB: dummy is only used for tests purposes @@ -17,7 +19,7 @@ export abstract class InputSource { protected initialized: boolean = false; async init() { - throw new Error("not Implemented"); + throw new MindeeInputError("not Implemented"); } public isInitialized() { diff --git a/src/input/sources/localInputSource.ts b/src/input/sources/localInputSource.ts index 203b6c39..b4b20424 100644 --- a/src/input/sources/localInputSource.ts +++ b/src/input/sources/localInputSource.ts @@ -14,6 +14,7 @@ import { INPUT_TYPE_BYTES, INPUT_TYPE_PATH, INPUT_TYPE_BUFFER } from "./inputSource"; +import { MindeeInputError } from "../../errors/mindeeError"; export const MIMETYPES = new Map([ [".pdf", "application/pdf"], @@ -49,7 +50,7 @@ export abstract class LocalInputSource extends InputSource { if (!ALLOWED_INPUT_TYPES.includes(inputType)) { const allowed = Array.from(ALLOWED_INPUT_TYPES.keys()).join(", "); errorHandler.throw( - new Error(`Invalid input type, must be one of ${allowed}.`) + new MindeeInputError(`Invalid input type, must be one of ${allowed}.`) ); } this.inputType = inputType; @@ -58,7 +59,7 @@ export abstract class LocalInputSource extends InputSource { protected async checkMimetype(): Promise { if (!(this.fileObject instanceof Buffer)) { - throw new Error( + throw new MindeeInputError( `MIME type cannot be verified on input source of type ${this.inputType}.` ); } @@ -76,7 +77,7 @@ export abstract class LocalInputSource extends InputSource { } if (!mimeType) { const allowed = Array.from(MIMETYPES.keys()).join(", "); - const err = new Error(`Invalid file type, must be one of ${allowed}.`); + const err = new MindeeInputError(`Invalid file type, must be one of ${allowed}.`); errorHandler.throw(err); } logger.debug(`File is of type: ${mimeType}`); @@ -101,7 +102,7 @@ export abstract class LocalInputSource extends InputSource { */ isPdf(): boolean { if (!this.initialized) { - throw new Error( + throw new MindeeInputError( "The `init()` method must be called before calling `isPdf()`." ); } diff --git a/src/input/sources/streamInput.ts b/src/input/sources/streamInput.ts index 71037612..63cca21b 100644 --- a/src/input/sources/streamInput.ts +++ b/src/input/sources/streamInput.ts @@ -2,7 +2,7 @@ import { Readable } from "stream"; import { LocalInputSource } from "./localInputSource"; import { INPUT_TYPE_STREAM } from "./inputSource"; import { logger } from "../../logger"; -import { MindeeError } from "../../errors"; +import { MindeeInputError } from "../../errors/mindeeError"; interface StreamInputProps { inputStream: Readable; @@ -21,27 +21,50 @@ export class StreamInput extends LocalInputSource { this.inputStream = inputStream; } - async init() { + async init(signal? : AbortSignal) { if (this.initialized) { return; } logger.debug("Loading from stream"); - this.fileObject = await this.stream2buffer(this.inputStream); + this.fileObject = await this.stream2buffer(this.inputStream, signal); this.mimeType = await this.checkMimetype(); this.initialized = true; } - async stream2buffer(stream: Readable): Promise { + async stream2buffer(stream: Readable, signal?: AbortSignal): Promise { return new Promise((resolve, reject) => { if (stream.closed || stream.destroyed) { - return reject(new MindeeError("Stream is already closed")); + return reject(new MindeeInputError("Stream is already closed")); } + if (signal?.aborted) { + return reject(new MindeeInputError("Operation aborted")); + } + + const onAbort = () => { + stream.destroy(); + reject(new MindeeInputError("Operation aborted")); + }; + + if (signal) { + signal.addEventListener("abort", onAbort, { once: true }); + } + + + const cleanup = () => { + signal?.removeEventListener("abort", onAbort); + }; const _buf: Buffer[] = []; stream.pause(); stream.on("data", (chunk) => _buf.push(chunk)); - stream.on("end", () => resolve(Buffer.concat(_buf))); - stream.on("error", (err) => reject(new Error(`Error converting stream - ${err}`))); + stream.on("end", () => { + cleanup(); + resolve(Buffer.concat(_buf)); + }); + stream.on("error", (err) => { + cleanup(); + reject(new MindeeInputError(`Error converting stream - ${err}`)); + }); stream.resume(); }); } diff --git a/src/input/sources/urlInput.ts b/src/input/sources/urlInput.ts index 708fcd16..fe60460c 100644 --- a/src/input/sources/urlInput.ts +++ b/src/input/sources/urlInput.ts @@ -7,22 +7,27 @@ import { request as httpsRequest } from "https"; import { IncomingMessage } from "http"; import { BytesInput } from "./bytesInput"; import { logger } from "../../logger"; +import { MindeeInputError } from "../../errors/mindeeError"; export class UrlInput extends InputSource { public readonly url: string; + private signal?: AbortSignal; - constructor({ url }: { url: string }) { + constructor({ url, signal }: { url: string, signal?: AbortSignal}) { super(); this.url = url; + this.signal = signal; } - async init() { + async init(signal?: AbortSignal) { if (this.initialized) { return; } + this.signal = signal ?? this.signal; + logger.debug(`source URL: ${this.url}`); if (!this.url.toLowerCase().startsWith("https")) { - throw new Error("URL must be HTTPS"); + throw new MindeeInputError("URL must be HTTPS"); } this.fileObject = this.url; this.initialized = true; @@ -34,8 +39,9 @@ export class UrlInput extends InputSource { token?: string; headers?: Record; maxRedirects?: number; + signal?: AbortSignal; }): Promise<{ content: Buffer; finalUrl: string }> { - const { username, password, token, headers = {}, maxRedirects = 3 } = options; + const { username, password, token, headers = {}, maxRedirects = 3, signal } = options; if (token) { headers["Authorization"] = `Bearer ${token}`; @@ -43,7 +49,7 @@ export class UrlInput extends InputSource { const auth = username && password ? `${username}:${password}` : undefined; - return await this.makeRequest(this.url, auth, headers, 0, maxRedirects); + return await this.makeRequest(this.url, auth, headers, 0, maxRedirects, signal); } async saveToFile(options: { @@ -54,9 +60,11 @@ export class UrlInput extends InputSource { token?: string; headers?: Record; maxRedirects?: number; + signal?: AbortSignal; }): Promise { - const { filepath, filename, ...fetchOptions } = options; - const { content, finalUrl } = await this.fetchFileContent(fetchOptions); + const { filepath, filename, signal, ...fetchOptions } = options; + const effectiveSignal = signal ?? this.signal; + const { content, finalUrl } = await this.fetchFileContent({ ...fetchOptions, signal: effectiveSignal }); const finalFilename = this.fillFilename(filename, finalUrl); const fullPath = `${filepath}/${finalFilename}`; await writeFile(fullPath, content); @@ -70,9 +78,11 @@ export class UrlInput extends InputSource { token?: string; headers?: Record; maxRedirects?: number; + signal?: AbortSignal; } = {}): Promise { - const { filename, ...fetchOptions } = options; - const { content, finalUrl } = await this.fetchFileContent(fetchOptions); + const { filename, signal, ...fetchOptions } = options; + const effectiveSignal = signal ?? this.signal; + const { content, finalUrl } = await this.fetchFileContent({ ...fetchOptions, signal: effectiveSignal }); const finalFilename = this.fillFilename(filename, finalUrl); return new BytesInput({ inputBytes: content, filename: finalFilename }); } @@ -111,8 +121,13 @@ export class UrlInput extends InputSource { auth: string | undefined, headers: Record, redirects: number, - maxRedirects: number + maxRedirects: number, + signal?: AbortSignal ): Promise<{ content: Buffer; finalUrl: string }> { + if (signal?.aborted) { + throw new MindeeInputError("Operation aborted"); + } + const parsedUrl = new URL(url); const options = { hostname: parsedUrl.hostname, @@ -123,28 +138,58 @@ export class UrlInput extends InputSource { }; const response = await new Promise((resolve, reject) => { - const req = httpsRequest(options, resolve); - req.on("error", reject); + if (signal?.aborted) { + return reject(new MindeeInputError("Operation aborted")); + } + + const onAbort = () => { + req.destroy(); + reject(new MindeeInputError("Operation aborted")); + }; + + if (signal) { + signal.addEventListener("abort", onAbort, { once: true }); + } + + const req = httpsRequest(options, (res) => { + signal?.removeEventListener("abort", onAbort); + resolve(res); + }); + req.on("error", (err) => { + signal?.removeEventListener("abort", onAbort); + reject(err); + }); req.end(); }); if (response.statusCode && response.statusCode >= 300 && response.statusCode < 400) { if (redirects === maxRedirects) { - throw new Error(`Can't reach URL after ${redirects} out of ${maxRedirects} redirects, aborting operation.`); + throw new MindeeInputError( + `Can't reach URL after ${redirects} out of ${maxRedirects} redirects, aborting operation.` + ); } if (response.headers.location) { - return await this.makeRequest(response.headers.location, auth, headers, redirects + 1, maxRedirects); + return await this.makeRequest(response.headers.location, auth, headers, redirects + 1, maxRedirects, signal); } - throw new Error("Redirect location not found"); + throw new MindeeInputError("Redirect location not found"); } if (!response.statusCode || response.statusCode >= 400 || response.statusCode < 200) { - throw new Error(`Couldn't retrieve file from server, error code ${response.statusCode}.`); + throw new MindeeInputError(`Couldn't retrieve file from server, error code ${response.statusCode}.`); } const chunks: Buffer[] = []; - for await (const chunk of response) { - chunks.push(chunk); + try { + for await (const chunk of response) { + if (signal?.aborted) { + response.destroy(); + throw new MindeeInputError("Operation aborted"); + } + chunks.push(chunk); + } + } catch (err) { + response.destroy(); + throw err; } return { content: Buffer.concat(chunks), finalUrl: url }; } diff --git a/tests/input/sources.spec.ts b/tests/input/sources.spec.ts index 39458629..31ebb2f8 100644 --- a/tests/input/sources.spec.ts +++ b/tests/input/sources.spec.ts @@ -21,6 +21,7 @@ import { extractTextFromPdf } from "../../src/pdf/pdfUtils"; import { logger } from "../../src/logger"; import { RESOURCE_PATH, V1_PRODUCT_PATH } from "../index"; import { Readable } from "stream"; +import { MindeeInputError } from "../../src/errors/mindeeError"; describe("Test different types of input", () => { const outputPath = path.join(RESOURCE_PATH, "output"); @@ -155,7 +156,8 @@ describe("Test different types of input", () => { await streamInput.init(); expect.fail("Should have thrown an error"); } catch (e: any) { - expect(e.toString()).to.eq("Error: Error converting stream - Error: aborted"); + expect(e).to.be.instanceOf(MindeeInputError); + expect(e.message).to.equal("Error converting stream - Error: aborted"); } }); @@ -174,7 +176,8 @@ describe("Test different types of input", () => { await streamInput.init(); expect.fail("Should have thrown an error"); } catch (e: any) { - expect(e.toString()).to.equal("MindeeError: Stream is already closed"); + expect(e).to.be.instanceOf(MindeeInputError); + expect(e.message).to.equal("Stream is already closed"); } }); @@ -200,7 +203,8 @@ describe("Test different types of input", () => { try { await streamInput.init(); } catch (e: any) { - expect(e.toString()).to.eq("Error: Error converting stream - Error: aborted"); + expect(e).to.be.instanceOf(MindeeInputError); + expect(e.message).to.equal("Error converting stream - Error: aborted"); } }); From ee6a0533d19d4fc562d42115db1bedb7d4ebc07b Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Tue, 20 Jan 2026 16:41:36 +0100 Subject: [PATCH 2/3] add better tests --- tests/input/sources.spec.ts | 21 +++++++ tests/input/urlInputSource.spec.ts | 91 +++++++++++++++++++++++++++++- 2 files changed, 111 insertions(+), 1 deletion(-) diff --git a/tests/input/sources.spec.ts b/tests/input/sources.spec.ts index 31ebb2f8..15c3c554 100644 --- a/tests/input/sources.spec.ts +++ b/tests/input/sources.spec.ts @@ -208,6 +208,27 @@ describe("Test different types of input", () => { } }); + it("should handle AbortSignal on streams via init()", async () => { + const filePath = path.join(V1_PRODUCT_PATH, "expense_receipts/default_sample.jpg"); + const stream = fs.createReadStream(filePath); + const controller = new AbortController(); + + const streamInput = new StreamInput({ + inputStream: stream, + filename: "aborted.jpg" + }); + + controller.abort(); + + try { + await streamInput.init(controller.signal); + expect.fail("Should have thrown an error"); + } catch (e: any) { + expect(e).to.be.instanceOf(MindeeInputError); + expect(e.message).to.equal("Operation aborted"); + } + }); + it("should accept raw bytes", async () => { const filePath = path.join(V1_PRODUCT_PATH, "expense_receipts/default_sample.jpg"); const inputBytes = await fs.promises.readFile(filePath); diff --git a/tests/input/urlInputSource.spec.ts b/tests/input/urlInputSource.spec.ts index d474ab8c..545f05be 100644 --- a/tests/input/urlInputSource.spec.ts +++ b/tests/input/urlInputSource.spec.ts @@ -2,6 +2,7 @@ import { BytesInput, UrlInput } from "../../src"; import { LocalInputSource } from "../../src/input"; import { expect } from "chai"; import nock from "nock"; +import { MindeeInputError } from "../../src/errors/mindeeError"; describe("Test URL input source", () => { describe("initializing", () => { @@ -62,7 +63,7 @@ describe("Test URL input source", () => { nock("https://example.com") .get("/original.pdf") - .reply(302, "", { location: redirectUrl }); // Not sure about that one. + .reply(302, "", { location: redirectUrl }); nock("https://example.com") .get("/redirected.pdf") @@ -129,6 +130,94 @@ describe("Test URL input source", () => { ); } }); + + it("should handle AbortSignal via constructor", async () => { + const url = "https://example.com/file.pdf"; + const fileContent = Buffer.from("dummy PDF content"); + const controller = new AbortController(); + + nock("https://example.com") + .get("/file.pdf") + .reply(200, fileContent); + + const urlInput = new UrlInput({ url, signal: controller.signal }); + + controller.abort(); + + try { + await urlInput.asLocalInputSource(); + expect.fail("Expected an error to be thrown"); + } catch (error) { + expect(error).to.be.instanceOf(Error); + expect((error as Error).message).to.equal("Operation aborted"); + } + }); + + it("should handle AbortSignal via asLocalInputSource options", async () => { + const url = "https://example.com/file.pdf"; + const fileContent = Buffer.from("dummy PDF content"); + const controller = new AbortController(); + + nock("https://example.com") + .get("/file.pdf") + .reply(200, fileContent); + + const urlInput = new UrlInput({ url }); + + controller.abort(); + + try { + await urlInput.asLocalInputSource({ signal: controller.signal }); + expect.fail("Expected an error to be thrown"); + } catch (e: any) { + expect(e).to.be.instanceOf(MindeeInputError); + expect((e as Error).message).to.equal("Operation aborted"); + } + }); + + it("should prefer asLocalInputSource signal over constructor signal", async () => { + const url = "https://example.com/file.pdf"; + const fileContent = Buffer.from("dummy PDF content"); + const constructorController = new AbortController(); + const optionsController = new AbortController(); + + nock("https://example.com") + .get("/file.pdf") + .reply(200, fileContent); + + const urlInput = new UrlInput({ url, signal: constructorController.signal }); + + optionsController.abort(); + + try { + await urlInput.asLocalInputSource({ signal: optionsController.signal }); + expect.fail("Expected an error to be thrown"); + } catch (e: any) { + expect(e).to.be.instanceOf(MindeeInputError); + expect((e as Error).message).to.equal("Operation aborted"); + } + }); + + it("should handle AbortSignal during download (slow response)", async () => { + const url = "https://example.com/largefile.pdf"; + const controller = new AbortController(); + nock("https://example.com") + .get("/largefile.pdf") + .delayBody(100) + .reply(200, Buffer.alloc(1000000)); + + const urlInput = new UrlInput({ url, signal: controller.signal }); + + setTimeout(() => controller.abort(), 10); + + try { + await urlInput.asLocalInputSource(); + expect.fail("Expected an error to be thrown"); + } catch (error) { + expect(error).to.be.instanceOf(Error); + expect((error as Error).message).to.equal("Operation aborted"); + } + }); }); }); }); From 32c5d9562ea349ae51505463fb7ffb80ad6167b4 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Tue, 20 Jan 2026 16:42:54 +0100 Subject: [PATCH 3/3] bump dependency --- package-lock.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index cb60284e..e80a3a4b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -3690,9 +3690,9 @@ } }, "node_modules/ts-node/node_modules/diff": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz", - "integrity": "sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.4.tgz", + "integrity": "sha512-X07nttJQkwkfKfvTPG/KSnE2OMdcUCao6+eXF3wmnIQRn2aPAHH3VxDbDOdegkd6JbPsXqShpvEOHfAT+nCNwQ==", "dev": true, "license": "BSD-3-Clause", "engines": {