Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions src/errors/mindeeError.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@ export class MindeeError extends Error {
}
}

/**
* Custom Mindee error relating to improper inputs.
*
* Extends MindeeError, so handlers that catch the base class also catch this one.
*
* @param message - human-readable description of what was wrong with the input.
*/
export class MindeeInputError extends MindeeError {
constructor(message: string) {
super(message);
// Set the error name explicitly so logs and `toString()` identify this subclass.
this.name = "MindeeInputError";
}
}

/**
* Custom Mindee error relating to improper mimetypes in inputs.
*/
Expand Down
4 changes: 3 additions & 1 deletion src/input/sources/inputSource.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { MindeeInputError } from "../../errors/mindeeError";

/**
* @param {string} inputType - the type of input used in file ("base64", "path", "dummy").
* NB: dummy is only used for testing purposes
Expand All @@ -17,7 +19,7 @@ export abstract class InputSource {
protected initialized: boolean = false;

async init() {
throw new Error("not Implemented");
throw new MindeeInputError("not Implemented");
}

public isInitialized() {
Expand Down
9 changes: 5 additions & 4 deletions src/input/sources/localInputSource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import {
INPUT_TYPE_BYTES,
INPUT_TYPE_PATH, INPUT_TYPE_BUFFER
} from "./inputSource";
import { MindeeInputError } from "../../errors/mindeeError";

export const MIMETYPES = new Map<string, string>([
[".pdf", "application/pdf"],
Expand Down Expand Up @@ -49,7 +50,7 @@ export abstract class LocalInputSource extends InputSource {
if (!ALLOWED_INPUT_TYPES.includes(inputType)) {
const allowed = Array.from(ALLOWED_INPUT_TYPES.keys()).join(", ");
errorHandler.throw(
new Error(`Invalid input type, must be one of ${allowed}.`)
new MindeeInputError(`Invalid input type, must be one of ${allowed}.`)
);
}
this.inputType = inputType;
Expand All @@ -58,7 +59,7 @@ export abstract class LocalInputSource extends InputSource {

protected async checkMimetype(): Promise<string> {
if (!(this.fileObject instanceof Buffer)) {
throw new Error(
throw new MindeeInputError(
`MIME type cannot be verified on input source of type ${this.inputType}.`
);
}
Expand All @@ -76,7 +77,7 @@ export abstract class LocalInputSource extends InputSource {
}
if (!mimeType) {
const allowed = Array.from(MIMETYPES.keys()).join(", ");
const err = new Error(`Invalid file type, must be one of ${allowed}.`);
const err = new MindeeInputError(`Invalid file type, must be one of ${allowed}.`);
errorHandler.throw(err);
}
logger.debug(`File is of type: ${mimeType}`);
Expand All @@ -101,7 +102,7 @@ export abstract class LocalInputSource extends InputSource {
*/
isPdf(): boolean {
if (!this.initialized) {
throw new Error(
throw new MindeeInputError(
"The `init()` method must be called before calling `isPdf()`."
);
}
Expand Down
37 changes: 30 additions & 7 deletions src/input/sources/streamInput.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { Readable } from "stream";
import { LocalInputSource } from "./localInputSource";
import { INPUT_TYPE_STREAM } from "./inputSource";
import { logger } from "../../logger";
import { MindeeError } from "../../errors";
import { MindeeInputError } from "../../errors/mindeeError";

interface StreamInputProps {
inputStream: Readable;
Expand All @@ -21,27 +21,50 @@ export class StreamInput extends LocalInputSource {
this.inputStream = inputStream;
}

async init() {
async init(signal? : AbortSignal) {
if (this.initialized) {
return;
}
logger.debug("Loading from stream");
this.fileObject = await this.stream2buffer(this.inputStream);
this.fileObject = await this.stream2buffer(this.inputStream, signal);
this.mimeType = await this.checkMimetype();
this.initialized = true;
}

async stream2buffer(stream: Readable): Promise<Buffer> {
async stream2buffer(stream: Readable, signal?: AbortSignal): Promise<Buffer> {
return new Promise<Buffer>((resolve, reject) => {
if (stream.closed || stream.destroyed) {
return reject(new MindeeError("Stream is already closed"));
return reject(new MindeeInputError("Stream is already closed"));
}
if (signal?.aborted) {
return reject(new MindeeInputError("Operation aborted"));
}

const onAbort = () => {
stream.destroy();
reject(new MindeeInputError("Operation aborted"));
};

if (signal) {
signal.addEventListener("abort", onAbort, { once: true });
}


const cleanup = () => {
signal?.removeEventListener("abort", onAbort);
};

const _buf: Buffer[] = [];
stream.pause();
stream.on("data", (chunk) => _buf.push(chunk));
stream.on("end", () => resolve(Buffer.concat(_buf)));
stream.on("error", (err) => reject(new Error(`Error converting stream - ${err}`)));
stream.on("end", () => {
cleanup();
resolve(Buffer.concat(_buf));
});
stream.on("error", (err) => {
cleanup();
reject(new MindeeInputError(`Error converting stream - ${err}`));
});
stream.resume();
});
}
Expand Down
81 changes: 63 additions & 18 deletions src/input/sources/urlInput.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,27 @@ import { request as httpsRequest } from "https";
import { IncomingMessage } from "http";
import { BytesInput } from "./bytesInput";
import { logger } from "../../logger";
import { MindeeInputError } from "../../errors/mindeeError";

export class UrlInput extends InputSource {
public readonly url: string;
private signal?: AbortSignal;

constructor({ url }: { url: string }) {
constructor({ url, signal }: { url: string, signal?: AbortSignal}) {
super();
this.url = url;
this.signal = signal;
}

async init() {
async init(signal?: AbortSignal) {
if (this.initialized) {
return;
}
this.signal = signal ?? this.signal;

logger.debug(`source URL: ${this.url}`);
if (!this.url.toLowerCase().startsWith("https")) {
throw new Error("URL must be HTTPS");
throw new MindeeInputError("URL must be HTTPS");
}
this.fileObject = this.url;
this.initialized = true;
Expand All @@ -34,16 +39,17 @@ export class UrlInput extends InputSource {
token?: string;
headers?: Record<string, string>;
maxRedirects?: number;
signal?: AbortSignal;
}): Promise<{ content: Buffer; finalUrl: string }> {
const { username, password, token, headers = {}, maxRedirects = 3 } = options;
const { username, password, token, headers = {}, maxRedirects = 3, signal } = options;

if (token) {
headers["Authorization"] = `Bearer ${token}`;
}

const auth = username && password ? `${username}:${password}` : undefined;

return await this.makeRequest(this.url, auth, headers, 0, maxRedirects);
return await this.makeRequest(this.url, auth, headers, 0, maxRedirects, signal);
}

async saveToFile(options: {
Expand All @@ -54,9 +60,11 @@ export class UrlInput extends InputSource {
token?: string;
headers?: Record<string, string>;
maxRedirects?: number;
signal?: AbortSignal;
}): Promise<string> {
const { filepath, filename, ...fetchOptions } = options;
const { content, finalUrl } = await this.fetchFileContent(fetchOptions);
const { filepath, filename, signal, ...fetchOptions } = options;
const effectiveSignal = signal ?? this.signal;
const { content, finalUrl } = await this.fetchFileContent({ ...fetchOptions, signal: effectiveSignal });
const finalFilename = this.fillFilename(filename, finalUrl);
const fullPath = `${filepath}/${finalFilename}`;
await writeFile(fullPath, content);
Expand All @@ -70,9 +78,11 @@ export class UrlInput extends InputSource {
token?: string;
headers?: Record<string, string>;
maxRedirects?: number;
signal?: AbortSignal;
} = {}): Promise<BytesInput> {
const { filename, ...fetchOptions } = options;
const { content, finalUrl } = await this.fetchFileContent(fetchOptions);
const { filename, signal, ...fetchOptions } = options;
const effectiveSignal = signal ?? this.signal;
const { content, finalUrl } = await this.fetchFileContent({ ...fetchOptions, signal: effectiveSignal });
const finalFilename = this.fillFilename(filename, finalUrl);
return new BytesInput({ inputBytes: content, filename: finalFilename });
}
Expand Down Expand Up @@ -111,8 +121,13 @@ export class UrlInput extends InputSource {
auth: string | undefined,
headers: Record<string, string>,
redirects: number,
maxRedirects: number
maxRedirects: number,
signal?: AbortSignal
): Promise<{ content: Buffer; finalUrl: string }> {
if (signal?.aborted) {
throw new MindeeInputError("Operation aborted");
}

const parsedUrl = new URL(url);
const options = {
hostname: parsedUrl.hostname,
Expand All @@ -123,28 +138,58 @@ export class UrlInput extends InputSource {
};

const response = await new Promise<IncomingMessage>((resolve, reject) => {
const req = httpsRequest(options, resolve);
req.on("error", reject);
if (signal?.aborted) {
return reject(new MindeeInputError("Operation aborted"));
}

const onAbort = () => {
req.destroy();
reject(new MindeeInputError("Operation aborted"));
};

if (signal) {
signal.addEventListener("abort", onAbort, { once: true });
}

const req = httpsRequest(options, (res) => {
signal?.removeEventListener("abort", onAbort);
resolve(res);
});
req.on("error", (err) => {
signal?.removeEventListener("abort", onAbort);
reject(err);
});
req.end();
});

if (response.statusCode && response.statusCode >= 300 && response.statusCode < 400) {
if (redirects === maxRedirects) {
throw new Error(`Can't reach URL after ${redirects} out of ${maxRedirects} redirects, aborting operation.`);
throw new MindeeInputError(
`Can't reach URL after ${redirects} out of ${maxRedirects} redirects, aborting operation.`
);
}
if (response.headers.location) {
return await this.makeRequest(response.headers.location, auth, headers, redirects + 1, maxRedirects);
return await this.makeRequest(response.headers.location, auth, headers, redirects + 1, maxRedirects, signal);
}
throw new Error("Redirect location not found");
throw new MindeeInputError("Redirect location not found");
}

if (!response.statusCode || response.statusCode >= 400 || response.statusCode < 200) {
throw new Error(`Couldn't retrieve file from server, error code ${response.statusCode}.`);
throw new MindeeInputError(`Couldn't retrieve file from server, error code ${response.statusCode}.`);
}

const chunks: Buffer[] = [];
for await (const chunk of response) {
chunks.push(chunk);
try {
for await (const chunk of response) {
if (signal?.aborted) {
response.destroy();
throw new MindeeInputError("Operation aborted");
}
chunks.push(chunk);
}
} catch (err) {
response.destroy();
throw err;
}
return { content: Buffer.concat(chunks), finalUrl: url };
}
Expand Down
31 changes: 28 additions & 3 deletions tests/input/sources.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import { extractTextFromPdf } from "../../src/pdf/pdfUtils";
import { logger } from "../../src/logger";
import { RESOURCE_PATH, V1_PRODUCT_PATH } from "../index";
import { Readable } from "stream";
import { MindeeInputError } from "../../src/errors/mindeeError";

describe("Test different types of input", () => {
const outputPath = path.join(RESOURCE_PATH, "output");
Expand Down Expand Up @@ -155,7 +156,8 @@ describe("Test different types of input", () => {
await streamInput.init();
expect.fail("Should have thrown an error");
} catch (e: any) {
expect(e.toString()).to.eq("Error: Error converting stream - Error: aborted");
expect(e).to.be.instanceOf(MindeeInputError);
expect(e.message).to.equal("Error converting stream - Error: aborted");
}
});

Expand All @@ -174,7 +176,8 @@ describe("Test different types of input", () => {
await streamInput.init();
expect.fail("Should have thrown an error");
} catch (e: any) {
expect(e.toString()).to.equal("MindeeError: Stream is already closed");
expect(e).to.be.instanceOf(MindeeInputError);
expect(e.message).to.equal("Stream is already closed");
}
});

Expand All @@ -200,7 +203,29 @@ describe("Test different types of input", () => {
try {
await streamInput.init();
} catch (e: any) {
expect(e.toString()).to.eq("Error: Error converting stream - Error: aborted");
expect(e).to.be.instanceOf(MindeeInputError);
expect(e.message).to.equal("Error converting stream - Error: aborted");
}
});

// Verifies that init() rejects with MindeeInputError when it is handed a signal
// that has already been aborted, without reading anything from the stream.
it("should handle AbortSignal on streams via init()", async () => {
  const filePath = path.join(V1_PRODUCT_PATH, "expense_receipts/default_sample.jpg");
  const stream = fs.createReadStream(filePath);
  const controller = new AbortController();

  const streamInput = new StreamInput({
    inputStream: stream,
    filename: "aborted.jpg"
  });

  // Abort before init() so the up-front `signal.aborted` check is what rejects.
  controller.abort();

  // Capture the rejection instead of asserting inside `catch`: if init() does not
  // throw, `caught` stays undefined and the instanceOf assertion below fails clearly.
  let caught: unknown;
  try {
    await streamInput.init(controller.signal);
  } catch (e: unknown) {
    caught = e;
  } finally {
    // The pre-aborted path rejects without destroying the stream, so release the
    // underlying file handle here rather than leaving it open.
    stream.destroy();
  }

  expect(caught).to.be.instanceOf(MindeeInputError);
  expect(caught).to.have.property("message", "Operation aborted");
});

Expand Down
Loading