Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "hawk.workers",
"private": true,
"version": "0.1.1",
"version": "0.1.2",
"description": "Hawk workers",
"repository": "git@github.com:codex-team/hawk.workers.git",
"license": "BUSL-1.1",
Expand Down
2 changes: 1 addition & 1 deletion workers/grouper/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "hawk-worker-grouper",
"version": "0.0.1",
"version": "0.0.2",
"description": "Accepts processed errors from language-workers and saves it to the DB with grouping of similar ones. ",
"main": "src/index.ts",
"repository": "https://github.com/codex-team/hawk.workers/tree/master/workers/grouper",
Expand Down
96 changes: 89 additions & 7 deletions workers/grouper/src/data-filter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,24 +36,88 @@ export default class DataFilter {
private filteredValuePlaceholder = '[filtered]';

/**
 * Keys whose values are possibly sensitive.
 *
 * Entries are stored in lowercase because keys are compared via key.toLowerCase().
 * Each key appears exactly once, under the category it belongs to.
 */
private possiblySensitiveDataKeys = new Set([
  /**
   * Authorization and sessions
   */
  'auth',
  'authorization',
  'access_token',
  'accesstoken',
  'token',
  'jwt',
  'session',
  'sessionid',
  'session_id',
  /**
   * API keys and secure tokens
   */
  'api_key',
  'apikey',
  'x-api-key',
  'x-auth-token',
  'bearer',
  'client_secret',
  'secret',
  'credentials',
  /**
   * Passwords
   */
  'password',
  'passwd',
  'mysql_pwd',
  'oldpassword',
  'old-password',
  'old_password',
  'newpassword',
  'new-password',
  'new_password',
  /**
   * Encryption keys
   */
  'private_key',
  'ssh_key',
  /**
   * Payments data
   */
  'card',
  'cardnumber',
  'card[number]',
  'creditcard',
  'credit_card',
  'pan',
  'pin',
  'security_code',
  'stripetoken',
  'cloudpayments_public_id',
  'cloudpayments_secret',
  /**
   * Config and connections
   */
  'dsn',
  /**
   * Personal data
   */
  'ssn',
]);

/**
 * Bank card PAN Regex
 *
 * Anchored with ^ and $, so it only matches a string made up entirely of digits
 * in one of these shapes:
 * - 4 followed by 12 digits, optionally 3 more (13 or 16 digits in total)
 * - 2 or 5, then a digit from 1 to 7, then 14 digits (16 in total)
 * - 6011 or 65xx followed by 12 digits (16 in total)
 * - 34 or 37 followed by 13 digits (15 in total)
 * - 300-305, 36x or 38x followed by 11 digits (14 in total)
 * - 2131 or 1800 followed by 11 digits (15 in total), or 35xxx followed by 11 digits (16 in total)
 *
 * NOTE(review): the prefixes look like commonly published card-brand ranges — confirm against the original source.
 *
 * The regex carries the `g` flag, so test() advances `lastIndex` after a successful match.
 * Reset `lastIndex` to 0 before every test() call, otherwise a following check may start mid-string and fail.
 */
private bankCardRegex = /^(?:4[0-9]{12}(?:[0-9]{3})?|[25][1-7][0-9]{14}|6(?:011|5[0-9][0-9])[0-9]{12}|3[47][0-9]{13}|3(?:0[0-5]|[68][0-9])[0-9]{11}|(?:2131|1800|35\d{3})\d{11})$/g;

/**
 * MongoDB ObjectId Regex (24 hexadecimal characters)
 *
 * Matches the whole string only: exactly 24 characters, each one 0-9, a-f or A-F.
 * A string of 24 decimal digits also matches, since digits are valid hex characters.
 * No `g` flag, so test() keeps no state between calls.
 */
private objectIdRegex = /^[0-9a-fA-F]{24}$/;

/**
 * UUID Regex - matches UUIDs with all dashes (8-4-4-4-12 format) or no dashes (32 hex chars)
 *
 * Matches the whole string only, in either letter case.
 * A value with only some of the dashes does not match.
 * Every position accepts any hex digit: the version and variant nibbles are not validated.
 * No `g` flag, so test() keeps no state between calls.
 */
private uuidRegex = /^(?:[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}|[0-9a-fA-F]{32})$/;

/**
* Accept event and process 'addons' and 'context' fields.
* It mutates the original object
Expand Down Expand Up @@ -96,12 +160,30 @@ export default class DataFilter {
return value;
}

/**
* Check if value matches MongoDB ObjectId pattern (24 hex chars)
* ObjectIds should not be filtered
*/
if (this.objectIdRegex.test(value)) {
return value;
}

/**
* Check if value matches UUID pattern (with or without dashes)
* UUIDs should not be filtered
*/
if (this.uuidRegex.test(value)) {
return value;
}

/**
* Remove all non-digit chars
*/
const clean = value.replace(/\D/g, '');

// Reset last index to 0
/**
* Reset last index to 0
*/
this.bankCardRegex.lastIndex = 0;
if (!this.bankCardRegex.test(clean)) {
return value;
Expand Down
188 changes: 186 additions & 2 deletions workers/grouper/tests/data-filter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,57 @@ function generateEvent({ context, addons }: {context?: Json, addons?: EventAddon
}

/**
 * Fixture: an object holding sensitive information.
 * Key names deliberately use snake_case/kebab-case so that they match the data-filter list.
 */
/* eslint-disable @typescript-eslint/naming-convention */
/**
 * Card number reused by every card-like key of the fixture
 */
const mockCardNumber = '5500 0000 0000 0004';

/**
 * Long token reused by every token-like key of the fixture
 */
const mockLongToken = '70BA33708CBFB103F1A8E34AFEF333BA7DC021022B2D9AAA583AABB8058D8D67';

const sensitiveDataMock = {
  pan: mockCardNumber,
  secret: 'D6A03F5C2E0E356F262D56F44370E1CD813583B2',
  credentials: mockLongToken,
  'card[number]': mockCardNumber,
  password: 'bFb7PBm6nZ7RJRq9',
  oldpassword: 'oldSecret123',
  newpassword: 'newSecret456',
  'old-password': 'oldSecretHyphen',
  old_password: 'oldSecretUnderscore',
  'new-password': 'newSecretHyphen',
  new_password: 'newSecretUnderscore',
  auth: 'C4CA4238A0B923820DCC509A6F75849B',
  access_token: mockLongToken,
  accessToken: mockLongToken,
};

/**
 * Additional sensitive keys (newly added / previously uncovered).
 * Keys intentionally use snake_case to match data-filter list.
 *
 * Every key here is expected to be replaced with the filtered placeholder,
 * whatever its value looks like. The groups below follow the categories
 * used by the data-filter key list.
 */
const additionalSensitiveDataMock = {
  // Authorization and sessions
  authorization: 'Bearer abc123',
  token: 'token-value',
  jwt: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9',
  session: 'sess_xyz',
  session_id: 'sid_789',
  // API keys and secure tokens
  api_key: 'sk_live_xxx',
  bearer: 'Bearer token',
  client_secret: 'client_secret_value',
  // Passwords
  passwd: 'passwd_value',
  mysql_pwd: 'mysql_pwd_value',
  // Encryption keys
  private_key: '-----BEGIN PRIVATE KEY-----',
  ssh_key: 'ssh-rsa AAAA...',
  // Payments data
  card: '4111111111111111',
  cardnumber: '5500000000000004',
  creditcard: '4111111111111111',
  pin: '1234',
  security_code: '999',
  stripetoken: 'tok_xxx',
  cloudpayments_public_id: 'pk_xxx',
  cloudpayments_secret: 'secret_xxx',
  // Config and connections
  dsn: 'postgres://user:pass@host/db',
  // Personal data
  ssn: '123-45-6789',
};
/* eslint-enable @typescript-eslint/naming-convention */

describe('GrouperWorker', () => {
const dataFilter = new DataFilter();

Expand Down Expand Up @@ -123,6 +160,34 @@ describe('GrouperWorker', () => {
});
});

test('should filter additional sensitive keys (authorization, token, payment, dsn, ssn, etc.) in context', async () => {
  /**
   * Hand over a shallow copy of the shared fixture.
   * processEvent mutates the object it receives; if generateEvent passes the context
   * through by reference, the shared mock would stay filtered for every later test.
   * The mock is flat (string values only), so a spread copy is sufficient.
   */
  const event = generateEvent({
    context: { ...additionalSensitiveDataMock },
  });

  dataFilter.processEvent(event);

  /**
   * The fixture is used only as the list of key names to check
   */
  for (const key of Object.keys(additionalSensitiveDataMock)) {
    expect(event.context[key]).toBe('[filtered]');
  }
});

test('should filter additional sensitive keys in addons', async () => {
  /**
   * Hand over a shallow copy of the shared fixture.
   * processEvent mutates the object it receives; if generateEvent passes the addons
   * through by reference, the shared mock would stay filtered for every later test.
   * The mock is flat (string values only), so a spread copy is sufficient.
   */
  const event = generateEvent({
    addons: {
      vue: {
        props: { ...additionalSensitiveDataMock },
      },
    },
  });

  dataFilter.processEvent(event);

  /**
   * The fixture is used only as the list of key names to check
   */
  for (const key of Object.keys(additionalSensitiveDataMock)) {
    expect(event.addons['vue']['props'][key]).toBe('[filtered]');
  }
});

test('should not replace values with keynames not in a list', async () => {
const normalValue = 'test123';
const event = generateEvent({
Expand All @@ -143,5 +208,124 @@ describe('GrouperWorker', () => {
expect(event.context['normalKey']).toBe(normalValue);
expect(event.addons['vue']['props']['normalKey']).toBe(normalValue);
});

test('should not filter UUID values that contain exactly 16 digits', async () => {
  /**
   * Each UUID below holds exactly 16 decimal digits. Once the non-digit characters
   * are stripped, the rest fits a PAN pattern, so without UUID detection
   * these values would be wrongly filtered as card numbers.
   */
  const dashedLowercaseUuid = '4a1b2c3d-4e5f-6a7b-8c9d-0e1f2a3b4c5d'; // digits start with 4
  const dashedUppercaseUuid = '5A1B2C3D-4E5F-6A7B-8C9D-0E1F2A3B4C5D'; // digits start with 5
  const compactUuid = '2a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d'; // 32 hex chars, no dashes

  const event = generateEvent({
    context: {
      userId: dashedLowercaseUuid,
      requestId: dashedUppercaseUuid,
      transactionId: compactUuid,
    },
    addons: {
      vue: {
        props: {
          componentId: dashedLowercaseUuid,
        },
      },
    },
  });

  dataFilter.processEvent(event);

  /**
   * Context field name -> value that must survive untouched
   */
  const untouchedContextFields: Array<[string, string]> = [
    ['userId', dashedLowercaseUuid],
    ['requestId', dashedUppercaseUuid],
    ['transactionId', compactUuid],
  ];

  for (const [field, original] of untouchedContextFields) {
    expect(event.context[field]).toBe(original);
  }

  expect(event.addons['vue']['props']['componentId']).toBe(dashedLowercaseUuid);
});

test('should not filter MongoDB ObjectId values that contain exactly 16 digits', async () => {
  /**
   * Each value is 24 hex characters: 16 digits followed by 8 hex letters.
   * Once the letters are stripped, the 16 digits fit a PAN pattern, so without
   * ObjectId detection these values would be wrongly filtered as card numbers.
   */
  const lowercaseObjectId = '4111111111111111abcdefab'; // digits start with 4
  const uppercaseObjectId = '5111111111111111ABCDEFAB'; // digits start with 51
  const mixedCaseObjectId = '2111111111111111AbCdEfAb'; // digits start with 21

  const event = generateEvent({
    context: {
      projectId: lowercaseObjectId,
      workspaceId: uppercaseObjectId,
      transactionId: mixedCaseObjectId,
    },
    addons: {
      hawk: {
        projectId: lowercaseObjectId,
      },
    },
  });

  dataFilter.processEvent(event);

  /**
   * Context field name -> value that must survive untouched
   */
  const untouchedContextFields: Array<[string, string]> = [
    ['projectId', lowercaseObjectId],
    ['workspaceId', uppercaseObjectId],
    ['transactionId', mixedCaseObjectId],
  ];

  for (const [field, original] of untouchedContextFields) {
    expect(event.context[field]).toBe(original);
  }

  expect(event.addons['hawk']['projectId']).toBe(lowercaseObjectId);
});

test('should still filter actual PAN numbers with formatting characters', async () => {
  /**
   * The same 16-digit test card number, written once with spaces and once with dashes.
   * The keys are not in the sensitive list, so filtering has to come from the value itself.
   */
  const event = generateEvent({
    context: {
      cardNumber: '5500 0000 0000 0004',
      paymentCard: '5500-0000-0000-0004',
    },
  });

  dataFilter.processEvent(event);

  for (const field of ['cardNumber', 'paymentCard']) {
    expect(event.context[field]).toBe('[filtered]');
  }
});

test('should not filter values that are not UUIDs, ObjectIds, or PANs', async () => {
  /**
   * Edge cases that must pass through the filter unchanged
   */
  const sixteenHexChars = '507f1f77bcf86cd7'; // 16 hex chars, not the 24 of an ObjectId
  const twentySixDigits = '67280841958304100309082499'; // 26 digits, too long for a PAN
  const lettersAndDigits = 'abc123def456ghi789'; // mixed content

  const event = generateEvent({
    context: {
      shortId: sixteenHexChars,
      longId: twentySixDigits,
      mixedId: lettersAndDigits,
    },
  });

  dataFilter.processEvent(event);

  /**
   * Context field name -> value that must survive untouched
   */
  const untouchedContextFields: Array<[string, string]> = [
    ['shortId', sixteenHexChars],
    ['longId', twentySixDigits],
    ['mixedId', lettersAndDigits],
  ];

  for (const [field, original] of untouchedContextFields) {
    expect(event.context[field]).toBe(original);
  }
});

test('should filter UUIDs and ObjectIds when they are in sensitive key fields', async () => {
  /**
   * A sensitive key name wins over the shape of the value:
   * a valid UUID or ObjectId stored under such a key must still be filtered.
   */
  const event = generateEvent({
    context: {
      password: '550e8400-e29b-41d4-a716-446655440000', // UUID
      secret: '507f1f77bcf86cd799439011', // ObjectId
      auth: '672808419583041003090824', // 24 digits
    },
  });

  dataFilter.processEvent(event);

  /**
   * Every field is filtered because of its key name
   */
  for (const sensitiveKey of ['password', 'secret', 'auth']) {
    expect(event.context[sensitiveKey]).toBe('[filtered]');
  }
});
});
});
Loading