Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 168 additions & 0 deletions apps/sim/app/api/tools/brightdata/dataset/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import { randomUUID } from 'crypto'
import { createLogger } from '@sim/logger'
import { NextResponse } from 'next/server'

// Logger for this route, created with the name 'BrightDataDatasetAPI'.
const logger = createLogger('BrightDataDatasetAPI')

// Exported route setting. NOTE(review): presumably the Next.js route segment
// `maxDuration` option (maximum execution time, in seconds) — confirm against the framework docs.
export const maxDuration = 600

/** Maximum number of snapshot polls before giving up. */
const DATASET_MAX_POLL_ATTEMPTS = 600

/** Pause between two snapshot polls, in milliseconds. */
const DATASET_POLL_INTERVAL_MS = 1000

/**
 * Wall-clock budget for the whole request, in milliseconds. Kept below the
 * route's `maxDuration = 600` (assumed to be seconds) so the handler can still
 * answer with its own 504 instead of being cut off mid-poll.
 */
const DATASET_TIME_BUDGET_MS = 570 * 1000

/** Snapshot poll statuses that are reported immediately instead of being retried. */
const DATASET_FATAL_POLL_STATUSES = [400, 401, 403]

/** Parses `text` as JSON, falling back to the raw text when it is not valid JSON. */
function parseDatasetBody(text: string): unknown {
  try {
    return JSON.parse(text)
  } catch {
    return text
  }
}

/** Reads `key` from `payload` when it is a non-null object; otherwise yields `undefined`. */
function readDatasetField(payload: unknown, key: string): unknown {
  return typeof payload === 'object' && payload !== null
    ? (payload as Record<string, unknown>)[key]
    : undefined
}

/**
 * Builds a readable error message from an upstream payload's `error` field.
 * Object-valued errors are serialized as JSON rather than `[object Object]`;
 * a missing or empty error falls back to `fallback`.
 */
function describeDatasetError(payload: unknown, fallback: string): string {
  const raw = readDatasetField(payload, 'error')
  if (raw === undefined || raw === null) return fallback
  if (typeof raw === 'string') return raw || fallback
  return typeof raw === 'object' ? JSON.stringify(raw) : String(raw)
}

/**
 * Triggers a Bright Data dataset collection and polls until its snapshot is available.
 *
 * Expects a JSON object body with string `datasetId` and `apiToken`; every other
 * property is forwarded to the trigger endpoint as the single input record.
 *
 * Responses:
 * - 200 `{ data, snapshot_at }` once the snapshot is no longer running/building/starting
 * - 400 when the body is not valid JSON or required parameters are missing
 * - the upstream status when the trigger call fails, or when a snapshot poll
 *   answers 400/401/403
 * - 500 when no snapshot ID is returned or an unexpected error is thrown
 * - 504 when the snapshot is not ready within the polling budget
 */
export async function POST(request: Request) {
  const requestId = randomUUID().slice(0, 8)
  const deadline = Date.now() + DATASET_TIME_BUDGET_MS

  try {
    let body: unknown
    try {
      body = await request.json()
    } catch {
      // A malformed body is a client error, not a server failure.
      return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
    }

    const fields: Record<string, unknown> =
      typeof body === 'object' && body !== null && !Array.isArray(body)
        ? (body as Record<string, unknown>)
        : {}

    // Strip the credentials; everything else is the dataset input record.
    const { datasetId, apiToken, ...params } = fields

    if (typeof datasetId !== 'string' || !datasetId || typeof apiToken !== 'string' || !apiToken) {
      return NextResponse.json({ error: 'Missing required parameters' }, { status: 400 })
    }

    logger.info(`[${requestId}] Triggering dataset`, { datasetId })

    const triggerResponse = await fetch(
      `https://api.brightdata.com/datasets/v3/trigger?dataset_id=${encodeURIComponent(
        datasetId
      )}&include_errors=true`,
      {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${apiToken}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify([params]),
      }
    )

    const triggerPayload = parseDatasetBody(await triggerResponse.text())

    if (!triggerResponse.ok) {
      const errorMessage = describeDatasetError(triggerPayload, triggerResponse.statusText)

      logger.error(`[${requestId}] Dataset trigger failed`, {
        datasetId,
        status: triggerResponse.status,
        error: errorMessage,
      })

      return NextResponse.json(
        { error: errorMessage || 'Dataset trigger failed' },
        { status: triggerResponse.status }
      )
    }

    const snapshotId = String(readDatasetField(triggerPayload, 'snapshot_id') ?? '')

    if (!snapshotId) {
      logger.error(`[${requestId}] Dataset trigger missing snapshot ID`, { datasetId })
      return NextResponse.json({ error: 'No snapshot ID returned from request' }, { status: 500 })
    }

    logger.info(`[${requestId}] Dataset triggered`, { datasetId, snapshotId })

    // The ID comes from an upstream response, so encode it before placing it in a URL path.
    const snapshotUrl = `https://api.brightdata.com/datasets/v3/snapshot/${encodeURIComponent(
      snapshotId
    )}?format=json`

    for (
      let attempt = 0;
      attempt < DATASET_MAX_POLL_ATTEMPTS && Date.now() < deadline;
      attempt += 1
    ) {
      const snapshotResponse = await fetch(snapshotUrl, {
        method: 'GET',
        headers: {
          Authorization: `Bearer ${apiToken}`,
          'Content-Type': 'application/json',
        },
      })

      const snapshotPayload = parseDatasetBody(await snapshotResponse.text())

      if (!snapshotResponse.ok) {
        // Bad request / auth failures will not fix themselves; report them right away.
        if (DATASET_FATAL_POLL_STATUSES.includes(snapshotResponse.status)) {
          const errorMessage = describeDatasetError(snapshotPayload, snapshotResponse.statusText)

          logger.error(`[${requestId}] Dataset snapshot fetch failed`, {
            datasetId,
            snapshotId,
            status: snapshotResponse.status,
            error: errorMessage,
          })

          return NextResponse.json(
            { error: errorMessage || 'Dataset snapshot fetch failed' },
            { status: snapshotResponse.status }
          )
        }

        // Any other failure is treated as transient and retried after a pause.
        await new Promise((resolve) => setTimeout(resolve, DATASET_POLL_INTERVAL_MS))
        continue
      }

      const status = String(readDatasetField(snapshotPayload, 'status') ?? '')

      if (['running', 'building', 'starting'].includes(status)) {
        await new Promise((resolve) => setTimeout(resolve, DATASET_POLL_INTERVAL_MS))
        continue
      }

      const rawSnapshotAt = readDatasetField(snapshotPayload, 'snapshot_at')
      const snapshotAt =
        rawSnapshotAt === undefined || rawSnapshotAt === null ? '' : String(rawSnapshotAt)

      logger.info(`[${requestId}] Dataset snapshot received`, { datasetId, snapshotId })

      return NextResponse.json({
        data: snapshotPayload,
        // An empty string is dropped from the JSON response rather than sent as ''.
        snapshot_at: snapshotAt || undefined,
      })
    }

    logger.error(`[${requestId}] Dataset snapshot timed out`, { datasetId, snapshotId })
    return NextResponse.json({ error: 'Timeout waiting for dataset snapshot' }, { status: 504 })
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Dataset fetch failed'
    logger.error(`[${requestId}] Dataset fetch failed`, { error: message })
    return NextResponse.json({ error: message }, { status: 500 })
  }
}
87 changes: 87 additions & 0 deletions apps/sim/app/api/tools/brightdata/scrape-markdown/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import { randomUUID } from 'crypto'
import { createLogger } from '@sim/logger'
import { NextResponse } from 'next/server'

// Logger for this route, created with the name 'BrightDataScrapeMarkdownAPI'.
const logger = createLogger('BrightDataScrapeMarkdownAPI')

/** Reads `key` from `payload` when it is a non-null object; otherwise yields `undefined`. */
function readScrapeField(payload: unknown, key: string): unknown {
  return typeof payload === 'object' && payload !== null
    ? (payload as Record<string, unknown>)[key]
    : undefined
}

/**
 * Builds a readable error message from an upstream payload's `error` field.
 * Object-valued errors are serialized as JSON rather than `[object Object]`;
 * a missing or empty error falls back to `fallback`.
 */
function describeScrapeError(payload: unknown, fallback: string): string {
  const raw = readScrapeField(payload, 'error')
  if (raw === undefined || raw === null) return fallback
  if (typeof raw === 'string') return raw || fallback
  return typeof raw === 'object' ? JSON.stringify(raw) : String(raw)
}

/**
 * Scrapes a URL through the Bright Data request API and returns it as markdown.
 *
 * Expects a JSON object body with string `url` and `apiToken`, plus an optional
 * string `unlockerZone` (defaults to 'mcp_unlocker').
 *
 * Responses:
 * - 200 `{ markdown, url, title }`
 * - 400 when the body is not valid JSON or required parameters are missing
 * - the upstream status when the Bright Data call fails
 * - 500 when an unexpected error is thrown
 */
export async function POST(request: Request) {
  const requestId = randomUUID().slice(0, 8)

  try {
    let body: unknown
    try {
      body = await request.json()
    } catch {
      // A malformed body is a client error, not a server failure.
      return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
    }

    const url = readScrapeField(body, 'url')
    const apiToken = readScrapeField(body, 'apiToken')
    const unlockerZone = readScrapeField(body, 'unlockerZone')

    if (typeof url !== 'string' || !url || typeof apiToken !== 'string' || !apiToken) {
      return NextResponse.json({ error: 'Missing required parameters' }, { status: 400 })
    }

    logger.info(`[${requestId}] Scraping URL as markdown`, { url })

    const response = await fetch('https://api.brightdata.com/request', {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${apiToken}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        zone: typeof unlockerZone === 'string' && unlockerZone ? unlockerZone : 'mcp_unlocker',
        url,
        format: 'raw',
        data_format: 'markdown',
      }),
    })

    const responseText = await response.text()
    let payload: unknown = responseText

    try {
      payload = JSON.parse(responseText)
    } catch {
      // Not JSON: keep the raw text, which is the markdown itself.
      payload = responseText
    }

    if (!response.ok) {
      const errorMessage = describeScrapeError(payload, response.statusText)

      logger.error(`[${requestId}] Scraping failed`, {
        url,
        status: response.status,
        error: errorMessage,
      })

      return NextResponse.json(
        { error: errorMessage || 'Scraping failed' },
        { status: response.status }
      )
    }

    let markdown: string
    if (typeof payload === 'string') {
      markdown = payload
    } else if (typeof payload === 'object' && payload !== null && 'markdown' in payload) {
      markdown = String((payload as { markdown?: unknown }).markdown ?? '')
    } else {
      markdown = JSON.stringify(payload)
    }

    const rawTitle = readScrapeField(payload, 'title')
    const title = rawTitle === undefined || rawTitle === null ? '' : String(rawTitle)

    logger.info(`[${requestId}] Scraping completed`, { url })

    return NextResponse.json({
      markdown,
      url,
      // Review note: `|| undefined` was flagged as redundant, but `title` can be an
      // empty string here; the fallback is what omits the key from the JSON response.
      title: title || undefined,
    })
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Scraping failed'
    logger.error(`[${requestId}] Scraping failed`, { error: message })
    return NextResponse.json({ error: message }, { status: 500 })
  }
}
105 changes: 105 additions & 0 deletions apps/sim/app/api/tools/brightdata/search-engine/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import { randomUUID } from 'crypto'
import { createLogger } from '@sim/logger'
import { NextResponse } from 'next/server'

// Logger for this route, created with the name 'BrightDataSearchEngineAPI'.
const logger = createLogger('BrightDataSearchEngineAPI')

/** Reads `key` from `payload` when it is a non-null object; otherwise yields `undefined`. */
function readSearchField(payload: unknown, key: string): unknown {
  return typeof payload === 'object' && payload !== null
    ? (payload as Record<string, unknown>)[key]
    : undefined
}

/**
 * Builds a readable error message from an upstream payload's `error` field.
 * Object-valued errors are serialized as JSON rather than `[object Object]`;
 * a missing or empty error falls back to `fallback`.
 */
function describeSearchError(payload: unknown, fallback: string): string {
  const raw = readSearchField(payload, 'error')
  if (raw === undefined || raw === null) return fallback
  if (typeof raw === 'string') return raw || fallback
  return typeof raw === 'object' ? JSON.stringify(raw) : String(raw)
}

/**
 * Runs a Google search through the Bright Data request API and returns
 * normalized organic results.
 *
 * Expects a JSON object body with string `query` and `apiToken`, an optional
 * string `unlockerZone` (defaults to 'mcp_unlocker') and an optional
 * `maxResults` (number or numeric string). Only a positive, finite `maxResults`
 * limits the output; zero, negative or non-numeric values return every result.
 *
 * Responses:
 * - 200 `{ results: Array<{ title, url, snippet }> }`
 * - 400 when the body is not valid JSON or required parameters are missing
 * - the upstream status when the Bright Data call fails
 * - 500 when an unexpected error is thrown
 */
export async function POST(request: Request) {
  const requestId = randomUUID().slice(0, 8)

  try {
    let body: unknown
    try {
      body = await request.json()
    } catch {
      // A malformed body is a client error, not a server failure.
      return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
    }

    const query = readSearchField(body, 'query')
    const apiToken = readSearchField(body, 'apiToken')
    const unlockerZone = readSearchField(body, 'unlockerZone')
    const rawMaxResults = readSearchField(body, 'maxResults')
    const maxResults =
      typeof rawMaxResults === 'number'
        ? rawMaxResults
        : typeof rawMaxResults === 'string'
          ? Number(rawMaxResults)
          : undefined

    if (typeof query !== 'string' || !query || typeof apiToken !== 'string' || !apiToken) {
      return NextResponse.json({ error: 'Missing required parameters' }, { status: 400 })
    }

    logger.info(`[${requestId}] Searching`, { query, maxResults })

    const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(query)}&start=0&brd_json=1`

    const response = await fetch('https://api.brightdata.com/request', {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${apiToken}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        zone: typeof unlockerZone === 'string' && unlockerZone ? unlockerZone : 'mcp_unlocker',
        url: searchUrl,
        format: 'raw',
        data_format: 'parsed_light',
      }),
    })

    const responseText = await response.text()
    let payload: unknown = responseText

    try {
      payload = JSON.parse(responseText)
    } catch {
      payload = responseText
    }

    if (!response.ok) {
      const errorMessage = describeSearchError(payload, response.statusText)

      logger.error(`[${requestId}] Search failed`, {
        query,
        status: response.status,
        error: errorMessage,
      })

      return NextResponse.json(
        { error: errorMessage || 'Search failed' },
        { status: response.status }
      )
    }

    const organic = readSearchField(payload, 'organic')
    const entries: unknown[] = Array.isArray(organic) ? organic : []

    // Keep only entries that carry both a title and a link; the snippet is optional.
    const normalizedResults = entries.flatMap(
      (entry): Array<{ title: string; url: string; snippet: string }> => {
        const title = readSearchField(entry, 'title')
        const link = readSearchField(entry, 'link')
        const description = readSearchField(entry, 'description')
        if (typeof title !== 'string' || !title || typeof link !== 'string' || !link) return []
        return [{ title, url: link, snippet: typeof description === 'string' ? description : '' }]
      }
    )

    // Only a positive finite limit truncates. A negative value passed to `slice`
    // would count from the end and silently drop trailing results.
    const maxCount =
      typeof maxResults === 'number' && Number.isFinite(maxResults) && maxResults > 0
        ? Math.floor(maxResults)
        : undefined
    // Review note: slicing happens only when a limit was supplied; otherwise the
    // full normalized list is returned untouched.
    const results =
      maxCount === undefined ? normalizedResults : normalizedResults.slice(0, maxCount)

    logger.info(`[${requestId}] Search completed`, { resultCount: results.length })

    return NextResponse.json({
      results,
    })
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Search failed'
    logger.error(`[${requestId}] Search failed`, { error: message })
    return NextResponse.json({ error: message }, { status: 500 })
  }
}
Loading