Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 126 additions & 16 deletions apps/docs/app/api/search/route.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,126 @@
import { createFromSource } from 'fumadocs-core/search/server'
import { source } from '@/lib/source'

export const revalidate = 3600 // Revalidate every hour

export const { GET } = createFromSource(source, {
localeMap: {
en: { language: 'english' },
es: { language: 'spanish' },
fr: { language: 'french' },
de: { language: 'german' },
// ja and zh are not supported by the stemmer library, so we'll skip language config for them
ja: {},
zh: {},
},
})
import { sql } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { db, docsEmbeddings } from '@/lib/db'
import { generateSearchEmbedding } from '@/lib/embeddings'

export const runtime = 'nodejs'
export const revalidate = 0

/**
* Hybrid search API endpoint
* - English: Vector embeddings + keyword search
* - Other languages: Keyword search only
*/
export async function GET(request: NextRequest) {
try {
const searchParams = request.nextUrl.searchParams
const query = searchParams.get('query') || searchParams.get('q') || ''
const locale = searchParams.get('locale') || 'en'
const limit = Number.parseInt(searchParams.get('limit') || '10', 10)

if (!query || query.trim().length === 0) {
return NextResponse.json([])
}

const candidateLimit = limit * 3
const similarityThreshold = 0.6

const localeMap: Record<string, string> = {
en: 'english',
es: 'spanish',
fr: 'french',
de: 'german',
ja: 'simple', // PostgreSQL doesn't have Japanese support, use simple
zh: 'simple', // PostgreSQL doesn't have Chinese support, use simple
}
const tsConfig = localeMap[locale] || 'simple'

const useVectorSearch = locale === 'en'
let vectorResults: Array<{
chunkId: string
chunkText: string
sourceDocument: string
sourceLink: string
headerText: string
headerLevel: number
similarity: number
searchType: string
}> = []

if (useVectorSearch) {
const queryEmbedding = await generateSearchEmbedding(query)
vectorResults = await db
.select({
chunkId: docsEmbeddings.chunkId,
chunkText: docsEmbeddings.chunkText,
sourceDocument: docsEmbeddings.sourceDocument,
sourceLink: docsEmbeddings.sourceLink,
headerText: docsEmbeddings.headerText,
headerLevel: docsEmbeddings.headerLevel,
similarity: sql<number>`1 - (${docsEmbeddings.embedding} <=> ${JSON.stringify(queryEmbedding)}::vector)`,
searchType: sql<string>`'vector'`,
})
.from(docsEmbeddings)
.where(
sql`1 - (${docsEmbeddings.embedding} <=> ${JSON.stringify(queryEmbedding)}::vector) >= ${similarityThreshold}`
)
.orderBy(sql`${docsEmbeddings.embedding} <=> ${JSON.stringify(queryEmbedding)}::vector`)
.limit(candidateLimit)
}

const keywordResults = await db
.select({
chunkId: docsEmbeddings.chunkId,
chunkText: docsEmbeddings.chunkText,
sourceDocument: docsEmbeddings.sourceDocument,
sourceLink: docsEmbeddings.sourceLink,
headerText: docsEmbeddings.headerText,
headerLevel: docsEmbeddings.headerLevel,
similarity: sql<number>`ts_rank(${docsEmbeddings.chunkTextTsv}, plainto_tsquery(${tsConfig}, ${query}))`,
searchType: sql<string>`'keyword'`,
})
.from(docsEmbeddings)
.where(sql`${docsEmbeddings.chunkTextTsv} @@ plainto_tsquery(${tsConfig}, ${query})`)
.orderBy(
sql`ts_rank(${docsEmbeddings.chunkTextTsv}, plainto_tsquery(${tsConfig}, ${query})) DESC`
)
.limit(candidateLimit)

const seenIds = new Set<string>()
const mergedResults = []

for (let i = 0; i < Math.max(vectorResults.length, keywordResults.length); i++) {
if (i < vectorResults.length && !seenIds.has(vectorResults[i].chunkId)) {
mergedResults.push(vectorResults[i])
seenIds.add(vectorResults[i].chunkId)
}
if (i < keywordResults.length && !seenIds.has(keywordResults[i].chunkId)) {
mergedResults.push(keywordResults[i])
seenIds.add(keywordResults[i].chunkId)
}
}

const filteredResults = mergedResults.slice(0, limit)
const searchResults = filteredResults.map((result) => {
const title = result.headerText || result.sourceDocument.replace('.mdx', '')
const pathParts = result.sourceDocument
.replace('.mdx', '')
.split('/')
.map((part) => part.charAt(0).toUpperCase() + part.slice(1))

return {
id: result.chunkId,
type: 'page' as const,
url: result.sourceLink,
content: title,
breadcrumbs: pathParts,
}
})

return NextResponse.json(searchResults)
} catch (error) {
console.error('Semantic search error:', error)

return NextResponse.json([])
}
}
4 changes: 4 additions & 0 deletions apps/docs/lib/db.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import { db } from '@sim/db'
import { docsEmbeddings } from '@sim/db/schema'

export { db, docsEmbeddings }
40 changes: 40 additions & 0 deletions apps/docs/lib/embeddings.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/**
* Generate embeddings for search queries using OpenAI API
*/
export async function generateSearchEmbedding(query: string): Promise<number[]> {
const apiKey = process.env.OPENAI_API_KEY

if (!apiKey) {
throw new Error('OPENAI_API_KEY environment variable is required')
}

const response = await fetch('https://api.openai.com/v1/embeddings', {
method: 'POST',
headers: {
Authorization: `Bearer ${apiKey}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
input: query,
model: 'text-embedding-3-small',
encoding_format: 'float',
}),
})

if (!response.ok) {
const errorText = await response.text()
throw new Error(`OpenAI API failed: ${response.status} ${response.statusText} - ${errorText}`)
}

const data = await response.json()

if (!data?.data || !Array.isArray(data.data) || data.data.length === 0) {
throw new Error('OpenAI API returned invalid response structure: missing or empty data array')
}

if (!data.data[0]?.embedding || !Array.isArray(data.data[0].embedding)) {
throw new Error('OpenAI API returned invalid response structure: missing or invalid embedding')
}

return data.data[0].embedding
}
3 changes: 3 additions & 0 deletions apps/docs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,19 @@
"type-check": "tsc --noEmit"
},
"dependencies": {
"@sim/db": "workspace:*",
"@tabler/icons-react": "^3.31.0",
"@vercel/og": "^0.6.5",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"drizzle-orm": "^0.44.5",
"fumadocs-core": "16.2.3",
"fumadocs-mdx": "14.1.0",
"fumadocs-ui": "16.2.3",
"lucide-react": "^0.511.0",
"next": "16.1.0-canary.21",
"next-themes": "^0.4.6",
"postgres": "^3.4.5",
"react": "19.2.1",
"react-dom": "19.2.1",
"tailwind-merge": "^3.0.2"
Expand Down
3 changes: 3 additions & 0 deletions bun.lock
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,19 @@
"name": "docs",
"version": "0.0.0",
"dependencies": {
"@sim/db": "workspace:*",
"@tabler/icons-react": "^3.31.0",
"@vercel/og": "^0.6.5",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"drizzle-orm": "^0.44.5",
"fumadocs-core": "16.2.3",
"fumadocs-mdx": "14.1.0",
"fumadocs-ui": "16.2.3",
"lucide-react": "^0.511.0",
"next": "16.1.0-canary.21",
"next-themes": "^0.4.6",
"postgres": "^3.4.5",
"react": "19.2.1",
"react-dom": "19.2.1",
"tailwind-merge": "^3.0.2",
Expand Down
Loading