From 321d4c1ddda742327717ad64fdd2fc6ff150e3ba Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 9 Dec 2025 11:29:09 +0000 Subject: [PATCH 01/49] feat: Add bulk embedding and ingest mode Co-authored-by: jdavid10001 --- README.md | 27 +- dev/helpers/embed.ts | 36 +++ dev/payload.config.ts | 3 + dev/specs/bulkEmbed.spec.ts | 191 +++++++++++++ dev/specs/config.spec.ts | 24 +- src/admin/components/EmbedAllButton.tsx | 54 ++++ src/collections/bulkEmbeddingsRuns.ts | 111 ++++++++ src/collections/embeddings.ts | 8 + src/endpoints/bulkEmbed.ts | 61 ++++ src/exports/client.ts | 1 + src/exports/rsc.ts | 2 + src/index.ts | 35 ++- src/tasks/bulkEmbedAll.ts | 357 ++++++++++++++++++++++++ src/types.ts | 85 ++++++ 14 files changed, 989 insertions(+), 6 deletions(-) create mode 100644 dev/specs/bulkEmbed.spec.ts create mode 100644 src/admin/components/EmbedAllButton.tsx create mode 100644 src/collections/bulkEmbeddingsRuns.ts create mode 100644 src/endpoints/bulkEmbed.ts create mode 100644 src/exports/client.ts create mode 100644 src/exports/rsc.ts create mode 100644 src/tasks/bulkEmbedAll.ts diff --git a/README.md b/README.md index fd617c8..ec36941 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ A Payload CMS plugin that adds vector search capabilities to your collections us - 🔍 **Semantic Search**: Vectorize any collection for intelligent content discovery - 🚀 **Automatic**: Documents are automatically vectorized when created or updated, and vectors are deleted as soon as the document is deleted. +- 🧵 **Bulk embedding**: Run “Embed all” batches that backfill only documents missing the current `embeddingVersion`. - 📊 **PostgreSQL Integration**: Built on pgvector for high-performance vector operations - ⚡ **Background Processing**: Uses Payload's job system for non-blocking vectorization - 🎯 **Flexible Chunking**: Drive chunk creation yourself with `toKnowledgePool` functions so you can combine any fields or content types @@ -189,6 +190,8 @@ The embeddings collection name will be the same as the knowledge pool name. - `embedQuery`: `EmbedQueryFn` - Function to embed search queries - `embeddingVersion`: `string` - Version string for tracking model changes - `extensionFields?`: `Field[]` - Optional fields to extend the embeddings collection schema +- `ingestMode?`: `'realtime' | 'bulk'` - Default `realtime` queues embeddings immediately. `bulk` skips realtime embedding, deletes stale vectors on updates, and relies on the bulk job to backfill. +- `bulkEmbeddings?`: Provider-specific callbacks for batch embedding (`prepareBulkEmbeddings`, `pollBulkEmbeddings`, `completeBulkEmbeddings`). If omitted, the plugin falls back to using `embedDocs` in-process. #### CollectionVectorizeOption @@ -299,6 +302,27 @@ Search for similar content using vector similarity. } ``` +### Bulk embedding (Embed all) + +- Each knowledge pool’s embeddings list shows an **Embed all** admin button that queues a `payloadcms-vectorize:bulk-embed-all` job. +- Bulk runs only include documents that are missing embeddings for the pool’s current `embeddingVersion`. +- Progress is recorded in the `vector-bulk-embeddings-runs` collection (fields: `pool`, `embeddingVersion`, `providerBatchId`, `status`, counts, timestamps, `error`). 
+- Endpoint: **POST** `/api/vector-bulk-embed` + +```jsonc +{ + "knowledgePool": "main" +} +``` + +Bulk callbacks are provider-agnostic: + +- `prepareBulkEmbeddings({ payload, knowledgePool, embeddingVersion, inputs })` +- `pollBulkEmbeddings({ payload, knowledgePool, providerBatchId })` +- `completeBulkEmbeddings({ payload, knowledgePool, providerBatchId })` + +If `bulkEmbeddings` is not provided, the plugin falls back to running `embedDocs` locally. + ## Changelog See [CHANGELOG.md](./CHANGELOG.md) for release history, migration notes, and upgrade guides. @@ -339,13 +363,12 @@ Thank you for the stars! The following updates have been completed: - **Multiple Knowledge Pools**: You can create separate knowledge pools with independent configurations (dims, ivfflatLists, embedding functions) and needs. Each pool operates independently, allowing you to organize your vectorized content by domain, use case, or any other criteria that makes sense for your application. - **More expressive queries**: Added ability to change query limit, search on certain collections or certain fields +- **Bulk embed all**: Batch backfills with admin button, provider callbacks, and run tracking. The following features are planned for future releases based on community interest and stars: - **Migrations for vector dimensions**: Easy migration tools for changing vector dimensions and/or ivfflatLists after initial setup - **MongoDB support**: Extend vector search capabilities to MongoDB databases - **Vercel support**: Optimized deployment and configuration for Vercel hosting -- **Batch embedding**: More efficient bulk embedding operations for large datasets -- **'Embed all' button**: Admin UI button to re-embed all content after embeddingVersion changes **Want to see these features sooner?** Star this repository and open issues for the features you need most! diff --git a/dev/helpers/embed.ts b/dev/helpers/embed.ts index 18ac59d..d70b87e 100644 --- a/dev/helpers/embed.ts +++ b/dev/helpers/embed.ts @@ -1,5 +1,6 @@ import { voyage } from 'voyage-ai-provider' import { embed, embedMany } from 'ai' +import type { BulkEmbeddingsCallbacks } from 'payloadcms-vectorize' export const voyageEmbedDocs = async (texts: string[]): Promise => { const embedResult = await embedMany({ @@ -54,3 +55,38 @@ export function makeDummyEmbedDocs(dims: number) { } } export const testEmbeddingVersion = 'test-v1' + +export function makeLocalBulkEmbeddingsCallbacks(dims: number): BulkEmbeddingsCallbacks { + const pendingInputs = new Map>() + const embedDocs = makeDummyEmbedDocs(dims) + return { + prepareBulkEmbeddings: async ({ inputs }) => { + const providerBatchId = `local-${dims}-${Date.now()}` + pendingInputs.set(providerBatchId, inputs) + return { + providerBatchId, + status: 'queued', + counts: { inputs: inputs.length }, + } + }, + pollBulkEmbeddings: async ({ providerBatchId }) => { + if (!pendingInputs.has(providerBatchId)) { + return { status: 'failed', error: 'unknown batch' } + } + return { status: 'succeeded' } + }, + completeBulkEmbeddings: async ({ providerBatchId }) => { + const inputs = pendingInputs.get(providerBatchId) || [] + const embeddings = await embedDocs(inputs.map((i) => i.text)) + pendingInputs.delete(providerBatchId) + return { + status: 'succeeded', + outputs: embeddings.map((vector, idx) => ({ + id: inputs[idx]?.id ?? 
String(idx), + embedding: vector, + })), + counts: { inputs: inputs.length, succeeded: embeddings.length, failed: 0 }, + } + }, + } +} diff --git a/dev/payload.config.ts b/dev/payload.config.ts index 74ea031..2b376ce 100644 --- a/dev/payload.config.ts +++ b/dev/payload.config.ts @@ -9,6 +9,7 @@ import { voyageEmbedDocs, voyageEmbedQuery, makeDummyEmbedQuery, + makeLocalBulkEmbeddingsCallbacks, } from './helpers/embed.js' import sharp from 'sharp' import { fileURLToPath } from 'url' @@ -122,6 +123,8 @@ const buildConfigWithPostgres = async () => { embedDocs, embedQuery, embeddingVersion: testEmbeddingVersion, + ingestMode: 'realtime', + bulkEmbeddings: makeLocalBulkEmbeddingsCallbacks(dims), }, }, }), diff --git a/dev/specs/bulkEmbed.spec.ts b/dev/specs/bulkEmbed.spec.ts new file mode 100644 index 0000000..386d319 --- /dev/null +++ b/dev/specs/bulkEmbed.spec.ts @@ -0,0 +1,191 @@ +import type { Payload, SanitizedConfig } from 'payload' + +import { buildConfig, getPayload } from 'payload' +import { beforeAll, describe, expect, test } from 'vitest' +import { postgresAdapter } from '@payloadcms/db-postgres' +import { lexicalEditor } from '@payloadcms/richtext-lexical' +import { createVectorizeIntegration } from 'payloadcms-vectorize' +import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../src/collections/bulkEmbeddingsRuns.js' +import { createBulkEmbedAllTask } from '../../src/tasks/bulkEmbedAll.js' +import { createTestDb } from './utils.js' +import { makeDummyEmbedDocs, makeDummyEmbedQuery, makeLocalBulkEmbeddingsCallbacks, testEmbeddingVersion } from 'helpers/embed.js' + +const DIMS = 8 + +describe('Bulk embed ingest mode', () => { + let payload: Payload + let config: SanitizedConfig + const dbName = 'bulk_embed_test' + + const integration = createVectorizeIntegration({ + default: { + dims: DIMS, + ivfflatLists: 1, + }, + }) + + const pluginOptions = { + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc: any) => [{ chunk: doc.title }], + }, + }, + embedDocs: makeDummyEmbedDocs(DIMS), + embedQuery: makeDummyEmbedQuery(DIMS), + embeddingVersion: testEmbeddingVersion, + ingestMode: 'bulk' as const, + bulkEmbeddings: makeLocalBulkEmbeddingsCallbacks(DIMS), + }, + }, + } + + beforeAll(async () => { + await createTestDb({ dbName }) + config = await buildConfig({ + secret: 'test-secret', + editor: lexicalEditor(), + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: ['vector'], + afterSchemaInit: [integration.afterSchemaInitHook], + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, + }, + }), + plugins: [integration.payloadcmsVectorize(pluginOptions)], + jobs: { tasks: [] }, + }) + + payload = await getPayload({ config }) + }) + + test('queues no realtime embeddings and bulk job backfills missing docs', async () => { + const post = await payload.create({ + collection: 'posts', + data: { title: 'Bulk Mode Title' } as any, + }) + + const initialEmbeds = await payload.find({ + collection: 'default', + where: { + and: [ + { sourceCollection: { equals: 'posts' } }, + { docId: { equals: String(post.id) } }, + ], + }, + }) + expect(initialEmbeds.totalDocs).toBe(0) + + const run = await payload.create({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + pool: 'default', + embeddingVersion: testEmbeddingVersion, + status: 'queued', + }, + }) + + const bulkTask = createBulkEmbedAllTask({ + knowledgePools: pluginOptions.knowledgePools, + }) + + await 
bulkTask.handler({ + input: { runId: String(run.id) }, + req: { payload } as any, + }) + + const embeds = await payload.find({ + collection: 'default', + where: { + and: [ + { sourceCollection: { equals: 'posts' } }, + { docId: { equals: String(post.id) } }, + ], + }, + }) + expect(embeds.totalDocs).toBeGreaterThan(0) + expect(embeds.docs[0]?.chunkText).toContain('Bulk Mode Title') + + const runDoc = await payload.findByID({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + id: run.id, + }) + expect(runDoc.status).toBe('succeeded') + expect(runDoc.inputs).toBeGreaterThan(0) + }) + + test('document updates clear stale embeddings and rerun populates new chunks', async () => { + const post = await payload.create({ + collection: 'posts', + data: { title: 'Original' } as any, + }) + + // First run to embed + const firstRun = await payload.create({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + pool: 'default', + embeddingVersion: testEmbeddingVersion, + status: 'queued', + }, + }) + const bulkTask = createBulkEmbedAllTask({ + knowledgePools: pluginOptions.knowledgePools, + }) + await bulkTask.handler({ + input: { runId: String(firstRun.id) }, + req: { payload } as any, + }) + + // Update document - should delete embeddings in bulk mode + await payload.update({ + collection: 'posts', + id: post.id, + data: { title: 'Updated Title' } as any, + }) + + const afterUpdateEmbeds = await payload.find({ + collection: 'default', + where: { + and: [ + { sourceCollection: { equals: 'posts' } }, + { docId: { equals: String(post.id) } }, + ], + }, + }) + expect(afterUpdateEmbeds.totalDocs).toBe(0) + + // Run again to backfill + const secondRun = await payload.create({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + pool: 'default', + embeddingVersion: testEmbeddingVersion, + status: 'queued', + }, + }) + await bulkTask.handler({ + input: { runId: String(secondRun.id) }, + req: { payload } as any, + }) + + const embedsAfterRerun = await payload.find({ + collection: 'default', + where: { + and: [ + { sourceCollection: { equals: 'posts' } }, + { docId: { equals: String(post.id) } }, + ], + }, + }) + expect(embedsAfterRerun.totalDocs).toBeGreaterThan(0) + expect(embedsAfterRerun.docs[0]?.chunkText).toContain('Updated Title') + }) +}) diff --git a/dev/specs/config.spec.ts b/dev/specs/config.spec.ts index f6457f6..0a467f2 100644 --- a/dev/specs/config.spec.ts +++ b/dev/specs/config.spec.ts @@ -6,9 +6,12 @@ describe('jobs.tasks merging', () => { const cfg = await buildDummyConfig({ jobs: { tasks: [] } }) const tasks = cfg.jobs?.tasks expect(Array.isArray(tasks)).toBe(true) - expect(tasks).toEqual([ - { slug: 'payloadcms-vectorize:vectorize', handler: expect.any(Function) }, - ]) + expect(tasks).toEqual( + expect.arrayContaining([ + { slug: 'payloadcms-vectorize:vectorize', handler: expect.any(Function) }, + { slug: 'payloadcms-vectorize:bulk-embed-all', handler: expect.any(Function) }, + ]), + ) }) }) @@ -24,6 +27,11 @@ describe('/vector-search endpoint', () => { method: 'post', handler: expect.any(Function), }), + expect.objectContaining({ + path: '/vector-bulk-embed', + method: 'post', + handler: expect.any(Function), + }), ]), ) }) @@ -40,6 +48,11 @@ describe('/vector-search endpoint', () => { method: 'post', handler: expect.any(Function), }), + expect.objectContaining({ + path: '/vector-bulk-embed', + method: 'post', + handler: expect.any(Function), + }), ]), ) }) @@ -56,6 +69,11 @@ describe('/vector-search endpoint', () => { method: 'post', handler: expect.any(Function), }), + expect.objectContaining({ + 
path: '/vector-bulk-embed', + method: 'post', + handler: expect.any(Function), + }), ]), ) }) diff --git a/src/admin/components/EmbedAllButton.tsx b/src/admin/components/EmbedAllButton.tsx new file mode 100644 index 0000000..666f2de --- /dev/null +++ b/src/admin/components/EmbedAllButton.tsx @@ -0,0 +1,54 @@ +'use client' + +import React, { useState } from 'react' + +type EmbedAllButtonProps = { + collectionSlug: string + hasCreatePermission?: boolean + newDocumentURL?: string +} + +export const EmbedAllButton: React.FC = ({ collectionSlug }) => { + const [isSubmitting, setIsSubmitting] = useState(false) + const [message, setMessage] = useState(null) + + const handleClick = async () => { + setIsSubmitting(true) + setMessage(null) + try { + const res = await fetch('/api/vector-bulk-embed', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ knowledgePool: collectionSlug }), + }) + const data = await res.json() + if (!res.ok) { + setMessage(data?.error || 'Failed to queue bulk embed run') + return + } + setMessage(`Queued bulk embed run ${data.runId}`) + } catch (error: any) { + setMessage(error?.message || 'Failed to queue bulk embed run') + } finally { + setIsSubmitting(false) + } + } + + return ( +
+    <div>
+      <button type="button" onClick={handleClick} disabled={isSubmitting}>
+        Embed all
+      </button>
+      {message ? <span>{message}</span> : null}
+    </div>
+ ) +} + +export default EmbedAllButton diff --git a/src/collections/bulkEmbeddingsRuns.ts b/src/collections/bulkEmbeddingsRuns.ts new file mode 100644 index 0000000..ea8dbe3 --- /dev/null +++ b/src/collections/bulkEmbeddingsRuns.ts @@ -0,0 +1,111 @@ +import type { CollectionConfig } from 'payload' +import type { BulkEmbeddingRunStatus } from '../types.js' + +export const BULK_EMBEDDINGS_RUNS_SLUG = 'vector-bulk-embeddings-runs' + +const statusOptions: BulkEmbeddingRunStatus[] = [ + 'queued', + 'running', + 'succeeded', + 'failed', + 'canceled', +] + +export const createBulkEmbeddingsRunsCollection = (): CollectionConfig => ({ + slug: BULK_EMBEDDINGS_RUNS_SLUG, + admin: { + useAsTitle: 'pool', + description: + 'Bulk embedding run records. Created automatically when the Embed all action is triggered.', + defaultColumns: ['pool', 'status', 'inputs', 'succeeded', 'failed', 'submittedAt'], + }, + access: { + read: () => true, + create: () => true, + update: () => true, + delete: () => false, + }, + fields: [ + { + name: 'pool', + type: 'text', + required: true, + admin: { + description: 'Knowledge pool slug', + }, + }, + { + name: 'embeddingVersion', + type: 'text', + required: true, + admin: { + description: 'Embedding version at submission time', + }, + }, + { + name: 'inputFileRef', + type: 'text', + admin: { + description: 'Provider file or input reference used for the batch', + }, + }, + { + name: 'providerBatchId', + type: 'text', + admin: { + description: 'Provider batch identifier', + }, + }, + { + name: 'status', + type: 'select', + options: statusOptions.map((value) => ({ value, label: value })), + required: true, + defaultValue: 'queued', + }, + { + name: 'inputs', + type: 'number', + defaultValue: 0, + }, + { + name: 'succeeded', + type: 'number', + defaultValue: 0, + }, + { + name: 'failed', + type: 'number', + defaultValue: 0, + }, + { + name: 'submittedAt', + type: 'date', + admin: { description: 'Timestamp when the batch was submitted' }, + }, + { + name: 'completedAt', + type: 'date', + admin: { description: 'Timestamp when the batch finished' }, + }, + { + name: 'error', + type: 'textarea', + admin: { + description: 'Failure reason if the run ended in error', + }, + }, + ], + timestamps: true, + indexes: [ + { + fields: ['pool'], + }, + { + fields: ['providerBatchId'], + }, + { + fields: ['status'], + }, + ], +}) diff --git a/src/collections/embeddings.ts b/src/collections/embeddings.ts index 2b02bd7..5637b50 100644 --- a/src/collections/embeddings.ts +++ b/src/collections/embeddings.ts @@ -25,6 +25,14 @@ export const createEmbeddingsCollection = ( admin: { description: 'Vector embeddings for search and similarity queries. Created by the payloadcms-vectorize plugin. Embeddings cannot be added or modified, only deleted, through the admin panel. 
No other restrictions enforced.', + components: { + beforeList: [ + { + path: 'payloadcms-vectorize/client#EmbedAllButton', + exportName: 'EmbedAllButton', + }, + ], + }, }, access: { create: () => false, // Cannot add new embeddings through admin panel diff --git a/src/endpoints/bulkEmbed.ts b/src/endpoints/bulkEmbed.ts new file mode 100644 index 0000000..edd5b31 --- /dev/null +++ b/src/endpoints/bulkEmbed.ts @@ -0,0 +1,61 @@ +import type { PayloadHandler } from 'payload' +import { BULK_EMBEDDINGS_RUNS_SLUG } from '../collections/bulkEmbeddingsRuns.js' +import type { KnowledgePoolDynamicConfig, KnowledgePoolName } from '../types.js' + +export const createBulkEmbedHandler = ( + knowledgePools: Record, + queueName?: string, +): PayloadHandler => { + const handler: PayloadHandler = async (req) => { + if (!req || !req.json) { + return Response.json({ error: 'Request is required' }, { status: 400 }) + } + try { + const body = await req.json() + const knowledgePool = body?.knowledgePool as KnowledgePoolName + if (!knowledgePool) { + return Response.json( + { error: 'knowledgePool is required and must be a string' }, + { status: 400 }, + ) + } + const poolConfig = knowledgePools[knowledgePool] + if (!poolConfig) { + return Response.json( + { error: `Knowledge pool "${knowledgePool}" not found` }, + { status: 400 }, + ) + } + + const payload = req.payload + const run = await payload.create({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + pool: knowledgePool, + embeddingVersion: poolConfig.embeddingVersion, + status: 'queued', + }, + }) + + await payload.jobs.queue<'payloadcms-vectorize:bulk-embed-all'>({ + task: 'payloadcms-vectorize:bulk-embed-all', + input: { + runId: String(run.id), + }, + req, + ...(queueName ? { queue: queueName } : {}), + }) + + return Response.json( + { + runId: String(run.id), + status: 'queued', + }, + { status: 202 }, + ) + } catch (error) { + return Response.json({ error: 'Failed to queue bulk embed run' }, { status: 500 }) + } + } + return handler +} diff --git a/src/exports/client.ts b/src/exports/client.ts new file mode 100644 index 0000000..eaa8a1d --- /dev/null +++ b/src/exports/client.ts @@ -0,0 +1 @@ +export { EmbedAllButton } from '../admin/components/EmbedAllButton.js' diff --git a/src/exports/rsc.ts b/src/exports/rsc.ts new file mode 100644 index 0000000..e9a98cc --- /dev/null +++ b/src/exports/rsc.ts @@ -0,0 +1,2 @@ +// Placeholder RSC export; no server-specific components yet. 
+export {} diff --git a/src/index.ts b/src/index.ts index 68a3b3f..15ec71d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -14,6 +14,9 @@ import type { PostgresAdapterArgs } from '@payloadcms/db-postgres' import { createVectorizeTask } from './tasks/vectorize.js' import { createVectorSearchHandler } from './endpoints/vectorSearch.js' import { clearEmbeddingsTables, registerEmbeddingsTable } from './drizzle/tables.js' +import { createBulkEmbeddingsRunsCollection, BULK_EMBEDDINGS_RUNS_SLUG } from './collections/bulkEmbeddingsRuns.js' +import { createBulkEmbedAllTask } from './tasks/bulkEmbedAll.js' +import { createBulkEmbedHandler } from './endpoints/bulkEmbed.js' export type * from './types.js' @@ -119,6 +122,12 @@ export const createVectorizeIntegration = // Ensure collections array exists config.collections = [...(config.collections || [])] + // Ensure bulk runs collection exists once + const bulkRunsCollection = createBulkEmbeddingsRunsCollection() + if (!config.collections.find((c) => c.slug === BULK_EMBEDDINGS_RUNS_SLUG)) { + config.collections.push(bulkRunsCollection) + } + // Validate static/dynamic configs share the same pool names for (const poolName in pluginOptions.knowledgePools) { if (!staticConfigs[poolName]) { @@ -182,6 +191,10 @@ export const createVectorizeIntegration = knowledgePools: pluginOptions.knowledgePools, }) tasks.push(vectorizeTask) + const bulkEmbedTask = createBulkEmbedAllTask({ + knowledgePools: pluginOptions.knowledgePools, + }) + tasks.push(bulkEmbedTask) config.jobs = { ...incomingJobs, @@ -208,6 +221,20 @@ export const createVectorizeIntegration = const collectionConfig = dynamic.collections[collectionSlug] if (!collectionConfig) continue + if ((dynamic.ingestMode || 'realtime') === 'bulk') { + // In bulk mode, clear stale embeddings and let the bulk job recreate them + await payload.delete({ + collection: pool, + where: { + and: [ + { sourceCollection: { equals: collectionSlug } }, + { docId: { equals: String(doc.id) } }, + ], + }, + }) + continue + } + await payload.jobs.queue<'payloadcms-vectorize:vectorize'>({ task: 'payloadcms-vectorize:vectorize', input: { @@ -270,14 +297,20 @@ export const createVectorizeIntegration = if (pluginOptions.endpointOverrides?.enabled !== false) { const path = pluginOptions.endpointOverrides?.path || '/vector-search' const inputEndpoints = config.endpoints || [] - config.endpoints = [ + const endpoints = [ ...inputEndpoints, { path, method: 'post', handler: createVectorSearchHandler(pluginOptions.knowledgePools), }, + { + path: '/vector-bulk-embed', + method: 'post', + handler: createBulkEmbedHandler(pluginOptions.knowledgePools, pluginOptions.queueName), + }, ] + config.endpoints = endpoints } return config diff --git a/src/tasks/bulkEmbedAll.ts b/src/tasks/bulkEmbedAll.ts new file mode 100644 index 0000000..4a35cb2 --- /dev/null +++ b/src/tasks/bulkEmbedAll.ts @@ -0,0 +1,357 @@ +import { Payload, TaskConfig, TaskHandlerResult } from 'payload' +import { + BulkEmbeddingInput, + BulkEmbeddingsCallbacks, + KnowledgePoolDynamicConfig, + KnowledgePoolName, +} from '../types.js' +import { BULK_EMBEDDINGS_RUNS_SLUG } from '../collections/bulkEmbeddingsRuns.js' +import { isPostgresPayload, PostgresPayload } from '../types.js' + +type BulkEmbedAllTaskInput = { + runId: string +} + +type BulkEmbedAllTaskOutput = { + runId: string + status: string +} + +const TERMINAL_STATUSES = new Set(['succeeded', 'failed', 'canceled']) +const fallbackInputsCache = new Map() + +export function createFallbackBulkCallbacks( + dynamicConfig: 
KnowledgePoolDynamicConfig, +): BulkEmbeddingsCallbacks { + return { + prepareBulkEmbeddings: async ({ inputs }) => { + const providerBatchId = `local-${Date.now()}-${Math.random().toString(16).slice(2)}` + fallbackInputsCache.set(providerBatchId, inputs) + return { + providerBatchId, + status: 'queued', + counts: { inputs: inputs.length }, + } + }, + pollBulkEmbeddings: async ({ providerBatchId }) => { + if (!fallbackInputsCache.has(providerBatchId)) { + return { status: 'failed', error: 'Unknown local batch' } + } + return { status: 'succeeded', counts: { inputs: fallbackInputsCache.get(providerBatchId)?.length } } + }, + completeBulkEmbeddings: async ({ providerBatchId }) => { + const inputs = fallbackInputsCache.get(providerBatchId) || [] + const embeddings = await dynamicConfig.embedDocs(inputs.map((i) => i.text)) + const outputs = embeddings.map((vector, idx) => { + const input = inputs[idx] + return { + id: input?.id ?? String(idx), + embedding: Array.isArray(vector) ? vector : Array.from(vector), + } + }) + fallbackInputsCache.delete(providerBatchId) + return { + status: 'succeeded', + outputs, + counts: { inputs: inputs.length, succeeded: outputs.length, failed: inputs.length - outputs.length }, + } + }, + } +} + +export const createBulkEmbedAllTask = ({ + knowledgePools, +}: { + knowledgePools: Record +}): TaskConfig => { + const task: TaskConfig = { + slug: 'payloadcms-vectorize:bulk-embed-all', + handler: async ({ input, req }): Promise> => { + if (!input?.runId) { + throw new Error('[payloadcms-vectorize] bulk embed runId is required') + } + const payload = req.payload + const run = await payload.findByID({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + id: input.runId, + }) + const poolName = (run as any)?.pool as KnowledgePoolName + if (!poolName) { + throw new Error(`[payloadcms-vectorize] bulk embed run ${input.runId} missing pool`) + } + const dynamicConfig = knowledgePools[poolName] + if (!dynamicConfig) { + throw new Error( + `[payloadcms-vectorize] knowledgePool "${poolName}" not found for bulk embed run ${input.runId}`, + ) + } + + const callbacks = dynamicConfig.bulkEmbeddings || createFallbackBulkCallbacks(dynamicConfig) + const embeddingVersion = dynamicConfig.embeddingVersion + + const inputs = await collectMissingEmbeddings({ + payload, + poolName, + dynamicConfig, + embeddingVersion, + }) + + const inputsCount = inputs.length + if (inputsCount === 0) { + await payload.update({ + id: input.runId, + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + status: 'succeeded', + inputs: 0, + succeeded: 0, + failed: 0, + completedAt: new Date().toISOString(), + }, + }) + return { output: { runId: input.runId, status: 'succeeded' } } + } + + const prepare = (await callbacks.prepareBulkEmbeddings({ + payload, + knowledgePool: poolName, + embeddingVersion, + inputs, + })) || { providerBatchId: `local-${Date.now()}` } + + const providerBatchId = prepare.providerBatchId + let status = prepare.status ?? 'running' + await payload.update({ + id: input.runId, + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + providerBatchId, + inputFileRef: prepare.inputFileRef, + status, + inputs: prepare.counts?.inputs ?? 
inputsCount, + submittedAt: new Date().toISOString(), + }, + }) + + // Poll until terminal + let pollResult: any = null + const maxPolls = 10 + let polls = 0 + while (!TERMINAL_STATUSES.has(status) && polls < maxPolls) { + pollResult = await callbacks.pollBulkEmbeddings({ + payload, + knowledgePool: poolName, + providerBatchId, + }) + status = pollResult.status + await payload.update({ + id: input.runId, + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + status, + inputs: pollResult.counts?.inputs ?? inputsCount, + succeeded: pollResult.counts?.succeeded, + failed: pollResult.counts?.failed, + error: pollResult.error, + }, + }) + if (TERMINAL_STATUSES.has(status)) break + polls += 1 + const delay = pollResult.nextPollMs ?? 1000 + await new Promise((resolve) => setTimeout(resolve, delay)) + } + + if (status !== 'succeeded') { + await payload.update({ + id: input.runId, + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + status, + error: pollResult?.error, + completedAt: new Date().toISOString(), + }, + }) + return { output: { runId: input.runId, status } } + } + + const completion = + (await callbacks.completeBulkEmbeddings({ + payload, + knowledgePool: poolName, + providerBatchId, + })) || { status, outputs: [] } + + const outputs = completion.outputs || [] + const inputsById = new Map(inputs.map((input) => [input.id, input])) + const successfulOutputs = outputs.filter((o) => !o.error && o.embedding) + const failedCount = completion.counts?.failed ?? inputsCount - successfulOutputs.length + + // Remove existing embeddings for successful doc ids before writing new vectors + const docKeys = new Set() + for (const output of successfulOutputs) { + const inputMeta = inputsById.get(output.id)?.metadata + if (!inputMeta) continue + docKeys.add(`${inputMeta.sourceCollection}:${inputMeta.docId}`) + } + for (const key of docKeys) { + const [sourceCollection, docId] = key.split(':') + await payload.delete({ + collection: poolName, + where: { + and: [ + { sourceCollection: { equals: sourceCollection } }, + { docId: { equals: String(docId) } }, + ], + }, + }) + } + + for (const output of successfulOutputs) { + const input = inputsById.get(output.id) + if (!input || !output.embedding) continue + + const embeddingArray = Array.isArray(output.embedding) + ? output.embedding + : Array.from(output.embedding) + + const { chunkIndex, sourceCollection, docId, embeddingVersion: version, ...rest } = + input.metadata + const chunkText = input.text + + const created = await payload.create({ + collection: poolName, + data: { + sourceCollection, + docId: String(docId), + chunkIndex, + chunkText, + embeddingVersion: version, + ...rest, + embedding: embeddingArray, + } as any, + }) + await persistVectorColumn({ + payload, + poolName, + vector: embeddingArray, + id: String((created as any)?.id ?? ''), + }) + } + + await payload.update({ + id: input.runId, + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + status: completion.status ?? 'succeeded', + inputs: completion.counts?.inputs ?? inputsCount, + succeeded: completion.counts?.succeeded ?? successfulOutputs.length, + failed: failedCount, + error: completion.error, + completedAt: new Date().toISOString(), + }, + }) + + return { + output: { + runId: input.runId, + status: completion.status ?? 
'succeeded', + }, + } + }, + } + + return task +} + +async function persistVectorColumn(args: { + payload: Payload + poolName: KnowledgePoolName + vector: number[] | Float32Array + id: string +}) { + const { payload, poolName, vector, id } = args + if (!isPostgresPayload(payload)) { + throw new Error('[payloadcms-vectorize] Bulk embeddings require the Postgres adapter') + } + const postgresPayload = payload as PostgresPayload + const schemaName = postgresPayload.db.schemaName || 'public' + const literal = `[${Array.from(vector).join(',')}]` + const sql = `UPDATE "${schemaName}"."${poolName}" SET embedding = $1 WHERE id = $2` + const runSQL = async (statement: string, params?: any[]) => { + if (postgresPayload.db.pool?.query) return postgresPayload.db.pool.query(statement, params) + if (postgresPayload.db.drizzle?.execute) return postgresPayload.db.drizzle.execute(statement) + throw new Error('[payloadcms-vectorize] Failed to persist vector column') + } + try { + await runSQL(sql, [literal, id]) + } catch (e) { + payload.logger.error('[payloadcms-vectorize] Failed to persist vector column', e as Error) + throw e + } +} + +async function collectMissingEmbeddings(args: { + payload: Payload + poolName: KnowledgePoolName + dynamicConfig: KnowledgePoolDynamicConfig + embeddingVersion: string +}): Promise { + const { payload, poolName, dynamicConfig, embeddingVersion } = args + const inputs: BulkEmbeddingInput[] = [] + + for (const collectionSlug of Object.keys(dynamicConfig.collections)) { + const collectionConfig = dynamicConfig.collections[collectionSlug] + if (!collectionConfig) continue + const toKnowledgePool = collectionConfig.toKnowledgePool + let page = 1 + const limit = 50 + + // Paginate through source collection docs + while (true) { + const res = await payload.find({ + collection: collectionSlug, + page, + limit, + }) + const docs = (res as any)?.docs || [] + if (!docs.length) break + const totalPages = (res as any)?.totalPages ?? 
page + + for (const doc of docs) { + const existing = await payload.find({ + collection: poolName, + where: { + and: [ + { sourceCollection: { equals: collectionSlug } }, + { docId: { equals: String(doc.id) } }, + { embeddingVersion: { equals: embeddingVersion } }, + ], + }, + limit: 1, + }) + if (existing.totalDocs > 0) continue + + const chunkData = await toKnowledgePool(doc, payload) + chunkData.forEach((chunkEntry, idx) => { + if (!chunkEntry?.chunk) return + const { chunk, ...extensionFields } = chunkEntry + inputs.push({ + id: `${collectionSlug}:${doc.id}:${idx}`, + text: chunk, + metadata: { + sourceCollection: collectionSlug, + docId: String(doc.id), + chunkIndex: idx, + embeddingVersion, + ...extensionFields, + }, + }) + }) + } + page += 1 + if (page > totalPages) break + } + } + + return inputs +} diff --git a/src/types.ts b/src/types.ts index 5e2fff6..79a706d 100644 --- a/src/types.ts +++ b/src/types.ts @@ -13,6 +13,8 @@ export type CollectionVectorizeOption = { toKnowledgePool: ToKnowledgePoolFn } +export type IngestMode = 'realtime' | 'bulk' + /** Knowledge pool name identifier */ export type KnowledgePoolName = string @@ -38,6 +40,89 @@ export type KnowledgePoolDynamicConfig = { embeddingVersion: string /** Optional fields to extend the knowledge pool collection schema */ extensionFields?: Field[] + /** Controls whether docs embed immediately or are staged for bulk runs */ + ingestMode?: IngestMode + /** Provider-specific bulk embedding callbacks */ + bulkEmbeddings?: BulkEmbeddingsCallbacks +} + +export type BulkEmbeddingRunStatus = 'queued' | 'running' | 'succeeded' | 'failed' | 'canceled' + +export type BulkEmbeddingInput = { + /** Stable identifier for correlating outputs (should be unique per chunk) */ + id: string + /** Raw text to embed */ + text: string + metadata: { + sourceCollection: string + docId: string + chunkIndex: number + embeddingVersion: string + [key: string]: any + } +} + +export type BulkEmbeddingOutput = { + id: string + embedding?: number[] | Float32Array + error?: string | null +} + +export type BulkEmbeddingCounts = { + inputs?: number + succeeded?: number + failed?: number +} + +export type PrepareBulkEmbeddingsArgs = { + payload: Payload + knowledgePool: KnowledgePoolName + embeddingVersion: string + inputs: BulkEmbeddingInput[] +} + +export type PrepareBulkEmbeddingsResult = { + providerBatchId: string + inputFileRef?: string + status?: BulkEmbeddingRunStatus + counts?: BulkEmbeddingCounts +} + +export type PollBulkEmbeddingsArgs = { + payload: Payload + knowledgePool: KnowledgePoolName + providerBatchId: string +} + +export type PollBulkEmbeddingsResult = { + status: BulkEmbeddingRunStatus + counts?: BulkEmbeddingCounts + error?: string + /** Optional delay hint in ms before the next poll */ + nextPollMs?: number +} + +export type CompleteBulkEmbeddingsArgs = { + payload: Payload + knowledgePool: KnowledgePoolName + providerBatchId: string +} + +export type CompleteBulkEmbeddingsResult = { + status: BulkEmbeddingRunStatus + outputs: BulkEmbeddingOutput[] + counts?: BulkEmbeddingCounts + error?: string +} + +export type BulkEmbeddingsCallbacks = { + prepareBulkEmbeddings: ( + args: PrepareBulkEmbeddingsArgs, + ) => Promise + pollBulkEmbeddings: (args: PollBulkEmbeddingsArgs) => Promise + completeBulkEmbeddings: ( + args: CompleteBulkEmbeddingsArgs, + ) => Promise } export type PayloadcmsVectorizeConfig = { From f71a67402fb3b4ddb05ffc204f7e5281b8e14144 Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Tue, 
16 Dec 2025 14:50:50 +0700 Subject: [PATCH 02/49] Better API --- README.md | 12 ++++++++---- dev/helpers/embed.ts | 4 ++-- dev/payload.config.ts | 6 ++++-- dev/specs/bulkEmbed.spec.ts | 33 ++++++++++++++------------------- src/index.ts | 7 +++++-- src/tasks/bulkEmbedAll.ts | 35 +++++++++++++++++++++++------------ src/types.ts | 10 +++++----- 7 files changed, 61 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index ec36941..c891446 100644 --- a/README.md +++ b/README.md @@ -190,8 +190,12 @@ The embeddings collection name will be the same as the knowledge pool name. - `embedQuery`: `EmbedQueryFn` - Function to embed search queries - `embeddingVersion`: `string` - Version string for tracking model changes - `extensionFields?`: `Field[]` - Optional fields to extend the embeddings collection schema -- `ingestMode?`: `'realtime' | 'bulk'` - Default `realtime` queues embeddings immediately. `bulk` skips realtime embedding, deletes stale vectors on updates, and relies on the bulk job to backfill. -- `bulkEmbeddings?`: Provider-specific callbacks for batch embedding (`prepareBulkEmbeddings`, `pollBulkEmbeddings`, `completeBulkEmbeddings`). If omitted, the plugin falls back to using `embedDocs` in-process. +- `bulkEmbeddings?`: Configuration for bulk embedding operations: + - `ingestMode?`: `'realtime' | 'bulk'` - Default `realtime` queues embeddings immediately. `bulk` skips realtime embedding, deletes stale vectors on updates, and relies on the bulk job to backfill. + - `prepareBulkEmbeddings`: Callback to prepare a bulk embedding batch + - `pollBulkEmbeddings`: Callback to poll the status of a bulk embedding batch + - `completeBulkEmbeddings`: Callback to retrieve completed embeddings from a batch + If `bulkEmbeddings` is omitted, the plugin falls back to using `embedDocs` in-process. #### CollectionVectorizeOption @@ -218,7 +222,7 @@ Because you control the output, you can mix different field types, discard empty ## PostgreSQL Custom Schema Support -The plugin reads the `schemaName` configuration from your Postgres adapter within the Payload config. +The plugin reads the `schemaName` configuration from your Postgres adapter within the Payload config. When you configure a custom schema via `postgresAdapter({ schemaName: 'custom' })`, all plugin SQL queries (for vector columns, indexes, and embeddings) are qualified with that schema name. This is useful for multi-tenant setups or when content tables live in a dedicated schema. @@ -311,7 +315,7 @@ Search for similar content using vector similarity. 
```jsonc { - "knowledgePool": "main" + "knowledgePool": "main", } ``` diff --git a/dev/helpers/embed.ts b/dev/helpers/embed.ts index d70b87e..540e56c 100644 --- a/dev/helpers/embed.ts +++ b/dev/helpers/embed.ts @@ -1,6 +1,6 @@ import { voyage } from 'voyage-ai-provider' import { embed, embedMany } from 'ai' -import type { BulkEmbeddingsCallbacks } from 'payloadcms-vectorize' +import type { BulkEmbeddingsConfig } from 'payloadcms-vectorize' export const voyageEmbedDocs = async (texts: string[]): Promise => { const embedResult = await embedMany({ @@ -56,7 +56,7 @@ export function makeDummyEmbedDocs(dims: number) { } export const testEmbeddingVersion = 'test-v1' -export function makeLocalBulkEmbeddingsCallbacks(dims: number): BulkEmbeddingsCallbacks { +export function makeLocalBulkEmbeddingsCallbacks(dims: number): BulkEmbeddingsConfig { const pendingInputs = new Map>() const embedDocs = makeDummyEmbedDocs(dims) return { diff --git a/dev/payload.config.ts b/dev/payload.config.ts index 2b376ce..0350447 100644 --- a/dev/payload.config.ts +++ b/dev/payload.config.ts @@ -123,8 +123,10 @@ const buildConfigWithPostgres = async () => { embedDocs, embedQuery, embeddingVersion: testEmbeddingVersion, - ingestMode: 'realtime', - bulkEmbeddings: makeLocalBulkEmbeddingsCallbacks(dims), + bulkEmbeddings: { + ...makeLocalBulkEmbeddingsCallbacks(dims), + ingestMode: 'realtime', + }, }, }, }), diff --git a/dev/specs/bulkEmbed.spec.ts b/dev/specs/bulkEmbed.spec.ts index 386d319..7f46bde 100644 --- a/dev/specs/bulkEmbed.spec.ts +++ b/dev/specs/bulkEmbed.spec.ts @@ -8,7 +8,12 @@ import { createVectorizeIntegration } from 'payloadcms-vectorize' import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../src/collections/bulkEmbeddingsRuns.js' import { createBulkEmbedAllTask } from '../../src/tasks/bulkEmbedAll.js' import { createTestDb } from './utils.js' -import { makeDummyEmbedDocs, makeDummyEmbedQuery, makeLocalBulkEmbeddingsCallbacks, testEmbeddingVersion } from 'helpers/embed.js' +import { + makeDummyEmbedDocs, + makeDummyEmbedQuery, + makeLocalBulkEmbeddingsCallbacks, + testEmbeddingVersion, +} from 'helpers/embed.js' const DIMS = 8 @@ -35,8 +40,10 @@ describe('Bulk embed ingest mode', () => { embedDocs: makeDummyEmbedDocs(DIMS), embedQuery: makeDummyEmbedQuery(DIMS), embeddingVersion: testEmbeddingVersion, - ingestMode: 'bulk' as const, - bulkEmbeddings: makeLocalBulkEmbeddingsCallbacks(DIMS), + bulkEmbeddings: { + ...makeLocalBulkEmbeddingsCallbacks(DIMS), + ingestMode: 'bulk' as const, + }, }, }, } @@ -75,10 +82,7 @@ describe('Bulk embed ingest mode', () => { const initialEmbeds = await payload.find({ collection: 'default', where: { - and: [ - { sourceCollection: { equals: 'posts' } }, - { docId: { equals: String(post.id) } }, - ], + and: [{ sourceCollection: { equals: 'posts' } }, { docId: { equals: String(post.id) } }], }, }) expect(initialEmbeds.totalDocs).toBe(0) @@ -104,10 +108,7 @@ describe('Bulk embed ingest mode', () => { const embeds = await payload.find({ collection: 'default', where: { - and: [ - { sourceCollection: { equals: 'posts' } }, - { docId: { equals: String(post.id) } }, - ], + and: [{ sourceCollection: { equals: 'posts' } }, { docId: { equals: String(post.id) } }], }, }) expect(embeds.totalDocs).toBeGreaterThan(0) @@ -154,10 +155,7 @@ describe('Bulk embed ingest mode', () => { const afterUpdateEmbeds = await payload.find({ collection: 'default', where: { - and: [ - { sourceCollection: { equals: 'posts' } }, - { docId: { equals: String(post.id) } }, - ], + and: [{ sourceCollection: { equals: 
'posts' } }, { docId: { equals: String(post.id) } }], }, }) expect(afterUpdateEmbeds.totalDocs).toBe(0) @@ -179,10 +177,7 @@ describe('Bulk embed ingest mode', () => { const embedsAfterRerun = await payload.find({ collection: 'default', where: { - and: [ - { sourceCollection: { equals: 'posts' } }, - { docId: { equals: String(post.id) } }, - ], + and: [{ sourceCollection: { equals: 'posts' } }, { docId: { equals: String(post.id) } }], }, }) expect(embedsAfterRerun.totalDocs).toBeGreaterThan(0) diff --git a/src/index.ts b/src/index.ts index 15ec71d..bcf6cc1 100644 --- a/src/index.ts +++ b/src/index.ts @@ -14,7 +14,10 @@ import type { PostgresAdapterArgs } from '@payloadcms/db-postgres' import { createVectorizeTask } from './tasks/vectorize.js' import { createVectorSearchHandler } from './endpoints/vectorSearch.js' import { clearEmbeddingsTables, registerEmbeddingsTable } from './drizzle/tables.js' -import { createBulkEmbeddingsRunsCollection, BULK_EMBEDDINGS_RUNS_SLUG } from './collections/bulkEmbeddingsRuns.js' +import { + createBulkEmbeddingsRunsCollection, + BULK_EMBEDDINGS_RUNS_SLUG, +} from './collections/bulkEmbeddingsRuns.js' import { createBulkEmbedAllTask } from './tasks/bulkEmbedAll.js' import { createBulkEmbedHandler } from './endpoints/bulkEmbed.js' @@ -221,7 +224,7 @@ export const createVectorizeIntegration = const collectionConfig = dynamic.collections[collectionSlug] if (!collectionConfig) continue - if ((dynamic.ingestMode || 'realtime') === 'bulk') { + if ((dynamic.bulkEmbeddings?.ingestMode || 'realtime') === 'bulk') { // In bulk mode, clear stale embeddings and let the bulk job recreate them await payload.delete({ collection: pool, diff --git a/src/tasks/bulkEmbedAll.ts b/src/tasks/bulkEmbedAll.ts index 4a35cb2..0bc4cca 100644 --- a/src/tasks/bulkEmbedAll.ts +++ b/src/tasks/bulkEmbedAll.ts @@ -1,7 +1,7 @@ import { Payload, TaskConfig, TaskHandlerResult } from 'payload' import { BulkEmbeddingInput, - BulkEmbeddingsCallbacks, + BulkEmbeddingsConfig, KnowledgePoolDynamicConfig, KnowledgePoolName, } from '../types.js' @@ -22,7 +22,7 @@ const fallbackInputsCache = new Map() export function createFallbackBulkCallbacks( dynamicConfig: KnowledgePoolDynamicConfig, -): BulkEmbeddingsCallbacks { +): BulkEmbeddingsConfig { return { prepareBulkEmbeddings: async ({ inputs }) => { const providerBatchId = `local-${Date.now()}-${Math.random().toString(16).slice(2)}` @@ -37,7 +37,10 @@ export function createFallbackBulkCallbacks( if (!fallbackInputsCache.has(providerBatchId)) { return { status: 'failed', error: 'Unknown local batch' } } - return { status: 'succeeded', counts: { inputs: fallbackInputsCache.get(providerBatchId)?.length } } + return { + status: 'succeeded', + counts: { inputs: fallbackInputsCache.get(providerBatchId)?.length }, + } }, completeBulkEmbeddings: async ({ providerBatchId }) => { const inputs = fallbackInputsCache.get(providerBatchId) || [] @@ -53,7 +56,11 @@ export function createFallbackBulkCallbacks( return { status: 'succeeded', outputs, - counts: { inputs: inputs.length, succeeded: outputs.length, failed: inputs.length - outputs.length }, + counts: { + inputs: inputs.length, + succeeded: outputs.length, + failed: inputs.length - outputs.length, + }, } }, } @@ -174,12 +181,11 @@ export const createBulkEmbedAllTask = ({ return { output: { runId: input.runId, status } } } - const completion = - (await callbacks.completeBulkEmbeddings({ - payload, - knowledgePool: poolName, - providerBatchId, - })) || { status, outputs: [] } + const completion = (await 
callbacks.completeBulkEmbeddings({ + payload, + knowledgePool: poolName, + providerBatchId, + })) || { status, outputs: [] } const outputs = completion.outputs || [] const inputsById = new Map(inputs.map((input) => [input.id, input])) @@ -214,8 +220,13 @@ export const createBulkEmbedAllTask = ({ ? output.embedding : Array.from(output.embedding) - const { chunkIndex, sourceCollection, docId, embeddingVersion: version, ...rest } = - input.metadata + const { + chunkIndex, + sourceCollection, + docId, + embeddingVersion: version, + ...rest + } = input.metadata const chunkText = input.text const created = await payload.create({ diff --git a/src/types.ts b/src/types.ts index 79a706d..d2147ac 100644 --- a/src/types.ts +++ b/src/types.ts @@ -40,10 +40,8 @@ export type KnowledgePoolDynamicConfig = { embeddingVersion: string /** Optional fields to extend the knowledge pool collection schema */ extensionFields?: Field[] - /** Controls whether docs embed immediately or are staged for bulk runs */ - ingestMode?: IngestMode - /** Provider-specific bulk embedding callbacks */ - bulkEmbeddings?: BulkEmbeddingsCallbacks + /** User provided bulk embedding configuration */ + bulkEmbeddings?: BulkEmbeddingsConfig } export type BulkEmbeddingRunStatus = 'queued' | 'running' | 'succeeded' | 'failed' | 'canceled' @@ -115,7 +113,9 @@ export type CompleteBulkEmbeddingsResult = { error?: string } -export type BulkEmbeddingsCallbacks = { +export type BulkEmbeddingsConfig = { + /** Controls whether docs embed immediately or are staged for bulk runs */ + ingestMode?: IngestMode prepareBulkEmbeddings: ( args: PrepareBulkEmbeddingsArgs, ) => Promise From 3dc508df209fd9562bf7448954df7e6d968968ca Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Tue, 16 Dec 2025 23:03:16 +0700 Subject: [PATCH 03/49] WIP --- CHANGELOG.md | 22 ++ README.md | 53 +++- dev/helpers/embed.ts | 241 ++++++++++++++-- dev/payload.config.ts | 19 +- dev/specs/bulkEmbed.spec.ts | 363 ++++++++++++++++++++++-- package.json | 12 +- src/admin/components/EmbedAllButton.tsx | 29 +- src/collections/embeddings.ts | 16 ++ src/endpoints/bulkEmbed.ts | 4 +- src/exports/rsc.ts | 2 - src/index.ts | 43 ++- src/tasks/bulkEmbedAll.ts | 255 ++++++++++------- src/types.ts | 11 +- 13 files changed, 881 insertions(+), 189 deletions(-) delete mode 100644 src/exports/rsc.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e51c31..081b342 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,28 @@ All notable changes to this project will be documented in this file. +## 0.5.0 - 2025-12-17 + +### Breaking Changes + +- **`queueName` renamed to `realtimeQueueName`**: The plugin option `queueName` has been renamed to `realtimeQueueName` to clarify that it only affects realtime vectorization jobs. + +### New Features + +- **`bulkQueueName` option**: New plugin option to isolate bulk embedding workloads to a dedicated queue. Required when any knowledge pool uses bulk ingest mode (`bulkEmbeddings.ingestMode === 'bulk'`). +- **Non-blocking bulk polling**: Bulk jobs now use separate, short-lived tasks that can safely handle long-running providers (hours/days) without blocking worker processes. 
+- **Improved admin UX**: The "Embed all" button now: + - Disables when bulk embeddings are not configured for a pool + - Links to the latest bulk run for easy status tracking +- **Enhanced bulk provider support**: Added real Voyage AI Batch API integration in dev environment, demonstrating production-ready bulk embedding with file uploads and async polling. + +### Tests & Reliability + +- Added comprehensive tests for realtime vs bulk ingest behavior +- Added tests for bulk polling error conditions (`failed`, `canceled` statuses) +- Added tests for bulk fan-in behavior (multiple documents processed in single run) +- Improved test coverage for edge cases in bulk embedding workflow + ## 0.4.1 - 2025-12-02 ### Added diff --git a/README.md b/README.md index c891446..41c446d 100644 --- a/README.md +++ b/README.md @@ -165,12 +165,13 @@ const { results } = await response.json() ### Plugin Options -| Option | Type | Required | Description | -| ------------------- | --------------------------------------------------- | -------- | ---------------------------------------- | -| `knowledgePools` | `Record` | ✅ | Knowledge pools and their configurations | -| `queueName` | `string` | ❌ | Custom queue name for background jobs | -| `endpointOverrides` | `object` | ❌ | Customize the search endpoint | -| `disabled` | `boolean` | ❌ | Disable plugin while keeping schema | +| Option | Type | Required | Description | +| ------------------- | --------------------------------------------------- | -------- | -------------------------------------------------------------------------- | +| `knowledgePools` | `Record` | ✅ | Knowledge pools and their configurations | +| `realtimeQueueName` | `string` | ❌ | Custom queue name for realtime vectorization jobs | +| `bulkQueueName` | `string` | ❌ | Queue name for bulk embedding jobs (required if any pool uses bulk ingest) | +| `endpointOverrides` | `object` | ❌ | Customize the search endpoint | +| `disabled` | `boolean` | ❌ | Disable plugin while keeping schema | ### Knowledge Pool Config @@ -191,11 +192,41 @@ The embeddings collection name will be the same as the knowledge pool name. - `embeddingVersion`: `string` - Version string for tracking model changes - `extensionFields?`: `Field[]` - Optional fields to extend the embeddings collection schema - `bulkEmbeddings?`: Configuration for bulk embedding operations: - - `ingestMode?`: `'realtime' | 'bulk'` - Default `realtime` queues embeddings immediately. `bulk` skips realtime embedding, deletes stale vectors on updates, and relies on the bulk job to backfill. - - `prepareBulkEmbeddings`: Callback to prepare a bulk embedding batch - - `pollBulkEmbeddings`: Callback to poll the status of a bulk embedding batch - - `completeBulkEmbeddings`: Callback to retrieve completed embeddings from a batch - If `bulkEmbeddings` is omitted, the plugin falls back to using `embedDocs` in-process. + - `ingestMode?`: `'realtime' | 'bulk'` - Default `'realtime'` queues embeddings immediately. `'bulk'` skips realtime embedding, deletes stale vectors on updates, and relies on the bulk job to backfill. + - `prepareBulkEmbeddings(args)`: Callback to prepare a bulk embedding batch + - `pollBulkEmbeddings(args)`: Callback to poll the status of a bulk embedding batch + - `completeBulkEmbeddings(args)`: Callback to retrieve completed embeddings from a batch + If `bulkEmbeddings` is omitted for a pool, the "Embed all" button is disabled and bulk is not available. 
+ +### Bulk Task Model + +When bulk ingest mode is enabled, the plugin uses separate Payload jobs for reliability with long-running providers: + +- **`prepare-bulk-embedding`**: One-shot task that collects missing embeddings and submits them to the provider. Short-lived. +- **`poll-or-complete-bulk-embedding`**: Polls the provider status and completes embedding ingestion when ready. Can requeue itself until completion. + +### Queue Configuration + +For production deployments with bulk embedding: + +```typescript +// Recommended production setup +plugins: [ + payloadcmsVectorize({ + knowledgePools: { /* ... */ }, + realtimeQueueName: 'vectorize-realtime', // Separate realtime jobs (Optional) + bulkQueueName: 'vectorize-bulk', // Isolate bulk workloads (Required if any knowledge pool uses bulk ingestion of any kind) + }), +] + +// Configure Payload queues +jobs: { + queues: { + 'vectorize-realtime': { concurrency: 5 }, + 'vectorize-bulk': { concurrency: 2 }, + }, +} +``` #### CollectionVectorizeOption diff --git a/dev/helpers/embed.ts b/dev/helpers/embed.ts index 540e56c..29fc53b 100644 --- a/dev/helpers/embed.ts +++ b/dev/helpers/embed.ts @@ -1,6 +1,11 @@ import { voyage } from 'voyage-ai-provider' import { embed, embedMany } from 'ai' -import type { BulkEmbeddingsConfig } from 'payloadcms-vectorize' +import type { + BulkEmbeddingInput, + BulkEmbeddingOutput, + BulkEmbeddingRunStatus, + BulkEmbeddingsConfig, +} from 'payloadcms-vectorize' export const voyageEmbedDocs = async (texts: string[]): Promise => { const embedResult = await embedMany({ @@ -56,36 +61,226 @@ export function makeDummyEmbedDocs(dims: number) { } export const testEmbeddingVersion = 'test-v1' -export function makeLocalBulkEmbeddingsCallbacks(dims: number): BulkEmbeddingsConfig { - const pendingInputs = new Map>() - const embedDocs = makeDummyEmbedDocs(dims) +// Real Voyage Batch API implementation +export function makeVoyageBulkEmbeddingsConfig(): BulkEmbeddingsConfig { + // Store batch state in memory for dev purposes + const batchState = new Map< + string, + { + inputs: BulkEmbeddingInput[] + batchId: string + outputFileId?: string + } + >() + return { + ingestMode: 'bulk', prepareBulkEmbeddings: async ({ inputs }) => { - const providerBatchId = `local-${dims}-${Date.now()}` - pendingInputs.set(providerBatchId, inputs) - return { - providerBatchId, - status: 'queued', - counts: { inputs: inputs.length }, + try { + // Create JSONL content for Voyage batch + const jsonlLines = inputs.map((input) => { + return JSON.stringify({ + custom_id: input.id, + body: { + input: [input.text], + model: 'voyage-3.5-lite', + input_type: 'document', + }, + }) + }) + const jsonlContent = jsonlLines.join('\n') + + // Upload file to Voyage Files API using FormData + const formData = new FormData() + const blob = new Blob([jsonlContent], { type: 'application/jsonl' }) + formData.append('file', blob, 'batch-input.jsonl') + formData.append('purpose', 'batch') + + const uploadResponse = await fetch('https://api.voyageai.com/v1/files', { + method: 'POST', + headers: { + Authorization: `Bearer ${process.env.VOYAGE_API_KEY}`, + }, + body: formData, + }) + + if (!uploadResponse.ok) { + const error = await uploadResponse.text() + throw new Error(`Voyage file upload failed: ${error}`) + } + + const fileData = await uploadResponse.json() + const fileId = fileData.id + + // Create batch + const batchResponse = await fetch('https://api.voyageai.com/v1/batches', { + method: 'POST', + headers: { + Authorization: `Bearer ${process.env.VOYAGE_API_KEY}`, + 
'Content-Type': 'application/json', + }, + body: JSON.stringify({ + input_file_id: fileId, + endpoint: '/v1/embeddings', + completion_window: '24h', + }), + }) + + if (!batchResponse.ok) { + const error = await batchResponse.text() + throw new Error(`Voyage batch creation failed: ${error}`) + } + + const batchData = await batchResponse.json() + const batchId = batchData.id + + // Store state for later retrieval + batchState.set(batchId, { + inputs, + batchId, + }) + + return { + providerBatchId: batchId, + status: batchData.status || 'queued', + counts: { inputs: inputs.length }, + } + } catch (error) { + console.error('Voyage prepareBulkEmbeddings error:', error) + throw error } }, + pollBulkEmbeddings: async ({ providerBatchId }) => { - if (!pendingInputs.has(providerBatchId)) { - return { status: 'failed', error: 'unknown batch' } + try { + const response = await fetch(`https://api.voyageai.com/v1/batches/${providerBatchId}`, { + headers: { + Authorization: `Bearer ${process.env.VOYAGE_API_KEY}`, + }, + }) + + if (!response.ok) { + const error = await response.text() + return { status: 'failed', error: `Voyage API error: ${error}` } + } + + const batchData = await response.json() + + // Map Voyage status to our status + let status: BulkEmbeddingRunStatus + switch (batchData.status) { + case 'queued': + case 'validating': + status = 'queued' + break + case 'running': + case 'finalizing': + status = 'running' + break + case 'completed': + status = 'succeeded' + break + case 'failed': + case 'cancelled': + case 'expired': + status = batchData.status === 'cancelled' ? 'canceled' : 'failed' + break + default: + status = 'running' + } + + // Store output file ID if available + if (batchData.output_file_id) { + const state = batchState.get(providerBatchId) + if (state) { + state.outputFileId = batchData.output_file_id + } + } + + return { + status, + counts: batchData.request_counts + ? { + inputs: batchData.request_counts.total || 0, + succeeded: batchData.request_counts.completed || 0, + failed: batchData.request_counts.failed || 0, + } + : undefined, + nextPollMs: status === 'running' || status === 'queued' ? 10000 : undefined, // Poll every 10s if not terminal + } + } catch (error) { + console.error('Voyage pollBulkEmbeddings error:', error) + return { status: 'failed', error: 'Failed to poll batch status' } } - return { status: 'succeeded' } }, + completeBulkEmbeddings: async ({ providerBatchId }) => { - const inputs = pendingInputs.get(providerBatchId) || [] - const embeddings = await embedDocs(inputs.map((i) => i.text)) - pendingInputs.delete(providerBatchId) - return { - status: 'succeeded', - outputs: embeddings.map((vector, idx) => ({ - id: inputs[idx]?.id ?? 
String(idx), - embedding: vector, - })), - counts: { inputs: inputs.length, succeeded: embeddings.length, failed: 0 }, + try { + const state = batchState.get(providerBatchId) + if (!state?.outputFileId) { + throw new Error('No output file available for batch') + } + + // Download output file + const response = await fetch( + `https://api.voyageai.com/v1/files/${state.outputFileId}/content`, + { + headers: { + Authorization: `Bearer ${process.env.VOYAGE_API_KEY}`, + }, + }, + ) + + if (!response.ok) { + const error = await response.text() + throw new Error(`Failed to download output file: ${error}`) + } + + const jsonlContent = await response.text() + const lines = jsonlContent.trim().split('\n') + + const outputs: BulkEmbeddingOutput[] = [] + let succeeded = 0 + let failed = 0 + + for (const line of lines) { + if (!line.trim()) continue + try { + const result = JSON.parse(line) + if (result.error) { + outputs.push({ + id: result.custom_id, + error: result.error.message || 'Unknown error', + }) + failed++ + } else { + outputs.push({ + id: result.custom_id, + embedding: result.response.body.data[0].embedding, + }) + succeeded++ + } + } catch (parseError) { + console.error('Failed to parse output line:', line, parseError) + failed++ + } + } + + // Clean up state + batchState.delete(providerBatchId) + + return { + status: 'succeeded', + outputs, + counts: { + inputs: state.inputs.length, + succeeded, + failed, + }, + } + } catch (error) { + console.error('Voyage completeBulkEmbeddings error:', error) + throw error } }, } diff --git a/dev/payload.config.ts b/dev/payload.config.ts index 0350447..056681c 100644 --- a/dev/payload.config.ts +++ b/dev/payload.config.ts @@ -9,7 +9,7 @@ import { voyageEmbedDocs, voyageEmbedQuery, makeDummyEmbedQuery, - makeLocalBulkEmbeddingsCallbacks, + makeVoyageBulkEmbeddingsConfig, } from './helpers/embed.js' import sharp from 'sharp' import { fileURLToPath } from 'url' @@ -79,12 +79,22 @@ const buildConfigWithPostgres = async () => { email: testEmailAdapter, jobs: { tasks: [], + queues: { + 'vectorize-bulk': { + concurrency: 2, + }, + }, autoRun: [ { cron: '*/5 * * * * *', // Run every 5 seconds in development limit: 10, queue: 'default', }, + { + cron: '*/10 * * * * *', // Run every 10 seconds for bulk jobs + limit: 5, + queue: 'vectorize-bulk', + }, ], jobsCollectionOverrides: ({ defaultJobsCollection }) => { // Make jobs collection visible in admin for debugging @@ -123,12 +133,11 @@ const buildConfigWithPostgres = async () => { embedDocs, embedQuery, embeddingVersion: testEmbeddingVersion, - bulkEmbeddings: { - ...makeLocalBulkEmbeddingsCallbacks(dims), - ingestMode: 'realtime', - }, + bulkEmbeddings: makeVoyageBulkEmbeddingsConfig(), }, }, + realtimeQueueName: 'vectorize-realtime', + bulkQueueName: 'vectorize-bulk', }), ], secret: process.env.PAYLOAD_SECRET || 'test-secret_key', diff --git a/dev/specs/bulkEmbed.spec.ts b/dev/specs/bulkEmbed.spec.ts index 7f46bde..868dd7b 100644 --- a/dev/specs/bulkEmbed.spec.ts +++ b/dev/specs/bulkEmbed.spec.ts @@ -6,17 +6,49 @@ import { postgresAdapter } from '@payloadcms/db-postgres' import { lexicalEditor } from '@payloadcms/richtext-lexical' import { createVectorizeIntegration } from 'payloadcms-vectorize' import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../src/collections/bulkEmbeddingsRuns.js' -import { createBulkEmbedAllTask } from '../../src/tasks/bulkEmbedAll.js' -import { createTestDb } from './utils.js' import { - makeDummyEmbedDocs, - makeDummyEmbedQuery, - makeLocalBulkEmbeddingsCallbacks, - testEmbeddingVersion, 
-} from 'helpers/embed.js' + createPrepareBulkEmbeddingTask, + createPollOrCompleteBulkEmbeddingTask, +} from '../../src/tasks/bulkEmbedAll.js' +import { createTestDb } from './utils.js' +import { makeDummyEmbedDocs, makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' +import type { BulkEmbeddingsConfig, BulkEmbeddingRunStatus } from '../../src/types.js' const DIMS = 8 +// Mock bulk embeddings configs for testing +function createMockBulkEmbeddings(statusSequence: BulkEmbeddingRunStatus[]): BulkEmbeddingsConfig { + let callCount = 0 + const embeddings = makeDummyEmbedDocs(DIMS) + + return { + ingestMode: 'bulk', + prepareBulkEmbeddings: async ({ inputs }) => { + return { + providerBatchId: `mock-${Date.now()}`, + status: 'queued', + counts: { inputs: inputs.length }, + } + }, + pollBulkEmbeddings: async () => { + const status = statusSequence[Math.min(callCount++, statusSequence.length - 1)] + return { + status, + counts: status === 'succeeded' ? { inputs: 1, succeeded: 1, failed: 0 } : undefined, + } + }, + completeBulkEmbeddings: async ({ providerBatchId }) => { + const inputs = [{ id: 'test-1', text: 'test text', metadata: {} }] + const vectors = await embeddings([inputs[0].text]) + return { + status: 'succeeded', + outputs: [{ id: inputs[0].id, embedding: vectors[0] }], + counts: { inputs: 1, succeeded: 1, failed: 0 }, + } + }, + } +} + describe('Bulk embed ingest mode', () => { let payload: Payload let config: SanitizedConfig @@ -40,12 +72,10 @@ describe('Bulk embed ingest mode', () => { embedDocs: makeDummyEmbedDocs(DIMS), embedQuery: makeDummyEmbedQuery(DIMS), embeddingVersion: testEmbeddingVersion, - bulkEmbeddings: { - ...makeLocalBulkEmbeddingsCallbacks(DIMS), - ingestMode: 'bulk' as const, - }, + bulkEmbeddings: createMockBulkEmbeddings(['succeeded']), }, }, + bulkQueueName: 'vectorize-bulk', } beforeAll(async () => { @@ -73,7 +103,7 @@ describe('Bulk embed ingest mode', () => { payload = await getPayload({ config }) }) - test('queues no realtime embeddings and bulk job backfills missing docs', async () => { + test('bulk ingest mode queues no realtime embeddings and bulk job backfills missing docs', async () => { const post = await payload.create({ collection: 'posts', data: { title: 'Bulk Mode Title' } as any, @@ -96,11 +126,22 @@ describe('Bulk embed ingest mode', () => { }, }) - const bulkTask = createBulkEmbedAllTask({ + // Run prepare task + const prepareTask = createPrepareBulkEmbeddingTask({ knowledgePools: pluginOptions.knowledgePools, + bulkQueueName: pluginOptions.bulkQueueName, + }) + await prepareTask.handler({ + input: { runId: String(run.id) }, + req: { payload } as any, }) - await bulkTask.handler({ + // Run poll/complete task + const pollTask = createPollOrCompleteBulkEmbeddingTask({ + knowledgePools: pluginOptions.knowledgePools, + bulkQueueName: pluginOptions.bulkQueueName, + }) + await pollTask.handler({ input: { runId: String(run.id) }, req: { payload } as any, }) @@ -122,7 +163,7 @@ describe('Bulk embed ingest mode', () => { expect(runDoc.inputs).toBeGreaterThan(0) }) - test('document updates clear stale embeddings and rerun populates new chunks', async () => { + test('bulk ingest mode clears stale embeddings on document updates and rerun populates new chunks', async () => { const post = await payload.create({ collection: 'posts', data: { title: 'Original' } as any, @@ -137,14 +178,19 @@ describe('Bulk embed ingest mode', () => { status: 'queued', }, }) - const bulkTask = createBulkEmbedAllTask({ + + const prepareTask = 
createPrepareBulkEmbeddingTask({ knowledgePools: pluginOptions.knowledgePools, + bulkQueueName: pluginOptions.bulkQueueName, }) - await bulkTask.handler({ - input: { runId: String(firstRun.id) }, - req: { payload } as any, + const pollTask = createPollOrCompleteBulkEmbeddingTask({ + knowledgePools: pluginOptions.knowledgePools, + bulkQueueName: pluginOptions.bulkQueueName, }) + await prepareTask.handler({ input: { runId: String(firstRun.id) }, req: { payload } as any }) + await pollTask.handler({ input: { runId: String(firstRun.id) }, req: { payload } as any }) + // Update document - should delete embeddings in bulk mode await payload.update({ collection: 'posts', @@ -169,10 +215,8 @@ describe('Bulk embed ingest mode', () => { status: 'queued', }, }) - await bulkTask.handler({ - input: { runId: String(secondRun.id) }, - req: { payload } as any, - }) + await prepareTask.handler({ input: { runId: String(secondRun.id) }, req: { payload } as any }) + await pollTask.handler({ input: { runId: String(secondRun.id) }, req: { payload } as any }) const embedsAfterRerun = await payload.find({ collection: 'default', @@ -183,4 +227,277 @@ describe('Bulk embed ingest mode', () => { expect(embedsAfterRerun.totalDocs).toBeGreaterThan(0) expect(embedsAfterRerun.docs[0]?.chunkText).toContain('Updated Title') }) + + test('realtime ingest mode queues vectorize jobs on document creation', async () => { + const realtimePluginOptions = { + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc: any) => [{ chunk: doc.title }], + }, + }, + embedDocs: makeDummyEmbedDocs(DIMS), + embedQuery: makeDummyEmbedQuery(DIMS), + embeddingVersion: testEmbeddingVersion, + // No bulkEmbeddings - should default to realtime + }, + }, + } + + const realtimeConfig = await buildConfig({ + secret: 'test-secret', + editor: lexicalEditor(), + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: ['vector'], + afterSchemaInit: [integration.afterSchemaInitHook], + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, + }, + }), + plugins: [integration.payloadcmsVectorize(realtimePluginOptions)], + jobs: { tasks: [] }, + }) + + const realtimePayload = await getPayload({ config: realtimeConfig }) + + // Create a document - should trigger realtime vectorization + const post = await realtimePayload.create({ + collection: 'posts', + data: { title: 'Realtime Test' } as any, + }) + + // Check that embeddings were created immediately + const embeds = await realtimePayload.find({ + collection: 'default', + where: { + and: [{ sourceCollection: { equals: 'posts' } }, { docId: { equals: String(post.id) } }], + }, + }) + expect(embeds.totalDocs).toBeGreaterThan(0) + expect(embeds.docs[0]?.chunkText).toBe('Realtime Test') + }) + + test('bulk polling handles failed status correctly', async () => { + const failedBulkOptions = { + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc: any) => [{ chunk: doc.title }], + }, + }, + embedDocs: makeDummyEmbedDocs(DIMS), + embedQuery: makeDummyEmbedQuery(DIMS), + embeddingVersion: testEmbeddingVersion, + bulkEmbeddings: createMockBulkEmbeddings(['failed']), + }, + }, + bulkQueueName: 'vectorize-bulk', + } + + const failedConfig = await buildConfig({ + secret: 'test-secret', + editor: lexicalEditor(), + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: 
['vector'], + afterSchemaInit: [integration.afterSchemaInitHook], + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, + }, + }), + plugins: [integration.payloadcmsVectorize(failedBulkOptions)], + jobs: { tasks: [] }, + }) + + const failedPayload = await getPayload({ config: failedConfig }) + + const post = await failedPayload.create({ + collection: 'posts', + data: { title: 'Failed Test' } as any, + }) + + const run = await failedPayload.create({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + pool: 'default', + embeddingVersion: testEmbeddingVersion, + status: 'queued', + }, + }) + + const prepareTask = createPrepareBulkEmbeddingTask({ + knowledgePools: failedBulkOptions.knowledgePools, + bulkQueueName: failedBulkOptions.bulkQueueName, + }) + const pollTask = createPollOrCompleteBulkEmbeddingTask({ + knowledgePools: failedBulkOptions.knowledgePools, + bulkQueueName: failedBulkOptions.bulkQueueName, + }) + + await prepareTask.handler({ + input: { runId: String(run.id) }, + req: { payload: failedPayload } as any, + }) + await pollTask.handler({ + input: { runId: String(run.id) }, + req: { payload: failedPayload } as any, + }) + + const runDoc = await failedPayload.findByID({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + id: run.id, + }) + expect(runDoc.status).toBe('failed') + // Should not call completeBulkEmbeddings, so no embeddings created + const embeds = await failedPayload.find({ + collection: 'default', + where: { + and: [{ sourceCollection: { equals: 'posts' } }, { docId: { equals: String(post.id) } }], + }, + }) + expect(embeds.totalDocs).toBe(0) + }) + + test('bulk polling handles canceled status correctly', async () => { + const canceledBulkOptions = { + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc: any) => [{ chunk: doc.title }], + }, + }, + embedDocs: makeDummyEmbedDocs(DIMS), + embedQuery: makeDummyEmbedQuery(DIMS), + embeddingVersion: testEmbeddingVersion, + bulkEmbeddings: createMockBulkEmbeddings(['canceled']), + }, + }, + bulkQueueName: 'vectorize-bulk', + } + + const canceledConfig = await buildConfig({ + secret: 'test-secret', + editor: lexicalEditor(), + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: ['vector'], + afterSchemaInit: [integration.afterSchemaInitHook], + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, + }, + }), + plugins: [integration.payloadcmsVectorize(canceledBulkOptions)], + jobs: { tasks: [] }, + }) + + const canceledPayload = await getPayload({ config: canceledConfig }) + + const run = await canceledPayload.create({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + pool: 'default', + embeddingVersion: testEmbeddingVersion, + status: 'queued', + }, + }) + + const prepareTask = createPrepareBulkEmbeddingTask({ + knowledgePools: canceledBulkOptions.knowledgePools, + bulkQueueName: canceledBulkOptions.bulkQueueName, + }) + const pollTask = createPollOrCompleteBulkEmbeddingTask({ + knowledgePools: canceledBulkOptions.knowledgePools, + bulkQueueName: canceledBulkOptions.bulkQueueName, + }) + + await prepareTask.handler({ + input: { runId: String(run.id) }, + req: { payload: canceledPayload } as any, + }) + await pollTask.handler({ + input: { runId: String(run.id) }, + req: { payload: canceledPayload } as any, + }) + + const runDoc = await canceledPayload.findByID({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + id: run.id, + }) + 
expect(runDoc.status).toBe('canceled') + }) + + test('bulk fan-in: multiple documents created before bulk task runs are all processed in single run', async () => { + // Create multiple documents + const post1 = await payload.create({ + collection: 'posts', + data: { title: 'Post 1' } as any, + }) + const post2 = await payload.create({ + collection: 'posts', + data: { title: 'Post 2' } as any, + }) + + // Verify no embeddings initially + const initialEmbeds = await payload.find({ + collection: 'default', + where: { sourceCollection: { equals: 'posts' } }, + }) + expect(initialEmbeds.totalDocs).toBe(0) + + // Create single bulk run + const run = await payload.create({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + pool: 'default', + embeddingVersion: testEmbeddingVersion, + status: 'queued', + }, + }) + + // Run bulk tasks + const prepareTask = createPrepareBulkEmbeddingTask({ + knowledgePools: pluginOptions.knowledgePools, + bulkQueueName: pluginOptions.bulkQueueName, + }) + const pollTask = createPollOrCompleteBulkEmbeddingTask({ + knowledgePools: pluginOptions.knowledgePools, + bulkQueueName: pluginOptions.bulkQueueName, + }) + + await prepareTask.handler({ input: { runId: String(run.id) }, req: { payload } as any }) + await pollTask.handler({ input: { runId: String(run.id) }, req: { payload } as any }) + + // Verify all documents got embeddings + const finalEmbeds = await payload.find({ + collection: 'default', + where: { sourceCollection: { equals: 'posts' } }, + }) + expect(finalEmbeds.totalDocs).toBe(2) + + const runDoc = await payload.findByID({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + id: run.id, + }) + expect(runDoc.status).toBe('succeeded') + expect(runDoc.inputs).toBe(2) + }) }) diff --git a/package.json b/package.json index fadeddf..06d4ef9 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "payloadcms-vectorize", - "version": "0.4.1", + "version": "0.5.0", "description": "A plugin to vectorize collections for RAG in Payload 3.0", "license": "MIT", "type": "module", @@ -14,11 +14,6 @@ "import": "./src/exports/client.ts", "types": "./src/exports/client.ts", "default": "./src/exports/client.ts" - }, - "./rsc": { - "import": "./src/exports/rsc.ts", - "types": "./src/exports/rsc.ts", - "default": "./src/exports/rsc.ts" } }, "main": "./src/index.ts", @@ -111,11 +106,6 @@ "import": "./dist/exports/client.js", "types": "./dist/exports/client.d.ts", "default": "./dist/exports/client.js" - }, - "./rsc": { - "import": "./dist/exports/rsc.js", - "types": "./dist/exports/rsc.d.ts", - "default": "./dist/exports/rsc.js" } }, "main": "./dist/index.js", diff --git a/src/admin/components/EmbedAllButton.tsx b/src/admin/components/EmbedAllButton.tsx index 666f2de..6ca02a7 100644 --- a/src/admin/components/EmbedAllButton.tsx +++ b/src/admin/components/EmbedAllButton.tsx @@ -2,13 +2,22 @@ import React, { useState } from 'react' -type EmbedAllButtonProps = { +type EmbedAllButtonServerProps = { + hasBulkEmbeddings: boolean +} + +type EmbedAllButtonClientProps = { collectionSlug: string hasCreatePermission?: boolean newDocumentURL?: string } -export const EmbedAllButton: React.FC = ({ collectionSlug }) => { +type EmbedAllButtonProps = EmbedAllButtonServerProps & EmbedAllButtonClientProps + +export const EmbedAllButton: React.FC = ({ + collectionSlug, + hasBulkEmbeddings, +}) => { const [isSubmitting, setIsSubmitting] = useState(false) const [message, setMessage] = useState(null) @@ -36,6 +45,22 @@ export const EmbedAllButton: React.FC = ({ collectionSlug } } } + if 
(!hasBulkEmbeddings) { + return ( +
+ + Bulk embedding not configured +
+ ) + } + return (
- {message ? {message} : null} + {message ? ( + + {message.text} + {message.runId ? ( + <> + {' '} + + #{message.runId} + + + ) : null} + + ) : null}
) } diff --git a/src/index.ts b/src/index.ts index 2c48aec..b47adc3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -67,7 +67,6 @@ async function ensurePgvectorArtifacts(args: { for (const sql of sqls) { await postgresPayload.db.drizzle.execute(sql) } - } postgresPayload.logger.info('[payloadcms-vectorize] pgvector extension/columns/index ensured') } catch (err) { @@ -272,18 +271,18 @@ export const createVectorizeIntegration = // Only queue real-time vectorization if realTimeIngestionFn is provided // Bulk embedding is only triggered manually via API (/vector-bulk-embed) or admin UI if (realTimeIngestionFn) { - await payload.jobs.queue<'payloadcms-vectorize:vectorize'>({ - task: 'payloadcms-vectorize:vectorize', - input: { - doc, - collection: collectionSlug, - knowledgePool: pool, - }, - req: req, - ...(pluginOptions.realtimeQueueName - ? { queue: pluginOptions.realtimeQueueName } - : {}), - }) + await payload.jobs.queue<'payloadcms-vectorize:vectorize'>({ + task: 'payloadcms-vectorize:vectorize', + input: { + doc, + collection: collectionSlug, + knowledgePool: pool, + }, + req: req, + ...(pluginOptions.realtimeQueueName + ? { queue: pluginOptions.realtimeQueueName } + : {}), + }) } // If no realTimeIngestionFn, nothing happens on doc change // User must trigger bulk embedding manually @@ -365,4 +364,3 @@ export const createVectorizeIntegration = payloadcmsVectorize, } } - diff --git a/vitest.config.js b/vitest.config.js index 37c533b..9d7b479 100644 --- a/vitest.config.js +++ b/vitest.config.js @@ -25,8 +25,8 @@ export default defineConfig(() => { // (embeddingsTables map and Payload instance caching) fileParallelism: false, // Disable parallel test execution within files as well - threads: false, - maxConcurrency: 1, + //threads: false, + //maxConcurrency: 1, }, } }) From ea041981b06f214d3cff2819e0d9de394185e986 Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Thu, 8 Jan 2026 15:05:09 +0700 Subject: [PATCH 19/49] WIP --- README.md | 16 + dev/app/(payload)/admin/importMap.js | 6 +- dev/payload.config.ts | 1 - dev/specs/e2e.spec.ts | 135 +++++- dev/specs/utils.ts | 2 +- playwright.config.js | 4 +- src/admin/components/EmbedAllButton.tsx | 95 ---- .../components/EmbedAllButton/client.tsx | 419 ++++++++++++++++++ src/admin/components/EmbedAllButton/index.tsx | 57 +++ src/collections/embeddings.ts | 28 +- src/exports/client.ts | 2 +- src/index.ts | 23 +- src/tasks/bulkEmbedAll.ts | 5 +- src/tasks/vectorize.ts | 4 +- src/types.ts | 18 + 15 files changed, 686 insertions(+), 129 deletions(-) delete mode 100644 src/admin/components/EmbedAllButton.tsx create mode 100644 src/admin/components/EmbedAllButton/client.tsx create mode 100644 src/admin/components/EmbedAllButton/index.tsx diff --git a/README.md b/README.md index a39b3e2..c603274 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,21 @@ export default buildConfig({ **Important:** `knowledgePools` must have **different names than your collections**—reusing a collection name for a knowledge pool **will cause schema conflicts**. (In this example, the knowledge pool is named 'main' and a collection named 'main' will be created.) +### 1.5. 
Generate Import Map (Required for Admin UI) + +After configuring the plugin, you must generate the import map so that Payload can resolve client components (like the "Embed all" button) in the admin UI for bulk embeddings: + +```bash +pnpm run generate:importmap +``` + +**⚠️ Important:** Run this command: + +- After initial plugin setup +- If the "Embed all" button doesn't appear in the admin UI + +The import map tells Payload how to resolve component paths (like `'payloadcms-vectorize/client#EmbedAllButton'`) to actual React components. Without it, client components referenced in your collection configs won't render. + ### 2. Search Your Content The plugin automatically creates a `/api/vector-search` endpoint: @@ -474,6 +489,7 @@ Search for similar content using vector similarity. ### Bulk Embedding (Embed All) - Each knowledge pool's embeddings list shows an **Embed all** admin button that triggers a bulk run. +- **Note:** Make sure you've run `pnpm run generate:importmap` after plugin configuration, otherwise the button won't appear. - Bulk runs only include documents missing embeddings for the pool's current `embeddingConfig.version`. - Progress is recorded in `vector-bulk-embeddings-runs` and `vector-bulk-embeddings-batches` collections. - Endpoint: **POST** `/api/vector-bulk-embed` diff --git a/dev/app/(payload)/admin/importMap.js b/dev/app/(payload)/admin/importMap.js index 5bc8ec3..abe5d88 100644 --- a/dev/app/(payload)/admin/importMap.js +++ b/dev/app/(payload)/admin/importMap.js @@ -21,6 +21,8 @@ import { StrikethroughFeatureClient as StrikethroughFeatureClient_e70f5e05f09f93 import { UnderlineFeatureClient as UnderlineFeatureClient_e70f5e05f09f93e00b997edb1ef0c864 } from '@payloadcms/richtext-lexical/client' import { BoldFeatureClient as BoldFeatureClient_e70f5e05f09f93e00b997edb1ef0c864 } from '@payloadcms/richtext-lexical/client' import { ItalicFeatureClient as ItalicFeatureClient_e70f5e05f09f93e00b997edb1ef0c864 } from '@payloadcms/richtext-lexical/client' +import { EmbedAllButton as EmbedAllButton_69051d9d0217691c78245f4f33731b73 } from 'payloadcms-vectorize/client' +import { CollectionCards as CollectionCards_ab83ff7e88da8d3530831f296ec4756a } from '@payloadcms/ui/rsc' export const importMap = { "@payloadcms/richtext-lexical/rsc#RscEntryLexicalCell": RscEntryLexicalCell_44fe37237e0ebf4470c9990d8cb7b07e, @@ -45,5 +47,7 @@ export const importMap = { "@payloadcms/richtext-lexical/client#StrikethroughFeatureClient": StrikethroughFeatureClient_e70f5e05f09f93e00b997edb1ef0c864, "@payloadcms/richtext-lexical/client#UnderlineFeatureClient": UnderlineFeatureClient_e70f5e05f09f93e00b997edb1ef0c864, "@payloadcms/richtext-lexical/client#BoldFeatureClient": BoldFeatureClient_e70f5e05f09f93e00b997edb1ef0c864, - "@payloadcms/richtext-lexical/client#ItalicFeatureClient": ItalicFeatureClient_e70f5e05f09f93e00b997edb1ef0c864 + "@payloadcms/richtext-lexical/client#ItalicFeatureClient": ItalicFeatureClient_e70f5e05f09f93e00b997edb1ef0c864, + "payloadcms-vectorize/client#EmbedAllButton": EmbedAllButton_69051d9d0217691c78245f4f33731b73, + "@payloadcms/ui/rsc#CollectionCards": CollectionCards_ab83ff7e88da8d3530831f296ec4756a } diff --git a/dev/payload.config.ts b/dev/payload.config.ts index 56d43f1..efc9723 100644 --- a/dev/payload.config.ts +++ b/dev/payload.config.ts @@ -145,7 +145,6 @@ const buildConfigWithPostgres = async () => { version: testEmbeddingVersion, queryFn: embedQuery, realTimeIngestionFn: embedDocs, - bulkEmbeddingsFns, }, }, bulkDefault: { diff --git a/dev/specs/e2e.spec.ts 
b/dev/specs/e2e.spec.ts index daef019..6b2b80b 100644 --- a/dev/specs/e2e.spec.ts +++ b/dev/specs/e2e.spec.ts @@ -5,6 +5,32 @@ import { getPayload } from 'payload' import { getInitialMarkdownContent } from './constants.js' import { waitForVectorizationJobs } from './utils.js' import { testEmbeddingVersion } from 'helpers/embed.js' +import { devUser } from 'helpers/credentials.js' + +// Helper function to log in to the admin panel +const loginToAdmin = async (page: any) => { + console.log('[loginToAdmin] Starting login process...') + await page.goto('/admin/login') + console.log('[loginToAdmin] Navigated to login page') + + await page.waitForLoadState('domcontentloaded') + console.log('[loginToAdmin] Page loaded') + + // Fill in the login form + console.log('[loginToAdmin] Filling in email...') + await page.fill('input[name="email"]', devUser.email) + console.log('[loginToAdmin] Filling in password...') + await page.fill('input[name="password"]', devUser.password) + + // Click the login button + console.log('[loginToAdmin] Clicking submit button...') + await page.click('button[type="submit"]') + + // Wait for redirect to admin dashboard + console.log('[loginToAdmin] Waiting for redirect...') + await page.waitForURL(/\/admin(?!\/login)/, { timeout: 15000 }) + console.log('[loginToAdmin] Login complete!') +} const expectVectorSearchResponse = async (response: any, post: any, title: string) => { expect(response.ok()).toBe(true) @@ -38,9 +64,11 @@ test.describe('Vector embedding e2e tests', () => { let post: any test.beforeAll(async () => { + console.log('[beforeAll] Setting up Payload instance...') // Setup: Create a post and wait for realtime embedding _config = await config payload = await getPayload({ config: _config, key: `e2e-test-${Date.now()}` }) + console.log('[beforeAll] Payload instance created') }) test('querying the endpoint should return the title with testEmbeddingVersion', async ({ @@ -77,9 +105,15 @@ test.describe('Vector embedding e2e tests', () => { page, request, }) => { + console.log('[test] Starting bulk embedding test...') test.setTimeout(120000) + // Login to admin first + console.log('[test] Logging in...') + await loginToAdmin(page) + // Verify bulkDefault pool is EMPTY (no realTimeIngestionFn configured) + console.log('[test] Checking bulkDefault pool is empty...') const emptyResponse = await request.post('/api/vector-search', { data: { query: title, @@ -89,13 +123,41 @@ test.describe('Vector embedding e2e tests', () => { await expectEmptyVectorSearchResponse(emptyResponse) // Navigate to the bulkDefault embeddings collection page in admin - await page.goto('/admin/collections/bulkDefault') + console.log('[test] Navigating to bulkDefault collection page...') + await page.goto('/admin/collections/bulkDefault', { waitUntil: 'networkidle' }) + console.log('[test] Page loaded') + + // Wait for the page to fully load and render + console.log('[test] Waiting for page to fully load...') + await page.waitForLoadState('domcontentloaded') + await page.waitForLoadState('networkidle') + console.log('[test] Page fully loaded') + + // Wait for the collapsible header to appear - use getByText for more flexible matching + // Note: If this fails, ensure `pnpm run generate:importmap` has been run + console.log('[test] Looking for "Bulk Embed All" text...') + const bulkEmbedAllText = page.getByText('Bulk Embed All', { exact: false }) + await expect(bulkEmbedAllText).toBeVisible({ timeout: 15000 }) + console.log('[test] Found "Bulk Embed All" text!') - // Wait for the page to load and 
find the Embed All button - const embedAllButton = page.locator('button:has-text("Embed all")') - await expect(embedAllButton).toBeVisible({ timeout: 10000 }) + // Click the button that contains the h3 with "Bulk Embed All" text + // The button wraps the h3, so we click the button that contains the h3 + const expandButton = page.locator('button:has(h3:has-text("Bulk Embed All"))') + // If that doesn't work, try clicking the parent of the text + if ((await expandButton.count()) === 0) { + const parentButton = bulkEmbedAllText.locator('..').locator('button').first() + await parentButton.click() + } else { + await expandButton.click() + } + + // Wait for the expanded content to appear (the Embed All button should become visible) + await page.waitForTimeout(500) // Small delay for animation - // Click the Embed All button + // Now find and click the Embed All button (should be visible after expansion) + // Use a more specific selector to avoid clicking the expand button again + const embedAllButton = page.locator('button.btn--style-primary:has-text("Embed all")') + await expect(embedAllButton).toBeVisible({ timeout: 5000 }) await embedAllButton.click() // Wait for success message with run link @@ -124,15 +186,21 @@ test.describe('Vector embedding e2e tests', () => { let finalStatus = '' while (attempts < maxAttempts) { + console.log('[test] Polling for status...') // Refresh the page to see updated status await page.reload() await page.waitForLoadState('domcontentloaded') - // Get the status value - it's in a select or text field - const statusValue = await statusField.inputValue().catch(() => null) + // Get the status value - React Select displays value in .rs__single-value + const statusValue = await statusField + .locator('.rs__single-value') + .textContent() + .catch(() => null) + console.log('[test] Status value:', statusValue) if (statusValue) { finalStatus = statusValue - if (statusValue === 'completed') { + console.log('[test] Status value:', statusValue) + if (statusValue === 'succeeded') { break } } @@ -141,7 +209,7 @@ test.describe('Vector embedding e2e tests', () => { await page.waitForTimeout(3000) } - expect(finalStatus).toBe('completed') + expect(finalStatus).toBe('succeeded') // Now verify vector-search returns results for bulkDefault pool const filledResponse = await request.post('/api/vector-search', { @@ -152,4 +220,53 @@ test.describe('Vector embedding e2e tests', () => { }) await expectVectorSearchResponse(filledResponse, post, title) }) + + test('clicking expand section on default collection shows not enabled message', async ({ + page, + }) => { + console.log('[test] Starting default collection test...') + + // Login to admin first + console.log('[test] Logging in...') + await loginToAdmin(page) + + // Navigate to the default embeddings collection page in admin + console.log('[test] Navigating to default collection page...') + await page.goto('/admin/collections/default', { waitUntil: 'networkidle' }) + console.log('[test] Page loaded') + + // Wait for the page to fully load and render + console.log('[test] Waiting for page to fully load...') + await page.waitForLoadState('domcontentloaded') + await page.waitForLoadState('networkidle') + console.log('[test] Page fully loaded') + + // Wait for the collapsible header to appear - use getByText for more flexible matching + // Note: If this fails, ensure `pnpm run generate:importmap` has been run + console.log('[test] Looking for "Bulk Embed All" text...') + const bulkEmbedAllText = page.getByText('Bulk Embed All', { exact: 
false }) + await expect(bulkEmbedAllText).toBeVisible({ timeout: 15000 }) + console.log('[test] Found "Bulk Embed All" text!') + + // Click the button that contains the h3 with "Bulk Embed All" text + const expandButton = page.locator('button:has(h3:has-text("Bulk Embed All"))') + // If that doesn't work, try clicking the parent of the text + if ((await expandButton.count()) === 0) { + const parentButton = bulkEmbedAllText.locator('..').locator('button').first() + await parentButton.click() + } else { + await expandButton.click() + } + + // Wait for the expanded content to appear + await page.waitForTimeout(500) // Small delay for animation + + // Verify the "Bulk embedding not configured" message appears + const notConfiguredMessage = page.locator('text=/Bulk embedding not configured/i') + await expect(notConfiguredMessage).toBeVisible({ timeout: 5000 }) + + // Verify the message about configuring bulkEmbeddingsFns appears + const configMessage = page.locator('text=/bulkEmbeddingsFns/i') + await expect(configMessage).toBeVisible({ timeout: 5000 }) + }) }) diff --git a/dev/specs/utils.ts b/dev/specs/utils.ts index 4c46570..ae1804a 100644 --- a/dev/specs/utils.ts +++ b/dev/specs/utils.ts @@ -8,7 +8,7 @@ import { createVectorizeIntegration } from 'payloadcms-vectorize' import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../src/collections/bulkEmbeddingsRuns.js' import { BULK_EMBEDDINGS_INPUT_METADATA_SLUG } from '../../src/collections/bulkEmbeddingInputMetadata.js' import { BULK_EMBEDDINGS_BATCHES_SLUG } from '../../src/collections/bulkEmbeddingsBatches.js' -import { makeDummyEmbedDocs } from 'helpers/embed.js' +import { makeDummyEmbedDocs } from '../helpers/embed.js' import type { BulkEmbeddingsFns, BulkEmbeddingInput, diff --git a/playwright.config.js b/playwright.config.js index 1404bda..41d2423 100644 --- a/playwright.config.js +++ b/playwright.config.js @@ -25,9 +25,9 @@ export default defineConfig({ /* Fail the build on CI if you accidentally left test.only in the source code. */ forbidOnly: !!process.env.CI, /* Retry on CI only */ - retries: process.env.CI ? 2 : 0, + retries: 0, /* Opt out of parallel tests on CI. */ - workers: process.env.CI ? 1 : undefined, + workers: 1, /* Reporter to use. See https://playwright.dev/docs/test-reporters */ reporter: 'html', /* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. 
*/ diff --git a/src/admin/components/EmbedAllButton.tsx b/src/admin/components/EmbedAllButton.tsx deleted file mode 100644 index 64bd604..0000000 --- a/src/admin/components/EmbedAllButton.tsx +++ /dev/null @@ -1,95 +0,0 @@ -'use client' - -import React, { useState } from 'react' - -type EmbedAllButtonServerProps = { - hasBulkEmbeddings: boolean -} - -type EmbedAllButtonClientProps = { - collectionSlug: string - hasCreatePermission?: boolean - newDocumentURL?: string -} - -type EmbedAllButtonProps = EmbedAllButtonServerProps & EmbedAllButtonClientProps - -export const EmbedAllButton: React.FC = ({ - collectionSlug, - hasBulkEmbeddings, -}) => { - const [isSubmitting, setIsSubmitting] = useState(false) - const [message, setMessage] = useState<{ text: string; runId?: string } | null>(null) - - const handleClick = async () => { - setIsSubmitting(true) - setMessage(null) - try { - const res = await fetch('/api/vector-bulk-embed', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - body: JSON.stringify({ knowledgePool: collectionSlug }), - }) - const data = await res.json() - if (!res.ok) { - setMessage({ text: data?.error || 'Failed to queue bulk embed run' }) - return - } - setMessage({ text: 'Queued bulk embed run', runId: data.runId }) - } catch (error: any) { - setMessage({ text: error?.message || 'Failed to queue bulk embed run' }) - } finally { - setIsSubmitting(false) - } - } - - if (!hasBulkEmbeddings) { - return ( -
- - Bulk embedding not configured -
- ) - } - - return ( -
- - {message ? ( - - {message.text} - {message.runId ? ( - <> - {' '} - - #{message.runId} - - - ) : null} - - ) : null} -
- ) -} - -export default EmbedAllButton diff --git a/src/admin/components/EmbedAllButton/client.tsx b/src/admin/components/EmbedAllButton/client.tsx new file mode 100644 index 0000000..14d3dbf --- /dev/null +++ b/src/admin/components/EmbedAllButton/client.tsx @@ -0,0 +1,419 @@ +'use client' + +import React, { useState } from 'react' + +type EmbedAllButtonClientProps = { + collectionSlug: string + hasBulkEmbeddings: boolean +} + +export const EmbedAllButtonClient: React.FC = ({ + collectionSlug, + hasBulkEmbeddings, +}) => { + const [isSubmitting, setIsSubmitting] = useState(false) + const [message, setMessage] = useState<{ text: string; runId?: string; error?: boolean } | null>( + null, + ) + const [isExpanded, setIsExpanded] = useState(false) + const [isExpandedDisabled, setIsExpandedDisabled] = useState(false) + + const handleClick = async () => { + setIsSubmitting(true) + setMessage(null) + try { + const res = await fetch('/api/vector-bulk-embed', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ knowledgePool: collectionSlug }), + }) + const data = await res.json() + if (!res.ok) { + setMessage({ text: data?.error || 'Failed to queue bulk embed run', error: true }) + return + } + setMessage({ text: 'Queued bulk embed run', runId: data.runId, error: false }) + } catch (error: any) { + setMessage({ text: error?.message || 'Failed to queue bulk embed run', error: true }) + } finally { + setIsSubmitting(false) + } + } + + if (!hasBulkEmbeddings) { + return ( +
+ + + {isExpandedDisabled && ( +
+ + Bulk embedding not configured + +

+ This knowledge pool does not have bulk embedding configured. Configure{' '} + + bulkEmbeddingsFns + {' '} + in your plugin options to enable this feature. +

+
+ )} + + +
+ ) + } + + return ( +
+
+ + + {isExpanded && ( +
+

+ Generate embeddings for all documents that don't have embeddings in this knowledge + pool. This process will: +

+
    +
  •
+                Collect all documents missing embeddings or with embeddings of a different version
+              
  • +
  • Create batches and submit them to your embedding provider
  • +
  • Monitor batch completion and save embeddings atomically
  • +
  • Track progress in the bulk embeddings runs collection
  • +
+

+ Note: This is a large operation. You can monitor progress by clicking the run link + after submission. +

+ +
+ + + {message && ( +
+ {message.error ? ( + + + + + ) : ( + + + + + )} + + {message.text} + {message.runId && !message.error && ( + <> + {' — '} + { + e.preventDefault() + window.location.href = `/admin/collections/vector-bulk-embeddings-runs/${message.runId}` + }} + > + View run #{message.runId} + + + )} + +
+ )} +
+
+ )} +
+ + +
+ ) +} diff --git a/src/admin/components/EmbedAllButton/index.tsx b/src/admin/components/EmbedAllButton/index.tsx new file mode 100644 index 0000000..472ff6c --- /dev/null +++ b/src/admin/components/EmbedAllButton/index.tsx @@ -0,0 +1,57 @@ +import React from 'react' +import { EmbedAllButtonClient } from './client.js' + +type EmbedAllButtonServerProps = { + hasBulkEmbeddings: boolean + collectionSlug: string +} + +type EmbedAllButtonProps = EmbedAllButtonServerProps + +export const EmbedAllButton: React.FC = ( + props, +) => { + // Payload passes serverProps functions - we need to call them ourselves + // The function receives { payload, params } context + let hasBulkEmbeddings: boolean = false + + if (typeof props.hasBulkEmbeddings === 'function') { + // Call the serverProps function with the payload/params context + try { + hasBulkEmbeddings = Boolean( + (props.hasBulkEmbeddings as any)({ payload: props.payload, params: props.params }), + ) + } catch (error) { + console.error('[EmbedAllButton Server] Error calling hasBulkEmbeddings:', error) + hasBulkEmbeddings = false + } + } else { + hasBulkEmbeddings = Boolean(props.hasBulkEmbeddings) + } + + let collectionSlug: string = '' + + if (typeof props.collectionSlug === 'function') { + // Call the serverProps function with the payload/params context + try { + collectionSlug = String( + (props.collectionSlug as any)({ payload: props.payload, params: props.params }) || '', + ) + } catch (error) { + console.error('[EmbedAllButton Server] Error calling collectionSlug:', error) + collectionSlug = '' + } + } else { + collectionSlug = String(props.collectionSlug || '') + } + + console.log('[EmbedAllButton Server] Resolved hasBulkEmbeddings:', hasBulkEmbeddings) + console.log('[EmbedAllButton Server] Resolved collectionSlug:', collectionSlug) + + // Only pass serializable props to the client component + return ( + + ) +} + +export default EmbedAllButton diff --git a/src/collections/embeddings.ts b/src/collections/embeddings.ts index 603c854..9081ae4 100644 --- a/src/collections/embeddings.ts +++ b/src/collections/embeddings.ts @@ -1,5 +1,6 @@ import type { CollectionConfig, Field } from 'payload' -import type { KnowledgePoolName } from '../types.js' +import type { KnowledgePoolName, VectorizedPayload } from '../types.js' +import { isVectorizedPayload } from '../types.js' const RESERVED_FIELDS = ['sourceCollection', 'docId', 'chunkIndex', 'chunkText', 'embeddingVersion'] @@ -31,19 +32,22 @@ export const createEmbeddingsCollection = ( path: 'payloadcms-vectorize/client#EmbedAllButton', exportName: 'EmbedAllButton', serverProps: { - hasBulkEmbeddings: ({ payload, params }: { payload: any; params: any }) => { - // Get the knowledge pool name from the collection slug - const poolName = params?.slug as string - if (!poolName) return false + hasBulkEmbeddings: ({ payload, params }: { payload: any; params: any }): boolean => { + // Get the knowledge pool name from params.segments + // params structure: { segments: [ 'collections', 'bulkDefault' ] } + const poolName = params?.segments?.[1] - // Access plugin options from payload config - const pluginOptions = payload.config.plugins?.find( - (p: any) => p.payloadcmsVectorize, - )?.payloadcmsVectorize + // Use the _isBulkEmbedEnabled method added by the plugin + if (poolName && typeof poolName === 'string' && isVectorizedPayload(payload)) { + return payload._isBulkEmbedEnabled(poolName) + } - if (!pluginOptions?.knowledgePools?.[poolName]) return false - - return 
!!pluginOptions.knowledgePools[poolName].embeddingConfig.bulkEmbeddingsFns + return false + }, + collectionSlug: ({ params }: { payload: any; params: any }): string => { + // Get the knowledge pool name from params.segments + // params structure: { segments: [ 'collections', 'bulkDefault' ] } + return params?.segments?.[1] || '' }, }, }, diff --git a/src/exports/client.ts b/src/exports/client.ts index eaa8a1d..e864467 100644 --- a/src/exports/client.ts +++ b/src/exports/client.ts @@ -1 +1 @@ -export { EmbedAllButton } from '../admin/components/EmbedAllButton.js' +export { EmbedAllButton } from '../admin/components/EmbedAllButton/index.js' diff --git a/src/index.ts b/src/index.ts index b47adc3..590ae51 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,5 +1,6 @@ import type { Config, Payload } from 'payload' import { customType } from '@payloadcms/db-postgres/drizzle/pg-core' +import toSnakeCase from 'to-snake-case' import { createEmbeddingsCollection } from './collections/embeddings.js' import type { @@ -8,6 +9,7 @@ import type { KnowledgePoolName, KnowledgePoolStaticConfig, KnowledgePoolDynamicConfig, + VectorizedPayload, } from './types.js' import { isPostgresPayload } from './types.js' import type { PostgresAdapterArgs } from '@payloadcms/db-postgres' @@ -109,10 +111,12 @@ export const createVectorizeIntegration = }, }) - const table = schema?.tables?.[poolName] + // Drizzle converts camelCase collection slugs to snake_case table names + const tableName = toSnakeCase(poolName) + const table = schema?.tables?.[tableName] if (!table) { throw new Error( - `[payloadcms-vectorize] Embeddings table "${poolName}" not found during schema initialization. Ensure the collection has been registered.`, + `[payloadcms-vectorize] Embeddings table "${poolName}" (table: "${tableName}") not found during schema initialization. 
Ensure the collection has been registered.`, ) } @@ -321,14 +325,25 @@ export const createVectorizeIntegration = const incomingOnInit = config.onInit config.onInit = async (payload) => { - if (incomingOnInit) await incomingOnInit(payload) + if (incomingOnInit) + await incomingOnInit(payload) + + // Add _isBulkEmbedEnabled method to payload object + // This allows checking if bulk embedding is enabled for a knowledge pool + ;(payload as VectorizedPayload)._isBulkEmbedEnabled = ( + knowledgePool: TPoolNames, + ): boolean => { + const poolConfig = pluginOptions.knowledgePools[knowledgePool] + return !!poolConfig?.embeddingConfig?.bulkEmbeddingsFns + } // Ensure pgvector artifacts for each knowledge pool for (const poolName in staticConfigs) { const staticConfig = staticConfigs[poolName] + // Drizzle converts camelCase collection slugs to snake_case table names await ensurePgvectorArtifacts({ payload, - tableName: poolName, + tableName: toSnakeCase(poolName), dims: staticConfig.dims, ivfflatLists: staticConfig.ivfflatLists, }) diff --git a/src/tasks/bulkEmbedAll.ts b/src/tasks/bulkEmbedAll.ts index cea93db..051dc62 100644 --- a/src/tasks/bulkEmbedAll.ts +++ b/src/tasks/bulkEmbedAll.ts @@ -10,6 +10,7 @@ import { BULK_EMBEDDINGS_RUNS_SLUG } from '../collections/bulkEmbeddingsRuns.js' import { BULK_EMBEDDINGS_INPUT_METADATA_SLUG } from '../collections/bulkEmbeddingInputMetadata.js' import { BULK_EMBEDDINGS_BATCHES_SLUG } from '../collections/bulkEmbeddingsBatches.js' import { isPostgresPayload, PostgresPayload, BulkEmbeddingInput } from '../types.js' +import toSnakeCase from 'to-snake-case' type PrepareBulkEmbeddingTaskInput = { runId: string @@ -622,7 +623,7 @@ async function completeAllBatchesAtomically(args: { await persistVectorColumn({ payload, - poolName, + poolName: toSnakeCase(poolName), vector: embeddingArray, id: String((created as any)?.id ?? 
''), }) @@ -657,7 +658,7 @@ async function persistVectorColumn(args: { const postgresPayload = payload as PostgresPayload const schemaName = postgresPayload.db.schemaName || 'public' const literal = `[${Array.from(vector).join(',')}]` - const sql = `UPDATE "${schemaName}"."${poolName}" SET embedding = $1 WHERE id = $2` + const sql = `UPDATE "${schemaName}"."${toSnakeCase(poolName)}" SET embedding = $1 WHERE id = $2` const runSQL = async (statement: string, params?: any[]) => { if (postgresPayload.db.pool?.query) return postgresPayload.db.pool.query(statement, params) if (postgresPayload.db.drizzle?.execute) return postgresPayload.db.drizzle.execute(statement) diff --git a/src/tasks/vectorize.ts b/src/tasks/vectorize.ts index 0e497f2..5f8364c 100644 --- a/src/tasks/vectorize.ts +++ b/src/tasks/vectorize.ts @@ -6,6 +6,7 @@ import { KnowledgePoolDynamicConfig, ToKnowledgePoolFn, } from '../types.js' +import toSnakeCase from 'to-snake-case' type VectorizeTaskInput = { doc: Record @@ -159,7 +160,8 @@ async function runVectorizeTask(args: { const literal = `[${Array.from(vector).join(',')}]` const postgresPayload = payload as PostgresPayload const schemaName = postgresPayload.db.schemaName || 'public' - const sql = `UPDATE "${schemaName}"."${poolName}" SET embedding = $1 WHERE id = $2` as string + const sql = + `UPDATE "${schemaName}"."${toSnakeCase(poolName)}" SET embedding = $1 WHERE id = $2` as string try { await runSQL(sql, [literal, id]) } catch (e) { diff --git a/src/types.ts b/src/types.ts index 670ae59..9216e71 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,5 +1,23 @@ import type { CollectionSlug, Payload, Field, Where } from 'payload' +/** + * Extended Payload type with vectorize plugin methods + */ +export type VectorizedPayload = + Payload & { + /** Check if bulk embedding is enabled for a knowledge pool */ + _isBulkEmbedEnabled: (knowledgePool: TPoolNames) => boolean + } + +/** + * Type guard to check if a Payload instance has vectorize extensions + */ +export function isVectorizedPayload(payload: Payload): payload is VectorizedPayload { + return ( + '_isBulkEmbedEnabled' in payload && typeof (payload as any)._isBulkEmbedEnabled === 'function' + ) +} + export type EmbedDocsFn = (texts: string[]) => Promise export type EmbedQueryFn = (text: string) => Promise From 1aaf52cbeb793aae152ed6c29288ad79cead3865 Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Thu, 8 Jan 2026 17:06:27 +0700 Subject: [PATCH 20/49] Adds CI browser --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index aa206bf..d0a3d4c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,6 +39,9 @@ jobs: - name: Install dependencies run: pnpm install + - name: Install Playwright browsers + run: pnpm exec playwright install --with-deps chromium + - name: Install pgvector extension run: | sudo apt-get update From 91d0bf72a5cd21966e23a55ae61b52f1fd25c675 Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Thu, 8 Jan 2026 18:26:34 +0700 Subject: [PATCH 21/49] Runs sequentially so the tests pass in CI --- dev/specs/e2e.spec.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dev/specs/e2e.spec.ts b/dev/specs/e2e.spec.ts index 6b2b80b..406055e 100644 --- a/dev/specs/e2e.spec.ts +++ b/dev/specs/e2e.spec.ts @@ -58,6 +58,9 @@ const expectEmptyVectorSearchResponse = async (response: any) => { } test.describe('Vector embedding e2e tests', () => { 
+ // Force tests to run sequentially + test.describe.configure({ mode: 'serial' }) + const title = 'e2e test post title' let payload: Payload let _config: SanitizedConfig From 95bdb714e587586d2d5d94c37b7f9901c98f38e2 Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Thu, 8 Jan 2026 18:54:59 +0700 Subject: [PATCH 22/49] increases timeout since tests are in parallel now --- dev/specs/e2e.spec.ts | 2 +- playwright.config.js | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/dev/specs/e2e.spec.ts b/dev/specs/e2e.spec.ts index 406055e..baaf4fb 100644 --- a/dev/specs/e2e.spec.ts +++ b/dev/specs/e2e.spec.ts @@ -59,7 +59,7 @@ const expectEmptyVectorSearchResponse = async (response: any) => { test.describe('Vector embedding e2e tests', () => { // Force tests to run sequentially - test.describe.configure({ mode: 'serial' }) + test.describe.configure({ mode: 'serial', timeout: 120000 }) const title = 'e2e test post title' let payload: Payload diff --git a/playwright.config.js b/playwright.config.js index 41d2423..9c895ac 100644 --- a/playwright.config.js +++ b/playwright.config.js @@ -24,9 +24,7 @@ export default defineConfig({ fullyParallel: true, /* Fail the build on CI if you accidentally left test.only in the source code. */ forbidOnly: !!process.env.CI, - /* Retry on CI only */ retries: 0, - /* Opt out of parallel tests on CI. */ workers: 1, /* Reporter to use. See https://playwright.dev/docs/test-reporters */ reporter: 'html', From 3a7b73cb093bda37c90463b0b33a0edeec9b5488 Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Fri, 9 Jan 2026 23:51:13 +0700 Subject: [PATCH 23/49] Better explanation and leaner API --- README.md | 12 ++++++++++++ dev/helpers/embed.ts | 7 ------- dev/specs/utils.ts | 6 ------ src/index.ts | 36 +++++++++++++++++++++++++++++++++++- src/tasks/bulkEmbedAll.ts | 2 -- src/types.ts | 13 +++---------- 6 files changed, 50 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index c603274..80b5485 100644 --- a/README.md +++ b/README.md @@ -255,6 +255,12 @@ type BatchSubmission = { } ``` +**About the `chunk.id` field:** + +- **Plugin-generated**: The plugin automatically generates a unique `id` for each chunk (format: `${collectionSlug}:${docId}:${chunkIndex}`). You don't need to create it. +- **Purpose**: The `id` is used to correlate embedding outputs back to their original inputs, ensuring each embedding is correctly associated with its source document and chunk. +- **Usage**: When submitting batches to your provider, you must pass this `id` along with the text (e.g., as `custom_id` in Voyage AI's batch API). This allows your provider to return the `id` with each embedding result. + **Return values:** - `null` - "I'm accumulating this chunk, not ready to submit yet" @@ -328,6 +334,12 @@ type BulkEmbeddingOutput = { } ``` +**About the `id` field in outputs:** + +- **Correlation**: The `id` in each `BulkEmbeddingOutput` must match the `chunk.id` that was passed to `addChunk`. This is how the plugin correlates outputs back to their original inputs. +- **Extraction**: When processing your provider's response, extract the `id` that you originally sent (e.g., from Voyage's `custom_id` field) and include it in the returned `BulkEmbeddingOutput`. +- **Example**: If you sent `{ custom_id: "posts:123:0", input: [...] }` to your provider, extract `result.custom_id` from the response and return `{ id: result.custom_id, embedding: [...] }`. 
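+
+For illustration, here is a minimal sketch of mapping a provider's JSONL batch output back to `BulkEmbeddingOutput[]`. The response shape (`custom_id`, `response.body.data[0].embedding`) assumes Voyage's batch format; adapt the field names to whatever your provider returns.
+
+```typescript
+import type { BulkEmbeddingOutput } from 'payloadcms-vectorize'
+
+// Assumed shape per JSONL line (provider-specific):
+// { "custom_id": "posts:123:0", "response": { "body": { "data": [{ "embedding": [0.1, ...] }] } } }
+// or { "custom_id": "posts:123:0", "error": { "message": "..." } }
+function parseBatchOutput(jsonl: string): BulkEmbeddingOutput[] {
+  return jsonl
+    .trim()
+    .split('\n')
+    .filter((line) => line.trim().length > 0)
+    .map((line) => {
+      const result = JSON.parse(line)
+      if (result.error) {
+        // Keep the id so the plugin can record which chunk failed
+        return { id: result.custom_id, error: result.error.message ?? 'Unknown error' }
+      }
+      return { id: result.custom_id, embedding: result.response.body.data[0].embedding }
+    })
+}
+```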
+ #### `onError` - Cleanup on Failure (Optional) Called when the bulk run fails. Use this to clean up provider-side resources (delete files, cancel batches). The run can be re-queued after cleanup. diff --git a/dev/helpers/embed.ts b/dev/helpers/embed.ts index c85ae8a..20ca634 100644 --- a/dev/helpers/embed.ts +++ b/dev/helpers/embed.ts @@ -228,13 +228,6 @@ export function makeVoyageBulkEmbeddingsConfig(): BulkEmbeddingsFns { return { status, - counts: batchData.request_counts - ? { - inputs: batchData.request_counts.total || 0, - succeeded: batchData.request_counts.completed || 0, - failed: batchData.request_counts.failed || 0, - } - : undefined, } } catch (error) { console.error('Voyage pollBatch error:', error) diff --git a/dev/specs/utils.ts b/dev/specs/utils.ts index ae1804a..f6be5a5 100644 --- a/dev/specs/utils.ts +++ b/dev/specs/utils.ts @@ -128,14 +128,8 @@ export function createMockBulkEmbeddings( const callCount = batchPollCount.get(providerBatchId) ?? 0 batchPollCount.set(providerBatchId, callCount + 1) const status = statusSequence[Math.min(callCount, statusSequence.length - 1)] - const inputs = batchInputs.get(providerBatchId) ?? [] - const counts = - status === 'succeeded' - ? { inputs: inputs.length, succeeded: inputs.length, failed: 0 } - : undefined return { status, - counts, } }, diff --git a/src/index.ts b/src/index.ts index 8f5d5a7..8520bc3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -34,7 +34,41 @@ import { } from './tasks/bulkEmbedAll.js' import { createBulkEmbedHandler } from './endpoints/bulkEmbed.js' -export type * from './types.js' +export type { + KnowledgePoolStaticConfig, + PayloadcmsVectorizeConfig, + + // PayloadcmsVectorizeConfig + KnowledgePoolDynamicConfig, + KnowledgePoolName, + + // KnowledgePoolDynamicConfig, + CollectionVectorizeOption, + EmbeddingConfig, + + // CollectionVectorizeOption + ToKnowledgePoolFn, + + // EmbeddingConfig + EmbedQueryFn, + EmbedDocsFn, + BulkEmbeddingsFns, + + // BulkEmbeddingsFns + AddChunkArgs, + BatchSubmission, + PollBatchArgs, + PollBulkEmbeddingsResult, + CompleteBatchArgs, + BulkEmbeddingOutput, + OnBulkErrorArgs, + + // AddChunkArgs + BulkEmbeddingInput, + + // PollBulkEmbeddingsResult + BulkEmbeddingRunStatus, +} from './types.js' async function ensurePgvectorArtifacts(args: { payload: Payload diff --git a/src/tasks/bulkEmbedAll.ts b/src/tasks/bulkEmbedAll.ts index 051dc62..6445827 100644 --- a/src/tasks/bulkEmbedAll.ts +++ b/src/tasks/bulkEmbedAll.ts @@ -261,8 +261,6 @@ export const createPollOrCompleteBulkEmbeddingTask = ({ collection: BULK_EMBEDDINGS_BATCHES_SLUG, data: { status: pollResult.status, - succeededCount: pollResult.counts?.succeeded, - failedCount: pollResult.counts?.failed, error: pollResult.error, ...(TERMINAL_STATUSES.has(pollResult.status) ? 
{ completedAt: new Date().toISOString() } diff --git a/src/types.ts b/src/types.ts index 9216e71..b62e335 100644 --- a/src/types.ts +++ b/src/types.ts @@ -54,7 +54,7 @@ export type KnowledgePoolDynamicConfig = { embeddingConfig: EmbeddingConfig } -type EmbeddingConfig = { +export type EmbeddingConfig = { /** Version string to track embedding model/version - stored in each embedding document */ version: string /** Embedding function for query provided by the user @@ -75,7 +75,7 @@ type EmbeddingConfig = { export type BulkEmbeddingRunStatus = 'queued' | 'running' | 'succeeded' | 'failed' | 'canceled' export type BulkEmbeddingInput = { - /** Stable identifier for correlating outputs (should be unique per chunk) */ + /** Stable identifier for correlating outputs (is unique per chunk) */ id: string /** Raw text to embed */ text: string @@ -99,15 +99,8 @@ export type BulkEmbeddingOutput = { error?: string | null } -export type BulkEmbeddingCounts = { - inputs?: number - succeeded?: number - failed?: number -} - export type PollBulkEmbeddingsResult = { status: BulkEmbeddingRunStatus - counts?: BulkEmbeddingCounts error?: string } @@ -226,7 +219,7 @@ export type PostgresPayload = any & { // Job task argument types export type VectorizeTaskArgs = { payload: any - pluginOptions: PayloadcmsVectorizeConfig & { embeddingsCollectionSlug?: string } + pluginOptions: PayloadcmsVectorizeConfig doc: Record collection: string knowledgePool: KnowledgePoolName From 306cd3194a19bae3f1482421083b694b606bce6e Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Sat, 10 Jan 2026 09:09:20 +0700 Subject: [PATCH 24/49] WIP --- README.md | 28 ++- dev/specs/bulkEmbed/onError.spec.ts | 7 +- dev/specs/bulkEmbed/partialFailure.spec.ts | 244 +++++++++++++++++++++ dev/specs/utils.ts | 7 +- src/collections/bulkEmbeddingsRuns.ts | 8 + src/tasks/bulkEmbedAll.ts | 60 ++++- src/types.ts | 14 ++ 7 files changed, 353 insertions(+), 15 deletions(-) create mode 100644 dev/specs/bulkEmbed/partialFailure.spec.ts diff --git a/README.md b/README.md index 80b5485..3b1e676 100644 --- a/README.md +++ b/README.md @@ -342,21 +342,38 @@ type BulkEmbeddingOutput = { #### `onError` - Cleanup on Failure (Optional) -Called when the bulk run fails. Use this to clean up provider-side resources (delete files, cancel batches). The run can be re-queued after cleanup. +Called when the bulk run fails OR when there are partial chunk failures. Use this to clean up provider-side resources (delete files, cancel batches) and handle failed chunks. The run can be re-queued after cleanup. ```typescript +type FailedChunkData = { + collection: string // Source collection slug + documentId: string // Source document ID + chunkIndex: number // Index of the chunk within the document +} + type OnBulkErrorArgs = { providerBatchIds: string[] error: Error + /** Data about chunks that failed during completion */ + failedChunkData?: FailedChunkData[] + /** Count of failed chunks */ + failedChunkCount?: number } ``` +**Error handling behavior:** + +- **Batch failures**: If any batch fails during polling, the entire run fails and `onError` is called. +- **Partial chunk failures**: If individual chunks fail during completion (e.g., provider returned an error for specific inputs), the run still succeeds but `onError` is called with `failedChunkData` and `failedChunkCount`. +- **Failed chunk data**: The `failedChunkData` array contains structured information about failed chunks, including `collection`, `documentId`, and `chunkIndex`. 
This data is also stored in the run record (`failedChunkData` field) for later inspection and potential retry. +- **Partial success**: Successful embeddings are still written even when some chunks fail. Only the failed chunks are skipped. + ### Bulk Task Model The plugin uses separate Payload jobs for reliability with long-running providers: - **`prepare-bulk-embedding`**: Streams through documents, calls your `addChunk` for each chunk, creates batch records. -- **`poll-or-complete-bulk-embedding`**: Polls all batches, requeues itself until done, then atomically writes all embeddings. +- **`poll-or-complete-bulk-embedding`**: Polls all batches, requeues itself until done, then writes all successful embeddings (partial chunk failures are allowed). ### Queue Configuration @@ -512,7 +529,12 @@ Search for similar content using vector similarity. } ``` -The bulk embedding process is **atomic**: either all embeddings are written or none are. If any batch fails, the run is marked failed and no partial writes occur. +The bulk embedding process has **two levels of atomicity**: + +- **Batch level**: If any batch fails during polling, the entire run fails and no embeddings are written. This is fully atomic. +- **Chunk level**: If individual chunks fail during completion (e.g., provider returns errors for specific inputs), the run still succeeds and successful embeddings are written. Failed chunks are tracked in `failedChunkData` (with structured `collection`, `documentId`, and `chunkIndex` fields) and passed to the `onError` callback for cleanup. + +This design allows for partial success: if 100 chunks are processed and 2 fail, 98 embeddings are written and the 2 failures are tracked for potential retry. **Error Recovery:** If a run fails, you can re-queue it. If you provided an `onError` callback, it will be called with all `providerBatchIds` so you can clean up provider-side resources before retrying. 
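A minimal `onError` sketch that covers both the full-failure and partial-failure cases might look like the following; the `deleteProviderFile` helper is a placeholder for your provider's cleanup API and is not part of the plugin:

```typescript
import type { OnBulkErrorArgs } from 'payloadcms-vectorize'

// Hypothetical cleanup helper for your provider's files/batches — not part of the plugin.
declare function deleteProviderFile(providerBatchId: string): Promise<void>

export const onError = async ({
  providerBatchIds,
  error,
  failedChunkData,
  failedChunkCount,
}: OnBulkErrorArgs): Promise<void> => {
  // Clean up provider-side resources so the run (or batch) can be retried.
  await Promise.all(providerBatchIds.map((id) => deleteProviderFile(id)))

  if (failedChunkCount && failedChunkData) {
    // Partial failure: the run still succeeded, only these chunks were skipped.
    console.warn(`${failedChunkCount} chunk(s) failed during completion`, failedChunkData)
  } else {
    // Full failure: the run was marked failed and no embeddings were written.
    console.error('Bulk embedding run failed:', error.message)
  }
}
```

Because `failedChunkData` is also stored on the run record, the callback only needs to handle provider-side cleanup; retry decisions can be made later from the stored data.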
diff --git a/dev/specs/bulkEmbed/onError.spec.ts b/dev/specs/bulkEmbed/onError.spec.ts index ffc3087..cfc2e89 100644 --- a/dev/specs/bulkEmbed/onError.spec.ts +++ b/dev/specs/bulkEmbed/onError.spec.ts @@ -17,7 +17,12 @@ const dbName = `bulk_onerror_${Date.now()}` describe('Bulk embed - onError callback', () => { let payload: Payload let onErrorCalled = false - let onErrorArgs: { providerBatchIds: string[]; error: Error } | null = null + let onErrorArgs: { + providerBatchIds: string[] + error: Error + failedChunkData?: Array<{ collection: string; documentId: string; chunkIndex: number }> + failedChunkCount?: number + } | null = null beforeAll(async () => { await createTestDb({ dbName }) diff --git a/dev/specs/bulkEmbed/partialFailure.spec.ts b/dev/specs/bulkEmbed/partialFailure.spec.ts new file mode 100644 index 0000000..3d2f928 --- /dev/null +++ b/dev/specs/bulkEmbed/partialFailure.spec.ts @@ -0,0 +1,244 @@ +import type { Payload } from 'payload' +import { beforeAll, describe, expect, test } from 'vitest' +import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../../src/collections/bulkEmbeddingsRuns.js' +import { + BULK_QUEUE_NAMES, + DEFAULT_DIMS, + buildPayloadWithIntegration, + createMockBulkEmbeddings, + createTestDb, + waitForBulkJobs, +} from '../utils.js' +import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' + +const DIMS = DEFAULT_DIMS +const dbName = `bulk_partial_failure_${Date.now()}` + +describe('Bulk embed - partial chunk failures', () => { + let payload: Payload + let onErrorCalled = false + let onErrorArgs: { + providerBatchIds: string[] + error: Error + failedChunkData?: Array<{ collection: string; documentId: string; chunkIndex: number }> + failedChunkCount?: number + } | null = null + + beforeAll(async () => { + await createTestDb({ dbName }) + // We'll set up the payload dynamically in each test to control failIds + }) + + test('partial chunk failures are tracked and passed to onError', async () => { + // Reset state + onErrorCalled = false + onErrorArgs = null + + // The ID format is collectionSlug:docId:chunkIndex + // We need to fail a specific chunk - but we don't know the docId yet + // So we'll create the payload with a dynamic failIds check + + const built = await buildPayloadWithIntegration({ + dbName, + pluginOpts: { + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc: any) => [ + { chunk: doc.title }, + { chunk: doc.title + ' chunk2' }, + ], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(DIMS), + bulkEmbeddingsFns: createMockBulkEmbeddings( + { + statusSequence: ['succeeded'], + // We'll fail chunks that contain ":1" (second chunk of any doc) + partialFailure: { failIds: [] }, // Will be updated below + onErrorCallback: (args) => { + onErrorCalled = true + onErrorArgs = args + }, + }, + DIMS, + ), + }, + }, + }, + bulkQueueNames: BULK_QUEUE_NAMES, + }, + secret: 'test-secret', + dims: DIMS, + key: `partial-failure-${Date.now()}`, + }) + payload = built.payload + + // Create a post + const post = await payload.create({ + collection: 'posts', + data: { title: 'Partial Failure Test' } as any, + }) + + // Now we know the docId, update the mock to fail the second chunk + const failChunkId = `posts:${post.id}:1` + + // Re-create with the correct failIds + const built2 = await buildPayloadWithIntegration({ + dbName, + pluginOpts: { + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc: any) => [ + { chunk: doc.title }, + { chunk: 
doc.title + ' chunk2' }, + ], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion + '-v2', + queryFn: makeDummyEmbedQuery(DIMS), + bulkEmbeddingsFns: createMockBulkEmbeddings( + { + statusSequence: ['succeeded'], + partialFailure: { failIds: [failChunkId] }, + onErrorCallback: (args) => { + onErrorCalled = true + onErrorArgs = args + }, + }, + DIMS, + ), + }, + }, + }, + bulkQueueNames: BULK_QUEUE_NAMES, + }, + secret: 'test-secret', + dims: DIMS, + key: `partial-failure-2-${Date.now()}`, + }) + payload = built2.payload + + const run = await payload.create({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { pool: 'default', embeddingVersion: testEmbeddingVersion + '-v2', status: 'queued' }, + }) + + await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ + task: 'payloadcms-vectorize:prepare-bulk-embedding', + input: { runId: String(run.id) }, + req: { payload } as any, + ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName + ? { queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } + : {}), + }) + + await waitForBulkJobs(payload) + + // Check run status - should still succeed but with failed count + const updatedRun = await payload.findByID({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + id: run.id, + }) + + expect(updatedRun.status).toBe('succeeded') + expect(updatedRun.succeeded).toBe(1) // First chunk succeeded + expect(updatedRun.failed).toBe(1) // Second chunk failed + expect(updatedRun.failedChunkData).toBeDefined() + expect(Array.isArray(updatedRun.failedChunkData)).toBe(true) + expect((updatedRun.failedChunkData as Array<{ collection: string; documentId: string; chunkIndex: number }>).length).toBe(1) + const failedChunk = (updatedRun.failedChunkData as Array<{ collection: string; documentId: string; chunkIndex: number }>)[0] + expect(failedChunk.collection).toBe('posts') + expect(failedChunk.documentId).toBe(String(post.id)) + expect(failedChunk.chunkIndex).toBe(1) // Second chunk (index 1) + + // Check onError callback was called with failed chunk info + expect(onErrorCalled).toBe(true) + expect(onErrorArgs).not.toBeNull() + expect(onErrorArgs!.failedChunkData).toBeDefined() + expect(onErrorArgs!.failedChunkData!.length).toBe(1) + expect(onErrorArgs!.failedChunkData![0].collection).toBe('posts') + expect(onErrorArgs!.failedChunkData![0].documentId).toBe(String(post.id)) + expect(onErrorArgs!.failedChunkData![0].chunkIndex).toBe(1) + expect(onErrorArgs!.failedChunkCount).toBe(1) + expect(onErrorArgs!.error.message).toContain('1 chunk(s) failed') + }) + + test('run with no partial failures does not call onError', async () => { + // Reset state + onErrorCalled = false + onErrorArgs = null + + const built = await buildPayloadWithIntegration({ + dbName, + pluginOpts: { + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc: any) => [{ chunk: doc.title }], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion + '-v3', + queryFn: makeDummyEmbedQuery(DIMS), + bulkEmbeddingsFns: createMockBulkEmbeddings( + { + statusSequence: ['succeeded'], + // No partial failures + onErrorCallback: (args) => { + onErrorCalled = true + onErrorArgs = args + }, + }, + DIMS, + ), + }, + }, + }, + bulkQueueNames: BULK_QUEUE_NAMES, + }, + secret: 'test-secret', + dims: DIMS, + key: `no-partial-failure-${Date.now()}`, + }) + payload = built.payload + + await payload.create({ collection: 'posts', data: { title: 'No Failure Test' } as any }) + + const run = await payload.create({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { pool: 'default', 
embeddingVersion: testEmbeddingVersion + '-v3', status: 'queued' }, + }) + + await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ + task: 'payloadcms-vectorize:prepare-bulk-embedding', + input: { runId: String(run.id) }, + req: { payload } as any, + ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName + ? { queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } + : {}), + }) + + await waitForBulkJobs(payload) + + // Check run status + const updatedRun = await payload.findByID({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + id: run.id, + }) + + expect(updatedRun.status).toBe('succeeded') + expect(updatedRun.failed).toBe(0) + expect(updatedRun.failedChunkData).toBeUndefined() + + // onError should NOT be called when everything succeeds + expect(onErrorCalled).toBe(false) + }) +}) diff --git a/dev/specs/utils.ts b/dev/specs/utils.ts index f6be5a5..d3d01ce 100644 --- a/dev/specs/utils.ts +++ b/dev/specs/utils.ts @@ -80,7 +80,12 @@ type MockOptions = { /** Optional: flush after this many chunks (for testing multi-batch scenarios) */ flushAfterChunks?: number /** Optional: callback to track onError calls for testing */ - onErrorCallback?: (args: { providerBatchIds: string[]; error: Error }) => void + onErrorCallback?: (args: { + providerBatchIds: string[] + error: Error + failedChunkData?: Array<{ collection: string; documentId: string; chunkIndex: number }> + failedChunkCount?: number + }) => void } /** diff --git a/src/collections/bulkEmbeddingsRuns.ts b/src/collections/bulkEmbeddingsRuns.ts index c2a6757..c6faf25 100644 --- a/src/collections/bulkEmbeddingsRuns.ts +++ b/src/collections/bulkEmbeddingsRuns.ts @@ -90,6 +90,14 @@ export const createBulkEmbeddingsRunsCollection = (): CollectionConfig => ({ description: 'Failure reason if the run ended in error', }, }, + { + name: 'failedChunkData', + type: 'json', + admin: { + description: + 'Data about chunks that failed during completion (collection, documentId, chunkIndex)', + }, + }, ], timestamps: true, indexes: [ diff --git a/src/tasks/bulkEmbedAll.ts b/src/tasks/bulkEmbedAll.ts index 6445827..d8cf536 100644 --- a/src/tasks/bulkEmbedAll.ts +++ b/src/tasks/bulkEmbedAll.ts @@ -9,7 +9,12 @@ import { import { BULK_EMBEDDINGS_RUNS_SLUG } from '../collections/bulkEmbeddingsRuns.js' import { BULK_EMBEDDINGS_INPUT_METADATA_SLUG } from '../collections/bulkEmbeddingInputMetadata.js' import { BULK_EMBEDDINGS_BATCHES_SLUG } from '../collections/bulkEmbeddingsBatches.js' -import { isPostgresPayload, PostgresPayload, BulkEmbeddingInput } from '../types.js' +import { + isPostgresPayload, + PostgresPayload, + BulkEmbeddingInput, + FailedChunkData, +} from '../types.js' import toSnakeCase from 'to-snake-case' type PrepareBulkEmbeddingTaskInput = { @@ -315,9 +320,9 @@ export const createPollOrCompleteBulkEmbeddingTask = ({ return { output: { runId: input.runId, status: 'polling' } } } - // All batches succeeded - complete the embeddings atomically + // All batches succeeded - complete the embeddings (writes successful chunks, tracks failures) if (allSucceeded) { - const completionResult = await completeAllBatchesAtomically({ + const completionResult = await completeBatches({ payload, runId: input.runId, poolName, @@ -333,6 +338,10 @@ export const createPollOrCompleteBulkEmbeddingTask = ({ succeeded: completionResult.succeededCount, failed: completionResult.failedCount, error: completionResult.error, + failedChunkData: + completionResult.failedChunkData.length > 0 + ? 
completionResult.failedChunkData + : undefined, completedAt: new Date().toISOString(), }, }) @@ -343,12 +352,23 @@ export const createPollOrCompleteBulkEmbeddingTask = ({ where: { run: { equals: (run as any).id } }, }) - // If completion failed, call onError so user can clean up provider resources - if (!completionResult.success && callbacks.onError) { + // Call onError if completion failed OR if there were partial chunk failures + if (callbacks.onError && (!completionResult.success || completionResult.failedCount > 0)) { const providerBatchIds = batches.map((b: any) => b.providerBatchId as string) await callbacks.onError({ providerBatchIds, - error: new Error(completionResult.error || 'Completion failed'), + error: new Error( + completionResult.error || + (completionResult.failedCount > 0 + ? `${completionResult.failedCount} chunk(s) failed during completion` + : 'Completion failed'), + ), + failedChunkData: + completionResult.failedChunkData.length > 0 + ? completionResult.failedChunkData + : undefined, + failedChunkCount: + completionResult.failedCount > 0 ? completionResult.failedCount : undefined, }) } @@ -540,9 +560,13 @@ async function streamAndBatchMissingEmbeddings(args: { } /** - * Complete all batches atomically - download all outputs and write all embeddings + * Complete all batches - download all outputs and write successful embeddings. + * + * Note: This function writes partial results. If some chunks fail during completion, + * successful embeddings are still written. Only failed chunks are skipped. + * The operation is atomic in that if an exception is thrown, nothing is written. */ -async function completeAllBatchesAtomically(args: { +async function completeBatches(args: { payload: Payload runId: string poolName: KnowledgePoolName @@ -554,6 +578,7 @@ async function completeAllBatchesAtomically(args: { success: boolean succeededCount: number failedCount: number + failedChunkData: FailedChunkData[] error?: string }> { const { payload, runId, poolName, batches, callbacks } = args @@ -571,9 +596,22 @@ async function completeAllBatchesAtomically(args: { allOutputs.push(...outputs) } - // Filter successful outputs + // Filter successful outputs and collect failed chunk data const successfulOutputs = allOutputs.filter((o) => !o.error && o.embedding) - const failedCount = allOutputs.length - successfulOutputs.length + const failedChunkData: FailedChunkData[] = [] + for (const output of allOutputs) { + if (output.error) { + const meta = metadataById.get(output.id) + if (meta) { + failedChunkData.push({ + collection: meta.sourceCollection, + documentId: meta.docId, + chunkIndex: meta.chunkIndex, + }) + } + } + } + const failedCount = failedChunkData.length // Collect unique doc keys for deletion const docKeys = new Set() @@ -631,6 +669,7 @@ async function completeAllBatchesAtomically(args: { success: true, succeededCount: successfulOutputs.length, failedCount, + failedChunkData, } } catch (error) { const errorMessage = (error as Error).message || String(error) @@ -638,6 +677,7 @@ async function completeAllBatchesAtomically(args: { success: false, succeededCount: 0, failedCount: 0, + failedChunkData: [], error: `Completion failed: ${errorMessage}`, } } diff --git a/src/types.ts b/src/types.ts index b62e335..2368a22 100644 --- a/src/types.ts +++ b/src/types.ts @@ -130,12 +130,26 @@ export type CompleteBatchArgs = { providerBatchId: string } +/** Data about a failed chunk during bulk embedding completion */ +export type FailedChunkData = { + /** Source collection slug */ + collection: 
string + /** Source document ID */ + documentId: string + /** Index of the chunk within the document */ + chunkIndex: number +} + /** Arguments passed to onError callback */ export type OnBulkErrorArgs = { /** All provider batch IDs that were created during this run */ providerBatchIds: string[] /** The error that caused the failure */ error: Error + /** Optional: Data about chunks that failed during completion */ + failedChunkData?: FailedChunkData[] + /** Optional: Count of failed chunks (for quick summary without iterating failedChunkData) */ + failedChunkCount?: number } /** From b60af9f652c7a0a425b0bb613dde54bcae638c9c Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Sun, 11 Jan 2026 20:00:31 +0700 Subject: [PATCH 25/49] WIP --- README.md | 33 +- dev/helpers/embed.ts | 101 +-- dev/payload.config.ts | 31 + dev/specs/bulkEmbed/concurrentRuns.spec.ts | 121 +++ dev/specs/bulkEmbed/failedBatch.spec.ts | 50 +- dev/specs/e2e.spec.ts | 213 ++++- dev/specs/utils.ts | 40 +- dev/specs/vectorizedPayload.spec.ts | 74 +- .../RetryFailedBatchButton/client.tsx | 183 +++++ .../RetryFailedBatchButton/index.tsx | 56 ++ src/collections/bulkEmbeddingInputMetadata.ts | 3 + src/collections/bulkEmbeddingsBatches.ts | 13 + src/endpoints/bulkEmbed.ts | 133 +-- src/endpoints/retryFailedBatch.ts | 167 ++++ src/exports/client.ts | 1 + src/index.ts | 57 +- src/tasks/bulkEmbedAll.ts | 757 +++++++++++------- src/types.ts | 71 +- 18 files changed, 1618 insertions(+), 486 deletions(-) create mode 100644 dev/specs/bulkEmbed/concurrentRuns.spec.ts create mode 100644 src/admin/components/RetryFailedBatchButton/client.tsx create mode 100644 src/admin/components/RetryFailedBatchButton/index.tsx create mode 100644 src/endpoints/retryFailedBatch.ts diff --git a/README.md b/README.md index 7d87061..4b0fb91 100644 --- a/README.md +++ b/README.md @@ -224,7 +224,7 @@ if (isVectorizedPayload(payload)) { | `realtimeQueueName` | `string` | ❌ | Custom queue name for realtime vectorization jobs | | `bulkQueueNames` | `{prepareBulkEmbedQueueName: string, pollOrCompleteQueueName: string}` | ❌ | Queue names for bulk embedding jobs (required if any pool uses bulk ingest) | | `endpointOverrides` | `object` | ❌ | Customize the search endpoint | -| `disabled` | `boolean` | ❌ | Disable plugin while keeping schema | +| `disabled` | `boolean` | ❌ | Disable plugin, except embeddings deletions, while keeping schema | ### Knowledge Pool Config @@ -251,6 +251,8 @@ If `realTimeIngestionFn` is provided, documents are embedded immediately on crea If only `bulkEmbeddingsFns` is provided (no `realTimeIngestionFn`), embedding only happens via manual bulk runs. If neither is provided, embedding is disabled for that pool. +**Note:** Embedding deletion cannot be disabled. When a source document is deleted, all its embeddings are automatically deleted from all knowledge pools that contain that collection, regardless of how the embeddings were created (bulk or real-time). This behavior ensures data consistency and cannot be configured. + ### Bulk Embeddings API The bulk embedding API is designed for large-scale embedding using provider batch APIs (like Voyage AI). **Bulk runs are never auto-queued** - they must be triggered manually via the admin UI or API. 
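Besides the admin button and the HTTP endpoint, a run can also be started from server code through the typed local API. A minimal sketch is below; the pool name `main` and the wrapping function are assumptions, so substitute one of your configured knowledge pools:

```typescript
import type { Payload } from 'payload'
import { isVectorizedPayload } from 'payloadcms-vectorize'

// Assumes a pool named "main" with bulkEmbeddingsFns configured.
export const startMainPoolBackfill = async (payload: Payload) => {
  if (!isVectorizedPayload(payload)) return

  const result = await payload.bulkEmbed({ knowledgePool: 'main' })

  if ('conflict' in result && result.conflict) {
    // A run is already queued or running for this pool.
    payload.logger.warn(result.message)
  } else {
    payload.logger.info(`Queued bulk run ${result.runId}`)
  }
}
```

The call throws if the pool has no `bulkEmbeddingsFns` configured, and returns a `conflict` result instead of creating a duplicate run when one is already queued or running for that pool.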
@@ -262,8 +264,7 @@ The plugin streams chunks to your callbacks one at a time, giving you full contr ```typescript type BulkEmbeddingsFns = { addChunk: (args: AddChunkArgs) => Promise - pollBatch: (args: PollBatchArgs) => Promise - completeBatch: (args: CompleteBatchArgs) => Promise + pollOrCompleteBatch: (args: PollOrCompleteBatchArgs) => Promise onError?: (args: OnBulkErrorArgs) => Promise } ``` @@ -334,26 +335,21 @@ addChunk: async ({ chunk, isLastChunk }) => { **Note:** If a single chunk exceeds your provider's file size limit, you'll need to handle that edge case in your implementation (e.g., skip it, split it, or fail gracefully). -#### `pollBatch` - Check Status +#### `pollOrCompleteBatch` - Poll and Stream Results -Called repeatedly until the batch reaches a terminal status. +Called repeatedly until the batch reaches a terminal status. When the batch completes, stream the outputs via the `onChunk` callback. ```typescript -type PollBatchArgs = { providerBatchId: string } +type PollOrCompleteBatchArgs = { + providerBatchId: string + onChunk: (chunk: BulkEmbeddingOutput) => Promise +} type PollBulkEmbeddingsResult = { status: 'queued' | 'running' | 'succeeded' | 'failed' | 'canceled' counts?: { inputs?: number; succeeded?: number; failed?: number } error?: string } -``` - -#### `completeBatch` - Download Results - -Called after all batches succeed. Download the embeddings from your provider. - -```typescript -type CompleteBatchArgs = { providerBatchId: string } type BulkEmbeddingOutput = { id: string // Must match the chunk.id from addChunk @@ -362,11 +358,18 @@ type BulkEmbeddingOutput = { } ``` +**How it works:** + +1. The plugin calls `pollOrCompleteBatch` repeatedly for each batch +2. While the batch is in progress, return the status (`queued` or `running`) without calling `onChunk` +3. When the batch completes, stream each embedding result by calling `onChunk` for each output, then return `{ status: 'succeeded' }` +4. If the batch fails, return `{ status: 'failed', error: '...' }` without calling `onChunk` + **About the `id` field in outputs:** - **Correlation**: The `id` in each `BulkEmbeddingOutput` must match the `chunk.id` that was passed to `addChunk`. This is how the plugin correlates outputs back to their original inputs. - **Extraction**: When processing your provider's response, extract the `id` that you originally sent (e.g., from Voyage's `custom_id` field) and include it in the returned `BulkEmbeddingOutput`. -- **Example**: If you sent `{ custom_id: "posts:123:0", input: [...] }` to your provider, extract `result.custom_id` from the response and return `{ id: result.custom_id, embedding: [...] }`. +- **Example**: If you sent `{ custom_id: "posts:123:0", input: [...] }` to your provider, extract `result.custom_id` from the response and call `await onChunk({ id: result.custom_id, embedding: [...] })`. 
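Putting these pieces together, a minimal `pollOrCompleteBatch` sketch might look like the following; `fetchBatchStatus` and `downloadResults` are hypothetical stand-ins for your provider's SDK, not plugin APIs:

```typescript
import type { PollOrCompleteBatchArgs, PollBulkEmbeddingsResult } from 'payloadcms-vectorize'

// Hypothetical provider helpers — replace with your provider's SDK calls.
declare function fetchBatchStatus(batchId: string): Promise<'pending' | 'completed' | 'failed'>
declare function downloadResults(
  batchId: string,
): Promise<Array<{ custom_id: string; embedding?: number[]; error?: string }>>

export const pollOrCompleteBatch = async ({
  providerBatchId,
  onChunk,
}: PollOrCompleteBatchArgs): Promise<PollBulkEmbeddingsResult> => {
  const providerStatus = await fetchBatchStatus(providerBatchId)

  if (providerStatus === 'pending') return { status: 'running' }
  if (providerStatus === 'failed') return { status: 'failed', error: 'Provider batch failed' }

  // Terminal success: stream every output back to the plugin before reporting success.
  for (const result of await downloadResults(providerBatchId)) {
    await onChunk({ id: result.custom_id, embedding: result.embedding, error: result.error })
  }

  return { status: 'succeeded' }
}
```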
#### `onError` - Cleanup on Failure (Optional) diff --git a/dev/helpers/embed.ts b/dev/helpers/embed.ts index 20ca634..dfb5daf 100644 --- a/dev/helpers/embed.ts +++ b/dev/helpers/embed.ts @@ -183,7 +183,7 @@ export function makeVoyageBulkEmbeddingsConfig(): BulkEmbeddingsFns { return null }, - pollBatch: async ({ providerBatchId }) => { + pollOrCompleteBatch: async ({ providerBatchId, onChunk }) => { try { const response = await fetch(`https://api.voyageai.com/v1/batches/${providerBatchId}`, { headers: { @@ -221,71 +221,56 @@ export function makeVoyageBulkEmbeddingsConfig(): BulkEmbeddingsFns { status = 'running' } - // Store output file ID if available for later completion - if (batchData.output_file_id) { - batchOutputFiles.set(providerBatchId, batchData.output_file_id) - } - - return { - status, - } - } catch (error) { - console.error('Voyage pollBatch error:', error) - return { status: 'failed', error: 'Failed to poll batch status' } - } - }, - - completeBatch: async ({ providerBatchId }) => { - try { - const outputFileId = batchOutputFiles.get(providerBatchId) - if (!outputFileId) { - throw new Error('No output file available for batch') - } + // If succeeded, download and stream outputs + if (status === 'succeeded') { + const outputFileId = batchData.output_file_id + if (!outputFileId) { + return { status: 'failed', error: 'No output file available for completed batch' } + } - // Download output file - const response = await fetch(`https://api.voyageai.com/v1/files/${outputFileId}/content`, { - headers: { - Authorization: `Bearer ${process.env.VOYAGE_API_KEY}`, - }, - }) + // Download output file + const downloadResponse = await fetch( + `https://api.voyageai.com/v1/files/${outputFileId}/content`, + { + headers: { + Authorization: `Bearer ${process.env.VOYAGE_API_KEY}`, + }, + }, + ) - if (!response.ok) { - const error = await response.text() - throw new Error(`Failed to download output file: ${error}`) - } + if (!downloadResponse.ok) { + const error = await downloadResponse.text() + return { status: 'failed', error: `Failed to download output file: ${error}` } + } - const jsonlContent = await response.text() - const lines = jsonlContent.trim().split('\n') - - const outputs: BulkEmbeddingOutput[] = [] - - for (const line of lines) { - if (!line.trim()) continue - try { - const result = JSON.parse(line) - if (result.error) { - outputs.push({ - id: result.custom_id, - error: result.error.message || 'Unknown error', - }) - } else { - outputs.push({ - id: result.custom_id, - embedding: result.response.body.data[0].embedding, - }) + const jsonlContent = await downloadResponse.text() + const lines = jsonlContent.trim().split('\n') + + for (const line of lines) { + if (!line.trim()) continue + try { + const result = JSON.parse(line) + if (result.error) { + await onChunk({ + id: result.custom_id, + error: result.error.message || 'Unknown error', + }) + } else { + await onChunk({ + id: result.custom_id, + embedding: result.response.body.data[0].embedding, + }) + } + } catch (parseError) { + console.error('Failed to parse output line:', line, parseError) } - } catch (parseError) { - console.error('Failed to parse output line:', line, parseError) } } - // Clean up state - batchOutputFiles.delete(providerBatchId) - - return outputs + return { status } } catch (error) { - console.error('Voyage completeBatch error:', error) - throw error + console.error('Voyage pollOrCompleteBatch error:', error) + return { status: 'failed', error: 'Failed to poll batch status' } } }, diff --git 
a/dev/payload.config.ts b/dev/payload.config.ts index efc9723..41d82e2 100644 --- a/dev/payload.config.ts +++ b/dev/payload.config.ts @@ -59,6 +59,10 @@ const { afterSchemaInitHook, payloadcmsVectorize } = createVectorizeIntegration( dims, ivfflatLists, }, + failingBulkDefault: { + dims, + ivfflatLists, + }, }) const buildConfigWithPostgres = async () => { @@ -172,6 +176,33 @@ const buildConfigWithPostgres = async () => { bulkEmbeddingsFns, }, }, + failingBulkDefault: { + collections: { + posts: { + toKnowledgePool: async (doc, payload) => { + const chunks: Array<{ chunk: string }> = [] + // Process title + if (doc.title) { + const titleChunks = chunkText(doc.title) + chunks.push(...titleChunks.map((chunk) => ({ chunk }))) + } + // Process content + if (doc.content) { + const contentChunks = await chunkRichText(doc.content, payload) + chunks.push(...contentChunks.map((chunk) => ({ chunk }))) + } + return chunks + }, + }, + }, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: embedQuery, + bulkEmbeddingsFns: createMockBulkEmbeddings({ + statusSequence: ['queued', 'running', 'failed'], + }), + }, + }, }, bulkQueueNames: { prepareBulkEmbedQueueName: 'vectorize-bulk-prepare', diff --git a/dev/specs/bulkEmbed/concurrentRuns.spec.ts b/dev/specs/bulkEmbed/concurrentRuns.spec.ts new file mode 100644 index 0000000..289a202 --- /dev/null +++ b/dev/specs/bulkEmbed/concurrentRuns.spec.ts @@ -0,0 +1,121 @@ +import type { Payload } from 'payload' +import { beforeAll, describe, expect, test } from 'vitest' +import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../../src/collections/bulkEmbeddingsRuns.js' +import type { VectorizedPayload } from '../../../src/types.js' +import { + BULK_QUEUE_NAMES, + DEFAULT_DIMS, + buildPayloadWithIntegration, + createMockBulkEmbeddings, + createTestDb, +} from '../utils.js' +import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' + +const DIMS = DEFAULT_DIMS +const dbName = `bulk_concurrent_${Date.now()}` + +describe('Bulk embed - concurrent runs prevention', () => { + let payload: VectorizedPayload<'default'> + + beforeAll(async () => { + await createTestDb({ dbName }) + const built = await buildPayloadWithIntegration({ + dbName, + pluginOpts: { + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc: any) => [{ chunk: doc.title }], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(DIMS), + bulkEmbeddingsFns: createMockBulkEmbeddings({ + statusSequence: ['queued', 'running'], + }), + }, + }, + }, + bulkQueueNames: BULK_QUEUE_NAMES, + }, + secret: 'test-secret', + dims: DIMS, + key: `concurrent-${Date.now()}`, + }) + payload = built.payload as VectorizedPayload<'default'> + }) + + test('cannot start concurrent bulk embed runs for the same pool', async () => { + // Create a test post first + await payload.create({ + collection: 'posts', + data: { title: 'Concurrent test post' } as any, + }) + + // Create a run manually in 'running' status + const existingRun = await (payload as any).create({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + pool: 'default', + embeddingVersion: testEmbeddingVersion, + status: 'running', + }, + }) + + // Try to start another bulk embed for the same pool + const result = await payload.bulkEmbed({ knowledgePool: 'default' }) + + expect('conflict' in result && result.conflict).toBe(true) + expect(result.status).toBe('running') + expect(result.runId).toBe(String(existingRun.id)) + expect('message' in result && 
result.message).toContain('already running') + + // Cleanup: mark the run as succeeded so it doesn't interfere with other tests + await (payload as any).update({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + id: existingRun.id, + data: { + status: 'succeeded', + completedAt: new Date().toISOString(), + }, + }) + }) + + test('can start bulk embed run after previous run completes', async () => { + // Create a test post + await payload.create({ + collection: 'posts', + data: { title: 'Sequential test post' } as any, + }) + + // Create a completed run + await (payload as any).create({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + pool: 'default', + embeddingVersion: testEmbeddingVersion, + status: 'succeeded', + completedAt: new Date().toISOString(), + }, + }) + + // Should be able to start a new run for the same pool + const result = await payload.bulkEmbed({ knowledgePool: 'default' }) + + expect('conflict' in result).toBe(false) + expect(result.status).toBe('queued') + expect(result.runId).toBeDefined() + + // Cleanup: mark the new run as succeeded + await (payload as any).update({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + id: result.runId, + data: { + status: 'succeeded', + completedAt: new Date().toISOString(), + }, + }) + }) +}) diff --git a/dev/specs/bulkEmbed/failedBatch.spec.ts b/dev/specs/bulkEmbed/failedBatch.spec.ts index 54c1877..2d56359 100644 --- a/dev/specs/bulkEmbed/failedBatch.spec.ts +++ b/dev/specs/bulkEmbed/failedBatch.spec.ts @@ -1,7 +1,9 @@ import type { Payload } from 'payload' import { beforeAll, describe, expect, test } from 'vitest' import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../../src/collections/bulkEmbeddingsRuns.js' +import { BULK_EMBEDDINGS_BATCHES_SLUG } from '../../../src/collections/bulkEmbeddingsBatches.js' import { BULK_EMBEDDINGS_INPUT_METADATA_SLUG } from '../../../src/collections/bulkEmbeddingInputMetadata.js' +import type { VectorizedPayload } from '../../../src/types.js' import { BULK_QUEUE_NAMES, DEFAULT_DIMS, @@ -16,7 +18,7 @@ const DIMS = DEFAULT_DIMS const dbName = `bulk_failed_${Date.now()}` describe('Bulk embed - failed batch', () => { - let payload: Payload + let payload: VectorizedPayload<'default'> beforeAll(async () => { await createTestDb({ dbName }) @@ -43,7 +45,7 @@ describe('Bulk embed - failed batch', () => { dims: DIMS, key: `failed-${Date.now()}`, }) - payload = built.payload + payload = built.payload as VectorizedPayload<'default'> }) test('failed batch marks entire run as failed', async () => { @@ -105,6 +107,48 @@ describe('Bulk embed - failed batch', () => { }) expect(metadata.totalDocs).toBe(0) }) -}) + test('cannot retry batch while run is still running', async () => { + // Create a run in 'running' status + const run = await (payload as any).create({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + pool: 'default', + embeddingVersion: testEmbeddingVersion, + status: 'running', + }, + }) + + // Create a failed batch for this running run + const batch = await (payload as any).create({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + data: { + run: run.id, + batchIndex: 0, + providerBatchId: `mock-failed-lock-test-${Date.now()}`, + status: 'failed', + inputCount: 1, + error: 'Test error for lock test', + }, + }) + + // Try to retry the batch while run is running - should be rejected + const result = await payload.retryFailedBatch({ batchId: String(batch.id) }) + + expect('error' in result).toBe(true) + expect('conflict' in result && result.conflict).toBe(true) + if ('error' in result) { + 
expect(result.error).toContain('Cannot retry batch while run is running') + } + // Cleanup: mark the run as failed so the batch can be retried in the future + await (payload as any).update({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + id: run.id, + data: { + status: 'failed', + completedAt: new Date().toISOString(), + }, + }) + }) +}) diff --git a/dev/specs/e2e.spec.ts b/dev/specs/e2e.spec.ts index baaf4fb..1706ea7 100644 --- a/dev/specs/e2e.spec.ts +++ b/dev/specs/e2e.spec.ts @@ -3,9 +3,11 @@ import type { Payload, SanitizedConfig } from 'payload' import config from '@payload-config' import { getPayload } from 'payload' import { getInitialMarkdownContent } from './constants.js' -import { waitForVectorizationJobs } from './utils.js' +import { waitForVectorizationJobs, waitForBulkJobs } from './utils.js' import { testEmbeddingVersion } from 'helpers/embed.js' import { devUser } from 'helpers/credentials.js' +import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../src/collections/bulkEmbeddingsRuns.js' +import { BULK_EMBEDDINGS_BATCHES_SLUG } from '../../src/collections/bulkEmbeddingsBatches.js' // Helper function to log in to the admin panel const loginToAdmin = async (page: any) => { @@ -222,6 +224,54 @@ test.describe('Vector embedding e2e tests', () => { }, }) await expectVectorSearchResponse(filledResponse, post, title) + + // Get the run ID from the current URL + const runUrl = page.url() + const runIdMatch = runUrl.match(/\/(\d+)$/) + const bulkRunId = runIdMatch ? runIdMatch[1] : null + expect(bulkRunId).not.toBeNull() + console.log('[test] Bulk run ID:', bulkRunId) + + // Find the succeeded batch that was created + const succeededBatches = await (payload as any).find({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + where: { + and: [{ run: { equals: bulkRunId } }, { status: { equals: 'succeeded' } }], + }, + }) + expect(succeededBatches.totalDocs).toBeGreaterThan(0) + const succeededBatch = succeededBatches.docs[0] + console.log('[test] Found succeeded batch:', succeededBatch.id) + + // Test: Retry endpoint returns 400 for succeeded batch + const succeededRetryResponse = await request.post('/api/vector-retry-failed-batch', { + data: { batchId: String(succeededBatch.id) }, + }) + expect(succeededRetryResponse.status()).toBe(400) + const succeededRetryJson = await succeededRetryResponse.json() + expect(succeededRetryJson.error).toContain('not in failed status') + console.log('[test] Retry endpoint correctly rejected succeeded batch') + + // Navigate to the succeeded batch page and verify retry button is disabled + console.log('[test] Navigating to succeeded batch page...') + await page.goto(`/admin/collections/${BULK_EMBEDDINGS_BATCHES_SLUG}/${succeededBatch.id}`, { + waitUntil: 'networkidle', + }) + await page.waitForLoadState('domcontentloaded') + + // Look for the retry button - it should be present but disabled + const retryButton = page.locator('[data-testid="retry-failed-batch-button"]') + await expect(retryButton).toBeVisible({ timeout: 15000 }) + + // Verify the button is disabled (opacity check) + const buttonStyle = await retryButton.getAttribute('style') + expect(buttonStyle).toContain('opacity: 0.5') + + // Verify the "Retry Not Available" message is shown + const notAvailableMessage = page.locator('text=/Retry Not Available/i') + await expect(notAvailableMessage).toBeVisible({ timeout: 5000 }) + + console.log('[test] Retry button correctly disabled for succeeded batch!') }) test('clicking expand section on default collection shows not enabled message', async ({ @@ -272,4 +322,165 @@ 
test.describe('Vector embedding e2e tests', () => { const configMessage = page.locator('text=/bulkEmbeddingsFns/i') await expect(configMessage).toBeVisible({ timeout: 5000 }) }) + + test('retry failed batch endpoint returns 404 for non-existent batch', async ({ request }) => { + console.log('[test] Testing non-existent batch retry...') + + const nonExistentResponse = await request.post('/api/vector-retry-failed-batch', { + data: { batchId: '999999' }, + }) + expect(nonExistentResponse.status()).toBe(404) + + console.log('[test] Non-existent batch test completed!') + }) + + test('retry failed batch endpoint works correctly', async ({ request }) => { + console.log('[test] Starting retry failed batch endpoint test...') + + // Create a test post first (needed for bulk embedding to have something to embed) + const post = await payload.create({ + collection: 'posts', + data: { + title: 'Failed batch test post', + }, + }) + console.log('[test] Created test post:', post.id) + + // Use the bulk embed endpoint to create a run for failingBulkDefault pool + const bulkEmbedResponse = await request.post('/api/vector-bulk-embed', { + data: { + knowledgePool: 'failingBulkDefault', + }, + }) + expect(bulkEmbedResponse.ok()).toBe(true) + const bulkEmbedJson = await bulkEmbedResponse.json() + const runId = bulkEmbedJson.runId + console.log('[test] Created bulk run via endpoint:', runId) + + // Wait for the bulk jobs to process and fail (failingBulkDefault has a mock that fails) + await waitForBulkJobs(payload, 30000) + console.log('[test] Bulk jobs completed') + + // Find the failed batch that was created + const batches = await (payload as any).find({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + where: { + and: [{ run: { equals: runId } }, { status: { equals: 'failed' } }], + }, + }) + expect(batches.totalDocs).toBeGreaterThan(0) + const batch = batches.docs[0] + console.log('[test] Found failed batch:', batch.id) + + // Retry the failed batch (should succeed) + const retryResponse = await request.post('/api/vector-retry-failed-batch', { + data: { batchId: String(batch.id) }, + }) + expect(retryResponse.status()).toBe(202) + const retryJson = await retryResponse.json() + expect(retryJson.message).toBe('Failed batch has been re-queued for processing') + expect(retryJson.batchId).toBe(String(batch.id)) + expect(retryJson.status).toBe('queued') + + // Verify the batch status was updated + const updatedBatch = await (payload as any).findByID({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + id: String(batch.id), + }) + expect(updatedBatch.status).toBe('queued') + expect(updatedBatch.error).toBeNull() + + // Verify the run status was reset to running + const updatedRun = await (payload as any).findByID({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + id: runId, + }) + expect((updatedRun as any).status).toBe('running') + + console.log('[test] Retry failed batch endpoint test completed successfully!') + }) + + test('retry failed batch button works for failed batches', async ({ page }) => { + console.log('[test] Starting retry button click test...') + test.setTimeout(120000) + + // Login first + await loginToAdmin(page) + + // Create a bulk embedding run + const run = await (payload as any).create({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + pool: 'failingBulkDefault', + embeddingVersion: testEmbeddingVersion, + status: 'failed', + }, + }) + console.log('[test] Created bulk run:', run.id) + + // Create a failed batch + const failedBatch = await (payload as any).create({ + collection: 
BULK_EMBEDDINGS_BATCHES_SLUG, + data: { + run: run.id, + batchIndex: 0, + providerBatchId: `mock-failed-ui-${Date.now()}`, + status: 'failed', + inputCount: 1, + error: 'Test error for UI test', + }, + }) + console.log('[test] Created failed batch:', failedBatch.id) + + // Navigate to the failed batch edit page + console.log('[test] Navigating to failed batch page...') + await page.goto(`/admin/collections/${BULK_EMBEDDINGS_BATCHES_SLUG}/${failedBatch.id}`, { + waitUntil: 'networkidle', + }) + await page.waitForLoadState('domcontentloaded') + + // Look for the retry button + const retryButton = page.locator('[data-testid="retry-failed-batch-button"]') + await expect(retryButton).toBeVisible({ timeout: 15000 }) + + // Verify the "Retry Failed Batch" message is shown (not "Retry Not Available") + const retryMessage = page.locator('text=/Retry Failed Batch/i') + await expect(retryMessage).toBeVisible({ timeout: 5000 }) + + // Verify the button is NOT disabled + const buttonStyle = await retryButton.getAttribute('style') + expect(buttonStyle).not.toContain('opacity: 0.5') + + // Click the retry button + console.log('[test] Clicking retry button...') + await retryButton.click() + + // Wait for success message + const successMessage = page.locator('text=/Batch re-queued successfully/i') + await expect(successMessage).toBeVisible({ timeout: 10000 }) + + console.log('[test] Retry button click test completed!') + + // Wait a bit for the page reload + await page.waitForTimeout(2000) + + // Verify we're still on the batch page after reload + await page.waitForURL(/\/admin\/collections\/vector-bulk-embeddings-batches\/\d+/) + + console.log('[test] Retry failed batch button test completed successfully!') + }) + + test('missing batchId returns 400 error', async ({ request }) => { + console.log('[test] Testing missing batchId...') + + const response = await request.post('/api/vector-retry-failed-batch', { + data: {}, + }) + + expect(response.status()).toBe(400) + const json = await response.json() + expect(json.error).toContain('batchId is required') + + console.log('[test] Missing batchId test completed!') + }) }) diff --git a/dev/specs/utils.ts b/dev/specs/utils.ts index d3d01ce..b44643c 100644 --- a/dev/specs/utils.ts +++ b/dev/specs/utils.ts @@ -129,31 +129,31 @@ export function createMockBulkEmbeddings( return null }, - pollBatch: async ({ providerBatchId }) => { + pollOrCompleteBatch: async ({ providerBatchId, onChunk }) => { const callCount = batchPollCount.get(providerBatchId) ?? 0 batchPollCount.set(providerBatchId, callCount + 1) const status = statusSequence[Math.min(callCount, statusSequence.length - 1)] - return { - status, - } - }, - completeBatch: async ({ providerBatchId }) => { - const inputs = batchInputs.get(providerBatchId) ?? [] - if (!inputs.length) { - return [] + // If succeeded, stream the outputs via onChunk + if (status === 'succeeded') { + const inputs = batchInputs.get(providerBatchId) ?? [] + if (inputs.length) { + const vectors = await embeddings(inputs.map((i) => i.text)) + for (let idx = 0; idx < inputs.length; idx++) { + const input = inputs[idx] + const shouldFail = partialFailure?.failIds?.includes(input.id) + const output = shouldFail + ? 
{ id: input.id, error: 'fail' } + : { id: input.id, embedding: vectors[idx] } + await onChunk(output) + } + } + // Clean up state + batchInputs.delete(providerBatchId) + batchPollCount.delete(providerBatchId) } - const vectors = await embeddings(inputs.map((i) => i.text)) - const outputs = inputs.map((input, idx) => { - const shouldFail = partialFailure?.failIds?.includes(input.id) - return shouldFail - ? { id: input.id, error: 'fail' } - : { id: input.id, embedding: vectors[idx] } - }) - // Clean up state - batchInputs.delete(providerBatchId) - batchPollCount.delete(providerBatchId) - return outputs + + return { status } }, onError: async ({ providerBatchIds, error }) => { diff --git a/dev/specs/vectorizedPayload.spec.ts b/dev/specs/vectorizedPayload.spec.ts index 6424253..e717909 100644 --- a/dev/specs/vectorizedPayload.spec.ts +++ b/dev/specs/vectorizedPayload.spec.ts @@ -79,9 +79,11 @@ describe('VectorizedPayload', () => { }, }, }, - embedDocs: makeDummyEmbedDocs(DIMS), - embedQuery: makeDummyEmbedQuery(DIMS), - embeddingVersion: testEmbeddingVersion, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(DIMS), + realTimeIngestionFn: makeDummyEmbedDocs(DIMS), + }, }, }, }), @@ -97,12 +99,42 @@ describe('VectorizedPayload', () => { }) test('returns false for a plain object without search method', () => { - const plainObj = { queueEmbed: () => Promise.resolve() } as unknown as Payload + const plainObj = { + _isBulkEmbedEnabled: () => false, + queueEmbed: () => Promise.resolve(), + bulkEmbed: () => Promise.resolve({}), + retryFailedBatch: () => Promise.resolve({}), + } as unknown as Payload expect(isVectorizedPayload(plainObj)).toBe(false) }) test('returns false for a plain object without queueEmbed method', () => { - const plainObj = { search: () => Promise.resolve([]) } as unknown as Payload + const plainObj = { + _isBulkEmbedEnabled: () => false, + search: () => Promise.resolve([]), + bulkEmbed: () => Promise.resolve({}), + retryFailedBatch: () => Promise.resolve({}), + } as unknown as Payload + expect(isVectorizedPayload(plainObj)).toBe(false) + }) + + test('returns false for a plain object without bulkEmbed method', () => { + const plainObj = { + _isBulkEmbedEnabled: () => false, + search: () => Promise.resolve([]), + queueEmbed: () => Promise.resolve(), + retryFailedBatch: () => Promise.resolve({}), + } as unknown as Payload + expect(isVectorizedPayload(plainObj)).toBe(false) + }) + + test('returns false for a plain object without retryFailedBatch method', () => { + const plainObj = { + _isBulkEmbedEnabled: () => false, + search: () => Promise.resolve([]), + queueEmbed: () => Promise.resolve(), + bulkEmbed: () => Promise.resolve({}), + } as unknown as Payload expect(isVectorizedPayload(plainObj)).toBe(false) }) @@ -236,4 +268,36 @@ describe('VectorizedPayload', () => { expect(pendingJobs.totalDocs).toBeGreaterThan(0) }) }) + + describe('bulkEmbed method', () => { + test('payload has bulkEmbed method', () => { + expect(typeof (payload as VectorizedPayload).bulkEmbed).toBe('function') + }) + + test('bulkEmbed throws error when bulk embedding not configured', async () => { + const vectorizedPayload = payload as VectorizedPayload<'default'> + + // This pool doesn't have bulkEmbeddingsFns configured + await expect(vectorizedPayload.bulkEmbed({ knowledgePool: 'default' })).rejects.toThrow( + 'does not have bulk embedding configured', + ) + }) + }) + + describe('retryFailedBatch method', () => { + test('payload has retryFailedBatch method', () => { + 
expect(typeof (payload as VectorizedPayload).retryFailedBatch).toBe('function') + }) + + test('retryFailedBatch returns error for non-existent batch', async () => { + const vectorizedPayload = payload as VectorizedPayload + + const result = await vectorizedPayload.retryFailedBatch({ batchId: '999999' }) + + expect('error' in result).toBe(true) + if ('error' in result) { + expect(result.error).toContain('not found') + } + }) + }) }) diff --git a/src/admin/components/RetryFailedBatchButton/client.tsx b/src/admin/components/RetryFailedBatchButton/client.tsx new file mode 100644 index 0000000..40a4374 --- /dev/null +++ b/src/admin/components/RetryFailedBatchButton/client.tsx @@ -0,0 +1,183 @@ +'use client' + +import React, { useState } from 'react' + +type RetryFailedBatchButtonClientProps = { + batchId: string + status: string +} + +export const RetryFailedBatchButtonClient: React.FC = ({ + batchId, + status, +}) => { + const [isSubmitting, setIsSubmitting] = useState(false) + const [message, setMessage] = useState<{ text: string; error?: boolean } | null>(null) + + const isDisabled = status !== 'failed' + + const handleClick = async () => { + if (isDisabled) return + + setIsSubmitting(true) + setMessage(null) + + try { + const res = await fetch('/api/vector-retry-failed-batch', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ batchId }), + }) + + const data = await res.json() + + if (!res.ok) { + setMessage({ text: data?.error || 'Failed to retry batch', error: true }) + return + } + + setMessage({ text: 'Batch re-queued successfully', error: false }) + + // Reload the page after a short delay to show the updated status + setTimeout(() => { + window.location.reload() + }, 1500) + } catch (error: any) { + setMessage({ text: error?.message || 'Failed to retry batch', error: true }) + } finally { + setIsSubmitting(false) + } + } + + return ( +
+    <div>
+      <h4>{isDisabled ? 'Retry Not Available' : 'Retry Failed Batch'}</h4>
+      <p>
+        {isDisabled
+          ? `This batch is in "${status}" status. Retry is only available for failed batches.`
+          : 'Re-queue this failed batch for processing. The batch will be polled again and embeddings will be written for successful chunks.'}
+      </p>
+      <button
+        type="button"
+        data-testid="retry-failed-batch-button"
+        disabled={isDisabled || isSubmitting}
+        onClick={handleClick}
+        style={{ opacity: isDisabled ? 0.5 : 1 }}
+      >
+        {isSubmitting ? 'Retrying…' : 'Retry failed batch'}
+      </button>
+      {message && <p>{message.text}</p>}
+    </div>
+ ) +} diff --git a/src/admin/components/RetryFailedBatchButton/index.tsx b/src/admin/components/RetryFailedBatchButton/index.tsx new file mode 100644 index 0000000..b06b00d --- /dev/null +++ b/src/admin/components/RetryFailedBatchButton/index.tsx @@ -0,0 +1,56 @@ +import React from 'react' +import { RetryFailedBatchButtonClient } from './client.js' + +type RetryFailedBatchButtonProps = { + batchId: string + status: string +} + +export const RetryFailedBatchButton: React.FC< + RetryFailedBatchButtonProps & { payload?: any; params?: any; data?: any } +> = (props) => { + // Handle both direct props and serverProps functions + let batchId: string = '' + let status: string = '' + + if (typeof props.batchId === 'function') { + try { + batchId = String( + (props.batchId as any)({ payload: props.payload, params: props.params, data: props.data }) || + '', + ) + } catch (error) { + console.error('[RetryFailedBatchButton] Error calling batchId:', error) + batchId = '' + } + } else if (props.data?.id) { + batchId = String(props.data.id) + } else { + batchId = String(props.batchId || '') + } + + if (typeof props.status === 'function') { + try { + status = String( + (props.status as any)({ payload: props.payload, params: props.params, data: props.data }) || + '', + ) + } catch (error) { + console.error('[RetryFailedBatchButton] Error calling status:', error) + status = '' + } + } else if (props.data?.status) { + status = String(props.data.status) + } else { + status = String(props.status || '') + } + + // Only render on the edit view (when we have a batchId) + if (!batchId) { + return null + } + + return +} + +export default RetryFailedBatchButton diff --git a/src/collections/bulkEmbeddingInputMetadata.ts b/src/collections/bulkEmbeddingInputMetadata.ts index 22263fe..29472c1 100644 --- a/src/collections/bulkEmbeddingInputMetadata.ts +++ b/src/collections/bulkEmbeddingInputMetadata.ts @@ -71,5 +71,8 @@ export const createBulkEmbeddingInputMetadataCollection = (): CollectionConfig = { fields: ['run'], }, + { + fields: ['sourceCollection', 'docId'], + }, ], }) diff --git a/src/collections/bulkEmbeddingsBatches.ts b/src/collections/bulkEmbeddingsBatches.ts index 21e89e3..219e1ba 100644 --- a/src/collections/bulkEmbeddingsBatches.ts +++ b/src/collections/bulkEmbeddingsBatches.ts @@ -22,6 +22,19 @@ export const createBulkEmbeddingsBatchesCollection = (): CollectionConfig => ({ description: 'Individual batches within a bulk embedding run. Created when input count exceeds file limits.', defaultColumns: ['run', 'batchIndex', 'status', 'inputCount', 'succeededCount', 'failedCount'], + components: { + edit: { + beforeDocumentControls: [ + { + path: 'payloadcms-vectorize/client#RetryFailedBatchButton', + serverProps: { + batchId: ({ data }: { data: any }) => data?.id, + status: ({ data }: { data: any }) => data?.status, + }, + }, + ], + }, + }, }, access: { // Anyone can read; only internal (local API) can mutate. diff --git a/src/endpoints/bulkEmbed.ts b/src/endpoints/bulkEmbed.ts index a3f0f5b..d3d5503 100644 --- a/src/endpoints/bulkEmbed.ts +++ b/src/endpoints/bulkEmbed.ts @@ -1,6 +1,70 @@ -import type { PayloadHandler } from 'payload' +import type { Payload, PayloadHandler } from 'payload' import { BULK_EMBEDDINGS_RUNS_SLUG } from '../collections/bulkEmbeddingsRuns.js' -import type { KnowledgePoolDynamicConfig, KnowledgePoolName } from '../types.js' +import type { BulkEmbedResult, KnowledgePoolDynamicConfig, KnowledgePoolName } from '../types.js' + +/** + * Core logic for starting a bulk embed run. 
+ * Used by both the HTTP handler and VectorizedPayload.bulkEmbed method. + */ +export async function startBulkEmbed< + TPoolNames extends KnowledgePoolName = KnowledgePoolName, +>(args: { + payload: Payload + knowledgePool: TPoolNames + knowledgePools: Record + queueName?: string +}): Promise { + const { payload, knowledgePool, knowledgePools, queueName } = args + + const poolConfig = knowledgePools[knowledgePool] + if (!poolConfig) { + throw new Error(`[payloadcms-vectorize] Knowledge pool "${knowledgePool}" not found`) + } + if (!poolConfig.embeddingConfig.bulkEmbeddingsFns) { + throw new Error( + `[payloadcms-vectorize] Knowledge pool "${knowledgePool}" does not have bulk embedding configured`, + ) + } + + // Check for existing non-terminal run for this pool + const existingActiveRun = await payload.find({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + where: { + and: [{ pool: { equals: knowledgePool } }, { status: { in: ['queued', 'running'] } }], + }, + limit: 1, + }) + + if (existingActiveRun.totalDocs > 0) { + const existing = existingActiveRun.docs[0] as any + return { + runId: String(existing.id), + status: existing.status, + message: `A bulk embedding run is already ${existing.status} for this knowledge pool. Wait for it to complete or cancel it first.`, + conflict: true, + } + } + + const run = await payload.create({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + pool: knowledgePool, + embeddingVersion: poolConfig.embeddingConfig.version, + status: 'queued', + }, + }) + + await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ + task: 'payloadcms-vectorize:prepare-bulk-embedding', + input: { runId: String(run.id) }, + ...(queueName ? { queue: queueName } : {}), + }) + + return { + runId: String(run.id), + status: 'queued', + } +} export const createBulkEmbedHandler = ( knowledgePools: Record, @@ -19,64 +83,31 @@ export const createBulkEmbedHandler = ( { status: 400 }, ) } - const poolConfig = knowledgePools[knowledgePool] - if (!poolConfig) { - return Response.json( - { error: `Knowledge pool "${knowledgePool}" not found` }, - { status: 400 }, - ) - } - const payload = req.payload - - // Check for existing queued run for this pool - return it instead of creating a new one - const existingQueuedRun = await payload.find({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - where: { - and: [{ pool: { equals: knowledgePool } }, { status: { equals: 'queued' } }], - }, - limit: 1, + const result = await startBulkEmbed({ + payload: req.payload, + knowledgePool, + knowledgePools, + queueName, }) - if (existingQueuedRun.totalDocs > 0) { - const existing = existingQueuedRun.docs[0] as any - return Response.json( - { - runId: String(existing.id), - status: existing.status, - message: `A bulk embedding run is already queued for this knowledge pool`, - }, - { status: 200 }, - ) + if ('conflict' in result && result.conflict) { + return Response.json(result, { status: 409 }) } - const run = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { - pool: knowledgePool, - embeddingVersion: poolConfig.embeddingConfig.version, - status: 'queued', - }, - }) - - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { - runId: String(run.id), - }, - req, - ...(queueName ? 
{ queue: queueName } : {}), - }) - + return Response.json(result, { status: 202 }) + } catch (error) { + const errorMessage = (error as Error).message || String(error) + req.payload.logger.error( + `[payloadcms-vectorize] Failed to queue bulk embed run: ${errorMessage}`, + ) return Response.json( { - runId: String(run.id), - status: 'queued', + error: 'Failed to queue bulk embed run', + details: errorMessage, }, - { status: 202 }, + { status: 500 }, ) - } catch (error) { - return Response.json({ error: 'Failed to queue bulk embed run' }, { status: 500 }) } } return handler diff --git a/src/endpoints/retryFailedBatch.ts b/src/endpoints/retryFailedBatch.ts new file mode 100644 index 0000000..d66cb61 --- /dev/null +++ b/src/endpoints/retryFailedBatch.ts @@ -0,0 +1,167 @@ +import type { Payload, PayloadHandler } from 'payload' +import { BULK_EMBEDDINGS_BATCHES_SLUG } from '../collections/bulkEmbeddingsBatches.js' +import { BULK_EMBEDDINGS_RUNS_SLUG } from '../collections/bulkEmbeddingsRuns.js' +import type { + KnowledgePoolDynamicConfig, + KnowledgePoolName, + RetryFailedBatchResult, +} from '../types.js' + +/** + * Core logic for retrying a failed batch. + * Used by both the HTTP handler and VectorizedPayload.retryFailedBatch method. + */ +export async function retryBatch(args: { + payload: Payload + batchId: string + knowledgePools: Record + queueName?: string +}): Promise { + const { payload, batchId, knowledgePools, queueName } = args + + // Find the batch + let batch: any + try { + batch = await payload.findByID({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + id: batchId, + }) + } catch { + return { error: `Batch "${batchId}" not found` } + } + + if (!batch) { + return { error: `Batch "${batchId}" not found` } + } + + // Verify batch has failed status + if (batch.status !== 'failed') { + return { + error: `Batch "${batchId}" is not in failed status. Current status: ${batch.status}`, + } + } + + // Get the parent run + const runId = typeof batch.run === 'object' ? batch.run.id : batch.run + const run = await payload.findByID({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + id: String(runId), + }) + + if (!run) { + return { error: `Parent run not found for batch "${batchId}"` } + } + + // Only allow retry when run is in a terminal state + const runStatus = (run as any).status + if (runStatus === 'running' || runStatus === 'queued') { + return { + error: `Cannot retry batch while run is ${runStatus}. 
Wait for the run to complete first.`, + conflict: true, + } + } + + const poolName = (run as any).pool as TPoolNames + const poolConfig = knowledgePools[poolName] + + if (!poolConfig) { + return { error: `Knowledge pool "${poolName}" not found` } + } + + if (!poolConfig.embeddingConfig.bulkEmbeddingsFns) { + return { + error: `Knowledge pool "${poolName}" does not have bulk embedding configured`, + } + } + + // Reset the batch status to queued + await payload.update({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + id: batchId, + data: { + status: 'queued', + error: null, + completedAt: null, + succeededCount: 0, + failedCount: 0, + }, + }) + + // If the parent run is in failed/succeeded status, reset it to running + if (runStatus === 'failed' || runStatus === 'succeeded') { + await payload.update({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + id: String(runId), + data: { + status: 'running', + completedAt: null, + }, + }) + } + + // Queue the poll-or-complete task + await payload.jobs.queue<'payloadcms-vectorize:poll-or-complete-bulk-embedding'>({ + task: 'payloadcms-vectorize:poll-or-complete-bulk-embedding', + input: { runId: String(runId) }, + ...(queueName ? { queue: queueName } : {}), + }) + + return { + batchId, + runId: String(runId), + status: 'queued', + message: 'Failed batch has been re-queued for processing', + } +} + +export const createRetryFailedBatchHandler = ( + knowledgePools: Record, + pollOrCompleteQueueName?: string, +): PayloadHandler => { + const handler: PayloadHandler = async (req) => { + if (!req || !req.json) { + return Response.json({ error: 'Request is required' }, { status: 400 }) + } + + try { + const body = await req.json() + const batchId = body?.batchId as string + + if (!batchId) { + return Response.json({ error: 'batchId is required and must be a string' }, { status: 400 }) + } + + const result = await retryBatch({ + payload: req.payload, + batchId, + knowledgePools, + queueName: pollOrCompleteQueueName, + }) + + if ('error' in result) { + if ('conflict' in result && result.conflict) { + return Response.json(result, { status: 409 }) + } + // Check if it's a "not found" error + if (result.error.includes('not found')) { + return Response.json(result, { status: 404 }) + } + return Response.json(result, { status: 400 }) + } + + return Response.json(result, { status: 202 }) + } catch (error) { + const errorMessage = (error as Error).message || String(error) + req.payload.logger.error(`[payloadcms-vectorize] Failed to retry batch: ${errorMessage}`) + return Response.json( + { + error: 'Failed to retry batch', + details: errorMessage, + }, + { status: 500 }, + ) + } + } + + return handler +} diff --git a/src/exports/client.ts b/src/exports/client.ts index e864467..c871ed9 100644 --- a/src/exports/client.ts +++ b/src/exports/client.ts @@ -1 +1,2 @@ export { EmbedAllButton } from '../admin/components/EmbedAllButton/index.js' +export { RetryFailedBatchButton } from '../admin/components/RetryFailedBatchButton/index.js' diff --git a/src/index.ts b/src/index.ts index 50e1611..d0eaf09 100644 --- a/src/index.ts +++ b/src/index.ts @@ -11,6 +11,8 @@ import type { KnowledgePoolDynamicConfig, VectorizedPayload, VectorSearchQuery, + BulkEmbedResult, + RetryFailedBatchResult, } from './types.js' import { isPostgresPayload } from './types.js' import type { PostgresAdapterArgs } from '@payloadcms/db-postgres' @@ -33,7 +35,8 @@ import { createPrepareBulkEmbeddingTask, createPollOrCompleteBulkEmbeddingTask, } from './tasks/bulkEmbedAll.js' -import { createBulkEmbedHandler } from 
'./endpoints/bulkEmbed.js' +import { createBulkEmbedHandler, startBulkEmbed } from './endpoints/bulkEmbed.js' +import { createRetryFailedBatchHandler, retryBatch } from './endpoints/retryFailedBatch.js' export type { KnowledgePoolStaticConfig, @@ -58,9 +61,8 @@ export type { // BulkEmbeddingsFns AddChunkArgs, BatchSubmission, - PollBatchArgs, + PollOrCompleteBatchArgs, PollBulkEmbeddingsResult, - CompleteBatchArgs, BulkEmbeddingOutput, OnBulkErrorArgs, @@ -69,6 +71,8 @@ export type { // PollBulkEmbeddingsResult BulkEmbeddingRunStatus, + isVectorizedPayload, + VectorizedPayload, } from './types.js' async function ensurePgvectorArtifacts(args: { @@ -359,6 +363,25 @@ export const createVectorizeIntegration = ) } } + + // Also clean up any pending bulk embedding metadata for this document + // This prevents embedding a document that was deleted during a bulk run + try { + await payload.delete({ + collection: BULK_EMBEDDINGS_INPUT_METADATA_SLUG, + where: { + and: [ + { sourceCollection: { equals: collectionSlug } }, + { docId: { equals: String(id) } }, + ], + }, + }) + } catch (e) { + payload?.logger?.warn?.( + `[payloadcms-vectorize] Failed to delete bulk embedding metadata for ${collectionSlug}:${id}`, + e as Error, + ) + } }, ], } @@ -367,11 +390,7 @@ export const createVectorizeIntegration = const incomingOnInit = config.onInit const vectorSearchHandlers = createVectorSearchHandlers(pluginOptions.knowledgePools) config.onInit = async (payload) => { - if (incomingOnInit) - await incomingOnInit(payload) - - // Add _isBulkEmbedEnabled method to payload object - // This allows checking if bulk embedding is enabled for a knowledge pool + if (incomingOnInit) await incomingOnInit(payload) ;(payload as VectorizedPayload) = { ...(payload as any), _isBulkEmbedEnabled: (knowledgePool: TPoolNames): boolean => { @@ -419,6 +438,20 @@ export const createVectorizeIntegration = } return embedQueue(doc, payload) }, + bulkEmbed: (params: { knowledgePool: TPoolNames }): Promise => + startBulkEmbed({ + payload, + knowledgePool: params.knowledgePool, + knowledgePools: pluginOptions.knowledgePools, + queueName: pluginOptions.bulkQueueNames?.prepareBulkEmbedQueueName, + }), + retryFailedBatch: (params: { batchId: string }): Promise => + retryBatch({ + payload, + batchId: params.batchId, + knowledgePools: pluginOptions.knowledgePools, + queueName: pluginOptions.bulkQueueNames?.pollOrCompleteQueueName, + }), } // Ensure pgvector artifacts for each knowledge pool for (const poolName in staticConfigs) { @@ -452,6 +485,14 @@ export const createVectorizeIntegration = pluginOptions.bulkQueueNames?.prepareBulkEmbedQueueName, ), }, + { + path: '/vector-retry-failed-batch', + method: 'post' as const, + handler: createRetryFailedBatchHandler( + pluginOptions.knowledgePools, + pluginOptions.bulkQueueNames?.pollOrCompleteQueueName, + ), + }, ] config.endpoints = endpoints } diff --git a/src/tasks/bulkEmbedAll.ts b/src/tasks/bulkEmbedAll.ts index d8cf536..fd9af6d 100644 --- a/src/tasks/bulkEmbedAll.ts +++ b/src/tasks/bulkEmbedAll.ts @@ -215,16 +215,28 @@ export const createPollOrCompleteBulkEmbeddingTask = ({ return { output: { runId: input.runId, status: currentStatus } } } - // Load all batches for this run + // Load all batches for this run with pagination to handle >1000 batches // Convert runId to number for postgres relationship queries const runIdNum = parseInt(input.runId, 10) - const batchesResult = await payload.find({ - collection: BULK_EMBEDDINGS_BATCHES_SLUG, - where: { run: { equals: runIdNum } }, - limit: 1000, - 
sort: 'batchIndex', - }) - const batches = (batchesResult as any)?.docs || [] + const batches: any[] = [] + let batchPage = 1 + const batchLimit = 100 // Smaller pages for better memory management + + while (true) { + const batchesResult = await payload.find({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + where: { run: { equals: runIdNum } }, + limit: batchLimit, + page: batchPage, + sort: 'batchIndex', + }) + const pageDocs = (batchesResult as any)?.docs || [] + batches.push(...pageDocs) + + const totalPages = (batchesResult as any)?.totalPages ?? batchPage + if (batchPage >= totalPages || pageDocs.length === 0) break + batchPage++ + } if (batches.length === 0) { // No batches found - this shouldn't happen but handle gracefully @@ -240,73 +252,155 @@ export const createPollOrCompleteBulkEmbeddingTask = ({ return { output: { runId: input.runId, status: 'failed' } } } - // Poll each non-terminal batch - let allSucceeded = true - let anyFailed = false + // Poll each non-terminal batch and complete succeeded ones incrementally let anyRunning = false + let totalSucceeded = 0 + let totalFailed = 0 + const allFailedChunkData: FailedChunkData[] = [] + const batchStatuses = new Map() // Track batch statuses as we process + // Initialize with current statuses for (const batch of batches) { - const batchStatus = batch.status as string - if (TERMINAL_STATUSES.has(batchStatus)) { - if (batchStatus !== 'succeeded') { - anyFailed = true - allSucceeded = false + batchStatuses.set(String(batch.id), batch.status as string) + // Accumulate counts from already completed batches + if (TERMINAL_STATUSES.has(batch.status as string)) { + if (batch.status === 'succeeded') { + totalSucceeded += batch.succeededCount || 0 + totalFailed += batch.failedCount || 0 } + } + } + + for (const batch of batches) { + const batchStatus = batchStatuses.get(String(batch.id)) as string + + // Skip batches that are already completed + if (TERMINAL_STATUSES.has(batchStatus)) { continue } - // Poll this batch - const pollResult = await callbacks.pollBatch({ - providerBatchId: batch.providerBatchId, - }) + // Poll batch and complete if succeeded (streams embeddings via onChunk callback) + try { + const completionResult = await pollAndCompleteSingleBatch({ + payload, + runId: input.runId, + poolName, + batch, + callbacks, + }) - // Update batch status - await payload.update({ - id: batch.id, - collection: BULK_EMBEDDINGS_BATCHES_SLUG, - data: { - status: pollResult.status, - error: pollResult.error, - ...(TERMINAL_STATUSES.has(pollResult.status) - ? { completedAt: new Date().toISOString() } - : {}), - }, - }) + // Update batch status and counts + await payload.update({ + id: batch.id, + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + data: { + status: completionResult.status, + error: completionResult.error, + ...(TERMINAL_STATUSES.has(completionResult.status) + ? { completedAt: new Date().toISOString() } + : {}), + ...(completionResult.status === 'succeeded' + ? 
{ + succeededCount: completionResult.succeededCount, + failedCount: completionResult.failedCount, + } + : {}), + }, + }) + + // Track the new status + batchStatuses.set(String(batch.id), completionResult.status) + + // Accumulate counts from newly succeeded batches + if (completionResult.status === 'succeeded') { + totalSucceeded += completionResult.succeededCount + totalFailed += completionResult.failedCount + allFailedChunkData.push(...completionResult.failedChunkData) + } - if (pollResult.status === 'failed' || pollResult.status === 'canceled') { - anyFailed = true - allSucceeded = false - } else if (!TERMINAL_STATUSES.has(pollResult.status)) { - anyRunning = true - allSucceeded = false + // Track if still running (queued or running) + if (completionResult.status === 'queued' || completionResult.status === 'running') { + anyRunning = true + } + // Failed/canceled batches - leave them, can be re-run later + } catch (error) { + // Completion failed - mark batch as failed + const errorMessage = (error as Error).message || String(error) + await payload.update({ + id: batch.id, + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + data: { + status: 'failed', + error: `Completion failed: ${errorMessage}`, + completedAt: new Date().toISOString(), + }, + }) + batchStatuses.set(String(batch.id), 'failed') } } - // If any batch failed, mark the entire run as failed - if (anyFailed) { + // Check if all batches are complete + const allBatchesComplete = Array.from(batchStatuses.values()).every((status) => + TERMINAL_STATUSES.has(status), + ) + + if (allBatchesComplete) { + // All batches are done - finalize the run + const hasAnySucceeded = Array.from(batchStatuses.values()).some( + (status) => status === 'succeeded', + ) + + // Check if any batches are failed (not just canceled) - we keep metadata for potential retries + const hasFailedBatches = Array.from(batchStatuses.values()).some( + (status) => status === 'failed', + ) + await payload.update({ id: input.runId, collection: BULK_EMBEDDINGS_RUNS_SLUG, data: { - status: 'failed', - error: 'One or more batches failed', + status: hasAnySucceeded ? 'succeeded' : 'failed', + succeeded: totalSucceeded, + failed: totalFailed, + failedChunkData: allFailedChunkData.length > 0 ? allFailedChunkData : undefined, completedAt: new Date().toISOString(), }, }) - // Cleanup metadata without writing embeddings - await payload.delete({ - collection: BULK_EMBEDDINGS_INPUT_METADATA_SLUG, - where: { run: { equals: (run as any).id } }, - }) - // Call onError callback so user can clean up provider-side resources - if (callbacks.onError) { + + // Cleanup metadata for succeeded batches only + // Keep metadata for failed batches to allow retry functionality + const succeededBatchIds = Array.from(batchStatuses.entries()) + .filter(([_, status]) => status === 'succeeded') + .map(([id, _]) => parseInt(id, 10)) + + if (succeededBatchIds.length > 0) { + await payload.delete({ + collection: BULK_EMBEDDINGS_INPUT_METADATA_SLUG, + where: { batch: { in: succeededBatchIds } }, + }) + } + + // Call onError if there were any failures + if (callbacks.onError && (totalFailed > 0 || !hasAnySucceeded)) { const providerBatchIds = batches.map((b: any) => b.providerBatchId as string) await callbacks.onError({ providerBatchIds, - error: new Error('One or more batches failed'), + error: new Error( + totalFailed > 0 + ? `${totalFailed} chunk(s) failed during completion` + : 'All batches failed', + ), + failedChunkData: allFailedChunkData.length > 0 ? 
allFailedChunkData : undefined, + failedChunkCount: totalFailed > 0 ? totalFailed : undefined, }) } - return { output: { runId: input.runId, status: 'failed' } } + + return { + output: { + runId: input.runId, + status: hasAnySucceeded ? 'succeeded' : 'failed', + }, + } } // If still running, requeue this task @@ -320,67 +414,47 @@ export const createPollOrCompleteBulkEmbeddingTask = ({ return { output: { runId: input.runId, status: 'polling' } } } - // All batches succeeded - complete the embeddings (writes successful chunks, tracks failures) - if (allSucceeded) { - const completionResult = await completeBatches({ - payload, - runId: input.runId, - poolName, - batches, - callbacks, - }) + // Edge case: allBatchesComplete is false but anyRunning is false + // This happens when all batches are in 'canceled' or 'failed' status but we didn't detect it above + // Check if all batches are canceled + const allCanceled = Array.from(batchStatuses.values()).every( + (status) => status === 'canceled', + ) + if (allCanceled) { await payload.update({ id: input.runId, collection: BULK_EMBEDDINGS_RUNS_SLUG, data: { - status: completionResult.success ? 'succeeded' : 'failed', - succeeded: completionResult.succeededCount, - failed: completionResult.failedCount, - error: completionResult.error, - failedChunkData: - completionResult.failedChunkData.length > 0 - ? completionResult.failedChunkData - : undefined, + status: 'canceled', completedAt: new Date().toISOString(), }, }) - - // Cleanup metadata - await payload.delete({ - collection: BULK_EMBEDDINGS_INPUT_METADATA_SLUG, - where: { run: { equals: (run as any).id } }, - }) - - // Call onError if completion failed OR if there were partial chunk failures - if (callbacks.onError && (!completionResult.success || completionResult.failedCount > 0)) { - const providerBatchIds = batches.map((b: any) => b.providerBatchId as string) - await callbacks.onError({ - providerBatchIds, - error: new Error( - completionResult.error || - (completionResult.failedCount > 0 - ? `${completionResult.failedCount} chunk(s) failed during completion` - : 'Completion failed'), - ), - failedChunkData: - completionResult.failedChunkData.length > 0 - ? completionResult.failedChunkData - : undefined, - failedChunkCount: - completionResult.failedCount > 0 ? completionResult.failedCount : undefined, - }) - } - - return { - output: { - runId: input.runId, - status: completionResult.success ? 'succeeded' : 'failed', - }, - } + return { output: { runId: input.runId, status: 'canceled' } } } - return { output: { runId: input.runId, status: 'unknown' } } + // Fallback: mark as failed with diagnostic info + const statusCounts = Array.from(batchStatuses.values()).reduce( + (acc, status) => { + acc[status] = (acc[status] || 0) + 1 + return acc + }, + {} as Record, + ) + payload.logger.warn( + `[payloadcms-vectorize] Run ${input.runId} reached unexpected state. Batch statuses: ${JSON.stringify(statusCounts)}`, + ) + + await payload.update({ + id: input.runId, + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + status: 'failed', + error: `Run reached unexpected state. Batch statuses: ${JSON.stringify(statusCounts)}`, + completedAt: new Date().toISOString(), + }, + }) + return { output: { runId: input.runId, status: 'failed' } } }, } @@ -390,6 +464,10 @@ export const createPollOrCompleteBulkEmbeddingTask = ({ /** * Stream through missing embeddings, calling addChunk for each. * User controls batching via addChunk return value. + * + * Uses a two-pass approach: + * 1. 
First pass: count total chunks to know when we reach the last one + * 2. Second pass: stream chunks without holding all in memory */ async function streamAndBatchMissingEmbeddings(args: { payload: Payload @@ -419,15 +497,14 @@ async function streamAndBatchMissingEmbeddings(args: { const includeAll = versionMismatch || !hasBaseline const lastCompletedAtDate = lastBulkCompletedAt ? new Date(lastBulkCompletedAt) : undefined - - let batchIndex = 0 - let totalInputs = 0 const collectionSlugs = Object.keys(dynamicConfig.collections) - // Collect all chunks first to know which is the last one - const allChunks: CollectedEmbeddingInput[] = [] + // First pass: count total chunks to know the last one + // We store minimal info (docId + chunkCount) to avoid OOM + type DocChunkInfo = { collectionSlug: string; docId: string; chunkCount: number } + const docsToProcess: DocChunkInfo[] = [] + let totalChunkCount = 0 - // Iterate through all collections and their documents for (const collectionSlug of collectionSlugs) { const collectionConfig = dynamicConfig.collections[collectionSlug] if (!collectionConfig) continue @@ -436,7 +513,6 @@ async function streamAndBatchMissingEmbeddings(args: { let page = 1 const limit = 50 - // Paginate through source collection docs while (true) { const res = await payload.find({ collection: collectionSlug, @@ -465,22 +541,14 @@ async function streamAndBatchMissingEmbeddings(args: { if (!shouldInclude) continue const chunkData = await toKnowledgePool(doc, payload) - for (let idx = 0; idx < chunkData.length; idx++) { - const chunkEntry = chunkData[idx] - if (!chunkEntry?.chunk) continue - - const { chunk, ...extensionFields } = chunkEntry - allChunks.push({ - id: `${collectionSlug}:${doc.id}:${idx}`, - text: chunk, - metadata: { - sourceCollection: collectionSlug, - docId: String(doc.id), - chunkIndex: idx, - embeddingVersion, - extensionFields, - }, + const validChunkCount = chunkData.filter((c) => c?.chunk).length + if (validChunkCount > 0) { + docsToProcess.push({ + collectionSlug, + docId: String(doc.id), + chunkCount: validChunkCount, }) + totalChunkCount += validChunkCount } } @@ -489,70 +557,108 @@ async function streamAndBatchMissingEmbeddings(args: { } } - // Track pending chunks - plugin manages this queue - const pendingChunks: CollectedEmbeddingInput[] = [] + // If no chunks, return early + if (totalChunkCount === 0) { + return { batchCount: 0, totalInputs: 0 } + } - // Stream chunks to addChunk, tracking which is last - for (let i = 0; i < allChunks.length; i++) { - const collectedChunk = allChunks[i] - const isLastChunk = i === allChunks.length - 1 + // Second pass: stream chunks without holding all in memory + let batchIndex = 0 + let totalInputs = 0 + let processedChunkCount = 0 + const pendingChunks: CollectedEmbeddingInput[] = [] - // Add to pending queue BEFORE calling addChunk - pendingChunks.push(collectedChunk) + for (const docInfo of docsToProcess) { + const collectionConfig = dynamicConfig.collections[docInfo.collectionSlug] + if (!collectionConfig) continue - const submission = await addChunk({ - chunk: { id: collectedChunk.id, text: collectedChunk.text }, - isLastChunk, + // Re-fetch the document to get its data + const doc = await payload.findByID({ + collection: docInfo.collectionSlug as any, + id: docInfo.docId, }) + if (!doc) continue - if (submission) { - // User submitted a batch - // - If isLastChunk: all pending chunks were submitted - // - If not isLastChunk: all except current were submitted (current starts fresh) - let submittedChunks: 
CollectedEmbeddingInput[] - if (isLastChunk) { - submittedChunks = pendingChunks.splice(0) - } else { - submittedChunks = pendingChunks.splice(0, pendingChunks.length - 1) + const toKnowledgePool = collectionConfig.toKnowledgePool + const chunkData = await toKnowledgePool(doc, payload) + + for (let idx = 0; idx < chunkData.length; idx++) { + const chunkEntry = chunkData[idx] + if (!chunkEntry?.chunk) continue + + processedChunkCount++ + const isLastChunk = processedChunkCount === totalChunkCount + + const { chunk, ...extensionFields } = chunkEntry + const collectedChunk: CollectedEmbeddingInput = { + id: `${docInfo.collectionSlug}:${doc.id}:${idx}`, + text: chunk, + metadata: { + sourceCollection: docInfo.collectionSlug, + docId: String(doc.id), + chunkIndex: idx, + embeddingVersion, + extensionFields, + }, } - // Convert runId to number for postgres relationships - const runIdNum = parseInt(runId, 10) + // Add to pending queue BEFORE calling addChunk + pendingChunks.push(collectedChunk) - // Store metadata for submitted chunks - await Promise.all( - submittedChunks.map((chunk) => - payload.create({ - collection: BULK_EMBEDDINGS_INPUT_METADATA_SLUG, - data: { - run: runIdNum, - inputId: chunk.id, - text: chunk.text, - sourceCollection: chunk.metadata.sourceCollection, - docId: chunk.metadata.docId, - chunkIndex: chunk.metadata.chunkIndex, - embeddingVersion: chunk.metadata.embeddingVersion, - extensionFields: chunk.metadata.extensionFields, - }, - }), - ), - ) - - // Create batch record - await payload.create({ - collection: BULK_EMBEDDINGS_BATCHES_SLUG, - data: { - run: runIdNum, - batchIndex, - providerBatchId: submission.providerBatchId, - status: 'queued', - inputCount: submittedChunks.length, - submittedAt: new Date().toISOString(), - }, + const submission = await addChunk({ + chunk: { id: collectedChunk.id, text: collectedChunk.text }, + isLastChunk, }) - totalInputs += submittedChunks.length - batchIndex++ + if (submission) { + // User submitted a batch + // - If isLastChunk: all pending chunks were submitted + // - If not isLastChunk: all except current were submitted (current starts fresh) + let submittedChunks: CollectedEmbeddingInput[] + if (isLastChunk) { + submittedChunks = pendingChunks.splice(0) + } else { + submittedChunks = pendingChunks.splice(0, pendingChunks.length - 1) + } + + // Convert runId to number for postgres relationships + const runIdNum = parseInt(runId, 10) + + // Store metadata for submitted chunks + await Promise.all( + submittedChunks.map((c) => + payload.create({ + collection: BULK_EMBEDDINGS_INPUT_METADATA_SLUG, + data: { + run: runIdNum, + inputId: c.id, + text: c.text, + sourceCollection: c.metadata.sourceCollection, + docId: c.metadata.docId, + chunkIndex: c.metadata.chunkIndex, + embeddingVersion: c.metadata.embeddingVersion, + extensionFields: c.metadata.extensionFields, + }, + }), + ), + ) + + // Create batch record + await payload.create({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + data: { + run: runIdNum, + batchIndex, + providerBatchId: submission.providerBatchId, + status: 'queued', + inputCount: submittedChunks.length, + submittedAt: new Date().toISOString(), + }, + }) + + totalInputs += submittedChunks.length + batchIndex++ + } } } @@ -560,86 +666,141 @@ async function streamAndBatchMissingEmbeddings(args: { } /** - * Complete all batches - download all outputs and write successful embeddings. - * - * Note: This function writes partial results. If some chunks fail during completion, - * successful embeddings are still written. 
Only failed chunks are skipped. - * The operation is atomic in that if an exception is thrown, nothing is written. + * Check if a source document exists */ -async function completeBatches(args: { +async function documentExists(args: { + payload: Payload + collection: string + docId: string +}): Promise { + const { payload, collection, docId } = args + try { + await payload.findByID({ + collection: collection as any, + id: docId, + }) + return true + } catch (error) { + // Document not found or other error + return false + } +} + +/** + * Poll a single batch and complete if succeeded - stream outputs and write embeddings incrementally. + * Checks document existence before writing each embedding (skips deleted docs). + * Returns both the batch status and completion counts. + */ +async function pollAndCompleteSingleBatch(args: { payload: Payload runId: string poolName: KnowledgePoolName - batches: any[] + batch: any callbacks: { - completeBatch: (args: { providerBatchId: string }) => Promise + pollOrCompleteBatch: (args: { + providerBatchId: string + onChunk: (chunk: BulkEmbeddingOutput) => Promise + }) => Promise<{ status: string; error?: string }> } }): Promise<{ - success: boolean + status: string + error?: string succeededCount: number failedCount: number failedChunkData: FailedChunkData[] - error?: string }> { - const { payload, runId, poolName, batches, callbacks } = args + const { payload, runId, poolName, batch, callbacks } = args + + let succeededCount = 0 + let failedCount = 0 + const failedChunkData: FailedChunkData[] = [] + const processedDocs = new Set() // Track which docs we've processed (for deletion) + + // Poll batch and stream chunks when complete + const pollResult = await callbacks.pollOrCompleteBatch({ + providerBatchId: batch.providerBatchId, + onChunk: async (output: BulkEmbeddingOutput) => { + // Lookup metadata on-demand (O(1) with index) instead of loading all into memory + const meta = await getMetadataByInputId({ + payload, + runId, + inputId: output.id, + }) + if (!meta) { + // Metadata not found - log and skip this chunk (may have been deleted or cleanup ran) + payload.logger.warn( + `[payloadcms-vectorize] Metadata not found for chunk ${output.id} in run ${runId}. 
Skipping chunk.`, + ) + failedCount++ + return + } - try { - // Load all metadata for this run - const metadataById = await loadInputMetadataByRun({ payload, runId }) - - // Collect all outputs from all batches - const allOutputs: BulkEmbeddingOutput[] = [] - for (const batch of batches) { - const outputs = await callbacks.completeBatch({ - providerBatchId: batch.providerBatchId, + // Check if document still exists (may have been deleted during bulk embedding) + const docExists = await documentExists({ + payload, + collection: meta.sourceCollection, + docId: meta.docId, }) - allOutputs.push(...outputs) - } - // Filter successful outputs and collect failed chunk data - const successfulOutputs = allOutputs.filter((o) => !o.error && o.embedding) - const failedChunkData: FailedChunkData[] = [] - for (const output of allOutputs) { - if (output.error) { - const meta = metadataById.get(output.id) - if (meta) { - failedChunkData.push({ - collection: meta.sourceCollection, - documentId: meta.docId, - chunkIndex: meta.chunkIndex, - }) - } + if (!docExists) { + // Document was deleted - skip this chunk and clean up metadata + await payload.delete({ + collection: BULK_EMBEDDINGS_INPUT_METADATA_SLUG, + where: { + and: [{ run: { equals: parseInt(runId, 10) } }, { inputId: { equals: output.id } }], + }, + }) + failedCount++ + failedChunkData.push({ + collection: meta.sourceCollection, + documentId: meta.docId, + chunkIndex: meta.chunkIndex, + }) + return } - } - const failedCount = failedChunkData.length - - // Collect unique doc keys for deletion - const docKeys = new Set() - for (const output of successfulOutputs) { - const meta = metadataById.get(output.id) - if (!meta) continue - docKeys.add(`${meta.sourceCollection}:${meta.docId}`) - } - // Delete existing embeddings for docs we're about to update - for (const key of docKeys) { - const [sourceCollection, docId] = key.split(':') - await payload.delete({ - collection: poolName, - where: { - and: [ - { sourceCollection: { equals: sourceCollection } }, - { docId: { equals: String(docId) } }, - ], - }, - }) - } + // Handle errors from provider + if (output.error || !output.embedding) { + failedCount++ + failedChunkData.push({ + collection: meta.sourceCollection, + documentId: meta.docId, + chunkIndex: meta.chunkIndex, + }) + return + } - // Write all new embeddings - for (const output of successfulOutputs) { - const meta = metadataById.get(output.id) - if (!meta || !output.embedding) continue + // Track this doc for potential deletion of old embeddings + const docKey = `${meta.sourceCollection}:${meta.docId}` + const isFirstChunkForDoc = !processedDocs.has(docKey) + if (isFirstChunkForDoc) { + processedDocs.add(docKey) + // Check if embeddings already exist for this document+version (from a previous batch) + const hasCurrentEmbedding = await docHasEmbeddingVersion({ + payload, + poolName, + sourceCollection: meta.sourceCollection, + docId: meta.docId, + embeddingVersion: meta.embeddingVersion, + }) + + // Only delete if no embeddings exist for this version (they're from an old version) + if (!hasCurrentEmbedding) { + // Delete existing embeddings for this document (from old version) + await payload.delete({ + collection: poolName, + where: { + and: [ + { sourceCollection: { equals: meta.sourceCollection } }, + { docId: { equals: String(meta.docId) } }, + ], + }, + }) + } + } + + // Write the embedding const embeddingArray = Array.isArray(output.embedding) ? 
output.embedding : Array.from(output.embedding) @@ -663,23 +824,17 @@ async function completeBatches(args: { vector: embeddingArray, id: String((created as any)?.id ?? ''), }) - } - return { - success: true, - succeededCount: successfulOutputs.length, - failedCount, - failedChunkData, - } - } catch (error) { - const errorMessage = (error as Error).message || String(error) - return { - success: false, - succeededCount: 0, - failedCount: 0, - failedChunkData: [], - error: `Completion failed: ${errorMessage}`, - } + succeededCount++ + }, + }) + + return { + status: pollResult.status, + error: pollResult.error, + succeededCount, + failedCount, + failedChunkData, } } @@ -733,63 +888,43 @@ async function docHasEmbeddingVersion(args: { return (existing as any)?.totalDocs > 0 } -async function loadInputMetadataByRun(args: { payload: Payload; runId: string }): Promise< - Map< - string, - { - text: string - sourceCollection: string - docId: string - chunkIndex: number - embeddingVersion: string - extensionFields?: Record - } - > -> { - const { payload, runId } = args - const map = new Map< - string, - { - text: string - sourceCollection: string - docId: string - chunkIndex: number - embeddingVersion: string - extensionFields?: Record - } - >() - - // Convert runId to number for postgres relationship queries +/** + * Lookup metadata for a single input by runId + inputId. + * Uses the composite index ['run', 'inputId'] for O(1) lookup. + * This approach uses constant memory instead of loading all metadata into memory. + */ +async function getMetadataByInputId(args: { + payload: Payload + runId: string + inputId: string +}): Promise<{ + text: string + sourceCollection: string + docId: string + chunkIndex: number + embeddingVersion: string + extensionFields?: Record +} | null> { + const { payload, runId, inputId } = args const runIdNum = parseInt(runId, 10) - let page = 1 - const limit = 100 - while (true) { - const res = await payload.find({ - collection: BULK_EMBEDDINGS_INPUT_METADATA_SLUG, - page, - limit, - where: { run: { equals: runIdNum } }, - sort: 'inputId', - }) - const docs = (res as any)?.docs || [] - if (!docs.length) break - - for (const doc of docs) { - map.set(String(doc.inputId), { - text: doc.text, - sourceCollection: doc.sourceCollection, - docId: String(doc.docId), - chunkIndex: doc.chunkIndex, - embeddingVersion: doc.embeddingVersion, - extensionFields: doc.extensionFields || undefined, - }) - } + const result = await payload.find({ + collection: BULK_EMBEDDINGS_INPUT_METADATA_SLUG, + where: { + and: [{ run: { equals: runIdNum } }, { inputId: { equals: inputId } }], + }, + limit: 1, + }) - const totalPages = (res as any)?.totalPages ?? 
page - page++ - if (page > totalPages) break - } + const doc = (result as any)?.docs?.[0] + if (!doc) return null - return map + return { + text: doc.text, + sourceCollection: doc.sourceCollection, + docId: String(doc.docId), + chunkIndex: doc.chunkIndex, + embeddingVersion: doc.embeddingVersion, + extensionFields: doc.extensionFields || undefined, + } } diff --git a/src/types.ts b/src/types.ts index 220605f..63aeb58 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,5 +1,43 @@ import type { CollectionSlug, Payload, Field, Where } from 'payload' +/** Result from bulkEmbed method */ +export type BulkEmbedResult = + | { + /** ID of the created run */ + runId: string + /** Status of the run */ + status: 'queued' + } + | { + /** ID of existing active run */ + runId: string + /** Status of existing run */ + status: 'queued' | 'running' + /** Message explaining why a new run wasn't started */ + message: string + /** Indicates a conflict occurred */ + conflict: true + } + +/** Result from retryFailedBatch method */ +export type RetryFailedBatchResult = + | { + /** ID of the batch being retried */ + batchId: string + /** ID of the parent run */ + runId: string + /** New status of the batch */ + status: 'queued' + /** Confirmation message */ + message: string + } + | { + /** Error message */ + error: string + /** Indicates a conflict occurred (e.g., run still active) */ + conflict?: true + } + /** * Extended Payload type with vectorize plugin methods */ @@ -19,6 +57,10 @@ export type VectorizedPayload }, ) => Promise + /** Start a bulk embedding run for a knowledge pool */ + bulkEmbed: (params: { knowledgePool: TPoolNames }) => Promise + /** Retry a failed batch */ + retryFailedBatch: (params: { batchId: string }) => Promise } /** @@ -31,7 +73,11 @@ export function isVectorizedPayload(payload: Payload): payload is VectorizedPayl 'search' in payload && typeof (payload as any).search === 'function' && 'queueEmbed' in payload && - typeof (payload as any).queueEmbed === 'function' + typeof (payload as any).queueEmbed === 'function' && + 'bulkEmbed' in payload && + typeof (payload as any).bulkEmbed === 'function' && + 'retryFailedBatch' in payload && + typeof (payload as any).retryFailedBatch === 'function' ) } @@ -135,16 +181,12 @@ export type BatchSubmission = { providerBatchId: string } -/** Arguments for polling a single batch */ -export type PollBatchArgs = { - /** Provider-specific batch identifier */ - providerBatchId: string -} - -/** Arguments for completing/downloading a single batch */ -export type CompleteBatchArgs = { +/** Arguments for polling or completing a single batch */ +export type PollOrCompleteBatchArgs = { /** Provider-specific batch identifier */ providerBatchId: string + /** Callback function to stream completed chunks as they become available */ + onChunk: (chunk: BulkEmbeddingOutput) => Promise } /** Data about a failed chunk during bulk embedding completion */ @@ -190,11 +232,12 @@ export type BulkEmbeddingsFns = { */ addChunk: (args: AddChunkArgs) => Promise - /** Poll a specific batch by providerBatchId */ - pollBatch: (args: PollBatchArgs) => Promise - - /** Download outputs for a completed batch */ - completeBatch: (args: CompleteBatchArgs) => Promise + /** + * Poll a specific batch by providerBatchId, and stream outputs when complete. + * Call onChunk for each output as it becomes available once the batch completes. + * The function completes when all chunks have been streamed. 
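+   *
+   * A minimal sketch of one possible implementation (illustrative only; `providerBatches`
+   * and `embedTexts` are assumed helpers, not part of this plugin's API):
+   *
+   *   pollOrCompleteBatch: async ({ providerBatchId, onChunk }) => {
+   *     const job = providerBatches.get(providerBatchId)
+   *     if (!job) return { status: 'failed', error: 'unknown batch' }
+   *     if (!job.finished) return { status: 'running' }
+   *     const vectors = await embedTexts(job.inputs.map((i) => i.text))
+   *     for (let i = 0; i < job.inputs.length; i++) {
+   *       // Stream each output back; failed chunks can be reported as { id, error }.
+   *       await onChunk({ id: job.inputs[i].id, embedding: vectors[i] })
+   *     }
+   *     return { status: 'succeeded' }
+   *   }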
+ */ + pollOrCompleteBatch: (args: PollOrCompleteBatchArgs) => Promise /** * Called when the bulk run fails. Use this to clean up provider-side resources From 313ef3f99cccb5228c48891565de2d5d7a014175 Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Sun, 11 Jan 2026 20:05:02 +0700 Subject: [PATCH 26/49] adds import map --- dev/app/(payload)/admin/importMap.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dev/app/(payload)/admin/importMap.js b/dev/app/(payload)/admin/importMap.js index abe5d88..a13c2ea 100644 --- a/dev/app/(payload)/admin/importMap.js +++ b/dev/app/(payload)/admin/importMap.js @@ -21,6 +21,7 @@ import { StrikethroughFeatureClient as StrikethroughFeatureClient_e70f5e05f09f93 import { UnderlineFeatureClient as UnderlineFeatureClient_e70f5e05f09f93e00b997edb1ef0c864 } from '@payloadcms/richtext-lexical/client' import { BoldFeatureClient as BoldFeatureClient_e70f5e05f09f93e00b997edb1ef0c864 } from '@payloadcms/richtext-lexical/client' import { ItalicFeatureClient as ItalicFeatureClient_e70f5e05f09f93e00b997edb1ef0c864 } from '@payloadcms/richtext-lexical/client' +import { RetryFailedBatchButton as RetryFailedBatchButton_69051d9d0217691c78245f4f33731b73 } from 'payloadcms-vectorize/client' import { EmbedAllButton as EmbedAllButton_69051d9d0217691c78245f4f33731b73 } from 'payloadcms-vectorize/client' import { CollectionCards as CollectionCards_ab83ff7e88da8d3530831f296ec4756a } from '@payloadcms/ui/rsc' @@ -48,6 +49,7 @@ export const importMap = { "@payloadcms/richtext-lexical/client#UnderlineFeatureClient": UnderlineFeatureClient_e70f5e05f09f93e00b997edb1ef0c864, "@payloadcms/richtext-lexical/client#BoldFeatureClient": BoldFeatureClient_e70f5e05f09f93e00b997edb1ef0c864, "@payloadcms/richtext-lexical/client#ItalicFeatureClient": ItalicFeatureClient_e70f5e05f09f93e00b997edb1ef0c864, + "payloadcms-vectorize/client#RetryFailedBatchButton": RetryFailedBatchButton_69051d9d0217691c78245f4f33731b73, "payloadcms-vectorize/client#EmbedAllButton": EmbedAllButton_69051d9d0217691c78245f4f33731b73, "@payloadcms/ui/rsc#CollectionCards": CollectionCards_ab83ff7e88da8d3530831f296ec4756a } From 1db26018ccb8dcfe9514e0b892c582077f12989a Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Sun, 11 Jan 2026 21:20:03 +0700 Subject: [PATCH 27/49] assigns the extra funcs to the payload instance --- src/index.ts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/index.ts b/src/index.ts index d0eaf09..2da03ea 100644 --- a/src/index.ts +++ b/src/index.ts @@ -391,8 +391,7 @@ export const createVectorizeIntegration = const vectorSearchHandlers = createVectorSearchHandlers(pluginOptions.knowledgePools) config.onInit = async (payload) => { if (incomingOnInit) await incomingOnInit(payload) - ;(payload as VectorizedPayload) = { - ...(payload as any), + Object.assign(payload, { _isBulkEmbedEnabled: (knowledgePool: TPoolNames): boolean => { const poolConfig = pluginOptions.knowledgePools[knowledgePool] return !!poolConfig?.embeddingConfig?.bulkEmbeddingsFns @@ -452,7 +451,7 @@ export const createVectorizeIntegration = knowledgePools: pluginOptions.knowledgePools, queueName: pluginOptions.bulkQueueNames?.pollOrCompleteQueueName, }), - } + } as Partial>) // Ensure pgvector artifacts for each knowledge pool for (const poolName in staticConfigs) { const staticConfig = staticConfigs[poolName] From 2ca94b505d73d5648f8bda7e1ef0253dfec73fc8 Mon Sep 17 00:00:00 2001 From: techiejd 
<62455039+techiejd@users.noreply.github.com> Date: Sun, 11 Jan 2026 21:59:32 +0700 Subject: [PATCH 28/49] WIP --- dev/payload-types.ts | 148 +++++++++++++++++++-- dev/specs/bulkEmbed/partialFailure.spec.ts | 20 ++- 2 files changed, 156 insertions(+), 12 deletions(-) diff --git a/dev/payload-types.ts b/dev/payload-types.ts index cf54a34..5fb6133 100644 --- a/dev/payload-types.ts +++ b/dev/payload-types.ts @@ -70,8 +70,10 @@ export interface Config { posts: Post; 'vector-bulk-embeddings-runs': VectorBulkEmbeddingsRun; 'vector-bulk-embedding-input-metadata': VectorBulkEmbeddingInputMetadatum; + 'vector-bulk-embeddings-batches': VectorBulkEmbeddingsBatch; default: Default; bulkDefault: BulkDefault; + failingBulkDefault: FailingBulkDefault; 'payload-kv': PayloadKv; users: User; 'payload-jobs': PayloadJob; @@ -84,8 +86,10 @@ export interface Config { posts: PostsSelect | PostsSelect; 'vector-bulk-embeddings-runs': VectorBulkEmbeddingsRunsSelect | VectorBulkEmbeddingsRunsSelect; 'vector-bulk-embedding-input-metadata': VectorBulkEmbeddingInputMetadataSelect | VectorBulkEmbeddingInputMetadataSelect; + 'vector-bulk-embeddings-batches': VectorBulkEmbeddingsBatchesSelect | VectorBulkEmbeddingsBatchesSelect; default: DefaultSelect | DefaultSelect; bulkDefault: BulkDefaultSelect | BulkDefaultSelect; + failingBulkDefault: FailingBulkDefaultSelect | FailingBulkDefaultSelect; 'payload-kv': PayloadKvSelect | PayloadKvSelect; users: UsersSelect | UsersSelect; 'payload-jobs': PayloadJobsSelect | PayloadJobsSelect; @@ -175,15 +179,11 @@ export interface VectorBulkEmbeddingsRun { * Embedding version at submission time */ embeddingVersion: string; + status: 'queued' | 'running' | 'succeeded' | 'failed' | 'canceled'; /** - * Provider file or input reference used for the batch - */ - inputFileRef?: string | null; - /** - * Provider batch identifier + * Total number of batches in this run */ - providerBatchId?: string | null; - status: 'queued' | 'running' | 'succeeded' | 'failed' | 'canceled'; + totalBatches?: number | null; inputs?: number | null; succeeded?: number | null; failed?: number | null; @@ -199,6 +199,18 @@ export interface VectorBulkEmbeddingsRun { * Failure reason if the run ended in error */ error?: string | null; + /** + * Data about chunks that failed during completion (collection, documentId, chunkIndex) + */ + failedChunkData?: + | { + [k: string]: unknown; + } + | unknown[] + | string + | number + | boolean + | null; updatedAt: string; createdAt: string; } @@ -238,6 +250,54 @@ export interface VectorBulkEmbeddingInputMetadatum { updatedAt: string; createdAt: string; } +/** + * Individual batches within a bulk embedding run. Created when input count exceeds file limits. + * + * This interface was referenced by `Config`'s JSON-Schema + * via the `definition` "vector-bulk-embeddings-batches". 
+ */ +export interface VectorBulkEmbeddingsBatch { + id: number; + /** + * Parent bulk embedding run + */ + run: number | VectorBulkEmbeddingsRun; + /** + * Zero-based index of this batch within the run + */ + batchIndex: number; + /** + * Provider-specific batch identifier + */ + providerBatchId: string; + status: 'queued' | 'running' | 'succeeded' | 'failed' | 'canceled'; + /** + * Number of inputs in this batch + */ + inputCount: number; + /** + * Number of successful embeddings + */ + succeededCount?: number | null; + /** + * Number of failed embeddings + */ + failedCount?: number | null; + /** + * Timestamp when the batch was submitted to provider + */ + submittedAt?: string | null; + /** + * Timestamp when the batch finished + */ + completedAt?: string | null; + /** + * Error message if the batch failed + */ + error?: string | null; + updatedAt: string; + createdAt: string; +} /** * Vector embeddings for search and similarity queries. Created by the payloadcms-vectorize plugin. Embeddings cannot be added or modified, only deleted, through the admin panel. No other restrictions enforced. * @@ -300,6 +360,37 @@ export interface BulkDefault { updatedAt: string; createdAt: string; } +/** + * Vector embeddings for search and similarity queries. Created by the payloadcms-vectorize plugin. Embeddings cannot be added or modified, only deleted, through the admin panel. No other restrictions enforced. + * + * This interface was referenced by `Config`'s JSON-Schema + * via the `definition` "failingBulkDefault". + */ +export interface FailingBulkDefault { + id: number; + /** + * The collection that this embedding belongs to + */ + sourceCollection: string; + /** + * The ID of the source document + */ + docId: string; + /** + * The index of this chunk + */ + chunkIndex: number; + /** + * The original text that was vectorized + */ + chunkText?: string | null; + /** + * The version of the embedding model used + */ + embeddingVersion?: string | null; + updatedAt: string; + createdAt: string; +} /** * This interface was referenced by `Config`'s JSON-Schema * via the `definition` "payload-kv". @@ -463,6 +554,10 @@ export interface PayloadLockedDocument { relationTo: 'vector-bulk-embedding-input-metadata'; value: number | VectorBulkEmbeddingInputMetadatum; } | null) + | ({ + relationTo: 'vector-bulk-embeddings-batches'; + value: number | VectorBulkEmbeddingsBatch; + } | null) | ({ relationTo: 'default'; value: number | Default; @@ -471,6 +566,10 @@ export interface PayloadLockedDocument { relationTo: 'bulkDefault'; value: number | BulkDefault; } | null) + | ({ + relationTo: 'failingBulkDefault'; + value: number | FailingBulkDefault; + } | null) | ({ relationTo: 'users'; value: number | User; @@ -534,15 +633,15 @@ export interface PostsSelect { export interface VectorBulkEmbeddingsRunsSelect { pool?: T; embeddingVersion?: T; - inputFileRef?: T; - providerBatchId?: T; status?: T; + totalBatches?: T; inputs?: T; succeeded?: T; failed?: T; submittedAt?: T; completedAt?: T; error?: T; + failedChunkData?: T; updatedAt?: T; createdAt?: T; } @@ -562,6 +661,24 @@ export interface VectorBulkEmbeddingInputMetadataSelect { + run?: T; + batchIndex?: T; + providerBatchId?: T; + status?: T; + inputCount?: T; + succeededCount?: T; + failedCount?: T; + submittedAt?: T; + completedAt?: T; + error?: T; + updatedAt?: T; + createdAt?: T; +} /** * This interface was referenced by `Config`'s JSON-Schema * via the `definition` "default_select". 
@@ -588,6 +705,19 @@ export interface BulkDefaultSelect { updatedAt?: T; createdAt?: T; } +/** + * This interface was referenced by `Config`'s JSON-Schema + * via the `definition` "failingBulkDefault_select". + */ +export interface FailingBulkDefaultSelect { + sourceCollection?: T; + docId?: T; + chunkIndex?: T; + chunkText?: T; + embeddingVersion?: T; + updatedAt?: T; + createdAt?: T; +} /** * This interface was referenced by `Config`'s JSON-Schema * via the `definition` "payload-kv_select". diff --git a/dev/specs/bulkEmbed/partialFailure.spec.ts b/dev/specs/bulkEmbed/partialFailure.spec.ts index 3d2f928..961274a 100644 --- a/dev/specs/bulkEmbed/partialFailure.spec.ts +++ b/dev/specs/bulkEmbed/partialFailure.spec.ts @@ -152,8 +152,22 @@ describe('Bulk embed - partial chunk failures', () => { expect(updatedRun.failed).toBe(1) // Second chunk failed expect(updatedRun.failedChunkData).toBeDefined() expect(Array.isArray(updatedRun.failedChunkData)).toBe(true) - expect((updatedRun.failedChunkData as Array<{ collection: string; documentId: string; chunkIndex: number }>).length).toBe(1) - const failedChunk = (updatedRun.failedChunkData as Array<{ collection: string; documentId: string; chunkIndex: number }>)[0] + expect( + ( + updatedRun.failedChunkData as Array<{ + collection: string + documentId: string + chunkIndex: number + }> + ).length, + ).toBe(1) + const failedChunk = ( + updatedRun.failedChunkData as Array<{ + collection: string + documentId: string + chunkIndex: number + }> + )[0] expect(failedChunk.collection).toBe('posts') expect(failedChunk.documentId).toBe(String(post.id)) expect(failedChunk.chunkIndex).toBe(1) // Second chunk (index 1) @@ -236,7 +250,7 @@ describe('Bulk embed - partial chunk failures', () => { expect(updatedRun.status).toBe('succeeded') expect(updatedRun.failed).toBe(0) - expect(updatedRun.failedChunkData).toBeUndefined() + expect(updatedRun.failedChunkData).toBeNull() // onError should NOT be called when everything succeeds expect(onErrorCalled).toBe(false) From 22661e8d81f90ba47d7c496bd5311bfe220e7caa Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Mon, 12 Jan 2026 12:06:14 +0700 Subject: [PATCH 29/49] WIP --- dev/specs/bulkEmbed/partialFailure.spec.ts | 134 ++---------------- .../bulkEmbed/partialFailureNoFail.spec.ts | 105 ++++++++++++++ dev/specs/utils.ts | 14 +- src/collections/bulkEmbeddingInputMetadata.ts | 13 +- src/tasks/bulkEmbedAll.ts | 31 ++-- 5 files changed, 154 insertions(+), 143 deletions(-) create mode 100644 dev/specs/bulkEmbed/partialFailureNoFail.spec.ts diff --git a/dev/specs/bulkEmbed/partialFailure.spec.ts b/dev/specs/bulkEmbed/partialFailure.spec.ts index 961274a..bf84443 100644 --- a/dev/specs/bulkEmbed/partialFailure.spec.ts +++ b/dev/specs/bulkEmbed/partialFailure.spec.ts @@ -26,7 +26,6 @@ describe('Bulk embed - partial chunk failures', () => { beforeAll(async () => { await createTestDb({ dbName }) - // We'll set up the payload dynamically in each test to control failIds }) test('partial chunk failures are tracked and passed to onError', async () => { @@ -34,10 +33,11 @@ describe('Bulk embed - partial chunk failures', () => { onErrorCalled = false onErrorArgs = null - // The ID format is collectionSlug:docId:chunkIndex - // We need to fail a specific chunk - but we don't know the docId yet - // So we'll create the payload with a dynamic failIds check + // Use unique version to ensure this test only processes its own data + const testVersion = `${testEmbeddingVersion}-partial-${Date.now()}` + // 
Use a function-based failure check to avoid needing to know docId ahead of time + // Fail any chunk with index 1 (second chunk of any doc) const built = await buildPayloadWithIntegration({ dbName, pluginOpts: { @@ -52,13 +52,13 @@ describe('Bulk embed - partial chunk failures', () => { }, }, embeddingConfig: { - version: testEmbeddingVersion, + version: testVersion, queryFn: makeDummyEmbedQuery(DIMS), bulkEmbeddingsFns: createMockBulkEmbeddings( { statusSequence: ['succeeded'], - // We'll fail chunks that contain ":1" (second chunk of any doc) - partialFailure: { failIds: [] }, // Will be updated below + // Fail any chunk with index 1 (second chunk) - ID format is collection:docId:chunkIndex + partialFailure: { shouldFail: (id: string) => id.endsWith(':1') }, onErrorCallback: (args) => { onErrorCalled = true onErrorArgs = args @@ -73,61 +73,19 @@ describe('Bulk embed - partial chunk failures', () => { }, secret: 'test-secret', dims: DIMS, - key: `partial-failure-${Date.now()}`, + key: `partial-failure-${Date.now()}-${Math.random()}`, }) payload = built.payload - // Create a post + // Create a post with 2 chunks const post = await payload.create({ collection: 'posts', data: { title: 'Partial Failure Test' } as any, }) - // Now we know the docId, update the mock to fail the second chunk - const failChunkId = `posts:${post.id}:1` - - // Re-create with the correct failIds - const built2 = await buildPayloadWithIntegration({ - dbName, - pluginOpts: { - knowledgePools: { - default: { - collections: { - posts: { - toKnowledgePool: async (doc: any) => [ - { chunk: doc.title }, - { chunk: doc.title + ' chunk2' }, - ], - }, - }, - embeddingConfig: { - version: testEmbeddingVersion + '-v2', - queryFn: makeDummyEmbedQuery(DIMS), - bulkEmbeddingsFns: createMockBulkEmbeddings( - { - statusSequence: ['succeeded'], - partialFailure: { failIds: [failChunkId] }, - onErrorCallback: (args) => { - onErrorCalled = true - onErrorArgs = args - }, - }, - DIMS, - ), - }, - }, - }, - bulkQueueNames: BULK_QUEUE_NAMES, - }, - secret: 'test-secret', - dims: DIMS, - key: `partial-failure-2-${Date.now()}`, - }) - payload = built2.payload - const run = await payload.create({ collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: testEmbeddingVersion + '-v2', status: 'queued' }, + data: { pool: 'default', embeddingVersion: testVersion, status: 'queued' }, }) await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ @@ -183,76 +141,4 @@ describe('Bulk embed - partial chunk failures', () => { expect(onErrorArgs!.failedChunkCount).toBe(1) expect(onErrorArgs!.error.message).toContain('1 chunk(s) failed') }) - - test('run with no partial failures does not call onError', async () => { - // Reset state - onErrorCalled = false - onErrorArgs = null - - const built = await buildPayloadWithIntegration({ - dbName, - pluginOpts: { - knowledgePools: { - default: { - collections: { - posts: { - toKnowledgePool: async (doc: any) => [{ chunk: doc.title }], - }, - }, - embeddingConfig: { - version: testEmbeddingVersion + '-v3', - queryFn: makeDummyEmbedQuery(DIMS), - bulkEmbeddingsFns: createMockBulkEmbeddings( - { - statusSequence: ['succeeded'], - // No partial failures - onErrorCallback: (args) => { - onErrorCalled = true - onErrorArgs = args - }, - }, - DIMS, - ), - }, - }, - }, - bulkQueueNames: BULK_QUEUE_NAMES, - }, - secret: 'test-secret', - dims: DIMS, - key: `no-partial-failure-${Date.now()}`, - }) - payload = built.payload - - await payload.create({ collection: 'posts', data: { title: 
'No Failure Test' } as any }) - - const run = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: testEmbeddingVersion + '-v3', status: 'queued' }, - }) - - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { runId: String(run.id) }, - req: { payload } as any, - ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName - ? { queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } - : {}), - }) - - await waitForBulkJobs(payload) - - // Check run status - const updatedRun = await payload.findByID({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - id: run.id, - }) - - expect(updatedRun.status).toBe('succeeded') - expect(updatedRun.failed).toBe(0) - expect(updatedRun.failedChunkData).toBeNull() - - // onError should NOT be called when everything succeeds - expect(onErrorCalled).toBe(false) - }) }) diff --git a/dev/specs/bulkEmbed/partialFailureNoFail.spec.ts b/dev/specs/bulkEmbed/partialFailureNoFail.spec.ts new file mode 100644 index 0000000..133e97c --- /dev/null +++ b/dev/specs/bulkEmbed/partialFailureNoFail.spec.ts @@ -0,0 +1,105 @@ +import type { Payload } from 'payload' +import { beforeAll, describe, expect, test } from 'vitest' +import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../../src/collections/bulkEmbeddingsRuns.js' +import { + BULK_QUEUE_NAMES, + DEFAULT_DIMS, + buildPayloadWithIntegration, + createMockBulkEmbeddings, + createTestDb, + waitForBulkJobs, +} from '../utils.js' +import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' + +const DIMS = DEFAULT_DIMS +const dbName = `bulk_partial_failure_nofail_${Date.now()}` + +describe('Bulk embed - no partial failures', () => { + let payload: Payload + let onErrorCalled = false + let onErrorArgs: { + providerBatchIds: string[] + error: Error + failedChunkData?: Array<{ collection: string; documentId: string; chunkIndex: number }> + failedChunkCount?: number + } | null = null + + beforeAll(async () => { + await createTestDb({ dbName }) + }) + + test('run with no partial failures does not call onError', async () => { + // Reset state + onErrorCalled = false + onErrorArgs = null + + // Use unique version to ensure this test only processes its own data + const testVersion = `${testEmbeddingVersion}-nofail-${Date.now()}` + + const built = await buildPayloadWithIntegration({ + dbName, + pluginOpts: { + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc: any) => [{ chunk: doc.title }], + }, + }, + embeddingConfig: { + version: testVersion, + queryFn: makeDummyEmbedQuery(DIMS), + bulkEmbeddingsFns: createMockBulkEmbeddings( + { + statusSequence: ['succeeded'], + // No partial failures + onErrorCallback: (args) => { + onErrorCalled = true + onErrorArgs = args + }, + }, + DIMS, + ), + }, + }, + }, + bulkQueueNames: BULK_QUEUE_NAMES, + }, + secret: 'test-secret', + dims: DIMS, + key: `no-partial-failure-${Date.now()}-${Math.random()}`, + }) + payload = built.payload + + await payload.create({ collection: 'posts', data: { title: 'No Failure Test' } as any }) + + const run = await payload.create({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { pool: 'default', embeddingVersion: testVersion, status: 'queued' }, + }) + + await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ + task: 'payloadcms-vectorize:prepare-bulk-embedding', + input: { runId: String(run.id) }, + req: { payload } as any, + ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName + ? 
{ queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } + : {}), + }) + + await waitForBulkJobs(payload) + + // Check run status + const updatedRun = await payload.findByID({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + id: run.id, + }) + + expect(updatedRun.status).toBe('succeeded') + expect(updatedRun.failed).toBe(0) + expect(updatedRun.failedChunkData).toBeNull() + + // onError should NOT be called when everything succeeds + expect(onErrorCalled).toBe(false) + }) +}) diff --git a/dev/specs/utils.ts b/dev/specs/utils.ts index b44643c..474f4a1 100644 --- a/dev/specs/utils.ts +++ b/dev/specs/utils.ts @@ -76,7 +76,8 @@ export const BULK_QUEUE_NAMES = { type MockOptions = { statusSequence: BulkEmbeddingRunStatus[] - partialFailure?: { failIds: string[] } + /** Static list of IDs to fail, OR a function to decide at runtime */ + partialFailure?: { failIds: string[] } | { shouldFail: (id: string) => boolean } /** Optional: flush after this many chunks (for testing multi-batch scenarios) */ flushAfterChunks?: number /** Optional: callback to track onError calls for testing */ @@ -141,7 +142,12 @@ export function createMockBulkEmbeddings( const vectors = await embeddings(inputs.map((i) => i.text)) for (let idx = 0; idx < inputs.length; idx++) { const input = inputs[idx] - const shouldFail = partialFailure?.failIds?.includes(input.id) + // Support both static array and function-based failure check + const shouldFail = partialFailure + ? 'shouldFail' in partialFailure + ? partialFailure.shouldFail(input.id) + : partialFailure.failIds?.includes(input.id) + : false const output = shouldFail ? { id: input.id, error: 'fail' } : { id: input.id, embedding: vectors[idx] } @@ -156,7 +162,7 @@ export function createMockBulkEmbeddings( return { status } }, - onError: async ({ providerBatchIds, error }) => { + onError: async ({ providerBatchIds, error, failedChunkData, failedChunkCount }) => { // Clean up state for (const batchId of providerBatchIds) { batchInputs.delete(batchId) @@ -167,7 +173,7 @@ export function createMockBulkEmbeddings( // Call the test callback if provided if (onErrorCallback) { - onErrorCallback({ providerBatchIds, error }) + onErrorCallback({ providerBatchIds, error, failedChunkData, failedChunkCount }) } }, } diff --git a/src/collections/bulkEmbeddingInputMetadata.ts b/src/collections/bulkEmbeddingInputMetadata.ts index 29472c1..806c8df 100644 --- a/src/collections/bulkEmbeddingInputMetadata.ts +++ b/src/collections/bulkEmbeddingInputMetadata.ts @@ -1,5 +1,6 @@ import type { CollectionConfig } from 'payload' import { BULK_EMBEDDINGS_RUNS_SLUG } from './bulkEmbeddingsRuns.js' +import { BULK_EMBEDDINGS_BATCHES_SLUG } from './bulkEmbeddingsBatches.js' export const BULK_EMBEDDINGS_INPUT_METADATA_SLUG = 'vector-bulk-embedding-input-metadata' @@ -8,7 +9,7 @@ export const createBulkEmbeddingInputMetadataCollection = (): CollectionConfig = admin: { useAsTitle: 'inputId', description: 'Stores per-input metadata for bulk embedding runs.', - defaultColumns: ['run', 'inputId', 'sourceCollection', 'docId', 'chunkIndex'], + defaultColumns: ['run', 'batch', 'inputId', 'sourceCollection', 'docId', 'chunkIndex'], }, access: { // Anyone can read; only internal (local API) can mutate. 
@@ -25,6 +26,13 @@ export const createBulkEmbeddingInputMetadataCollection = (): CollectionConfig = required: true, admin: { description: 'Bulk run this input belongs to' }, }, + { + name: 'batch', + type: 'relationship', + relationTo: BULK_EMBEDDINGS_BATCHES_SLUG, + required: true, + admin: { description: 'Batch this input belongs to' }, + }, { name: 'inputId', type: 'text', @@ -71,6 +79,9 @@ export const createBulkEmbeddingInputMetadataCollection = (): CollectionConfig = { fields: ['run'], }, + { + fields: ['batch'], + }, { fields: ['sourceCollection', 'docId'], }, diff --git a/src/tasks/bulkEmbedAll.ts b/src/tasks/bulkEmbedAll.ts index fd9af6d..839eb40 100644 --- a/src/tasks/bulkEmbedAll.ts +++ b/src/tasks/bulkEmbedAll.ts @@ -624,13 +624,29 @@ async function streamAndBatchMissingEmbeddings(args: { // Convert runId to number for postgres relationships const runIdNum = parseInt(runId, 10) - // Store metadata for submitted chunks + // Create batch record first so we have the batch ID for metadata + const batchRecord = await payload.create({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + data: { + run: runIdNum, + batchIndex, + providerBatchId: submission.providerBatchId, + status: 'queued', + inputCount: submittedChunks.length, + submittedAt: new Date().toISOString(), + }, + }) + + const batchId = (batchRecord as any).id + + // Store metadata for submitted chunks with batch reference await Promise.all( submittedChunks.map((c) => payload.create({ collection: BULK_EMBEDDINGS_INPUT_METADATA_SLUG, data: { run: runIdNum, + batch: batchId, inputId: c.id, text: c.text, sourceCollection: c.metadata.sourceCollection, @@ -643,19 +659,6 @@ async function streamAndBatchMissingEmbeddings(args: { ), ) - // Create batch record - await payload.create({ - collection: BULK_EMBEDDINGS_BATCHES_SLUG, - data: { - run: runIdNum, - batchIndex, - providerBatchId: submission.providerBatchId, - status: 'queued', - inputCount: submittedChunks.length, - submittedAt: new Date().toISOString(), - }, - }) - totalInputs += submittedChunks.length batchIndex++ } From 3a02647f8721cfe5e788a21d2ac762dc19c7eb4e Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Mon, 12 Jan 2026 17:06:44 +0700 Subject: [PATCH 30/49] fixes tests --- dev/specs/bulkEmbed/failedBatch.spec.ts | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/dev/specs/bulkEmbed/failedBatch.spec.ts b/dev/specs/bulkEmbed/failedBatch.spec.ts index 2d56359..e6010d8 100644 --- a/dev/specs/bulkEmbed/failedBatch.spec.ts +++ b/dev/specs/bulkEmbed/failedBatch.spec.ts @@ -82,8 +82,8 @@ describe('Bulk embed - failed batch', () => { expect(embeds.totalDocs).toBe(0) }) - test('metadata table is cleaned after failed run (no partial writes)', async () => { - await payload.create({ collection: 'posts', data: { title: 'FailCleanup' } as any }) + test('metadata table is kept after failed run (to allow retries)', async () => { + const post = await payload.create({ collection: 'posts', data: { title: 'FailCleanup' } as any }) const run = await payload.create({ collection: BULK_EMBEDDINGS_RUNS_SLUG, @@ -101,11 +101,20 @@ describe('Bulk embed - failed batch', () => { await waitForBulkJobs(payload) + // Metadata should be kept for failed batches to allow retries + const runIdNum = typeof run.id === 'number' ? 
run.id : parseInt(String(run.id), 10) const metadata = await payload.find({ collection: BULK_EMBEDDINGS_INPUT_METADATA_SLUG, - where: { run: { exists: true } }, + where: { run: { equals: runIdNum } }, }) - expect(metadata.totalDocs).toBe(0) + expect(metadata.totalDocs).toBeGreaterThan(0) + + // Verify no partial embeddings were written (no partial writes) + const embeds = await payload.find({ + collection: 'default', + where: { docId: { equals: String(post.id) } }, + }) + expect(embeds.totalDocs).toBe(0) }) test('cannot retry batch while run is still running', async () => { From c2d745bcea388331216bcb9bad0f81c5a6cd0db3 Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Mon, 12 Jan 2026 19:16:19 +0700 Subject: [PATCH 31/49] Adds better retry strategy --- dev/specs/bulkEmbed/failedBatch.spec.ts | 120 ++++++++++- .../RetryFailedBatchButton/client.tsx | 196 ++++++++++++------ .../RetryFailedBatchButton/index.tsx | 57 ++--- src/collections/bulkEmbeddingsBatches.ts | 14 +- src/endpoints/retryFailedBatch.ts | 129 +++++++++++- src/tasks/bulkEmbedAll.ts | 2 +- src/types.ts | 10 +- 7 files changed, 402 insertions(+), 126 deletions(-) diff --git a/dev/specs/bulkEmbed/failedBatch.spec.ts b/dev/specs/bulkEmbed/failedBatch.spec.ts index e6010d8..20f580e 100644 --- a/dev/specs/bulkEmbed/failedBatch.spec.ts +++ b/dev/specs/bulkEmbed/failedBatch.spec.ts @@ -83,7 +83,10 @@ describe('Bulk embed - failed batch', () => { }) test('metadata table is kept after failed run (to allow retries)', async () => { - const post = await payload.create({ collection: 'posts', data: { title: 'FailCleanup' } as any }) + const post = await payload.create({ + collection: 'posts', + data: { title: 'FailCleanup' } as any, + }) const run = await payload.create({ collection: BULK_EMBEDDINGS_RUNS_SLUG, data: { pool: 'default', embeddingVersion: testEmbeddingVersion, status: 'queued' }, }) await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ task: 'payloadcms-vectorize:prepare-bulk-embedding', input: { runId: String(run.id) }, req: { payload } as any, ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName ? 
{ queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } + : {}), + }) + + await waitForBulkJobs(payload) + + // Find the failed batch + const batchesResult = await payload.find({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + where: { run: { equals: run.id } }, + }) + const failedBatch = (batchesResult as any).docs[0] + expect(failedBatch.status).toBe('failed') + + // Retry the batch + const retryResult = await payload.retryFailedBatch({ batchId: String(failedBatch.id) }) + + expect('error' in retryResult).toBe(false) + if (!('error' in retryResult)) { + expect(retryResult.newBatchId).toBeDefined() + expect(retryResult.status).toBe('queued') + expect(retryResult.message).toContain('resubmitted') + + // Check that the old batch is marked as retried + const oldBatch = await payload.findByID({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + id: String(failedBatch.id), + }) + expect((oldBatch as any).status).toBe('retried') + expect((oldBatch as any).retriedBatch).toBeDefined() + + // Check that the new batch exists and is queued + const newBatch = await payload.findByID({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + id: retryResult.newBatchId!, + }) + expect((newBatch as any).status).toBe('queued') + expect((newBatch as any).providerBatchId).toBeDefined() + expect((newBatch as any).providerBatchId).not.toBe(failedBatch.providerBatchId) + + // Check that metadata points to the new batch + const runIdNum = typeof run.id === 'number' ? run.id : parseInt(String(run.id), 10) + const metadata = await payload.find({ + collection: BULK_EMBEDDINGS_INPUT_METADATA_SLUG, + where: { run: { equals: runIdNum } }, + }) + expect(metadata.totalDocs).toBeGreaterThan(0) + // All metadata should point to the new batch + for (const meta of (metadata as any).docs) { + const metaBatchId = + typeof meta.batch === 'object' ? meta.batch.id : parseInt(String(meta.batch), 10) + expect(metaBatchId).toBe(parseInt(retryResult.newBatchId!, 10)) + } + } + }) + + test('retrying a retried batch returns the existing retry batch', async () => { + const post = await payload.create({ + collection: 'posts', + data: { title: 'RetryRetryTest' } as any, + }) + + const run = await payload.create({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { pool: 'default', embeddingVersion: testEmbeddingVersion, status: 'queued' }, + }) + + await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ + task: 'payloadcms-vectorize:prepare-bulk-embedding', + input: { runId: String(run.id) }, + req: { payload } as any, + ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName + ? { queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } + : {}), + }) + + await waitForBulkJobs(payload) + + // Find the failed batch + const batchesResult = await payload.find({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + where: { run: { equals: run.id } }, + }) + const failedBatch = (batchesResult as any).docs[0] + + // Retry the batch first time + const firstRetryResult = await payload.retryFailedBatch({ batchId: String(failedBatch.id) }) + expect('error' in firstRetryResult).toBe(false) + if ('error' in firstRetryResult) return + + const firstRetryBatchId = firstRetryResult.newBatchId! 
+ + // Retry the retried batch - should return the existing retry batch + const secondRetryResult = await payload.retryFailedBatch({ batchId: String(failedBatch.id) }) + + expect('error' in secondRetryResult).toBe(false) + if (!('error' in secondRetryResult)) { + expect(secondRetryResult.newBatchId).toBe(firstRetryBatchId) + expect(secondRetryResult.message).toContain('already retried') + } + }) }) diff --git a/src/admin/components/RetryFailedBatchButton/client.tsx b/src/admin/components/RetryFailedBatchButton/client.tsx index 40a4374..c1aa874 100644 --- a/src/admin/components/RetryFailedBatchButton/client.tsx +++ b/src/admin/components/RetryFailedBatchButton/client.tsx @@ -1,20 +1,25 @@ 'use client' import React, { useState } from 'react' +import { BULK_EMBEDDINGS_BATCHES_SLUG } from '../../../collections/bulkEmbeddingsBatches.js' type RetryFailedBatchButtonClientProps = { batchId: string status: string + retriedBatchId?: string | null } export const RetryFailedBatchButtonClient: React.FC = ({ batchId, status, + retriedBatchId, }) => { + console.log('RetryFailedBatchButtonClient', batchId, status, retriedBatchId) const [isSubmitting, setIsSubmitting] = useState(false) const [message, setMessage] = useState<{ text: string; error?: boolean } | null>(null) - const isDisabled = status !== 'failed' + const isDisabled = status !== 'failed' && status !== 'retried' + const isRetried = status === 'retried' && retriedBatchId const handleClick = async () => { if (isDisabled) return @@ -38,12 +43,24 @@ export const RetryFailedBatchButtonClient: React.FC { - window.location.reload() - }, 1500) + // If a new batch was created, show that in the message + const newBatchId = data?.newBatchId + if (newBatchId) { + setMessage({ + text: `Batch resubmitted successfully. New batch ID: ${newBatchId}`, + error: false, + }) + // Redirect to the new batch after a delay + setTimeout(() => { + window.location.href = `/admin/collections/${BULK_EMBEDDINGS_BATCHES_SLUG}/${newBatchId}` + }, 2000) + } else { + setMessage({ text: 'Batch resubmitted successfully', error: false }) + // Reload the page after a short delay to show the updated status + setTimeout(() => { + window.location.reload() + }, 1500) + } } catch (error: any) { setMessage({ text: error?.message || 'Failed to retry batch', error: true }) } finally { @@ -68,10 +85,14 @@ export const RetryFailedBatchButtonClient: React.FC - {isDisabled ? 'Retry Not Available' : 'Retry Failed Batch'} + {isRetried + ? 'Batch Retried' + : isDisabled + ? 'Retry Not Available' + : 'Retry Failed Batch'}

- {isDisabled - ? `This batch is in "${status}" status. Retry is only available for failed batches.` - : 'Re-queue this failed batch for processing. The batch will be polled again and embeddings will be written for successful chunks.'} + {isRetried ? ( + <> + This batch was retried.{' '} + {retriedBatchId && ( + + View retry batch + + )} + + ) : isDisabled ? ( + `This batch is in "${status}" status. Retry is only available for failed or retried batches.` + ) : ( + 'Resubmit this failed batch to the provider. The batch will be resubmitted and processed from the beginning.' + )}

- + Retrying... + + ) : ( + <> + + + + Retry + + )} + + )} {message && ( @@ -157,14 +194,37 @@ export const RetryFailedBatchButtonClient: React.FC {message.error ? ( - + - + ) : ( - + - + )} {message.text} diff --git a/src/admin/components/RetryFailedBatchButton/index.tsx b/src/admin/components/RetryFailedBatchButton/index.tsx index b06b00d..7f47387 100644 --- a/src/admin/components/RetryFailedBatchButton/index.tsx +++ b/src/admin/components/RetryFailedBatchButton/index.tsx @@ -1,5 +1,6 @@ import React from 'react' import { RetryFailedBatchButtonClient } from './client.js' +import { BULK_EMBEDDINGS_BATCHES_SLUG } from '../../../collections/bulkEmbeddingsBatches.js' type RetryFailedBatchButtonProps = { batchId: string @@ -7,50 +8,22 @@ type RetryFailedBatchButtonProps = { } export const RetryFailedBatchButton: React.FC< - RetryFailedBatchButtonProps & { payload?: any; params?: any; data?: any } -> = (props) => { - // Handle both direct props and serverProps functions - let batchId: string = '' - let status: string = '' + RetryFailedBatchButtonProps & { payload?: any; id?: string } +> = async (props) => { + const batch = await props.payload?.findByID({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + id: props.id, + }) - if (typeof props.batchId === 'function') { - try { - batchId = String( - (props.batchId as any)({ payload: props.payload, params: props.params, data: props.data }) || - '', - ) - } catch (error) { - console.error('[RetryFailedBatchButton] Error calling batchId:', error) - batchId = '' - } - } else if (props.data?.id) { - batchId = String(props.data.id) - } else { - batchId = String(props.batchId || '') - } + console.log('RetryFailedBatchButtonBatch', batch) - if (typeof props.status === 'function') { - try { - status = String( - (props.status as any)({ payload: props.payload, params: props.params, data: props.data }) || - '', - ) - } catch (error) { - console.error('[RetryFailedBatchButton] Error calling status:', error) - status = '' - } - } else if (props.data?.status) { - status = String(props.data.status) - } else { - status = String(props.status || '') - } - - // Only render on the edit view (when we have a batchId) - if (!batchId) { - return null - } - - return + return ( + + ) } export default RetryFailedBatchButton diff --git a/src/collections/bulkEmbeddingsBatches.ts b/src/collections/bulkEmbeddingsBatches.ts index 219e1ba..488e013 100644 --- a/src/collections/bulkEmbeddingsBatches.ts +++ b/src/collections/bulkEmbeddingsBatches.ts @@ -9,6 +9,7 @@ const statusOptions: BulkEmbeddingRunStatus[] = [ 'succeeded', 'failed', 'canceled', + 'retried', ] /** @@ -27,10 +28,6 @@ export const createBulkEmbeddingsBatchesCollection = (): CollectionConfig => ({ beforeDocumentControls: [ { path: 'payloadcms-vectorize/client#RetryFailedBatchButton', - serverProps: { - batchId: ({ data }: { data: any }) => data?.id, - status: ({ data }: { data: any }) => data?.status, - }, }, ], }, @@ -118,6 +115,15 @@ export const createBulkEmbeddingsBatchesCollection = (): CollectionConfig => ({ description: 'Error message if the batch failed', }, }, + { + name: 'retriedBatch', + type: 'relationship', + relationTo: BULK_EMBEDDINGS_BATCHES_SLUG, + admin: { + description: 'The new batch created when this batch was retried', + }, + hasMany: false, + }, ], timestamps: true, indexes: [ diff --git a/src/endpoints/retryFailedBatch.ts b/src/endpoints/retryFailedBatch.ts index d66cb61..8d86e50 100644 --- a/src/endpoints/retryFailedBatch.ts +++ b/src/endpoints/retryFailedBatch.ts @@ -1,10 +1,12 @@ import type { 
Payload, PayloadHandler } from 'payload' import { BULK_EMBEDDINGS_BATCHES_SLUG } from '../collections/bulkEmbeddingsBatches.js' import { BULK_EMBEDDINGS_RUNS_SLUG } from '../collections/bulkEmbeddingsRuns.js' +import { BULK_EMBEDDINGS_INPUT_METADATA_SLUG } from '../collections/bulkEmbeddingInputMetadata.js' import type { KnowledgePoolDynamicConfig, KnowledgePoolName, RetryFailedBatchResult, + BulkEmbeddingInput, } from '../types.js' /** @@ -34,10 +36,25 @@ export async function retryBatch= totalPages || pageDocs.length === 0) break + metadataPage++ + } + + if (metadataDocs.length === 0) { + return { + error: `No metadata found for batch "${batchId}". Cannot retry without chunk data.`, + } + } + + // Reconstruct chunks from metadata (only id and text for addChunk) + const chunks: BulkEmbeddingInput[] = metadataDocs.map((meta: any) => ({ + id: meta.inputId, + text: meta.text, + })) + + // Find the highest batchIndex for this run to determine the new batch index + const existingBatchesResult = await payload.find({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + where: { run: { equals: runIdNum } }, + limit: 1000, + sort: '-batchIndex', + }) + const existingBatches = (existingBatchesResult as any)?.docs || [] + const maxBatchIndex = existingBatches.length > 0 ? (existingBatches[0].batchIndex as number) : -1 + const newBatchIndex = maxBatchIndex + 1 + + // Resubmit chunks via addChunk to get a new providerBatchId + // Submit all chunks - addChunk will accumulate and return a BatchSubmission when ready + let submission: { providerBatchId: string } | null = null + for (let i = 0; i < chunks.length; i++) { + const chunk = chunks[i] + const isLastChunk = i === chunks.length - 1 + + const result = await callbacks.addChunk({ + chunk, + isLastChunk, + }) + + if (result) { + submission = result + break // Batch was submitted + } + } + + if (!submission) { + return { + error: 'Failed to resubmit batch - no providerBatchId was returned from addChunk', + } + } + + // Create the new batch + const newBatch = await payload.create({ collection: BULK_EMBEDDINGS_BATCHES_SLUG, - id: batchId, data: { + run: runIdNum, + batchIndex: newBatchIndex, + providerBatchId: submission.providerBatchId, status: 'queued', - error: null, - completedAt: null, + inputCount: chunks.length, succeededCount: 0, failedCount: 0, + submittedAt: new Date().toISOString(), + }, + }) + + // Update metadata to point to the new batch + await payload.update({ + collection: BULK_EMBEDDINGS_INPUT_METADATA_SLUG, + where: { batch: { equals: batchIdNum } }, + data: { + batch: newBatch.id, + }, + }) + + // Update the old batch to point to the new batch and set status to 'retried' + await payload.update({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + id: batchId, + data: { + status: 'retried', + retriedBatch: newBatch.id, }, }) @@ -108,9 +218,10 @@ export async function retryBatch Date: Mon, 12 Jan 2026 19:35:58 +0700 Subject: [PATCH 32/49] Increases test time --- playwright.config.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/playwright.config.js b/playwright.config.js index 9c895ac..f6e2af2 100644 --- a/playwright.config.js +++ b/playwright.config.js @@ -46,7 +46,7 @@ export default defineConfig({ command: 'cross-env DOTENV_CONFIG_PATH=dev/.env.test NODE_OPTIONS=--require=dotenv/config next dev dev --turbo', reuseExistingServer: true, - timeout: 180_000, + timeout: 300_000, url: 'http://localhost:3000/admin', }, }) From 5fdd48aac11291a80f340d68f52e6cb34ba51ddd Mon Sep 17 00:00:00 2001 From: techiejd 
<62455039+techiejd@users.noreply.github.com> Date: Mon, 12 Jan 2026 20:36:18 +0700 Subject: [PATCH 33/49] WIP --- dev/specs/e2e.spec.ts | 89 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 75 insertions(+), 14 deletions(-) diff --git a/dev/specs/e2e.spec.ts b/dev/specs/e2e.spec.ts index 1706ea7..532edb3 100644 --- a/dev/specs/e2e.spec.ts +++ b/dev/specs/e2e.spec.ts @@ -249,7 +249,7 @@ test.describe('Vector embedding e2e tests', () => { }) expect(succeededRetryResponse.status()).toBe(400) const succeededRetryJson = await succeededRetryResponse.json() - expect(succeededRetryJson.error).toContain('not in failed status') + expect(succeededRetryJson.error).toContain('not in failed or retried status') console.log('[test] Retry endpoint correctly rejected succeeded batch') // Navigate to the succeeded batch page and verify retry button is disabled @@ -265,7 +265,8 @@ test.describe('Vector embedding e2e tests', () => { // Verify the button is disabled (opacity check) const buttonStyle = await retryButton.getAttribute('style') - expect(buttonStyle).toContain('opacity: 0.5') + console.log('[test] Button style:', buttonStyle) + expect(buttonStyle).toContain('opacity:0.5') // Verify the "Retry Not Available" message is shown const notAvailableMessage = page.locator('text=/Retry Not Available/i') @@ -361,14 +362,64 @@ test.describe('Vector embedding e2e tests', () => { await waitForBulkJobs(payload, 30000) console.log('[test] Bulk jobs completed') - // Find the failed batch that was created - const batches = await (payload as any).find({ - collection: BULK_EMBEDDINGS_BATCHES_SLUG, - where: { - and: [{ run: { equals: runId } }, { status: { equals: 'failed' } }], - }, - }) - expect(batches.totalDocs).toBeGreaterThan(0) + // Wait for the batch to actually fail (poll-or-complete job needs to finish) + const runIdNum = parseInt(runId, 10) + let batches: any + let attempts = 0 + const maxAttempts = 30 // Wait up to 30 seconds + + while (attempts < maxAttempts) { + batches = await (payload as any).find({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + where: { + and: [{ run: { equals: runIdNum } }, { status: { equals: 'failed' } }], + }, + }) + + if (batches.totalDocs > 0) { + break + } + + // Check current batch status + const allBatches = await (payload as any).find({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + where: { run: { equals: runIdNum } }, + }) + if (allBatches.totalDocs > 0) { + const currentStatus = allBatches.docs[0].status + if (currentStatus === 'failed') { + batches = allBatches + break + } + } + + // Wait a bit before retrying + await new Promise((resolve) => setTimeout(resolve, 1000)) + attempts++ + } + + if (!batches || batches.totalDocs === 0) { + // Final check for debugging + const allBatchesFinal = await (payload as any).find({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + where: { run: { equals: runIdNum } }, + }) + const runFinal = await (payload as any).findByID({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + id: runId, + }) + console.log('[test] Failed to find failed batch after', attempts, 'attempts') + console.log('[test] Run status:', runFinal.status) + console.log('[test] Batches found:', allBatchesFinal.totalDocs) + if (allBatchesFinal.totalDocs > 0) { + console.log( + '[test] Batch statuses:', + allBatchesFinal.docs.map((b: any) => b.status), + ) + } + } + + expect(batches?.totalDocs).toBeGreaterThan(0) const batch = batches.docs[0] console.log('[test] Found failed batch:', batch.id) @@ -378,17 +429,27 @@ test.describe('Vector embedding e2e tests', () => { }) 
expect(retryResponse.status()).toBe(202) const retryJson = await retryResponse.json() - expect(retryJson.message).toBe('Failed batch has been re-queued for processing') + expect(retryJson.message).toBe('Failed batch has been resubmitted and re-queued for processing') expect(retryJson.batchId).toBe(String(batch.id)) + expect(retryJson.newBatchId).toBeDefined() expect(retryJson.status).toBe('queued') - // Verify the batch status was updated + // Verify the old batch status was updated to 'retried' const updatedBatch = await (payload as any).findByID({ collection: BULK_EMBEDDINGS_BATCHES_SLUG, id: String(batch.id), }) - expect(updatedBatch.status).toBe('queued') - expect(updatedBatch.error).toBeNull() + expect(updatedBatch.status).toBe('retried') + expect(updatedBatch.retriedBatch).toBeDefined() + + // Verify the new batch exists and is queued + const newBatch = await (payload as any).findByID({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + id: retryJson.newBatchId, + }) + expect(newBatch.status).toBe('queued') + expect(newBatch.providerBatchId).toBeDefined() + expect(newBatch.providerBatchId).not.toBe(batch.providerBatchId) // Verify the run status was reset to running const updatedRun = await (payload as any).findByID({ From 5194ce94e6c426da9ec363bcfde854f1face7be8 Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Mon, 12 Jan 2026 23:59:03 +0700 Subject: [PATCH 34/49] WIP --- dev/app/(payload)/admin/importMap.js | 2 + dev/specs/e2e.spec.ts | 99 +++++++--- .../components/FailedBatchesList/client.tsx | 182 ++++++++++++++++++ .../components/FailedBatchesList/index.tsx | 57 ++++++ src/collections/bulkEmbeddingInputMetadata.ts | 1 + src/collections/bulkEmbeddingsBatches.ts | 6 +- src/collections/bulkEmbeddingsRuns.ts | 15 +- src/exports/client.ts | 1 + 8 files changed, 335 insertions(+), 28 deletions(-) create mode 100644 src/admin/components/FailedBatchesList/client.tsx create mode 100644 src/admin/components/FailedBatchesList/index.tsx diff --git a/dev/app/(payload)/admin/importMap.js b/dev/app/(payload)/admin/importMap.js index a13c2ea..8fa9361 100644 --- a/dev/app/(payload)/admin/importMap.js +++ b/dev/app/(payload)/admin/importMap.js @@ -21,6 +21,7 @@ import { StrikethroughFeatureClient as StrikethroughFeatureClient_e70f5e05f09f93 import { UnderlineFeatureClient as UnderlineFeatureClient_e70f5e05f09f93e00b997edb1ef0c864 } from '@payloadcms/richtext-lexical/client' import { BoldFeatureClient as BoldFeatureClient_e70f5e05f09f93e00b997edb1ef0c864 } from '@payloadcms/richtext-lexical/client' import { ItalicFeatureClient as ItalicFeatureClient_e70f5e05f09f93e00b997edb1ef0c864 } from '@payloadcms/richtext-lexical/client' +import { FailedBatchesList as FailedBatchesList_69051d9d0217691c78245f4f33731b73 } from 'payloadcms-vectorize/client' import { RetryFailedBatchButton as RetryFailedBatchButton_69051d9d0217691c78245f4f33731b73 } from 'payloadcms-vectorize/client' import { EmbedAllButton as EmbedAllButton_69051d9d0217691c78245f4f33731b73 } from 'payloadcms-vectorize/client' import { CollectionCards as CollectionCards_ab83ff7e88da8d3530831f296ec4756a } from '@payloadcms/ui/rsc' @@ -49,6 +50,7 @@ export const importMap = { "@payloadcms/richtext-lexical/client#UnderlineFeatureClient": UnderlineFeatureClient_e70f5e05f09f93e00b997edb1ef0c864, "@payloadcms/richtext-lexical/client#BoldFeatureClient": BoldFeatureClient_e70f5e05f09f93e00b997edb1ef0c864, "@payloadcms/richtext-lexical/client#ItalicFeatureClient": ItalicFeatureClient_e70f5e05f09f93e00b997edb1ef0c864, + 
"payloadcms-vectorize/client#FailedBatchesList": FailedBatchesList_69051d9d0217691c78245f4f33731b73, "payloadcms-vectorize/client#RetryFailedBatchButton": RetryFailedBatchButton_69051d9d0217691c78245f4f33731b73, "payloadcms-vectorize/client#EmbedAllButton": EmbedAllButton_69051d9d0217691c78245f4f33731b73, "@payloadcms/ui/rsc#CollectionCards": CollectionCards_ab83ff7e88da8d3530831f296ec4756a diff --git a/dev/specs/e2e.spec.ts b/dev/specs/e2e.spec.ts index 532edb3..e0be340 100644 --- a/dev/specs/e2e.spec.ts +++ b/dev/specs/e2e.spec.ts @@ -461,45 +461,100 @@ test.describe('Vector embedding e2e tests', () => { console.log('[test] Retry failed batch endpoint test completed successfully!') }) - test('retry failed batch button works for failed batches', async ({ page }) => { + test('retry failed batch button works for failed batches', async ({ page, request }) => { console.log('[test] Starting retry button click test...') test.setTimeout(120000) // Login first await loginToAdmin(page) - // Create a bulk embedding run - const run = await (payload as any).create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, + // Create a test post first (needed for bulk embedding to have something to embed) + const post = await payload.create({ + collection: 'posts', data: { - pool: 'failingBulkDefault', - embeddingVersion: testEmbeddingVersion, - status: 'failed', + title: 'Failed batch UI test post', }, }) - console.log('[test] Created bulk run:', run.id) + console.log('[test] Created test post:', post.id) - // Create a failed batch - const failedBatch = await (payload as any).create({ - collection: BULK_EMBEDDINGS_BATCHES_SLUG, + // Use the bulk embed endpoint to create a run for failingBulkDefault pool + const bulkEmbedResponse = await request.post('/api/vector-bulk-embed', { data: { - run: run.id, - batchIndex: 0, - providerBatchId: `mock-failed-ui-${Date.now()}`, - status: 'failed', - inputCount: 1, - error: 'Test error for UI test', + knowledgePool: 'failingBulkDefault', }, }) - console.log('[test] Created failed batch:', failedBatch.id) + expect(bulkEmbedResponse.ok()).toBe(true) + const bulkEmbedJson = await bulkEmbedResponse.json() + const runId = bulkEmbedJson.runId + console.log('[test] Created bulk run via endpoint:', runId) - // Navigate to the failed batch edit page - console.log('[test] Navigating to failed batch page...') - await page.goto(`/admin/collections/${BULK_EMBEDDINGS_BATCHES_SLUG}/${failedBatch.id}`, { + // Wait for the bulk jobs to process and fail (failingBulkDefault has a mock that fails) + await waitForBulkJobs(payload, 30000) + console.log('[test] Bulk jobs completed') + + // Wait for the batch to actually fail (poll-or-complete job needs to finish) + const runIdNum = parseInt(runId, 10) + let batches: any + let attempts = 0 + const maxAttempts = 30 // Wait up to 30 seconds + + while (attempts < maxAttempts) { + batches = await (payload as any).find({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + where: { + and: [{ run: { equals: runIdNum } }, { status: { equals: 'failed' } }], + }, + }) + + if (batches.totalDocs > 0) { + break + } + + // Check current batch status + const allBatches = await (payload as any).find({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + where: { run: { equals: runIdNum } }, + }) + if (allBatches.totalDocs > 0) { + const currentStatus = allBatches.docs[0].status + if (currentStatus === 'failed') { + batches = allBatches + break + } + } + + // Wait a bit before retrying + await new Promise((resolve) => setTimeout(resolve, 1000)) + attempts++ + } + + 
expect(batches?.totalDocs).toBeGreaterThan(0) + const failedBatch = batches.docs[0] + console.log('[test] Found failed batch:', failedBatch.id) + + // Navigate to the run edit page (where FailedBatchesList component should be visible) + console.log('[test] Navigating to run page...') + await page.goto(`/admin/collections/${BULK_EMBEDDINGS_RUNS_SLUG}/${runId}`, { waitUntil: 'networkidle', }) await page.waitForLoadState('domcontentloaded') + // Wait for the FailedBatchesList component to appear + const failedBatchesList = page.locator('[data-testid^="failed-batch-link-"]').first() + await expect(failedBatchesList).toBeVisible({ timeout: 10000 }) + console.log('[test] Failed batches list is visible') + + // Click on the failed batch link to navigate to the batch page + console.log('[test] Clicking failed batch link...') + await failedBatchesList.click() + + // Wait for navigation to batch page + await page.waitForURL(/\/admin\/collections\/vector-bulk-embeddings-batches\/\d+/, { + timeout: 10000, + }) + await page.waitForLoadState('domcontentloaded') + console.log('[test] Navigated to batch page') + // Look for the retry button const retryButton = page.locator('[data-testid="retry-failed-batch-button"]') await expect(retryButton).toBeVisible({ timeout: 15000 }) @@ -517,7 +572,7 @@ test.describe('Vector embedding e2e tests', () => { await retryButton.click() // Wait for success message - const successMessage = page.locator('text=/Batch re-queued successfully/i') + const successMessage = page.locator('text=/Batch resubmitted successfully/i') await expect(successMessage).toBeVisible({ timeout: 10000 }) console.log('[test] Retry button click test completed!') diff --git a/src/admin/components/FailedBatchesList/client.tsx b/src/admin/components/FailedBatchesList/client.tsx new file mode 100644 index 0000000..5503a1b --- /dev/null +++ b/src/admin/components/FailedBatchesList/client.tsx @@ -0,0 +1,182 @@ +'use client' + +import React from 'react' +import { BULK_EMBEDDINGS_BATCHES_SLUG } from '../../../collections/bulkEmbeddingsBatches.js' + +type FailedBatch = { + id: string + batchIndex: number + providerBatchId: string + error?: string | null +} + +type FailedBatchesListClientProps = { + runId: string + failedCount: number + batches: FailedBatch[] +} + +export const FailedBatchesListClient: React.FC = ({ + runId, + failedCount, + batches, +}) => { + if (batches.length === 0) { + return null + } + + return ( +
+
+

+ + + + Failed Batches ({failedCount}) +

+

+ {batches.length === failedCount + ? 'All failed batches are listed below. Click to view details and retry.' + : `Showing ${batches.length} of ${failedCount} failed batches.`} +

+
+ + + + {batches.length < failedCount && ( + + )} +
+ ) +} + +export default FailedBatchesListClient diff --git a/src/admin/components/FailedBatchesList/index.tsx b/src/admin/components/FailedBatchesList/index.tsx new file mode 100644 index 0000000..a666529 --- /dev/null +++ b/src/admin/components/FailedBatchesList/index.tsx @@ -0,0 +1,57 @@ +import React from 'react' +import { FailedBatchesListClient } from './client.js' +import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../../collections/bulkEmbeddingsRuns.js' +import { BULK_EMBEDDINGS_BATCHES_SLUG } from '../../../collections/bulkEmbeddingsBatches.js' + +type FailedBatchesListProps = { + payload?: any + id?: string + data?: any // The document data passed by beforeDocumentControls +} + +export const FailedBatchesList: React.FC = async (props) => { + // Always render something for debugging + console.log('[FailedBatchesList] Component called with props:', { + hasPayload: !!props.payload, + hasId: !!props.id, + allProps: Object.keys(props), + }) + + const run = await props.payload.findByID({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + id: props.id, + }) + + console.log('[FailedBatchesList] Fetching failed batches for run:', run.id) + + // Fetch failed batches for this run + const runIdNum = typeof run.id === 'number' ? run.id : parseInt(String(run.id), 10) + const failedBatches = await props.payload.find({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + where: { + and: [{ run: { equals: runIdNum } }, { status: { equals: 'failed' } }], + }, + limit: 100, // Limit to first 100 failed batches + sort: 'batchIndex', + }) + + const batches = (failedBatches as any)?.docs || [] + const runId = props.id || String(run.id) + + console.log('[FailedBatchesList] Found batches:', batches.length, 'for run:', runId) + + return ( + ({ + id: String(b.id), + batchIndex: b.batchIndex, + providerBatchId: b.providerBatchId, + error: b.error, + }))} + /> + ) +} + +export default FailedBatchesList diff --git a/src/collections/bulkEmbeddingInputMetadata.ts b/src/collections/bulkEmbeddingInputMetadata.ts index 806c8df..5ebce1b 100644 --- a/src/collections/bulkEmbeddingInputMetadata.ts +++ b/src/collections/bulkEmbeddingInputMetadata.ts @@ -7,6 +7,7 @@ export const BULK_EMBEDDINGS_INPUT_METADATA_SLUG = 'vector-bulk-embedding-input- export const createBulkEmbeddingInputMetadataCollection = (): CollectionConfig => ({ slug: BULK_EMBEDDINGS_INPUT_METADATA_SLUG, admin: { + hidden: true, useAsTitle: 'inputId', description: 'Stores per-input metadata for bulk embedding runs.', defaultColumns: ['run', 'batch', 'inputId', 'sourceCollection', 'docId', 'chunkIndex'], diff --git a/src/collections/bulkEmbeddingsBatches.ts b/src/collections/bulkEmbeddingsBatches.ts index 488e013..e47c18a 100644 --- a/src/collections/bulkEmbeddingsBatches.ts +++ b/src/collections/bulkEmbeddingsBatches.ts @@ -36,9 +36,9 @@ export const createBulkEmbeddingsBatchesCollection = (): CollectionConfig => ({ access: { // Anyone can read; only internal (local API) can mutate. 
read: () => true, - create: ({ req }) => req?.payloadAPI === 'local', - update: ({ req }) => req?.payloadAPI === 'local', - delete: ({ req }) => req?.payloadAPI === 'local', + create: ({ req }) => false, + update: ({ req }) => false, + delete: ({ req }) => false, }, fields: [ { diff --git a/src/collections/bulkEmbeddingsRuns.ts b/src/collections/bulkEmbeddingsRuns.ts index c6faf25..c9f5b36 100644 --- a/src/collections/bulkEmbeddingsRuns.ts +++ b/src/collections/bulkEmbeddingsRuns.ts @@ -18,13 +18,22 @@ export const createBulkEmbeddingsRunsCollection = (): CollectionConfig => ({ description: 'Bulk embedding run records. Created automatically when the Embed all action is triggered.', defaultColumns: ['pool', 'status', 'inputs', 'succeeded', 'failed', 'submittedAt'], + components: { + edit: { + beforeDocumentControls: [ + { + path: 'payloadcms-vectorize/client#FailedBatchesList', + }, + ], + }, + }, }, access: { // Anyone can read; only internal (local API) can mutate. read: () => true, - create: ({ req }) => req?.payloadAPI === 'local', - update: ({ req }) => req?.payloadAPI === 'local', - delete: ({ req }) => req?.payloadAPI === 'local', + create: ({ req }) => false, + update: ({ req }) => false, + delete: ({ req }) => false, }, fields: [ { diff --git a/src/exports/client.ts b/src/exports/client.ts index c871ed9..0eb8619 100644 --- a/src/exports/client.ts +++ b/src/exports/client.ts @@ -1,2 +1,3 @@ export { EmbedAllButton } from '../admin/components/EmbedAllButton/index.js' export { RetryFailedBatchButton } from '../admin/components/RetryFailedBatchButton/index.js' +export { FailedBatchesList } from '../admin/components/FailedBatchesList/index.js' From 171411a11a82bf2e8625905596946e375ad3e21d Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Tue, 13 Jan 2026 12:07:48 +0700 Subject: [PATCH 35/49] Fixes tests WIP --- dev/specs/e2e.spec.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dev/specs/e2e.spec.ts b/dev/specs/e2e.spec.ts index e0be340..8d5669c 100644 --- a/dev/specs/e2e.spec.ts +++ b/dev/specs/e2e.spec.ts @@ -353,6 +353,7 @@ test.describe('Vector embedding e2e tests', () => { knowledgePool: 'failingBulkDefault', }, }) + console.log('[test] Bulk embed response:', await bulkEmbedResponse.json()) expect(bulkEmbedResponse.ok()).toBe(true) const bulkEmbedJson = await bulkEmbedResponse.json() const runId = bulkEmbedJson.runId @@ -477,12 +478,17 @@ test.describe('Vector embedding e2e tests', () => { }) console.log('[test] Created test post:', post.id) + // Wait for any existing bulk embedding jobs to complete before starting a new run + await waitForBulkJobs(payload, 30000) + console.log('[test] Existing bulk jobs completed, proceeding...') + // Use the bulk embed endpoint to create a run for failingBulkDefault pool const bulkEmbedResponse = await request.post('/api/vector-bulk-embed', { data: { knowledgePool: 'failingBulkDefault', }, }) + console.log('[test] Bulk embed response:', await bulkEmbedResponse.json()) expect(bulkEmbedResponse.ok()).toBe(true) const bulkEmbedJson = await bulkEmbedResponse.json() const runId = bulkEmbedJson.runId From da8965c1ca19d48a74fc188cb3e380c72b4f06ca Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Tue, 13 Jan 2026 20:28:13 +0700 Subject: [PATCH 36/49] betters embed --- dev/helpers/embed.ts | 79 +++++++++++--------------------------------- 1 file changed, 19 insertions(+), 60 deletions(-) diff --git a/dev/helpers/embed.ts b/dev/helpers/embed.ts index 
dfb5daf..04a5e83 100644 --- a/dev/helpers/embed.ts +++ b/dev/helpers/embed.ts @@ -62,35 +62,18 @@ export function makeDummyEmbedDocs(dims: number) { } export const testEmbeddingVersion = 'test-v1' -// Voyage file size limit (approximately 100MB, we use a safer threshold) -const VOYAGE_FILE_SIZE_LIMIT = 50 * 1024 * 1024 // 50MB to be safe +// Voyage line limit (100,000 lines per batch) +// https://docs.voyageai.com/docs/batch-inference +const VOYAGE_LINE_LIMIT = 100_000 /** * Real Voyage Batch API implementation using the new streaming API. - * User controls batching based on file size. */ export function makeVoyageBulkEmbeddingsConfig(): BulkEmbeddingsFns { // Accumulated chunks for current batch let accumulatedChunks: BulkEmbeddingInput[] = [] - let accumulatedSize = 0 let batchIndex = 0 - // Store batch state in memory for dev purposes (output file IDs for completion) - const batchOutputFiles = new Map() - - // Helper to estimate JSONL line size for a chunk - const estimateChunkSize = (chunk: BulkEmbeddingInput): number => { - const jsonLine = JSON.stringify({ - custom_id: chunk.id, - body: { - input: [chunk.text], - model: 'voyage-3.5-lite', - input_type: 'document', - }, - }) - return jsonLine.length + 1 // +1 for newline - } - // Helper to submit accumulated chunks to Voyage const submitBatch = async (chunks: BulkEmbeddingInput[]): Promise => { // Create JSONL content for Voyage batch @@ -98,9 +81,7 @@ export function makeVoyageBulkEmbeddingsConfig(): BulkEmbeddingsFns { return JSON.stringify({ custom_id: input.id, body: { - input: [input.text], - model: 'voyage-3.5-lite', - input_type: 'document', + input: input.text, }, }) }) @@ -138,7 +119,11 @@ export function makeVoyageBulkEmbeddingsConfig(): BulkEmbeddingsFns { body: JSON.stringify({ input_file_id: fileId, endpoint: '/v1/embeddings', - completion_window: '24h', + completion_window: '12h', + request_params: { + model: 'voyage-3.5-lite', + input_type: 'document', + }, }), }) @@ -157,26 +142,20 @@ export function makeVoyageBulkEmbeddingsConfig(): BulkEmbeddingsFns { return { addChunk: async ({ chunk, isLastChunk }) => { - const chunkSize = estimateChunkSize(chunk) + // Add chunk to accumulator + accumulatedChunks.push(chunk) - // Check if adding this chunk would exceed the file size limit - if (accumulatedSize + chunkSize > VOYAGE_FILE_SIZE_LIMIT && accumulatedChunks.length > 0) { - // Submit what we have (without this chunk) + // If we hit the 100,000 limit, submit and start a new batch + if (accumulatedChunks.length === VOYAGE_LINE_LIMIT) { const toSubmit = [...accumulatedChunks] - accumulatedChunks = [chunk] - accumulatedSize = chunkSize + accumulatedChunks = [] return await submitBatch(toSubmit) } - // Add chunk to accumulator - accumulatedChunks.push(chunk) - accumulatedSize += chunkSize - // If this is the last chunk, flush everything if (isLastChunk && accumulatedChunks.length > 0) { const toSubmit = [...accumulatedChunks] accumulatedChunks = [] - accumulatedSize = 0 return await submitBatch(toSubmit) } @@ -275,33 +254,13 @@ export function makeVoyageBulkEmbeddingsConfig(): BulkEmbeddingsFns { }, onError: async ({ providerBatchIds, error }) => { + // TODO: Could implement error recovery here, e.g.: + // - Cancel running batches via API + // - Retry failed embeddings one by one using the regular embed API + // - Clean up uploaded files console.log( - `Voyage bulk run failed: ${error.message}. Cleaning up ${providerBatchIds.length} batches...`, + `Voyage bulk run failed: ${error.message}. 
${providerBatchIds.length} batches affected.`, ) - - // Cancel any running batches - for (const batchId of providerBatchIds) { - try { - await fetch(`https://api.voyageai.com/v1/batches/${batchId}/cancel`, { - method: 'POST', - headers: { - Authorization: `Bearer ${process.env.VOYAGE_API_KEY}`, - }, - }) - } catch (cancelError) { - console.error(`Failed to cancel batch ${batchId}:`, cancelError) - } - } - - // Clean up local state - for (const batchId of providerBatchIds) { - batchOutputFiles.delete(batchId) - } - - // Reset accumulator state for potential retry - accumulatedChunks = [] - accumulatedSize = 0 - batchIndex = 0 }, } } From 9861c90dacb83410ee86b97b27c9363d4c08af7d Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Tue, 13 Jan 2026 21:33:44 +0700 Subject: [PATCH 37/49] WIP --- dev/payload.config.ts | 3 +- dev/specs/bulkEmbed/concurrentRuns.spec.ts | 12 +-- dev/specs/bulkEmbed/failedBatch.spec.ts | 23 +++-- dev/specs/vectorizedPayload.spec.ts | 88 ++++++------------- src/admin/components/EmbedAllButton/index.tsx | 3 + src/collections/embeddings.ts | 13 +-- src/index.ts | 24 +++-- src/types.ts | 66 +++++++------- 8 files changed, 114 insertions(+), 118 deletions(-) diff --git a/dev/payload.config.ts b/dev/payload.config.ts index 41d82e2..b171cdf 100644 --- a/dev/payload.config.ts +++ b/dev/payload.config.ts @@ -42,6 +42,7 @@ const bulkEmbeddingsFns = : createMockBulkEmbeddings({ statusSequence: ['queued', 'running', 'running', 'succeeded'], }) +console.log('bulkEmbeddingsFns', bulkEmbeddingsFns) const ssl = process.env.DATABASE_URI !== undefined ? { @@ -106,7 +107,7 @@ const buildConfigWithPostgres = async () => { queue: 'vectorize-bulk-prepare', }, { - cron: '*/10 * * * * *', // Run every 10 seconds for bulk jobs + cron: '0 * * * *', // Run every hour limit: 5, queue: 'vectorize-bulk-poll', }, diff --git a/dev/specs/bulkEmbed/concurrentRuns.spec.ts b/dev/specs/bulkEmbed/concurrentRuns.spec.ts index 289a202..4d3d01b 100644 --- a/dev/specs/bulkEmbed/concurrentRuns.spec.ts +++ b/dev/specs/bulkEmbed/concurrentRuns.spec.ts @@ -1,7 +1,7 @@ import type { Payload } from 'payload' import { beforeAll, describe, expect, test } from 'vitest' import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../../src/collections/bulkEmbeddingsRuns.js' -import type { VectorizedPayload } from '../../../src/types.js' +import { getVectorizedPayload } from '../../../src/types.js' import { BULK_QUEUE_NAMES, DEFAULT_DIMS, @@ -15,7 +15,7 @@ const DIMS = DEFAULT_DIMS const dbName = `bulk_concurrent_${Date.now()}` describe('Bulk embed - concurrent runs prevention', () => { - let payload: VectorizedPayload<'default'> + let payload: Payload beforeAll(async () => { await createTestDb({ dbName }) @@ -44,10 +44,11 @@ describe('Bulk embed - concurrent runs prevention', () => { dims: DIMS, key: `concurrent-${Date.now()}`, }) - payload = built.payload as VectorizedPayload<'default'> + payload = built.payload }) test('cannot start concurrent bulk embed runs for the same pool', async () => { + const vectorizedPayload = getVectorizedPayload<'default'>(payload)! 
// Create a test post first await payload.create({ collection: 'posts', @@ -65,7 +66,7 @@ describe('Bulk embed - concurrent runs prevention', () => { }) // Try to start another bulk embed for the same pool - const result = await payload.bulkEmbed({ knowledgePool: 'default' }) + const result = await vectorizedPayload.bulkEmbed({ knowledgePool: 'default' }) expect('conflict' in result && result.conflict).toBe(true) expect(result.status).toBe('running') @@ -84,6 +85,7 @@ describe('Bulk embed - concurrent runs prevention', () => { }) test('can start bulk embed run after previous run completes', async () => { + const vectorizedPayload = getVectorizedPayload<'default'>(payload)! // Create a test post await payload.create({ collection: 'posts', @@ -102,7 +104,7 @@ describe('Bulk embed - concurrent runs prevention', () => { }) // Should be able to start a new run for the same pool - const result = await payload.bulkEmbed({ knowledgePool: 'default' }) + const result = await vectorizedPayload.bulkEmbed({ knowledgePool: 'default' }) expect('conflict' in result).toBe(false) expect(result.status).toBe('queued') diff --git a/dev/specs/bulkEmbed/failedBatch.spec.ts b/dev/specs/bulkEmbed/failedBatch.spec.ts index 20f580e..f25a0f3 100644 --- a/dev/specs/bulkEmbed/failedBatch.spec.ts +++ b/dev/specs/bulkEmbed/failedBatch.spec.ts @@ -3,7 +3,7 @@ import { beforeAll, describe, expect, test } from 'vitest' import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../../src/collections/bulkEmbeddingsRuns.js' import { BULK_EMBEDDINGS_BATCHES_SLUG } from '../../../src/collections/bulkEmbeddingsBatches.js' import { BULK_EMBEDDINGS_INPUT_METADATA_SLUG } from '../../../src/collections/bulkEmbeddingInputMetadata.js' -import type { VectorizedPayload } from '../../../src/types.js' +import { getVectorizedPayload } from '../../../src/types.js' import { BULK_QUEUE_NAMES, DEFAULT_DIMS, @@ -18,7 +18,7 @@ const DIMS = DEFAULT_DIMS const dbName = `bulk_failed_${Date.now()}` describe('Bulk embed - failed batch', () => { - let payload: VectorizedPayload<'default'> + let payload: Payload beforeAll(async () => { await createTestDb({ dbName }) @@ -45,7 +45,7 @@ describe('Bulk embed - failed batch', () => { dims: DIMS, key: `failed-${Date.now()}`, }) - payload = built.payload as VectorizedPayload<'default'> + payload = built.payload }) test('failed batch marks entire run as failed', async () => { @@ -121,6 +121,7 @@ describe('Bulk embed - failed batch', () => { }) test('cannot retry batch while run is still running', async () => { + const vectorizedPayload = getVectorizedPayload<'default'>(payload)! // Create a run in 'running' status const run = await (payload as any).create({ collection: BULK_EMBEDDINGS_RUNS_SLUG, @@ -145,7 +146,7 @@ describe('Bulk embed - failed batch', () => { }) // Try to retry the batch while run is running - should be rejected - const result = await payload.retryFailedBatch({ batchId: String(batch.id) }) + const result = await vectorizedPayload.retryFailedBatch({ batchId: String(batch.id) }) expect('error' in result).toBe(true) expect('conflict' in result && result.conflict).toBe(true) @@ -165,6 +166,7 @@ describe('Bulk embed - failed batch', () => { }) test('retrying a failed batch creates a new batch and marks old batch as retried', async () => { + const vectorizedPayload = getVectorizedPayload<'default'>(payload)! 
const post = await payload.create({ collection: 'posts', data: { title: 'RetryTest' } as any }) const run = await payload.create({ @@ -192,7 +194,9 @@ describe('Bulk embed - failed batch', () => { expect(failedBatch.status).toBe('failed') // Retry the batch - const retryResult = await payload.retryFailedBatch({ batchId: String(failedBatch.id) }) + const retryResult = await vectorizedPayload.retryFailedBatch({ + batchId: String(failedBatch.id), + }) expect('error' in retryResult).toBe(false) if (!('error' in retryResult)) { @@ -234,6 +238,7 @@ describe('Bulk embed - failed batch', () => { }) test('retrying a retried batch returns the existing retry batch', async () => { + const vectorizedPayload = getVectorizedPayload<'default'>(payload)! const post = await payload.create({ collection: 'posts', data: { title: 'RetryRetryTest' } as any, @@ -263,14 +268,18 @@ describe('Bulk embed - failed batch', () => { const failedBatch = (batchesResult as any).docs[0] // Retry the batch first time - const firstRetryResult = await payload.retryFailedBatch({ batchId: String(failedBatch.id) }) + const firstRetryResult = await vectorizedPayload.retryFailedBatch({ + batchId: String(failedBatch.id), + }) expect('error' in firstRetryResult).toBe(false) if ('error' in firstRetryResult) return const firstRetryBatchId = firstRetryResult.newBatchId! // Retry the retried batch - should return the existing retry batch - const secondRetryResult = await payload.retryFailedBatch({ batchId: String(failedBatch.id) }) + const secondRetryResult = await vectorizedPayload.retryFailedBatch({ + batchId: String(failedBatch.id), + }) expect('error' in secondRetryResult).toBe(false) if (!('error' in secondRetryResult)) { diff --git a/dev/specs/vectorizedPayload.spec.ts b/dev/specs/vectorizedPayload.spec.ts index e717909..6ea9539 100644 --- a/dev/specs/vectorizedPayload.spec.ts +++ b/dev/specs/vectorizedPayload.spec.ts @@ -2,7 +2,7 @@ import type { Payload } from 'payload' import { getPayload } from 'payload' import { beforeAll, describe, expect, test } from 'vitest' -import { isVectorizedPayload, VectorizedPayload } from '../../src/types.js' +import { getVectorizedPayload, VectorizedPayload } from '../../src/types.js' import { buildDummyConfig, DIMS, getInitialMarkdownContent } from './constants.js' import { createTestDb, waitForVectorizationJobs } from './utils.js' import { postgresAdapter } from '@payloadcms/db-postgres' @@ -93,54 +93,16 @@ describe('VectorizedPayload', () => { markdownContent = await getInitialMarkdownContent(config) }) - describe('isVectorizedPayload type guard', () => { - test('returns true for a payload instance with vectorize extensions', () => { - expect(isVectorizedPayload(payload)).toBe(true) + describe('getVectorizedPayload', () => { + test('returns vectorized payload object for a payload instance with vectorize extensions', () => { + const vectorizedPayload = getVectorizedPayload(payload) + expect(vectorizedPayload).not.toBeNull() + expect(vectorizedPayload).toBeDefined() }) - test('returns false for a plain object without search method', () => { - const plainObj = { - _isBulkEmbedEnabled: () => false, - queueEmbed: () => Promise.resolve(), - bulkEmbed: () => Promise.resolve({}), - retryFailedBatch: () => Promise.resolve({}), - } as unknown as Payload - expect(isVectorizedPayload(plainObj)).toBe(false) - }) - - test('returns false for a plain object without queueEmbed method', () => { - const plainObj = { - _isBulkEmbedEnabled: () => false, - search: () => Promise.resolve([]), - bulkEmbed: () => 
Promise.resolve({}), - retryFailedBatch: () => Promise.resolve({}), - } as unknown as Payload - expect(isVectorizedPayload(plainObj)).toBe(false) - }) - - test('returns false for a plain object without bulkEmbed method', () => { - const plainObj = { - _isBulkEmbedEnabled: () => false, - search: () => Promise.resolve([]), - queueEmbed: () => Promise.resolve(), - retryFailedBatch: () => Promise.resolve({}), - } as unknown as Payload - expect(isVectorizedPayload(plainObj)).toBe(false) - }) - - test('returns false for a plain object without retryFailedBatch method', () => { - const plainObj = { - _isBulkEmbedEnabled: () => false, - search: () => Promise.resolve([]), - queueEmbed: () => Promise.resolve(), - bulkEmbed: () => Promise.resolve({}), - } as unknown as Payload - expect(isVectorizedPayload(plainObj)).toBe(false) - }) - - test('returns false for an empty object', () => { - const emptyObj = {} as unknown as Payload - expect(isVectorizedPayload(emptyObj)).toBe(false) + test('returns null for a payload instance without vectorize extensions', () => { + const plainPayload = {} as unknown as Payload + expect(getVectorizedPayload(plainPayload)).toBeNull() }) }) @@ -161,11 +123,13 @@ describe('VectorizedPayload', () => { }) test('payload has search method', () => { - expect(typeof (payload as VectorizedPayload).search).toBe('function') + const vectorizedPayload = getVectorizedPayload<'default'>(payload) + expect(vectorizedPayload).not.toBeNull() + expect(typeof vectorizedPayload!.search).toBe('function') }) test('search returns an array of VectorSearchResult', async () => { - const vectorizedPayload = payload as VectorizedPayload<'default'> + const vectorizedPayload = getVectorizedPayload<'default'>(payload)! const results = await vectorizedPayload.search({ query: titleAndQuery, @@ -177,7 +141,7 @@ describe('VectorizedPayload', () => { }) test('search results are ordered by similarity (highest first)', async () => { - const vectorizedPayload = payload as VectorizedPayload<'default'> + const vectorizedPayload = getVectorizedPayload<'default'>(payload)! const results = await vectorizedPayload.search({ query: titleAndQuery, @@ -189,7 +153,7 @@ describe('VectorizedPayload', () => { }) test('search respects limit parameter', async () => { - const vectorizedPayload = payload as VectorizedPayload<'default'> + const vectorizedPayload = getVectorizedPayload<'default'>(payload)! const results = await vectorizedPayload.search({ query: titleAndQuery, @@ -201,7 +165,7 @@ describe('VectorizedPayload', () => { }) test('search respects where clause', async () => { - const vectorizedPayload = payload as VectorizedPayload<'default'> + const vectorizedPayload = getVectorizedPayload<'default'>(payload)! const results = await vectorizedPayload.search({ query: titleAndQuery, @@ -216,7 +180,7 @@ describe('VectorizedPayload', () => { }) test('querying a title should return the title as top result', async () => { - const vectorizedPayload = payload as VectorizedPayload<'default'> + const vectorizedPayload = getVectorizedPayload<'default'>(payload)! 
const results = await vectorizedPayload.search({ query: titleAndQuery, @@ -230,11 +194,13 @@ describe('VectorizedPayload', () => { describe('queueEmbed method', () => { test('payload has queueEmbed method', () => { - expect(typeof (payload as VectorizedPayload).queueEmbed).toBe('function') + const vectorizedPayload = getVectorizedPayload(payload) + expect(vectorizedPayload).not.toBeNull() + expect(typeof vectorizedPayload!.queueEmbed).toBe('function') }) test('queueEmbed queues a vectorization job', async () => { - const vectorizedPayload = payload as VectorizedPayload + const vectorizedPayload = getVectorizedPayload(payload)! // Create a post (triggers automatic embedding) const post = await payload.create({ @@ -271,11 +237,13 @@ describe('VectorizedPayload', () => { describe('bulkEmbed method', () => { test('payload has bulkEmbed method', () => { - expect(typeof (payload as VectorizedPayload).bulkEmbed).toBe('function') + const vectorizedPayload = getVectorizedPayload(payload) + expect(vectorizedPayload).not.toBeNull() + expect(typeof vectorizedPayload!.bulkEmbed).toBe('function') }) test('bulkEmbed throws error when bulk embedding not configured', async () => { - const vectorizedPayload = payload as VectorizedPayload<'default'> + const vectorizedPayload = getVectorizedPayload<'default'>(payload)! // This pool doesn't have bulkEmbeddingsFns configured await expect(vectorizedPayload.bulkEmbed({ knowledgePool: 'default' })).rejects.toThrow( @@ -286,11 +254,13 @@ describe('VectorizedPayload', () => { describe('retryFailedBatch method', () => { test('payload has retryFailedBatch method', () => { - expect(typeof (payload as VectorizedPayload).retryFailedBatch).toBe('function') + const vectorizedPayload = getVectorizedPayload(payload) + expect(vectorizedPayload).not.toBeNull() + expect(typeof vectorizedPayload!.retryFailedBatch).toBe('function') }) test('retryFailedBatch returns error for non-existent batch', async () => { - const vectorizedPayload = payload as VectorizedPayload + const vectorizedPayload = getVectorizedPayload(payload)! 
const result = await vectorizedPayload.retryFailedBatch({ batchId: '999999' }) diff --git a/src/admin/components/EmbedAllButton/index.tsx b/src/admin/components/EmbedAllButton/index.tsx index 472ff6c..de0dbeb 100644 --- a/src/admin/components/EmbedAllButton/index.tsx +++ b/src/admin/components/EmbedAllButton/index.tsx @@ -15,12 +15,15 @@ export const EmbedAllButton: React.FC } } - const incomingOnInit = config.onInit const vectorSearchHandlers = createVectorSearchHandlers(pluginOptions.knowledgePools) - config.onInit = async (payload) => { - if (incomingOnInit) await incomingOnInit(payload) - Object.assign(payload, { + + // Create vectorized payload object factory that creates methods bound to a payload instance + const createVectorizedPayloadObject = (payload: Payload): VectorizedPayload => { + console.log('createVectorizedPayloadObject', payload) + return { _isBulkEmbedEnabled: (knowledgePool: TPoolNames): boolean => { const poolConfig = pluginOptions.knowledgePools[knowledgePool] return !!poolConfig?.embeddingConfig?.bulkEmbeddingsFns @@ -451,7 +452,18 @@ export const createVectorizeIntegration = knowledgePools: pluginOptions.knowledgePools, queueName: pluginOptions.bulkQueueNames?.pollOrCompleteQueueName, }), - } as Partial>) + } as VectorizedPayload + } + + // Store factory in config.custom + config.custom = { + ...(config.custom || {}), + createVectorizedPayloadObject, + } + + const incomingOnInit = config.onInit + config.onInit = async (payload) => { + if (incomingOnInit) await incomingOnInit(payload) // Ensure pgvector artifacts for each knowledge pool for (const poolName in staticConfigs) { const staticConfig = staticConfigs[poolName] diff --git a/src/types.ts b/src/types.ts index 3a0a31d..0a6cd4c 100644 --- a/src/types.ts +++ b/src/types.ts @@ -43,44 +43,40 @@ export type RetryFailedBatchResult = /** * Extended Payload type with vectorize plugin methods */ -export type VectorizedPayload = - Payload & { - /** Check if bulk embedding is enabled for a knowledge pool */ - _isBulkEmbedEnabled: (knowledgePool: TPoolNames) => boolean - search: (params: VectorSearchQuery) => Promise> - queueEmbed: ( - params: - | { - collection: string - docId: string - } - | { - collection: string - doc: Record - }, - ) => Promise - /** Start a bulk embedding run for a knowledge pool */ - bulkEmbed: (params: { knowledgePool: TPoolNames }) => Promise - /** Retry a failed batch */ - retryFailedBatch: (params: { batchId: string }) => Promise - } +export type VectorizedPayload = { + /** Check if bulk embedding is enabled for a knowledge pool */ + _isBulkEmbedEnabled: (knowledgePool: TPoolNames) => boolean + search: (params: VectorSearchQuery) => Promise> + queueEmbed: ( + params: + | { + collection: string + docId: string + } + | { + collection: string + doc: Record + }, + ) => Promise + /** Start a bulk embedding run for a knowledge pool */ + bulkEmbed: (params: { knowledgePool: TPoolNames }) => Promise + /** Retry a failed batch */ + retryFailedBatch: (params: { batchId: string }) => Promise +} /** - * Type guard to check if a Payload instance has vectorize extensions + * Get the vectorized payload object from config.custom + * Returns null if the payload instance doesn't have vectorize extensions */ -export function isVectorizedPayload(payload: Payload): payload is VectorizedPayload { - return ( - '_isBulkEmbedEnabled' in payload && - typeof (payload as any)._isBulkEmbedEnabled === 'function' && - 'search' in payload && - typeof (payload as any).search === 'function' && - 'queueEmbed' in payload && - 
typeof (payload as any).queueEmbed === 'function' && - 'bulkEmbed' in payload && - typeof (payload as any).bulkEmbed === 'function' && - 'retryFailedBatch' in payload && - typeof (payload as any).retryFailedBatch === 'function' - ) +export function getVectorizedPayload( + payload: Payload, +): VectorizedPayload | null { + const custom = (payload.config as any)?.custom + const vectorizedPayloadFactory = custom?.createVectorizedPayloadObject + if (vectorizedPayloadFactory && typeof vectorizedPayloadFactory === 'function') { + return vectorizedPayloadFactory(payload) as VectorizedPayload + } + return null } export type EmbedDocsFn = (texts: string[]) => Promise From fff3ef5e6f41ff2b6bac01b9c8cf8914e1360a7e Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Tue, 13 Jan 2026 21:58:36 +0700 Subject: [PATCH 38/49] WIP --- CHANGELOG.md | 33 ++++++++++++++++ README.md | 92 ++++++++++++++++++++++++++----------------- dev/payload.config.ts | 5 ++- 3 files changed, 93 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc80d09..2b4ee5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,37 @@ All notable changes to this project will be documented in this file. +## Unreleased + +### Changed + +- **`isVectorizedPayload` replaced with `getVectorizedPayload`**: The type guard `isVectorizedPayload(payload)` has been replaced with `getVectorizedPayload(payload)` which returns the vectorized payload object directly (or `null` if not available). This provides a cleaner API that doesn't require type assertions. + +### Migration + +**Before:** + +```typescript +import { isVectorizedPayload, type VectorizedPayload } from 'payloadcms-vectorize' + +if (isVectorizedPayload(payload)) { + const results = await payload.search({ ... }) + await payload.queueEmbed({ ... }) +} +``` + +**After:** + +```typescript +import { getVectorizedPayload } from 'payloadcms-vectorize' + +const vectorizedPayload = getVectorizedPayload(payload) +if (vectorizedPayload) { + const results = await vectorizedPayload.search({ ... }) + await vectorizedPayload.queueEmbed({ ... }) +} +``` + ## 0.5.0 - 2026-01-10 ### Breaking Changes @@ -27,6 +58,8 @@ All notable changes to this project will be documented in this file. ## 0.4.5 - 2025-01-09 +**Note:** This version is deprecated due to a critical bug with `isVectorizedPayload`. Use `getVectorizedPayload(payload)` instead (see 0.5.0 section above). No 0.4 line fix (0.4.6) exists yet. 
+ ### Added - **Local API**: Added `payload.search()` and `payload.queueEmbed()` methods directly on the Payload instance for programmatic vector search without HTTP requests diff --git a/README.md b/README.md index 4b0fb91..814a1f9 100644 --- a/README.md +++ b/README.md @@ -189,14 +189,14 @@ const { results } = await response.json() Alternatively, you can use the local API directly on the Payload instance: ```typescript -import { isVectorizedPayload, type VectorizedPayload } from 'payloadcms-vectorize' +import { getVectorizedPayload } from 'payloadcms-vectorize' -// After initializing Payload, it will have the search and queueEmbed methods +// After initializing Payload, get the vectorized payload object const payload = await getPayload({ config, cron: true }) +const vectorizedPayload = getVectorizedPayload(payload) -// Type guard to ensure payload has vectorize extensions -if (isVectorizedPayload(payload)) { - const results = await payload.search({ +if (vectorizedPayload) { + const results = await vectorizedPayload.search({ query: 'What is machine learning?', knowledgePool: 'main', where: { @@ -207,7 +207,7 @@ if (isVectorizedPayload(payload)) { // results is an array of VectorSearchResult // Manually queue an embedding job - await payload.queueEmbed({ + await vectorizedPayload.queueEmbed({ collection: 'posts', docId: 'some-post-id', }) @@ -591,18 +591,21 @@ Perform vector search programmatically without making an HTTP request. **Example:** ```typescript -import type { VectorizedPayload } from 'payloadcms-vectorize' +import { getVectorizedPayload } from 'payloadcms-vectorize' const payload = await getPayload({ config, cron: true }) +const vectorizedPayload = getVectorizedPayload<'main'>(payload) -const results = await (payload as VectorizedPayload<'main'>).search({ - query: 'What is machine learning?', - knowledgePool: 'main', - where: { - category: { equals: 'guides' }, - }, - limit: 5, -}) +if (vectorizedPayload) { + const results = await vectorizedPayload.search({ + query: 'What is machine learning?', + knowledgePool: 'main', + where: { + category: { equals: 'guides' }, + }, + limit: 5, + }) +} ``` #### `payload.queueEmbed(params)` @@ -626,40 +629,57 @@ Or: **Example:** ```typescript -// Queue by document ID (fetches document first) -await (payload as VectorizedPayload).queueEmbed({ - collection: 'posts', - docId: 'some-post-id', -}) +import { getVectorizedPayload } from 'payloadcms-vectorize' -// Queue with document object directly -await (payload as VectorizedPayload).queueEmbed({ - collection: 'posts', - doc: { - id: 'some-post-id', - title: 'Post Title', - content: { - /* ... */ +const payload = await getPayload({ config, cron: true }) +const vectorizedPayload = getVectorizedPayload(payload) + +if (vectorizedPayload) { + // Queue by document ID (fetches document first) + await vectorizedPayload.queueEmbed({ + collection: 'posts', + docId: 'some-post-id', + }) + + // Queue with document object directly + await vectorizedPayload.queueEmbed({ + collection: 'posts', + doc: { + id: 'some-post-id', + title: 'Post Title', + content: { + /* ... 
*/ + }, }, - }, -}) + }) +} ``` -#### Type Guard +#### Getting the Vectorized Payload Object -Use the `isVectorizedPayload` type guard to check if a Payload instance has vectorize extensions: +Use the `getVectorizedPayload` function to get the vectorized payload object with all vectorize methods: ```typescript -import { isVectorizedPayload } from 'payloadcms-vectorize' +import { getVectorizedPayload } from 'payloadcms-vectorize' const payload = await getPayload({ config, cron: true }) +const vectorizedPayload = getVectorizedPayload(payload) -if (isVectorizedPayload(payload)) { - // TypeScript now knows payload has search and queueEmbed methods - const results = await payload.search({ +if (vectorizedPayload) { + // Use all vectorize methods + const results = await vectorizedPayload.search({ query: 'search query', knowledgePool: 'main', }) + + await vectorizedPayload.queueEmbed({ + collection: 'posts', + docId: 'some-id', + }) + + await vectorizedPayload.bulkEmbed({ + knowledgePool: 'main', + }) } ``` diff --git a/dev/payload.config.ts b/dev/payload.config.ts index b171cdf..c9238bc 100644 --- a/dev/payload.config.ts +++ b/dev/payload.config.ts @@ -42,6 +42,9 @@ const bulkEmbeddingsFns = : createMockBulkEmbeddings({ statusSequence: ['queued', 'running', 'running', 'succeeded'], }) + +// Run every hour for voyage, every 5 seconds for mock +const bulkPollCronSchedule = process.env.USE_VOYAGE !== undefined ? '0 * * * *' : '*/5 * * * * *' console.log('bulkEmbeddingsFns', bulkEmbeddingsFns) const ssl = process.env.DATABASE_URI !== undefined @@ -107,7 +110,7 @@ const buildConfigWithPostgres = async () => { queue: 'vectorize-bulk-prepare', }, { - cron: '0 * * * *', // Run every hour + cron: bulkPollCronSchedule, limit: 5, queue: 'vectorize-bulk-poll', }, From f0614813669f7680ec372282db648a3d75dceb99 Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Thu, 15 Jan 2026 16:44:20 +0700 Subject: [PATCH 39/49] Clean up --- CHANGELOG.md | 36 +---- README.md | 12 +- dev/helpers/embed.ts | 34 +++- dev/payload.config.ts | 1 - dev/specs/bulkEmbed/basic.spec.ts | 103 ++----------- dev/specs/bulkEmbed/canceledBatch.spec.ts | 26 ++-- dev/specs/bulkEmbed/extensionFields.spec.ts | 22 +-- dev/specs/bulkEmbed/failedBatch.spec.ts | 81 +++------- dev/specs/bulkEmbed/multipleBatches.spec.ts | 25 +-- dev/specs/bulkEmbed/multipleChunks.spec.ts | 21 +-- dev/specs/bulkEmbed/partialFailure.spec.ts | 20 +-- .../bulkEmbed/partialFailureNoFail.spec.ts | 22 +-- dev/specs/bulkEmbed/polling.spec.ts | 29 ++-- dev/specs/bulkEmbed/realtimeMode.spec.ts | 18 +-- dev/specs/bulkEmbed/versionBump.spec.ts | 145 ++++++++++-------- dev/specs/config.spec.ts | 11 +- dev/specs/e2e.spec.ts | 92 ----------- dev/specs/utils.ts | 13 ++ src/admin/components/EmbedAllButton/index.tsx | 28 +--- .../components/FailedBatchesList/index.tsx | 11 -- .../RetryFailedBatchButton/client.tsx | 1 - .../RetryFailedBatchButton/index.tsx | 2 - src/index.ts | 4 +- src/tasks/vectorize.ts | 1 - 24 files changed, 243 insertions(+), 515 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b4ee5f..15f2d73 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,43 +2,13 @@ All notable changes to this project will be documented in this file. -## Unreleased - -### Changed - -- **`isVectorizedPayload` replaced with `getVectorizedPayload`**: The type guard `isVectorizedPayload(payload)` has been replaced with `getVectorizedPayload(payload)` which returns the vectorized payload object directly (or `null` if not available). 
This provides a cleaner API that doesn't require type assertions. - -### Migration - -**Before:** - -```typescript -import { isVectorizedPayload, type VectorizedPayload } from 'payloadcms-vectorize' - -if (isVectorizedPayload(payload)) { - const results = await payload.search({ ... }) - await payload.queueEmbed({ ... }) -} -``` - -**After:** - -```typescript -import { getVectorizedPayload } from 'payloadcms-vectorize' - -const vectorizedPayload = getVectorizedPayload(payload) -if (vectorizedPayload) { - const results = await vectorizedPayload.search({ ... }) - await vectorizedPayload.queueEmbed({ ... }) -} -``` - -## 0.5.0 - 2026-01-10 +## 0.5.0 - 2026-01-15 ### Breaking Changes - **`queueName` renamed to `realtimeQueueName`**: The plugin option `queueName` has been renamed to `realtimeQueueName` to clarify that it only affects realtime vectorization jobs. - **`bulkQueueName` changed to `bulkQueueNames`**: The plugin option `bulkQueueName` has been replaced with `bulkQueueNames` object containing `prepareBulkEmbedQueueName` and `pollOrCompleteQueueName` for separate queue isolation of bulk preparation vs polling workloads. +- **`isVectorizedPayload` replaced with `getVectorizedPayload`**: The type guard `isVectorizedPayload(payload)` has been replaced with `getVectorizedPayload(payload)` which returns the vectorized payload object directly (or `null` if not available). This fixes a bug where methods are missing because onInit was not called. ### New Features @@ -51,7 +21,7 @@ if (vectorizedPayload) { ### Tests & Reliability -- Added comprehensive tests for realtime vs bulk ingest behavior +- Added comprehensive tests for realtime vs bulk ingest behavior, and failing bulk situations - Added tests for bulk polling error conditions (`failed`, `canceled` statuses) - Added tests for bulk fan-in behavior (multiple documents processed in single run) - Improved test coverage for edge cases in bulk embedding workflow diff --git a/README.md b/README.md index 814a1f9..4ce2cf8 100644 --- a/README.md +++ b/README.md @@ -141,6 +141,10 @@ export default buildConfig({ // realtimeQueueName: 'custom-queue', // endpointOverrides: { path: '/custom-vector-search', enabled: true }, // disabled: false, + // bulkQueueNames: { // Required iff `bulkEmbeddingsFns` included + // prepareBulkEmbedQueueName: ..., + // pollOrCompleteQueueName: ..., + // }, }), ], }) @@ -148,9 +152,9 @@ export default buildConfig({ **Important:** `knowledgePools` must have **different names than your collections**—reusing a collection name for a knowledge pool **will cause schema conflicts**. (In this example, the knowledge pool is named 'main' and a collection named 'main' will be created.) -### 1.5. Generate Import Map (Required for Admin UI) +### 1.5. Generate Import Map (If Needed) -After configuring the plugin, you must generate the import map so that Payload can resolve client components (like the "Embed all" button) in the admin UI for bulk embeddings: +Payload automatically generates the import map on startup and during development (HMR), so you typically don't need to run this manually. 
However, if client components (like the "Embed all" button) don't appear in the admin UI, you may need to manually generate the import map: ```bash pnpm run generate:importmap @@ -671,12 +675,12 @@ if (vectorizedPayload) { query: 'search query', knowledgePool: 'main', }) - + await vectorizedPayload.queueEmbed({ collection: 'posts', docId: 'some-id', }) - + await vectorizedPayload.bulkEmbed({ knowledgePool: 'main', }) diff --git a/dev/helpers/embed.ts b/dev/helpers/embed.ts index 04a5e83..229dde1 100644 --- a/dev/helpers/embed.ts +++ b/dev/helpers/embed.ts @@ -229,17 +229,49 @@ export function makeVoyageBulkEmbeddingsConfig(): BulkEmbeddingsFns { if (!line.trim()) continue try { const result = JSON.parse(line) + // Check for error in result.error field if (result.error) { await onChunk({ id: result.custom_id, error: result.error.message || 'Unknown error', }) - } else { + } + // Check for error in result.response.status_code (Voyage AI format) + // Error if status_code exists and is >= 400 or not 200 + else if (result.response?.status_code && result.response.status_code !== 200) { + await onChunk({ + id: result.custom_id, + error: result.response.message || `HTTP ${result.response.status_code}`, + }) + } + // Success case - check for embedding data + // Handle body.object === "list" with data array + else if ( + result.response?.body?.object === 'list' && + result.response.body.data?.[0]?.embedding + ) { await onChunk({ id: result.custom_id, embedding: result.response.body.data[0].embedding, }) } + // Handle body.object === "embedding" (direct embedding) + else if ( + result.response?.body?.object === 'embedding' && + result.response.body.embedding + ) { + await onChunk({ + id: result.custom_id, + embedding: result.response.body.embedding, + }) + } + // Unknown format + else { + await onChunk({ + id: result.custom_id, + error: 'Unexpected response format', + }) + } } catch (parseError) { console.error('Failed to parse output line:', line, parseError) } diff --git a/dev/payload.config.ts b/dev/payload.config.ts index c9238bc..cb8a738 100644 --- a/dev/payload.config.ts +++ b/dev/payload.config.ts @@ -45,7 +45,6 @@ const bulkEmbeddingsFns = // Run every hour for voyage, every 5 seconds for mock const bulkPollCronSchedule = process.env.USE_VOYAGE !== undefined ? '0 * * * *' : '*/5 * * * * *' -console.log('bulkEmbeddingsFns', bulkEmbeddingsFns) const ssl = process.env.DATABASE_URI !== undefined ? 
{ diff --git a/dev/specs/bulkEmbed/basic.spec.ts b/dev/specs/bulkEmbed/basic.spec.ts index 9825387..46b0d29 100644 --- a/dev/specs/bulkEmbed/basic.spec.ts +++ b/dev/specs/bulkEmbed/basic.spec.ts @@ -10,9 +10,12 @@ import { clearAllCollections, createMockBulkEmbeddings, createTestDb, + expectGoodResult, waitForBulkJobs, } from '../utils.js' import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' +import { getVectorizedPayload, VectorizedPayload } from 'payloadcms-vectorize' +import { BulkEmbedResult } from '../../../src/types.js' const DIMS = DEFAULT_DIMS const dbName = `bulk_basic_${Date.now()}` @@ -38,6 +41,7 @@ const basePluginOptions = { describe('Bulk embed - basic tests', () => { let payload: Payload let config: SanitizedConfig + let vectorizedPayload: VectorizedPayload | null = null beforeAll(async () => { await createTestDb({ dbName }) @@ -50,6 +54,7 @@ describe('Bulk embed - basic tests', () => { }) payload = built.payload config = built.config + vectorizedPayload = getVectorizedPayload(payload) }) beforeEach(async () => { @@ -60,38 +65,11 @@ describe('Bulk embed - basic tests', () => { vi.restoreAllMocks() }) - test('no bulk run is queued on init or doc creation (bulk-only mode)', async () => { - const runsBeforeCreate = await (payload as any).find({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - where: { pool: { equals: 'default' } }, - }) - expect(runsBeforeCreate.totalDocs).toBe(0) - - await payload.create({ collection: 'posts', data: { title: 'First' } as any }) - - const runsAfterCreate = await (payload as any).find({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - where: { pool: { equals: 'default' } }, - }) - expect(runsAfterCreate.totalDocs).toBe(0) - }) - test('manually triggered bulk run embeds documents', async () => { const post = await payload.create({ collection: 'posts', data: { title: 'First' } as any }) - const run = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: testEmbeddingVersion, status: 'queued' }, - }) - - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { runId: String(run.id) }, - req: { payload } as any, - ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName - ? { queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } - : {}), - }) + const result = await vectorizedPayload?.bulkEmbed({ knowledgePool: 'default' }) + expectGoodResult(result) await waitForBulkJobs(payload) @@ -103,7 +81,7 @@ describe('Bulk embed - basic tests', () => { const runDoc = ( await (payload as any).find({ collection: BULK_EMBEDDINGS_RUNS_SLUG, - where: { id: { equals: String(run.id) } }, + where: { id: { equals: String(result!.runId) } }, }) ).docs[0] expect(runDoc.status).toBe('succeeded') @@ -111,26 +89,14 @@ describe('Bulk embed - basic tests', () => { test('bulk run creates batch records', async () => { await payload.create({ collection: 'posts', data: { title: 'Batch Test' } as any }) - - const run = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: testEmbeddingVersion, status: 'queued' }, - }) - - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { runId: String(run.id) }, - req: { payload } as any, - ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName - ? 
{ queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } - : {}), - }) + const result = await vectorizedPayload?.bulkEmbed({ knowledgePool: 'default' }) + expectGoodResult(result) await waitForBulkJobs(payload) const batches = await payload.find({ collection: BULK_EMBEDDINGS_BATCHES_SLUG as any, - where: { run: { equals: String(run.id) } }, + where: { run: { equals: String(result!.runId) } }, }) expect(batches.totalDocs).toBe(1) expect(batches.docs[0]).toHaveProperty('batchIndex', 0) @@ -141,18 +107,8 @@ describe('Bulk embed - basic tests', () => { const post = await payload.create({ collection: 'posts', data: { title: 'Stable' } as any }) // First bulk run - const baselineRun = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: testEmbeddingVersion, status: 'queued' }, - }) - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { runId: String(baselineRun.id) }, - req: { payload } as any, - ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName - ? { queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } - : {}), - }) + const result0 = await vectorizedPayload?.bulkEmbed({ knowledgePool: 'default' }) + expectGoodResult(result0) await waitForBulkJobs(payload) const embeds = await payload.find({ @@ -162,26 +118,15 @@ describe('Bulk embed - basic tests', () => { expect(embeds.totalDocs).toBe(1) // Second bulk run - should find zero eligible - const run = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: testEmbeddingVersion, status: 'queued' }, - }) - - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { runId: String(run.id) }, - req: { payload } as any, - ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName - ? { queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } - : {}), - }) + const result1 = await vectorizedPayload?.bulkEmbed({ knowledgePool: 'default' }) + expect(result1).toBeDefined() await waitForBulkJobs(payload) const runDoc = ( await (payload as any).find({ collection: BULK_EMBEDDINGS_RUNS_SLUG, - where: { id: { equals: String(run.id) } }, + where: { id: { equals: String(result1!.runId) } }, }) ).docs[0] expect(runDoc.status).toBe('succeeded') @@ -192,19 +137,7 @@ describe('Bulk embed - basic tests', () => { test('metadata table is cleaned after successful completion', async () => { await payload.create({ collection: 'posts', data: { title: 'Cleanup' } as any }) - const run = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: testEmbeddingVersion, status: 'queued' }, - }) - - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { runId: String(run.id) }, - req: { payload } as any, - ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName - ? 
{ queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } - : {}), - }) + await vectorizedPayload?.bulkEmbed({ knowledgePool: 'default' }) await waitForBulkJobs(payload) @@ -215,5 +148,3 @@ describe('Bulk embed - basic tests', () => { expect(metadata.totalDocs).toBe(0) }) }) - - diff --git a/dev/specs/bulkEmbed/canceledBatch.spec.ts b/dev/specs/bulkEmbed/canceledBatch.spec.ts index 60d2170..f14c4d2 100644 --- a/dev/specs/bulkEmbed/canceledBatch.spec.ts +++ b/dev/specs/bulkEmbed/canceledBatch.spec.ts @@ -1,21 +1,26 @@ import type { Payload } from 'payload' import { beforeAll, describe, expect, test } from 'vitest' -import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../../src/collections/bulkEmbeddingsRuns.js' import { BULK_QUEUE_NAMES, DEFAULT_DIMS, buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, + expectGoodResult, waitForBulkJobs, } from '../utils.js' import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' +import { getVectorizedPayload, VectorizedPayload } from 'payloadcms-vectorize' const DIMS = DEFAULT_DIMS const dbName = `bulk_canceled_${Date.now()}` +// Right now, we only test if the batch was canceled outside of the bulk embed process. +// TODO(techiejd): Add a way to cancel a batch and/or a run inside the bulk embed process. + describe('Bulk embed - canceled batch', () => { let payload: Payload + let vectorizedPayload: VectorizedPayload | null = null beforeAll(async () => { await createTestDb({ dbName }) @@ -43,24 +48,13 @@ describe('Bulk embed - canceled batch', () => { key: `canceled-${Date.now()}`, }) payload = built.payload + vectorizedPayload = getVectorizedPayload(payload) }) test('canceled batch marks entire run as failed', async () => { const post = await payload.create({ collection: 'posts', data: { title: 'Cancel' } as any }) - - const run = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: testEmbeddingVersion, status: 'queued' }, - }) - - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { runId: String(run.id) }, - req: { payload } as any, - ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName - ? 
{ queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } - : {}), - }) + const result = await vectorizedPayload?.bulkEmbed({ knowledgePool: 'default' }) + expectGoodResult(result) await waitForBulkJobs(payload) @@ -71,5 +65,3 @@ describe('Bulk embed - canceled batch', () => { expect(embeds.totalDocs).toBe(0) }) }) - - diff --git a/dev/specs/bulkEmbed/extensionFields.spec.ts b/dev/specs/bulkEmbed/extensionFields.spec.ts index cc8e92c..4b829e0 100644 --- a/dev/specs/bulkEmbed/extensionFields.spec.ts +++ b/dev/specs/bulkEmbed/extensionFields.spec.ts @@ -7,15 +7,18 @@ import { buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, + expectGoodResult, waitForBulkJobs, } from '../utils.js' import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' +import { getVectorizedPayload, VectorizedPayload } from 'payloadcms-vectorize' const DIMS = DEFAULT_DIMS const dbName = `bulk_extfields_${Date.now()}` describe('Bulk embed - extension fields', () => { let payload: Payload + let vectorizedPayload: VectorizedPayload | null = null beforeAll(async () => { await createTestDb({ dbName }) @@ -49,6 +52,7 @@ describe('Bulk embed - extension fields', () => { key: `extfields-${Date.now()}`, }) payload = built.payload + vectorizedPayload = getVectorizedPayload(payload) }) test('extension fields are merged when writing embeddings', async () => { @@ -56,20 +60,8 @@ describe('Bulk embed - extension fields', () => { collection: 'posts', data: { title: 'Ext merge' } as any, }) - - const run = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: testEmbeddingVersion, status: 'queued' }, - }) - - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { runId: String(run.id) }, - req: { payload } as any, - ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName - ? 
{ queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } - : {}), - }) + const result = await vectorizedPayload?.bulkEmbed({ knowledgePool: 'default' }) + expectGoodResult(result) await waitForBulkJobs(payload) @@ -82,5 +74,3 @@ describe('Bulk embed - extension fields', () => { expect(embeds.docs[0]).toHaveProperty('priority', 3) }) }) - - diff --git a/dev/specs/bulkEmbed/failedBatch.spec.ts b/dev/specs/bulkEmbed/failedBatch.spec.ts index f25a0f3..5e09d16 100644 --- a/dev/specs/bulkEmbed/failedBatch.spec.ts +++ b/dev/specs/bulkEmbed/failedBatch.spec.ts @@ -3,13 +3,14 @@ import { beforeAll, describe, expect, test } from 'vitest' import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../../src/collections/bulkEmbeddingsRuns.js' import { BULK_EMBEDDINGS_BATCHES_SLUG } from '../../../src/collections/bulkEmbeddingsBatches.js' import { BULK_EMBEDDINGS_INPUT_METADATA_SLUG } from '../../../src/collections/bulkEmbeddingInputMetadata.js' -import { getVectorizedPayload } from '../../../src/types.js' +import { getVectorizedPayload, VectorizedPayload } from '../../../src/types.js' import { BULK_QUEUE_NAMES, DEFAULT_DIMS, buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, + expectGoodResult, waitForBulkJobs, } from '../utils.js' import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' @@ -19,6 +20,7 @@ const dbName = `bulk_failed_${Date.now()}` describe('Bulk embed - failed batch', () => { let payload: Payload + let vectorizedPayload: VectorizedPayload | null = null beforeAll(async () => { await createTestDb({ dbName }) @@ -46,31 +48,21 @@ describe('Bulk embed - failed batch', () => { key: `failed-${Date.now()}`, }) payload = built.payload + vectorizedPayload = getVectorizedPayload(payload) }) test('failed batch marks entire run as failed', async () => { const post = await payload.create({ collection: 'posts', data: { title: 'Fail' } as any }) - const run = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: testEmbeddingVersion, status: 'queued' }, - }) - - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { runId: String(run.id) }, - req: { payload } as any, - ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName - ? { queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } - : {}), - }) + const result = await vectorizedPayload?.bulkEmbed({ knowledgePool: 'default' }) + expectGoodResult(result) await waitForBulkJobs(payload) const runDoc = ( await (payload as any).find({ collection: BULK_EMBEDDINGS_RUNS_SLUG, - where: { id: { equals: String(run.id) } }, + where: { id: { equals: String(result!.runId) } }, }) ).docs[0] expect(runDoc.status).toBe('failed') @@ -88,27 +80,17 @@ describe('Bulk embed - failed batch', () => { data: { title: 'FailCleanup' } as any, }) - const run = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: testEmbeddingVersion, status: 'queued' }, - }) - - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { runId: String(run.id) }, - req: { payload } as any, - ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName - ? 
{ queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } - : {}), - }) + const result = await vectorizedPayload?.bulkEmbed({ knowledgePool: 'default' }) + expectGoodResult(result) await waitForBulkJobs(payload) + const runIdNum = parseInt(String(result!.runId), 10) + // Metadata should be kept for failed batches to allow retries - const runIdNum = typeof run.id === 'number' ? run.id : parseInt(String(run.id), 10) const metadata = await payload.find({ collection: BULK_EMBEDDINGS_INPUT_METADATA_SLUG, - where: { run: { equals: runIdNum } }, + where: { run: { equals: runIdNum } as any }, }) expect(metadata.totalDocs).toBeGreaterThan(0) @@ -167,28 +149,17 @@ describe('Bulk embed - failed batch', () => { test('retrying a failed batch creates a new batch and marks old batch as retried', async () => { const vectorizedPayload = getVectorizedPayload<'default'>(payload)! - const post = await payload.create({ collection: 'posts', data: { title: 'RetryTest' } as any }) + await payload.create({ collection: 'posts', data: { title: 'RetryTest' } as any }) - const run = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: testEmbeddingVersion, status: 'queued' }, - }) - - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { runId: String(run.id) }, - req: { payload } as any, - ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName - ? { queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } - : {}), - }) + const result = await vectorizedPayload?.bulkEmbed({ knowledgePool: 'default' }) + expectGoodResult(result) await waitForBulkJobs(payload) // Find the failed batch const batchesResult = await payload.find({ collection: BULK_EMBEDDINGS_BATCHES_SLUG, - where: { run: { equals: run.id } }, + where: { run: { equals: result.runId } }, }) const failedBatch = (batchesResult as any).docs[0] expect(failedBatch.status).toBe('failed') @@ -222,7 +193,7 @@ describe('Bulk embed - failed batch', () => { expect((newBatch as any).providerBatchId).not.toBe(failedBatch.providerBatchId) // Check that metadata points to the new batch - const runIdNum = typeof run.id === 'number' ? run.id : parseInt(String(run.id), 10) + const runIdNum = parseInt(String(result!.runId), 10) const metadata = await payload.find({ collection: BULK_EMBEDDINGS_INPUT_METADATA_SLUG, where: { run: { equals: runIdNum } }, @@ -239,31 +210,19 @@ describe('Bulk embed - failed batch', () => { test('retrying a retried batch returns the existing retry batch', async () => { const vectorizedPayload = getVectorizedPayload<'default'>(payload)! - const post = await payload.create({ + await payload.create({ collection: 'posts', data: { title: 'RetryRetryTest' } as any, }) - const run = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: testEmbeddingVersion, status: 'queued' }, - }) - - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { runId: String(run.id) }, - req: { payload } as any, - ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName - ? 
{ queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } - : {}), - }) + const result = await vectorizedPayload?.bulkEmbed({ knowledgePool: 'default' }) await waitForBulkJobs(payload) // Find the failed batch const batchesResult = await payload.find({ collection: BULK_EMBEDDINGS_BATCHES_SLUG, - where: { run: { equals: run.id } }, + where: { run: { equals: result!.runId } }, }) const failedBatch = (batchesResult as any).docs[0] diff --git a/dev/specs/bulkEmbed/multipleBatches.spec.ts b/dev/specs/bulkEmbed/multipleBatches.spec.ts index 7820f7a..6612847 100644 --- a/dev/specs/bulkEmbed/multipleBatches.spec.ts +++ b/dev/specs/bulkEmbed/multipleBatches.spec.ts @@ -8,15 +8,18 @@ import { buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, + expectGoodResult, waitForBulkJobs, } from '../utils.js' import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' +import { getVectorizedPayload, VectorizedPayload } from 'payloadcms-vectorize' const DIMS = DEFAULT_DIMS const dbName = `bulk_multibatch_${Date.now()}` describe('Bulk embed - multiple batches', () => { let payload: Payload + let vectorizedPayload: VectorizedPayload | null = null beforeAll(async () => { await createTestDb({ dbName }) @@ -47,6 +50,7 @@ describe('Bulk embed - multiple batches', () => { key: `multibatch-${Date.now()}`, }) payload = built.payload + vectorizedPayload = getVectorizedPayload(payload) }) test('multiple batches are created when flushing after N chunks', async () => { @@ -55,25 +59,14 @@ describe('Bulk embed - multiple batches', () => { await payload.create({ collection: 'posts', data: { title: `Post ${i}` } as any }) } - const run = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: testEmbeddingVersion, status: 'queued' }, - }) - - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { runId: String(run.id) }, - req: { payload } as any, - ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName - ? 
{ queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } - : {}), - }) + const result = await vectorizedPayload?.bulkEmbed({ knowledgePool: 'default' }) + expectGoodResult(result) await waitForBulkJobs(payload, 20000) const batches = await payload.find({ collection: BULK_EMBEDDINGS_BATCHES_SLUG as any, - where: { run: { equals: String(run.id) } }, + where: { run: { equals: result!.runId } }, sort: 'batchIndex', }) expect(batches.totalDocs).toBe(3) @@ -89,12 +82,10 @@ describe('Bulk embed - multiple batches', () => { const runDoc = ( await (payload as any).find({ collection: BULK_EMBEDDINGS_RUNS_SLUG, - where: { id: { equals: String(run.id) } }, + where: { id: { equals: result!.runId } }, }) ).docs[0] expect(runDoc.totalBatches).toBe(3) expect(runDoc.status).toBe('succeeded') }) }) - - diff --git a/dev/specs/bulkEmbed/multipleChunks.spec.ts b/dev/specs/bulkEmbed/multipleChunks.spec.ts index b621b15..1b913e5 100644 --- a/dev/specs/bulkEmbed/multipleChunks.spec.ts +++ b/dev/specs/bulkEmbed/multipleChunks.spec.ts @@ -1,15 +1,16 @@ import type { Payload } from 'payload' import { beforeAll, describe, expect, test } from 'vitest' -import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../../src/collections/bulkEmbeddingsRuns.js' import { BULK_QUEUE_NAMES, DEFAULT_DIMS, buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, + expectGoodResult, waitForBulkJobs, } from '../utils.js' import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' +import { getVectorizedPayload } from 'payloadcms-vectorize' const DIMS = DEFAULT_DIMS const dbName = `bulk_multichunk_${Date.now()}` @@ -58,19 +59,9 @@ describe('Bulk embed - multiple chunks with extension fields', () => { data: { title: 'Two' } as any, }) - const run = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: testEmbeddingVersion, status: 'queued' }, - }) - - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { runId: String(run.id) }, - req: { payload } as any, - ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName - ? 
{ queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } - : {}), - }) + const vectorizedPayload = getVectorizedPayload(payload) + const result = await vectorizedPayload?.bulkEmbed({ knowledgePool: 'default' }) + expectGoodResult(result) await waitForBulkJobs(payload) @@ -84,5 +75,3 @@ describe('Bulk embed - multiple chunks with extension fields', () => { expect(embeds.docs[1]).toMatchObject({ category: 'b', priority: 2, chunkIndex: 1 }) }) }) - - diff --git a/dev/specs/bulkEmbed/partialFailure.spec.ts b/dev/specs/bulkEmbed/partialFailure.spec.ts index bf84443..1cb5171 100644 --- a/dev/specs/bulkEmbed/partialFailure.spec.ts +++ b/dev/specs/bulkEmbed/partialFailure.spec.ts @@ -7,9 +7,11 @@ import { buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, + expectGoodResult, waitForBulkJobs, } from '../utils.js' import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' +import { getVectorizedPayload } from 'payloadcms-vectorize' const DIMS = DEFAULT_DIMS const dbName = `bulk_partial_failure_${Date.now()}` @@ -83,26 +85,16 @@ describe('Bulk embed - partial chunk failures', () => { data: { title: 'Partial Failure Test' } as any, }) - const run = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: testVersion, status: 'queued' }, - }) - - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { runId: String(run.id) }, - req: { payload } as any, - ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName - ? { queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } - : {}), - }) + const vectorizedPayload = getVectorizedPayload(payload) + const result = await vectorizedPayload?.bulkEmbed({ knowledgePool: 'default' }) + expectGoodResult(result) await waitForBulkJobs(payload) // Check run status - should still succeed but with failed count const updatedRun = await payload.findByID({ collection: BULK_EMBEDDINGS_RUNS_SLUG, - id: run.id, + id: result!.runId, }) expect(updatedRun.status).toBe('succeeded') diff --git a/dev/specs/bulkEmbed/partialFailureNoFail.spec.ts b/dev/specs/bulkEmbed/partialFailureNoFail.spec.ts index 133e97c..2211e40 100644 --- a/dev/specs/bulkEmbed/partialFailureNoFail.spec.ts +++ b/dev/specs/bulkEmbed/partialFailureNoFail.spec.ts @@ -7,14 +7,16 @@ import { buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, + expectGoodResult, waitForBulkJobs, } from '../utils.js' import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' +import { getVectorizedPayload } from 'payloadcms-vectorize' const DIMS = DEFAULT_DIMS const dbName = `bulk_partial_failure_nofail_${Date.now()}` -describe('Bulk embed - no partial failures', () => { +describe('Bulk embed - partial failures', () => { let payload: Payload let onErrorCalled = false let onErrorArgs: { @@ -73,26 +75,16 @@ describe('Bulk embed - no partial failures', () => { await payload.create({ collection: 'posts', data: { title: 'No Failure Test' } as any }) - const run = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: testVersion, status: 'queued' }, - }) - - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { runId: String(run.id) }, - req: { payload } as any, - ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName - ? 
{ queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } - : {}), - }) + const vectorizedPayload = getVectorizedPayload(payload) + const result = await vectorizedPayload?.bulkEmbed({ knowledgePool: 'default' }) + expectGoodResult(result) await waitForBulkJobs(payload) // Check run status const updatedRun = await payload.findByID({ collection: BULK_EMBEDDINGS_RUNS_SLUG, - id: run.id, + id: result!.runId, }) expect(updatedRun.status).toBe('succeeded') diff --git a/dev/specs/bulkEmbed/polling.spec.ts b/dev/specs/bulkEmbed/polling.spec.ts index ba47e34..9ffae7e 100644 --- a/dev/specs/bulkEmbed/polling.spec.ts +++ b/dev/specs/bulkEmbed/polling.spec.ts @@ -7,8 +7,10 @@ import { buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, + expectGoodResult, waitForBulkJobs, } from '../utils.js' +import { getVectorizedPayload } from 'payloadcms-vectorize' import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' const DIMS = DEFAULT_DIMS @@ -49,26 +51,19 @@ describe('Bulk embed - polling requeue', () => { test('polling requeues when non-terminal then succeeds', async () => { const post = await payload.create({ collection: 'posts', data: { title: 'Loop' } as any }) - - const run = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: testEmbeddingVersion, status: 'queued' }, - }) - const queueSpy = vi.spyOn(payload.jobs, 'queue') - - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { runId: String(run.id) }, - req: { payload } as any, - ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName - ? { queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } - : {}), - }) + const vectorizedPayload = getVectorizedPayload(payload) + const result = await vectorizedPayload?.bulkEmbed({ knowledgePool: 'default' }) + expectGoodResult(result) await waitForBulkJobs(payload, 15000) - expect(queueSpy).toHaveBeenCalledWith( + expect(queueSpy).toHaveBeenNthCalledWith( + 2, // 2nd call + expect.objectContaining({ task: 'payloadcms-vectorize:poll-or-complete-bulk-embedding' }), + ) + expect(queueSpy).toHaveBeenNthCalledWith( + 3, // 3rd call expect.objectContaining({ task: 'payloadcms-vectorize:poll-or-complete-bulk-embedding' }), ) @@ -79,5 +74,3 @@ describe('Bulk embed - polling requeue', () => { expect(embeds.totalDocs).toBe(1) }) }) - - diff --git a/dev/specs/bulkEmbed/realtimeMode.spec.ts b/dev/specs/bulkEmbed/realtimeMode.spec.ts index 82eb79d..e59da32 100644 --- a/dev/specs/bulkEmbed/realtimeMode.spec.ts +++ b/dev/specs/bulkEmbed/realtimeMode.spec.ts @@ -1,12 +1,12 @@ import type { Payload } from 'payload' -import { beforeAll, describe, expect, test, vi } from 'vitest' -import { createVectorizeTask } from '../../../src/tasks/vectorize.js' +import { beforeAll, describe, expect, test } from 'vitest' import { BULK_QUEUE_NAMES, DEFAULT_DIMS, buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, + waitForVectorizationJobs, } from '../utils.js' import { makeDummyEmbedDocs, makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' @@ -54,18 +54,7 @@ describe('Bulk embed - realtime mode', () => { data: { title: 'Realtime Test' } as any, }) - const vectorizeTask = createVectorizeTask({ - knowledgePools: realtimeOptions.knowledgePools, - }) - const vectorizeHandler = vectorizeTask.handler as any - - await vectorizeHandler({ - input: { doc: post, collection: 'posts', knowledgePool: 'default' } as any, - req: { payload } as any, - inlineTask: 
vi.fn(), - tasks: {} as any, - job: {} as any, - }) + await waitForVectorizationJobs(payload) const embeds = await payload.find({ collection: 'default', @@ -74,4 +63,3 @@ describe('Bulk embed - realtime mode', () => { expect(embeds.totalDocs).toBeGreaterThan(0) }) }) - diff --git a/dev/specs/bulkEmbed/versionBump.spec.ts b/dev/specs/bulkEmbed/versionBump.spec.ts index 2facc85..7f0dd4c 100644 --- a/dev/specs/bulkEmbed/versionBump.spec.ts +++ b/dev/specs/bulkEmbed/versionBump.spec.ts @@ -1,100 +1,115 @@ -import type { Payload } from 'payload' import { beforeAll, describe, expect, test } from 'vitest' -import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../../src/collections/bulkEmbeddingsRuns.js' import { BULK_QUEUE_NAMES, DEFAULT_DIMS, buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, + expectGoodResult, waitForBulkJobs, } from '../utils.js' import { makeDummyEmbedQuery } from 'helpers/embed.js' +import { getVectorizedPayload } from '../../../src/types.js' const DIMS = DEFAULT_DIMS const dbName = `bulk_version_${Date.now()}` -describe('Bulk embed - version bump', () => { - let payload: Payload +// Use distinct bulk queue names per payload instance so that +// the second payload's cron worker handles its own bulk runs, +// instead of the first payload instance continuing to process them. +const BULK_QUEUE_NAMES_0 = BULK_QUEUE_NAMES +const BULK_QUEUE_NAMES_1 = { + prepareBulkEmbedQueueName: `${BULK_QUEUE_NAMES.prepareBulkEmbedQueueName}-v2`, + pollOrCompleteQueueName: `${BULK_QUEUE_NAMES.pollOrCompleteQueueName}-v2`, +} +describe('Bulk embed - version bump', () => { + let post: any beforeAll(async () => { await createTestDb({ dbName }) - const built = await buildPayloadWithIntegration({ - dbName, - pluginOpts: { - knowledgePools: { - default: { - collections: { - posts: { - toKnowledgePool: async (doc: any) => [{ chunk: doc.title }], + }) + + test('version bump re-embeds all even without updates', async () => { + const payload0 = ( + await buildPayloadWithIntegration({ + dbName, + pluginOpts: { + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc: any) => [{ chunk: doc.title }], + }, + }, + embeddingConfig: { + version: 'old-version', + queryFn: makeDummyEmbedQuery(DIMS), + bulkEmbeddingsFns: createMockBulkEmbeddings({ statusSequence: ['succeeded'] }), }, - }, - embeddingConfig: { - version: 'new-version', - queryFn: makeDummyEmbedQuery(DIMS), - bulkEmbeddingsFns: createMockBulkEmbeddings({ statusSequence: ['succeeded'] }), }, }, + bulkQueueNames: BULK_QUEUE_NAMES_0, }, - bulkQueueNames: BULK_QUEUE_NAMES, - }, - secret: 'test-secret', - dims: DIMS, - key: `version-${Date.now()}`, - }) - payload = built.payload - }) + secret: 'test-secret', + dims: DIMS, + key: `payload0`, + }) + ).payload - test('version bump re-embeds all even without updates', async () => { - const post = await payload.create({ collection: 'posts', data: { title: 'Old' } as any }) + post = await payload0.create({ collection: 'posts', data: { title: 'Old' } as any }) + + const vectorizedPayload0 = getVectorizedPayload(payload0) + const result0 = await vectorizedPayload0?.bulkEmbed({ knowledgePool: 'default' }) + expectGoodResult(result0) + + await waitForBulkJobs(payload0) - // Create an embedding with old version manually - await payload.create({ + // Debug: log embeddings after first run + const embeds0 = await payload0.find({ collection: 'default', - data: { - docId: String(post.id), - sourceCollection: 'posts', - text: 'Old', - chunkIndex: 0, - embedding: 
Array(DIMS).fill(0.1), - embeddingVersion: 'old-version', - updatedAt: new Date().toISOString(), - } as any, + where: { docId: { equals: String(post.id) } }, }) + expect(embeds0.totalDocs).toBe(1) + expect(embeds0.docs[0].embeddingVersion).toBe('old-version') - // Run bulk embed with new version - const run = await payload.create({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - data: { pool: 'default', embeddingVersion: 'new-version', status: 'queued' }, - }) + const payload1 = ( + await buildPayloadWithIntegration({ + dbName, + pluginOpts: { + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc: any) => [{ chunk: doc.title }], + }, + }, + embeddingConfig: { + version: 'new-version', + queryFn: makeDummyEmbedQuery(DIMS), + bulkEmbeddingsFns: createMockBulkEmbeddings({ statusSequence: ['succeeded'] }), + }, + }, + }, + bulkQueueNames: BULK_QUEUE_NAMES_1, + }, + secret: 'test-secret', + dims: DIMS, + key: `payload1`, + }) + ).payload - await payload.jobs.queue<'payloadcms-vectorize:prepare-bulk-embedding'>({ - task: 'payloadcms-vectorize:prepare-bulk-embedding', - input: { runId: String(run.id) }, - req: { payload } as any, - ...(BULK_QUEUE_NAMES.prepareBulkEmbedQueueName - ? { queue: BULK_QUEUE_NAMES.prepareBulkEmbedQueueName } - : {}), - }) + const vectorizedPayload1 = getVectorizedPayload(payload1) + const result1 = await vectorizedPayload1?.bulkEmbed({ knowledgePool: 'default' }) + expectGoodResult(result1) - await waitForBulkJobs(payload) + await waitForBulkJobs(payload1) - // Should have 1 embedding with new version (old one replaced) - const embeds = await payload.find({ + const embeds1 = await payload1.find({ collection: 'default', where: { docId: { equals: String(post.id) } }, }) - expect(embeds.totalDocs).toBe(1) - expect(embeds.docs[0].embeddingVersion).toBe('new-version') - const runDoc = ( - await (payload as any).find({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - where: { id: { equals: String(run.id) } }, - }) - ).docs[0] - expect(runDoc.inputs).toBe(1) + expect(embeds1.totalDocs).toBe(1) + expect(embeds1.docs[0].embeddingVersion).toBe('new-version') }) }) - - diff --git a/dev/specs/config.spec.ts b/dev/specs/config.spec.ts index 45aa676..b183af7 100644 --- a/dev/specs/config.spec.ts +++ b/dev/specs/config.spec.ts @@ -57,10 +57,16 @@ describe('endpoints: /vector-search, /vector-bulk-embed', () => { method: 'post', handler: expect.any(Function), }), + expect.objectContaining({ + path: '/vector-retry-failed-batch', + method: 'post', + handler: expect.any(Function), + }), ]), ) }) test('uses the custom path when provided', async () => { + // TODO: Add test for custom path for bulk embed and retry failed batch const cfg = await buildDummyConfig({ plugins: [plugin({ ...dummyPluginOptions, endpointOverrides: { path: '/custom-path' } })], }) @@ -73,11 +79,6 @@ describe('endpoints: /vector-search, /vector-bulk-embed', () => { method: 'post', handler: expect.any(Function), }), - expect.objectContaining({ - path: '/vector-bulk-embed', - method: 'post', - handler: expect.any(Function), - }), ]), ) }) diff --git a/dev/specs/e2e.spec.ts b/dev/specs/e2e.spec.ts index 8d5669c..d4661aa 100644 --- a/dev/specs/e2e.spec.ts +++ b/dev/specs/e2e.spec.ts @@ -11,27 +11,19 @@ import { BULK_EMBEDDINGS_BATCHES_SLUG } from '../../src/collections/bulkEmbeddin // Helper function to log in to the admin panel const loginToAdmin = async (page: any) => { - console.log('[loginToAdmin] Starting login process...') await page.goto('/admin/login') - console.log('[loginToAdmin] 
Navigated to login page') await page.waitForLoadState('domcontentloaded') - console.log('[loginToAdmin] Page loaded') // Fill in the login form - console.log('[loginToAdmin] Filling in email...') await page.fill('input[name="email"]', devUser.email) - console.log('[loginToAdmin] Filling in password...') await page.fill('input[name="password"]', devUser.password) // Click the login button - console.log('[loginToAdmin] Clicking submit button...') await page.click('button[type="submit"]') // Wait for redirect to admin dashboard - console.log('[loginToAdmin] Waiting for redirect...') await page.waitForURL(/\/admin(?!\/login)/, { timeout: 15000 }) - console.log('[loginToAdmin] Login complete!') } const expectVectorSearchResponse = async (response: any, post: any, title: string) => { @@ -69,11 +61,9 @@ test.describe('Vector embedding e2e tests', () => { let post: any test.beforeAll(async () => { - console.log('[beforeAll] Setting up Payload instance...') // Setup: Create a post and wait for realtime embedding _config = await config payload = await getPayload({ config: _config, key: `e2e-test-${Date.now()}` }) - console.log('[beforeAll] Payload instance created') }) test('querying the endpoint should return the title with testEmbeddingVersion', async ({ @@ -110,15 +100,12 @@ test.describe('Vector embedding e2e tests', () => { page, request, }) => { - console.log('[test] Starting bulk embedding test...') test.setTimeout(120000) // Login to admin first - console.log('[test] Logging in...') await loginToAdmin(page) // Verify bulkDefault pool is EMPTY (no realTimeIngestionFn configured) - console.log('[test] Checking bulkDefault pool is empty...') const emptyResponse = await request.post('/api/vector-search', { data: { query: title, @@ -128,22 +115,16 @@ test.describe('Vector embedding e2e tests', () => { await expectEmptyVectorSearchResponse(emptyResponse) // Navigate to the bulkDefault embeddings collection page in admin - console.log('[test] Navigating to bulkDefault collection page...') await page.goto('/admin/collections/bulkDefault', { waitUntil: 'networkidle' }) - console.log('[test] Page loaded') // Wait for the page to fully load and render - console.log('[test] Waiting for page to fully load...') await page.waitForLoadState('domcontentloaded') await page.waitForLoadState('networkidle') - console.log('[test] Page fully loaded') // Wait for the collapsible header to appear - use getByText for more flexible matching // Note: If this fails, ensure `pnpm run generate:importmap` has been run - console.log('[test] Looking for "Bulk Embed All" text...') const bulkEmbedAllText = page.getByText('Bulk Embed All', { exact: false }) await expect(bulkEmbedAllText).toBeVisible({ timeout: 15000 }) - console.log('[test] Found "Bulk Embed All" text!') // Click the button that contains the h3 with "Bulk Embed All" text // The button wraps the h3, so we click the button that contains the h3 @@ -191,7 +172,6 @@ test.describe('Vector embedding e2e tests', () => { let finalStatus = '' while (attempts < maxAttempts) { - console.log('[test] Polling for status...') // Refresh the page to see updated status await page.reload() await page.waitForLoadState('domcontentloaded') @@ -201,10 +181,8 @@ test.describe('Vector embedding e2e tests', () => { .locator('.rs__single-value') .textContent() .catch(() => null) - console.log('[test] Status value:', statusValue) if (statusValue) { finalStatus = statusValue - console.log('[test] Status value:', statusValue) if (statusValue === 'succeeded') { break } @@ -230,7 +208,6 @@ 
test.describe('Vector embedding e2e tests', () => { const runIdMatch = runUrl.match(/\/(\d+)$/) const bulkRunId = runIdMatch ? runIdMatch[1] : null expect(bulkRunId).not.toBeNull() - console.log('[test] Bulk run ID:', bulkRunId) // Find the succeeded batch that was created const succeededBatches = await (payload as any).find({ @@ -241,7 +218,6 @@ test.describe('Vector embedding e2e tests', () => { }) expect(succeededBatches.totalDocs).toBeGreaterThan(0) const succeededBatch = succeededBatches.docs[0] - console.log('[test] Found succeeded batch:', succeededBatch.id) // Test: Retry endpoint returns 400 for succeeded batch const succeededRetryResponse = await request.post('/api/vector-retry-failed-batch', { @@ -250,10 +226,8 @@ test.describe('Vector embedding e2e tests', () => { expect(succeededRetryResponse.status()).toBe(400) const succeededRetryJson = await succeededRetryResponse.json() expect(succeededRetryJson.error).toContain('not in failed or retried status') - console.log('[test] Retry endpoint correctly rejected succeeded batch') // Navigate to the succeeded batch page and verify retry button is disabled - console.log('[test] Navigating to succeeded batch page...') await page.goto(`/admin/collections/${BULK_EMBEDDINGS_BATCHES_SLUG}/${succeededBatch.id}`, { waitUntil: 'networkidle', }) @@ -265,42 +239,30 @@ test.describe('Vector embedding e2e tests', () => { // Verify the button is disabled (opacity check) const buttonStyle = await retryButton.getAttribute('style') - console.log('[test] Button style:', buttonStyle) expect(buttonStyle).toContain('opacity:0.5') // Verify the "Retry Not Available" message is shown const notAvailableMessage = page.locator('text=/Retry Not Available/i') await expect(notAvailableMessage).toBeVisible({ timeout: 5000 }) - - console.log('[test] Retry button correctly disabled for succeeded batch!') }) test('clicking expand section on default collection shows not enabled message', async ({ page, }) => { - console.log('[test] Starting default collection test...') - // Login to admin first - console.log('[test] Logging in...') await loginToAdmin(page) // Navigate to the default embeddings collection page in admin - console.log('[test] Navigating to default collection page...') await page.goto('/admin/collections/default', { waitUntil: 'networkidle' }) - console.log('[test] Page loaded') // Wait for the page to fully load and render - console.log('[test] Waiting for page to fully load...') await page.waitForLoadState('domcontentloaded') await page.waitForLoadState('networkidle') - console.log('[test] Page fully loaded') // Wait for the collapsible header to appear - use getByText for more flexible matching // Note: If this fails, ensure `pnpm run generate:importmap` has been run - console.log('[test] Looking for "Bulk Embed All" text...') const bulkEmbedAllText = page.getByText('Bulk Embed All', { exact: false }) await expect(bulkEmbedAllText).toBeVisible({ timeout: 15000 }) - console.log('[test] Found "Bulk Embed All" text!') // Click the button that contains the h3 with "Bulk Embed All" text const expandButton = page.locator('button:has(h3:has-text("Bulk Embed All"))') @@ -325,19 +287,13 @@ test.describe('Vector embedding e2e tests', () => { }) test('retry failed batch endpoint returns 404 for non-existent batch', async ({ request }) => { - console.log('[test] Testing non-existent batch retry...') - const nonExistentResponse = await request.post('/api/vector-retry-failed-batch', { data: { batchId: '999999' }, }) expect(nonExistentResponse.status()).toBe(404) - - 
console.log('[test] Non-existent batch test completed!') }) test('retry failed batch endpoint works correctly', async ({ request }) => { - console.log('[test] Starting retry failed batch endpoint test...') - // Create a test post first (needed for bulk embedding to have something to embed) const post = await payload.create({ collection: 'posts', @@ -345,7 +301,6 @@ test.describe('Vector embedding e2e tests', () => { title: 'Failed batch test post', }, }) - console.log('[test] Created test post:', post.id) // Use the bulk embed endpoint to create a run for failingBulkDefault pool const bulkEmbedResponse = await request.post('/api/vector-bulk-embed', { @@ -353,15 +308,12 @@ test.describe('Vector embedding e2e tests', () => { knowledgePool: 'failingBulkDefault', }, }) - console.log('[test] Bulk embed response:', await bulkEmbedResponse.json()) expect(bulkEmbedResponse.ok()).toBe(true) const bulkEmbedJson = await bulkEmbedResponse.json() const runId = bulkEmbedJson.runId - console.log('[test] Created bulk run via endpoint:', runId) // Wait for the bulk jobs to process and fail (failingBulkDefault has a mock that fails) await waitForBulkJobs(payload, 30000) - console.log('[test] Bulk jobs completed') // Wait for the batch to actually fail (poll-or-complete job needs to finish) const runIdNum = parseInt(runId, 10) @@ -399,30 +351,8 @@ test.describe('Vector embedding e2e tests', () => { attempts++ } - if (!batches || batches.totalDocs === 0) { - // Final check for debugging - const allBatchesFinal = await (payload as any).find({ - collection: BULK_EMBEDDINGS_BATCHES_SLUG, - where: { run: { equals: runIdNum } }, - }) - const runFinal = await (payload as any).findByID({ - collection: BULK_EMBEDDINGS_RUNS_SLUG, - id: runId, - }) - console.log('[test] Failed to find failed batch after', attempts, 'attempts') - console.log('[test] Run status:', runFinal.status) - console.log('[test] Batches found:', allBatchesFinal.totalDocs) - if (allBatchesFinal.totalDocs > 0) { - console.log( - '[test] Batch statuses:', - allBatchesFinal.docs.map((b: any) => b.status), - ) - } - } - expect(batches?.totalDocs).toBeGreaterThan(0) const batch = batches.docs[0] - console.log('[test] Found failed batch:', batch.id) // Retry the failed batch (should succeed) const retryResponse = await request.post('/api/vector-retry-failed-batch', { @@ -458,12 +388,9 @@ test.describe('Vector embedding e2e tests', () => { id: runId, }) expect((updatedRun as any).status).toBe('running') - - console.log('[test] Retry failed batch endpoint test completed successfully!') }) test('retry failed batch button works for failed batches', async ({ page, request }) => { - console.log('[test] Starting retry button click test...') test.setTimeout(120000) // Login first @@ -476,11 +403,9 @@ test.describe('Vector embedding e2e tests', () => { title: 'Failed batch UI test post', }, }) - console.log('[test] Created test post:', post.id) // Wait for any existing bulk embedding jobs to complete before starting a new run await waitForBulkJobs(payload, 30000) - console.log('[test] Existing bulk jobs completed, proceeding...') // Use the bulk embed endpoint to create a run for failingBulkDefault pool const bulkEmbedResponse = await request.post('/api/vector-bulk-embed', { @@ -488,15 +413,12 @@ test.describe('Vector embedding e2e tests', () => { knowledgePool: 'failingBulkDefault', }, }) - console.log('[test] Bulk embed response:', await bulkEmbedResponse.json()) expect(bulkEmbedResponse.ok()).toBe(true) const bulkEmbedJson = await bulkEmbedResponse.json() 
const runId = bulkEmbedJson.runId - console.log('[test] Created bulk run via endpoint:', runId) // Wait for the bulk jobs to process and fail (failingBulkDefault has a mock that fails) await waitForBulkJobs(payload, 30000) - console.log('[test] Bulk jobs completed') // Wait for the batch to actually fail (poll-or-complete job needs to finish) const runIdNum = parseInt(runId, 10) @@ -536,10 +458,8 @@ test.describe('Vector embedding e2e tests', () => { expect(batches?.totalDocs).toBeGreaterThan(0) const failedBatch = batches.docs[0] - console.log('[test] Found failed batch:', failedBatch.id) // Navigate to the run edit page (where FailedBatchesList component should be visible) - console.log('[test] Navigating to run page...') await page.goto(`/admin/collections/${BULK_EMBEDDINGS_RUNS_SLUG}/${runId}`, { waitUntil: 'networkidle', }) @@ -548,10 +468,8 @@ test.describe('Vector embedding e2e tests', () => { // Wait for the FailedBatchesList component to appear const failedBatchesList = page.locator('[data-testid^="failed-batch-link-"]').first() await expect(failedBatchesList).toBeVisible({ timeout: 10000 }) - console.log('[test] Failed batches list is visible') // Click on the failed batch link to navigate to the batch page - console.log('[test] Clicking failed batch link...') await failedBatchesList.click() // Wait for navigation to batch page @@ -559,7 +477,6 @@ test.describe('Vector embedding e2e tests', () => { timeout: 10000, }) await page.waitForLoadState('domcontentloaded') - console.log('[test] Navigated to batch page') // Look for the retry button const retryButton = page.locator('[data-testid="retry-failed-batch-button"]') @@ -574,27 +491,20 @@ test.describe('Vector embedding e2e tests', () => { expect(buttonStyle).not.toContain('opacity: 0.5') // Click the retry button - console.log('[test] Clicking retry button...') await retryButton.click() // Wait for success message const successMessage = page.locator('text=/Batch resubmitted successfully/i') await expect(successMessage).toBeVisible({ timeout: 10000 }) - console.log('[test] Retry button click test completed!') - // Wait a bit for the page reload await page.waitForTimeout(2000) // Verify we're still on the batch page after reload await page.waitForURL(/\/admin\/collections\/vector-bulk-embeddings-batches\/\d+/) - - console.log('[test] Retry failed batch button test completed successfully!') }) test('missing batchId returns 400 error', async ({ request }) => { - console.log('[test] Testing missing batchId...') - const response = await request.post('/api/vector-retry-failed-batch', { data: {}, }) @@ -602,7 +512,5 @@ test.describe('Vector embedding e2e tests', () => { expect(response.status()).toBe(400) const json = await response.json() expect(json.error).toContain('batchId is required') - - console.log('[test] Missing batchId test completed!') }) }) diff --git a/dev/specs/utils.ts b/dev/specs/utils.ts index 474f4a1..cd1abf2 100644 --- a/dev/specs/utils.ts +++ b/dev/specs/utils.ts @@ -13,7 +13,9 @@ import type { BulkEmbeddingsFns, BulkEmbeddingInput, BulkEmbeddingRunStatus, + BulkEmbedResult, } from '../../src/types.js' +import { expect } from 'vitest' export const createTestDb = async ({ dbName }: { dbName: string }) => { const adminUri = @@ -221,6 +223,11 @@ export async function buildPayloadWithIntegration({ jobs: { tasks: [], autoRun: [ + { + cron: '*/2 * * * * *', + limit: 10, + queue: pluginOpts.realtimeQueueName ?? 
'default', + }, { cron: '*/2 * * * * *', limit: 10, @@ -281,3 +288,9 @@ export async function createSucceededBaselineRun( }, }) } + +export const expectGoodResult = (result: BulkEmbedResult | undefined) => { + expect(result).toBeDefined() + expect(result!.status).toBe('queued') + expect((result as any).conflict).toBeUndefined() +} diff --git a/src/admin/components/EmbedAllButton/index.tsx b/src/admin/components/EmbedAllButton/index.tsx index de0dbeb..04d20bb 100644 --- a/src/admin/components/EmbedAllButton/index.tsx +++ b/src/admin/components/EmbedAllButton/index.tsx @@ -15,19 +15,11 @@ export const EmbedAllButton: React.FC diff --git a/src/admin/components/FailedBatchesList/index.tsx b/src/admin/components/FailedBatchesList/index.tsx index a666529..56341a0 100644 --- a/src/admin/components/FailedBatchesList/index.tsx +++ b/src/admin/components/FailedBatchesList/index.tsx @@ -10,20 +10,11 @@ type FailedBatchesListProps = { } export const FailedBatchesList: React.FC = async (props) => { - // Always render something for debugging - console.log('[FailedBatchesList] Component called with props:', { - hasPayload: !!props.payload, - hasId: !!props.id, - allProps: Object.keys(props), - }) - const run = await props.payload.findByID({ collection: BULK_EMBEDDINGS_RUNS_SLUG, id: props.id, }) - console.log('[FailedBatchesList] Fetching failed batches for run:', run.id) - // Fetch failed batches for this run const runIdNum = typeof run.id === 'number' ? run.id : parseInt(String(run.id), 10) const failedBatches = await props.payload.find({ @@ -38,8 +29,6 @@ export const FailedBatchesList: React.FC = async (props) const batches = (failedBatches as any)?.docs || [] const runId = props.id || String(run.id) - console.log('[FailedBatchesList] Found batches:', batches.length, 'for run:', runId) - return ( { - console.log('RetryFailedBatchButtonClient', batchId, status, retriedBatchId) const [isSubmitting, setIsSubmitting] = useState(false) const [message, setMessage] = useState<{ text: string; error?: boolean } | null>(null) diff --git a/src/admin/components/RetryFailedBatchButton/index.tsx b/src/admin/components/RetryFailedBatchButton/index.tsx index 7f47387..e4b798c 100644 --- a/src/admin/components/RetryFailedBatchButton/index.tsx +++ b/src/admin/components/RetryFailedBatchButton/index.tsx @@ -15,8 +15,6 @@ export const RetryFailedBatchButton: React.FC< id: props.id, }) - console.log('RetryFailedBatchButtonBatch', batch) - return ( // Create vectorized payload object factory that creates methods bound to a payload instance const createVectorizedPayloadObject = (payload: Payload): VectorizedPayload => { - console.log('createVectorizedPayloadObject', payload) return { _isBulkEmbedEnabled: (knowledgePool: TPoolNames): boolean => { const poolConfig = pluginOptions.knowledgePools[knowledgePool] diff --git a/src/tasks/vectorize.ts b/src/tasks/vectorize.ts index 80f1ac0..5dc191c 100644 --- a/src/tasks/vectorize.ts +++ b/src/tasks/vectorize.ts @@ -29,7 +29,6 @@ export const createVectorizeTask = ({ /** * Vectorize Task Configuration * @description Scheduled task that vectorizes on data change. - * Runs every 5 seconds to call the embedding function. 
*/ const processVectorizationTask: TaskConfig = { slug: 'payloadcms-vectorize:vectorize', From 0ecd01c83f7ed6cb24fbc1f30575aa3adabe0ba9 Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Thu, 15 Jan 2026 17:22:42 +0700 Subject: [PATCH 40/49] trying to fix CI tests --- dev/specs/bulkEmbed/basic.spec.ts | 3 +-- dev/specs/bulkEmbed/canceledBatch.spec.ts | 2 +- dev/specs/bulkEmbed/extensionFields.spec.ts | 2 +- dev/specs/bulkEmbed/failedBatch.spec.ts | 2 +- dev/specs/bulkEmbed/multipleBatches.spec.ts | 2 +- dev/specs/bulkEmbed/multipleChunks.spec.ts | 2 +- dev/specs/bulkEmbed/partialFailure.spec.ts | 2 +- dev/specs/bulkEmbed/partialFailureNoFail.spec.ts | 2 +- dev/specs/bulkEmbed/polling.spec.ts | 2 +- dev/specs/bulkEmbed/versionBump.spec.ts | 2 +- dev/specs/utils.ts | 8 -------- dev/specs/utils.vitest.ts | 8 ++++++++ src/collections/embeddings.ts | 2 -- 13 files changed, 18 insertions(+), 21 deletions(-) create mode 100644 dev/specs/utils.vitest.ts diff --git a/dev/specs/bulkEmbed/basic.spec.ts b/dev/specs/bulkEmbed/basic.spec.ts index 46b0d29..6664ecc 100644 --- a/dev/specs/bulkEmbed/basic.spec.ts +++ b/dev/specs/bulkEmbed/basic.spec.ts @@ -10,12 +10,11 @@ import { clearAllCollections, createMockBulkEmbeddings, createTestDb, - expectGoodResult, waitForBulkJobs, } from '../utils.js' import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' import { getVectorizedPayload, VectorizedPayload } from 'payloadcms-vectorize' -import { BulkEmbedResult } from '../../../src/types.js' +import { expectGoodResult } from '../utils.vitest.js' const DIMS = DEFAULT_DIMS const dbName = `bulk_basic_${Date.now()}` diff --git a/dev/specs/bulkEmbed/canceledBatch.spec.ts b/dev/specs/bulkEmbed/canceledBatch.spec.ts index f14c4d2..46922d9 100644 --- a/dev/specs/bulkEmbed/canceledBatch.spec.ts +++ b/dev/specs/bulkEmbed/canceledBatch.spec.ts @@ -6,11 +6,11 @@ import { buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, - expectGoodResult, waitForBulkJobs, } from '../utils.js' import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' import { getVectorizedPayload, VectorizedPayload } from 'payloadcms-vectorize' +import { expectGoodResult } from '../utils.vitest.js' const DIMS = DEFAULT_DIMS const dbName = `bulk_canceled_${Date.now()}` diff --git a/dev/specs/bulkEmbed/extensionFields.spec.ts b/dev/specs/bulkEmbed/extensionFields.spec.ts index 4b829e0..c47fefd 100644 --- a/dev/specs/bulkEmbed/extensionFields.spec.ts +++ b/dev/specs/bulkEmbed/extensionFields.spec.ts @@ -7,11 +7,11 @@ import { buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, - expectGoodResult, waitForBulkJobs, } from '../utils.js' import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' import { getVectorizedPayload, VectorizedPayload } from 'payloadcms-vectorize' +import { expectGoodResult } from '../utils.vitest.js' const DIMS = DEFAULT_DIMS const dbName = `bulk_extfields_${Date.now()}` diff --git a/dev/specs/bulkEmbed/failedBatch.spec.ts b/dev/specs/bulkEmbed/failedBatch.spec.ts index 5e09d16..7819def 100644 --- a/dev/specs/bulkEmbed/failedBatch.spec.ts +++ b/dev/specs/bulkEmbed/failedBatch.spec.ts @@ -10,10 +10,10 @@ import { buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, - expectGoodResult, waitForBulkJobs, } from '../utils.js' import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' +import { expectGoodResult } from '../utils.vitest.js' const DIMS = DEFAULT_DIMS const dbName = 
`bulk_failed_${Date.now()}` diff --git a/dev/specs/bulkEmbed/multipleBatches.spec.ts b/dev/specs/bulkEmbed/multipleBatches.spec.ts index 6612847..ee17bdc 100644 --- a/dev/specs/bulkEmbed/multipleBatches.spec.ts +++ b/dev/specs/bulkEmbed/multipleBatches.spec.ts @@ -8,11 +8,11 @@ import { buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, - expectGoodResult, waitForBulkJobs, } from '../utils.js' import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' import { getVectorizedPayload, VectorizedPayload } from 'payloadcms-vectorize' +import { expectGoodResult } from '../utils.vitest.js' const DIMS = DEFAULT_DIMS const dbName = `bulk_multibatch_${Date.now()}` diff --git a/dev/specs/bulkEmbed/multipleChunks.spec.ts b/dev/specs/bulkEmbed/multipleChunks.spec.ts index 1b913e5..7e05791 100644 --- a/dev/specs/bulkEmbed/multipleChunks.spec.ts +++ b/dev/specs/bulkEmbed/multipleChunks.spec.ts @@ -6,11 +6,11 @@ import { buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, - expectGoodResult, waitForBulkJobs, } from '../utils.js' import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' import { getVectorizedPayload } from 'payloadcms-vectorize' +import { expectGoodResult } from '../utils.vitest.js' const DIMS = DEFAULT_DIMS const dbName = `bulk_multichunk_${Date.now()}` diff --git a/dev/specs/bulkEmbed/partialFailure.spec.ts b/dev/specs/bulkEmbed/partialFailure.spec.ts index 1cb5171..7eae88b 100644 --- a/dev/specs/bulkEmbed/partialFailure.spec.ts +++ b/dev/specs/bulkEmbed/partialFailure.spec.ts @@ -7,11 +7,11 @@ import { buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, - expectGoodResult, waitForBulkJobs, } from '../utils.js' import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' import { getVectorizedPayload } from 'payloadcms-vectorize' +import { expectGoodResult } from '../utils.vitest.js' const DIMS = DEFAULT_DIMS const dbName = `bulk_partial_failure_${Date.now()}` diff --git a/dev/specs/bulkEmbed/partialFailureNoFail.spec.ts b/dev/specs/bulkEmbed/partialFailureNoFail.spec.ts index 2211e40..586d4e6 100644 --- a/dev/specs/bulkEmbed/partialFailureNoFail.spec.ts +++ b/dev/specs/bulkEmbed/partialFailureNoFail.spec.ts @@ -7,11 +7,11 @@ import { buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, - expectGoodResult, waitForBulkJobs, } from '../utils.js' import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' import { getVectorizedPayload } from 'payloadcms-vectorize' +import { expectGoodResult } from '../utils.vitest.js' const DIMS = DEFAULT_DIMS const dbName = `bulk_partial_failure_nofail_${Date.now()}` diff --git a/dev/specs/bulkEmbed/polling.spec.ts b/dev/specs/bulkEmbed/polling.spec.ts index 9ffae7e..9b884c7 100644 --- a/dev/specs/bulkEmbed/polling.spec.ts +++ b/dev/specs/bulkEmbed/polling.spec.ts @@ -7,11 +7,11 @@ import { buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, - expectGoodResult, waitForBulkJobs, } from '../utils.js' import { getVectorizedPayload } from 'payloadcms-vectorize' import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' +import { expectGoodResult } from '../utils.vitest.js' const DIMS = DEFAULT_DIMS const dbName = `bulk_polling_${Date.now()}` diff --git a/dev/specs/bulkEmbed/versionBump.spec.ts b/dev/specs/bulkEmbed/versionBump.spec.ts index 7f0dd4c..8c5166c 100644 --- a/dev/specs/bulkEmbed/versionBump.spec.ts +++ b/dev/specs/bulkEmbed/versionBump.spec.ts @@ -5,11 +5,11 @@ import { 
buildPayloadWithIntegration, createMockBulkEmbeddings, createTestDb, - expectGoodResult, waitForBulkJobs, } from '../utils.js' import { makeDummyEmbedQuery } from 'helpers/embed.js' import { getVectorizedPayload } from '../../../src/types.js' +import { expectGoodResult } from '../utils.vitest.js' const DIMS = DEFAULT_DIMS const dbName = `bulk_version_${Date.now()}` diff --git a/dev/specs/utils.ts b/dev/specs/utils.ts index cd1abf2..68e8d63 100644 --- a/dev/specs/utils.ts +++ b/dev/specs/utils.ts @@ -13,9 +13,7 @@ import type { BulkEmbeddingsFns, BulkEmbeddingInput, BulkEmbeddingRunStatus, - BulkEmbedResult, } from '../../src/types.js' -import { expect } from 'vitest' export const createTestDb = async ({ dbName }: { dbName: string }) => { const adminUri = @@ -288,9 +286,3 @@ export async function createSucceededBaselineRun( }, }) } - -export const expectGoodResult = (result: BulkEmbedResult | undefined) => { - expect(result).toBeDefined() - expect(result!.status).toBe('queued') - expect((result as any).conflict).toBeUndefined() -} diff --git a/dev/specs/utils.vitest.ts b/dev/specs/utils.vitest.ts new file mode 100644 index 0000000..50000e9 --- /dev/null +++ b/dev/specs/utils.vitest.ts @@ -0,0 +1,8 @@ +import { expect } from 'vitest' +import type { BulkEmbedResult } from '../../src/types.js' + +export const expectGoodResult = (result: BulkEmbedResult | undefined) => { + expect(result).toBeDefined() + expect(result!.status).toBe('queued') + expect((result as any).conflict).toBeUndefined() +} diff --git a/src/collections/embeddings.ts b/src/collections/embeddings.ts index da1bc5a..3f8634b 100644 --- a/src/collections/embeddings.ts +++ b/src/collections/embeddings.ts @@ -39,8 +39,6 @@ export const createEmbeddingsCollection = ( // Use getVectorizedPayload to get the vectorized payload object const vectorizedPayload = getVectorizedPayload(payload) - console.log('vectorizedPayload', vectorizedPayload) - console.log('payload.config.custom', payload.config.custom) if (poolName && typeof poolName === 'string' && vectorizedPayload) { return vectorizedPayload._isBulkEmbedEnabled(poolName) } From b3312f300250ea022db1321310f5c7ce3af24492 Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Thu, 15 Jan 2026 17:58:48 +0700 Subject: [PATCH 41/49] Clean up --- dev/specs/bulkEmbed/basic.spec.ts | 2 -- dev/specs/bulkEmbed/canceledBatch.spec.ts | 2 -- dev/specs/bulkEmbed/concurrentRuns.spec.ts | 2 -- dev/specs/bulkEmbed/extensionFields.spec.ts | 2 -- dev/specs/bulkEmbed/failedBatch.spec.ts | 2 -- dev/specs/bulkEmbed/multipleBatches.spec.ts | 2 -- dev/specs/bulkEmbed/multipleChunks.spec.ts | 2 -- dev/specs/bulkEmbed/onError.spec.ts | 2 -- dev/specs/bulkEmbed/partialFailure.spec.ts | 2 -- dev/specs/bulkEmbed/partialFailureNoFail.spec.ts | 2 -- dev/specs/bulkEmbed/polling.spec.ts | 2 -- dev/specs/bulkEmbed/realtimeMode.spec.ts | 2 -- dev/specs/bulkEmbed/versionBump.spec.ts | 4 ---- dev/specs/constants.ts | 2 +- dev/specs/int.spec.ts | 2 +- dev/specs/multipools.spec.ts | 2 +- dev/specs/utils.ts | 8 ++------ 17 files changed, 5 insertions(+), 37 deletions(-) diff --git a/dev/specs/bulkEmbed/basic.spec.ts b/dev/specs/bulkEmbed/basic.spec.ts index 6664ecc..bb0219f 100644 --- a/dev/specs/bulkEmbed/basic.spec.ts +++ b/dev/specs/bulkEmbed/basic.spec.ts @@ -47,8 +47,6 @@ describe('Bulk embed - basic tests', () => { const built = await buildPayloadWithIntegration({ dbName, pluginOpts: basePluginOptions, - secret: 'test-secret', - dims: DIMS, key: `basic-${Date.now()}`, }) payload = 
built.payload diff --git a/dev/specs/bulkEmbed/canceledBatch.spec.ts b/dev/specs/bulkEmbed/canceledBatch.spec.ts index 46922d9..2b99b88 100644 --- a/dev/specs/bulkEmbed/canceledBatch.spec.ts +++ b/dev/specs/bulkEmbed/canceledBatch.spec.ts @@ -43,8 +43,6 @@ describe('Bulk embed - canceled batch', () => { }, bulkQueueNames: BULK_QUEUE_NAMES, }, - secret: 'test-secret', - dims: DIMS, key: `canceled-${Date.now()}`, }) payload = built.payload diff --git a/dev/specs/bulkEmbed/concurrentRuns.spec.ts b/dev/specs/bulkEmbed/concurrentRuns.spec.ts index 4d3d01b..c03f212 100644 --- a/dev/specs/bulkEmbed/concurrentRuns.spec.ts +++ b/dev/specs/bulkEmbed/concurrentRuns.spec.ts @@ -40,8 +40,6 @@ describe('Bulk embed - concurrent runs prevention', () => { }, bulkQueueNames: BULK_QUEUE_NAMES, }, - secret: 'test-secret', - dims: DIMS, key: `concurrent-${Date.now()}`, }) payload = built.payload diff --git a/dev/specs/bulkEmbed/extensionFields.spec.ts b/dev/specs/bulkEmbed/extensionFields.spec.ts index c47fefd..c564bea 100644 --- a/dev/specs/bulkEmbed/extensionFields.spec.ts +++ b/dev/specs/bulkEmbed/extensionFields.spec.ts @@ -47,8 +47,6 @@ describe('Bulk embed - extension fields', () => { }, bulkQueueNames: BULK_QUEUE_NAMES, }, - secret: 'test-secret', - dims: DIMS, key: `extfields-${Date.now()}`, }) payload = built.payload diff --git a/dev/specs/bulkEmbed/failedBatch.spec.ts b/dev/specs/bulkEmbed/failedBatch.spec.ts index 7819def..037fdb5 100644 --- a/dev/specs/bulkEmbed/failedBatch.spec.ts +++ b/dev/specs/bulkEmbed/failedBatch.spec.ts @@ -43,8 +43,6 @@ describe('Bulk embed - failed batch', () => { }, bulkQueueNames: BULK_QUEUE_NAMES, }, - secret: 'test-secret', - dims: DIMS, key: `failed-${Date.now()}`, }) payload = built.payload diff --git a/dev/specs/bulkEmbed/multipleBatches.spec.ts b/dev/specs/bulkEmbed/multipleBatches.spec.ts index ee17bdc..aa2de86 100644 --- a/dev/specs/bulkEmbed/multipleBatches.spec.ts +++ b/dev/specs/bulkEmbed/multipleBatches.spec.ts @@ -45,8 +45,6 @@ describe('Bulk embed - multiple batches', () => { }, bulkQueueNames: BULK_QUEUE_NAMES, }, - secret: 'test-secret', - dims: DIMS, key: `multibatch-${Date.now()}`, }) payload = built.payload diff --git a/dev/specs/bulkEmbed/multipleChunks.spec.ts b/dev/specs/bulkEmbed/multipleChunks.spec.ts index 7e05791..0f99eab 100644 --- a/dev/specs/bulkEmbed/multipleChunks.spec.ts +++ b/dev/specs/bulkEmbed/multipleChunks.spec.ts @@ -46,8 +46,6 @@ describe('Bulk embed - multiple chunks with extension fields', () => { }, bulkQueueNames: BULK_QUEUE_NAMES, }, - secret: 'test-secret', - dims: DIMS, key: `multichunk-${Date.now()}`, }) payload = built.payload diff --git a/dev/specs/bulkEmbed/onError.spec.ts b/dev/specs/bulkEmbed/onError.spec.ts index cfc2e89..f128009 100644 --- a/dev/specs/bulkEmbed/onError.spec.ts +++ b/dev/specs/bulkEmbed/onError.spec.ts @@ -51,8 +51,6 @@ describe('Bulk embed - onError callback', () => { }, bulkQueueNames: BULK_QUEUE_NAMES, }, - secret: 'test-secret', - dims: DIMS, key: `onerror-${Date.now()}`, }) payload = built.payload diff --git a/dev/specs/bulkEmbed/partialFailure.spec.ts b/dev/specs/bulkEmbed/partialFailure.spec.ts index 7eae88b..d3ef57e 100644 --- a/dev/specs/bulkEmbed/partialFailure.spec.ts +++ b/dev/specs/bulkEmbed/partialFailure.spec.ts @@ -73,8 +73,6 @@ describe('Bulk embed - partial chunk failures', () => { }, bulkQueueNames: BULK_QUEUE_NAMES, }, - secret: 'test-secret', - dims: DIMS, key: `partial-failure-${Date.now()}-${Math.random()}`, }) payload = built.payload diff --git 
a/dev/specs/bulkEmbed/partialFailureNoFail.spec.ts b/dev/specs/bulkEmbed/partialFailureNoFail.spec.ts index 586d4e6..35e877f 100644 --- a/dev/specs/bulkEmbed/partialFailureNoFail.spec.ts +++ b/dev/specs/bulkEmbed/partialFailureNoFail.spec.ts @@ -67,8 +67,6 @@ describe('Bulk embed - partial failures', () => { }, bulkQueueNames: BULK_QUEUE_NAMES, }, - secret: 'test-secret', - dims: DIMS, key: `no-partial-failure-${Date.now()}-${Math.random()}`, }) payload = built.payload diff --git a/dev/specs/bulkEmbed/polling.spec.ts b/dev/specs/bulkEmbed/polling.spec.ts index 9b884c7..eedd32a 100644 --- a/dev/specs/bulkEmbed/polling.spec.ts +++ b/dev/specs/bulkEmbed/polling.spec.ts @@ -42,8 +42,6 @@ describe('Bulk embed - polling requeue', () => { }, bulkQueueNames: BULK_QUEUE_NAMES, }, - secret: 'test-secret', - dims: DIMS, key: `polling-${Date.now()}`, }) payload = built.payload diff --git a/dev/specs/bulkEmbed/realtimeMode.spec.ts b/dev/specs/bulkEmbed/realtimeMode.spec.ts index e59da32..8e4c224 100644 --- a/dev/specs/bulkEmbed/realtimeMode.spec.ts +++ b/dev/specs/bulkEmbed/realtimeMode.spec.ts @@ -41,8 +41,6 @@ describe('Bulk embed - realtime mode', () => { const built = await buildPayloadWithIntegration({ dbName, pluginOpts: realtimeOptions, - secret: 'test-secret', - dims: DIMS, key: `realtime-${Date.now()}`, }) payload = built.payload diff --git a/dev/specs/bulkEmbed/versionBump.spec.ts b/dev/specs/bulkEmbed/versionBump.spec.ts index 8c5166c..39ab08f 100644 --- a/dev/specs/bulkEmbed/versionBump.spec.ts +++ b/dev/specs/bulkEmbed/versionBump.spec.ts @@ -50,8 +50,6 @@ describe('Bulk embed - version bump', () => { }, bulkQueueNames: BULK_QUEUE_NAMES_0, }, - secret: 'test-secret', - dims: DIMS, key: `payload0`, }) ).payload @@ -92,8 +90,6 @@ describe('Bulk embed - version bump', () => { }, bulkQueueNames: BULK_QUEUE_NAMES_1, }, - secret: 'test-secret', - dims: DIMS, key: `payload1`, }) ).payload diff --git a/dev/specs/constants.ts b/dev/specs/constants.ts index 47e5784..e695599 100644 --- a/dev/specs/constants.ts +++ b/dev/specs/constants.ts @@ -73,7 +73,7 @@ export const dummyPluginOptions = { export async function buildDummyConfig(cfg: Partial) { const built = await buildConfig({ - secret: 'test-secret', + secret: process.env.PAYLOAD_SECRET || 'test-secret', collections: [], editor: lexicalEditor(), // Provide a dummy db adapter to satisfy types; not used by these tests diff --git a/dev/specs/int.spec.ts b/dev/specs/int.spec.ts index 6995412..bea7dab 100644 --- a/dev/specs/int.spec.ts +++ b/dev/specs/int.spec.ts @@ -41,7 +41,7 @@ describe('Plugin integration tests', () => { }) config = await buildConfig({ - secret: 'test-secret', + secret: process.env.PAYLOAD_SECRET || 'test-secret', editor: lexicalEditor(), collections: [ { diff --git a/dev/specs/multipools.spec.ts b/dev/specs/multipools.spec.ts index 7288132..8b9c30d 100644 --- a/dev/specs/multipools.spec.ts +++ b/dev/specs/multipools.spec.ts @@ -54,7 +54,7 @@ describe('Multiple knowledge pools', () => { } config = await buildConfig({ - secret: 'test-secret', + secret: process.env.PAYLOAD_SECRET || 'test-secret', collections: [], editor: lexicalEditor(), db: postgresAdapter({ diff --git a/dev/specs/utils.ts b/dev/specs/utils.ts index 68e8d63..214891d 100644 --- a/dev/specs/utils.ts +++ b/dev/specs/utils.ts @@ -182,27 +182,23 @@ export function createMockBulkEmbeddings( export type BuildPayloadArgs = { dbName: string pluginOpts: any - secret?: string - dims?: number key?: string } export async function buildPayloadWithIntegration({ dbName, 
pluginOpts, - secret = 'test-secret', - dims = DEFAULT_DIMS, key, }: BuildPayloadArgs): Promise<{ payload: Payload; config: SanitizedConfig }> { const integration = createVectorizeIntegration({ default: { - dims, + dims: DEFAULT_DIMS, ivfflatLists: 1, }, }) const config = await buildConfig({ - secret, + secret: 'test-secret', editor: lexicalEditor(), collections: [ { From 8c75e1a4baa386eb82421dd1935eaba70801b183 Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Thu, 15 Jan 2026 23:53:38 +0700 Subject: [PATCH 42/49] Better bulkEmbedAll --- README.md | 30 +- dev/specs/bulkEmbed/ingestionFailure.spec.ts | 101 +++++ src/tasks/bulkEmbedAll.ts | 365 +++++++++++-------- src/types.ts | 22 +- 4 files changed, 335 insertions(+), 183 deletions(-) create mode 100644 dev/specs/bulkEmbed/ingestionFailure.spec.ts diff --git a/README.md b/README.md index 4ce2cf8..2ededdd 100644 --- a/README.md +++ b/README.md @@ -299,37 +299,37 @@ type BatchSubmission = { - `null` - "I'm accumulating this chunk, not ready to submit yet" - `{ providerBatchId }` - "I just submitted a batch to my provider" -**⚠️ Important contract about which chunks are included in a submission:** +**⚠️ Important contract:** -- When `isLastChunk=false` and you return a submission: all pending chunks **EXCEPT** the current one were submitted (current chunk starts fresh accumulation) -- When `isLastChunk=true` and you return a submission: all pending chunks **INCLUDING** the current one were submitted +When you return a submission, the plugin assumes **all chunks currently in `pendingChunks` were submitted**. The plugin tracks chunks and creates batch records based on this assumption. You control which chunks get submitted by managing your own accumulation logic. + +**About `isLastChunk`:** + +- `isLastChunk=true` indicates this is the final chunk in the run +- Use this to flush any remaining accumulated chunks before the run completes +- The plugin uses this only to know when to stop iterating, not to determine which chunks were submitted **Example implementation:** ```typescript let accumulated: BulkEmbeddingInput[] = [] -let accumulatedSize = 0 -const FILE_SIZE_LIMIT = 50 * 1024 * 1024 // 50MB +const LINE_LIMIT = 100_000 // e.g., Voyage AI's limit addChunk: async ({ chunk, isLastChunk }) => { - const chunkSize = JSON.stringify(chunk).length + // Add current chunk to accumulation first + accumulated.push(chunk) - // Would exceed limit? Submit what we have, keep current for next batch - if (accumulatedSize + chunkSize > FILE_SIZE_LIMIT && accumulated.length > 0) { + // Check if we've hit the line limit (after adding current chunk) + if (accumulated.length === LINE_LIMIT) { const result = await submitToProvider(accumulated) - accumulated = [chunk] // Start fresh WITH current chunk - accumulatedSize = chunkSize + accumulated = [] // Clear for next batch return { providerBatchId: result.id } } - accumulated.push(chunk) - accumulatedSize += chunkSize - // Last chunk? Must flush everything if (isLastChunk && accumulated.length > 0) { const result = await submitToProvider(accumulated) accumulated = [] - accumulatedSize = 0 return { providerBatchId: result.id } } @@ -337,7 +337,7 @@ addChunk: async ({ chunk, isLastChunk }) => { } ``` -**Note:** If a single chunk exceeds your provider's file size limit, you'll need to handle that edge case in your implementation (e.g., skip it, split it, or fail gracefully). 
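For byte-based provider limits the same add-first pattern applies. The sketch below is a minimal illustration only — `MAX_BATCH_BYTES`, `MAX_CHUNK_BYTES`, and `submitToProvider` are assumed placeholders for your provider's real limits and client, not plugin APIs — showing one way to flush with headroom and to fail the run early when a single chunk can never fit:

```typescript
// Minimal sketch, assuming hypothetical provider limits — adjust to your provider.
let accumulated: { id: string; text: string }[] = []
let accumulatedBytes = 0
const MAX_BATCH_BYTES = 50 * 1024 * 1024 // assumed hard per-batch limit
const MAX_CHUNK_BYTES = 1 * 1024 * 1024 // assumed cap you enforce on a single chunk

const addChunk = async ({
  chunk,
  isLastChunk,
}: {
  chunk: { id: string; text: string }
  isLastChunk: boolean
}) => {
  const chunkBytes = Buffer.byteLength(chunk.text, 'utf8')

  // A single chunk that can never fit: fail the run with a clear error
  // (or split the text upstream in your toKnowledgePool function instead).
  if (chunkBytes > MAX_CHUNK_BYTES) {
    throw new Error(`Chunk ${chunk.id} is ${chunkBytes} bytes, over the ${MAX_CHUNK_BYTES}-byte cap`)
  }

  // Add first, per the contract above: every pending chunk is part of whatever
  // submission this call returns.
  accumulated.push(chunk)
  accumulatedBytes += chunkBytes

  // Flush once we are within one max-size chunk of the batch limit, or on the final
  // chunk, so the submitted payload never exceeds MAX_BATCH_BYTES.
  if (accumulatedBytes >= MAX_BATCH_BYTES - MAX_CHUNK_BYTES || isLastChunk) {
    const result = await submitToProvider(accumulated) // same placeholder helper as the example above
    accumulated = []
    accumulatedBytes = 0
    return { providerBatchId: result.id }
  }

  return null
}
```

Throwing from `addChunk` surfaces the error on the prepare job and marks the run as failed, which is usually preferable to submitting a batch the provider will reject.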
+**Note:** If a single chunk exceeds your provider's file size or line limit, you'll need to handle that edge case in your implementation (e.g., skip it, split it, or fail gracefully). #### `pollOrCompleteBatch` - Poll and Stream Results diff --git a/dev/specs/bulkEmbed/ingestionFailure.spec.ts b/dev/specs/bulkEmbed/ingestionFailure.spec.ts new file mode 100644 index 0000000..24542c6 --- /dev/null +++ b/dev/specs/bulkEmbed/ingestionFailure.spec.ts @@ -0,0 +1,101 @@ +import type { Payload } from 'payload' +import { beforeAll, describe, expect, test } from 'vitest' +import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../../src/collections/bulkEmbeddingsRuns.js' +import { BULK_EMBEDDINGS_BATCHES_SLUG } from '../../../src/collections/bulkEmbeddingsBatches.js' +import { + BULK_QUEUE_NAMES, + DEFAULT_DIMS, + buildPayloadWithIntegration, + createMockBulkEmbeddings, + createTestDb, + waitForBulkJobs, +} from '../utils.js' +import { makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' +import { getVectorizedPayload } from 'payloadcms-vectorize' +import { expectGoodResult } from '../utils.vitest.js' + +const DIMS = DEFAULT_DIMS +const dbName = `bulk_ingestion_failure_${Date.now()}` + +describe('Bulk embed - ingestion validation failures', () => { + let payload: Payload + + beforeAll(async () => { + await createTestDb({ dbName }) + }) + + test('malformed chunk entry fails the bulk embedding run', async () => { + // Use unique version to ensure this test only processes its own data + const testVersion = `${testEmbeddingVersion}-ingestion-fail-${Date.now()}` + + const built = await buildPayloadWithIntegration({ + dbName, + pluginOpts: { + knowledgePools: { + default: { + collections: { + posts: { + // Malformed: second entry missing required "chunk" string + toKnowledgePool: async () => [{ chunk: 'ok chunk' }, { bad: 'oops' } as any], + }, + }, + embeddingConfig: { + version: testVersion, + queryFn: makeDummyEmbedQuery(DIMS), + bulkEmbeddingsFns: createMockBulkEmbeddings({ + statusSequence: ['succeeded'], + }), + }, + }, + }, + bulkQueueNames: BULK_QUEUE_NAMES, + }, + key: `ingestion-failure-${Date.now()}-${Math.random()}`, + }) + payload = built.payload + + // Create a post + await payload.create({ + collection: 'posts', + data: { title: 'bad chunks' } as any, + }) + + const vectorizedPayload = getVectorizedPayload(payload) + const result = await vectorizedPayload?.bulkEmbed({ knowledgePool: 'default' }) + expectGoodResult(result) + + // Wait for bulk jobs to finish (or fail) + await waitForBulkJobs(payload, 15000) + + // Check the run status - should be failed + const run = await payload.findByID({ + collection: BULK_EMBEDDINGS_RUNS_SLUG, + id: result!.runId, + }) + + expect(run.status).toBe('failed') + + // Check the prepare-bulk-embedding job failed with validation error + const res = await payload.find({ + collection: 'payload-jobs', + where: { + and: [{ taskSlug: { equals: 'payloadcms-vectorize:prepare-bulk-embedding' } }], + }, + limit: 1, + sort: '-createdAt', + }) + const failedJob = (res as any)?.docs?.[0] + expect(failedJob.hasError).toBe(true) + const errMsg = failedJob.error.message + expect(errMsg).toMatch(/chunk/i) + expect(errMsg).toMatch(/Invalid indices: 1/) + + // Ensure no embeddings were created (all-or-nothing validation) + const embeddingsCount = await payload.count({ collection: 'default' }) + expect(embeddingsCount.totalDocs).toBe(0) + + // Ensure no batches were created (validation happens before batching) + const batchesCount = await payload.count({ collection: 
BULK_EMBEDDINGS_BATCHES_SLUG }) + expect(batchesCount.totalDocs).toBe(0) + }) +}) diff --git a/src/tasks/bulkEmbedAll.ts b/src/tasks/bulkEmbedAll.ts index 369ea93..973371c 100644 --- a/src/tasks/bulkEmbedAll.ts +++ b/src/tasks/bulkEmbedAll.ts @@ -1,4 +1,11 @@ -import { Payload, TaskConfig, TaskHandlerResult } from 'payload' +import { + JsonObject, + PaginatedDocs, + Payload, + TaskConfig, + TaskHandlerResult, + TypeWithID, +} from 'payload' import { BatchSubmission, BulkEmbeddingOutput, @@ -97,7 +104,7 @@ export const createPrepareBulkEmbeddingTask = ({ throw new Error('[payloadcms-vectorize] bulk embed runId is required') } const payload = req.payload - const { run, poolName, dynamicConfig } = await loadRunAndConfig({ + const { poolName, dynamicConfig } = await loadRunAndConfig({ payload, runId: input.runId, knowledgePools, @@ -126,17 +133,34 @@ export const createPrepareBulkEmbeddingTask = ({ const versionMismatch = baselineVersion !== undefined && baselineVersion !== embeddingVersion // Stream missing embeddings and create batches - const result = await streamAndBatchMissingEmbeddings({ - payload, - runId: input.runId, - poolName, - dynamicConfig, - embeddingVersion, - lastBulkCompletedAt, - versionMismatch, - hasBaseline: Boolean(baselineRun), - addChunk: callbacks.addChunk, - }) + let result + try { + result = await streamAndBatchMissingEmbeddings({ + payload, + runId: input.runId, + poolName, + dynamicConfig, + embeddingVersion, + lastBulkCompletedAt, + versionMismatch, + hasBaseline: Boolean(baselineRun), + addChunk: callbacks.addChunk, + }) + } catch (error) { + // Ingestion failed (e.g., validation error) - mark run as failed + const errorMessage = (error as Error).message || String(error) + await payload.update({ + id: input.runId, + collection: BULK_EMBEDDINGS_RUNS_SLUG, + data: { + status: 'failed', + error: errorMessage, + completedAt: new Date().toISOString(), + }, + }) + // Re-throw so Payload's job system marks the job as failed + throw error + } if (result.totalInputs === 0) { // No inputs to process - mark run as succeeded @@ -465,9 +489,8 @@ export const createPollOrCompleteBulkEmbeddingTask = ({ * Stream through missing embeddings, calling addChunk for each. * User controls batching via addChunk return value. * - * Uses a two-pass approach: - * 1. First pass: count total chunks to know when we reach the last one - * 2. Second pass: stream chunks without holding all in memory + * Single-pass approach using async generator to yield chunks sequentially. + * This avoids the need for a pre-counting pass while correctly determining isLastChunk. */ async function streamAndBatchMissingEmbeddings(args: { payload: Payload @@ -499,170 +522,192 @@ async function streamAndBatchMissingEmbeddings(args: { const lastCompletedAtDate = lastBulkCompletedAt ? 
new Date(lastBulkCompletedAt) : undefined const collectionSlugs = Object.keys(dynamicConfig.collections) - // First pass: count total chunks to know the last one - // We store minimal info (docId + chunkCount) to avoid OOM - type DocChunkInfo = { collectionSlug: string; docId: string; chunkCount: number } - const docsToProcess: DocChunkInfo[] = [] - let totalChunkCount = 0 - - for (const collectionSlug of collectionSlugs) { - const collectionConfig = dynamicConfig.collections[collectionSlug] - if (!collectionConfig) continue - - const toKnowledgePool = collectionConfig.toKnowledgePool - let page = 1 - const limit = 50 + // Async generator that yields chunks one at a time + async function* generateChunks(): AsyncGenerator { + for (const collectionSlug of collectionSlugs) { + const collectionConfig = dynamicConfig.collections[collectionSlug] + if (!collectionConfig) continue + + const toKnowledgePool = collectionConfig.toKnowledgePool + const limit = 50 + + // Build where clause: filter by updatedAt if we have lastBulkCompletedAt and !includeAll + const where = includeAll + ? undefined + : lastCompletedAtDate + ? { + updatedAt: { + greater_than: lastCompletedAtDate.toISOString(), + }, + } + : undefined - while (true) { - const res = await payload.find({ + let res: PaginatedDocs | undefined = await payload.find({ collection: collectionSlug, - page, + where, limit, }) - const docs = (res as any)?.docs || [] - if (!docs.length) break - const totalPages = (res as any)?.totalPages ?? page - - for (const doc of docs) { - const docUpdatedAt = doc?.updatedAt ? new Date(doc.updatedAt) : undefined - let shouldInclude = includeAll - if (!shouldInclude) { - const updatedAfter = - docUpdatedAt && lastCompletedAtDate ? docUpdatedAt > lastCompletedAtDate : false - const hasCurrentEmbedding = await docHasEmbeddingVersion({ - payload, - poolName, - sourceCollection: collectionSlug, - docId: String(doc.id), - embeddingVersion, - }) - shouldInclude = updatedAfter || !hasCurrentEmbedding - } - if (!shouldInclude) continue - - const chunkData = await toKnowledgePool(doc, payload) - const validChunkCount = chunkData.filter((c) => c?.chunk).length - if (validChunkCount > 0) { - docsToProcess.push({ - collectionSlug, - docId: String(doc.id), - chunkCount: validChunkCount, - }) - totalChunkCount += validChunkCount - } - } + do { + const docs = res?.docs || [] + if (!docs.length) break + + for (const doc of docs) { + // If !includeAll, we still need to check if document has current embedding + // (can't filter this in the where clause since it's a cross-collection check) + if (!includeAll && !lastCompletedAtDate) { + const hasCurrentEmbedding = await docHasEmbeddingVersion({ + payload, + poolName, + sourceCollection: collectionSlug, + docId: String(doc.id), + embeddingVersion, + }) + if (hasCurrentEmbedding) continue + } - page++ - if (page > totalPages) break - } - } + const chunkData = await toKnowledgePool(doc, payload) + + // Validate chunks (same validation as real-time ingestion) + const invalidEntries = chunkData + .map((entry, idx) => { + if (!entry || typeof entry !== 'object') return idx + if (typeof entry.chunk !== 'string') return idx + return null + }) + .filter((idx): idx is number => idx !== null) + + if (invalidEntries.length > 0) { + throw new Error( + `[payloadcms-vectorize] toKnowledgePool returned ${invalidEntries.length} invalid entr${ + invalidEntries.length === 1 ? 'y' : 'ies' + } for document ${doc.id} in collection "${collectionSlug}". Each entry must be an object with a "chunk" string. 
Invalid indices: ${invalidEntries.join( + ', ', + )}`, + ) + } - // If no chunks, return early - if (totalChunkCount === 0) { - return { batchCount: 0, totalInputs: 0 } + // Yield valid chunks + for (let idx = 0; idx < chunkData.length; idx++) { + const chunkEntry = chunkData[idx] + const { chunk, ...extensionFields } = chunkEntry + + yield { + id: `${collectionSlug}:${doc.id}:${idx}`, + text: chunk, + metadata: { + sourceCollection: collectionSlug, + docId: String(doc.id), + chunkIndex: idx, + embeddingVersion, + extensionFields, + }, + } + } + } + } while ( + (res = res.nextPage + ? await payload.find({ + collection: collectionSlug, + where, + limit, + page: res.nextPage, + }) + : undefined) + ) + } } - // Second pass: stream chunks without holding all in memory + // Process chunks from generator let batchIndex = 0 let totalInputs = 0 - let processedChunkCount = 0 const pendingChunks: CollectedEmbeddingInput[] = [] - - for (const docInfo of docsToProcess) { - const collectionConfig = dynamicConfig.collections[docInfo.collectionSlug] - if (!collectionConfig) continue - - // Re-fetch the document to get its data - const doc = await payload.findByID({ - collection: docInfo.collectionSlug as any, - id: docInfo.docId, - }) - if (!doc) continue - - const toKnowledgePool = collectionConfig.toKnowledgePool - const chunkData = await toKnowledgePool(doc, payload) - - for (let idx = 0; idx < chunkData.length; idx++) { - const chunkEntry = chunkData[idx] - if (!chunkEntry?.chunk) continue - - processedChunkCount++ - const isLastChunk = processedChunkCount === totalChunkCount - - const { chunk, ...extensionFields } = chunkEntry - const collectedChunk: CollectedEmbeddingInput = { - id: `${docInfo.collectionSlug}:${doc.id}:${idx}`, - text: chunk, - metadata: { - sourceCollection: docInfo.collectionSlug, - docId: String(doc.id), - chunkIndex: idx, - embeddingVersion, - extensionFields, + const chunkIterator = generateChunks() + const runIdNum = parseInt(runId, 10) + let currentBatchId: number | undefined = undefined + + async function processChunk( + chunk: CollectedEmbeddingInput, + isLastChunk: boolean = false, + ): Promise { + // Add to pending queue BEFORE calling addChunk + pendingChunks.push(chunk) + + // If this is the first chunk in a new batch, create a placeholder batch record + if (pendingChunks.length === 1) { + // Starting a new batch - create placeholder batch record + const placeholderBatch = await payload.create({ + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + data: { + run: runIdNum, + batchIndex, + providerBatchId: `placeholder-${runId}-${batchIndex}`, // Temporary, will be updated + status: 'queued', + inputCount: 0, // Will be updated after submission + submittedAt: new Date().toISOString(), }, - } + }) + currentBatchId = (placeholderBatch as any).id + } - // Add to pending queue BEFORE calling addChunk - pendingChunks.push(collectedChunk) + if (!currentBatchId) { + throw new Error( + `[payloadcms-vectorize] Failed to get batch ID for chunk ${chunk.id} in run ${runId}`, + ) + } - const submission = await addChunk({ - chunk: { id: collectedChunk.id, text: collectedChunk.text }, - isLastChunk, - }) + // Save metadata with the batch ID + await payload.create({ + collection: BULK_EMBEDDINGS_INPUT_METADATA_SLUG, + data: { + run: runIdNum, + batch: currentBatchId, + inputId: chunk.id, + text: chunk.text, + sourceCollection: chunk.metadata.sourceCollection, + docId: chunk.metadata.docId, + chunkIndex: chunk.metadata.chunkIndex, + embeddingVersion: chunk.metadata.embeddingVersion, + 
extensionFields: chunk.metadata.extensionFields, + }, + }) - if (submission) { - // User submitted a batch - // - If isLastChunk: all pending chunks were submitted - // - If not isLastChunk: all except current were submitted (current starts fresh) - let submittedChunks: CollectedEmbeddingInput[] - if (isLastChunk) { - submittedChunks = pendingChunks.splice(0) - } else { - submittedChunks = pendingChunks.splice(0, pendingChunks.length - 1) - } + const submission = await addChunk({ + chunk: { id: chunk.id, text: chunk.text }, + isLastChunk, + }) - // Convert runId to number for postgres relationships - const runIdNum = parseInt(runId, 10) + if (submission) { + // When addChunk returns a submission, all chunks in pendingChunks were submitted + // (the provider controls which chunks get submitted) + const submittedChunks = pendingChunks.splice(0) + const inputCount = submittedChunks.length - // Create batch record first so we have the batch ID for metadata - const batchRecord = await payload.create({ - collection: BULK_EMBEDDINGS_BATCHES_SLUG, - data: { - run: runIdNum, - batchIndex, - providerBatchId: submission.providerBatchId, - status: 'queued', - inputCount: submittedChunks.length, - submittedAt: new Date().toISOString(), - }, - }) + // Update the batch record with the real providerBatchId and inputCount + await payload.update({ + id: currentBatchId, + collection: BULK_EMBEDDINGS_BATCHES_SLUG, + data: { + providerBatchId: submission.providerBatchId, + inputCount, + }, + }) - const batchId = (batchRecord as any).id - - // Store metadata for submitted chunks with batch reference - await Promise.all( - submittedChunks.map((c) => - payload.create({ - collection: BULK_EMBEDDINGS_INPUT_METADATA_SLUG, - data: { - run: runIdNum, - batch: batchId, - inputId: c.id, - text: c.text, - sourceCollection: c.metadata.sourceCollection, - docId: c.metadata.docId, - chunkIndex: c.metadata.chunkIndex, - embeddingVersion: c.metadata.embeddingVersion, - extensionFields: c.metadata.extensionFields, - }, - }), - ), - ) + totalInputs += inputCount + batchIndex++ + currentBatchId = undefined // Reset for next batch + } + } - totalInputs += submittedChunks.length - batchIndex++ - } + // Process chunks from generator + let prevChunk: CollectedEmbeddingInput | undefined = undefined + for await (const currentChunk of chunkIterator) { + if (prevChunk) { + await processChunk(prevChunk) } + prevChunk = currentChunk + } + if (prevChunk) { + await processChunk(prevChunk, true) } return { batchCount: batchIndex, totalInputs } diff --git a/src/types.ts b/src/types.ts index 0a6cd4c..f211516 100644 --- a/src/types.ts +++ b/src/types.ts @@ -221,18 +221,24 @@ export type OnBulkErrorArgs = { */ export type BulkEmbeddingsFns = { /** - * Called for each chunk. User accumulates internally based on file size logic. + * Called for each chunk. User accumulates internally based on file size/line limits. * - Return null to keep accumulating * - Return BatchSubmission when ready to submit a batch * - * **Important contract about which chunks are included:** - * - When `isLastChunk=false` and you return a submission: all pending chunks EXCEPT the current one were submitted - * - When `isLastChunk=true` and you return a submission: all pending chunks INCLUDING the current one were submitted + * **Important contract:** + * When you return a submission, all chunks that you've accumulated (and decided to submit) + * are considered submitted. 
The plugin tracks chunks in `pendingChunks` and assumes all + * of them were submitted when you return a BatchSubmission. + * + * **About `isLastChunk`:** + * - `isLastChunk=true` indicates this is the final chunk in the run + * - Use this to flush any remaining accumulated chunks before the run completes + * - The plugin uses this only to know when to stop iterating, not to determine which chunks were submitted + * + * **Example flow when chunk would exceed limit:** + * 1. Check if adding current chunk == limit or if isLastChunk is true + * 2. If yes: submit accumulated chunks and return the BatchSubmission + * 3. Start fresh in the next call */ addChunk: (args: AddChunkArgs) => Promise<BatchSubmission | null> From 2d15b68fab3b1a3b5947d9ba7c425e818cdfc14f Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Fri, 16 Jan 2026 01:18:50 +0700 Subject: [PATCH 43/49] new Readme --- CHANGELOG.md | 20 ++-- README.md | 329 ++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 270 insertions(+), 79 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15f2d73..72a4fbb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,20 +4,22 @@ All notable changes to this project will be documented in this file. ## 0.5.0 - 2026-01-15 -### Breaking Changes - -- **`queueName` renamed to `realtimeQueueName`**: The plugin option `queueName` has been renamed to `realtimeQueueName` to clarify that it only affects realtime vectorization jobs. -- **`bulkQueueName` changed to `bulkQueueNames`**: The plugin option `bulkQueueName` has been replaced with `bulkQueueNames` object containing `prepareBulkEmbedQueueName` and `pollOrCompleteQueueName` for separate queue isolation of bulk preparation vs polling workloads. -- **`isVectorizedPayload` replaced with `getVectorizedPayload`**: The type guard `isVectorizedPayload(payload)` has been replaced with `getVectorizedPayload(payload)` which returns the vectorized payload object directly (or `null` if not available). This fixes a bug where methods are missing because onInit was not called. - ### New Features -- **`bulkQueueNames` option**: New plugin option to isolate bulk embedding workloads across separate queues for preparation and polling. Required when any knowledge pool uses bulk ingest mode (`bulkEmbeddings.ingestMode === 'bulk'`). +- **Bulk Embedding**: That's right! You can now embed in bulk. Very useful for saving money. +- **`bulkQueueNames` option**: New plugin option to isolate bulk embedding workloads across separate queues for preparation and polling. Required when any knowledge pool uses bulk embeddings. - **Non-blocking bulk polling**: Bulk jobs now use separate, short-lived tasks that can safely handle long-running providers (hours/days) without blocking worker processes. -- **Improved admin UX**: The "Embed all" button now: +- **Improved admin UX**: The new "Embed all" button: + - Can be used to trigger an 'embed all' bulk embedding - Disables when bulk embeddings are not configured for a pool - Links to the latest bulk run for easy status tracking -- **Enhanced bulk provider support**: Added real Voyage AI Batch API integration in dev environment, demonstrating production-ready bulk embedding with file uploads and async polling.
+- **Voyage AI example**: Added a real Voyage AI Batch API integration in helpers/embed, demonstrating production-ready bulk embedding with file uploads and async polling. + +### Breaking Changes + +- **`queueName` renamed to `realtimeQueueName`**: The plugin option `queueName` has been renamed to `realtimeQueueName` to clarify that it only affects realtime vectorization jobs. +- **`bulkQueueName` changed to `bulkQueueNames`**: The plugin option `bulkQueueName` has been replaced with `bulkQueueNames` object containing `prepareBulkEmbedQueueName` and `pollOrCompleteQueueName` for separate queue isolation of bulk preparation vs polling workloads. +- **`isVectorizedPayload` replaced with `getVectorizedPayload`**: The type guard `isVectorizedPayload(payload)` has been replaced with `getVectorizedPayload(payload)` which returns the vectorized payload object directly (or `null` if not available). This fixes a bug where methods are missing because onInit was not called. ### Tests & Reliability diff --git a/README.md b/README.md index 2ededdd..a335339 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,8 @@ A Payload CMS plugin that adds vector search capabilities to your collections us ## Features - 🔍 **Semantic Search**: Vectorize any collection for intelligent content discovery -- 🚀 **Automatic**: Documents are automatically vectorized when created or updated, and vectors are deleted as soon as the document is deleted. -- 🧵 **Bulk embedding**: Run “Embed all” batches that backfill only documents missing the current `embeddingVersion`. +- 🚀 **Realtime**: Documents are automatically vectorized when created or updated in realtime, and vectors are deleted as soon as the document is deleted. +- 🧵 **Bulk embedding**: Run “Embed all” batches that backfill only documents missing the current `embeddingVersion` since the last bulk run, saving on embedding costs. - 📊 **PostgreSQL Integration**: Built on pgvector for high-performance vector operations - ⚡ **Background Processing**: Uses Payload's job system for non-blocking vectorization - 🎯 **Flexible Chunking**: Drive chunk creation yourself with `toKnowledgePool` functions so you can combine any fields or content types @@ -20,12 +20,6 @@ A Payload CMS plugin that adds vector search capabilities to your collections us - PostgreSQL with pgvector extension - Node.js 18+ -**Note for Payload 3.54.0+:** When initializing Payload with `getPayload`, you must include `cron: true` if you want the cron jobs to run correctly: - -```typescript -payload = await getPayload({ config, cron: true }) -``` - ## Installation ```bash pnpm add payloadcms-vectorize ``` ### 0. Have pgvector permissions -The plugin expects `vector` extension to be configured when Payload initializes. Your PostgreSQL database user must have permission to create extensions. If your user doesn't have these permissions, someone with permissions may need to manually create the extension once: +The plugin expects the `vector` extension to be configured (`db: postgresAdapter({extensions: ['vector'],...})`) when Payload initializes. Your PostgreSQL database user must have permission to create extensions.
If your user doesn't have these permissions, someone with permissions may need to manually create the extension once: ```sql CREATE EXTENSION IF NOT EXISTS vector; @@ -101,7 +95,7 @@ const postsToKnowledgePool: ToKnowledgePoolFn = async (doc, payload) => { // Create the integration with static configs (dims, ivfflatLists) const { afterSchemaInitHook, payloadcmsVectorize } = createVectorizeIntegration({ // Note limitation: Changing these values requires a migration. - main: { + mainKnowledgePool: { dims: 1536, // Vector dimensions ivfflatLists: 100, // IVFFLAT index parameter }, @@ -119,7 +113,7 @@ export default buildConfig({ plugins: [ payloadcmsVectorize({ knowledgePools: { - main: { + mainKnowledgePool: { collections: { posts: { toKnowledgePool: postsToKnowledgePool, @@ -147,18 +141,15 @@ export default buildConfig({ // }, }), ], + jobs: { // Remember to setup your cron for the embedding + autoRun: [ + ... + ], + }, }) ``` -**Important:** `knowledgePools` must have **different names than your collections**—reusing a collection name for a knowledge pool **will cause schema conflicts**. (In this example, the knowledge pool is named 'main' and a collection named 'main' will be created.) - -### 1.5. Generate Import Map (If Needed) - -Payload automatically generates the import map on startup and during development (HMR), so you typically don't need to run this manually. However, if client components (like the "Embed all" button) don't appear in the admin UI, you may need to manually generate the import map: - -```bash -pnpm run generate:importmap -``` +**Important:** `knowledgePools` must have **different names than your collections**—reusing a collection name for a knowledge pool **will cause schema conflicts**. (In this example, the knowledge pool is named 'mainKnowledgePool' and a collection named 'main-knowledge-pool' will be created.) **⚠️ Important:** Run this command: @@ -209,12 +200,6 @@ if (vectorizedPayload) { limit: 5, }) // results is an array of VectorSearchResult - - // Manually queue an embedding job - await vectorizedPayload.queueEmbed({ - collection: 'posts', - docId: 'some-post-id', - }) } ``` @@ -261,9 +246,9 @@ If neither is provided, embedding is disabled for that pool. The bulk embedding API is designed for large-scale embedding using provider batch APIs (like Voyage AI). **Bulk runs are never auto-queued** - they must be triggered manually via the admin UI or API. -#### The Streaming Model +#### The bulk embedding callbacks -The plugin streams chunks to your callbacks one at a time, giving you full control over batching based on your provider's file size limits: +In order to get bulk embeddings to interface with your provider, you must define the following three callbacks per knowledge pool (the functions do not have to be unique so you can re-use across knowledge pools). ```typescript type BulkEmbeddingsFns = { @@ -275,7 +260,7 @@ type BulkEmbeddingsFns = { #### `addChunk` - Accumulate and Submit -Called for each chunk. You manage your own accumulation and decide when to submit based on file size. +The plugin streams chunks to your callbacks one at a time; the callback is called for each chunk. You manage your own accumulation and decide when to submit based on file size. ```typescript type AddChunkArgs = { @@ -301,13 +286,12 @@ type BatchSubmission = { **⚠️ Important contract:** -When you return a submission, the plugin assumes **all chunks currently in `pendingChunks` were submitted**. The plugin tracks chunks and creates batch records based on this assumption. 
You control which chunks get submitted by managing your own accumulation logic. +When you return a submission, the plugin assumes **all chunks currently in `pendingChunks` were submitted**. The plugin tracks chunks and creates batch records based on this assumption. **About `isLastChunk`:** - `isLastChunk=true` indicates this is the final chunk in the run - Use this to flush any remaining accumulated chunks before the run completes -- The plugin uses this only to know when to stop iterating, not to determine which chunks were submitted **Example implementation:** @@ -345,7 +329,7 @@ Called repeatedly until the batch reaches a terminal status. When the batch comp ```typescript type PollOrCompleteBatchArgs = { - providerBatchId: string + providerBatchId: string // You provided it in the earlier step when you submitted a batch. onChunk: (chunk: BulkEmbeddingOutput) => Promise } @@ -412,14 +396,14 @@ The plugin uses separate Payload jobs for reliability with long-running provider ### Queue Configuration -For production deployments with bulk embedding: +For bulk embedding, you must provide the bulk queue names. ```typescript plugins: [ payloadcmsVectorize({ knowledgePools: { /* ... */ }, - realtimeQueueName: 'vectorize-realtime', - bulkQueueNames: { + realtimeQueueName: 'vectorize-realtime', // optional + bulkQueueNames: { // required iff you are using bulk embeddings prepareBulkEmbedQueueName: 'vectorize-bulk-prepare', pollOrCompleteQueueName: 'vectorize-bulk-poll', }, @@ -427,7 +411,7 @@ plugins: [ ] jobs: { - autoRun: [ + autoRun: [ // Must match { cron: '*/5 * * * * *', limit: 10, queue: 'vectorize-realtime' }, { cron: '0 0 * * * *', limit: 1, queue: 'vectorize-bulk-prepare' }, { cron: '*/30 * * * * *', limit: 5, queue: 'vectorize-bulk-poll' }, @@ -435,6 +419,103 @@ jobs: { } ``` +### Endpoints + +#### POST `/api/vector-bulk-embed` + +Starts a bulk embedding run for a knowledge pool via HTTP. This is the REST API equivalent of `vectorizedPayload.bulkEmbed()`. + +**Request Body:** + +```json +{ + "knowledgePool": "default" +} +``` + +**Success Response** (202 Accepted): + +```json +{ + "runId": "123", + "status": "queued" +} +``` + +**Conflict Response** (409 Conflict) - when a run is already active: + +```json +{ + "runId": "456", + "status": "running", + "message": "A bulk embedding run is already running for this knowledge pool. Wait for it to complete or cancel it first.", + "conflict": true +} +``` + +**Error Responses:** + +- `400 Bad Request`: Missing or invalid `knowledgePool` parameter +- `500 Internal Server Error`: Server error during processing + +**Example:** + +```bash +curl -X POST http://localhost:3000/api/vector-bulk-embed \ + -H "Content-Type: application/json" \ + -d '{"knowledgePool": "default"}' +``` + +#### POST `/api/vector-retry-failed-batch` + +Retries a failed batch from a bulk embedding run via HTTP. This is the REST API equivalent of `vectorizedPayload.retryFailedBatch()`. + +**Request Body:** + +```json +{ + "batchId": "123" +} +``` + +**Success Response** (202 Accepted): + +```json +{ + "batchId": "123", + "newBatchId": "456", + "runId": "789", + "status": "queued" +} +``` + +**Already Retried Response** (202 Accepted) - when batch was already retried: + +```json +{ + "batchId": "123", + "newBatchId": "456", + "runId": "789", + "status": "queued", + "message": "Batch was already retried. Returning the retry batch." 
+}
+```
+
+**Error Responses:**
+
+- `400 Bad Request`: Missing or invalid `batchId` parameter, or batch is not in a retriable state
+- `404 Not Found`: Batch not found
+- `409 Conflict`: Cannot retry while parent run is still active
+- `500 Internal Server Error`: Server error during processing
+
+**Example:**
+
+```bash
+curl -X POST http://localhost:3000/api/vector-retry-failed-batch \
+  -H "Content-Type: application/json" \
+  -d '{"batchId": "123"}'
+```
+
 #### CollectionVectorizeOption

 - `toKnowledgePool (doc, payload)` – return an array of `{ chunk, ...extensionFieldValues }`. Each object becomes one embedding row and the index in the array determines `chunkIndex`.
@@ -501,6 +582,8 @@ export const embedQuery = async (text: string): Promise => {
 }
 ```

+You can see more examples in `dev/helpers/embed.ts`.
+
 ## API Reference

 ### Search Endpoint
@@ -553,10 +636,11 @@ Search for similar content using vector similarity.

 ### Bulk Embedding (Embed All)

 - Each knowledge pool's embeddings list shows an **Embed all** admin button that triggers a bulk run.
-- **Note:** Make sure you've run `pnpm run generate:importmap` after plugin configuration, otherwise the button won't appear.
-- Bulk runs only include documents missing embeddings for the pool's current `embeddingConfig.version`.
-- Progress is recorded in `vector-bulk-embeddings-runs` and `vector-bulk-embeddings-batches` collections.
-- Endpoint: **POST** `/api/vector-bulk-embed`
+- **Note:** Payload automatically generates the import map on startup and during development (HMR), so you typically don't need to generate it manually. However, if client components (like the "Embed all" button) don't appear in the admin UI, you may need to regenerate it with `pnpm run generate:importmap`.
+- Bulk runs only include documents whose embedding version does not match the pool's current `embeddingConfig.version` since the previous bulk run (if no bulk run has happened yet, all documents are embedded).
+- Progress is recorded in the `vector-bulk-embeddings-runs` and `vector-bulk-embeddings-batches` collections, visible in the admin UI.
+- You can re-run failed batches from the `vector-bulk-embeddings-batches` admin UI, and the `vector-bulk-embeddings-runs` admin UI links to any failed batches.
+- Endpoints: **POST** `/api/vector-bulk-embed` and `/api/vector-retry-failed-batch`

 ```jsonc
 {
@@ -564,9 +648,10 @@ Search for similar content using vector similarity.
 }
 ```

-The bulk embedding process has **two levels of atomicity**:
+The bulk embedding process has **three levels of failure handling**:

-- **Batch level**: If any batch fails during polling, the entire run fails and no embeddings are written. This is fully atomic.
+- **Run level**: If any chunk fails during ingestion (toKnowledgePool), the entire run fails and no embeddings are written. This is fully atomic. Your `onError` callback is expected to handle cleanup for this stage.
+- **Batch level**: If any batch fails during polling, the entire run is marked as failed, but embeddings from successful batches are still written.
 - **Chunk level**: If individual chunks fail during completion (e.g., provider returns errors for specific inputs), the run still succeeds and successful embeddings are written. Failed chunks are tracked in `failedChunkData` (with structured `collection`, `documentId`, and `chunkIndex` fields) and passed to the `onError` callback for cleanup.

This design allows for partial success: if 100 chunks are processed and 2 fail, 98 embeddings are written and the 2 failures are tracked for potential retry.
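+
+For illustration, here is a minimal sketch of an `onError` handler that fits this contract. The handler name, the `FailedChunk` type, and the argument shape below are illustrative assumptions; only the `failedChunkData` fields (`collection`, `documentId`, `chunkIndex`) come from the description above, so check the plugin's exported types for the exact signature.
+
+```typescript
+import type { Payload } from 'payload'
+
+// Field names taken from the failedChunkData description above; the wrapper shape is assumed.
+type FailedChunk = { collection: string; documentId: string; chunkIndex: number }
+
+// Hypothetical onError sketch: record each failed chunk so it can be re-embedded later,
+// e.g. via vectorizedPayload.queueEmbed or another bulk run.
+export const onBulkEmbedError = async (args: {
+  payload: Payload
+  failedChunkData: FailedChunk[]
+}) => {
+  for (const failed of args.failedChunkData) {
+    args.payload.logger.error(
+      `Bulk embedding failed for ${failed.collection}/${failed.documentId} (chunk ${failed.chunkIndex})`,
+    )
+  }
+}
+```
+
+Because each failed chunk carries its source `collection` and `documentId`, a handler like this can re-queue just the affected documents instead of re-running the whole pool.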
@@ -577,9 +662,37 @@ If `bulkEmbeddingsFns` is not provided, the "Embed all" button is disabled. ### Local API -The plugin extends the Payload instance with `search` and `queueEmbed` methods. +The plugin provides a `getVectorizedPayload(payload)` function which returns a 'vectorizedPayload' (an object) with `search`, `queueEmbed`, `bulkEmbed` and `retryFailedBatch` methods. + +#### Getting the Vectorized Payload Object + +Use the `getVectorizedPayload` function to get the vectorized payload object with all vectorize methods: + +```typescript +import { getVectorizedPayload } from 'payloadcms-vectorize' -#### `payload.search(params)` +const payload = await getPayload({ config, cron: true }) +const vectorizedPayload = getVectorizedPayload(payload) + +if (vectorizedPayload) { + // Use all vectorize methods + const results = await vectorizedPayload.search({ + query: 'search query', + knowledgePool: 'main', + }) + + await vectorizedPayload.queueEmbed({ + collection: 'posts', + docId: 'some-id', + }) + + await vectorizedPayload.bulkEmbed({ + knowledgePool: 'main', + }) +} +``` + +#### `vectorizedPayload.search(params)` Perform vector search programmatically without making an HTTP request. @@ -612,7 +725,7 @@ if (vectorizedPayload) { } ``` -#### `payload.queueEmbed(params)` +#### `vectorizedPayload.queueEmbed(params)` Manually queue a vectorization job for a document. @@ -659,43 +772,120 @@ if (vectorizedPayload) { } ``` -#### Getting the Vectorized Payload Object +#### `vectorizedPayload.bulkEmbed(params)` -Use the `getVectorizedPayload` function to get the vectorized payload object with all vectorize methods: +Starts a bulk embedding run for a knowledge pool. This method queues a background job that will process all documents in the knowledge pool's collections, chunk them, and submit them to your embedding provider via the `bulkEmbeddingsFns.addChunk` callback. + +**Parameters:** + +- `params.knowledgePool` (required): The name of the knowledge pool to embed + +**Returns:** `Promise` + +**Success Response:** ```typescript -import { getVectorizedPayload } from 'payloadcms-vectorize' +{ + runId: string // ID of the created bulk embedding run + status: 'queued' // Initial status of the run +} +``` -const payload = await getPayload({ config, cron: true }) -const vectorizedPayload = getVectorizedPayload(payload) +**Conflict Response** (if a run is already active): -if (vectorizedPayload) { - // Use all vectorize methods - const results = await vectorizedPayload.search({ - query: 'search query', - knowledgePool: 'main', - }) +```typescript +{ + runId: string // ID of the existing active run + status: 'queued' | 'running' // Status of the existing run + message: string // Explanation of why a new run wasn't started + conflict: true // Indicates a conflict occurred +} +``` - await vectorizedPayload.queueEmbed({ - collection: 'posts', - docId: 'some-id', - }) +**Example:** - await vectorizedPayload.bulkEmbed({ - knowledgePool: 'main', - }) +```typescript +const result = await vectorizedPayload.bulkEmbed({ knowledgePool: 'default' }) +if ('conflict' in result && result.conflict) { + console.log('A run is already active:', result.message) +} else { + console.log('Bulk embed started with run ID:', result.runId) } ``` -## Changelog +**Notes:** -See [CHANGELOG.md](./CHANGELOG.md) for release history, migration notes, and upgrade guides. 
+- Only one bulk embedding run can be active per knowledge pool at a time +- The run will process documents that need embedding (those with mismatched `embeddingVersion` or new documents since the last successful run) +- Progress can be tracked via the `vector-bulk-embeddings-runs` and `vector-bulk-embeddings-batches` collections in the admin UI +- The run status will progress: `queued` → `running` → `succeeded` or `failed` -## Requirements +#### `vectorizedPayload.retryFailedBatch(params)` -- Payload CMS >=3.0.0 <4.0.0 (tested on 3.69.0, previously tested on 3.37.0) -- PostgreSQL with pgvector extension -- Node.js ^18.20.2 +Retries a failed batch from a bulk embedding run. This method reconstructs the chunks from the batch's metadata, resubmits them to your embedding provider, and creates a new batch record. The original batch is marked as `retried` and linked to the new batch. + +**Parameters:** + +- `params.batchId` (required): The ID of the failed batch to retry + +**Returns:** `Promise` + +**Success Response:** + +```typescript +{ + batchId: string // ID of the batch being retried + newBatchId: string // ID of the newly created batch + runId: string // ID of the parent run + status: 'queued' // Status of the new batch + message?: string // Optional confirmation message +} +``` + +**Already Retried Response** (if batch was already retried): + +```typescript +{ + batchId: string // ID of the original batch + newBatchId: string // ID of the existing retry batch + runId: string // ID of the parent run + status: 'queued' // Status of the retry batch + message: string // Message indicating batch was already retried +} +``` + +**Error Response:** + +```typescript +{ + error: string // Error message + conflict?: true // Present if error is due to a conflict (e.g., run still active) +} +``` + +**Example:** + +```typescript +const result = await vectorizedPayload.retryFailedBatch({ batchId: '123' }) +if ('error' in result) { + console.error('Failed to retry batch:', result.error) +} else { + console.log(`Batch ${result.batchId} retried. New batch ID: ${result.newBatchId}`) +} +``` + +**Notes:** + +- Only batches with `failed` or `retried` status can be retried +- The parent run must be in a terminal state (`succeeded` or `failed`) - cannot retry while run is `queued` or `running` +- If the parent run was `succeeded` or `failed`, it will be reset to `running` status +- The original batch is marked as `retried` and linked to the new batch via the `retriedBatch` field +- Chunks are reconstructed from the batch's metadata, so metadata must still exist for the retry to work +- If a batch was already retried, calling this method again returns the existing retry batch instead of creating a duplicate + +## Changelog + +See [CHANGELOG.md](./CHANGELOG.md) for release history, migration notes, and upgrade guides. ## License @@ -731,7 +921,6 @@ Thank you for the stars! 
The following updates have been completed: The following features are planned for future releases based on community interest and stars: -- **Bulk prepare progress visibility**: Real-time progress tracking during the prepare phase for large collections - **Migrations for vector dimensions**: Easy migration tools for changing vector dimensions and/or ivfflatLists after initial setup - **MongoDB support**: Extend vector search capabilities to MongoDB databases - **Vercel support**: Optimized deployment and configuration for Vercel hosting From 4609ef5097f67aac8e2d6cfc0c79dff9ee148de8 Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Fri, 16 Jan 2026 21:36:39 +0700 Subject: [PATCH 44/49] Working on adding migrations --- README.md | 93 +++- dev/specs/migrationCli.spec.ts | 863 +++++++++++++++++++++++++++++++++ package.json | 3 +- pnpm-lock.yaml | 45 +- src/bin/vectorize-migrate.ts | 588 ++++++++++++++++++++++ src/endpoints/vectorSearch.ts | 1 + src/index.ts | 328 +++++++++++-- src/types.ts | 2 + 8 files changed, 1873 insertions(+), 50 deletions(-) create mode 100644 dev/specs/migrationCli.spec.ts create mode 100644 src/bin/vectorize-migrate.ts diff --git a/README.md b/README.md index a335339..f447455 100644 --- a/README.md +++ b/README.md @@ -158,7 +158,34 @@ export default buildConfig({ The import map tells Payload how to resolve component paths (like `'payloadcms-vectorize/client#EmbedAllButton'`) to actual React components. Without it, client components referenced in your collection configs won't render. -### 2. Search Your Content +### 2. Initial Migration Setup + +After configuring the plugin, you need to create an initial migration to set up the IVFFLAT indexes in your database. + +**For new setups:** + +1. Create your initial Payload migration (this will include the embedding columns via Drizzle schema): + + ```bash + pnpm payload migrate:create --name initial + ``` + +2. Use the migration CLI helper to add IVFFLAT index setup: + + ```bash + pnpm payload vectorize:migrate + ``` + + The CLI automatically extracts your static configs from the Payload config and patches the migration file with the necessary IVFFLAT index creation SQL. + +3. Review and apply the migration: + ```bash + pnpm payload migrate + ``` + +**Note:** The embedding columns are created automatically by Drizzle via the `afterSchemaInitHook`, but the IVFFLAT indexes need to be added via migrations for proper schema management. + +### 3. Search Your Content The plugin automatically creates a `/api/vector-search` endpoint: @@ -419,7 +446,68 @@ jobs: { } ``` -### Endpoints +## Changing Static Config (ivfflatLists or dims) & Migrations + +**⚠️ Important:** Changing `dims` is **destructive** - it requires re-embedding all your data. Changing `ivfflatLists` rebuilds the index (non-destructive but may take time). + +When you change static config values (`dims` or `ivfflatLists`): + +1. **Update your static config** in `payload.config.ts`: + + ```typescript + const { afterSchemaInitHook, payloadcmsVectorize } = createVectorizeIntegration({ + mainKnowledgePool: { + dims: 1536, // Changed from previous value + ivfflatLists: 200, // Changed from previous value + }, + }) + ``` + +2. 
**Create a migration** using the CLI helper: + + ```bash + pnpm payload vectorize:migrate + ``` + + The CLI will: + - Detect changes in your static configs + - Create a new Payload migration using `payload.db.createMigration` + - Patch it with appropriate SQL: + - **If `ivfflatLists` changed**: Rebuilds the IVFFLAT index with the new `lists` parameter (DROP + CREATE INDEX) + - **If `dims` changed**: Truncates the embeddings table (destructive - you'll need to re-embed) + +3. **Review the migration file** in `src/migrations/` - it will be named something like `*_vectorize-config.ts` + +4. **Apply the migration**: + + ```bash + pnpm payload migrate + ``` + +5. **If `dims` changed**: Re-embed all your documents using the bulk embed feature. + +**Schema name qualification:** + +The CLI automatically uses the `schemaName` from your Postgres adapter configuration. If you use a custom schema (e.g., `postgresAdapter({ schemaName: 'custom' })`), all SQL in the migration will be properly qualified with that schema name. + +**Idempotency:** + +Running `pnpm payload vectorize:migrate` multiple times with no config changes will not create duplicate migrations. The CLI detects when no changes are needed and exits early. + +**Development workflow:** + +During development, you may want to disable Payload's automatic schema push to ensure migrations are used: + +- Set `migrations: { disableAutomaticMigrations: true }` in your Payload config, or +- Avoid using `pnpm payload migrate:status --force` which auto-generates migrations + +This ensures your vector-specific migrations are properly applied. + +**Runtime behavior:** + +The `ensurePgvectorArtifacts` function is now **presence-only** - it checks that pgvector artifacts (extension, column, index) exist but does not create or modify them. If artifacts are missing, it throws descriptive errors prompting you to run migrations. This ensures migrations are the single source of truth for schema changes. + +## Endpoints #### POST `/api/vector-bulk-embed` @@ -921,7 +1009,6 @@ Thank you for the stars! 
The following updates have been completed: The following features are planned for future releases based on community interest and stars: -- **Migrations for vector dimensions**: Easy migration tools for changing vector dimensions and/or ivfflatLists after initial setup - **MongoDB support**: Extend vector search capabilities to MongoDB databases - **Vercel support**: Optimized deployment and configuration for Vercel hosting diff --git a/dev/specs/migrationCli.spec.ts b/dev/specs/migrationCli.spec.ts new file mode 100644 index 0000000..edb4473 --- /dev/null +++ b/dev/specs/migrationCli.spec.ts @@ -0,0 +1,863 @@ +import type { Payload, SanitizedConfig } from 'payload' +import { beforeAll, describe, expect, test, afterAll } from 'vitest' +import { postgresAdapter } from '@payloadcms/db-postgres' +import { buildConfig, getPayload } from 'payload' +import { createVectorizeIntegration } from 'payloadcms-vectorize' +import { makeDummyEmbedDocs, makeDummyEmbedQuery, testEmbeddingVersion } from '../helpers/embed.js' +import { createTestDb } from './utils.js' +import { DIMS } from './constants.js' +import type { PostgresPayload } from '../../src/types.js' +import { script as vectorizeMigrateScript } from '../../src/bin/vectorize-migrate.js' +import { readdirSync, statSync, existsSync, readFileSync, rmSync } from 'fs' +import { join, resolve } from 'path' + +describe('Migration CLI and ensurePgvectorArtifacts integration tests', () => { + const dbName = `migration_cli_test_${Date.now()}` + let payload: Payload + + beforeAll(async () => { + await createTestDb({ dbName }) + + const integration = createVectorizeIntegration({ + default: { + dims: DIMS, + ivfflatLists: 10, + }, + }) + + const config = await buildConfig({ + secret: 'test-secret', + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: ['vector'], + afterSchemaInit: [integration.afterSchemaInitHook], + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, + }, + }), + plugins: [ + integration.payloadcmsVectorize({ + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc) => [{ chunk: doc.title || '' }], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(DIMS), + realTimeIngestionFn: makeDummyEmbedDocs(DIMS), + }, + }, + }, + }), + ], + jobs: { + tasks: [], + autoRun: [ + { + cron: '*/5 * * * * *', + limit: 10, + }, + ], + }, + }) + + // Temporarily disable onInit for runtime behavior tests + // This prevents ensurePgvectorArtifacts from running before tests can set up their state + + payload = await getPayload({ + config, + cron: true, + disableOnInit: true, + key: `test-runtime-behavior-${Date.now()}`, + }) + }) + + describe('Runtime behavior', () => { + test('ensurePgvectorArtifacts is presence-only and does not rebuild index', async () => { + const postgresPayload = payload as PostgresPayload + const schemaName = postgresPayload.db.schemaName || 'public' + const tableName = 'default' + + // Manually create the index first (simulating a migration) + await postgresPayload.db.pool?.query( + `CREATE INDEX IF NOT EXISTS ${tableName}_embedding_ivfflat ON "${schemaName}"."${tableName}" USING ivfflat (embedding vector_cosine_ops) WITH (lists = 10)`, + ) + + // Get initial index definition + const initialIndex = await postgresPayload.db.pool?.query( + `SELECT pg_get_indexdef(c.oid) as def + FROM pg_indexes i + JOIN pg_class c ON c.relname = i.indexname + JOIN 
pg_namespace n ON n.oid = c.relnamespace AND n.nspname = i.schemaname + WHERE i.schemaname = $1 AND i.tablename = $2 AND i.indexname = $3`, + [schemaName, tableName, `${tableName}_embedding_ivfflat`], + ) + const initialDef = initialIndex?.rows[0]?.def || '' + + // Call ensurePgvectorArtifacts (via onInit which should check presence) + // Since we already have the artifacts, it should pass without modifying + // Note: onInit calls ensurePgvectorArtifacts, but since artifacts exist, it should just verify + await payload.config.onInit?.(payload) + + // Verify index definition hasn't changed + const afterIndex = await postgresPayload.db.pool?.query( + `SELECT pg_get_indexdef(c.oid) as def + FROM pg_indexes i + JOIN pg_class c ON c.relname = i.indexname + JOIN pg_namespace n ON n.oid = c.relnamespace AND n.nspname = i.schemaname + WHERE i.schemaname = $1 AND i.tablename = $2 AND i.indexname = $3`, + [schemaName, tableName, `${tableName}_embedding_ivfflat`], + ) + const afterDef = afterIndex?.rows[0]?.def || '' + + // Index should still exist and be the same + expect(afterDef).toBeTruthy() + expect(afterDef).toBe(initialDef) + }) + + test('VectorizedPayload has _staticConfigs', async () => { + const { getVectorizedPayload } = await import('payloadcms-vectorize') + const vectorizedPayload = getVectorizedPayload(payload) + + expect(vectorizedPayload).toBeTruthy() + expect(vectorizedPayload?._staticConfigs).toBeDefined() + expect(vectorizedPayload?._staticConfigs.default).toBeDefined() + expect(vectorizedPayload?._staticConfigs.default.dims).toBe(DIMS) + expect(vectorizedPayload?._staticConfigs.default.ivfflatLists).toBe(10) + }) + + test('ensurePgvectorArtifacts throws error when artifacts are missing (user has not run migrations)', async () => { + // Create a new database without any migrations applied + // This simulates the state when a user hasn't run migrations yet + const testDbName = `migration_cli_test_missing_${Date.now()}` + console.log('[TEST] Step 1: Creating test database:', testDbName) + await createTestDb({ dbName: testDbName }) + console.log('[TEST] Step 2: Database created') + + console.log('[TEST] Step 3: Creating integration') + const integration = createVectorizeIntegration({ + default: { + dims: DIMS, + ivfflatLists: 10, + }, + }) + console.log('[TEST] Step 4: Integration created') + + console.log('[TEST] Step 5: Starting buildConfig...') + const config = await buildConfig({ + secret: 'test-secret', + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: ['vector'], + afterSchemaInit: [integration.afterSchemaInitHook], + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${testDbName}`, + }, + }), + plugins: [ + integration.payloadcmsVectorize({ + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc) => [{ chunk: doc.title || '' }], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(DIMS), + realTimeIngestionFn: makeDummyEmbedDocs(DIMS), + }, + }, + }, + }), + ], + jobs: { + tasks: [], + autoRun: [], + }, + }) + console.log('[TEST] Step 6: buildConfig completed') + + // Note: onInit will be called during getPayload and will throw because artifacts don't exist + // This simulates the real-world scenario where a user hasn't run migrations yet + // The error will be "Embedding column not found" (first check that fails) + console.log('[TEST] Step 7: Calling getPayload (should throw)...') + await expect( 
+ getPayload({ config, cron: true, key: `test-missing-artifacts-${Date.now()}` }), + ).rejects.toThrow('Embedding column not found') + console.log('[TEST] Step 8: getPayload threw as expected') + }) + }) + + describe('CLI workflow (sequential)', () => { + const cliDbName = `migration_cli_e2e_test_${Date.now()}` + let cliPayload: Payload + let cliConfig: SanitizedConfig + const migrationsDir = resolve(process.cwd(), 'dev', 'test-migrations-cli') + + beforeAll(async () => { + await createTestDb({ dbName: cliDbName }) + + // Clean up any existing migrations directory to ensure clean state + if (existsSync(migrationsDir)) { + rmSync(migrationsDir, { recursive: true, force: true }) + } + + // Create test migrations directory + const { mkdirSync } = await import('fs') + mkdirSync(migrationsDir, { recursive: true }) + }) + + afterAll(async () => { + // Cleanup: remove test migrations directory + if (existsSync(migrationsDir)) { + rmSync(migrationsDir, { recursive: true, force: true }) + } + }) + + test('1. Initial setup: create migration with IVFFLAT index', async () => { + // Step 1: Create integration with initial config + const integration = createVectorizeIntegration({ + default: { + dims: DIMS, + ivfflatLists: 10, // Initial lists parameter + }, + }) + + cliConfig = await buildConfig({ + secret: 'test-secret', + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: ['vector'], + afterSchemaInit: [integration.afterSchemaInitHook], + migrationDir: migrationsDir, + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${cliDbName}`, + }, + }), + plugins: [ + integration.payloadcmsVectorize({ + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc) => [{ chunk: doc.title || '' }], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(DIMS), + realTimeIngestionFn: makeDummyEmbedDocs(DIMS), + }, + }, + }, + }), + ], + jobs: { + tasks: [], + autoRun: [ + { + cron: '*\/5 * * * * *', + limit: 10, + }, + ], + }, + }) + + // Temporarily disable onInit to avoid ensurePgvectorArtifacts check before migrations are applied + const savedOnInit = cliConfig.onInit + cliConfig.onInit = async () => { + // No-op: migrations haven't been applied yet + } + + cliPayload = await getPayload({ + config: cliConfig, + cron: true, + key: `test-initial-setup-${Date.now()}`, + disableOnInit: true, + }) + + // Step 2: Create initial migration (this will include the embedding column via Drizzle) + console.log('[TEST] Step 2: Creating initial migration...') + await cliPayload.db.createMigration({ + migrationName: 'initial', + payload: cliPayload, + }) + console.log('[TEST] Step 2.5: Initial migration created') + + // Step 3: Run vectorize:migrate to add IVFFLAT index to the migration + console.log('[TEST] Step 3: Running vectorize:migrate...') + await vectorizeMigrateScript(cliConfig) + + // Debug: Print all files in migrations directory + console.log('[TEST] Step 3.5: Listing all files in migrations directory:') + const allFiles = readdirSync(migrationsDir) + for (const file of allFiles) { + const filePath = join(migrationsDir, file) + const stats = statSync(filePath) + console.log( + `[TEST] - ${file} (${stats.size} bytes, modified: ${stats.mtime.toISOString()})`, + ) + if (file.endsWith('.ts') && file !== 'index.ts') { + const content = readFileSync(filePath, 'utf-8') + console.log(`[TEST] Content preview (first 500 chars): ${content.substring(0, 500)}`) 
+ console.log( + `[TEST] Contains 'up' function: ${content.includes('export async function up')}`, + ) + console.log(`[TEST] Contains 'CREATE INDEX': ${content.includes('CREATE INDEX')}`) + console.log(`[TEST] Contains 'ivfflat': ${content.includes('ivfflat')}`) + console.log(`[TEST] Contains 'lists =': ${content.includes('lists =')}`) + console.log( + `[TEST] Contains 'default_embedding_ivfflat': ${content.includes('default_embedding_ivfflat')}`, + ) + // Show the last 1000 chars where our code should be + console.log( + `[TEST] Content preview (last 1000 chars): ${content.substring(Math.max(0, content.length - 1000))}`, + ) + } + } + + // Step 4: Apply the migration + console.log('[TEST] Step 4: Applying migration...') + try { + // Try using db.migrate() if it exists (internal API) + if (typeof (cliPayload.db as any).migrate === 'function') { + console.log('[TEST] Step 4.1: Using db.migrate() method') + await (cliPayload.db as any).migrate() + } else { + // Fallback: manually load and execute migration files + console.log( + '[TEST] Step 4.1: db.migrate() not available, using manual migration execution', + ) + const migrationFiles = readdirSync(migrationsDir) + .filter((f) => f.endsWith('.ts') && f !== 'index.ts') + .sort() + + for (const file of migrationFiles) { + const migrationPath = join(migrationsDir, file) + console.log(`[TEST] Step 4.2: Loading migration: ${file}`) + const migration = await import(migrationPath) + if (migration.up) { + console.log(`[TEST] Step 4.3: Executing up() for ${file}`) + await migration.up({ db: cliPayload.db.drizzle, payload: cliPayload, req: {} as any }) + } + } + } + console.log('[TEST] Step 4.5: Migration applied') + } catch (error) { + console.error('[TEST] Step 4.5: Migration failed with error:', error) + throw error + } + + // Step 4.55: Check database directly to see if index exists + const postgresPayloadCheck = cliPayload as PostgresPayload + const schemaNameCheck = postgresPayloadCheck.db.schemaName || 'public' + const indexNameCheck = 'default_embedding_ivfflat' + try { + const directIndexCheck = await postgresPayloadCheck.db.pool?.query( + `SELECT indexname FROM pg_indexes WHERE schemaname = $1 AND indexname = $2`, + [schemaNameCheck, indexNameCheck], + ) + console.log( + `[TEST] Step 4.55: Direct database check - index exists: ${(directIndexCheck?.rows.length || 0) > 0}`, + ) + if (directIndexCheck?.rows.length === 0) { + console.log(`[TEST] Step 4.55: WARNING - Index not found in database after migration!`) + // List all indexes on the default table + const allIndexes = await postgresPayloadCheck.db.pool?.query( + `SELECT indexname FROM pg_indexes WHERE schemaname = $1 AND tablename = 'default'`, + [schemaNameCheck], + ) + console.log( + `[TEST] Step 4.55: All indexes on 'default' table: ${allIndexes?.rows.map((r: any) => r.indexname).join(', ') || 'none'}`, + ) + } + } catch (error) { + console.error('[TEST] Step 4.55: Error checking database:', error) + } + + // Step 4.6: Verify the migration file actually contains the IVFFLAT code + const allMigrationsAfter = readdirSync(migrationsDir) + .filter((f) => f.endsWith('.ts') && f !== 'index.ts') + .map((f) => ({ + name: f, + path: join(migrationsDir, f), + mtime: statSync(join(migrationsDir, f)).mtime, + })) + .sort((a, b) => b.mtime.getTime() - a.mtime.getTime()) + const latestMigrationFile = allMigrationsAfter[0]?.path + if (latestMigrationFile) { + const migrationFileAfterApply = readFileSync(latestMigrationFile, 'utf-8') + console.log(`[TEST] Step 4.6: Checking migration file after apply: 
${latestMigrationFile}`) + console.log( + `[TEST] File contains 'ivfflat': ${migrationFileAfterApply.includes('ivfflat')}`, + ) + console.log( + `[TEST] File contains 'lists = 10': ${migrationFileAfterApply.includes('lists = 10')}`, + ) + console.log( + `[TEST] File contains 'drizzle.execute': ${migrationFileAfterApply.includes('drizzle.execute')}`, + ) + // Find the IVFFLAT code section + const ivfflatMatch = migrationFileAfterApply.match(/ivfflat[\s\S]{0,500}/i) + if (ivfflatMatch) { + console.log(`[TEST] IVFFLAT code section: ${ivfflatMatch[0]}`) + } + // Show the end of the up function where our code should be + const upFunctionEnd = migrationFileAfterApply.lastIndexOf('export async function up') + if (upFunctionEnd !== -1) { + const upFunctionContent = migrationFileAfterApply.substring(upFunctionEnd) + const last500OfUp = upFunctionContent.substring( + Math.max(0, upFunctionContent.length - 500), + ) + console.log(`[TEST] Last 500 chars of up function: ${last500OfUp}`) + } + } + + // Restore onInit and run it now that migrations are applied + cliConfig.onInit = savedOnInit + if (cliConfig.onInit) { + await cliConfig.onInit(cliPayload) + } + + // Step 5: Verify index exists with correct lists parameter + const postgresPayload = cliPayload as PostgresPayload + const schemaName = postgresPayload.db.schemaName || 'public' + const tableName = 'default' + const indexName = `${tableName}_embedding_ivfflat` + + const indexCheck = await postgresPayload.db.pool?.query( + `SELECT pg_get_indexdef(c.oid) as def + FROM pg_indexes i + JOIN pg_class c ON c.relname = i.indexname + JOIN pg_namespace n ON n.oid = c.relnamespace AND n.nspname = i.schemaname + WHERE i.schemaname = $1 AND i.tablename = $2 AND i.indexname = $3`, + [schemaName, tableName, indexName], + ) + const indexDef = indexCheck?.rows[0]?.def || '' + console.log(`[TEST] Step 5.5: Index definition: ${indexDef}`) + expect(indexDef).toBeTruthy() + // PostgreSQL returns lists='10' (with quotes), so match either format + expect(indexDef).toMatch(/lists\s*=\s*['"]?10['"]?/i) + console.log('[TEST] Test 1 completed successfully') + }) + + test('2. 
Change ivfflatLists: CLI creates migration, apply and verify', async () => { + // Step 1: Recreate integration with changed ivfflatLists + const integration = createVectorizeIntegration({ + default: { + dims: DIMS, + ivfflatLists: 20, // Changed from 10 to 20 + }, + }) + + // Update config with new integration (this simulates changing static config in payload.config.ts) + cliConfig = await buildConfig({ + secret: 'test-secret', + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: ['vector'], + afterSchemaInit: [integration.afterSchemaInitHook], + migrationDir: migrationsDir, + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${cliDbName}`, + }, + }), + plugins: [ + integration.payloadcmsVectorize({ + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc) => [{ chunk: doc.title || '' }], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(DIMS), + realTimeIngestionFn: makeDummyEmbedDocs(DIMS), + }, + }, + }, + }), + ], + jobs: { + tasks: [], + autoRun: [ + { + cron: '*\/5 * * * * *', + limit: 10, + }, + ], + }, + }) + + // Temporarily disable onInit to avoid ensurePgvectorArtifacts check before migrations are applied + const savedOnInit = cliConfig.onInit + cliConfig.onInit = async () => { + // No-op: migrations haven't been applied yet + } + + cliPayload = await getPayload({ + config: cliConfig, + cron: true, + key: `test-ivfflat-change-${Date.now()}`, + disableOnInit: true, + }) + + // Step 2: Run vectorize:migrate (should detect change and create migration) + console.log('[TEST] Step 2: Running vectorize:migrate...') + const migrateScriptStart = Date.now() + try { + await Promise.race([ + vectorizeMigrateScript(cliConfig), + new Promise((_, reject) => + setTimeout(() => reject(new Error('vectorize:migrate timed out after 30s')), 30000), + ), + ]) + const migrateScriptEnd = Date.now() + console.log( + `[TEST] Step 2.5: vectorize:migrate completed in ${migrateScriptEnd - migrateScriptStart}ms`, + ) + } catch (error) { + console.error('[TEST] Step 2.5: vectorize:migrate failed:', error) + throw error + } + + // Step 3: Verify migration file was created and contains correct SQL + console.log('[TEST] Step 3: Listing all files in migrations directory:') + const allFiles = readdirSync(migrationsDir) + for (const file of allFiles) { + const filePath = join(migrationsDir, file) + const stats = statSync(filePath) + console.log( + `[TEST] - ${file} (${stats.size} bytes, modified: ${stats.mtime.toISOString()})`, + ) + } + + const migrations = readdirSync(migrationsDir) + .filter( + (f) => (f.endsWith('.ts') || f.endsWith('.js')) && f !== 'index.ts' && f !== 'index.js', + ) + .map((f) => ({ + name: f, + path: join(migrationsDir, f), + mtime: statSync(join(migrationsDir, f)).mtime, + })) + .sort((a, b) => b.mtime.getTime() - a.mtime.getTime()) + + console.log(`[TEST] Found ${migrations.length} migration files (excluding index.ts/js)`) + migrations.forEach((m, i) => { + console.log(`[TEST] ${i + 1}. 
${m.name} (${m.mtime.toISOString()})`) + }) + + const newestMigration = migrations[0] + expect(newestMigration).toBeTruthy() + console.log(`[TEST] Reading migration file: ${newestMigration.path}`) + + // Verify migration file contains IVFFLAT rebuild SQL + const migrationContent = readFileSync(newestMigration.path, 'utf-8') + console.log(`[TEST] Migration file content length: ${migrationContent.length} characters`) + console.log( + `[TEST] Migration file preview (first 1000 chars):\n${migrationContent.substring(0, 1000)}`, + ) + // PostgreSQL returns lists='20' (with quotes), so match either format + expect(migrationContent).toMatch(/lists\s*=\s*['"]?20['"]?/i) + expect(migrationContent).toContain('DROP INDEX') + expect(migrationContent).toContain('CREATE INDEX') + + // Step 4: Apply the migration + if (typeof (cliPayload.db as any).migrate === 'function') { + await (cliPayload.db as any).migrate() + } else { + // Fallback: manually load and execute migration files + const migrationFiles = readdirSync(migrationsDir) + .filter((f) => f.endsWith('.ts') && f !== 'index.ts') + .sort() + + for (const file of migrationFiles) { + const migrationPath = join(migrationsDir, file) + const migration = await import(migrationPath) + if (migration.up) { + await migration.up({ db: cliPayload.db.drizzle, payload: cliPayload, req: {} as any }) + } + } + } + + // Restore onInit and run it now that migrations are applied + if (savedOnInit) { + cliConfig.onInit = savedOnInit + await savedOnInit(cliPayload) + } + + // Step 5: Verify index was rebuilt with new lists parameter + const postgresPayload = cliPayload as PostgresPayload + const schemaName = postgresPayload.db.schemaName || 'public' + const tableName = 'default' + const indexName = `${tableName}_embedding_ivfflat` + + const indexCheck = await postgresPayload.db.pool?.query( + `SELECT pg_get_indexdef(c.oid) as def + FROM pg_indexes i + JOIN pg_class c ON c.relname = i.indexname + JOIN pg_namespace n ON n.oid = c.relnamespace AND n.nspname = i.schemaname + WHERE i.schemaname = $1 AND i.tablename = $2 AND i.indexname = $3`, + [schemaName, tableName, indexName], + ) + const indexDef = indexCheck?.rows[0]?.def || '' + expect(indexDef).toBeTruthy() + // PostgreSQL returns lists='20' (with quotes), so match either format + expect(indexDef).toMatch(/lists\s*=\s*['"]?20['"]?/i) + }) + + test('3. Idempotency: CLI does not create duplicate migration when config unchanged', async () => { + // Get migration count before + const migrationsBefore = readdirSync(migrationsDir).filter( + (f) => f.endsWith('.ts') || f.endsWith('.js'), + ).length + + // Run vectorize:migrate again (config hasn't changed) + console.log('[TEST] Running vectorize:migrate for idempotency check...') + const startTime = Date.now() + try { + await Promise.race([ + vectorizeMigrateScript(cliConfig), + new Promise((_, reject) => + setTimeout(() => reject(new Error('vectorize:migrate timed out after 30s')), 30000), + ), + ]) + const endTime = Date.now() + console.log(`[TEST] vectorize:migrate completed in ${endTime - startTime}ms`) + } catch (error) { + console.error('[TEST] vectorize:migrate failed:', error) + throw error + } + + // Verify no new migration was created + const migrationsAfter = readdirSync(migrationsDir).filter( + (f) => f.endsWith('.ts') || f.endsWith('.js'), + ).length + + expect(migrationsAfter).toBe(migrationsBefore) + }) + + test('4. 
Change dims: CLI creates destructive migration', async () => { + console.log('[TEST] Starting test 4: Change dims') + const NEW_DIMS = DIMS + 2 // Change dimensions (destructive) + console.log(`[TEST] NEW_DIMS: ${NEW_DIMS}`) + + // Step 1: Recreate integration with changed dims + console.log('[TEST] Step 1: Creating integration with changed dims...') + const integration = createVectorizeIntegration({ + default: { + dims: NEW_DIMS, // Changed dimensions + ivfflatLists: 20, // Keep same lists + }, + }) + + // Update config with new integration + cliConfig = await buildConfig({ + secret: 'test-secret', + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: ['vector'], + afterSchemaInit: [integration.afterSchemaInitHook], + migrationDir: migrationsDir, + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${cliDbName}`, + }, + }), + plugins: [ + integration.payloadcmsVectorize({ + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc) => [{ chunk: doc.title || '' }], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(NEW_DIMS), + realTimeIngestionFn: makeDummyEmbedDocs(NEW_DIMS), + }, + }, + }, + }), + ], + jobs: { + tasks: [], + autoRun: [ + { + cron: '*\/5 * * * * *', + limit: 10, + }, + ], + }, + }) + + // Temporarily disable onInit to avoid ensurePgvectorArtifacts check before migrations are applied + const savedOnInitDims = cliConfig.onInit + cliConfig.onInit = async () => { + // No-op: migrations haven't been applied yet + } + + cliPayload = await getPayload({ + config: cliConfig, + cron: true, + key: `test-dims-change-${Date.now()}`, + disableOnInit: true, + }) + + // Step 2: Run vectorize:migrate (should detect dims change) + console.log('[TEST] Step 2: Running vectorize:migrate...') + await vectorizeMigrateScript(cliConfig) + console.log('[TEST] Step 2.5: vectorize:migrate completed') + + // Step 3: Verify migration file contains destructive SQL (truncate + column type change) + console.log('[TEST] Step 3: Listing all files in migrations directory:') + const allFiles = readdirSync(migrationsDir) + for (const file of allFiles) { + const filePath = join(migrationsDir, file) + const stats = statSync(filePath) + console.log( + `[TEST] - ${file} (${stats.size} bytes, modified: ${stats.mtime.toISOString()})`, + ) + } + + const migrations = readdirSync(migrationsDir) + .filter( + (f) => (f.endsWith('.ts') || f.endsWith('.js')) && f !== 'index.ts' && f !== 'index.js', + ) + .map((f) => ({ + name: f, + path: join(migrationsDir, f), + mtime: statSync(join(migrationsDir, f)).mtime, + })) + .sort((a, b) => b.mtime.getTime() - a.mtime.getTime()) + + console.log(`[TEST] Found ${migrations.length} migration files (excluding index.ts/js)`) + const newestMigration = migrations[0] + console.log(`[TEST] Reading newest migration: ${newestMigration.path}`) + const migrationContent = readFileSync(newestMigration.path, 'utf-8') + console.log(`[TEST] Migration content length: ${migrationContent.length} characters`) + console.log( + `[TEST] Migration content preview (first 1000 chars):\n${migrationContent.substring(0, 1000)}`, + ) + + // Verify it contains dims change SQL + expect(migrationContent).toContain('Changing dims') + expect(migrationContent).toContain('TRUNCATE TABLE') + expect(migrationContent).toContain(`vector(${NEW_DIMS})`) + expect(migrationContent).toContain('ALTER COLUMN embedding TYPE') + console.log('[TEST] Step 
3.5: Migration file verification passed') + + // Step 4: Apply the migration + console.log('[TEST] Step 4: Applying migration...') + console.log('[TEST] Step 4.1: About to call cliPayload.db.migrate()...') + console.log('[TEST] Step 4.1.1: Migration directory:', migrationsDir) + console.log( + '[TEST] Step 4.1.2: Payload instance migrationDir:', + (cliPayload.db as any).migrationDir, + ) + try { + const migrateStart = Date.now() + console.log('[TEST] Step 4.1.3: Calling migrate() at', new Date().toISOString()) + if (typeof (cliPayload.db as any).migrate === 'function') { + await (cliPayload.db as any).migrate() + } else { + // Fallback: manually load and execute migration files + const migrationFiles = readdirSync(migrationsDir) + .filter((f) => f.endsWith('.ts') && f !== 'index.ts') + .sort() + + for (const file of migrationFiles) { + const migrationPath = join(migrationsDir, file) + const migration = await import(migrationPath) + if (migration.up) { + await migration.up({ db: cliPayload.db.drizzle, payload: cliPayload, req: {} as any }) + } + } + } + const migrateEnd = Date.now() + console.log( + `[TEST] Step 4.2: cliPayload.db.migrate() completed in ${migrateEnd - migrateStart}ms`, + ) + } catch (error) { + console.error('[TEST] Step 4.2: Error during migration:', error) + throw error + } + console.log('[TEST] Step 4.5: Migration applied successfully') + + // Restore onInit and run it now that migrations are applied + console.log('[TEST] Step 4.6: Restoring onInit...') + if (savedOnInitDims) { + cliConfig.onInit = savedOnInitDims + await savedOnInitDims(cliPayload) + } + console.log('[TEST] Step 4.7: onInit restored and executed') + + // Step 5: Verify column type changed and table was truncated + console.log('[TEST] Step 5: Verifying column type and table state...') + const postgresPayload = cliPayload as PostgresPayload + const schemaName = postgresPayload.db.schemaName || 'public' + const tableName = 'default' + + // Check column type + const columnCheck = await postgresPayload.db.pool?.query( + `SELECT format_type(atttypid, atttypmod) as column_type + FROM pg_attribute + JOIN pg_class ON pg_attribute.attrelid = pg_class.oid + JOIN pg_namespace ON pg_class.relnamespace = pg_namespace.oid + WHERE pg_namespace.nspname = $1 + AND pg_class.relname = $2 + AND pg_attribute.attname = 'embedding' + AND pg_attribute.attnum > 0 + AND NOT pg_attribute.attisdropped`, + [schemaName, tableName], + ) + const columnType = columnCheck?.rows[0]?.column_type || '' + expect(columnType).toContain(`vector(${NEW_DIMS})`) + + // Verify table was truncated (should be empty or have no embeddings) + console.log('[TEST] Step 5.5: Checking table row count...') + const countCheck = await postgresPayload.db.pool?.query( + `SELECT COUNT(*) as count FROM "${schemaName}"."${tableName}"`, + ) + const rowCount = parseInt(countCheck?.rows[0]?.count || '0', 10) + console.log(`[TEST] Table row count: ${rowCount}`) + // Table should be empty after truncate (unless new embeddings were created during test) + expect(rowCount).toBe(0) + console.log('[TEST] Test 4 completed successfully') + }) + }) +}) diff --git a/package.json b/package.json index da2e38e..317c47c 100644 --- a/package.json +++ b/package.json @@ -40,7 +40,7 @@ "test:teardown": "docker-compose -f dev/docker-compose.test.yml down", "test": "pnpm test:int && pnpm test:e2e", "test:e2e": "playwright test", - "test:int": "cross-env DOTENV_CONFIG_PATH=dev/.env.test NODE_OPTIONS=--require=dotenv/config vitest" + "test:int": "cross-env DOTENV_CONFIG_PATH=dev/.env.test 
NODE_OPTIONS='--require=dotenv/config --import=tsx' vitest" }, "devDependencies": { "@eslint/eslintrc": "^3.2.0", @@ -82,6 +82,7 @@ "sharp": "0.34.2", "sort-package-json": "^2.10.0", "tailwindcss": "^4.1.14", + "tsx": "^4.21.0", "typescript": "5.7.3", "vite-tsconfig-paths": "^5.1.4", "vitest": "^3.1.2", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index cf56fb2..9c865c8 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -129,15 +129,18 @@ importers: tailwindcss: specifier: ^4.1.14 version: 4.1.18 + tsx: + specifier: ^4.21.0 + version: 4.21.0 typescript: specifier: 5.7.3 version: 5.7.3 vite-tsconfig-paths: specifier: ^5.1.4 - version: 5.1.4(typescript@5.7.3)(vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2)) + version: 5.1.4(typescript@5.7.3)(vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2)) vitest: specifier: ^3.1.2 - version: 3.2.4(@types/debug@4.1.12)(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2) + version: 3.2.4(@types/debug@4.1.12)(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2) voyage-ai-provider: specifier: ^2.0.0 version: 2.0.0(zod@4.3.4) @@ -6098,6 +6101,11 @@ packages: engines: {node: '>=18.0.0'} hasBin: true + tsx@4.21.0: + resolution: {integrity: sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==} + engines: {node: '>=18.0.0'} + hasBin: true + tweetnacl@0.14.5: resolution: {integrity: sha512-KXXFFdAbFXY4geFIwoyNK+f5Z1b7swfXABfL7HXCmoIWMKU3dmS26672A4EeQtDzLKy7SXmfBu51JolvEKwtGA==} @@ -9197,13 +9205,13 @@ snapshots: chai: 5.3.3 tinyrainbow: 2.0.0 - '@vitest/mocker@3.2.4(vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2))': + '@vitest/mocker@3.2.4(vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2))': dependencies: '@vitest/spy': 3.2.4 estree-walker: 3.0.3 magic-string: 0.30.21 optionalDependencies: - vite: 7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2) + vite: 7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2) '@vitest/pretty-format@3.2.4': dependencies: @@ -13518,7 +13526,7 @@ snapshots: tsx@4.20.3: dependencies: esbuild: 0.25.12 - get-tsconfig: 4.8.1 + get-tsconfig: 4.13.0 optionalDependencies: fsevents: 2.3.3 @@ -13529,6 +13537,13 @@ snapshots: optionalDependencies: fsevents: 2.3.3 + tsx@4.21.0: + dependencies: + esbuild: 0.27.2 + get-tsconfig: 4.13.0 + optionalDependencies: + fsevents: 2.3.3 + tweetnacl@0.14.5: {} type-check@0.4.0: @@ -13700,13 +13715,13 @@ snapshots: '@types/unist': 3.0.3 unist-util-stringify-position: 4.0.0 - vite-node@3.2.4(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2): + vite-node@3.2.4(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2): dependencies: cac: 6.7.14 debug: 4.4.3 es-module-lexer: 1.7.0 pathe: 2.0.3 - vite: 7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2) + vite: 7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2) transitivePeerDependencies: - '@types/node' - jiti @@ -13721,18 +13736,18 @@ snapshots: - tsx - yaml - 
vite-tsconfig-paths@5.1.4(typescript@5.7.3)(vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2)): + vite-tsconfig-paths@5.1.4(typescript@5.7.3)(vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2)): dependencies: debug: 4.4.3 globrex: 0.1.2 tsconfck: 3.1.6(typescript@5.7.3) optionalDependencies: - vite: 7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2) + vite: 7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2) transitivePeerDependencies: - supports-color - typescript - vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2): + vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2): dependencies: esbuild: 0.27.2 fdir: 6.5.0(picomatch@4.0.3) @@ -13746,14 +13761,14 @@ snapshots: jiti: 2.6.1 lightningcss: 1.30.2 sass: 1.77.4 - tsx: 4.20.6 + tsx: 4.21.0 yaml: 2.8.2 - vitest@3.2.4(@types/debug@4.1.12)(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2): + vitest@3.2.4(@types/debug@4.1.12)(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2): dependencies: '@types/chai': 5.2.3 '@vitest/expect': 3.2.4 - '@vitest/mocker': 3.2.4(vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2)) + '@vitest/mocker': 3.2.4(vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2)) '@vitest/pretty-format': 3.2.4 '@vitest/runner': 3.2.4 '@vitest/snapshot': 3.2.4 @@ -13771,8 +13786,8 @@ snapshots: tinyglobby: 0.2.15 tinypool: 1.1.1 tinyrainbow: 2.0.0 - vite: 7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2) - vite-node: 3.2.4(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2) + vite: 7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2) + vite-node: 3.2.4(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2) why-is-node-running: 2.3.0 optionalDependencies: '@types/debug': 4.1.12 diff --git a/src/bin/vectorize-migrate.ts b/src/bin/vectorize-migrate.ts new file mode 100644 index 0000000..6f17e59 --- /dev/null +++ b/src/bin/vectorize-migrate.ts @@ -0,0 +1,588 @@ +import type { SanitizedConfig } from 'payload' +import { getPayload } from 'payload' +import { readFileSync, writeFileSync, readdirSync, statSync, existsSync, rmSync } from 'fs' +import { join, resolve } from 'path' +import toSnakeCase from 'to-snake-case' + +import { getVectorizedPayload } from '../types.js' +import type { KnowledgePoolStaticConfig } from '../types.js' + +/** + * Get prior state from existing migrations + */ +function getPriorStateFromMigrations( + migrationsDir: string, + poolNames: string[], +): Map { + const state = new Map() + + // Initialize with null (unknown state) + for (const poolName of poolNames) { + state.set(poolName, { dims: null, ivfflatLists: null }) + } + + if (!existsSync(migrationsDir)) { + return state + } + + // Find all migration files and read them in reverse order (newest first) + // Exclude index.ts/index.js as those are not migration files + const migrationFiles = readdirSync(migrationsDir) + .filter((f) => (f.endsWith('.ts') || f.endsWith('.js')) && f !== 'index.ts' && f !== 'index.js') + .map((f) => ({ + 
name: f, + path: join(migrationsDir, f), + mtime: statSync(join(migrationsDir, f)).mtime, + })) + .sort((a, b) => b.mtime.getTime() - a.mtime.getTime()) + + console.log(`[payloadcms-vectorize] Found ${migrationFiles.length} migration file(s) to scan for prior state`) + + // Read migration files to find vector config + for (const file of migrationFiles) { + try { + const content = readFileSync(file.path, 'utf-8') + + // Extract only the UP function content to avoid matching values in DOWN function + // The DOWN function contains previous/rollback values which we don't want + const upFunctionMatch = content.match( + /export\s+async\s+function\s+up\s*\([^)]*\)[^{]*\{([\s\S]*?)(?=\}\s*(?:export\s+async\s+function\s+down|$))/i, + ) + const upContent = upFunctionMatch ? upFunctionMatch[1] : content + + // Look for IVFFLAT index creation with lists parameter + for (const poolName of poolNames) { + const tableName = toSnakeCase(poolName) + const indexName = `${tableName}_embedding_ivfflat` + + // Check if this migration creates the index (only in UP function) + // The code format is: await db.execute(sql.raw(`CREATE INDEX "indexName" ... WITH (lists = 10)`)) + // We need to match the lists parameter in the template literal + // Use non-greedy .*? to match the FIRST occurrence + const indexMatch = + // Match: db.execute(sql.raw(`...CREATE INDEX..."indexName"...WITH (lists = 10)...`)) + upContent.match( + new RegExp( + `db\\.execute\\(sql\\.raw.*?CREATE INDEX.*?"${indexName}".*?WITH\\s*\\(lists\\s*=\\s*(\\d+)\\)`, + 'is', + ), + ) || + // Match: CREATE INDEX "indexName" ... WITH (lists = 10) (in any context) + upContent.match( + new RegExp(`CREATE INDEX.*?"${indexName}".*?WITH\\s*\\(lists\\s*=\\s*(\\d+)\\)`, 'is'), + ) || + // Match: lists = near ivfflat (non-greedy) + upContent.match(new RegExp(`ivfflat.*?lists\\s*=\\s*(\\d+)`, 'is')) + + if (indexMatch && !state.get(poolName)?.ivfflatLists) { + const lists = parseInt(indexMatch[1], 10) + const current = state.get(poolName) || { dims: null, ivfflatLists: null } + state.set(poolName, { ...current, ivfflatLists: lists }) + console.log( + `[payloadcms-vectorize] Found prior ivfflatLists=${lists} for pool "${poolName}" in ${file.name}`, + ) + } else if (!state.get(poolName)?.ivfflatLists) { + // Debug: log if we didn't find it + console.log( + `[payloadcms-vectorize] No ivfflatLists found for pool "${poolName}" in ${file.name}`, + ) + } + + // Check for dims in vector column definition (search full content as dims should be consistent) + const dimsMatch = content.match(new RegExp(`vector\\((\\d+)\\)`, 'i')) + if (dimsMatch && !state.get(poolName)?.dims) { + const dims = parseInt(dimsMatch[1], 10) + const current = state.get(poolName) || { dims: null, ivfflatLists: null } + state.set(poolName, { ...current, dims }) + console.log( + `[payloadcms-vectorize] Found prior dims=${dims} for pool "${poolName}" in ${file.name}`, + ) + } + } + } catch (err) { + // Skip files that can't be read + continue + } + } + + return state +} + +/** + * Generate SQL code for IVFFLAT index rebuild + */ +function generateIvfflatRebuildCode( + tableName: string, + schemaName: string, + ivfflatLists: number, +): string { + const indexName = `${tableName}_embedding_ivfflat` + return ` await db.execute(sql.raw(\`DROP INDEX IF EXISTS "${schemaName}"."${indexName}"\`)); + await db.execute(sql.raw(\`CREATE INDEX "${indexName}" ON "${schemaName}"."${tableName}" USING ivfflat (embedding vector_cosine_ops) WITH (lists = ${ivfflatLists})\`));` +} + +/** + * Generate SQL code for column 
type change + */ +function generateColumnTypeChangeCode( + tableName: string, + schemaName: string, + newDims: number, +): string { + return ` // Change column type to new dimensions + await db.execute(sql.raw(\`ALTER TABLE "${schemaName}"."${tableName}" ALTER COLUMN embedding TYPE vector(${newDims})\`));` +} + +/** + * Generate SQL code for destructive dims change + */ +function generateDimsChangeCode( + tableName: string, + schemaName: string, + newDims: number, + newIvfflatLists: number, +): string { + const indexName = `${tableName}_embedding_ivfflat` + return ` // WARNING: Changing vector dimensions is destructive and requires re-embedding + // Step 1: Drop existing index + await db.execute(sql.raw(\`DROP INDEX IF EXISTS "${schemaName}"."${indexName}"\`)); + // Step 2: Change column type (Payload migration may also generate this, but explicit is safer) + await db.execute(sql.raw(\`ALTER TABLE "${schemaName}"."${tableName}" ALTER COLUMN embedding TYPE vector(${newDims})\`)); + // Step 3: Truncate table (destructive - all embeddings are lost) + // Use CASCADE to handle foreign key constraints + await db.execute(sql.raw(\`TRUNCATE TABLE "${schemaName}"."${tableName}" CASCADE\`)); + // Step 4: Recreate index with new parameters + await db.execute(sql.raw(\`CREATE INDEX "${indexName}" ON "${schemaName}"."${tableName}" USING ivfflat (embedding vector_cosine_ops) WITH (lists = ${newIvfflatLists})\`));` +} + +/** + * Patch a migration file with vector-specific SQL + */ +function patchMigrationFile( + migrationPath: string, + staticConfigs: Record, + schemaName: string, + priorState: Map, +): void { + console.log(`[vectorize-migrate] Reading migration file: ${migrationPath}`) + const content = readFileSync(migrationPath, 'utf-8') + console.log(`[vectorize-migrate] File read successfully, length: ${content.length} characters`) + + // Generate SQL code for each pool + const vectorUpCode: string[] = [] + const vectorDownCode: string[] = [] + + for (const [poolName, config] of Object.entries(staticConfigs)) { + const tableName = toSnakeCase(poolName) + const priorConfig = priorState.get(poolName) || { dims: null, ivfflatLists: null } + const dimsChanged = priorConfig.dims !== null && priorConfig.dims !== config.dims + const ivfflatListsChanged = + priorConfig.ivfflatLists !== null && priorConfig.ivfflatLists !== config.ivfflatLists + + // Check if dims changed (destructive) - handle this first as it includes index operations + if (dimsChanged) { + vectorUpCode.push( + ` // payloadcms-vectorize: WARNING - Changing dims from ${priorConfig.dims} to ${config.dims} is destructive`, + ) + // When dims changes, we need to: + // 1. Drop existing index first + // 2. Change column type (Payload migration may also generate this) + // 3. Truncate table (destructive) + // 4. 
Recreate index with new ivfflatLists + vectorUpCode.push( + generateDimsChangeCode(tableName, schemaName, config.dims, config.ivfflatLists), + ) + // Down migration: restore to previous state (but can't restore data) + vectorDownCode.push( + ` // payloadcms-vectorize: Revert dims change (WARNING: data was truncated and cannot be restored)`, + ) + // Restore previous column type and index + vectorDownCode.push( + generateColumnTypeChangeCode(tableName, schemaName, priorConfig.dims || config.dims), + ) + vectorDownCode.push( + generateIvfflatRebuildCode( + tableName, + schemaName, + priorConfig.ivfflatLists || config.ivfflatLists, + ), + ) + vectorDownCode.push(` // WARNING: Original data cannot be restored`) + } else if (ivfflatListsChanged) { + // Check if ivfflatLists changed (only if dims didn't change, since dims change handles index) + vectorUpCode.push( + ` // payloadcms-vectorize: Rebuild IVFFLAT index for ${poolName} with lists=${config.ivfflatLists}`, + ) + vectorUpCode.push(generateIvfflatRebuildCode(tableName, schemaName, config.ivfflatLists)) + // Down migration: rebuild with old lists + vectorDownCode.push( + ` // payloadcms-vectorize: Revert IVFFLAT index for ${poolName} to lists=${priorConfig.ivfflatLists}`, + ) + vectorDownCode.push( + generateIvfflatRebuildCode( + tableName, + schemaName, + priorConfig.ivfflatLists || config.ivfflatLists, + ), + ) + } + + // If this is the first migration, ensure index exists + // Note: Column is handled by Drizzle schema via afterSchemaInit + // We only check ivfflatLists because dims will always be found from Drizzle schema + if (priorConfig.ivfflatLists === null) { + vectorUpCode.push(` // payloadcms-vectorize: Initial IVFFLAT index setup for ${poolName}`) + vectorUpCode.push( + ` // Note: Embedding column is created via Drizzle schema (afterSchemaInit hook)`, + ) + vectorUpCode.push(generateIvfflatRebuildCode(tableName, schemaName, config.ivfflatLists)) + vectorDownCode.push(` // payloadcms-vectorize: Drop index on rollback`) + const indexName = `${tableName}_embedding_ivfflat` + vectorDownCode.push( + ` await db.execute(sql.raw(\`DROP INDEX IF EXISTS "${schemaName}"."${indexName}"\`));`, + ) + } + } + + if (vectorUpCode.length === 0) { + // No changes needed + return + } + + // Find the up function and insert code before the closing brace + const upFunctionMatch = content.match( + /export\s+async\s+function\s+up\s*\([^)]*\)\s*:\s*Promise\s*\{/i, + ) + if (!upFunctionMatch) { + console.error( + `[vectorize-migrate] Could not find 'up' function in migration file: ${migrationPath}`, + ) + console.error(`[vectorize-migrate] File content length: ${content.length} characters`) + console.error(`[vectorize-migrate] File content (first 1000 chars):`) + console.error(content.substring(0, 1000)) + console.error(`[vectorize-migrate] File content (last 1000 chars):`) + console.error(content.substring(Math.max(0, content.length - 1000))) + console.error( + `[vectorize-migrate] Searching for pattern: /export\\s+async\\s+function\\s+up\\s*\\([^)]*\\)\\s*:\\s*Promise\\s*\\{/i`, + ) + throw new Error(`Could not find 'up' function in migration file: ${migrationPath}`) + } + + const upFunctionStart = upFunctionMatch.index! + upFunctionMatch[0].length + const downFunctionMatch = content.match(/export\s+async\s+function\s+down\s*\([^)]*\)/i) + const searchEnd = downFunctionMatch ? downFunctionMatch.index! 
: content.length + + // Find the last closing brace before down function or end + const upFunctionBody = content.substring(upFunctionStart, searchEnd) + const lastBraceIndex = upFunctionBody.lastIndexOf('}') + console.log(`[vectorize-migrate] up function body length: ${upFunctionBody.length}`) + console.log(`[vectorize-migrate] lastBraceIndex in body: ${lastBraceIndex}`) + console.log(`[vectorize-migrate] up function body ends with: ${upFunctionBody.substring(Math.max(0, upFunctionBody.length - 200))}`) + if (lastBraceIndex === -1) { + throw new Error( + `Could not find closing brace for 'up' function in migration file: ${migrationPath}`, + ) + } + + // Insert our code before the closing brace + const beforeBrace = content.substring(0, upFunctionStart + lastBraceIndex) + const afterBrace = content.substring(upFunctionStart + lastBraceIndex) + console.log(`[vectorize-migrate] Insertion point: beforeBrace ends with: ${beforeBrace.substring(Math.max(0, beforeBrace.length - 100))}`) + console.log(`[vectorize-migrate] Insertion point: afterBrace starts with: ${afterBrace.substring(0, 100)}`) + + const codeToInsert = '\n' + vectorUpCode.join('\n') + '\n' + console.log(`[vectorize-migrate] Inserting ${vectorUpCode.length} line(s) of code into migration`) + console.log(`[vectorize-migrate] Code to insert:\n${codeToInsert}`) + let newContent = beforeBrace + codeToInsert + afterBrace + console.log(`[vectorize-migrate] Migration file will be ${newContent.length} characters after patching (was ${content.length})`) + + // Verify insertion point looks correct + const insertionPointPreview = newContent.substring( + Math.max(0, beforeBrace.length - 50), + Math.min(newContent.length, beforeBrace.length + codeToInsert.length + 50), + ) + console.log(`[vectorize-migrate] Insertion point preview:\n${insertionPointPreview}`) + + // Handle down function + if (downFunctionMatch) { + const downFunctionStart = downFunctionMatch.index! + downFunctionMatch[0].length + const downBraceMatch = newContent.substring(downFunctionStart).match(/\{/) + if (downBraceMatch) { + const downBodyStart = downFunctionStart + downBraceMatch.index! 
+ 1 + const downBody = newContent.substring(downBodyStart) + const downLastBraceIndex = downBody.lastIndexOf('}') + if (downLastBraceIndex !== -1) { + const beforeDownBrace = newContent.substring(0, downBodyStart + downLastBraceIndex) + const afterDownBrace = newContent.substring(downBodyStart + downLastBraceIndex) + const downCodeToInsert = '\n' + vectorDownCode.join('\n') + '\n' + newContent = beforeDownBrace + downCodeToInsert + afterDownBrace + } + } + } else if (vectorDownCode.length > 0) { + // Add down function if it doesn't exist + const lastFileBrace = newContent.lastIndexOf('}') + if (lastFileBrace !== -1) { + const beforeLastBrace = newContent.substring(0, lastFileBrace) + const afterLastBrace = newContent.substring(lastFileBrace) + const downFunctionCode = `\n\nexport async function down({ payload, req }: { payload: any; req: any }): Promise {\n${vectorDownCode.join('\n')}\n}` + newContent = beforeLastBrace + downFunctionCode + afterLastBrace + } + } + + writeFileSync(migrationPath, newContent, 'utf-8') + console.log(`[vectorize-migrate] Migration file written successfully`) + // Verify the code was inserted + const verifyContent = readFileSync(migrationPath, 'utf-8') + const hasIvfflatCode = verifyContent.includes('ivfflat') && verifyContent.includes('lists =') + console.log(`[vectorize-migrate] Verification: migration contains IVFFLAT code: ${hasIvfflatCode}`) + if (!hasIvfflatCode && vectorUpCode.length > 0) { + console.error(`[vectorize-migrate] WARNING: IVFFLAT code was supposed to be inserted but not found in file!`) + console.error(`[vectorize-migrate] Expected to find: ${vectorUpCode.join(' | ')}`) + } +} + +/** + * Bin script entry point for creating vector migrations + */ +export const script = async (config: SanitizedConfig): Promise => { + // Disable onInit to avoid ensurePgvectorArtifacts check - migrations may not be applied yet + const payload = await getPayload({ + config, + disableOnInit: true, + key: `vectorize-migrate-payload-instance-${Date.now()}`, + }) + const vectorizedPayload = getVectorizedPayload(payload) + + if (!vectorizedPayload) { + throw new Error( + '[payloadcms-vectorize] Vectorize plugin not found. 
Ensure payloadcmsVectorize is configured in your Payload config.', + ) + } + + const staticConfigs = vectorizedPayload._staticConfigs + if (!staticConfigs || Object.keys(staticConfigs).length === 0) { + throw new Error('[payloadcms-vectorize] No static configs found') + } + + const poolNames = Object.keys(staticConfigs) + const schemaName = (payload.db as any).schemaName || 'public' + const migrationsDir = (payload.db as any).migrationDir || resolve(process.cwd(), 'src/migrations') + + console.log('[payloadcms-vectorize] Checking for configuration changes...') + + // Get prior state from migrations + const priorState = getPriorStateFromMigrations(migrationsDir, poolNames) + + // Debug: log prior state + console.log('[payloadcms-vectorize] Prior state from migrations:') + for (const [poolName, state] of priorState.entries()) { + console.log(`[payloadcms-vectorize] ${poolName}: dims=${state.dims}, ivfflatLists=${state.ivfflatLists}`) + } + console.log('[payloadcms-vectorize] Current static configs:') + for (const [poolName, config] of Object.entries(staticConfigs)) { + console.log(`[payloadcms-vectorize] ${poolName}: dims=${config.dims}, ivfflatLists=${config.ivfflatLists}`) + } + + // Check if any changes are needed + let hasChanges = false + let isFirstMigration = false + for (const [poolName, currentConfig] of Object.entries(staticConfigs)) { + const prior = priorState.get(poolName) || { dims: null, ivfflatLists: null } + + // Check if this is the first migration (no IVFFLAT index exists yet) + // Note: dims might be found from Drizzle schema, but ivfflatLists won't be found until we create the index + if (prior.ivfflatLists === null) { + isFirstMigration = true + hasChanges = true + console.log( + `[payloadcms-vectorize] First migration detected for pool "${poolName}" (ivfflatLists not found in prior migrations)`, + ) + break + } + + // Check for actual changes + if ( + prior.dims !== null && prior.dims !== currentConfig.dims || + (prior.ivfflatLists !== null && prior.ivfflatLists !== currentConfig.ivfflatLists) + ) { + hasChanges = true + console.log( + `[payloadcms-vectorize] Change detected for pool "${poolName}": dims ${prior.dims}→${currentConfig.dims}, ivfflatLists ${prior.ivfflatLists}→${currentConfig.ivfflatLists}`, + ) + break + } + } + + // If no changes detected, check if artifacts exist (idempotency) + if (!hasChanges) { + console.log('[payloadcms-vectorize] No configuration changes detected.') + console.log( + '[payloadcms-vectorize] If this is the first migration, ensure your initial migration creates the embedding columns via Drizzle schema.', + ) + return + } + + console.log('[payloadcms-vectorize] Changes detected.') + + // Determine if there are actual schema changes (dims change) or just index parameter changes (ivfflatLists) + // payload.db.createMigration only works when there are schema changes + // For index-only changes, we need to create the migration file manually + let hasSchemaChanges = false + for (const [poolName, currentConfig] of Object.entries(staticConfigs)) { + const prior = priorState.get(poolName) || { dims: null, ivfflatLists: null } + if (prior.dims !== null && prior.dims !== currentConfig.dims) { + hasSchemaChanges = true + console.log(`[payloadcms-vectorize] Schema change detected for pool "${poolName}": dims ${prior.dims}→${currentConfig.dims}`) + break + } + } + + if (isFirstMigration) { + console.log('[payloadcms-vectorize] This is the first migration - checking if we should patch existing migration or create new one') + + // Check if there's a 
very recent migration file (created in last 10 seconds) that we should patch + const recentMigrations = existsSync(migrationsDir) + ? readdirSync(migrationsDir) + .filter( + (f) => (f.endsWith('.ts') || f.endsWith('.js')) && f !== 'index.ts' && f !== 'index.js', + ) + .map((f) => ({ + name: f, + path: join(migrationsDir, f), + mtime: statSync(join(migrationsDir, f)).mtime, + })) + .filter((m) => Date.now() - m.mtime.getTime() < 10000) // Created in last 10 seconds + .sort((a, b) => b.mtime.getTime() - a.mtime.getTime()) + : [] + + if (recentMigrations.length > 0) { + const recentMigration = recentMigrations[0] + console.log(`[payloadcms-vectorize] Found recent migration to patch: ${recentMigration.name}`) + // Check if it already has IVFFLAT index code + const recentContent = readFileSync(recentMigration.path, 'utf-8') + const hasIvfflatCode = recentContent.includes('ivfflat') && (recentContent.includes('drizzle.execute') || recentContent.includes('CREATE INDEX')) + + if (!hasIvfflatCode) { + console.log(`[payloadcms-vectorize] Patching existing migration: ${recentMigration.path}`) + patchMigrationFile(recentMigration.path, staticConfigs, schemaName, priorState) + console.log('[payloadcms-vectorize] Migration patched successfully!') + return + } else { + console.log(`[payloadcms-vectorize] Recent migration already has IVFFLAT code, creating new migration instead`) + } + } + + console.log('[payloadcms-vectorize] Creating new migration with IVFFLAT index setup') + } else { + console.log('[payloadcms-vectorize] Creating new migration for configuration change') + } + + // Create migration using Payload's API OR create manually for index-only changes + // Note: createMigration may not return the path, so we'll find the newest migration file after creation + const migrationsBefore = existsSync(migrationsDir) + ? readdirSync(migrationsDir) + .filter( + (f) => (f.endsWith('.ts') || f.endsWith('.js')) && f !== 'index.ts' && f !== 'index.js', + ) + .map((f) => ({ + name: f, + path: join(migrationsDir, f), + mtime: statSync(join(migrationsDir, f)).mtime, + })) + .sort((a, b) => b.mtime.getTime() - a.mtime.getTime()) + : [] + + let migrationPath: string + + // If there are schema changes (dims changed), use Payload's createMigration + // Otherwise (only ivfflatLists changed), create the migration file manually + // because Payload's createMigration hangs when there are no schema changes to detect + if (hasSchemaChanges) { + console.log('[payloadcms-vectorize] Schema changes detected - using payload.db.createMigration...') + try { + await payload.db.createMigration({ + migrationName: 'vectorize-config', + payload, + }) + console.log('[payloadcms-vectorize] Migration created successfully') + } catch (error) { + console.error('[payloadcms-vectorize] Error creating migration:', error) + throw error + } + + // Find the newest migration file (should be the one just created) + const migrationsAfter = existsSync(migrationsDir) + ? readdirSync(migrationsDir) + .filter( + (f) => (f.endsWith('.ts') || f.endsWith('.js')) && f !== 'index.ts' && f !== 'index.js', + ) + .map((f) => ({ + name: f, + path: join(migrationsDir, f), + mtime: statSync(join(migrationsDir, f)).mtime, + })) + .sort((a, b) => b.mtime.getTime() - a.mtime.getTime()) + : [] + + // Find the migration that was just created (newest that wasn't there before) + const beforePaths = new Set(migrationsBefore.map((m) => m.path)) + const newMigrations = migrationsAfter.filter((m) => !beforePaths.has(m.path)) + const foundPath = newMigrations.length > 0 ? 
newMigrations[0].path : migrationsAfter[0]?.path + + if (!foundPath) { + throw new Error( + '[payloadcms-vectorize] Failed to create migration file - no new migration found.', + ) + } + migrationPath = foundPath + } else { + // No schema changes (only ivfflatLists changed) - create migration file manually + // Payload's createMigration API doesn't support this case (it hangs when no schema changes detected) + console.log('[payloadcms-vectorize] No schema changes (only index parameter changes) - creating migration file manually...') + + // Generate timestamp for migration filename (format: YYYYMMDD_HHMMSS) + const now = new Date() + const timestamp = [ + now.getFullYear(), + String(now.getMonth() + 1).padStart(2, '0'), + String(now.getDate()).padStart(2, '0'), + '_', + String(now.getHours()).padStart(2, '0'), + String(now.getMinutes()).padStart(2, '0'), + String(now.getSeconds()).padStart(2, '0'), + ].join('') + + const migrationFileName = `${timestamp}_vectorize_ivfflat_rebuild.ts` + migrationPath = join(migrationsDir, migrationFileName) + + // Create a minimal migration file that we'll patch with our IVFFLAT code + const migrationTemplate = `import { MigrateUpArgs, MigrateDownArgs, sql } from '@payloadcms/db-postgres' + +export async function up({ db, payload, req }: MigrateUpArgs): Promise { + // Index parameter changes only - no schema changes +} + +export async function down({ db, payload, req }: MigrateDownArgs): Promise { + // Revert index parameter changes +} +` + + writeFileSync(migrationPath, migrationTemplate, 'utf-8') + console.log(`[payloadcms-vectorize] Created migration file: ${migrationPath}`) + } + + console.log(`[payloadcms-vectorize] Patching migration: ${migrationPath}`) + + // Patch the migration file + patchMigrationFile(migrationPath, staticConfigs, schemaName, priorState) + + console.log('[payloadcms-vectorize] Migration created and patched successfully!') + console.log( + '[payloadcms-vectorize] Review the migration file and apply it with: pnpm payload migrate', + ) + + // Only exit if not in test environment (when called from tests, just return) + if (process.env.NODE_ENV !== 'test' && !process.env.VITEST) { + process.exit(0) + } +} diff --git a/src/endpoints/vectorSearch.ts b/src/endpoints/vectorSearch.ts index 274c618..8634eeb 100644 --- a/src/endpoints/vectorSearch.ts +++ b/src/endpoints/vectorSearch.ts @@ -100,6 +100,7 @@ async function performCosineSearch( throw new Error('Only works with Postgres') } + payload.db.createMigration // In PayloadCMS, payload.db IS the adapter, and drizzle is at payload.db.drizzle const adapter = payload.db if (!adapter) { diff --git a/src/index.ts b/src/index.ts index 60d7497..834e393 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,6 +1,8 @@ import type { Config, Payload, PayloadRequest } from 'payload' import { customType } from '@payloadcms/db-postgres/drizzle/pg-core' import toSnakeCase from 'to-snake-case' +import { fileURLToPath } from 'url' +import { dirname, resolve } from 'path' import { createEmbeddingsCollection } from './collections/embeddings.js' import type { @@ -76,13 +78,21 @@ export type { export { getVectorizedPayload } from './types.js' +/** + * Presence-only safety net: checks that pgvector artifacts exist. + * Does NOT create or modify them - migrations should handle that. + * This is a runtime check to fail fast if migrations haven't been applied. 
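+ * For example, if the `vector` extension, the `embedding` column, or the IVFFLAT index is missing, the check below throws a "Please ensure migrations have been applied" style error instead of creating the artifact itself.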
+ */ async function ensurePgvectorArtifacts(args: { payload: Payload tableName: string - dims: number ivfflatLists: number }): Promise { - const { payload, tableName, dims, ivfflatLists } = args + const { payload, tableName } = args + + payload.logger.info( + `[payloadcms-vectorize] ensurePgvectorArtifacts: Starting verification for table "${tableName}"`, + ) if (!isPostgresPayload(payload)) { throw new Error( @@ -94,29 +104,102 @@ async function ensurePgvectorArtifacts(args: { const postgresPayload = payload as PostgresPayload const schemaName = postgresPayload.db.schemaName || 'public' - const sqls: string[] = [ - `CREATE EXTENSION IF NOT EXISTS vector;`, - `ALTER TABLE "${schemaName}"."${tableName}" ADD COLUMN IF NOT EXISTS embedding vector(${dims});`, - `CREATE INDEX IF NOT EXISTS ${tableName}_embedding_ivfflat ON "${schemaName}"."${tableName}" USING ivfflat (embedding vector_cosine_ops) WITH (lists = ${ivfflatLists});`, - ] + payload.logger.info( + `[payloadcms-vectorize] ensurePgvectorArtifacts: Using schema "${schemaName}" for table "${tableName}"`, + ) - try { + const runQuery = async (sql: string, params?: any[]): Promise => { + payload.logger.debug(`[payloadcms-vectorize] ensurePgvectorArtifacts: Executing query: ${sql}`) if (postgresPayload.db.pool?.query) { - for (const sql of sqls) { - await postgresPayload.db.pool.query(sql) - } - } else if (postgresPayload.db.drizzle?.execute) { - for (const sql of sqls) { - await postgresPayload.db.drizzle.execute(sql) - } + return postgresPayload.db.pool.query(sql, params) } - postgresPayload.logger.info('[payloadcms-vectorize] pgvector extension/columns/index ensured') + if (postgresPayload.db.drizzle?.execute) { + return postgresPayload.db.drizzle.execute(sql) + } + throw new Error('[payloadcms-vectorize] No database query function available') + } + + try { + // Check extension exists + payload.logger.info( + '[payloadcms-vectorize] ensurePgvectorArtifacts: Checking pgvector extension...', + ) + const extensionCheck = await runQuery(`SELECT 1 FROM pg_extension WHERE extname = 'vector'`) + const extensionRows = Array.isArray(extensionCheck) + ? extensionCheck + : extensionCheck?.rows || [] + if (extensionRows.length === 0) { + payload.logger.error( + '[payloadcms-vectorize] ensurePgvectorArtifacts: pgvector extension not found', + ) + throw new Error( + `[payloadcms-vectorize] pgvector extension not found. Please ensure migrations have been applied or manually create the extension: CREATE EXTENSION IF NOT EXISTS vector;`, + ) + } + payload.logger.info('[payloadcms-vectorize] ensurePgvectorArtifacts: pgvector extension found') + + // Check column exists with correct dims + payload.logger.info( + `[payloadcms-vectorize] ensurePgvectorArtifacts: Checking embedding column in "${schemaName}"."${tableName}"...`, + ) + const columnCheck = await runQuery( + `SELECT column_name, udt_name + FROM information_schema.columns + WHERE table_schema = $1 AND table_name = $2 AND column_name = 'embedding'`, + [schemaName, tableName], + ) + const columnRows = Array.isArray(columnCheck) ? columnCheck : columnCheck?.rows || [] + if (columnRows.length === 0) { + payload.logger.error( + `[payloadcms-vectorize] ensurePgvectorArtifacts: Embedding column not found in "${schemaName}"."${tableName}"`, + ) + throw new Error( + `[payloadcms-vectorize] Embedding column not found in table "${schemaName}"."${tableName}". 
Please ensure migrations have been applied.`, + ) + } + payload.logger.info( + `[payloadcms-vectorize] ensurePgvectorArtifacts: Embedding column found (type: ${columnRows[0]?.udt_name || 'unknown'})`, + ) + + // Check index exists (don't verify lists parameter - migrations handle that) + const indexName = `${tableName}_embedding_ivfflat` + payload.logger.info( + `[payloadcms-vectorize] ensurePgvectorArtifacts: Checking IVFFLAT index "${indexName}"...`, + ) + const indexCheck = await runQuery( + `SELECT 1 + FROM pg_indexes + WHERE schemaname = $1 AND tablename = $2 AND indexname = $3`, + [schemaName, tableName, indexName], + ) + const indexRows = Array.isArray(indexCheck) ? indexCheck : indexCheck?.rows || [] + if (indexRows.length === 0) { + payload.logger.error( + `[payloadcms-vectorize] ensurePgvectorArtifacts: IVFFLAT index "${indexName}" not found on "${schemaName}"."${tableName}"`, + ) + throw new Error( + `[payloadcms-vectorize] IVFFLAT index not found on table "${schemaName}"."${tableName}". Please ensure migrations have been applied.`, + ) + } + payload.logger.info( + `[payloadcms-vectorize] ensurePgvectorArtifacts: IVFFLAT index "${indexName}" found`, + ) + + postgresPayload.logger.info( + `[payloadcms-vectorize] pgvector artifacts verified for table "${schemaName}"."${tableName}"`, + ) } catch (err) { + payload.logger.error( + `[payloadcms-vectorize] ensurePgvectorArtifacts: Error occurred: ${err instanceof Error ? err.message : String(err)}`, + ) + if (err instanceof Error && err.message.includes('[payloadcms-vectorize]')) { + throw err + } postgresPayload.logger.error( - '[payloadcms-vectorize] Failed ensuring pgvector artifacts', + '[payloadcms-vectorize] Failed checking pgvector artifacts', err as Error, ) - throw new Error(`[payloadcms-vectorize] Failed ensuring pgvector artifacts: ${err}`) + throw new Error(`[payloadcms-vectorize] Failed checking pgvector artifacts: ${err}`) } } @@ -177,28 +260,64 @@ export const createVectorizeIntegration = const payloadcmsVectorize = (pluginOptions: PayloadcmsVectorizeConfig) => (config: Config): Config => { + console.log('[payloadcms-vectorize] payloadcmsVectorize: Plugin initialization started') + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: Processing ${Object.keys(pluginOptions.knowledgePools).length} knowledge pool(s)`, + ) + // Ensure collections array exists config.collections = [...(config.collections || [])] + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: Initial collections count: ${config.collections.length}`, + ) // Ensure bulk runs collection exists once + console.log('[payloadcms-vectorize] payloadcmsVectorize: Adding bulk runs collection...') const bulkRunsCollection = createBulkEmbeddingsRunsCollection() if (!config.collections.find((c) => c.slug === BULK_EMBEDDINGS_RUNS_SLUG)) { config.collections.push(bulkRunsCollection) + console.log('[payloadcms-vectorize] payloadcmsVectorize: Bulk runs collection added') + } else { + console.log( + '[payloadcms-vectorize] payloadcmsVectorize: Bulk runs collection already exists', + ) } // Ensure bulk input metadata collection exists once + console.log( + '[payloadcms-vectorize] payloadcmsVectorize: Adding bulk input metadata collection...', + ) const bulkInputMetadataCollection = createBulkEmbeddingInputMetadataCollection() if (!config.collections.find((c) => c.slug === BULK_EMBEDDINGS_INPUT_METADATA_SLUG)) { config.collections.push(bulkInputMetadataCollection) + console.log( + '[payloadcms-vectorize] payloadcmsVectorize: Bulk input metadata collection 
added', + ) + } else { + console.log( + '[payloadcms-vectorize] payloadcmsVectorize: Bulk input metadata collection already exists', + ) } // Ensure bulk batches collection exists once + console.log('[payloadcms-vectorize] payloadcmsVectorize: Adding bulk batches collection...') const bulkBatchesCollection = createBulkEmbeddingsBatchesCollection() if (!config.collections.find((c) => c.slug === BULK_EMBEDDINGS_BATCHES_SLUG)) { config.collections.push(bulkBatchesCollection) + console.log('[payloadcms-vectorize] payloadcmsVectorize: Bulk batches collection added') + } else { + console.log( + '[payloadcms-vectorize] payloadcmsVectorize: Bulk batches collection already exists', + ) } // Validate static/dynamic configs share the same pool names + console.log( + '[payloadcms-vectorize] payloadcmsVectorize: Validating static/dynamic config alignment...', + ) for (const poolName in pluginOptions.knowledgePools) { if (!staticConfigs[poolName]) { + console.error( + `[payloadcms-vectorize] payloadcmsVectorize: Knowledge pool "${poolName}" not found in static configs`, + ) throw new Error( `[payloadcms-vectorize] Knowledge pool "${poolName}" not found in static configs`, ) @@ -212,10 +331,16 @@ export const createVectorizeIntegration = } } if (unusedStaticPools.length > 0) { + console.error( + `[payloadcms-vectorize] payloadcmsVectorize: Static pools without dynamic config: ${unusedStaticPools.join(', ')}`, + ) throw new Error( `[payloadcms-vectorize] Static knowledge pool(s) ${unusedStaticPools.join(', ')} lack dynamic configuration`, ) } + console.log( + '[payloadcms-vectorize] payloadcmsVectorize: Static/dynamic config validation passed', + ) // Build reverse mapping: collectionSlug -> KnowledgePoolName[] const collectionToPools = new Map< @@ -227,68 +352,124 @@ export const createVectorizeIntegration = >() // Process each knowledge pool + console.log('[payloadcms-vectorize] payloadcmsVectorize: Processing knowledge pools...') for (const poolName in pluginOptions.knowledgePools) { + console.log(`[payloadcms-vectorize] payloadcmsVectorize: Processing pool "${poolName}"...`) const dynamicConfig = pluginOptions.knowledgePools[poolName] // Add the embeddings collection for this knowledge pool with extensionFields + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: Creating embeddings collection for pool "${poolName}"...`, + ) const embeddingsCollection = createEmbeddingsCollection( poolName, dynamicConfig.extensionFields, ) if (!config.collections.find((c) => c.slug === poolName)) { config.collections.push(embeddingsCollection) + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: Embeddings collection "${poolName}" added`, + ) + } else { + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: Embeddings collection "${poolName}" already exists`, + ) } // Build reverse mapping for hooks - for (const collectionSlug of Object.keys(dynamicConfig.collections)) { + const collectionSlugs = Object.keys(dynamicConfig.collections) + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: Pool "${poolName}" maps to ${collectionSlugs.length} collection(s): ${collectionSlugs.join(', ')}`, + ) + for (const collectionSlug of collectionSlugs) { if (!collectionToPools.has(collectionSlug)) { collectionToPools.set(collectionSlug, []) } collectionToPools.get(collectionSlug)!.push({ pool: poolName, dynamic: dynamicConfig }) } + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: Pool "${poolName}" processing complete`, + ) } + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: 
Knowledge pools processed. Total collections: ${config.collections.length}`, + ) // Validate bulk queue requirements + console.log( + '[payloadcms-vectorize] payloadcmsVectorize: Validating bulk queue requirements...', + ) let bulkIngestEnabled = false for (const poolName in pluginOptions.knowledgePools) { const dynamicConfig = pluginOptions.knowledgePools[poolName] if (dynamicConfig.embeddingConfig.bulkEmbeddingsFns) { bulkIngestEnabled = true + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: Pool "${poolName}" has bulk embedding enabled`, + ) break } } if (bulkIngestEnabled && !pluginOptions.bulkQueueNames) { + console.error( + '[payloadcms-vectorize] payloadcmsVectorize: bulkQueueNames required but not provided', + ) throw new Error( '[payloadcms-vectorize] bulkQueueNames is required when any knowledge pool has bulk embedding configured (embeddingConfig.bulkEmbeddingsFns).', ) } + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: Bulk queue validation passed (enabled: ${bulkIngestEnabled})`, + ) // Exit early if disabled, but keep embeddings collections present for migrations - if (pluginOptions.disabled) return config + if (pluginOptions.disabled) { + console.log('[payloadcms-vectorize] payloadcmsVectorize: Plugin disabled, exiting early') + return config + } // Register a single task using Payload Jobs that can handle any knowledge pool + console.log('[payloadcms-vectorize] payloadcmsVectorize: Registering Payload Jobs tasks...') const incomingJobs = config.jobs || { tasks: [] } const tasks = [...(config.jobs?.tasks || [])] + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: Existing tasks count: ${tasks.length}`, + ) + console.log('[payloadcms-vectorize] payloadcmsVectorize: Creating vectorize task...') const vectorizeTask = createVectorizeTask({ knowledgePools: pluginOptions.knowledgePools, }) tasks.push(vectorizeTask) + console.log('[payloadcms-vectorize] payloadcmsVectorize: Vectorize task added') + + console.log('[payloadcms-vectorize] payloadcmsVectorize: Creating prepare bulk embed task...') const prepareBulkEmbedTask = createPrepareBulkEmbeddingTask({ knowledgePools: pluginOptions.knowledgePools, pollOrCompleteQueueName: pluginOptions.bulkQueueNames?.pollOrCompleteQueueName, }) tasks.push(prepareBulkEmbedTask) + console.log('[payloadcms-vectorize] payloadcmsVectorize: Prepare bulk embed task added') + + console.log( + '[payloadcms-vectorize] payloadcmsVectorize: Creating poll or complete bulk embed task...', + ) const pollOrCompleteBulkEmbedTask = createPollOrCompleteBulkEmbeddingTask({ knowledgePools: pluginOptions.knowledgePools, pollOrCompleteQueueName: pluginOptions.bulkQueueNames?.pollOrCompleteQueueName, }) tasks.push(pollOrCompleteBulkEmbedTask) + console.log( + '[payloadcms-vectorize] payloadcmsVectorize: Poll or complete bulk embed task added', + ) config.jobs = { ...incomingJobs, tasks, } + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: Jobs configured. 
Total tasks: ${tasks.length}`, + ) const collectionToEmbedQueue = new Map< string, @@ -296,11 +477,23 @@ export const createVectorizeIntegration = >() // Extend configured collections with hooks + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: Setting up hooks for ${collectionToPools.size} collection(s)...`, + ) for (const [collectionSlug, pools] of collectionToPools.entries()) { + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: Setting up hooks for collection "${collectionSlug}" (${pools.length} pool(s))...`, + ) const collection = config.collections.find((c) => c.slug === collectionSlug) if (!collection) { + console.error( + `[payloadcms-vectorize] payloadcmsVectorize: Collection "${collectionSlug}" not found`, + ) throw new Error(`[payloadcms-vectorize] Collection ${collectionSlug} not found`) } + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: Collection "${collectionSlug}" found, adding hooks...`, + ) const embedQueue = async (doc: any, payload: Payload, req?: PayloadRequest) => { // Queue vectorization jobs for ALL knowledge pools containing this collection @@ -329,6 +522,9 @@ export const createVectorizeIntegration = } collectionToEmbedQueue.set(collectionSlug, embedQueue) + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: Embed queue function registered for "${collectionSlug}"`, + ) collection.hooks = { ...(collection.hooks || {}), @@ -386,17 +582,27 @@ export const createVectorizeIntegration = }, ], } + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: Hooks configured for collection "${collectionSlug}"`, + ) } + console.log('[payloadcms-vectorize] payloadcmsVectorize: All collection hooks configured') + console.log('[payloadcms-vectorize] payloadcmsVectorize: Creating vector search handlers...') const vectorSearchHandlers = createVectorSearchHandlers(pluginOptions.knowledgePools) + console.log('[payloadcms-vectorize] payloadcmsVectorize: Vector search handlers created') // Create vectorized payload object factory that creates methods bound to a payload instance + console.log( + '[payloadcms-vectorize] payloadcmsVectorize: Creating vectorized payload object factory...', + ) const createVectorizedPayloadObject = (payload: Payload): VectorizedPayload => { return { _isBulkEmbedEnabled: (knowledgePool: TPoolNames): boolean => { const poolConfig = pluginOptions.knowledgePools[knowledgePool] return !!poolConfig?.embeddingConfig?.bulkEmbeddingsFns }, + _staticConfigs: staticConfigs, search: (params: VectorSearchQuery) => vectorSearchHandlers.vectorSearch( payload, @@ -456,29 +662,80 @@ export const createVectorizeIntegration = } // Store factory in config.custom + console.log( + '[payloadcms-vectorize] payloadcmsVectorize: Storing vectorized payload factory in config.custom...', + ) config.custom = { ...(config.custom || {}), createVectorizedPayloadObject, } + console.log('[payloadcms-vectorize] payloadcmsVectorize: Factory stored in config.custom') + + // Register bin script for migration helper + console.log('[payloadcms-vectorize] payloadcmsVectorize: Registering bin script...') + const __filename = fileURLToPath(import.meta.url) + const __dirname = dirname(__filename) + const binScriptPath = resolve(__dirname, 'bin/vectorize-migrate.ts') + console.log(`[payloadcms-vectorize] payloadcmsVectorize: Bin script path: ${binScriptPath}`) + config.bin = [ + ...(config.bin || []), + { + key: 'vectorize:migrate', + scriptPath: binScriptPath, + }, + ] + console.log('[payloadcms-vectorize] payloadcmsVectorize: Bin script registered') + 
console.log('[payloadcms-vectorize] payloadcmsVectorize: Setting up onInit hook...') const incomingOnInit = config.onInit config.onInit = async (payload) => { - if (incomingOnInit) await incomingOnInit(payload) - // Ensure pgvector artifacts for each knowledge pool - for (const poolName in staticConfigs) { - const staticConfig = staticConfigs[poolName] - // Drizzle converts camelCase collection slugs to snake_case table names - await ensurePgvectorArtifacts({ - payload, + payload.logger.info( + '[payloadcms-vectorize] onInit: Starting pgvector artifacts verification', + ) + try { + if (incomingOnInit) { + payload.logger.info('[payloadcms-vectorize] onInit: Calling incoming onInit hook') + await incomingOnInit(payload) + payload.logger.info('[payloadcms-vectorize] onInit: Incoming onInit hook completed') + } + // Ensure pgvector artifacts for each knowledge pool + const poolNames = Object.keys(staticConfigs) + payload.logger.info( + `[payloadcms-vectorize] onInit: Verifying artifacts for ${poolNames.length} knowledge pool(s): ${poolNames.join(', ')}`, + ) + for (const poolName in staticConfigs) { + const staticConfig = staticConfigs[poolName] + const tableName = toSnakeCase(poolName) + payload.logger.info( + `[payloadcms-vectorize] onInit: Verifying artifacts for pool "${poolName}" (table: "${tableName}")`, + ) // Drizzle converts camelCase collection slugs to snake_case table names - tableName: toSnakeCase(poolName), - dims: staticConfig.dims, - ivfflatLists: staticConfig.ivfflatLists, - }) + await ensurePgvectorArtifacts({ + payload, + // Drizzle converts camelCase collection slugs to snake_case table names + tableName, + ivfflatLists: staticConfig.ivfflatLists, + }) + payload.logger.info( + `[payloadcms-vectorize] onInit: Artifacts verified for pool "${poolName}"`, + ) + } + payload.logger.info( + '[payloadcms-vectorize] onInit: All pgvector artifacts verified successfully', + ) + } catch (error) { + payload.logger.error( + `[payloadcms-vectorize] onInit: Error verifying pgvector artifacts: ${error instanceof Error ? 
error.message : String(error)}`, + ) + throw error } } + console.log('[payloadcms-vectorize] payloadcmsVectorize: onInit hook configured') if (pluginOptions.endpointOverrides?.enabled !== false) { + console.log( + '[payloadcms-vectorize] payloadcmsVectorize: Setting up vector search endpoint...', + ) const path = pluginOptions.endpointOverrides?.path || '/vector-search' const inputEndpoints = config.endpoints || [] const endpoints = [ @@ -506,8 +763,17 @@ export const createVectorizeIntegration = }, ] config.endpoints = endpoints + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: Vector search endpoint registered at "${path}"`, + ) + } else { + console.log('[payloadcms-vectorize] payloadcmsVectorize: Vector search endpoint disabled') } + console.log('[payloadcms-vectorize] payloadcmsVectorize: Plugin initialization complete') + console.log( + `[payloadcms-vectorize] payloadcmsVectorize: Final collections count: ${config.collections.length}`, + ) return config } return { diff --git a/src/types.ts b/src/types.ts index f211516..9c48a29 100644 --- a/src/types.ts +++ b/src/types.ts @@ -46,6 +46,8 @@ export type RetryFailedBatchResult = export type VectorizedPayload = { /** Check if bulk embedding is enabled for a knowledge pool */ _isBulkEmbedEnabled: (knowledgePool: TPoolNames) => boolean + /** Static configs for migration helper access */ + _staticConfigs: Record search: (params: VectorSearchQuery) => Promise> queueEmbed: ( params: From 2c8238aae69ea8191bc79a5c135d464cc245dd98 Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Sat, 17 Jan 2026 10:46:53 +0700 Subject: [PATCH 45/49] WIP --- dev/specs/chunkers.spec.ts | 22 ++- dev/specs/extensionFields.spec.ts | 19 ++- dev/specs/extensionFieldsVectorSearch.spec.ts | 23 ++- dev/specs/failedValidation.spec.ts | 27 +++- dev/specs/int.spec.ts | 21 ++- dev/specs/multipools.spec.ts | 12 +- dev/specs/queueName.spec.ts | 15 +- dev/specs/schemaName.spec.ts | 17 +- dev/specs/utils.ts | 152 +++++++++++++++++- dev/specs/vectorSearch.spec.ts | 20 ++- dev/specs/vectorizedPayload.spec.ts | 24 ++- src/endpoints/vectorSearch.ts | 1 - src/index.ts | 2 +- 13 files changed, 307 insertions(+), 48 deletions(-) diff --git a/dev/specs/chunkers.spec.ts b/dev/specs/chunkers.spec.ts index 454d1ea..aef7387 100644 --- a/dev/specs/chunkers.spec.ts +++ b/dev/specs/chunkers.spec.ts @@ -1,9 +1,8 @@ -import { getPayload } from 'payload' -import { beforeAll, describe, expect, test } from 'vitest' +import { describe, expect, test } from 'vitest' import { chunkText, chunkRichText } from 'helpers/chunkers.js' import { postgresAdapter } from '@payloadcms/db-postgres' import { buildDummyConfig, getInitialMarkdownContent, integration } from './constants.js' -import { createTestDb } from './utils.js' +import { createTestDb, initializePayloadWithMigrations, createTestMigrationsDir } from './utils.js' describe('Chunkers', () => { test('textChunker', () => { @@ -17,20 +16,27 @@ describe('Chunkers', () => { }) test('richTextChunker splits by H2', async () => { - beforeAll(async () => { - createTestDb({ dbName: 'chunkers_test' }) - }) + const dbName = 'chunkers_test' + await createTestDb({ dbName }) + const { migrationsDir } = createTestMigrationsDir(dbName) + const cfg = await buildDummyConfig({ db: postgresAdapter({ extensions: ['vector'], afterSchemaInit: [integration.afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, pool: { - connectionString: 'postgresql://postgres:password@localhost:5433/chunkers_test', + 
connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, }, }), }) const markdownContent = await getInitialMarkdownContent(cfg) - const thisPayload = await getPayload({ config: cfg }) + + const thisPayload = await initializePayloadWithMigrations({ + config: cfg, + key: `chunkers-test-${Date.now()}`, + }) const chunks = await chunkRichText(markdownContent, thisPayload) expect(chunks.length).toBe(3) diff --git a/dev/specs/extensionFields.spec.ts b/dev/specs/extensionFields.spec.ts index 56ee27a..6fe4f7f 100644 --- a/dev/specs/extensionFields.spec.ts +++ b/dev/specs/extensionFields.spec.ts @@ -1,9 +1,13 @@ import type { Payload } from 'payload' -import { getPayload } from 'payload' import { beforeAll, describe, expect, test } from 'vitest' import { postgresAdapter } from '@payloadcms/db-postgres' import { buildDummyConfig, integration, plugin } from './constants.js' -import { createTestDb, waitForVectorizationJobs } from './utils.js' +import { + createTestDb, + waitForVectorizationJobs, + initializePayloadWithMigrations, + createTestMigrationsDir, +} from './utils.js' import { PostgresPayload } from '../../src/types.js' import { chunkText, chunkRichText } from 'helpers/chunkers.js' import { makeDummyEmbedDocs, makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' @@ -15,6 +19,8 @@ describe('Extension fields integration tests', () => { beforeAll(async () => { await createTestDb({ dbName }) + const { migrationsDir } = createTestMigrationsDir(dbName) + const config = await buildDummyConfig({ jobs: { tasks: [], @@ -39,6 +45,8 @@ describe('Extension fields integration tests', () => { db: postgresAdapter({ extensions: ['vector'], afterSchemaInit: [integration.afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, pool: { connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, }, @@ -104,7 +112,12 @@ describe('Extension fields integration tests', () => { }), ], }) - payload = await getPayload({ config, cron: true }) + + payload = await initializePayloadWithMigrations({ + config, + key: `extension-fields-test-${Date.now()}`, + cron: true, + }) }) test('extension fields are added to the embeddings table schema', async () => { diff --git a/dev/specs/extensionFieldsVectorSearch.spec.ts b/dev/specs/extensionFieldsVectorSearch.spec.ts index 1f81419..94b136f 100644 --- a/dev/specs/extensionFieldsVectorSearch.spec.ts +++ b/dev/specs/extensionFieldsVectorSearch.spec.ts @@ -1,8 +1,12 @@ -import { getPayload } from 'payload' import { describe, expect, test } from 'vitest' import { makeDummyEmbedDocs, makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' import { buildDummyConfig, DIMS, integration, plugin } from './constants.js' -import { createTestDb, waitForVectorizationJobs } from './utils.js' +import { + createTestDb, + waitForVectorizationJobs, + initializePayloadWithMigrations, + createTestMigrationsDir, +} from './utils.js' import { postgresAdapter } from '@payloadcms/db-postgres' import { chunkRichText, chunkText } from 'helpers/chunkers.js' import { createVectorSearchHandlers } from '../../src/endpoints/vectorSearch.js' @@ -11,7 +15,9 @@ import type { KnowledgePoolDynamicConfig } from 'payloadcms-vectorize' describe('extensionFields', () => { test('returns extensionFields in search results with correct types', async () => { // Create a new payload instance with extensionFields - await createTestDb({ dbName: 'endpoint_test_extension' }) + const dbName = 'endpoint_test_extension' + await createTestDb({ dbName }) + const { 
migrationsDir } = createTestMigrationsDir(dbName) const defaultKnowledgePool: KnowledgePoolDynamicConfig = { collections: { posts: { @@ -89,8 +95,10 @@ describe('extensionFields', () => { db: postgresAdapter({ extensions: ['vector'], afterSchemaInit: [integration.afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, pool: { - connectionString: 'postgresql://postgres:password@localhost:5433/endpoint_test_extension', + connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, }, }), plugins: [ @@ -101,7 +109,12 @@ describe('extensionFields', () => { }), ], }) - const payloadWithExtensions = await getPayload({ config: configWithExtensions, cron: true }) + + const payloadWithExtensions = await initializePayloadWithMigrations({ + config: configWithExtensions, + key: `extension-fields-vector-search-test-${Date.now()}`, + cron: true, + }) // Create a post with extension field values const testQuery = 'Extension fields test content' diff --git a/dev/specs/failedValidation.spec.ts b/dev/specs/failedValidation.spec.ts index 79ef30e..8520a84 100644 --- a/dev/specs/failedValidation.spec.ts +++ b/dev/specs/failedValidation.spec.ts @@ -1,12 +1,17 @@ import { postgresAdapter } from '@payloadcms/db-postgres' import { buildConfig } from 'payload' -import { getPayload } from 'payload' import { describe, expect, test } from 'vitest' import { createVectorizeIntegration } from '../../src/index.js' -import { createTestDb, waitForVectorizationJobs } from './utils.js' +import { + createTestDb, + waitForVectorizationJobs, + initializePayloadWithMigrations, + createTestMigrationsDir, +} from './utils.js' const DIMS = 8 +const dbName = 'failed_validation_test' const embedDocs = async (texts: string[]) => texts.map(() => Array(DIMS).fill(0)) const embedQuery = async (_text: string) => Array(DIMS).fill(0) @@ -18,8 +23,7 @@ const { afterSchemaInitHook, payloadcmsVectorize } = createVectorizeIntegration( }, }) -const buildMalformedConfig = async () => { - await createTestDb({ dbName: 'failed_validation_test' }) +const buildMalformedConfig = async (migrationsDir: string) => { return buildConfig({ jobs: { tasks: [], @@ -39,10 +43,12 @@ const buildMalformedConfig = async () => { db: postgresAdapter({ extensions: ['vector'], afterSchemaInit: [afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, pool: { connectionString: process.env.DATABASE_URI || - 'postgresql://postgres:password@localhost:5433/failed_validation_test', + `postgresql://postgres:password@localhost:5433/${dbName}`, }, }), plugins: [ @@ -70,8 +76,15 @@ const buildMalformedConfig = async () => { describe('Validation failures mark jobs as errored', () => { test('malformed chunk entry fails the vectorize job', async () => { - const config = await buildMalformedConfig() - const payload = await getPayload({ config, cron: true }) + await createTestDb({ dbName }) + const { migrationsDir } = createTestMigrationsDir(dbName) + + const config = await buildMalformedConfig(migrationsDir) + const payload = await initializePayloadWithMigrations({ + config, + key: `failed-validation-test-${Date.now()}`, + cron: true, + }) await payload.create({ collection: 'posts', diff --git a/dev/specs/int.spec.ts b/dev/specs/int.spec.ts index bea7dab..cf4b657 100644 --- a/dev/specs/int.spec.ts +++ b/dev/specs/int.spec.ts @@ -14,9 +14,14 @@ import { $createHeadingNode } from '@payloadcms/richtext-lexical/lexical/rich-te import { PostgresPayload } from '../../src/types.js' import { editorConfigFactory, getEnabledNodes, lexicalEditor } from 
'@payloadcms/richtext-lexical' import { DIMS, getInitialMarkdownContent } from './constants.js' -import { createTestDb, waitForVectorizationJobs } from './utils.js' +import { + createTestDb, + waitForVectorizationJobs, + initializePayloadWithMigrations, + createTestMigrationsDir, +} from './utils.js' import { postgresAdapter } from '@payloadcms/db-postgres' -import { buildConfig, getPayload } from 'payload' +import { buildConfig } from 'payload' import { createVectorizeIntegration } from 'payloadcms-vectorize' const embedFn = makeDummyEmbedDocs(DIMS) @@ -32,6 +37,8 @@ describe('Plugin integration tests', () => { beforeAll(async () => { await createTestDb({ dbName }) + const { migrationsDir } = createTestMigrationsDir(dbName) + // Create isolated integration for this test suite const integration = createVectorizeIntegration({ default: { @@ -55,6 +62,8 @@ describe('Plugin integration tests', () => { db: postgresAdapter({ extensions: ['vector'], afterSchemaInit: [integration.afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, // Prevent dev mode schema push - use migrations only pool: { connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, }, @@ -99,7 +108,13 @@ describe('Plugin integration tests', () => { }, }) - payload = await getPayload({ config, key: `int-test-${Date.now()}`, cron: true }) + // Initialize Payload with migrations + payload = await initializePayloadWithMigrations({ + config, + key: `int-test-${Date.now()}`, + cron: true, + }) + markdownContent = await getInitialMarkdownContent(config) }) diff --git a/dev/specs/multipools.spec.ts b/dev/specs/multipools.spec.ts index 8b9c30d..58a9ef6 100644 --- a/dev/specs/multipools.spec.ts +++ b/dev/specs/multipools.spec.ts @@ -1,11 +1,11 @@ import type { Payload, SanitizedConfig } from 'payload' -import { buildConfig, getPayload } from 'payload' +import { buildConfig } from 'payload' import { beforeAll, describe, expect, test } from 'vitest' import { createVectorizeIntegration } from 'payloadcms-vectorize' import { lexicalEditor } from '@payloadcms/richtext-lexical' import { postgresAdapter } from '@payloadcms/db-postgres' -import { createTestDb } from './utils.js' +import { createTestDb, initializePayloadWithMigrations, createTestMigrationsDir } from './utils.js' import type { PostgresPayload } from '../../src/types.js' const DIMS_POOL1 = 8 @@ -18,6 +18,7 @@ describe('Multiple knowledge pools', () => { beforeAll(async () => { await createTestDb({ dbName }) + const { migrationsDir } = createTestMigrationsDir(dbName) const multiPoolIntegration = createVectorizeIntegration({ pool1: { @@ -60,6 +61,8 @@ describe('Multiple knowledge pools', () => { db: postgresAdapter({ extensions: ['vector'], afterSchemaInit: [multiPoolIntegration.afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, pool: { connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, }, @@ -67,7 +70,10 @@ describe('Multiple knowledge pools', () => { plugins: [multiPoolIntegration.payloadcmsVectorize(multiPoolPluginOptions)], }) - payload = await getPayload({ config }) + payload = await initializePayloadWithMigrations({ + config, + key: `multipools-test-${Date.now()}`, + }) }) test('creates two embeddings collections with vector columns', async () => { diff --git a/dev/specs/queueName.spec.ts b/dev/specs/queueName.spec.ts index 887a1c0..7b6e7f0 100644 --- a/dev/specs/queueName.spec.ts +++ b/dev/specs/queueName.spec.ts @@ -1,11 +1,10 @@ import type { Payload, SanitizedConfig } from 'payload' -import { 
getPayload } from 'payload' import { beforeAll, describe, expect, test } from 'vitest' import { chunkText, chunkRichText } from 'helpers/chunkers.js' import type { SerializedEditorState } from '@payloadcms/richtext-lexical/lexical' import { postgresAdapter } from '@payloadcms/db-postgres' import { buildDummyConfig, getInitialMarkdownContent, integration, plugin } from './constants.js' -import { createTestDb } from './utils.js' +import { createTestDb, initializePayloadWithMigrations, createTestMigrationsDir } from './utils.js' describe('Queue tests', () => { let config: SanitizedConfig @@ -15,6 +14,8 @@ describe('Queue tests', () => { const dbName = 'queue_test' beforeAll(async () => { await createTestDb({ dbName }) + const { migrationsDir } = createTestMigrationsDir(dbName) + config = await buildDummyConfig({ collections: [ { @@ -28,8 +29,10 @@ describe('Queue tests', () => { db: postgresAdapter({ extensions: ['vector'], afterSchemaInit: [integration.afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, pool: { - connectionString: 'postgresql://postgres:password@localhost:5433/queue_test', + connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, }, }), plugins: [ @@ -65,7 +68,11 @@ describe('Queue tests', () => { }), ], }) - payload = await getPayload({ config }) + + payload = await initializePayloadWithMigrations({ + config, + key: `queue-test-${Date.now()}`, + }) markdownContent = await getInitialMarkdownContent(config) }) test('vectorization jobs are queued using the queueName', async () => { diff --git a/dev/specs/schemaName.spec.ts b/dev/specs/schemaName.spec.ts index 8ec7613..1af1725 100644 --- a/dev/specs/schemaName.spec.ts +++ b/dev/specs/schemaName.spec.ts @@ -3,13 +3,17 @@ import type { Payload } from 'payload' import { postgresAdapter } from '@payloadcms/db-postgres' import { makeDummyEmbedDocs, makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' import { Client } from 'pg' -import { getPayload } from 'payload' import { beforeAll, describe, expect, test } from 'vitest' import type { PostgresPayload } from '../../src/types.js' import { buildDummyConfig, DIMS, integration, plugin } from './constants.js' -import { createTestDb, waitForVectorizationJobs } from './utils.js' +import { + createTestDb, + waitForVectorizationJobs, + initializePayloadWithMigrations, + createTestMigrationsDir, +} from './utils.js' import { createVectorSearchHandlers } from '../../src/endpoints/vectorSearch.js' import type { KnowledgePoolDynamicConfig } from 'payloadcms-vectorize' const CUSTOM_SCHEMA = 'custom' @@ -20,6 +24,7 @@ describe('Custom schemaName support', () => { beforeAll(async () => { await createTestDb({ dbName }) + const { migrationsDir } = createTestMigrationsDir(dbName) // Create the custom schema before Payload initializes const client = new Client({ @@ -42,6 +47,8 @@ describe('Custom schemaName support', () => { db: postgresAdapter({ afterSchemaInit: [integration.afterSchemaInitHook], extensions: ['vector'], + migrationDir: migrationsDir, + push: false, pool: { connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, }, @@ -85,7 +92,11 @@ describe('Custom schemaName support', () => { ], }) - payload = await getPayload({ config, cron: true }) + payload = await initializePayloadWithMigrations({ + config, + key: `schema-name-test-${Date.now()}`, + cron: true, + }) }) test('embeddings table is created in custom schema', async () => { diff --git a/dev/specs/utils.ts b/dev/specs/utils.ts index 214891d..bc0b433 100644 --- 
a/dev/specs/utils.ts +++ b/dev/specs/utils.ts @@ -2,6 +2,8 @@ import type { Payload, SanitizedConfig } from 'payload' import { buildConfig, getPayload } from 'payload' import { Client } from 'pg' +import { mkdirSync, rmSync } from 'fs' +import { join } from 'path' import { postgresAdapter } from '@payloadcms/db-postgres' import { lexicalEditor } from '@payloadcms/richtext-lexical' import { createVectorizeIntegration } from 'payloadcms-vectorize' @@ -9,6 +11,7 @@ import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../src/collections/bulkEmbeddingsR import { BULK_EMBEDDINGS_INPUT_METADATA_SLUG } from '../../src/collections/bulkEmbeddingInputMetadata.js' import { BULK_EMBEDDINGS_BATCHES_SLUG } from '../../src/collections/bulkEmbeddingsBatches.js' import { makeDummyEmbedDocs } from '../helpers/embed.js' +import { script as vectorizeMigrateScript } from '../../src/bin/vectorize-migrate.js' import type { BulkEmbeddingsFns, BulkEmbeddingInput, @@ -27,6 +30,128 @@ export const createTestDb = async ({ dbName }: { dbName: string }) => { await client.end() } +/** + * Initialize Payload with migrations applied. + * This handles the full migration setup: + * 1. Get payload with disableOnInit to avoid ensurePgvectorArtifacts check + * 2. Create initial migration + * 3. Run vectorize:migrate to patch with IVFFLAT index + * 4. Apply migrations + * 5. Run onInit + * + * @param config - A pre-built SanitizedConfig (must have migrationDir and push: false in db config) + * @param key - Unique key for getPayload caching + * @param cron - Whether to enable cron jobs (default: true) + */ +export async function initializePayloadWithMigrations({ + config, + key, + cron = true, +}: { + config: SanitizedConfig + key: string + cron?: boolean +}): Promise { + // Get payload with disableOnInit to avoid ensurePgvectorArtifacts check before migrations + const payload = await getPayload({ config, key, cron, disableOnInit: true }) + + // Create initial migration (Payload's schema) + await payload.db.createMigration({ migrationName: 'initial', payload }) + + // Run vectorize:migrate to patch with IVFFLAT index + await vectorizeMigrateScript(config) + + // Apply migrations (forceAcceptWarning bypasses the dev mode prompt) + await (payload.db as any).migrate({ forceAcceptWarning: true }) + + // Now run onInit (it's still available on config, not destroyed by disableOnInit) + if (payload.config.onInit) { + await payload.config.onInit(payload) + } + + return payload +} + +/** + * Create a unique migration directory for a test. + * Returns the path and a cleanup function. + */ +export function createTestMigrationsDir(dbName: string): { + migrationsDir: string + cleanup: () => void +} { + const migrationsDir = join(process.cwd(), 'dev', `test-migrations-${dbName}`) + // Clean up any existing migration directory + rmSync(migrationsDir, { recursive: true, force: true }) + mkdirSync(migrationsDir, { recursive: true }) + + return { + migrationsDir, + cleanup: () => rmSync(migrationsDir, { recursive: true, force: true }), + } +} + +/** + * Create pgvector artifacts (extension + IVFFLAT index) for testing. + * This should be called after migrations are applied but before onInit runs, + * or used with disableOnInit to manually set up the test environment. 
+ */ +export const ensureTestPgvectorArtifacts = async ({ + dbName, + tableName = 'default', + dims = DEFAULT_DIMS, + ivfflatLists = 1, +}: { + dbName: string + tableName?: string + dims?: number + ivfflatLists?: number +}) => { + const client = new Client({ + connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, + }) + await client.connect() + try { + // Ensure pgvector extension exists + await client.query('CREATE EXTENSION IF NOT EXISTS vector') + + // Check if table exists (it should be created by Payload's schema init) + const tableCheck = await client.query( + `SELECT 1 FROM information_schema.tables WHERE table_schema = 'public' AND table_name = $1`, + [tableName], + ) + if (tableCheck.rowCount === 0) { + // Table doesn't exist yet - this is expected before migrations + // We'll skip index creation; it will be handled by migrations + return + } + + // Check if embedding column exists + const columnCheck = await client.query( + `SELECT 1 FROM information_schema.columns WHERE table_schema = 'public' AND table_name = $1 AND column_name = 'embedding'`, + [tableName], + ) + if (columnCheck.rowCount === 0) { + // Column doesn't exist yet - skip index creation + return + } + + // Create IVFFLAT index if it doesn't exist + const indexName = `${tableName}_embedding_ivfflat` + const indexCheck = await client.query( + `SELECT 1 FROM pg_indexes WHERE schemaname = 'public' AND tablename = $1 AND indexname = $2`, + [tableName, indexName], + ) + if (indexCheck.rowCount === 0) { + await client.query( + `CREATE INDEX "${indexName}" ON "public"."${tableName}" USING ivfflat (embedding vector_cosine_ops) WITH (lists = ${ivfflatLists})`, + ) + } + } finally { + await client.end() + } +} + async function waitForTasks( payload: Payload, taskSlugs: string[], @@ -190,6 +315,13 @@ export async function buildPayloadWithIntegration({ pluginOpts, key, }: BuildPayloadArgs): Promise<{ payload: Payload; config: SanitizedConfig }> { + // Create a unique migration directory for this test + const migrationsDir = join(process.cwd(), 'dev', `test-migrations-${dbName}`) + + // Clean up any existing migration directory + rmSync(migrationsDir, { recursive: true, force: true }) + mkdirSync(migrationsDir, { recursive: true }) + const integration = createVectorizeIntegration({ default: { dims: DEFAULT_DIMS, @@ -209,6 +341,8 @@ export async function buildPayloadWithIntegration({ db: postgresAdapter({ extensions: ['vector'], afterSchemaInit: [integration.afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, // Prevent dev mode schema push - use migrations only pool: { connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, }, @@ -237,7 +371,23 @@ export async function buildPayloadWithIntegration({ }) const payloadKey = key ?? 
`payload-${dbName}-${Date.now()}` - const payload = await getPayload({ config, key: payloadKey, cron: true }) + // Disable onInit to avoid ensurePgvectorArtifacts check before index exists + const payload = await getPayload({ config, key: payloadKey, cron: true, disableOnInit: true }) + + // Create initial migration (Payload's schema) + await payload.db.createMigration({ migrationName: 'initial', payload }) + + // Run vectorize:migrate to patch with IVFFLAT index + await vectorizeMigrateScript(config) + + // Apply migrations (forceAcceptWarning bypasses the dev mode prompt) + await (payload.db as any).migrate({ forceAcceptWarning: true }) + + // Now run onInit (it's still available on config, not destroyed by disableOnInit) + if (payload.config.onInit) { + await payload.config.onInit(payload) + } + return { payload, config } } diff --git a/dev/specs/vectorSearch.spec.ts b/dev/specs/vectorSearch.spec.ts index 2ac894a..2c79747 100644 --- a/dev/specs/vectorSearch.spec.ts +++ b/dev/specs/vectorSearch.spec.ts @@ -1,6 +1,5 @@ import type { Payload } from 'payload' -import { getPayload } from 'payload' import { beforeAll, describe, expect, test } from 'vitest' import { makeDummyEmbedDocs, makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' import { type SerializedEditorState } from '@payloadcms/richtext-lexical/lexical' @@ -10,6 +9,8 @@ import { createMockBulkEmbeddings, createTestDb, waitForVectorizationJobs, + initializePayloadWithMigrations, + createTestMigrationsDir, } from './utils.js' import { postgresAdapter } from '@payloadcms/db-postgres' import { chunkRichText, chunkText } from 'helpers/chunkers.js' @@ -77,9 +78,12 @@ describe('Search endpoint integration tests', () => { let payload: Payload let markdownContent: SerializedEditorState const titleAndQuery = 'My query is a title' + const dbName = 'endpoint_test' beforeAll(async () => { - await createTestDb({ dbName: 'endpoint_test' }) + await createTestDb({ dbName }) + const { migrationsDir } = createTestMigrationsDir(dbName) + const config = await buildDummyConfig({ jobs: { tasks: [], @@ -102,8 +106,10 @@ describe('Search endpoint integration tests', () => { db: postgresAdapter({ extensions: ['vector'], afterSchemaInit: [integration.afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, // Prevent dev mode schema push - use migrations only pool: { - connectionString: 'postgresql://postgres:password@localhost:5433/endpoint_test', + connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, }, }), plugins: [ @@ -189,7 +195,13 @@ describe('Search endpoint integration tests', () => { }), ], }) - payload = await getPayload({ config, cron: true }) + + // Initialize Payload with migrations + payload = await initializePayloadWithMigrations({ + config, + key: `vector-search-test-${Date.now()}`, + cron: true, + }) markdownContent = await getInitialMarkdownContent(config) }) diff --git a/dev/specs/vectorizedPayload.spec.ts b/dev/specs/vectorizedPayload.spec.ts index 6ea9539..ffe182d 100644 --- a/dev/specs/vectorizedPayload.spec.ts +++ b/dev/specs/vectorizedPayload.spec.ts @@ -1,10 +1,14 @@ import type { Payload } from 'payload' -import { getPayload } from 'payload' import { beforeAll, describe, expect, test } from 'vitest' import { getVectorizedPayload, VectorizedPayload } from '../../src/types.js' import { buildDummyConfig, DIMS, getInitialMarkdownContent } from './constants.js' -import { createTestDb, waitForVectorizationJobs } from './utils.js' +import { + createTestDb, + waitForVectorizationJobs, + 
initializePayloadWithMigrations, + createTestMigrationsDir, +} from './utils.js' import { postgresAdapter } from '@payloadcms/db-postgres' import { makeDummyEmbedDocs, makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' import { chunkRichText, chunkText } from 'helpers/chunkers.js' @@ -30,9 +34,12 @@ describe('VectorizedPayload', () => { let payload: Payload let markdownContent: SerializedEditorState const titleAndQuery = 'VectorizedPayload Test Title' + const dbName = 'vectorized_payload_test' beforeAll(async () => { - await createTestDb({ dbName: 'vectorized_payload_test' }) + await createTestDb({ dbName }) + const { migrationsDir } = createTestMigrationsDir(dbName) + const config = await buildDummyConfig({ jobs: { tasks: [], @@ -55,8 +62,10 @@ describe('VectorizedPayload', () => { db: postgresAdapter({ extensions: ['vector'], afterSchemaInit: [integration.afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, pool: { - connectionString: 'postgresql://postgres:password@localhost:5433/vectorized_payload_test', + connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, }, }), plugins: [ @@ -89,7 +98,12 @@ describe('VectorizedPayload', () => { }), ], }) - payload = await getPayload({ config, cron: true }) + + payload = await initializePayloadWithMigrations({ + config, + key: `vectorized-payload-test-${Date.now()}`, + cron: true, + }) markdownContent = await getInitialMarkdownContent(config) }) diff --git a/src/endpoints/vectorSearch.ts b/src/endpoints/vectorSearch.ts index 8634eeb..274c618 100644 --- a/src/endpoints/vectorSearch.ts +++ b/src/endpoints/vectorSearch.ts @@ -100,7 +100,6 @@ async function performCosineSearch( throw new Error('Only works with Postgres') } - payload.db.createMigration // In PayloadCMS, payload.db IS the adapter, and drizzle is at payload.db.drizzle const adapter = payload.db if (!adapter) { diff --git a/src/index.ts b/src/index.ts index f5372e8..461ff2b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -675,7 +675,7 @@ export const createVectorizeIntegration = console.log('[payloadcms-vectorize] payloadcmsVectorize: Registering bin script...') const __filename = fileURLToPath(import.meta.url) const __dirname = dirname(__filename) - const binScriptPath = resolve(__dirname, 'bin/vectorize-migrate.ts') + const binScriptPath = resolve(__dirname, 'bin/vectorize-migrate.js') console.log(`[payloadcms-vectorize] payloadcmsVectorize: Bin script path: ${binScriptPath}`) config.bin = [ ...(config.bin || []), From a5abfdd53c56fb135ac8c2970379779c3d77f15f Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Sat, 17 Jan 2026 14:38:58 +0700 Subject: [PATCH 46/49] WIP --- dev/specs/migrationCli.spec.ts | 273 +++++++++++++-------------------- dev/specs/utils.ts | 97 +++--------- src/bin/vectorize-migrate.ts | 18 ++- src/index.ts | 170 -------------------- 4 files changed, 138 insertions(+), 420 deletions(-) diff --git a/dev/specs/migrationCli.spec.ts b/dev/specs/migrationCli.spec.ts index edb4473..a77c2b6 100644 --- a/dev/specs/migrationCli.spec.ts +++ b/dev/specs/migrationCli.spec.ts @@ -11,116 +11,66 @@ import { script as vectorizeMigrateScript } from '../../src/bin/vectorize-migrat import { readdirSync, statSync, existsSync, readFileSync, rmSync } from 'fs' import { join, resolve } from 'path' -describe('Migration CLI and ensurePgvectorArtifacts integration tests', () => { - const dbName = `migration_cli_test_${Date.now()}` - let payload: Payload - - beforeAll(async () => { - await 
createTestDb({ dbName }) - - const integration = createVectorizeIntegration({ - default: { - dims: DIMS, - ivfflatLists: 10, - }, - }) +describe('Migration CLI integration tests', () => { + describe('VectorizedPayload access', () => { + let payload: Payload + const dbName = `migration_cli_test_${Date.now()}` - const config = await buildConfig({ - secret: 'test-secret', - collections: [ - { - slug: 'posts', - fields: [{ name: 'title', type: 'text' }], - }, - ], - db: postgresAdapter({ - extensions: ['vector'], - afterSchemaInit: [integration.afterSchemaInitHook], - pool: { - connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, + beforeAll(async () => { + await createTestDb({ dbName }) + + const integration = createVectorizeIntegration({ + default: { + dims: DIMS, + ivfflatLists: 10, }, - }), - plugins: [ - integration.payloadcmsVectorize({ - knowledgePools: { - default: { - collections: { - posts: { - toKnowledgePool: async (doc) => [{ chunk: doc.title || '' }], + }) + + const config = await buildConfig({ + secret: 'test-secret', + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: ['vector'], + afterSchemaInit: [integration.afterSchemaInitHook], + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, + }, + }), + plugins: [ + integration.payloadcmsVectorize({ + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc) => [{ chunk: doc.title || '' }], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(DIMS), + realTimeIngestionFn: makeDummyEmbedDocs(DIMS), }, - }, - embeddingConfig: { - version: testEmbeddingVersion, - queryFn: makeDummyEmbedQuery(DIMS), - realTimeIngestionFn: makeDummyEmbedDocs(DIMS), }, }, - }, - }), - ], - jobs: { - tasks: [], - autoRun: [ - { - cron: '*/5 * * * * *', - limit: 10, - }, + }), ], - }, - }) - - // Temporarily disable onInit for runtime behavior tests - // This prevents ensurePgvectorArtifacts from running before tests can set up their state - - payload = await getPayload({ - config, - cron: true, - disableOnInit: true, - key: `test-runtime-behavior-${Date.now()}`, - }) - }) - - describe('Runtime behavior', () => { - test('ensurePgvectorArtifacts is presence-only and does not rebuild index', async () => { - const postgresPayload = payload as PostgresPayload - const schemaName = postgresPayload.db.schemaName || 'public' - const tableName = 'default' - - // Manually create the index first (simulating a migration) - await postgresPayload.db.pool?.query( - `CREATE INDEX IF NOT EXISTS ${tableName}_embedding_ivfflat ON "${schemaName}"."${tableName}" USING ivfflat (embedding vector_cosine_ops) WITH (lists = 10)`, - ) - - // Get initial index definition - const initialIndex = await postgresPayload.db.pool?.query( - `SELECT pg_get_indexdef(c.oid) as def - FROM pg_indexes i - JOIN pg_class c ON c.relname = i.indexname - JOIN pg_namespace n ON n.oid = c.relnamespace AND n.nspname = i.schemaname - WHERE i.schemaname = $1 AND i.tablename = $2 AND i.indexname = $3`, - [schemaName, tableName, `${tableName}_embedding_ivfflat`], - ) - const initialDef = initialIndex?.rows[0]?.def || '' - - // Call ensurePgvectorArtifacts (via onInit which should check presence) - // Since we already have the artifacts, it should pass without modifying - // Note: onInit calls ensurePgvectorArtifacts, but since artifacts exist, it should just verify - await 
payload.config.onInit?.(payload) - - // Verify index definition hasn't changed - const afterIndex = await postgresPayload.db.pool?.query( - `SELECT pg_get_indexdef(c.oid) as def - FROM pg_indexes i - JOIN pg_class c ON c.relname = i.indexname - JOIN pg_namespace n ON n.oid = c.relnamespace AND n.nspname = i.schemaname - WHERE i.schemaname = $1 AND i.tablename = $2 AND i.indexname = $3`, - [schemaName, tableName, `${tableName}_embedding_ivfflat`], - ) - const afterDef = afterIndex?.rows[0]?.def || '' + jobs: { + tasks: [], + autoRun: [ + { + cron: '*/5 * * * * *', + limit: 10, + }, + ], + }, + }) - // Index should still exist and be the same - expect(afterDef).toBeTruthy() - expect(afterDef).toBe(initialDef) + payload = await getPayload({ config, cron: true }) }) test('VectorizedPayload has _staticConfigs', async () => { @@ -133,25 +83,22 @@ describe('Migration CLI and ensurePgvectorArtifacts integration tests', () => { expect(vectorizedPayload?._staticConfigs.default.dims).toBe(DIMS) expect(vectorizedPayload?._staticConfigs.default.ivfflatLists).toBe(10) }) + }) + + describe('Error handling when migrations not run', () => { + let payload: Payload + const dbName = `migration_error_test_${Date.now()}` - test('ensurePgvectorArtifacts throws error when artifacts are missing (user has not run migrations)', async () => { - // Create a new database without any migrations applied - // This simulates the state when a user hasn't run migrations yet - const testDbName = `migration_cli_test_missing_${Date.now()}` - console.log('[TEST] Step 1: Creating test database:', testDbName) - await createTestDb({ dbName: testDbName }) - console.log('[TEST] Step 2: Database created') + beforeAll(async () => { + await createTestDb({ dbName }) - console.log('[TEST] Step 3: Creating integration') const integration = createVectorizeIntegration({ default: { dims: DIMS, ivfflatLists: 10, }, }) - console.log('[TEST] Step 4: Integration created') - console.log('[TEST] Step 5: Starting buildConfig...') const config = await buildConfig({ secret: 'test-secret', collections: [ @@ -164,8 +111,10 @@ describe('Migration CLI and ensurePgvectorArtifacts integration tests', () => { extensions: ['vector'], afterSchemaInit: [integration.afterSchemaInitHook], pool: { - connectionString: `postgresql://postgres:password@localhost:5433/${testDbName}`, + connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, }, + // Don't push schema changes - we want to test without migrations + push: false, }), plugins: [ integration.payloadcmsVectorize({ @@ -187,19 +136,47 @@ describe('Migration CLI and ensurePgvectorArtifacts integration tests', () => { ], jobs: { tasks: [], - autoRun: [], + autoRun: [ + { + cron: '*/5 * * * * *', + limit: 10, + }, + ], }, }) - console.log('[TEST] Step 6: buildConfig completed') - // Note: onInit will be called during getPayload and will throw because artifacts don't exist - // This simulates the real-world scenario where a user hasn't run migrations yet - // The error will be "Embedding column not found" (first check that fails) - console.log('[TEST] Step 7: Calling getPayload (should throw)...') + payload = await getPayload({ + config, + cron: false, // Disable cron to avoid background jobs + key: `migration-error-test-${Date.now()}`, + }) + }) + + test('vector search fails with descriptive error when embedding column missing', async () => { + const { getVectorizedPayload } = await import('payloadcms-vectorize') + const vectorizedPayload = getVectorizedPayload(payload) + + // Vector search 
should fail with a descriptive error await expect( - getPayload({ config, cron: true, key: `test-missing-artifacts-${Date.now()}` }), - ).rejects.toThrow('Embedding column not found') - console.log('[TEST] Step 8: getPayload threw as expected') + vectorizedPayload?.search({ + knowledgePool: 'default', + query: 'test query', + limit: 10, + }), + ).rejects.toThrow() + }) + + test('creating document fails when embedding table does not exist', async () => { + // Try to create a document that would trigger vectorization + // This should fail because the embedding table doesn't exist + await expect( + payload.create({ + collection: 'posts', + data: { + title: 'Test Post', + }, + }), + ).rejects.toThrow() }) }) @@ -283,17 +260,11 @@ describe('Migration CLI and ensurePgvectorArtifacts integration tests', () => { }, }) - // Temporarily disable onInit to avoid ensurePgvectorArtifacts check before migrations are applied - const savedOnInit = cliConfig.onInit - cliConfig.onInit = async () => { - // No-op: migrations haven't been applied yet - } - + // Get payload instance cliPayload = await getPayload({ config: cliConfig, cron: true, - key: `test-initial-setup-${Date.now()}`, - disableOnInit: true, + key: `migration-cli-test-${Date.now()}`, }) // Step 2: Create initial migration (this will include the embedding column via Drizzle) @@ -433,12 +404,6 @@ describe('Migration CLI and ensurePgvectorArtifacts integration tests', () => { } } - // Restore onInit and run it now that migrations are applied - cliConfig.onInit = savedOnInit - if (cliConfig.onInit) { - await cliConfig.onInit(cliPayload) - } - // Step 5: Verify index exists with correct lists parameter const postgresPayload = cliPayload as PostgresPayload const schemaName = postgresPayload.db.schemaName || 'public' @@ -516,17 +481,11 @@ describe('Migration CLI and ensurePgvectorArtifacts integration tests', () => { }, }) - // Temporarily disable onInit to avoid ensurePgvectorArtifacts check before migrations are applied - const savedOnInit = cliConfig.onInit - cliConfig.onInit = async () => { - // No-op: migrations haven't been applied yet - } - + // Get payload instance cliPayload = await getPayload({ config: cliConfig, cron: true, - key: `test-ivfflat-change-${Date.now()}`, - disableOnInit: true, + key: `migration-cli-test-${Date.now()}`, }) // Step 2: Run vectorize:migrate (should detect change and create migration) @@ -608,12 +567,6 @@ describe('Migration CLI and ensurePgvectorArtifacts integration tests', () => { } } - // Restore onInit and run it now that migrations are applied - if (savedOnInit) { - cliConfig.onInit = savedOnInit - await savedOnInit(cliPayload) - } - // Step 5: Verify index was rebuilt with new lists parameter const postgresPayload = cliPayload as PostgresPayload const schemaName = postgresPayload.db.schemaName || 'public' @@ -725,17 +678,11 @@ describe('Migration CLI and ensurePgvectorArtifacts integration tests', () => { }, }) - // Temporarily disable onInit to avoid ensurePgvectorArtifacts check before migrations are applied - const savedOnInitDims = cliConfig.onInit - cliConfig.onInit = async () => { - // No-op: migrations haven't been applied yet - } - + // Get payload instance cliPayload = await getPayload({ config: cliConfig, cron: true, - key: `test-dims-change-${Date.now()}`, - disableOnInit: true, + key: `migration-cli-test-${Date.now()}`, }) // Step 2: Run vectorize:migrate (should detect dims change) @@ -818,14 +765,6 @@ describe('Migration CLI and ensurePgvectorArtifacts integration tests', () => { } 
console.log('[TEST] Step 4.5: Migration applied successfully') - // Restore onInit and run it now that migrations are applied - console.log('[TEST] Step 4.6: Restoring onInit...') - if (savedOnInitDims) { - cliConfig.onInit = savedOnInitDims - await savedOnInitDims(cliPayload) - } - console.log('[TEST] Step 4.7: onInit restored and executed') - // Step 5: Verify column type changed and table was truncated console.log('[TEST] Step 5: Verifying column type and table state...') const postgresPayload = cliPayload as PostgresPayload diff --git a/dev/specs/utils.ts b/dev/specs/utils.ts index bc0b433..99c858b 100644 --- a/dev/specs/utils.ts +++ b/dev/specs/utils.ts @@ -23,24 +23,36 @@ export const createTestDb = async ({ dbName }: { dbName: string }) => { process.env.DATABASE_ADMIN_URI || 'postgresql://postgres:password@localhost:5433/postgres' // connect to 'postgres' const client = new Client({ connectionString: adminUri }) await client.connect() + + /* + // Drop and recreate the database to ensure a clean state + // First, terminate any existing connections to the database + await client.query(` + SELECT pg_terminate_backend(pg_stat_activity.pid) + FROM pg_stat_activity + WHERE pg_stat_activity.datname = $1 + AND pid <> pg_backend_pid() + `, [dbName])*/ + const exists = await client.query('SELECT 1 FROM pg_database WHERE datname = $1', [dbName]) if (exists.rowCount === 0) { await client.query(`CREATE DATABASE ${dbName}`) + //await client.query(`DROP DATABASE "${dbName}"`) } + //await client.query(`DROP DATABASE "${dbName}"`) await client.end() } /** * Initialize Payload with migrations applied. * This handles the full migration setup: - * 1. Get payload with disableOnInit to avoid ensurePgvectorArtifacts check + * 1. Get payload instance * 2. Create initial migration * 3. Run vectorize:migrate to patch with IVFFLAT index * 4. Apply migrations - * 5. Run onInit * * @param config - A pre-built SanitizedConfig (must have migrationDir and push: false in db config) - * @param key - Unique key for getPayload caching + * @param key - Unique key for getPayload caching (prevents instance collisions in tests) * @param cron - Whether to enable cron jobs (default: true) */ export async function initializePayloadWithMigrations({ @@ -49,11 +61,10 @@ export async function initializePayloadWithMigrations({ cron = true, }: { config: SanitizedConfig - key: string + key?: string cron?: boolean }): Promise { - // Get payload with disableOnInit to avoid ensurePgvectorArtifacts check before migrations - const payload = await getPayload({ config, key, cron, disableOnInit: true }) + const payload = await getPayload({ config, key, cron }) // Create initial migration (Payload's schema) await payload.db.createMigration({ migrationName: 'initial', payload }) @@ -64,11 +75,6 @@ export async function initializePayloadWithMigrations({ // Apply migrations (forceAcceptWarning bypasses the dev mode prompt) await (payload.db as any).migrate({ forceAcceptWarning: true }) - // Now run onInit (it's still available on config, not destroyed by disableOnInit) - if (payload.config.onInit) { - await payload.config.onInit(payload) - } - return payload } @@ -91,67 +97,6 @@ export function createTestMigrationsDir(dbName: string): { } } -/** - * Create pgvector artifacts (extension + IVFFLAT index) for testing. - * This should be called after migrations are applied but before onInit runs, - * or used with disableOnInit to manually set up the test environment. 
- */ -export const ensureTestPgvectorArtifacts = async ({ - dbName, - tableName = 'default', - dims = DEFAULT_DIMS, - ivfflatLists = 1, -}: { - dbName: string - tableName?: string - dims?: number - ivfflatLists?: number -}) => { - const client = new Client({ - connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, - }) - await client.connect() - try { - // Ensure pgvector extension exists - await client.query('CREATE EXTENSION IF NOT EXISTS vector') - - // Check if table exists (it should be created by Payload's schema init) - const tableCheck = await client.query( - `SELECT 1 FROM information_schema.tables WHERE table_schema = 'public' AND table_name = $1`, - [tableName], - ) - if (tableCheck.rowCount === 0) { - // Table doesn't exist yet - this is expected before migrations - // We'll skip index creation; it will be handled by migrations - return - } - - // Check if embedding column exists - const columnCheck = await client.query( - `SELECT 1 FROM information_schema.columns WHERE table_schema = 'public' AND table_name = $1 AND column_name = 'embedding'`, - [tableName], - ) - if (columnCheck.rowCount === 0) { - // Column doesn't exist yet - skip index creation - return - } - - // Create IVFFLAT index if it doesn't exist - const indexName = `${tableName}_embedding_ivfflat` - const indexCheck = await client.query( - `SELECT 1 FROM pg_indexes WHERE schemaname = 'public' AND tablename = $1 AND indexname = $2`, - [tableName, indexName], - ) - if (indexCheck.rowCount === 0) { - await client.query( - `CREATE INDEX "${indexName}" ON "public"."${tableName}" USING ivfflat (embedding vector_cosine_ops) WITH (lists = ${ivfflatLists})`, - ) - } - } finally { - await client.end() - } -} - async function waitForTasks( payload: Payload, taskSlugs: string[], @@ -371,8 +316,7 @@ export async function buildPayloadWithIntegration({ }) const payloadKey = key ?? 
`payload-${dbName}-${Date.now()}` - // Disable onInit to avoid ensurePgvectorArtifacts check before index exists - const payload = await getPayload({ config, key: payloadKey, cron: true, disableOnInit: true }) + const payload = await getPayload({ config, key: payloadKey, cron: true }) // Create initial migration (Payload's schema) await payload.db.createMigration({ migrationName: 'initial', payload }) @@ -383,11 +327,6 @@ export async function buildPayloadWithIntegration({ // Apply migrations (forceAcceptWarning bypasses the dev mode prompt) await (payload.db as any).migrate({ forceAcceptWarning: true }) - // Now run onInit (it's still available on config, not destroyed by disableOnInit) - if (payload.config.onInit) { - await payload.config.onInit(payload) - } - return { payload, config } } diff --git a/src/bin/vectorize-migrate.ts b/src/bin/vectorize-migrate.ts index 6f17e59..d698a3d 100644 --- a/src/bin/vectorize-migrate.ts +++ b/src/bin/vectorize-migrate.ts @@ -345,11 +345,11 @@ function patchMigrationFile( * Bin script entry point for creating vector migrations */ export const script = async (config: SanitizedConfig): Promise => { - // Disable onInit to avoid ensurePgvectorArtifacts check - migrations may not be applied yet + // Use a unique key to ensure we get a fresh Payload instance with the correct config + // This is important when running in tests or when the config has been modified const payload = await getPayload({ config, - disableOnInit: true, - key: `vectorize-migrate-payload-instance-${Date.now()}`, + key: `vectorize-migrate-${Date.now()}`, }) const vectorizedPayload = getVectorizedPayload(payload) @@ -366,7 +366,17 @@ export const script = async (config: SanitizedConfig): Promise => { const poolNames = Object.keys(staticConfigs) const schemaName = (payload.db as any).schemaName || 'public' - const migrationsDir = (payload.db as any).migrationDir || resolve(process.cwd(), 'src/migrations') + + // Get migrations directory - the postgres adapter stores it on payload.db.migrationDir + // but this may be set to default before config is applied. Try multiple sources. + const dbMigrationDir = (payload.db as any).migrationDir + + // Debug: log migration directory detection + console.log('[payloadcms-vectorize] Debug: payload.db.migrationDir =', dbMigrationDir) + + // Use the payload.db.migrationDir - this is where Payload stores the resolved path + const migrationsDir = dbMigrationDir || resolve(process.cwd(), 'src/migrations') + console.log('[payloadcms-vectorize] Using migrations directory:', migrationsDir) console.log('[payloadcms-vectorize] Checking for configuration changes...') diff --git a/src/index.ts b/src/index.ts index 461ff2b..2083da9 100644 --- a/src/index.ts +++ b/src/index.ts @@ -78,131 +78,6 @@ export type { export { getVectorizedPayload } from './types.js' -/** - * Presence-only safety net: checks that pgvector artifacts exist. - * Does NOT create or modify them - migrations should handle that. - * This is a runtime check to fail fast if migrations haven't been applied. - */ -async function ensurePgvectorArtifacts(args: { - payload: Payload - tableName: string - ivfflatLists: number -}): Promise { - const { payload, tableName } = args - - payload.logger.info( - `[payloadcms-vectorize] ensurePgvectorArtifacts: Starting verification for table "${tableName}"`, - ) - - if (!isPostgresPayload(payload)) { - throw new Error( - '[payloadcms-vectorize] This plugin requires the Postgres adapter. 
Please configure @payloadcms/db-postgres.', - ) - } - - // Now payload is typed as PostgresPayload - const postgresPayload = payload as PostgresPayload - const schemaName = postgresPayload.db.schemaName || 'public' - - payload.logger.info( - `[payloadcms-vectorize] ensurePgvectorArtifacts: Using schema "${schemaName}" for table "${tableName}"`, - ) - - const runQuery = async (sql: string, params?: any[]): Promise => { - payload.logger.debug(`[payloadcms-vectorize] ensurePgvectorArtifacts: Executing query: ${sql}`) - if (postgresPayload.db.pool?.query) { - return postgresPayload.db.pool.query(sql, params) - } - if (postgresPayload.db.drizzle?.execute) { - return postgresPayload.db.drizzle.execute(sql) - } - throw new Error('[payloadcms-vectorize] No database query function available') - } - - try { - // Check extension exists - payload.logger.info( - '[payloadcms-vectorize] ensurePgvectorArtifacts: Checking pgvector extension...', - ) - const extensionCheck = await runQuery(`SELECT 1 FROM pg_extension WHERE extname = 'vector'`) - const extensionRows = Array.isArray(extensionCheck) - ? extensionCheck - : extensionCheck?.rows || [] - if (extensionRows.length === 0) { - payload.logger.error( - '[payloadcms-vectorize] ensurePgvectorArtifacts: pgvector extension not found', - ) - throw new Error( - `[payloadcms-vectorize] pgvector extension not found. Please ensure migrations have been applied or manually create the extension: CREATE EXTENSION IF NOT EXISTS vector;`, - ) - } - payload.logger.info('[payloadcms-vectorize] ensurePgvectorArtifacts: pgvector extension found') - - // Check column exists with correct dims - payload.logger.info( - `[payloadcms-vectorize] ensurePgvectorArtifacts: Checking embedding column in "${schemaName}"."${tableName}"...`, - ) - const columnCheck = await runQuery( - `SELECT column_name, udt_name - FROM information_schema.columns - WHERE table_schema = $1 AND table_name = $2 AND column_name = 'embedding'`, - [schemaName, tableName], - ) - const columnRows = Array.isArray(columnCheck) ? columnCheck : columnCheck?.rows || [] - if (columnRows.length === 0) { - payload.logger.error( - `[payloadcms-vectorize] ensurePgvectorArtifacts: Embedding column not found in "${schemaName}"."${tableName}"`, - ) - throw new Error( - `[payloadcms-vectorize] Embedding column not found in table "${schemaName}"."${tableName}". Please ensure migrations have been applied.`, - ) - } - payload.logger.info( - `[payloadcms-vectorize] ensurePgvectorArtifacts: Embedding column found (type: ${columnRows[0]?.udt_name || 'unknown'})`, - ) - - // Check index exists (don't verify lists parameter - migrations handle that) - const indexName = `${tableName}_embedding_ivfflat` - payload.logger.info( - `[payloadcms-vectorize] ensurePgvectorArtifacts: Checking IVFFLAT index "${indexName}"...`, - ) - const indexCheck = await runQuery( - `SELECT 1 - FROM pg_indexes - WHERE schemaname = $1 AND tablename = $2 AND indexname = $3`, - [schemaName, tableName, indexName], - ) - const indexRows = Array.isArray(indexCheck) ? indexCheck : indexCheck?.rows || [] - if (indexRows.length === 0) { - payload.logger.error( - `[payloadcms-vectorize] ensurePgvectorArtifacts: IVFFLAT index "${indexName}" not found on "${schemaName}"."${tableName}"`, - ) - throw new Error( - `[payloadcms-vectorize] IVFFLAT index not found on table "${schemaName}"."${tableName}". 
Please ensure migrations have been applied.`, - ) - } - payload.logger.info( - `[payloadcms-vectorize] ensurePgvectorArtifacts: IVFFLAT index "${indexName}" found`, - ) - - postgresPayload.logger.info( - `[payloadcms-vectorize] pgvector artifacts verified for table "${schemaName}"."${tableName}"`, - ) - } catch (err) { - payload.logger.error( - `[payloadcms-vectorize] ensurePgvectorArtifacts: Error occurred: ${err instanceof Error ? err.message : String(err)}`, - ) - if (err instanceof Error && err.message.includes('[payloadcms-vectorize]')) { - throw err - } - postgresPayload.logger.error( - '[payloadcms-vectorize] Failed checking pgvector artifacts', - err as Error, - ) - throw new Error(`[payloadcms-vectorize] Failed checking pgvector artifacts: ${err}`) - } -} - // ================== // Plugin entry point // ================== @@ -686,51 +561,6 @@ export const createVectorizeIntegration = ] console.log('[payloadcms-vectorize] payloadcmsVectorize: Bin script registered') - console.log('[payloadcms-vectorize] payloadcmsVectorize: Setting up onInit hook...') - const incomingOnInit = config.onInit - config.onInit = async (payload) => { - payload.logger.info( - '[payloadcms-vectorize] onInit: Starting pgvector artifacts verification', - ) - try { - if (incomingOnInit) { - payload.logger.info('[payloadcms-vectorize] onInit: Calling incoming onInit hook') - await incomingOnInit(payload) - payload.logger.info('[payloadcms-vectorize] onInit: Incoming onInit hook completed') - } - // Ensure pgvector artifacts for each knowledge pool - const poolNames = Object.keys(staticConfigs) - payload.logger.info( - `[payloadcms-vectorize] onInit: Verifying artifacts for ${poolNames.length} knowledge pool(s): ${poolNames.join(', ')}`, - ) - for (const poolName in staticConfigs) { - const staticConfig = staticConfigs[poolName] - const tableName = toSnakeCase(poolName) - payload.logger.info( - `[payloadcms-vectorize] onInit: Verifying artifacts for pool "${poolName}" (table: "${tableName}")`, - ) - // Drizzle converts camelCase collection slugs to snake_case table names - await ensurePgvectorArtifacts({ - payload, - tableName, - ivfflatLists: staticConfig.ivfflatLists, - }) - payload.logger.info( - `[payloadcms-vectorize] onInit: Artifacts verified for pool "${poolName}"`, - ) - } - payload.logger.info( - '[payloadcms-vectorize] onInit: All pgvector artifacts verified successfully', - ) - } catch (error) { - payload.logger.error( - `[payloadcms-vectorize] onInit: Error verifying pgvector artifacts: ${error instanceof Error ? 
error.message : String(error)}`, - ) - throw error - } - } - console.log('[payloadcms-vectorize] payloadcmsVectorize: onInit hook configured') - if (pluginOptions.endpointOverrides?.enabled !== false) { console.log( '[payloadcms-vectorize] payloadcmsVectorize: Setting up vector search endpoint...', From cc6d39dbac01dfa3132f44c10b1f5d5bbfba5db3 Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Sat, 17 Jan 2026 17:55:00 +0700 Subject: [PATCH 47/49] WIP --- dev/specs/migrationCli.spec.ts | 276 +++++++++++++++++++++++++++++++++ src/bin/vectorize-migrate.ts | 32 +++- 2 files changed, 302 insertions(+), 6 deletions(-) diff --git a/dev/specs/migrationCli.spec.ts b/dev/specs/migrationCli.spec.ts index a77c2b6..97c6ec4 100644 --- a/dev/specs/migrationCli.spec.ts +++ b/dev/specs/migrationCli.spec.ts @@ -798,5 +798,281 @@ describe('Migration CLI integration tests', () => { expect(rowCount).toBe(0) console.log('[TEST] Test 4 completed successfully') }) + + test('5. Add new knowledgePool: CLI creates migration for new table', async () => { + console.log('[TEST] Starting test 5: Add new knowledgePool') + + // Step 1: Create integration with an additional knowledgePool "secondary" + const integrationWithSecondary = createVectorizeIntegration({ + default: { + dims: 10, // Keep same dims as test 4 + ivfflatLists: 20, // Keep same lists as test 4 + }, + secondary: { + dims: DIMS, + ivfflatLists: 5, + }, + }) + + cliConfig = await buildConfig({ + secret: 'test-secret', + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + { + slug: 'articles', + fields: [{ name: 'content', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: ['vector'], + afterSchemaInit: [integrationWithSecondary.afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${cliDbName}`, + }, + }), + plugins: [ + integrationWithSecondary.payloadcmsVectorize({ + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc) => [{ chunk: doc.title || '' }], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(10), + realTimeIngestionFn: makeDummyEmbedDocs(10), + }, + }, + secondary: { + collections: { + articles: { + toKnowledgePool: async (doc: any) => [{ chunk: doc.content || '' }], + }, + } as any, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(DIMS), + realTimeIngestionFn: makeDummyEmbedDocs(DIMS), + }, + }, + }, + }), + ], + jobs: { + tasks: [], + autoRun: [ + { + cron: '*/5 * * * * *', + limit: 10, + }, + ], + }, + }) + + // Get new payload instance + cliPayload = await getPayload({ + config: cliConfig, + cron: true, + key: `migration-cli-test-5-${Date.now()}`, + }) + + // Step 2: Create migration for new table + console.log('[TEST] Step 2: Creating migration for new knowledgePool...') + try { + await cliPayload.db.createMigration({ + migrationName: 'add_secondary_pool', + payload: cliPayload, + forceAcceptWarning: true, // Skip prompts in tests + }) + console.log('[TEST] Step 2.5: Migration created') + } catch (e) { + console.error('[TEST] Step 2 ERROR - createMigration failed:', e) + throw e + } + + // Step 3: Run vectorize:migrate to add IVFFLAT index for new pool + console.log('[TEST] Step 3: Running vectorize:migrate...') + try { + await vectorizeMigrateScript(cliConfig) + console.log('[TEST] Step 3.5: vectorize:migrate completed') + } catch (e) { + 
console.error('[TEST] Step 3 ERROR - vectorize:migrate failed:', e) + throw e + } + + // Step 4: Verify migration file contains secondary table creation and IVFFLAT index + const migrations = readdirSync(migrationsDir) + .filter( + (f) => (f.endsWith('.ts') || f.endsWith('.js')) && f !== 'index.ts' && f !== 'index.js', + ) + .map((f) => ({ + name: f, + path: join(migrationsDir, f), + mtime: statSync(join(migrationsDir, f)).mtime, + })) + .sort((a, b) => b.mtime.getTime() - a.mtime.getTime()) + + const newestMigration = migrations[0] + console.log(`[TEST] Step 4: Checking newest migration: ${newestMigration.name}`) + const migrationContent = readFileSync(newestMigration.path, 'utf-8') + + // Should contain secondary table creation + expect(migrationContent).toContain('secondary') + // Should contain IVFFLAT index for secondary pool + expect(migrationContent).toContain('secondary_embedding_ivfflat') + console.log('[TEST] Step 4.5: Migration file verification passed') + + // Step 5: Apply the migration + console.log('[TEST] Step 5: Applying migration...') + try { + await (cliPayload.db as any).migrate({ forceAcceptWarning: true }) + console.log('[TEST] Step 5.5: Migration applied') + } catch (e) { + console.error('[TEST] Step 5 ERROR - migrate failed:', e) + throw e + } + + // Step 6: Verify new table exists with IVFFLAT index + const postgresPayload = cliPayload as PostgresPayload + const schemaName = postgresPayload.db.schemaName || 'public' + + // Check table exists + const tableCheck = await postgresPayload.db.pool?.query( + `SELECT EXISTS ( + SELECT FROM information_schema.tables + WHERE table_schema = $1 AND table_name = 'secondary' + )`, + [schemaName], + ) + expect(tableCheck?.rows[0]?.exists).toBe(true) + console.log('[TEST] Step 6: Secondary table exists') + + // Check IVFFLAT index exists + const indexCheck = await postgresPayload.db.pool?.query( + `SELECT indexname FROM pg_indexes WHERE schemaname = $1 AND indexname = $2`, + [schemaName, 'secondary_embedding_ivfflat'], + ) + expect(indexCheck?.rows.length).toBeGreaterThan(0) + console.log('[TEST] Step 6.5: Secondary IVFFLAT index exists') + console.log('[TEST] Test 5 completed successfully') + }) + + test('6. Remove knowledgePool: Secondary table can be dropped manually', async () => { + console.log('[TEST] Starting test 6: Remove knowledgePool') + + // Note: Payload's migration system doesn't automatically generate DROP TABLE + // migrations when collections are removed. Users need to manually drop tables. + // This test verifies that after removing a pool, the vectorize plugin handles + // it gracefully and the table can be dropped manually. 
+ + // Step 1: Create integration with only 'default' pool (removing 'secondary') + const integrationWithoutSecondary = createVectorizeIntegration({ + default: { + dims: 10, + ivfflatLists: 20, + }, + }) + + cliConfig = await buildConfig({ + secret: 'test-secret', + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: ['vector'], + afterSchemaInit: [integrationWithoutSecondary.afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${cliDbName}`, + }, + }), + plugins: [ + integrationWithoutSecondary.payloadcmsVectorize({ + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc) => [{ chunk: doc.title || '' }], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(10), + realTimeIngestionFn: makeDummyEmbedDocs(10), + }, + }, + }, + }), + ], + jobs: { + tasks: [], + autoRun: [ + { + cron: '*/5 * * * * *', + limit: 10, + }, + ], + }, + }) + + // Get new payload instance + cliPayload = await getPayload({ + config: cliConfig, + cron: true, + key: `migration-cli-test-6-${Date.now()}`, + }) + + // Step 2: Run vectorize:migrate - should detect no changes for default pool + // and not error out because secondary is no longer in config + console.log('[TEST] Step 2: Running vectorize:migrate with secondary pool removed...') + await vectorizeMigrateScript(cliConfig) + console.log('[TEST] Step 2.5: vectorize:migrate completed (no changes expected)') + + // Step 3: Verify secondary table still exists (Payload doesn't auto-drop) + const postgresPayload = cliPayload as PostgresPayload + const schemaName = postgresPayload.db.schemaName || 'public' + + const tableCheck = await postgresPayload.db.pool?.query( + `SELECT EXISTS ( + SELECT FROM information_schema.tables + WHERE table_schema = $1 AND table_name = 'secondary' + )`, + [schemaName], + ) + // Table should still exist since Payload doesn't auto-drop tables + expect(tableCheck?.rows[0]?.exists).toBe(true) + console.log('[TEST] Step 3: Secondary table still exists (as expected - manual drop required)') + + // Step 4: Manually drop the secondary table and its index + console.log('[TEST] Step 4: Manually dropping secondary table...') + await postgresPayload.db.pool?.query( + `DROP INDEX IF EXISTS "${schemaName}"."secondary_embedding_ivfflat"`, + ) + await postgresPayload.db.pool?.query(`DROP TABLE IF EXISTS "${schemaName}"."secondary" CASCADE`) + console.log('[TEST] Step 4.5: Secondary table dropped') + + // Step 5: Verify secondary table no longer exists + const tableCheckAfter = await postgresPayload.db.pool?.query( + `SELECT EXISTS ( + SELECT FROM information_schema.tables + WHERE table_schema = $1 AND table_name = 'secondary' + )`, + [schemaName], + ) + expect(tableCheckAfter?.rows[0]?.exists).toBe(false) + console.log('[TEST] Step 5: Secondary table no longer exists') + console.log('[TEST] Test 6 completed successfully') + }) }) }) diff --git a/src/bin/vectorize-migrate.ts b/src/bin/vectorize-migrate.ts index d698a3d..de0c3fc 100644 --- a/src/bin/vectorize-migrate.ts +++ b/src/bin/vectorize-migrate.ts @@ -88,14 +88,33 @@ function getPriorStateFromMigrations( ) } - // Check for dims in vector column definition (search full content as dims should be consistent) - const dimsMatch = content.match(new RegExp(`vector\\((\\d+)\\)`, 'i')) + // Check for dims in vector column definition + // Look for pool-specific patterns 
to avoid mixing up dims from different pools + // Use non-greedy .*? to match table-specific sections + const dimsMatch = + // ALTER TABLE specific to this table + content.match( + new RegExp(`ALTER\\s+TABLE[^;]*?"${tableName}"[^;]*?vector\\((\\d+)\\)`, 'is'), + ) || + // CREATE TABLE for this table (with non-greedy match to the table content) + content.match( + new RegExp(`CREATE\\s+TABLE[^;]*?"${tableName}"[^;]*?embedding[^;]*?vector\\((\\d+)\\)`, 'is'), + ) || + // Table definition in Drizzle format: "tableName" (...embedding vector(X)...) + content.match( + new RegExp(`"${tableName}"\\s*\\([^)]*embedding[^)]*vector\\((\\d+)\\)`, 'is'), + ) + if (dimsMatch && !state.get(poolName)?.dims) { const dims = parseInt(dimsMatch[1], 10) const current = state.get(poolName) || { dims: null, ivfflatLists: null } state.set(poolName, { ...current, dims }) console.log( - `[payloadcms-vectorize] Found prior dims=${dims} for pool "${poolName}" in ${file.name}`, + `[payloadcms-vectorize] Found prior dims=${dims} for pool "${poolName}" (table="${tableName}") in ${file.name}`, + ) + } else if (!state.get(poolName)?.dims) { + console.log( + `[payloadcms-vectorize] No dims found for pool "${poolName}" (table="${tableName}") in ${file.name}`, ) } } @@ -345,14 +364,14 @@ function patchMigrationFile( * Bin script entry point for creating vector migrations */ export const script = async (config: SanitizedConfig): Promise => { - // Use a unique key to ensure we get a fresh Payload instance with the correct config - // This is important when running in tests or when the config has been modified + // Get Payload instance for db operations and to access static configs via VectorizedPayload const payload = await getPayload({ config, key: `vectorize-migrate-${Date.now()}`, }) - const vectorizedPayload = getVectorizedPayload(payload) + // Get static configs from VectorizedPayload + const vectorizedPayload = getVectorizedPayload(payload) if (!vectorizedPayload) { throw new Error( '[payloadcms-vectorize] Vectorize plugin not found. 
Ensure payloadcmsVectorize is configured in your Payload config.', @@ -513,6 +532,7 @@ export const script = async (config: SanitizedConfig): Promise => { await payload.db.createMigration({ migrationName: 'vectorize-config', payload, + forceAcceptWarning: true, }) console.log('[payloadcms-vectorize] Migration created successfully') } catch (error) { From c5b7164fb9a151c6af522429f9354a7f1b1c6cd6 Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Sat, 17 Jan 2026 19:11:05 +0700 Subject: [PATCH 48/49] WIP --- src/bin/vectorize-migrate.ts | 146 ++--------------------------------- src/index.ts | 138 +-------------------------------- 2 files changed, 9 insertions(+), 275 deletions(-) diff --git a/src/bin/vectorize-migrate.ts b/src/bin/vectorize-migrate.ts index de0c3fc..105a711 100644 --- a/src/bin/vectorize-migrate.ts +++ b/src/bin/vectorize-migrate.ts @@ -36,15 +36,12 @@ function getPriorStateFromMigrations( })) .sort((a, b) => b.mtime.getTime() - a.mtime.getTime()) - console.log(`[payloadcms-vectorize] Found ${migrationFiles.length} migration file(s) to scan for prior state`) - // Read migration files to find vector config for (const file of migrationFiles) { try { const content = readFileSync(file.path, 'utf-8') // Extract only the UP function content to avoid matching values in DOWN function - // The DOWN function contains previous/rollback values which we don't want const upFunctionMatch = content.match( /export\s+async\s+function\s+up\s*\([^)]*\)[^{]*\{([\s\S]*?)(?=\}\s*(?:export\s+async\s+function\s+down|$))/i, ) @@ -55,52 +52,32 @@ function getPriorStateFromMigrations( const tableName = toSnakeCase(poolName) const indexName = `${tableName}_embedding_ivfflat` - // Check if this migration creates the index (only in UP function) - // The code format is: await db.execute(sql.raw(`CREATE INDEX "indexName" ... WITH (lists = 10)`)) - // We need to match the lists parameter in the template literal - // Use non-greedy .*? to match the FIRST occurrence const indexMatch = - // Match: db.execute(sql.raw(`...CREATE INDEX..."indexName"...WITH (lists = 10)...`)) upContent.match( new RegExp( `db\\.execute\\(sql\\.raw.*?CREATE INDEX.*?"${indexName}".*?WITH\\s*\\(lists\\s*=\\s*(\\d+)\\)`, 'is', ), ) || - // Match: CREATE INDEX "indexName" ... WITH (lists = 10) (in any context) upContent.match( new RegExp(`CREATE INDEX.*?"${indexName}".*?WITH\\s*\\(lists\\s*=\\s*(\\d+)\\)`, 'is'), ) || - // Match: lists = near ivfflat (non-greedy) upContent.match(new RegExp(`ivfflat.*?lists\\s*=\\s*(\\d+)`, 'is')) if (indexMatch && !state.get(poolName)?.ivfflatLists) { const lists = parseInt(indexMatch[1], 10) const current = state.get(poolName) || { dims: null, ivfflatLists: null } state.set(poolName, { ...current, ivfflatLists: lists }) - console.log( - `[payloadcms-vectorize] Found prior ivfflatLists=${lists} for pool "${poolName}" in ${file.name}`, - ) - } else if (!state.get(poolName)?.ivfflatLists) { - // Debug: log if we didn't find it - console.log( - `[payloadcms-vectorize] No ivfflatLists found for pool "${poolName}" in ${file.name}`, - ) } - // Check for dims in vector column definition - // Look for pool-specific patterns to avoid mixing up dims from different pools - // Use non-greedy .*? 
to match table-specific sections + // Check for dims in vector column definition (pool-specific patterns) const dimsMatch = - // ALTER TABLE specific to this table content.match( new RegExp(`ALTER\\s+TABLE[^;]*?"${tableName}"[^;]*?vector\\((\\d+)\\)`, 'is'), ) || - // CREATE TABLE for this table (with non-greedy match to the table content) content.match( new RegExp(`CREATE\\s+TABLE[^;]*?"${tableName}"[^;]*?embedding[^;]*?vector\\((\\d+)\\)`, 'is'), ) || - // Table definition in Drizzle format: "tableName" (...embedding vector(X)...) content.match( new RegExp(`"${tableName}"\\s*\\([^)]*embedding[^)]*vector\\((\\d+)\\)`, 'is'), ) @@ -109,13 +86,6 @@ function getPriorStateFromMigrations( const dims = parseInt(dimsMatch[1], 10) const current = state.get(poolName) || { dims: null, ivfflatLists: null } state.set(poolName, { ...current, dims }) - console.log( - `[payloadcms-vectorize] Found prior dims=${dims} for pool "${poolName}" (table="${tableName}") in ${file.name}`, - ) - } else if (!state.get(poolName)?.dims) { - console.log( - `[payloadcms-vectorize] No dims found for pool "${poolName}" (table="${tableName}") in ${file.name}`, - ) } } } catch (err) { @@ -183,9 +153,7 @@ function patchMigrationFile( schemaName: string, priorState: Map, ): void { - console.log(`[vectorize-migrate] Reading migration file: ${migrationPath}`) const content = readFileSync(migrationPath, 'utf-8') - console.log(`[vectorize-migrate] File read successfully, length: ${content.length} characters`) // Generate SQL code for each pool const vectorUpCode: string[] = [] @@ -273,17 +241,6 @@ function patchMigrationFile( /export\s+async\s+function\s+up\s*\([^)]*\)\s*:\s*Promise\s*\{/i, ) if (!upFunctionMatch) { - console.error( - `[vectorize-migrate] Could not find 'up' function in migration file: ${migrationPath}`, - ) - console.error(`[vectorize-migrate] File content length: ${content.length} characters`) - console.error(`[vectorize-migrate] File content (first 1000 chars):`) - console.error(content.substring(0, 1000)) - console.error(`[vectorize-migrate] File content (last 1000 chars):`) - console.error(content.substring(Math.max(0, content.length - 1000))) - console.error( - `[vectorize-migrate] Searching for pattern: /export\\s+async\\s+function\\s+up\\s*\\([^)]*\\)\\s*:\\s*Promise\\s*\\{/i`, - ) throw new Error(`Could not find 'up' function in migration file: ${migrationPath}`) } @@ -294,9 +251,6 @@ function patchMigrationFile( // Find the last closing brace before down function or end const upFunctionBody = content.substring(upFunctionStart, searchEnd) const lastBraceIndex = upFunctionBody.lastIndexOf('}') - console.log(`[vectorize-migrate] up function body length: ${upFunctionBody.length}`) - console.log(`[vectorize-migrate] lastBraceIndex in body: ${lastBraceIndex}`) - console.log(`[vectorize-migrate] up function body ends with: ${upFunctionBody.substring(Math.max(0, upFunctionBody.length - 200))}`) if (lastBraceIndex === -1) { throw new Error( `Could not find closing brace for 'up' function in migration file: ${migrationPath}`, @@ -306,21 +260,9 @@ function patchMigrationFile( // Insert our code before the closing brace const beforeBrace = content.substring(0, upFunctionStart + lastBraceIndex) const afterBrace = content.substring(upFunctionStart + lastBraceIndex) - console.log(`[vectorize-migrate] Insertion point: beforeBrace ends with: ${beforeBrace.substring(Math.max(0, beforeBrace.length - 100))}`) - console.log(`[vectorize-migrate] Insertion point: afterBrace starts with: ${afterBrace.substring(0, 100)}`) 
const codeToInsert = '\n' + vectorUpCode.join('\n') + '\n' - console.log(`[vectorize-migrate] Inserting ${vectorUpCode.length} line(s) of code into migration`) - console.log(`[vectorize-migrate] Code to insert:\n${codeToInsert}`) let newContent = beforeBrace + codeToInsert + afterBrace - console.log(`[vectorize-migrate] Migration file will be ${newContent.length} characters after patching (was ${content.length})`) - - // Verify insertion point looks correct - const insertionPointPreview = newContent.substring( - Math.max(0, beforeBrace.length - 50), - Math.min(newContent.length, beforeBrace.length + codeToInsert.length + 50), - ) - console.log(`[vectorize-migrate] Insertion point preview:\n${insertionPointPreview}`) // Handle down function if (downFunctionMatch) { @@ -349,15 +291,6 @@ function patchMigrationFile( } writeFileSync(migrationPath, newContent, 'utf-8') - console.log(`[vectorize-migrate] Migration file written successfully`) - // Verify the code was inserted - const verifyContent = readFileSync(migrationPath, 'utf-8') - const hasIvfflatCode = verifyContent.includes('ivfflat') && verifyContent.includes('lists =') - console.log(`[vectorize-migrate] Verification: migration contains IVFFLAT code: ${hasIvfflatCode}`) - if (!hasIvfflatCode && vectorUpCode.length > 0) { - console.error(`[vectorize-migrate] WARNING: IVFFLAT code was supposed to be inserted but not found in file!`) - console.error(`[vectorize-migrate] Expected to find: ${vectorUpCode.join(' | ')}`) - } } /** @@ -386,31 +319,12 @@ export const script = async (config: SanitizedConfig): Promise => { const poolNames = Object.keys(staticConfigs) const schemaName = (payload.db as any).schemaName || 'public' - // Get migrations directory - the postgres adapter stores it on payload.db.migrationDir - // but this may be set to default before config is applied. Try multiple sources. 
+ // Get migrations directory const dbMigrationDir = (payload.db as any).migrationDir - - // Debug: log migration directory detection - console.log('[payloadcms-vectorize] Debug: payload.db.migrationDir =', dbMigrationDir) - - // Use the payload.db.migrationDir - this is where Payload stores the resolved path const migrationsDir = dbMigrationDir || resolve(process.cwd(), 'src/migrations') - console.log('[payloadcms-vectorize] Using migrations directory:', migrationsDir) - - console.log('[payloadcms-vectorize] Checking for configuration changes...') // Get prior state from migrations const priorState = getPriorStateFromMigrations(migrationsDir, poolNames) - - // Debug: log prior state - console.log('[payloadcms-vectorize] Prior state from migrations:') - for (const [poolName, state] of priorState.entries()) { - console.log(`[payloadcms-vectorize] ${poolName}: dims=${state.dims}, ivfflatLists=${state.ivfflatLists}`) - } - console.log('[payloadcms-vectorize] Current static configs:') - for (const [poolName, config] of Object.entries(staticConfigs)) { - console.log(`[payloadcms-vectorize] ${poolName}: dims=${config.dims}, ivfflatLists=${config.ivfflatLists}`) - } // Check if any changes are needed let hasChanges = false @@ -419,13 +333,9 @@ export const script = async (config: SanitizedConfig): Promise => { const prior = priorState.get(poolName) || { dims: null, ivfflatLists: null } // Check if this is the first migration (no IVFFLAT index exists yet) - // Note: dims might be found from Drizzle schema, but ivfflatLists won't be found until we create the index if (prior.ivfflatLists === null) { isFirstMigration = true hasChanges = true - console.log( - `[payloadcms-vectorize] First migration detected for pool "${poolName}" (ivfflatLists not found in prior migrations)`, - ) break } @@ -435,40 +345,27 @@ export const script = async (config: SanitizedConfig): Promise => { (prior.ivfflatLists !== null && prior.ivfflatLists !== currentConfig.ivfflatLists) ) { hasChanges = true - console.log( - `[payloadcms-vectorize] Change detected for pool "${poolName}": dims ${prior.dims}→${currentConfig.dims}, ivfflatLists ${prior.ivfflatLists}→${currentConfig.ivfflatLists}`, - ) break } } - // If no changes detected, check if artifacts exist (idempotency) + // If no changes detected if (!hasChanges) { console.log('[payloadcms-vectorize] No configuration changes detected.') - console.log( - '[payloadcms-vectorize] If this is the first migration, ensure your initial migration creates the embedding columns via Drizzle schema.', - ) return } - - console.log('[payloadcms-vectorize] Changes detected.') // Determine if there are actual schema changes (dims change) or just index parameter changes (ivfflatLists) - // payload.db.createMigration only works when there are schema changes - // For index-only changes, we need to create the migration file manually let hasSchemaChanges = false for (const [poolName, currentConfig] of Object.entries(staticConfigs)) { const prior = priorState.get(poolName) || { dims: null, ivfflatLists: null } if (prior.dims !== null && prior.dims !== currentConfig.dims) { hasSchemaChanges = true - console.log(`[payloadcms-vectorize] Schema change detected for pool "${poolName}": dims ${prior.dims}→${currentConfig.dims}`) break } } if (isFirstMigration) { - console.log('[payloadcms-vectorize] This is the first migration - checking if we should patch existing migration or create new one') - // Check if there's a very recent migration file (created in last 10 seconds) that we should patch const 
recentMigrations = existsSync(migrationsDir) ? readdirSync(migrationsDir) @@ -486,24 +383,16 @@ export const script = async (config: SanitizedConfig): Promise => { if (recentMigrations.length > 0) { const recentMigration = recentMigrations[0] - console.log(`[payloadcms-vectorize] Found recent migration to patch: ${recentMigration.name}`) // Check if it already has IVFFLAT index code const recentContent = readFileSync(recentMigration.path, 'utf-8') const hasIvfflatCode = recentContent.includes('ivfflat') && (recentContent.includes('drizzle.execute') || recentContent.includes('CREATE INDEX')) if (!hasIvfflatCode) { - console.log(`[payloadcms-vectorize] Patching existing migration: ${recentMigration.path}`) patchMigrationFile(recentMigration.path, staticConfigs, schemaName, priorState) console.log('[payloadcms-vectorize] Migration patched successfully!') return - } else { - console.log(`[payloadcms-vectorize] Recent migration already has IVFFLAT code, creating new migration instead`) } } - - console.log('[payloadcms-vectorize] Creating new migration with IVFFLAT index setup') - } else { - console.log('[payloadcms-vectorize] Creating new migration for configuration change') } // Create migration using Payload's API OR create manually for index-only changes @@ -525,20 +414,12 @@ export const script = async (config: SanitizedConfig): Promise => { // If there are schema changes (dims changed), use Payload's createMigration // Otherwise (only ivfflatLists changed), create the migration file manually - // because Payload's createMigration hangs when there are no schema changes to detect if (hasSchemaChanges) { - console.log('[payloadcms-vectorize] Schema changes detected - using payload.db.createMigration...') - try { - await payload.db.createMigration({ - migrationName: 'vectorize-config', - payload, - forceAcceptWarning: true, - }) - console.log('[payloadcms-vectorize] Migration created successfully') - } catch (error) { - console.error('[payloadcms-vectorize] Error creating migration:', error) - throw error - } + await payload.db.createMigration({ + migrationName: 'vectorize-config', + payload, + forceAcceptWarning: true, + }) // Find the newest migration file (should be the one just created) const migrationsAfter = existsSync(migrationsDir) @@ -567,10 +448,6 @@ export const script = async (config: SanitizedConfig): Promise => { migrationPath = foundPath } else { // No schema changes (only ivfflatLists changed) - create migration file manually - // Payload's createMigration API doesn't support this case (it hangs when no schema changes detected) - console.log('[payloadcms-vectorize] No schema changes (only index parameter changes) - creating migration file manually...') - - // Generate timestamp for migration filename (format: YYYYMMDD_HHMMSS) const now = new Date() const timestamp = [ now.getFullYear(), @@ -585,7 +462,6 @@ export const script = async (config: SanitizedConfig): Promise => { const migrationFileName = `${timestamp}_vectorize_ivfflat_rebuild.ts` migrationPath = join(migrationsDir, migrationFileName) - // Create a minimal migration file that we'll patch with our IVFFLAT code const migrationTemplate = `import { MigrateUpArgs, MigrateDownArgs, sql } from '@payloadcms/db-postgres' export async function up({ db, payload, req }: MigrateUpArgs): Promise { @@ -598,18 +474,12 @@ export async function down({ db, payload, req }: MigrateDownArgs): Promise ` writeFileSync(migrationPath, migrationTemplate, 'utf-8') - console.log(`[payloadcms-vectorize] Created migration file: ${migrationPath}`) 
} - console.log(`[payloadcms-vectorize] Patching migration: ${migrationPath}`) - // Patch the migration file patchMigrationFile(migrationPath, staticConfigs, schemaName, priorState) console.log('[payloadcms-vectorize] Migration created and patched successfully!') - console.log( - '[payloadcms-vectorize] Review the migration file and apply it with: pnpm payload migrate', - ) // Only exit if not in test environment (when called from tests, just return) if (process.env.NODE_ENV !== 'test' && !process.env.VITEST) { diff --git a/src/index.ts b/src/index.ts index 2083da9..bf80c36 100644 --- a/src/index.ts +++ b/src/index.ts @@ -135,64 +135,28 @@ export const createVectorizeIntegration = const payloadcmsVectorize = (pluginOptions: PayloadcmsVectorizeConfig) => (config: Config): Config => { - console.log('[payloadcms-vectorize] payloadcmsVectorize: Plugin initialization started') - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Processing ${Object.keys(pluginOptions.knowledgePools).length} knowledge pool(s)`, - ) - // Ensure collections array exists config.collections = [...(config.collections || [])] - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Initial collections count: ${config.collections.length}`, - ) // Ensure bulk runs collection exists once - console.log('[payloadcms-vectorize] payloadcmsVectorize: Adding bulk runs collection...') const bulkRunsCollection = createBulkEmbeddingsRunsCollection() if (!config.collections.find((c) => c.slug === BULK_EMBEDDINGS_RUNS_SLUG)) { config.collections.push(bulkRunsCollection) - console.log('[payloadcms-vectorize] payloadcmsVectorize: Bulk runs collection added') - } else { - console.log( - '[payloadcms-vectorize] payloadcmsVectorize: Bulk runs collection already exists', - ) } // Ensure bulk input metadata collection exists once - console.log( - '[payloadcms-vectorize] payloadcmsVectorize: Adding bulk input metadata collection...', - ) const bulkInputMetadataCollection = createBulkEmbeddingInputMetadataCollection() if (!config.collections.find((c) => c.slug === BULK_EMBEDDINGS_INPUT_METADATA_SLUG)) { config.collections.push(bulkInputMetadataCollection) - console.log( - '[payloadcms-vectorize] payloadcmsVectorize: Bulk input metadata collection added', - ) - } else { - console.log( - '[payloadcms-vectorize] payloadcmsVectorize: Bulk input metadata collection already exists', - ) } // Ensure bulk batches collection exists once - console.log('[payloadcms-vectorize] payloadcmsVectorize: Adding bulk batches collection...') const bulkBatchesCollection = createBulkEmbeddingsBatchesCollection() if (!config.collections.find((c) => c.slug === BULK_EMBEDDINGS_BATCHES_SLUG)) { config.collections.push(bulkBatchesCollection) - console.log('[payloadcms-vectorize] payloadcmsVectorize: Bulk batches collection added') - } else { - console.log( - '[payloadcms-vectorize] payloadcmsVectorize: Bulk batches collection already exists', - ) } // Validate static/dynamic configs share the same pool names - console.log( - '[payloadcms-vectorize] payloadcmsVectorize: Validating static/dynamic config alignment...', - ) for (const poolName in pluginOptions.knowledgePools) { if (!staticConfigs[poolName]) { - console.error( - `[payloadcms-vectorize] payloadcmsVectorize: Knowledge pool "${poolName}" not found in static configs`, - ) throw new Error( `[payloadcms-vectorize] Knowledge pool "${poolName}" not found in static configs`, ) @@ -206,16 +170,10 @@ export const createVectorizeIntegration = } } if (unusedStaticPools.length > 0) { - console.error( - 
`[payloadcms-vectorize] payloadcmsVectorize: Static pools without dynamic config: ${unusedStaticPools.join(', ')}`, - ) throw new Error( `[payloadcms-vectorize] Static knowledge pool(s) ${unusedStaticPools.join(', ')} lack dynamic configuration`, ) } - console.log( - '[payloadcms-vectorize] payloadcmsVectorize: Static/dynamic config validation passed', - ) // Build reverse mapping: collectionSlug -> KnowledgePoolName[] const collectionToPools = new Map< @@ -227,124 +185,73 @@ export const createVectorizeIntegration = >() // Process each knowledge pool - console.log('[payloadcms-vectorize] payloadcmsVectorize: Processing knowledge pools...') for (const poolName in pluginOptions.knowledgePools) { - console.log(`[payloadcms-vectorize] payloadcmsVectorize: Processing pool "${poolName}"...`) const dynamicConfig = pluginOptions.knowledgePools[poolName] // Add the embeddings collection for this knowledge pool with extensionFields - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Creating embeddings collection for pool "${poolName}"...`, - ) const embeddingsCollection = createEmbeddingsCollection( poolName, dynamicConfig.extensionFields, ) if (!config.collections.find((c) => c.slug === poolName)) { config.collections.push(embeddingsCollection) - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Embeddings collection "${poolName}" added`, - ) - } else { - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Embeddings collection "${poolName}" already exists`, - ) } // Build reverse mapping for hooks const collectionSlugs = Object.keys(dynamicConfig.collections) - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Pool "${poolName}" maps to ${collectionSlugs.length} collection(s): ${collectionSlugs.join(', ')}`, - ) for (const collectionSlug of collectionSlugs) { if (!collectionToPools.has(collectionSlug)) { collectionToPools.set(collectionSlug, []) } collectionToPools.get(collectionSlug)!.push({ pool: poolName, dynamic: dynamicConfig }) } - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Pool "${poolName}" processing complete`, - ) } - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Knowledge pools processed. 
Total collections: ${config.collections.length}`, - ) // Validate bulk queue requirements - console.log( - '[payloadcms-vectorize] payloadcmsVectorize: Validating bulk queue requirements...', - ) let bulkIngestEnabled = false for (const poolName in pluginOptions.knowledgePools) { const dynamicConfig = pluginOptions.knowledgePools[poolName] if (dynamicConfig.embeddingConfig.bulkEmbeddingsFns) { bulkIngestEnabled = true - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Pool "${poolName}" has bulk embedding enabled`, - ) break } } if (bulkIngestEnabled && !pluginOptions.bulkQueueNames) { - console.error( - '[payloadcms-vectorize] payloadcmsVectorize: bulkQueueNames required but not provided', - ) throw new Error( '[payloadcms-vectorize] bulkQueueNames is required when any knowledge pool has bulk embedding configured (embeddingConfig.bulkEmbeddingsFns).', ) } - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Bulk queue validation passed (enabled: ${bulkIngestEnabled})`, - ) // Exit early if disabled, but keep embeddings collections present for migrations if (pluginOptions.disabled) { - console.log('[payloadcms-vectorize] payloadcmsVectorize: Plugin disabled, exiting early') return config } - // Register a single task using Payload Jobs that can handle any knowledge pool - console.log('[payloadcms-vectorize] payloadcmsVectorize: Registering Payload Jobs tasks...') + // Register tasks using Payload Jobs const incomingJobs = config.jobs || { tasks: [] } const tasks = [...(config.jobs?.tasks || [])] - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Existing tasks count: ${tasks.length}`, - ) - console.log('[payloadcms-vectorize] payloadcmsVectorize: Creating vectorize task...') const vectorizeTask = createVectorizeTask({ knowledgePools: pluginOptions.knowledgePools, }) tasks.push(vectorizeTask) - console.log('[payloadcms-vectorize] payloadcmsVectorize: Vectorize task added') - console.log('[payloadcms-vectorize] payloadcmsVectorize: Creating prepare bulk embed task...') const prepareBulkEmbedTask = createPrepareBulkEmbeddingTask({ knowledgePools: pluginOptions.knowledgePools, pollOrCompleteQueueName: pluginOptions.bulkQueueNames?.pollOrCompleteQueueName, }) tasks.push(prepareBulkEmbedTask) - console.log('[payloadcms-vectorize] payloadcmsVectorize: Prepare bulk embed task added') - console.log( - '[payloadcms-vectorize] payloadcmsVectorize: Creating poll or complete bulk embed task...', - ) const pollOrCompleteBulkEmbedTask = createPollOrCompleteBulkEmbeddingTask({ knowledgePools: pluginOptions.knowledgePools, pollOrCompleteQueueName: pluginOptions.bulkQueueNames?.pollOrCompleteQueueName, }) tasks.push(pollOrCompleteBulkEmbedTask) - console.log( - '[payloadcms-vectorize] payloadcmsVectorize: Poll or complete bulk embed task added', - ) config.jobs = { ...incomingJobs, tasks, } - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Jobs configured. 
Total tasks: ${tasks.length}`, - ) const collectionToEmbedQueue = new Map< string, @@ -352,23 +259,11 @@ export const createVectorizeIntegration = >() // Extend configured collections with hooks - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Setting up hooks for ${collectionToPools.size} collection(s)...`, - ) for (const [collectionSlug, pools] of collectionToPools.entries()) { - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Setting up hooks for collection "${collectionSlug}" (${pools.length} pool(s))...`, - ) const collection = config.collections.find((c) => c.slug === collectionSlug) if (!collection) { - console.error( - `[payloadcms-vectorize] payloadcmsVectorize: Collection "${collectionSlug}" not found`, - ) throw new Error(`[payloadcms-vectorize] Collection ${collectionSlug} not found`) } - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Collection "${collectionSlug}" found, adding hooks...`, - ) const embedQueue = async (doc: any, payload: Payload, req?: PayloadRequest) => { // Queue vectorization jobs for ALL knowledge pools containing this collection @@ -397,9 +292,6 @@ export const createVectorizeIntegration = } collectionToEmbedQueue.set(collectionSlug, embedQueue) - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Embed queue function registered for "${collectionSlug}"`, - ) collection.hooks = { ...(collection.hooks || {}), @@ -457,20 +349,11 @@ export const createVectorizeIntegration = }, ], } - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Hooks configured for collection "${collectionSlug}"`, - ) } - console.log('[payloadcms-vectorize] payloadcmsVectorize: All collection hooks configured') - console.log('[payloadcms-vectorize] payloadcmsVectorize: Creating vector search handlers...') const vectorSearchHandlers = createVectorSearchHandlers(pluginOptions.knowledgePools) - console.log('[payloadcms-vectorize] payloadcmsVectorize: Vector search handlers created') // Create vectorized payload object factory that creates methods bound to a payload instance - console.log( - '[payloadcms-vectorize] payloadcmsVectorize: Creating vectorized payload object factory...', - ) const createVectorizedPayloadObject = (payload: Payload): VectorizedPayload => { return { _isBulkEmbedEnabled: (knowledgePool: TPoolNames): boolean => { @@ -537,21 +420,15 @@ export const createVectorizeIntegration = } // Store factory in config.custom - console.log( - '[payloadcms-vectorize] payloadcmsVectorize: Storing vectorized payload factory in config.custom...', - ) config.custom = { ...(config.custom || {}), createVectorizedPayloadObject, } - console.log('[payloadcms-vectorize] payloadcmsVectorize: Factory stored in config.custom') // Register bin script for migration helper - console.log('[payloadcms-vectorize] payloadcmsVectorize: Registering bin script...') const __filename = fileURLToPath(import.meta.url) const __dirname = dirname(__filename) const binScriptPath = resolve(__dirname, 'bin/vectorize-migrate.js') - console.log(`[payloadcms-vectorize] payloadcmsVectorize: Bin script path: ${binScriptPath}`) config.bin = [ ...(config.bin || []), { @@ -559,12 +436,8 @@ export const createVectorizeIntegration = scriptPath: binScriptPath, }, ] - console.log('[payloadcms-vectorize] payloadcmsVectorize: Bin script registered') if (pluginOptions.endpointOverrides?.enabled !== false) { - console.log( - '[payloadcms-vectorize] payloadcmsVectorize: Setting up vector search endpoint...', - ) const path = pluginOptions.endpointOverrides?.path || 
'/vector-search' const inputEndpoints = config.endpoints || [] const endpoints = [ @@ -592,17 +465,8 @@ export const createVectorizeIntegration = }, ] config.endpoints = endpoints - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Vector search endpoint registered at "${path}"`, - ) - } else { - console.log('[payloadcms-vectorize] payloadcmsVectorize: Vector search endpoint disabled') } - console.log('[payloadcms-vectorize] payloadcmsVectorize: Plugin initialization complete') - console.log( - `[payloadcms-vectorize] payloadcmsVectorize: Final collections count: ${config.collections.length}`, - ) return config } return { From 35db4c0f74fba533fb06538feaf2e49d196adc7b Mon Sep 17 00:00:00 2001 From: techiejd <62455039+techiejd@users.noreply.github.com> Date: Sat, 17 Jan 2026 21:19:05 +0700 Subject: [PATCH 49/49] WIP --- .gitignore | 1 + dev/.env.test | 3 ++- src/bin/vectorize-migrate.ts | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 3d5eadc..d2757c1 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ # testing /coverage +/dev/test-migrations-* # next.js .next/ diff --git a/dev/.env.test b/dev/.env.test index 7337099..8e37b53 100644 --- a/dev/.env.test +++ b/dev/.env.test @@ -1,2 +1,3 @@ DIMS=8 -IVFFLATLISTS=1 \ No newline at end of file +IVFFLATLISTS=1 +TEST_ENV=1 \ No newline at end of file diff --git a/src/bin/vectorize-migrate.ts b/src/bin/vectorize-migrate.ts index 105a711..1c3631e 100644 --- a/src/bin/vectorize-migrate.ts +++ b/src/bin/vectorize-migrate.ts @@ -300,7 +300,8 @@ export const script = async (config: SanitizedConfig): Promise => { // Get Payload instance for db operations and to access static configs via VectorizedPayload const payload = await getPayload({ config, - key: `vectorize-migrate-${Date.now()}`, + // In test environment, use unique key and enable cron for job processing + ...(process.env.TEST_ENV ? { key: `vectorize-migrate-${Date.now()}`, cron: true } : {}), }) // Get static configs from VectorizedPayload
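
Notes (illustrative, not part of the patches above): with the debug logging removed by PATCH 48, the prior-state scan in `getPriorStateFromMigrations` is easiest to follow as a condensed sketch. The helper below is hypothetical (name, signature, and the `.ts` file filter are assumptions), but it reuses the same `up`-function extraction and `lists` regexes that remain in the script, along with the `${tableName}_embedding_ivfflat` index naming shown above.

```ts
import { readdirSync, readFileSync, statSync } from 'fs'
import { join } from 'path'

// Hypothetical helper: recover the most recently migrated ivfflat `lists` value for one pool table.
export function scanPriorIvfflatLists(migrationsDir: string, tableName: string): number | null {
  const indexName = `${tableName}_embedding_ivfflat`
  const files = readdirSync(migrationsDir)
    .filter((name) => name.endsWith('.ts')) // assumption: TypeScript migration files
    .map((name) => {
      const path = join(migrationsDir, name)
      return { name, path, mtime: statSync(path).mtime }
    })
    // Newest first, so the latest applied configuration wins.
    .sort((a, b) => b.mtime.getTime() - a.mtime.getTime())

  for (const file of files) {
    const content = readFileSync(file.path, 'utf-8')
    // Only inspect the `up` function so rollback values in `down` are ignored.
    const upMatch = content.match(
      /export\s+async\s+function\s+up\s*\([^)]*\)[^{]*\{([\s\S]*?)(?=\}\s*(?:export\s+async\s+function\s+down|$))/i,
    )
    if (!upMatch) continue
    const up = upMatch[1]
    const listsMatch =
      up.match(
        new RegExp(`CREATE INDEX.*?"${indexName}".*?WITH\\s*\\(lists\\s*=\\s*(\\d+)\\)`, 'is'),
      ) || up.match(/ivfflat.*?lists\s*=\s*(\d+)/is)
    if (listsMatch) return parseInt(listsMatch[1], 10)
  }
  return null
}
```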
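For the index-only path (only `ivfflatLists` changed), the script writes the minimal migration template shown earlier and then patches it via `patchMigrationFile`. A rough sketch of what such a patched migration could contain, assuming a pool table `main` in the `public` schema and the pgvector `vector_cosine_ops` operator class; the exact SQL emitted by `patchMigrationFile` may differ:

```ts
import { MigrateUpArgs, MigrateDownArgs, sql } from '@payloadcms/db-postgres'

export async function up({ db }: MigrateUpArgs): Promise<void> {
  // Rebuild the IVFFLAT index with the new lists value (20 here is only an example).
  await db.execute(sql.raw(`DROP INDEX IF EXISTS "public"."main_embedding_ivfflat"`))
  await db.execute(
    sql.raw(
      `CREATE INDEX "main_embedding_ivfflat" ON "public"."main" USING ivfflat ("embedding" vector_cosine_ops) WITH (lists = 20)`,
    ),
  )
}

export async function down({ db }: MigrateDownArgs): Promise<void> {
  // Restore the prior lists value found by the prior-state scan (10 in this example).
  await db.execute(sql.raw(`DROP INDEX IF EXISTS "public"."main_embedding_ivfflat"`))
  await db.execute(
    sql.raw(
      `CREATE INDEX "main_embedding_ivfflat" ON "public"."main" USING ivfflat ("embedding" vector_cosine_ops) WITH (lists = 10)`,
    ),
  )
}
```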