diff --git a/.gitignore b/.gitignore
index 3d5eadc..d2757c1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,7 @@
 # testing
 /coverage
+/dev/test-migrations-*
 
 # next.js
 .next/
diff --git a/README.md b/README.md
index a335339..6865fed 100644
--- a/README.md
+++ b/README.md
@@ -158,7 +158,40 @@ export default buildConfig({
 The import map tells Payload how to resolve component paths (like `'payloadcms-vectorize/client#EmbedAllButton'`) to actual React components. Without it, client components referenced in your collection configs won't render.
 
-### 2. Search Your Content
+**⚠️ Important:** Run this command:
+
+- After initial plugin setup
+- If the "Embed all" button doesn't appear in the admin UI
+
+### 2. Initial Migration Setup
+
+After configuring the plugin, you need to create an initial migration to set up the IVFFLAT indexes in your database.
+
+**For new setups:**
+
+1. Create your initial Payload migration (this will include the embedding columns via the Drizzle schema):
+
+   ```bash
+   pnpm payload migrate:create --name initial
+   ```
+
+2. Use the migration CLI helper to add the IVFFLAT index setup:
+
+   ```bash
+   pnpm payload vectorize:migrate
+   ```
+
+   The CLI automatically extracts your static configs from the Payload config and patches the migration file with the necessary IVFFLAT index creation SQL.
+
+3. Review and apply the migration:
+
+   ```bash
+   pnpm payload migrate
+   ```
+
+**Note:** The embedding columns are created automatically by Drizzle via the `afterSchemaInitHook`, but the IVFFLAT indexes need to be added via migrations for proper schema management.
+
+### 3. Search Your Content
 
 The plugin automatically creates a `/api/vector-search` endpoint:
 
@@ -419,6 +453,67 @@ jobs: {
 }
 ```
 
+## Changing Static Config (ivfflatLists or dims) & Migrations
+
+**⚠️ Important:** Changing `dims` is **destructive** - it requires re-embedding all your data. Changing `ivfflatLists` rebuilds the index (non-destructive but may take time).
+
+When you change static config values (`dims` or `ivfflatLists`):
+
+1. **Update your static config** in `payload.config.ts`:
+
+   ```typescript
+   const { afterSchemaInitHook, payloadcmsVectorize } = createVectorizeIntegration({
+     mainKnowledgePool: {
+       dims: 1536, // Changed from previous value
+       ivfflatLists: 200, // Changed from previous value
+     },
+   })
+   ```
+
+2. **Create a migration** using the CLI helper:
+
+   ```bash
+   pnpm payload vectorize:migrate
+   ```
+
+   The CLI will:
+   - Detect changes in your static configs
+   - Create a new Payload migration using `payload.db.createMigration`
+   - Patch it with the appropriate SQL:
+     - **If `ivfflatLists` changed**: Rebuilds the IVFFLAT index with the new `lists` parameter (DROP + CREATE INDEX)
+     - **If `dims` changed**: Truncates the embeddings table (destructive - you'll need to re-embed)
+
+3. **Review the migration file** in `src/migrations/` - it will be named something like `*_vectorize-config.ts`
+
+4. **Apply the migration**:
+
+   ```bash
+   pnpm payload migrate
+   ```
+
+5. **If `dims` changed**: Re-embed all your documents using the bulk embed feature.
+
+**Schema name qualification:**
+
+The CLI automatically uses the `schemaName` from your Postgres adapter configuration.
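+For example - this is a sketch rather than verbatim CLI output, and the pool name, schema, `lists` value, and operator class below are illustrative assumptions - a patched migration's `up` function might end with something like:
+
+```typescript
+import { type MigrateUpArgs, sql } from '@payloadcms/db-postgres'
+
+export async function up({ db }: MigrateUpArgs): Promise<void> {
+  // ...Drizzle-generated schema statements above...
+
+  // Patched in by `pnpm payload vectorize:migrate`; index names follow the
+  // `<pool>_embedding_ivfflat` pattern ("mainKnowledgePool" is a hypothetical pool)
+  await db.execute(sql`
+    CREATE INDEX IF NOT EXISTS "mainKnowledgePool_embedding_ivfflat"
+    ON "public"."mainKnowledgePool"
+    USING ivfflat ("embedding" vector_cosine_ops)
+    WITH (lists = 200);
+  `)
+}
+```
+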
+If you use a custom schema (e.g., `postgresAdapter({ schemaName: 'custom' })`), all SQL in the migration will be properly qualified with that schema name.
+
+**Idempotency:**
+
+Running `pnpm payload vectorize:migrate` multiple times with no config changes will not create duplicate migrations. The CLI detects when no changes are needed and exits early.
+
+**Development workflow:**
+
+During development, you may want to disable Payload's automatic schema push to ensure migrations are used:
+
+- Set `migrations: { disableAutomaticMigrations: true }` in your Payload config, or
+- Avoid using `pnpm payload migrate:status --force`, which auto-generates migrations
+
+This ensures your vector-specific migrations are properly applied.
+
+**Runtime behavior:**
+
+The `ensurePgvectorArtifacts` function is now **presence-only** - it checks that pgvector artifacts (extension, column, index) exist but does not create or modify them. If artifacts are missing, it throws descriptive errors prompting you to run migrations. This ensures migrations are the single source of truth for schema changes.
+
 ### Endpoints
 
 #### POST `/api/vector-bulk-embed`
@@ -921,7 +1016,6 @@ Thank you for the stars! The following updates have been completed:
 
 The following features are planned for future releases based on community interest and stars:
 
-- **Migrations for vector dimensions**: Easy migration tools for changing vector dimensions and/or ivfflatLists after initial setup
 - **MongoDB support**: Extend vector search capabilities to MongoDB databases
 - **Vercel support**: Optimized deployment and configuration for Vercel hosting
 
diff --git a/dev/.env.test b/dev/.env.test
index 7337099..8e37b53 100644
--- a/dev/.env.test
+++ b/dev/.env.test
@@ -1,2 +1,3 @@
 DIMS=8
-IVFFLATLISTS=1
\ No newline at end of file
+IVFFLATLISTS=1
+TEST_ENV=1
\ No newline at end of file
diff --git a/dev/specs/chunkers.spec.ts b/dev/specs/chunkers.spec.ts
index 454d1ea..aef7387 100644
--- a/dev/specs/chunkers.spec.ts
+++ b/dev/specs/chunkers.spec.ts
@@ -1,9 +1,8 @@
-import { getPayload } from 'payload'
-import { beforeAll, describe, expect, test } from 'vitest'
+import { describe, expect, test } from 'vitest'
 import { chunkText, chunkRichText } from 'helpers/chunkers.js'
 import { postgresAdapter } from '@payloadcms/db-postgres'
 import { buildDummyConfig, getInitialMarkdownContent, integration } from './constants.js'
-import { createTestDb } from './utils.js'
+import { createTestDb, initializePayloadWithMigrations, createTestMigrationsDir } from './utils.js'
 
 describe('Chunkers', () => {
   test('textChunker', () => {
@@ -17,20 +16,27 @@
   })
 
   test('richTextChunker splits by H2', async () => {
-    beforeAll(async () => {
-      createTestDb({ dbName: 'chunkers_test' })
-    })
+    const dbName = 'chunkers_test'
+    await createTestDb({ dbName })
+    const { migrationsDir } = createTestMigrationsDir(dbName)
+
     const cfg = await buildDummyConfig({
       db: postgresAdapter({
         extensions: ['vector'],
         afterSchemaInit: [integration.afterSchemaInitHook],
+        migrationDir: migrationsDir,
+        push: false,
         pool: {
-          connectionString: 'postgresql://postgres:password@localhost:5433/chunkers_test',
+          connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`,
         },
       }),
     })
     const markdownContent = await getInitialMarkdownContent(cfg)
-    const thisPayload = await getPayload({ config: cfg })
+
+    const thisPayload = await initializePayloadWithMigrations({
+      config: cfg,
+      key: `chunkers-test-${Date.now()}`,
+    })
 
     const chunks = await chunkRichText(markdownContent,
thisPayload) expect(chunks.length).toBe(3) diff --git a/dev/specs/extensionFields.spec.ts b/dev/specs/extensionFields.spec.ts index 56ee27a..6fe4f7f 100644 --- a/dev/specs/extensionFields.spec.ts +++ b/dev/specs/extensionFields.spec.ts @@ -1,9 +1,13 @@ import type { Payload } from 'payload' -import { getPayload } from 'payload' import { beforeAll, describe, expect, test } from 'vitest' import { postgresAdapter } from '@payloadcms/db-postgres' import { buildDummyConfig, integration, plugin } from './constants.js' -import { createTestDb, waitForVectorizationJobs } from './utils.js' +import { + createTestDb, + waitForVectorizationJobs, + initializePayloadWithMigrations, + createTestMigrationsDir, +} from './utils.js' import { PostgresPayload } from '../../src/types.js' import { chunkText, chunkRichText } from 'helpers/chunkers.js' import { makeDummyEmbedDocs, makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' @@ -15,6 +19,8 @@ describe('Extension fields integration tests', () => { beforeAll(async () => { await createTestDb({ dbName }) + const { migrationsDir } = createTestMigrationsDir(dbName) + const config = await buildDummyConfig({ jobs: { tasks: [], @@ -39,6 +45,8 @@ describe('Extension fields integration tests', () => { db: postgresAdapter({ extensions: ['vector'], afterSchemaInit: [integration.afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, pool: { connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, }, @@ -104,7 +112,12 @@ describe('Extension fields integration tests', () => { }), ], }) - payload = await getPayload({ config, cron: true }) + + payload = await initializePayloadWithMigrations({ + config, + key: `extension-fields-test-${Date.now()}`, + cron: true, + }) }) test('extension fields are added to the embeddings table schema', async () => { diff --git a/dev/specs/extensionFieldsVectorSearch.spec.ts b/dev/specs/extensionFieldsVectorSearch.spec.ts index 1f81419..94b136f 100644 --- a/dev/specs/extensionFieldsVectorSearch.spec.ts +++ b/dev/specs/extensionFieldsVectorSearch.spec.ts @@ -1,8 +1,12 @@ -import { getPayload } from 'payload' import { describe, expect, test } from 'vitest' import { makeDummyEmbedDocs, makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' import { buildDummyConfig, DIMS, integration, plugin } from './constants.js' -import { createTestDb, waitForVectorizationJobs } from './utils.js' +import { + createTestDb, + waitForVectorizationJobs, + initializePayloadWithMigrations, + createTestMigrationsDir, +} from './utils.js' import { postgresAdapter } from '@payloadcms/db-postgres' import { chunkRichText, chunkText } from 'helpers/chunkers.js' import { createVectorSearchHandlers } from '../../src/endpoints/vectorSearch.js' @@ -11,7 +15,9 @@ import type { KnowledgePoolDynamicConfig } from 'payloadcms-vectorize' describe('extensionFields', () => { test('returns extensionFields in search results with correct types', async () => { // Create a new payload instance with extensionFields - await createTestDb({ dbName: 'endpoint_test_extension' }) + const dbName = 'endpoint_test_extension' + await createTestDb({ dbName }) + const { migrationsDir } = createTestMigrationsDir(dbName) const defaultKnowledgePool: KnowledgePoolDynamicConfig = { collections: { posts: { @@ -89,8 +95,10 @@ describe('extensionFields', () => { db: postgresAdapter({ extensions: ['vector'], afterSchemaInit: [integration.afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, pool: { - connectionString: 
'postgresql://postgres:password@localhost:5433/endpoint_test_extension', + connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, }, }), plugins: [ @@ -101,7 +109,12 @@ describe('extensionFields', () => { }), ], }) - const payloadWithExtensions = await getPayload({ config: configWithExtensions, cron: true }) + + const payloadWithExtensions = await initializePayloadWithMigrations({ + config: configWithExtensions, + key: `extension-fields-vector-search-test-${Date.now()}`, + cron: true, + }) // Create a post with extension field values const testQuery = 'Extension fields test content' diff --git a/dev/specs/failedValidation.spec.ts b/dev/specs/failedValidation.spec.ts index 79ef30e..8520a84 100644 --- a/dev/specs/failedValidation.spec.ts +++ b/dev/specs/failedValidation.spec.ts @@ -1,12 +1,17 @@ import { postgresAdapter } from '@payloadcms/db-postgres' import { buildConfig } from 'payload' -import { getPayload } from 'payload' import { describe, expect, test } from 'vitest' import { createVectorizeIntegration } from '../../src/index.js' -import { createTestDb, waitForVectorizationJobs } from './utils.js' +import { + createTestDb, + waitForVectorizationJobs, + initializePayloadWithMigrations, + createTestMigrationsDir, +} from './utils.js' const DIMS = 8 +const dbName = 'failed_validation_test' const embedDocs = async (texts: string[]) => texts.map(() => Array(DIMS).fill(0)) const embedQuery = async (_text: string) => Array(DIMS).fill(0) @@ -18,8 +23,7 @@ const { afterSchemaInitHook, payloadcmsVectorize } = createVectorizeIntegration( }, }) -const buildMalformedConfig = async () => { - await createTestDb({ dbName: 'failed_validation_test' }) +const buildMalformedConfig = async (migrationsDir: string) => { return buildConfig({ jobs: { tasks: [], @@ -39,10 +43,12 @@ const buildMalformedConfig = async () => { db: postgresAdapter({ extensions: ['vector'], afterSchemaInit: [afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, pool: { connectionString: process.env.DATABASE_URI || - 'postgresql://postgres:password@localhost:5433/failed_validation_test', + `postgresql://postgres:password@localhost:5433/${dbName}`, }, }), plugins: [ @@ -70,8 +76,15 @@ const buildMalformedConfig = async () => { describe('Validation failures mark jobs as errored', () => { test('malformed chunk entry fails the vectorize job', async () => { - const config = await buildMalformedConfig() - const payload = await getPayload({ config, cron: true }) + await createTestDb({ dbName }) + const { migrationsDir } = createTestMigrationsDir(dbName) + + const config = await buildMalformedConfig(migrationsDir) + const payload = await initializePayloadWithMigrations({ + config, + key: `failed-validation-test-${Date.now()}`, + cron: true, + }) await payload.create({ collection: 'posts', diff --git a/dev/specs/int.spec.ts b/dev/specs/int.spec.ts index bea7dab..cf4b657 100644 --- a/dev/specs/int.spec.ts +++ b/dev/specs/int.spec.ts @@ -14,9 +14,14 @@ import { $createHeadingNode } from '@payloadcms/richtext-lexical/lexical/rich-te import { PostgresPayload } from '../../src/types.js' import { editorConfigFactory, getEnabledNodes, lexicalEditor } from '@payloadcms/richtext-lexical' import { DIMS, getInitialMarkdownContent } from './constants.js' -import { createTestDb, waitForVectorizationJobs } from './utils.js' +import { + createTestDb, + waitForVectorizationJobs, + initializePayloadWithMigrations, + createTestMigrationsDir, +} from './utils.js' import { postgresAdapter } from '@payloadcms/db-postgres' 
-import { buildConfig, getPayload } from 'payload' +import { buildConfig } from 'payload' import { createVectorizeIntegration } from 'payloadcms-vectorize' const embedFn = makeDummyEmbedDocs(DIMS) @@ -32,6 +37,8 @@ describe('Plugin integration tests', () => { beforeAll(async () => { await createTestDb({ dbName }) + const { migrationsDir } = createTestMigrationsDir(dbName) + // Create isolated integration for this test suite const integration = createVectorizeIntegration({ default: { @@ -55,6 +62,8 @@ describe('Plugin integration tests', () => { db: postgresAdapter({ extensions: ['vector'], afterSchemaInit: [integration.afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, // Prevent dev mode schema push - use migrations only pool: { connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, }, @@ -99,7 +108,13 @@ describe('Plugin integration tests', () => { }, }) - payload = await getPayload({ config, key: `int-test-${Date.now()}`, cron: true }) + // Initialize Payload with migrations + payload = await initializePayloadWithMigrations({ + config, + key: `int-test-${Date.now()}`, + cron: true, + }) + markdownContent = await getInitialMarkdownContent(config) }) diff --git a/dev/specs/migrationCli.spec.ts b/dev/specs/migrationCli.spec.ts new file mode 100644 index 0000000..97c6ec4 --- /dev/null +++ b/dev/specs/migrationCli.spec.ts @@ -0,0 +1,1078 @@ +import type { Payload, SanitizedConfig } from 'payload' +import { beforeAll, describe, expect, test, afterAll } from 'vitest' +import { postgresAdapter } from '@payloadcms/db-postgres' +import { buildConfig, getPayload } from 'payload' +import { createVectorizeIntegration } from 'payloadcms-vectorize' +import { makeDummyEmbedDocs, makeDummyEmbedQuery, testEmbeddingVersion } from '../helpers/embed.js' +import { createTestDb } from './utils.js' +import { DIMS } from './constants.js' +import type { PostgresPayload } from '../../src/types.js' +import { script as vectorizeMigrateScript } from '../../src/bin/vectorize-migrate.js' +import { readdirSync, statSync, existsSync, readFileSync, rmSync } from 'fs' +import { join, resolve } from 'path' + +describe('Migration CLI integration tests', () => { + describe('VectorizedPayload access', () => { + let payload: Payload + const dbName = `migration_cli_test_${Date.now()}` + + beforeAll(async () => { + await createTestDb({ dbName }) + + const integration = createVectorizeIntegration({ + default: { + dims: DIMS, + ivfflatLists: 10, + }, + }) + + const config = await buildConfig({ + secret: 'test-secret', + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: ['vector'], + afterSchemaInit: [integration.afterSchemaInitHook], + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, + }, + }), + plugins: [ + integration.payloadcmsVectorize({ + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc) => [{ chunk: doc.title || '' }], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(DIMS), + realTimeIngestionFn: makeDummyEmbedDocs(DIMS), + }, + }, + }, + }), + ], + jobs: { + tasks: [], + autoRun: [ + { + cron: '*/5 * * * * *', + limit: 10, + }, + ], + }, + }) + + payload = await getPayload({ config, cron: true }) + }) + + test('VectorizedPayload has _staticConfigs', async () => { + const { getVectorizedPayload } = await import('payloadcms-vectorize') + const vectorizedPayload = 
getVectorizedPayload(payload) + + expect(vectorizedPayload).toBeTruthy() + expect(vectorizedPayload?._staticConfigs).toBeDefined() + expect(vectorizedPayload?._staticConfigs.default).toBeDefined() + expect(vectorizedPayload?._staticConfigs.default.dims).toBe(DIMS) + expect(vectorizedPayload?._staticConfigs.default.ivfflatLists).toBe(10) + }) + }) + + describe('Error handling when migrations not run', () => { + let payload: Payload + const dbName = `migration_error_test_${Date.now()}` + + beforeAll(async () => { + await createTestDb({ dbName }) + + const integration = createVectorizeIntegration({ + default: { + dims: DIMS, + ivfflatLists: 10, + }, + }) + + const config = await buildConfig({ + secret: 'test-secret', + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: ['vector'], + afterSchemaInit: [integration.afterSchemaInitHook], + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, + }, + // Don't push schema changes - we want to test without migrations + push: false, + }), + plugins: [ + integration.payloadcmsVectorize({ + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc) => [{ chunk: doc.title || '' }], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(DIMS), + realTimeIngestionFn: makeDummyEmbedDocs(DIMS), + }, + }, + }, + }), + ], + jobs: { + tasks: [], + autoRun: [ + { + cron: '*/5 * * * * *', + limit: 10, + }, + ], + }, + }) + + payload = await getPayload({ + config, + cron: false, // Disable cron to avoid background jobs + key: `migration-error-test-${Date.now()}`, + }) + }) + + test('vector search fails with descriptive error when embedding column missing', async () => { + const { getVectorizedPayload } = await import('payloadcms-vectorize') + const vectorizedPayload = getVectorizedPayload(payload) + + // Vector search should fail with a descriptive error + await expect( + vectorizedPayload?.search({ + knowledgePool: 'default', + query: 'test query', + limit: 10, + }), + ).rejects.toThrow() + }) + + test('creating document fails when embedding table does not exist', async () => { + // Try to create a document that would trigger vectorization + // This should fail because the embedding table doesn't exist + await expect( + payload.create({ + collection: 'posts', + data: { + title: 'Test Post', + }, + }), + ).rejects.toThrow() + }) + }) + + describe('CLI workflow (sequential)', () => { + const cliDbName = `migration_cli_e2e_test_${Date.now()}` + let cliPayload: Payload + let cliConfig: SanitizedConfig + const migrationsDir = resolve(process.cwd(), 'dev', 'test-migrations-cli') + + beforeAll(async () => { + await createTestDb({ dbName: cliDbName }) + + // Clean up any existing migrations directory to ensure clean state + if (existsSync(migrationsDir)) { + rmSync(migrationsDir, { recursive: true, force: true }) + } + + // Create test migrations directory + const { mkdirSync } = await import('fs') + mkdirSync(migrationsDir, { recursive: true }) + }) + + afterAll(async () => { + // Cleanup: remove test migrations directory + if (existsSync(migrationsDir)) { + rmSync(migrationsDir, { recursive: true, force: true }) + } + }) + + test('1. 
Initial setup: create migration with IVFFLAT index', async () => { + // Step 1: Create integration with initial config + const integration = createVectorizeIntegration({ + default: { + dims: DIMS, + ivfflatLists: 10, // Initial lists parameter + }, + }) + + cliConfig = await buildConfig({ + secret: 'test-secret', + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: ['vector'], + afterSchemaInit: [integration.afterSchemaInitHook], + migrationDir: migrationsDir, + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${cliDbName}`, + }, + }), + plugins: [ + integration.payloadcmsVectorize({ + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc) => [{ chunk: doc.title || '' }], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(DIMS), + realTimeIngestionFn: makeDummyEmbedDocs(DIMS), + }, + }, + }, + }), + ], + jobs: { + tasks: [], + autoRun: [ + { + cron: '*\/5 * * * * *', + limit: 10, + }, + ], + }, + }) + + // Get payload instance + cliPayload = await getPayload({ + config: cliConfig, + cron: true, + key: `migration-cli-test-${Date.now()}`, + }) + + // Step 2: Create initial migration (this will include the embedding column via Drizzle) + console.log('[TEST] Step 2: Creating initial migration...') + await cliPayload.db.createMigration({ + migrationName: 'initial', + payload: cliPayload, + }) + console.log('[TEST] Step 2.5: Initial migration created') + + // Step 3: Run vectorize:migrate to add IVFFLAT index to the migration + console.log('[TEST] Step 3: Running vectorize:migrate...') + await vectorizeMigrateScript(cliConfig) + + // Debug: Print all files in migrations directory + console.log('[TEST] Step 3.5: Listing all files in migrations directory:') + const allFiles = readdirSync(migrationsDir) + for (const file of allFiles) { + const filePath = join(migrationsDir, file) + const stats = statSync(filePath) + console.log( + `[TEST] - ${file} (${stats.size} bytes, modified: ${stats.mtime.toISOString()})`, + ) + if (file.endsWith('.ts') && file !== 'index.ts') { + const content = readFileSync(filePath, 'utf-8') + console.log(`[TEST] Content preview (first 500 chars): ${content.substring(0, 500)}`) + console.log( + `[TEST] Contains 'up' function: ${content.includes('export async function up')}`, + ) + console.log(`[TEST] Contains 'CREATE INDEX': ${content.includes('CREATE INDEX')}`) + console.log(`[TEST] Contains 'ivfflat': ${content.includes('ivfflat')}`) + console.log(`[TEST] Contains 'lists =': ${content.includes('lists =')}`) + console.log( + `[TEST] Contains 'default_embedding_ivfflat': ${content.includes('default_embedding_ivfflat')}`, + ) + // Show the last 1000 chars where our code should be + console.log( + `[TEST] Content preview (last 1000 chars): ${content.substring(Math.max(0, content.length - 1000))}`, + ) + } + } + + // Step 4: Apply the migration + console.log('[TEST] Step 4: Applying migration...') + try { + // Try using db.migrate() if it exists (internal API) + if (typeof (cliPayload.db as any).migrate === 'function') { + console.log('[TEST] Step 4.1: Using db.migrate() method') + await (cliPayload.db as any).migrate() + } else { + // Fallback: manually load and execute migration files + console.log( + '[TEST] Step 4.1: db.migrate() not available, using manual migration execution', + ) + const migrationFiles = readdirSync(migrationsDir) + .filter((f) => f.endsWith('.ts') && f !== 'index.ts') + 
.sort() + + for (const file of migrationFiles) { + const migrationPath = join(migrationsDir, file) + console.log(`[TEST] Step 4.2: Loading migration: ${file}`) + const migration = await import(migrationPath) + if (migration.up) { + console.log(`[TEST] Step 4.3: Executing up() for ${file}`) + await migration.up({ db: cliPayload.db.drizzle, payload: cliPayload, req: {} as any }) + } + } + } + console.log('[TEST] Step 4.5: Migration applied') + } catch (error) { + console.error('[TEST] Step 4.5: Migration failed with error:', error) + throw error + } + + // Step 4.55: Check database directly to see if index exists + const postgresPayloadCheck = cliPayload as PostgresPayload + const schemaNameCheck = postgresPayloadCheck.db.schemaName || 'public' + const indexNameCheck = 'default_embedding_ivfflat' + try { + const directIndexCheck = await postgresPayloadCheck.db.pool?.query( + `SELECT indexname FROM pg_indexes WHERE schemaname = $1 AND indexname = $2`, + [schemaNameCheck, indexNameCheck], + ) + console.log( + `[TEST] Step 4.55: Direct database check - index exists: ${(directIndexCheck?.rows.length || 0) > 0}`, + ) + if (directIndexCheck?.rows.length === 0) { + console.log(`[TEST] Step 4.55: WARNING - Index not found in database after migration!`) + // List all indexes on the default table + const allIndexes = await postgresPayloadCheck.db.pool?.query( + `SELECT indexname FROM pg_indexes WHERE schemaname = $1 AND tablename = 'default'`, + [schemaNameCheck], + ) + console.log( + `[TEST] Step 4.55: All indexes on 'default' table: ${allIndexes?.rows.map((r: any) => r.indexname).join(', ') || 'none'}`, + ) + } + } catch (error) { + console.error('[TEST] Step 4.55: Error checking database:', error) + } + + // Step 4.6: Verify the migration file actually contains the IVFFLAT code + const allMigrationsAfter = readdirSync(migrationsDir) + .filter((f) => f.endsWith('.ts') && f !== 'index.ts') + .map((f) => ({ + name: f, + path: join(migrationsDir, f), + mtime: statSync(join(migrationsDir, f)).mtime, + })) + .sort((a, b) => b.mtime.getTime() - a.mtime.getTime()) + const latestMigrationFile = allMigrationsAfter[0]?.path + if (latestMigrationFile) { + const migrationFileAfterApply = readFileSync(latestMigrationFile, 'utf-8') + console.log(`[TEST] Step 4.6: Checking migration file after apply: ${latestMigrationFile}`) + console.log( + `[TEST] File contains 'ivfflat': ${migrationFileAfterApply.includes('ivfflat')}`, + ) + console.log( + `[TEST] File contains 'lists = 10': ${migrationFileAfterApply.includes('lists = 10')}`, + ) + console.log( + `[TEST] File contains 'drizzle.execute': ${migrationFileAfterApply.includes('drizzle.execute')}`, + ) + // Find the IVFFLAT code section + const ivfflatMatch = migrationFileAfterApply.match(/ivfflat[\s\S]{0,500}/i) + if (ivfflatMatch) { + console.log(`[TEST] IVFFLAT code section: ${ivfflatMatch[0]}`) + } + // Show the end of the up function where our code should be + const upFunctionEnd = migrationFileAfterApply.lastIndexOf('export async function up') + if (upFunctionEnd !== -1) { + const upFunctionContent = migrationFileAfterApply.substring(upFunctionEnd) + const last500OfUp = upFunctionContent.substring( + Math.max(0, upFunctionContent.length - 500), + ) + console.log(`[TEST] Last 500 chars of up function: ${last500OfUp}`) + } + } + + // Step 5: Verify index exists with correct lists parameter + const postgresPayload = cliPayload as PostgresPayload + const schemaName = postgresPayload.db.schemaName || 'public' + const tableName = 'default' + const indexName = 
`${tableName}_embedding_ivfflat` + + const indexCheck = await postgresPayload.db.pool?.query( + `SELECT pg_get_indexdef(c.oid) as def + FROM pg_indexes i + JOIN pg_class c ON c.relname = i.indexname + JOIN pg_namespace n ON n.oid = c.relnamespace AND n.nspname = i.schemaname + WHERE i.schemaname = $1 AND i.tablename = $2 AND i.indexname = $3`, + [schemaName, tableName, indexName], + ) + const indexDef = indexCheck?.rows[0]?.def || '' + console.log(`[TEST] Step 5.5: Index definition: ${indexDef}`) + expect(indexDef).toBeTruthy() + // PostgreSQL returns lists='10' (with quotes), so match either format + expect(indexDef).toMatch(/lists\s*=\s*['"]?10['"]?/i) + console.log('[TEST] Test 1 completed successfully') + }) + + test('2. Change ivfflatLists: CLI creates migration, apply and verify', async () => { + // Step 1: Recreate integration with changed ivfflatLists + const integration = createVectorizeIntegration({ + default: { + dims: DIMS, + ivfflatLists: 20, // Changed from 10 to 20 + }, + }) + + // Update config with new integration (this simulates changing static config in payload.config.ts) + cliConfig = await buildConfig({ + secret: 'test-secret', + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: ['vector'], + afterSchemaInit: [integration.afterSchemaInitHook], + migrationDir: migrationsDir, + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${cliDbName}`, + }, + }), + plugins: [ + integration.payloadcmsVectorize({ + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc) => [{ chunk: doc.title || '' }], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(DIMS), + realTimeIngestionFn: makeDummyEmbedDocs(DIMS), + }, + }, + }, + }), + ], + jobs: { + tasks: [], + autoRun: [ + { + cron: '*\/5 * * * * *', + limit: 10, + }, + ], + }, + }) + + // Get payload instance + cliPayload = await getPayload({ + config: cliConfig, + cron: true, + key: `migration-cli-test-${Date.now()}`, + }) + + // Step 2: Run vectorize:migrate (should detect change and create migration) + console.log('[TEST] Step 2: Running vectorize:migrate...') + const migrateScriptStart = Date.now() + try { + await Promise.race([ + vectorizeMigrateScript(cliConfig), + new Promise((_, reject) => + setTimeout(() => reject(new Error('vectorize:migrate timed out after 30s')), 30000), + ), + ]) + const migrateScriptEnd = Date.now() + console.log( + `[TEST] Step 2.5: vectorize:migrate completed in ${migrateScriptEnd - migrateScriptStart}ms`, + ) + } catch (error) { + console.error('[TEST] Step 2.5: vectorize:migrate failed:', error) + throw error + } + + // Step 3: Verify migration file was created and contains correct SQL + console.log('[TEST] Step 3: Listing all files in migrations directory:') + const allFiles = readdirSync(migrationsDir) + for (const file of allFiles) { + const filePath = join(migrationsDir, file) + const stats = statSync(filePath) + console.log( + `[TEST] - ${file} (${stats.size} bytes, modified: ${stats.mtime.toISOString()})`, + ) + } + + const migrations = readdirSync(migrationsDir) + .filter( + (f) => (f.endsWith('.ts') || f.endsWith('.js')) && f !== 'index.ts' && f !== 'index.js', + ) + .map((f) => ({ + name: f, + path: join(migrationsDir, f), + mtime: statSync(join(migrationsDir, f)).mtime, + })) + .sort((a, b) => b.mtime.getTime() - a.mtime.getTime()) + + console.log(`[TEST] Found ${migrations.length} migration files (excluding 
index.ts/js)`) + migrations.forEach((m, i) => { + console.log(`[TEST] ${i + 1}. ${m.name} (${m.mtime.toISOString()})`) + }) + + const newestMigration = migrations[0] + expect(newestMigration).toBeTruthy() + console.log(`[TEST] Reading migration file: ${newestMigration.path}`) + + // Verify migration file contains IVFFLAT rebuild SQL + const migrationContent = readFileSync(newestMigration.path, 'utf-8') + console.log(`[TEST] Migration file content length: ${migrationContent.length} characters`) + console.log( + `[TEST] Migration file preview (first 1000 chars):\n${migrationContent.substring(0, 1000)}`, + ) + // PostgreSQL returns lists='20' (with quotes), so match either format + expect(migrationContent).toMatch(/lists\s*=\s*['"]?20['"]?/i) + expect(migrationContent).toContain('DROP INDEX') + expect(migrationContent).toContain('CREATE INDEX') + + // Step 4: Apply the migration + if (typeof (cliPayload.db as any).migrate === 'function') { + await (cliPayload.db as any).migrate() + } else { + // Fallback: manually load and execute migration files + const migrationFiles = readdirSync(migrationsDir) + .filter((f) => f.endsWith('.ts') && f !== 'index.ts') + .sort() + + for (const file of migrationFiles) { + const migrationPath = join(migrationsDir, file) + const migration = await import(migrationPath) + if (migration.up) { + await migration.up({ db: cliPayload.db.drizzle, payload: cliPayload, req: {} as any }) + } + } + } + + // Step 5: Verify index was rebuilt with new lists parameter + const postgresPayload = cliPayload as PostgresPayload + const schemaName = postgresPayload.db.schemaName || 'public' + const tableName = 'default' + const indexName = `${tableName}_embedding_ivfflat` + + const indexCheck = await postgresPayload.db.pool?.query( + `SELECT pg_get_indexdef(c.oid) as def + FROM pg_indexes i + JOIN pg_class c ON c.relname = i.indexname + JOIN pg_namespace n ON n.oid = c.relnamespace AND n.nspname = i.schemaname + WHERE i.schemaname = $1 AND i.tablename = $2 AND i.indexname = $3`, + [schemaName, tableName, indexName], + ) + const indexDef = indexCheck?.rows[0]?.def || '' + expect(indexDef).toBeTruthy() + // PostgreSQL returns lists='20' (with quotes), so match either format + expect(indexDef).toMatch(/lists\s*=\s*['"]?20['"]?/i) + }) + + test('3. Idempotency: CLI does not create duplicate migration when config unchanged', async () => { + // Get migration count before + const migrationsBefore = readdirSync(migrationsDir).filter( + (f) => f.endsWith('.ts') || f.endsWith('.js'), + ).length + + // Run vectorize:migrate again (config hasn't changed) + console.log('[TEST] Running vectorize:migrate for idempotency check...') + const startTime = Date.now() + try { + await Promise.race([ + vectorizeMigrateScript(cliConfig), + new Promise((_, reject) => + setTimeout(() => reject(new Error('vectorize:migrate timed out after 30s')), 30000), + ), + ]) + const endTime = Date.now() + console.log(`[TEST] vectorize:migrate completed in ${endTime - startTime}ms`) + } catch (error) { + console.error('[TEST] vectorize:migrate failed:', error) + throw error + } + + // Verify no new migration was created + const migrationsAfter = readdirSync(migrationsDir).filter( + (f) => f.endsWith('.ts') || f.endsWith('.js'), + ).length + + expect(migrationsAfter).toBe(migrationsBefore) + }) + + test('4. 
Change dims: CLI creates destructive migration', async () => { + console.log('[TEST] Starting test 4: Change dims') + const NEW_DIMS = DIMS + 2 // Change dimensions (destructive) + console.log(`[TEST] NEW_DIMS: ${NEW_DIMS}`) + + // Step 1: Recreate integration with changed dims + console.log('[TEST] Step 1: Creating integration with changed dims...') + const integration = createVectorizeIntegration({ + default: { + dims: NEW_DIMS, // Changed dimensions + ivfflatLists: 20, // Keep same lists + }, + }) + + // Update config with new integration + cliConfig = await buildConfig({ + secret: 'test-secret', + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: ['vector'], + afterSchemaInit: [integration.afterSchemaInitHook], + migrationDir: migrationsDir, + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${cliDbName}`, + }, + }), + plugins: [ + integration.payloadcmsVectorize({ + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc) => [{ chunk: doc.title || '' }], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(NEW_DIMS), + realTimeIngestionFn: makeDummyEmbedDocs(NEW_DIMS), + }, + }, + }, + }), + ], + jobs: { + tasks: [], + autoRun: [ + { + cron: '*\/5 * * * * *', + limit: 10, + }, + ], + }, + }) + + // Get payload instance + cliPayload = await getPayload({ + config: cliConfig, + cron: true, + key: `migration-cli-test-${Date.now()}`, + }) + + // Step 2: Run vectorize:migrate (should detect dims change) + console.log('[TEST] Step 2: Running vectorize:migrate...') + await vectorizeMigrateScript(cliConfig) + console.log('[TEST] Step 2.5: vectorize:migrate completed') + + // Step 3: Verify migration file contains destructive SQL (truncate + column type change) + console.log('[TEST] Step 3: Listing all files in migrations directory:') + const allFiles = readdirSync(migrationsDir) + for (const file of allFiles) { + const filePath = join(migrationsDir, file) + const stats = statSync(filePath) + console.log( + `[TEST] - ${file} (${stats.size} bytes, modified: ${stats.mtime.toISOString()})`, + ) + } + + const migrations = readdirSync(migrationsDir) + .filter( + (f) => (f.endsWith('.ts') || f.endsWith('.js')) && f !== 'index.ts' && f !== 'index.js', + ) + .map((f) => ({ + name: f, + path: join(migrationsDir, f), + mtime: statSync(join(migrationsDir, f)).mtime, + })) + .sort((a, b) => b.mtime.getTime() - a.mtime.getTime()) + + console.log(`[TEST] Found ${migrations.length} migration files (excluding index.ts/js)`) + const newestMigration = migrations[0] + console.log(`[TEST] Reading newest migration: ${newestMigration.path}`) + const migrationContent = readFileSync(newestMigration.path, 'utf-8') + console.log(`[TEST] Migration content length: ${migrationContent.length} characters`) + console.log( + `[TEST] Migration content preview (first 1000 chars):\n${migrationContent.substring(0, 1000)}`, + ) + + // Verify it contains dims change SQL + expect(migrationContent).toContain('Changing dims') + expect(migrationContent).toContain('TRUNCATE TABLE') + expect(migrationContent).toContain(`vector(${NEW_DIMS})`) + expect(migrationContent).toContain('ALTER COLUMN embedding TYPE') + console.log('[TEST] Step 3.5: Migration file verification passed') + + // Step 4: Apply the migration + console.log('[TEST] Step 4: Applying migration...') + console.log('[TEST] Step 4.1: About to call cliPayload.db.migrate()...') + console.log('[TEST] 
Step 4.1.1: Migration directory:', migrationsDir) + console.log( + '[TEST] Step 4.1.2: Payload instance migrationDir:', + (cliPayload.db as any).migrationDir, + ) + try { + const migrateStart = Date.now() + console.log('[TEST] Step 4.1.3: Calling migrate() at', new Date().toISOString()) + if (typeof (cliPayload.db as any).migrate === 'function') { + await (cliPayload.db as any).migrate() + } else { + // Fallback: manually load and execute migration files + const migrationFiles = readdirSync(migrationsDir) + .filter((f) => f.endsWith('.ts') && f !== 'index.ts') + .sort() + + for (const file of migrationFiles) { + const migrationPath = join(migrationsDir, file) + const migration = await import(migrationPath) + if (migration.up) { + await migration.up({ db: cliPayload.db.drizzle, payload: cliPayload, req: {} as any }) + } + } + } + const migrateEnd = Date.now() + console.log( + `[TEST] Step 4.2: cliPayload.db.migrate() completed in ${migrateEnd - migrateStart}ms`, + ) + } catch (error) { + console.error('[TEST] Step 4.2: Error during migration:', error) + throw error + } + console.log('[TEST] Step 4.5: Migration applied successfully') + + // Step 5: Verify column type changed and table was truncated + console.log('[TEST] Step 5: Verifying column type and table state...') + const postgresPayload = cliPayload as PostgresPayload + const schemaName = postgresPayload.db.schemaName || 'public' + const tableName = 'default' + + // Check column type + const columnCheck = await postgresPayload.db.pool?.query( + `SELECT format_type(atttypid, atttypmod) as column_type + FROM pg_attribute + JOIN pg_class ON pg_attribute.attrelid = pg_class.oid + JOIN pg_namespace ON pg_class.relnamespace = pg_namespace.oid + WHERE pg_namespace.nspname = $1 + AND pg_class.relname = $2 + AND pg_attribute.attname = 'embedding' + AND pg_attribute.attnum > 0 + AND NOT pg_attribute.attisdropped`, + [schemaName, tableName], + ) + const columnType = columnCheck?.rows[0]?.column_type || '' + expect(columnType).toContain(`vector(${NEW_DIMS})`) + + // Verify table was truncated (should be empty or have no embeddings) + console.log('[TEST] Step 5.5: Checking table row count...') + const countCheck = await postgresPayload.db.pool?.query( + `SELECT COUNT(*) as count FROM "${schemaName}"."${tableName}"`, + ) + const rowCount = parseInt(countCheck?.rows[0]?.count || '0', 10) + console.log(`[TEST] Table row count: ${rowCount}`) + // Table should be empty after truncate (unless new embeddings were created during test) + expect(rowCount).toBe(0) + console.log('[TEST] Test 4 completed successfully') + }) + + test('5. 
Add new knowledgePool: CLI creates migration for new table', async () => { + console.log('[TEST] Starting test 5: Add new knowledgePool') + + // Step 1: Create integration with an additional knowledgePool "secondary" + const integrationWithSecondary = createVectorizeIntegration({ + default: { + dims: 10, // Keep same dims as test 4 + ivfflatLists: 20, // Keep same lists as test 4 + }, + secondary: { + dims: DIMS, + ivfflatLists: 5, + }, + }) + + cliConfig = await buildConfig({ + secret: 'test-secret', + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + { + slug: 'articles', + fields: [{ name: 'content', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: ['vector'], + afterSchemaInit: [integrationWithSecondary.afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${cliDbName}`, + }, + }), + plugins: [ + integrationWithSecondary.payloadcmsVectorize({ + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc) => [{ chunk: doc.title || '' }], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(10), + realTimeIngestionFn: makeDummyEmbedDocs(10), + }, + }, + secondary: { + collections: { + articles: { + toKnowledgePool: async (doc: any) => [{ chunk: doc.content || '' }], + }, + } as any, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(DIMS), + realTimeIngestionFn: makeDummyEmbedDocs(DIMS), + }, + }, + }, + }), + ], + jobs: { + tasks: [], + autoRun: [ + { + cron: '*/5 * * * * *', + limit: 10, + }, + ], + }, + }) + + // Get new payload instance + cliPayload = await getPayload({ + config: cliConfig, + cron: true, + key: `migration-cli-test-5-${Date.now()}`, + }) + + // Step 2: Create migration for new table + console.log('[TEST] Step 2: Creating migration for new knowledgePool...') + try { + await cliPayload.db.createMigration({ + migrationName: 'add_secondary_pool', + payload: cliPayload, + forceAcceptWarning: true, // Skip prompts in tests + }) + console.log('[TEST] Step 2.5: Migration created') + } catch (e) { + console.error('[TEST] Step 2 ERROR - createMigration failed:', e) + throw e + } + + // Step 3: Run vectorize:migrate to add IVFFLAT index for new pool + console.log('[TEST] Step 3: Running vectorize:migrate...') + try { + await vectorizeMigrateScript(cliConfig) + console.log('[TEST] Step 3.5: vectorize:migrate completed') + } catch (e) { + console.error('[TEST] Step 3 ERROR - vectorize:migrate failed:', e) + throw e + } + + // Step 4: Verify migration file contains secondary table creation and IVFFLAT index + const migrations = readdirSync(migrationsDir) + .filter( + (f) => (f.endsWith('.ts') || f.endsWith('.js')) && f !== 'index.ts' && f !== 'index.js', + ) + .map((f) => ({ + name: f, + path: join(migrationsDir, f), + mtime: statSync(join(migrationsDir, f)).mtime, + })) + .sort((a, b) => b.mtime.getTime() - a.mtime.getTime()) + + const newestMigration = migrations[0] + console.log(`[TEST] Step 4: Checking newest migration: ${newestMigration.name}`) + const migrationContent = readFileSync(newestMigration.path, 'utf-8') + + // Should contain secondary table creation + expect(migrationContent).toContain('secondary') + // Should contain IVFFLAT index for secondary pool + expect(migrationContent).toContain('secondary_embedding_ivfflat') + console.log('[TEST] Step 4.5: Migration file verification passed') + + // Step 5: Apply the migration + 
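+      // forceAcceptWarning skips Payload's interactive dev-mode migration prompt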
console.log('[TEST] Step 5: Applying migration...') + try { + await (cliPayload.db as any).migrate({ forceAcceptWarning: true }) + console.log('[TEST] Step 5.5: Migration applied') + } catch (e) { + console.error('[TEST] Step 5 ERROR - migrate failed:', e) + throw e + } + + // Step 6: Verify new table exists with IVFFLAT index + const postgresPayload = cliPayload as PostgresPayload + const schemaName = postgresPayload.db.schemaName || 'public' + + // Check table exists + const tableCheck = await postgresPayload.db.pool?.query( + `SELECT EXISTS ( + SELECT FROM information_schema.tables + WHERE table_schema = $1 AND table_name = 'secondary' + )`, + [schemaName], + ) + expect(tableCheck?.rows[0]?.exists).toBe(true) + console.log('[TEST] Step 6: Secondary table exists') + + // Check IVFFLAT index exists + const indexCheck = await postgresPayload.db.pool?.query( + `SELECT indexname FROM pg_indexes WHERE schemaname = $1 AND indexname = $2`, + [schemaName, 'secondary_embedding_ivfflat'], + ) + expect(indexCheck?.rows.length).toBeGreaterThan(0) + console.log('[TEST] Step 6.5: Secondary IVFFLAT index exists') + console.log('[TEST] Test 5 completed successfully') + }) + + test('6. Remove knowledgePool: Secondary table can be dropped manually', async () => { + console.log('[TEST] Starting test 6: Remove knowledgePool') + + // Note: Payload's migration system doesn't automatically generate DROP TABLE + // migrations when collections are removed. Users need to manually drop tables. + // This test verifies that after removing a pool, the vectorize plugin handles + // it gracefully and the table can be dropped manually. + + // Step 1: Create integration with only 'default' pool (removing 'secondary') + const integrationWithoutSecondary = createVectorizeIntegration({ + default: { + dims: 10, + ivfflatLists: 20, + }, + }) + + cliConfig = await buildConfig({ + secret: 'test-secret', + collections: [ + { + slug: 'posts', + fields: [{ name: 'title', type: 'text' }], + }, + ], + db: postgresAdapter({ + extensions: ['vector'], + afterSchemaInit: [integrationWithoutSecondary.afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, + pool: { + connectionString: `postgresql://postgres:password@localhost:5433/${cliDbName}`, + }, + }), + plugins: [ + integrationWithoutSecondary.payloadcmsVectorize({ + knowledgePools: { + default: { + collections: { + posts: { + toKnowledgePool: async (doc) => [{ chunk: doc.title || '' }], + }, + }, + embeddingConfig: { + version: testEmbeddingVersion, + queryFn: makeDummyEmbedQuery(10), + realTimeIngestionFn: makeDummyEmbedDocs(10), + }, + }, + }, + }), + ], + jobs: { + tasks: [], + autoRun: [ + { + cron: '*/5 * * * * *', + limit: 10, + }, + ], + }, + }) + + // Get new payload instance + cliPayload = await getPayload({ + config: cliConfig, + cron: true, + key: `migration-cli-test-6-${Date.now()}`, + }) + + // Step 2: Run vectorize:migrate - should detect no changes for default pool + // and not error out because secondary is no longer in config + console.log('[TEST] Step 2: Running vectorize:migrate with secondary pool removed...') + await vectorizeMigrateScript(cliConfig) + console.log('[TEST] Step 2.5: vectorize:migrate completed (no changes expected)') + + // Step 3: Verify secondary table still exists (Payload doesn't auto-drop) + const postgresPayload = cliPayload as PostgresPayload + const schemaName = postgresPayload.db.schemaName || 'public' + + const tableCheck = await postgresPayload.db.pool?.query( + `SELECT EXISTS ( + SELECT FROM information_schema.tables + 
WHERE table_schema = $1 AND table_name = 'secondary' + )`, + [schemaName], + ) + // Table should still exist since Payload doesn't auto-drop tables + expect(tableCheck?.rows[0]?.exists).toBe(true) + console.log('[TEST] Step 3: Secondary table still exists (as expected - manual drop required)') + + // Step 4: Manually drop the secondary table and its index + console.log('[TEST] Step 4: Manually dropping secondary table...') + await postgresPayload.db.pool?.query( + `DROP INDEX IF EXISTS "${schemaName}"."secondary_embedding_ivfflat"`, + ) + await postgresPayload.db.pool?.query(`DROP TABLE IF EXISTS "${schemaName}"."secondary" CASCADE`) + console.log('[TEST] Step 4.5: Secondary table dropped') + + // Step 5: Verify secondary table no longer exists + const tableCheckAfter = await postgresPayload.db.pool?.query( + `SELECT EXISTS ( + SELECT FROM information_schema.tables + WHERE table_schema = $1 AND table_name = 'secondary' + )`, + [schemaName], + ) + expect(tableCheckAfter?.rows[0]?.exists).toBe(false) + console.log('[TEST] Step 5: Secondary table no longer exists') + console.log('[TEST] Test 6 completed successfully') + }) + }) +}) diff --git a/dev/specs/multipools.spec.ts b/dev/specs/multipools.spec.ts index 8b9c30d..58a9ef6 100644 --- a/dev/specs/multipools.spec.ts +++ b/dev/specs/multipools.spec.ts @@ -1,11 +1,11 @@ import type { Payload, SanitizedConfig } from 'payload' -import { buildConfig, getPayload } from 'payload' +import { buildConfig } from 'payload' import { beforeAll, describe, expect, test } from 'vitest' import { createVectorizeIntegration } from 'payloadcms-vectorize' import { lexicalEditor } from '@payloadcms/richtext-lexical' import { postgresAdapter } from '@payloadcms/db-postgres' -import { createTestDb } from './utils.js' +import { createTestDb, initializePayloadWithMigrations, createTestMigrationsDir } from './utils.js' import type { PostgresPayload } from '../../src/types.js' const DIMS_POOL1 = 8 @@ -18,6 +18,7 @@ describe('Multiple knowledge pools', () => { beforeAll(async () => { await createTestDb({ dbName }) + const { migrationsDir } = createTestMigrationsDir(dbName) const multiPoolIntegration = createVectorizeIntegration({ pool1: { @@ -60,6 +61,8 @@ describe('Multiple knowledge pools', () => { db: postgresAdapter({ extensions: ['vector'], afterSchemaInit: [multiPoolIntegration.afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, pool: { connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, }, @@ -67,7 +70,10 @@ describe('Multiple knowledge pools', () => { plugins: [multiPoolIntegration.payloadcmsVectorize(multiPoolPluginOptions)], }) - payload = await getPayload({ config }) + payload = await initializePayloadWithMigrations({ + config, + key: `multipools-test-${Date.now()}`, + }) }) test('creates two embeddings collections with vector columns', async () => { diff --git a/dev/specs/queueName.spec.ts b/dev/specs/queueName.spec.ts index 887a1c0..7b6e7f0 100644 --- a/dev/specs/queueName.spec.ts +++ b/dev/specs/queueName.spec.ts @@ -1,11 +1,10 @@ import type { Payload, SanitizedConfig } from 'payload' -import { getPayload } from 'payload' import { beforeAll, describe, expect, test } from 'vitest' import { chunkText, chunkRichText } from 'helpers/chunkers.js' import type { SerializedEditorState } from '@payloadcms/richtext-lexical/lexical' import { postgresAdapter } from '@payloadcms/db-postgres' import { buildDummyConfig, getInitialMarkdownContent, integration, plugin } from './constants.js' -import { createTestDb } from 
'./utils.js' +import { createTestDb, initializePayloadWithMigrations, createTestMigrationsDir } from './utils.js' describe('Queue tests', () => { let config: SanitizedConfig @@ -15,6 +14,8 @@ describe('Queue tests', () => { const dbName = 'queue_test' beforeAll(async () => { await createTestDb({ dbName }) + const { migrationsDir } = createTestMigrationsDir(dbName) + config = await buildDummyConfig({ collections: [ { @@ -28,8 +29,10 @@ describe('Queue tests', () => { db: postgresAdapter({ extensions: ['vector'], afterSchemaInit: [integration.afterSchemaInitHook], + migrationDir: migrationsDir, + push: false, pool: { - connectionString: 'postgresql://postgres:password@localhost:5433/queue_test', + connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, }, }), plugins: [ @@ -65,7 +68,11 @@ describe('Queue tests', () => { }), ], }) - payload = await getPayload({ config }) + + payload = await initializePayloadWithMigrations({ + config, + key: `queue-test-${Date.now()}`, + }) markdownContent = await getInitialMarkdownContent(config) }) test('vectorization jobs are queued using the queueName', async () => { diff --git a/dev/specs/schemaName.spec.ts b/dev/specs/schemaName.spec.ts index 8ec7613..1af1725 100644 --- a/dev/specs/schemaName.spec.ts +++ b/dev/specs/schemaName.spec.ts @@ -3,13 +3,17 @@ import type { Payload } from 'payload' import { postgresAdapter } from '@payloadcms/db-postgres' import { makeDummyEmbedDocs, makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js' import { Client } from 'pg' -import { getPayload } from 'payload' import { beforeAll, describe, expect, test } from 'vitest' import type { PostgresPayload } from '../../src/types.js' import { buildDummyConfig, DIMS, integration, plugin } from './constants.js' -import { createTestDb, waitForVectorizationJobs } from './utils.js' +import { + createTestDb, + waitForVectorizationJobs, + initializePayloadWithMigrations, + createTestMigrationsDir, +} from './utils.js' import { createVectorSearchHandlers } from '../../src/endpoints/vectorSearch.js' import type { KnowledgePoolDynamicConfig } from 'payloadcms-vectorize' const CUSTOM_SCHEMA = 'custom' @@ -20,6 +24,7 @@ describe('Custom schemaName support', () => { beforeAll(async () => { await createTestDb({ dbName }) + const { migrationsDir } = createTestMigrationsDir(dbName) // Create the custom schema before Payload initializes const client = new Client({ @@ -42,6 +47,8 @@ describe('Custom schemaName support', () => { db: postgresAdapter({ afterSchemaInit: [integration.afterSchemaInitHook], extensions: ['vector'], + migrationDir: migrationsDir, + push: false, pool: { connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`, }, @@ -85,7 +92,11 @@ describe('Custom schemaName support', () => { ], }) - payload = await getPayload({ config, cron: true }) + payload = await initializePayloadWithMigrations({ + config, + key: `schema-name-test-${Date.now()}`, + cron: true, + }) }) test('embeddings table is created in custom schema', async () => { diff --git a/dev/specs/utils.ts b/dev/specs/utils.ts index 214891d..99c858b 100644 --- a/dev/specs/utils.ts +++ b/dev/specs/utils.ts @@ -2,6 +2,8 @@ import type { Payload, SanitizedConfig } from 'payload' import { buildConfig, getPayload } from 'payload' import { Client } from 'pg' +import { mkdirSync, rmSync } from 'fs' +import { join } from 'path' import { postgresAdapter } from '@payloadcms/db-postgres' import { lexicalEditor } from '@payloadcms/richtext-lexical' import { createVectorizeIntegration 
} from 'payloadcms-vectorize'
@@ -9,6 +11,7 @@ import { BULK_EMBEDDINGS_RUNS_SLUG } from '../../src/collections/bulkEmbeddingsR
 import { BULK_EMBEDDINGS_INPUT_METADATA_SLUG } from '../../src/collections/bulkEmbeddingInputMetadata.js'
 import { BULK_EMBEDDINGS_BATCHES_SLUG } from '../../src/collections/bulkEmbeddingsBatches.js'
 import { makeDummyEmbedDocs } from '../helpers/embed.js'
+import { script as vectorizeMigrateScript } from '../../src/bin/vectorize-migrate.js'
 import type {
   BulkEmbeddingsFns,
   BulkEmbeddingInput,
@@ -20,13 +23,67 @@ export const createTestDb = async ({ dbName }: { dbName: string }) => {
     process.env.DATABASE_ADMIN_URI || 'postgresql://postgres:password@localhost:5433/postgres' // connect to 'postgres'
   const client = new Client({ connectionString: adminUri })
   await client.connect()
   const exists = await client.query('SELECT 1 FROM pg_database WHERE datname = $1', [dbName])
   if (exists.rowCount === 0) {
     await client.query(`CREATE DATABASE ${dbName}`)
   }
   await client.end()
 }
+/**
+ * Initialize Payload with migrations applied.
+ * This handles the full migration setup:
+ * 1. Get payload instance
+ * 2. Create initial migration
+ * 3. Run vectorize:migrate to patch with IVFFLAT index
+ * 4. Apply migrations
+ *
+ * @param config - A pre-built SanitizedConfig (must have migrationDir and push: false in db config)
+ * @param key - Unique key for getPayload caching (prevents instance collisions in tests)
+ * @param cron - Whether to enable cron jobs (default: true)
+ */
+export async function initializePayloadWithMigrations({
+  config,
+  key,
+  cron = true,
+}: {
+  config: SanitizedConfig
+  key?: string
+  cron?: boolean
+}): Promise<Payload> {
+  const payload = await getPayload({ config, key, cron })
+
+  // Create initial migration (Payload's schema)
+  await payload.db.createMigration({ migrationName: 'initial', payload })
+
+  // Run vectorize:migrate to patch with IVFFLAT index
+  await vectorizeMigrateScript(config)
+
+  // Apply migrations (forceAcceptWarning bypasses the dev mode prompt)
+  await (payload.db as any).migrate({ forceAcceptWarning: true })
+
+  return payload
+}
+
+/**
+ * Create a unique migration directory for a test.
+ * Returns the path and a cleanup function.
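+ * The directory is wiped and recreated first so each test run starts from a clean slate.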
+
+/**
+ * Create a unique migration directory for a test.
+ * Returns the path and a cleanup function.
+ */
+export function createTestMigrationsDir(dbName: string): {
+  migrationsDir: string
+  cleanup: () => void
+} {
+  const migrationsDir = join(process.cwd(), 'dev', `test-migrations-${dbName}`)
+  // Clean up any existing migration directory
+  rmSync(migrationsDir, { recursive: true, force: true })
+  mkdirSync(migrationsDir, { recursive: true })
+
+  return {
+    migrationsDir,
+    cleanup: () => rmSync(migrationsDir, { recursive: true, force: true }),
+  }
+}
+
 async function waitForTasks(
   payload: Payload,
   taskSlugs: string[],
@@ -190,6 +260,13 @@ export async function buildPayloadWithIntegration({
   pluginOpts,
   key,
 }: BuildPayloadArgs): Promise<{ payload: Payload; config: SanitizedConfig }> {
+  // Create a unique migration directory for this test
+  const migrationsDir = join(process.cwd(), 'dev', `test-migrations-${dbName}`)
+
+  // Clean up any existing migration directory
+  rmSync(migrationsDir, { recursive: true, force: true })
+  mkdirSync(migrationsDir, { recursive: true })
+
   const integration = createVectorizeIntegration({
     default: {
       dims: DEFAULT_DIMS,
@@ -209,6 +286,8 @@ export async function buildPayloadWithIntegration({
     db: postgresAdapter({
       extensions: ['vector'],
       afterSchemaInit: [integration.afterSchemaInitHook],
+      migrationDir: migrationsDir,
+      push: false, // Prevent dev mode schema push - use migrations only
       pool: {
         connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`,
       },
     }),
@@ -238,6 +317,16 @@ export async function buildPayloadWithIntegration({
   const payloadKey = key ?? `payload-${dbName}-${Date.now()}`
 
   const payload = await getPayload({ config, key: payloadKey, cron: true })
+
+  // Create initial migration (Payload's schema)
+  await payload.db.createMigration({ migrationName: 'initial', payload })
+
+  // Run vectorize:migrate to patch with IVFFLAT index
+  await vectorizeMigrateScript(config)
+
+  // Apply migrations (forceAcceptWarning bypasses the dev mode prompt)
+  await (payload.db as any).migrate({ forceAcceptWarning: true })
+
   return { payload, config }
 }
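Taken together, the two helpers above give each spec an isolated database and an isolated migrations directory. A minimal sketch of the intended flow inside a spec (the `example_test` name is illustrative, and nothing in this block is part of the patch):

```typescript
import { postgresAdapter } from '@payloadcms/db-postgres'
import { afterAll, beforeAll } from 'vitest'
import { buildDummyConfig, integration } from './constants.js'
import { createTestDb, createTestMigrationsDir, initializePayloadWithMigrations } from './utils.js'

const dbName = 'example_test' // hypothetical per-spec database name
const { migrationsDir, cleanup } = createTestMigrationsDir(dbName)

beforeAll(async () => {
  await createTestDb({ dbName })
  const config = await buildDummyConfig({
    db: postgresAdapter({
      extensions: ['vector'],
      afterSchemaInit: [integration.afterSchemaInitHook],
      migrationDir: migrationsDir, // migrations are generated here, then applied
      push: false, // dev-mode schema push stays off so migrations are authoritative
      pool: { connectionString: `postgresql://postgres:password@localhost:5433/${dbName}` },
    }),
  })
  // Creates the initial migration, patches it with the IVFFLAT index, applies it
  await initializePayloadWithMigrations({ config, key: `example-${Date.now()}` })
})

afterAll(() => cleanup()) // removes the per-test dev/test-migrations-* directory
```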
diff --git a/dev/specs/vectorSearch.spec.ts b/dev/specs/vectorSearch.spec.ts
index 2ac894a..2c79747 100644
--- a/dev/specs/vectorSearch.spec.ts
+++ b/dev/specs/vectorSearch.spec.ts
@@ -1,6 +1,5 @@
 import type { Payload } from 'payload'
 
-import { getPayload } from 'payload'
 import { beforeAll, describe, expect, test } from 'vitest'
 import { makeDummyEmbedDocs, makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js'
 import { type SerializedEditorState } from '@payloadcms/richtext-lexical/lexical'
@@ -10,6 +9,8 @@ import {
   createMockBulkEmbeddings,
   createTestDb,
   waitForVectorizationJobs,
+  initializePayloadWithMigrations,
+  createTestMigrationsDir,
 } from './utils.js'
 import { postgresAdapter } from '@payloadcms/db-postgres'
 import { chunkRichText, chunkText } from 'helpers/chunkers.js'
@@ -77,9 +78,12 @@ describe('Search endpoint integration tests', () => {
   let payload: Payload
   let markdownContent: SerializedEditorState
   const titleAndQuery = 'My query is a title'
+  const dbName = 'endpoint_test'
 
   beforeAll(async () => {
-    await createTestDb({ dbName: 'endpoint_test' })
+    await createTestDb({ dbName })
+    const { migrationsDir } = createTestMigrationsDir(dbName)
+
     const config = await buildDummyConfig({
       jobs: {
         tasks: [],
@@ -102,8 +106,10 @@ describe('Search endpoint integration tests', () => {
       db: postgresAdapter({
         extensions: ['vector'],
         afterSchemaInit: [integration.afterSchemaInitHook],
+        migrationDir: migrationsDir,
+        push: false, // Prevent dev mode schema push - use migrations only
         pool: {
-          connectionString: 'postgresql://postgres:password@localhost:5433/endpoint_test',
+          connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`,
         },
       }),
       plugins: [
@@ -189,7 +195,13 @@ describe('Search endpoint integration tests', () => {
         }),
       ],
     })
-    payload = await getPayload({ config, cron: true })
+
+    // Initialize Payload with migrations
+    payload = await initializePayloadWithMigrations({
+      config,
+      key: `vector-search-test-${Date.now()}`,
+      cron: true,
+    })
     markdownContent = await getInitialMarkdownContent(config)
   })
diff --git a/dev/specs/vectorizedPayload.spec.ts b/dev/specs/vectorizedPayload.spec.ts
index 6ea9539..ffe182d 100644
--- a/dev/specs/vectorizedPayload.spec.ts
+++ b/dev/specs/vectorizedPayload.spec.ts
@@ -1,10 +1,14 @@
 import type { Payload } from 'payload'
 
-import { getPayload } from 'payload'
 import { beforeAll, describe, expect, test } from 'vitest'
 
 import { getVectorizedPayload, VectorizedPayload } from '../../src/types.js'
 import { buildDummyConfig, DIMS, getInitialMarkdownContent } from './constants.js'
-import { createTestDb, waitForVectorizationJobs } from './utils.js'
+import {
+  createTestDb,
+  waitForVectorizationJobs,
+  initializePayloadWithMigrations,
+  createTestMigrationsDir,
+} from './utils.js'
 import { postgresAdapter } from '@payloadcms/db-postgres'
 import { makeDummyEmbedDocs, makeDummyEmbedQuery, testEmbeddingVersion } from 'helpers/embed.js'
 import { chunkRichText, chunkText } from 'helpers/chunkers.js'
@@ -30,9 +34,12 @@ describe('VectorizedPayload', () => {
   let payload: Payload
   let markdownContent: SerializedEditorState
   const titleAndQuery = 'VectorizedPayload Test Title'
+  const dbName = 'vectorized_payload_test'
 
   beforeAll(async () => {
-    await createTestDb({ dbName: 'vectorized_payload_test' })
+    await createTestDb({ dbName })
+    const { migrationsDir } = createTestMigrationsDir(dbName)
+
     const config = await buildDummyConfig({
       jobs: {
         tasks: [],
@@ -55,8 +62,10 @@ describe('VectorizedPayload', () => {
       db: postgresAdapter({
         extensions: ['vector'],
         afterSchemaInit: [integration.afterSchemaInitHook],
+        migrationDir: migrationsDir,
+        push: false,
         pool: {
-          connectionString: 'postgresql://postgres:password@localhost:5433/vectorized_payload_test',
+          connectionString: `postgresql://postgres:password@localhost:5433/${dbName}`,
         },
       }),
       plugins: [
@@ -89,7 +98,12 @@ describe('VectorizedPayload', () => {
         }),
       ],
     })
-    payload = await getPayload({ config, cron: true })
+
+    payload = await initializePayloadWithMigrations({
+      config,
+      key: `vectorized-payload-test-${Date.now()}`,
+      cron: true,
+    })
     markdownContent = await getInitialMarkdownContent(config)
   })
diff --git a/package.json b/package.json
index da2e38e..317c47c 100644
--- a/package.json
+++ b/package.json
@@ -40,7 +40,7 @@
     "test:teardown": "docker-compose -f dev/docker-compose.test.yml down",
     "test": "pnpm test:int && pnpm test:e2e",
     "test:e2e": "playwright test",
-    "test:int": "cross-env DOTENV_CONFIG_PATH=dev/.env.test NODE_OPTIONS=--require=dotenv/config vitest"
+    "test:int": "cross-env DOTENV_CONFIG_PATH=dev/.env.test NODE_OPTIONS='--require=dotenv/config --import=tsx' vitest"
   },
   "devDependencies": {
     "@eslint/eslintrc": "^3.2.0",
@@ -82,6 +82,7 @@
     "sharp": "0.34.2",
     "sort-package-json": "^2.10.0",
     "tailwindcss": "^4.1.14",
+    "tsx": "^4.21.0",
     "typescript": "5.7.3",
     "vite-tsconfig-paths": "^5.1.4",
     "vitest": "^3.1.2",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index cf56fb2..9c865c8 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -129,15 +129,18 @@ importers:
       tailwindcss:
         specifier: ^4.1.14
         version: 4.1.18
+      tsx:
+        specifier: ^4.21.0
+        version: 4.21.0
       typescript:
         specifier: 5.7.3
         version: 5.7.3
       vite-tsconfig-paths:
         specifier: ^5.1.4
-        version: 5.1.4(typescript@5.7.3)(vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2))
+        version: 5.1.4(typescript@5.7.3)(vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2))
       vitest:
         specifier: ^3.1.2
-        version: 3.2.4(@types/debug@4.1.12)(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2)
+        version: 3.2.4(@types/debug@4.1.12)(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2)
       voyage-ai-provider:
         specifier: ^2.0.0
         version: 2.0.0(zod@4.3.4)
@@ -6098,6 +6101,11 @@ packages:
     engines: {node: '>=18.0.0'}
     hasBin: true
 
+  tsx@4.21.0:
+    resolution: {integrity: sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==}
+    engines: {node: '>=18.0.0'}
+    hasBin: true
+
   tweetnacl@0.14.5:
     resolution: {integrity: sha512-KXXFFdAbFXY4geFIwoyNK+f5Z1b7swfXABfL7HXCmoIWMKU3dmS26672A4EeQtDzLKy7SXmfBu51JolvEKwtGA==}
 
@@ -9197,13 +9205,13 @@ snapshots:
       chai: 5.3.3
       tinyrainbow: 2.0.0
 
-  '@vitest/mocker@3.2.4(vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2))':
+  '@vitest/mocker@3.2.4(vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2))':
     dependencies:
       '@vitest/spy': 3.2.4
       estree-walker: 3.0.3
       magic-string: 0.30.21
     optionalDependencies:
-      vite: 7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2)
+      vite: 7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2)
 
   '@vitest/pretty-format@3.2.4':
     dependencies:
@@ -13518,7 +13526,7 @@ snapshots:
   tsx@4.20.3:
     dependencies:
       esbuild: 0.25.12
-      get-tsconfig: 4.8.1
+      get-tsconfig: 4.13.0
     optionalDependencies:
       fsevents: 2.3.3
 
@@ -13529,6 +13537,13 @@ snapshots:
     optionalDependencies:
       fsevents: 2.3.3
 
+  tsx@4.21.0:
+    dependencies:
+      esbuild: 0.27.2
+      get-tsconfig: 4.13.0
+    optionalDependencies:
+      fsevents: 2.3.3
+
   tweetnacl@0.14.5: {}
 
   type-check@0.4.0:
@@ -13700,13 +13715,13 @@ snapshots:
       '@types/unist': 3.0.3
       unist-util-stringify-position: 4.0.0
 
-  vite-node@3.2.4(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2):
+  vite-node@3.2.4(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2):
    dependencies:
      cac: 6.7.14
      debug: 4.4.3
      es-module-lexer: 1.7.0
      pathe: 2.0.3
-      vite: 7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2)
+      vite: 7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2)
     transitivePeerDependencies:
       - '@types/node'
       - jiti
@@ -13721,18 +13736,18 @@
       - tsx
       - yaml
 
-  vite-tsconfig-paths@5.1.4(typescript@5.7.3)(vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2)):
+  vite-tsconfig-paths@5.1.4(typescript@5.7.3)(vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2)):
     dependencies:
       debug: 4.4.3
       globrex: 0.1.2
       tsconfck: 3.1.6(typescript@5.7.3)
     optionalDependencies:
-      vite: 7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2)
+      vite: 7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2)
     transitivePeerDependencies:
       - supports-color
       - typescript
 
-  vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2):
+  vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2):
     dependencies:
       esbuild: 0.27.2
       fdir: 6.5.0(picomatch@4.0.3)
@@ -13746,14 +13761,14 @@
       jiti: 2.6.1
       lightningcss: 1.30.2
       sass: 1.77.4
-      tsx: 4.20.6
+      tsx: 4.21.0
       yaml: 2.8.2
 
-  vitest@3.2.4(@types/debug@4.1.12)(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2):
+  vitest@3.2.4(@types/debug@4.1.12)(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2):
     dependencies:
       '@types/chai': 5.2.3
       '@vitest/expect': 3.2.4
-      '@vitest/mocker': 3.2.4(vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2))
+      '@vitest/mocker': 3.2.4(vite@7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2))
       '@vitest/pretty-format': 3.2.4
       '@vitest/runner': 3.2.4
       '@vitest/snapshot': 3.2.4
@@ -13771,8 +13786,8 @@
       tinyglobby: 0.2.15
       tinypool: 1.1.1
       tinyrainbow: 2.0.0
-      vite: 7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2)
-      vite-node: 3.2.4(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.20.6)(yaml@2.8.2)
+      vite: 7.3.0(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2)
+      vite-node: 3.2.4(@types/node@22.19.3)(jiti@2.6.1)(lightningcss@1.30.2)(sass@1.77.4)(tsx@4.21.0)(yaml@2.8.2)
       why-is-node-running: 2.3.0
     optionalDependencies:
       '@types/debug': 4.1.12
diff --git a/src/bin/vectorize-migrate.ts b/src/bin/vectorize-migrate.ts
new file mode 100644
index 0000000..1c3631e
--- /dev/null
+++ b/src/bin/vectorize-migrate.ts
@@ -0,0 +1,489 @@
+import type { SanitizedConfig } from 'payload'
+import { getPayload } from 'payload'
+import { readFileSync, writeFileSync, readdirSync, statSync, existsSync, rmSync } from 'fs'
+import { join, resolve } from 'path'
+import toSnakeCase from 'to-snake-case'
+
+import { getVectorizedPayload } from '../types.js'
+import type { KnowledgePoolStaticConfig } from '../types.js'
+
+/**
+ * Get prior state from existing migrations
+ */
+function getPriorStateFromMigrations(
+  migrationsDir: string,
+  poolNames: string[],
+): Map<string, { dims: number | null; ivfflatLists: number | null }> {
+  const state = new Map<string, { dims: number | null; ivfflatLists: number | null }>()
+
+  // Initialize with null (unknown state)
+  for (const poolName of poolNames) {
+    state.set(poolName, { dims: null, ivfflatLists: null })
+  }
+
+  if (!existsSync(migrationsDir)) {
+    return state
+  }
+
+  // Find all migration files and read them in reverse order (newest first)
+  // Exclude index.ts/index.js as those are not migration files
+  const migrationFiles = readdirSync(migrationsDir)
+    .filter((f) => (f.endsWith('.ts') || f.endsWith('.js')) && f !== 'index.ts' && f !== 'index.js')
+    .map((f) => ({
+      name: f,
+      path: join(migrationsDir, f),
+      mtime: statSync(join(migrationsDir, f)).mtime,
+    }))
+    .sort((a, b) => b.mtime.getTime() - a.mtime.getTime())
+
+  // Read migration files to find vector config
+  for (const file of migrationFiles) {
+    try {
+      const content = readFileSync(file.path, 'utf-8')
+
+      // Extract only the UP function content to avoid matching values in DOWN function
+      const upFunctionMatch = content.match(
+        /export\s+async\s+function\s+up\s*\([^)]*\)[^{]*\{([\s\S]*?)(?=\}\s*(?:export\s+async\s+function\s+down|$))/i,
+      )
+      const upContent = upFunctionMatch ? upFunctionMatch[1] : content
+
+      // Look for IVFFLAT index creation with lists parameter
+      for (const poolName of poolNames) {
+        const tableName = toSnakeCase(poolName)
+        const indexName = `${tableName}_embedding_ivfflat`
+
+        const indexMatch =
+          upContent.match(
+            new RegExp(
+              `db\\.execute\\(sql\\.raw.*?CREATE INDEX.*?"${indexName}".*?WITH\\s*\\(lists\\s*=\\s*(\\d+)\\)`,
+              'is',
+            ),
+          ) ||
+          upContent.match(
+            new RegExp(`CREATE INDEX.*?"${indexName}".*?WITH\\s*\\(lists\\s*=\\s*(\\d+)\\)`, 'is'),
+          ) ||
+          upContent.match(new RegExp(`ivfflat.*?lists\\s*=\\s*(\\d+)`, 'is'))
+
+        if (indexMatch && !state.get(poolName)?.ivfflatLists) {
+          const lists = parseInt(indexMatch[1], 10)
+          const current = state.get(poolName) || { dims: null, ivfflatLists: null }
+          state.set(poolName, { ...current, ivfflatLists: lists })
+        }
+
+        // Check for dims in vector column definition (pool-specific patterns)
+        const dimsMatch =
+          content.match(
+            new RegExp(`ALTER\\s+TABLE[^;]*?"${tableName}"[^;]*?vector\\((\\d+)\\)`, 'is'),
+          ) ||
+          content.match(
+            new RegExp(`CREATE\\s+TABLE[^;]*?"${tableName}"[^;]*?embedding[^;]*?vector\\((\\d+)\\)`, 'is'),
+          ) ||
+          content.match(
+            new RegExp(`"${tableName}"\\s*\\([^)]*embedding[^)]*vector\\((\\d+)\\)`, 'is'),
+          )
+
+        if (dimsMatch && !state.get(poolName)?.dims) {
+          const dims = parseInt(dimsMatch[1], 10)
+          const current = state.get(poolName) || { dims: null, ivfflatLists: null }
+          state.set(poolName, { ...current, dims })
+        }
+      }
+    } catch (err) {
+      // Skip files that can't be read
+      continue
+    }
+  }
+
+  return state
+}
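To make the parsing strategy concrete, here is a minimal sketch of the extraction `getPriorStateFromMigrations` performs on a prior migration's `up()` body (the pool name `default` and the numbers are illustrative, not CLI output):

```typescript
// A previously generated migration would contain a line shaped like this one:
const upBody =
  'await db.execute(sql.raw(`CREATE INDEX "default_embedding_ivfflat" ON "public"."default" USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100)`));'

// The same pattern the parser builds per pool: capture the lists parameter
const match = upBody.match(/CREATE INDEX.*?"default_embedding_ivfflat".*?WITH\s*\(lists\s*=\s*(\d+)\)/is)
console.log(match?.[1]) // "100" -> recorded as the prior ivfflatLists for pool "default"
```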
+
+/**
+ * Generate SQL code for IVFFLAT index rebuild
+ */
+function generateIvfflatRebuildCode(
+  tableName: string,
+  schemaName: string,
+  ivfflatLists: number,
+): string {
+  const indexName = `${tableName}_embedding_ivfflat`
+  return `  await db.execute(sql.raw(\`DROP INDEX IF EXISTS "${schemaName}"."${indexName}"\`));
+  await db.execute(sql.raw(\`CREATE INDEX "${indexName}" ON "${schemaName}"."${tableName}" USING ivfflat (embedding vector_cosine_ops) WITH (lists = ${ivfflatLists})\`));`
+}
+
+/**
+ * Generate SQL code for column type change
+ */
+function generateColumnTypeChangeCode(
+  tableName: string,
+  schemaName: string,
+  newDims: number,
+): string {
+  return `  // Change column type to new dimensions
+  await db.execute(sql.raw(\`ALTER TABLE "${schemaName}"."${tableName}" ALTER COLUMN embedding TYPE vector(${newDims})\`));`
+}
+
+/**
+ * Generate SQL code for destructive dims change
+ */
+function generateDimsChangeCode(
+  tableName: string,
+  schemaName: string,
+  newDims: number,
+  newIvfflatLists: number,
+): string {
+  const indexName = `${tableName}_embedding_ivfflat`
+  return `  // WARNING: Changing vector dimensions is destructive and requires re-embedding
+  // Step 1: Drop existing index
+  await db.execute(sql.raw(\`DROP INDEX IF EXISTS "${schemaName}"."${indexName}"\`));
+  // Step 2: Change column type (Payload migration may also generate this, but explicit is safer)
+  await db.execute(sql.raw(\`ALTER TABLE "${schemaName}"."${tableName}" ALTER COLUMN embedding TYPE vector(${newDims})\`));
+  // Step 3: Truncate table (destructive - all embeddings are lost)
+  // Use CASCADE to handle foreign key constraints
+  await db.execute(sql.raw(\`TRUNCATE TABLE "${schemaName}"."${tableName}" CASCADE\`));
+  // Step 4: Recreate index with new parameters
+  await db.execute(sql.raw(\`CREATE INDEX "${indexName}" ON "${schemaName}"."${tableName}" USING ivfflat (embedding vector_cosine_ops) WITH (lists = ${newIvfflatLists})\`));`
+}
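To see what the destructive path emits, the string returned by `generateDimsChangeCode('default', 'public', 1536, 100)` (pool name and numbers illustrative) lands in a migration's `up()` roughly as follows; the wrapper function is shown only for context:

```typescript
import { MigrateUpArgs, sql } from '@payloadcms/db-postgres'

export async function up({ db }: MigrateUpArgs): Promise<void> {
  // WARNING: Changing vector dimensions is destructive and requires re-embedding
  await db.execute(sql.raw(`DROP INDEX IF EXISTS "public"."default_embedding_ivfflat"`))
  await db.execute(sql.raw(`ALTER TABLE "public"."default" ALTER COLUMN embedding TYPE vector(1536)`))
  await db.execute(sql.raw(`TRUNCATE TABLE "public"."default" CASCADE`))
  await db.execute(sql.raw(`CREATE INDEX "default_embedding_ivfflat" ON "public"."default" USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100)`))
}
```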
"${indexName}" ON "${schemaName}"."${tableName}" USING ivfflat (embedding vector_cosine_ops) WITH (lists = ${newIvfflatLists})\`));` +} + +/** + * Patch a migration file with vector-specific SQL + */ +function patchMigrationFile( + migrationPath: string, + staticConfigs: Record, + schemaName: string, + priorState: Map, +): void { + const content = readFileSync(migrationPath, 'utf-8') + + // Generate SQL code for each pool + const vectorUpCode: string[] = [] + const vectorDownCode: string[] = [] + + for (const [poolName, config] of Object.entries(staticConfigs)) { + const tableName = toSnakeCase(poolName) + const priorConfig = priorState.get(poolName) || { dims: null, ivfflatLists: null } + const dimsChanged = priorConfig.dims !== null && priorConfig.dims !== config.dims + const ivfflatListsChanged = + priorConfig.ivfflatLists !== null && priorConfig.ivfflatLists !== config.ivfflatLists + + // Check if dims changed (destructive) - handle this first as it includes index operations + if (dimsChanged) { + vectorUpCode.push( + ` // payloadcms-vectorize: WARNING - Changing dims from ${priorConfig.dims} to ${config.dims} is destructive`, + ) + // When dims changes, we need to: + // 1. Drop existing index first + // 2. Change column type (Payload migration may also generate this) + // 3. Truncate table (destructive) + // 4. Recreate index with new ivfflatLists + vectorUpCode.push( + generateDimsChangeCode(tableName, schemaName, config.dims, config.ivfflatLists), + ) + // Down migration: restore to previous state (but can't restore data) + vectorDownCode.push( + ` // payloadcms-vectorize: Revert dims change (WARNING: data was truncated and cannot be restored)`, + ) + // Restore previous column type and index + vectorDownCode.push( + generateColumnTypeChangeCode(tableName, schemaName, priorConfig.dims || config.dims), + ) + vectorDownCode.push( + generateIvfflatRebuildCode( + tableName, + schemaName, + priorConfig.ivfflatLists || config.ivfflatLists, + ), + ) + vectorDownCode.push(` // WARNING: Original data cannot be restored`) + } else if (ivfflatListsChanged) { + // Check if ivfflatLists changed (only if dims didn't change, since dims change handles index) + vectorUpCode.push( + ` // payloadcms-vectorize: Rebuild IVFFLAT index for ${poolName} with lists=${config.ivfflatLists}`, + ) + vectorUpCode.push(generateIvfflatRebuildCode(tableName, schemaName, config.ivfflatLists)) + // Down migration: rebuild with old lists + vectorDownCode.push( + ` // payloadcms-vectorize: Revert IVFFLAT index for ${poolName} to lists=${priorConfig.ivfflatLists}`, + ) + vectorDownCode.push( + generateIvfflatRebuildCode( + tableName, + schemaName, + priorConfig.ivfflatLists || config.ivfflatLists, + ), + ) + } + + // If this is the first migration, ensure index exists + // Note: Column is handled by Drizzle schema via afterSchemaInit + // We only check ivfflatLists because dims will always be found from Drizzle schema + if (priorConfig.ivfflatLists === null) { + vectorUpCode.push(` // payloadcms-vectorize: Initial IVFFLAT index setup for ${poolName}`) + vectorUpCode.push( + ` // Note: Embedding column is created via Drizzle schema (afterSchemaInit hook)`, + ) + vectorUpCode.push(generateIvfflatRebuildCode(tableName, schemaName, config.ivfflatLists)) + vectorDownCode.push(` // payloadcms-vectorize: Drop index on rollback`) + const indexName = `${tableName}_embedding_ivfflat` + vectorDownCode.push( + ` await db.execute(sql.raw(\`DROP INDEX IF EXISTS "${schemaName}"."${indexName}"\`));`, + ) + } + } + + if 
+
+  if (vectorUpCode.length === 0) {
+    // No changes needed
+    return
+  }
+
+  // Find the up function and insert code before the closing brace
+  const upFunctionMatch = content.match(
+    /export\s+async\s+function\s+up\s*\([^)]*\)\s*:\s*Promise<void>\s*\{/i,
+  )
+  if (!upFunctionMatch) {
+    throw new Error(`Could not find 'up' function in migration file: ${migrationPath}`)
+  }
+
+  const upFunctionStart = upFunctionMatch.index! + upFunctionMatch[0].length
+  const downFunctionMatch = content.match(/export\s+async\s+function\s+down\s*\([^)]*\)/i)
+  const searchEnd = downFunctionMatch ? downFunctionMatch.index! : content.length
+
+  // Find the last closing brace before down function or end
+  const upFunctionBody = content.substring(upFunctionStart, searchEnd)
+  const lastBraceIndex = upFunctionBody.lastIndexOf('}')
+  if (lastBraceIndex === -1) {
+    throw new Error(
+      `Could not find closing brace for 'up' function in migration file: ${migrationPath}`,
+    )
+  }
+
+  // Insert our code before the closing brace
+  const beforeBrace = content.substring(0, upFunctionStart + lastBraceIndex)
+  const afterBrace = content.substring(upFunctionStart + lastBraceIndex)
+
+  const codeToInsert = '\n' + vectorUpCode.join('\n') + '\n'
+  let newContent = beforeBrace + codeToInsert + afterBrace
+
+  // Handle down function
+  if (downFunctionMatch) {
+    const downFunctionStart = downFunctionMatch.index! + downFunctionMatch[0].length
+    const downBraceMatch = newContent.substring(downFunctionStart).match(/\{/)
+    if (downBraceMatch) {
+      const downBodyStart = downFunctionStart + downBraceMatch.index! + 1
+      const downBody = newContent.substring(downBodyStart)
+      const downLastBraceIndex = downBody.lastIndexOf('}')
+      if (downLastBraceIndex !== -1) {
+        const beforeDownBrace = newContent.substring(0, downBodyStart + downLastBraceIndex)
+        const afterDownBrace = newContent.substring(downBodyStart + downLastBraceIndex)
+        const downCodeToInsert = '\n' + vectorDownCode.join('\n') + '\n'
+        newContent = beforeDownBrace + downCodeToInsert + afterDownBrace
+      }
+    }
+  } else if (vectorDownCode.length > 0) {
+    // Add down function if it doesn't exist (the generated code references db and sql)
+    const lastFileBrace = newContent.lastIndexOf('}')
+    if (lastFileBrace !== -1) {
+      const beforeLastBrace = newContent.substring(0, lastFileBrace)
+      const afterLastBrace = newContent.substring(lastFileBrace)
+      const downFunctionCode = `\n\nexport async function down({ db, payload, req }: { db: any; payload: any; req: any }): Promise<void> {\n${vectorDownCode.join('\n')}\n}`
+      newContent = beforeLastBrace + downFunctionCode + afterLastBrace
+    }
+  }
+
+  writeFileSync(migrationPath, newContent, 'utf-8')
+}
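For orientation, a sketch of a freshly patched first-run migration after `patchMigrationFile` has run, for a single pool `default` in schema `public` with lists = 100 (all values illustrative):

```typescript
import { MigrateUpArgs, MigrateDownArgs, sql } from '@payloadcms/db-postgres'

export async function up({ db, payload, req }: MigrateUpArgs): Promise<void> {
  // ...schema statements generated by payload.db.createMigration...

  // payloadcms-vectorize: Initial IVFFLAT index setup for default
  // Note: Embedding column is created via Drizzle schema (afterSchemaInit hook)
  await db.execute(sql.raw(`DROP INDEX IF EXISTS "public"."default_embedding_ivfflat"`))
  await db.execute(sql.raw(`CREATE INDEX "default_embedding_ivfflat" ON "public"."default" USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100)`))
}

export async function down({ db, payload, req }: MigrateDownArgs): Promise<void> {
  // ...generated down statements...

  // payloadcms-vectorize: Drop index on rollback
  await db.execute(sql.raw(`DROP INDEX IF EXISTS "public"."default_embedding_ivfflat"`))
}
```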
+
+/**
+ * Bin script entry point for creating vector migrations
+ */
+export const script = async (config: SanitizedConfig): Promise<void> => {
+  // Get Payload instance for db operations and to access static configs via VectorizedPayload
+  const payload = await getPayload({
+    config,
+    // In test environment, use unique key and enable cron for job processing
+    ...(process.env.TEST_ENV ? { key: `vectorize-migrate-${Date.now()}`, cron: true } : {}),
+  })
+
+  // Get static configs from VectorizedPayload
+  const vectorizedPayload = getVectorizedPayload(payload)
+  if (!vectorizedPayload) {
+    throw new Error(
+      '[payloadcms-vectorize] Vectorize plugin not found. Ensure payloadcmsVectorize is configured in your Payload config.',
+    )
+  }
+
+  const staticConfigs = vectorizedPayload._staticConfigs
+  if (!staticConfigs || Object.keys(staticConfigs).length === 0) {
+    throw new Error('[payloadcms-vectorize] No static configs found')
+  }
+
+  const poolNames = Object.keys(staticConfigs)
+  const schemaName = (payload.db as any).schemaName || 'public'
+
+  // Get migrations directory
+  const dbMigrationDir = (payload.db as any).migrationDir
+  const migrationsDir = dbMigrationDir || resolve(process.cwd(), 'src/migrations')
+
+  // Get prior state from migrations
+  const priorState = getPriorStateFromMigrations(migrationsDir, poolNames)
+
+  // Check if any changes are needed
+  let hasChanges = false
+  let isFirstMigration = false
+  for (const [poolName, currentConfig] of Object.entries(staticConfigs)) {
+    const prior = priorState.get(poolName) || { dims: null, ivfflatLists: null }
+
+    // Check if this is the first migration (no IVFFLAT index exists yet)
+    if (prior.ivfflatLists === null) {
+      isFirstMigration = true
+      hasChanges = true
+      break
+    }
+
+    // Check for actual changes
+    if (
+      (prior.dims !== null && prior.dims !== currentConfig.dims) ||
+      (prior.ivfflatLists !== null && prior.ivfflatLists !== currentConfig.ivfflatLists)
+    ) {
+      hasChanges = true
+      break
+    }
+  }
+
+  // If no changes detected
+  if (!hasChanges) {
+    console.log('[payloadcms-vectorize] No configuration changes detected.')
+    return
+  }
+
+  // Determine if there are actual schema changes (dims change) or just index parameter changes (ivfflatLists)
+  let hasSchemaChanges = false
+  for (const [poolName, currentConfig] of Object.entries(staticConfigs)) {
+    const prior = priorState.get(poolName) || { dims: null, ivfflatLists: null }
+    if (prior.dims !== null && prior.dims !== currentConfig.dims) {
+      hasSchemaChanges = true
+      break
+    }
+  }
+
+  if (isFirstMigration) {
+    // Check if there's a very recent migration file (created in last 10 seconds) that we should patch
+    const recentMigrations = existsSync(migrationsDir)
+      ? readdirSync(migrationsDir)
+          .filter(
+            (f) => (f.endsWith('.ts') || f.endsWith('.js')) && f !== 'index.ts' && f !== 'index.js',
+          )
+          .map((f) => ({
+            name: f,
+            path: join(migrationsDir, f),
+            mtime: statSync(join(migrationsDir, f)).mtime,
+          }))
+          .filter((m) => Date.now() - m.mtime.getTime() < 10000) // Created in last 10 seconds
+          .sort((a, b) => b.mtime.getTime() - a.mtime.getTime())
+      : []
+
+    if (recentMigrations.length > 0) {
+      const recentMigration = recentMigrations[0]
+      // Check if it already has IVFFLAT index code
+      const recentContent = readFileSync(recentMigration.path, 'utf-8')
+      const hasIvfflatCode =
+        recentContent.includes('ivfflat') &&
+        (recentContent.includes('db.execute') || recentContent.includes('CREATE INDEX'))
+
+      if (!hasIvfflatCode) {
+        patchMigrationFile(recentMigration.path, staticConfigs, schemaName, priorState)
+        console.log('[payloadcms-vectorize] Migration patched successfully!')
+        return
+      }
+    }
+  }
+
+  // Create migration using Payload's API OR create manually for index-only changes
+  // Note: createMigration may not return the path, so we'll find the newest migration file after creation
+  const migrationsBefore = existsSync(migrationsDir)
+    ? readdirSync(migrationsDir)
+        .filter(
+          (f) => (f.endsWith('.ts') || f.endsWith('.js')) && f !== 'index.ts' && f !== 'index.js',
+        )
+        .map((f) => ({
+          name: f,
+          path: join(migrationsDir, f),
+          mtime: statSync(join(migrationsDir, f)).mtime,
+        }))
+        .sort((a, b) => b.mtime.getTime() - a.mtime.getTime())
+    : []
+
+  let migrationPath: string
+
+  // If there are schema changes (dims changed), use Payload's createMigration
+  // Otherwise (only ivfflatLists changed), create the migration file manually
+  if (hasSchemaChanges) {
+    await payload.db.createMigration({
+      migrationName: 'vectorize-config',
+      payload,
+      forceAcceptWarning: true,
+    })
+
+    // Find the newest migration file (should be the one just created)
+    const migrationsAfter = existsSync(migrationsDir)
+      ? readdirSync(migrationsDir)
+          .filter(
+            (f) => (f.endsWith('.ts') || f.endsWith('.js')) && f !== 'index.ts' && f !== 'index.js',
+          )
+          .map((f) => ({
+            name: f,
+            path: join(migrationsDir, f),
+            mtime: statSync(join(migrationsDir, f)).mtime,
+          }))
+          .sort((a, b) => b.mtime.getTime() - a.mtime.getTime())
+      : []
+
+    // Find the migration that was just created (newest that wasn't there before)
+    const beforePaths = new Set(migrationsBefore.map((m) => m.path))
+    const newMigrations = migrationsAfter.filter((m) => !beforePaths.has(m.path))
+    const foundPath = newMigrations.length > 0 ? newMigrations[0].path : migrationsAfter[0]?.path
+
+    if (!foundPath) {
+      throw new Error(
+        '[payloadcms-vectorize] Failed to create migration file - no new migration found.',
+      )
+    }
+    migrationPath = foundPath
+  } else {
+    // No schema changes (only ivfflatLists changed) - create migration file manually
+    const now = new Date()
+    const timestamp = [
+      now.getFullYear(),
+      String(now.getMonth() + 1).padStart(2, '0'),
+      String(now.getDate()).padStart(2, '0'),
+      '_',
+      String(now.getHours()).padStart(2, '0'),
+      String(now.getMinutes()).padStart(2, '0'),
+      String(now.getSeconds()).padStart(2, '0'),
+    ].join('')
+
+    const migrationFileName = `${timestamp}_vectorize_ivfflat_rebuild.ts`
+    migrationPath = join(migrationsDir, migrationFileName)
+
+    const migrationTemplate = `import { MigrateUpArgs, MigrateDownArgs, sql } from '@payloadcms/db-postgres'
+
+export async function up({ db, payload, req }: MigrateUpArgs): Promise<void> {
+  // Index parameter changes only - no schema changes
+}
+
+export async function down({ db, payload, req }: MigrateDownArgs): Promise<void> {
+  // Revert index parameter changes
+}
+`
+
+    writeFileSync(migrationPath, migrationTemplate, 'utf-8')
+  }
+
+  // Patch the migration file
+  patchMigrationFile(migrationPath, staticConfigs, schemaName, priorState)
+
+  console.log('[payloadcms-vectorize] Migration created and patched successfully!')
+
+  // Only exit if not in test environment (when called from tests, just return)
+  if (process.env.NODE_ENV !== 'test' && !process.env.VITEST) {
+    process.exit(0)
+  }
+}
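The no-change early exit above is what makes the script safe to rerun; a hedged sketch of exercising it twice, with the spec-relative import path (assumes `config` was built with `migrationDir` and `push: false`, as in the specs):

```typescript
import type { SanitizedConfig } from 'payload'
import { script as vectorizeMigrate } from '../../src/bin/vectorize-migrate.js'

declare const config: SanitizedConfig // built elsewhere, as in the dev specs

await vectorizeMigrate(config) // first run: creates and patches a migration
await vectorizeMigrate(config) // second run: logs "No configuration changes detected." and returns
```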
diff --git a/src/index.ts b/src/index.ts
index 60d7497..bf80c36 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,6 +1,8 @@
 import type { Config, Payload, PayloadRequest } from 'payload'
 import { customType } from '@payloadcms/db-postgres/drizzle/pg-core'
 import toSnakeCase from 'to-snake-case'
+import { fileURLToPath } from 'url'
+import { dirname, resolve } from 'path'
 
 import { createEmbeddingsCollection } from './collections/embeddings.js'
 import type {
@@ -76,50 +78,6 @@ export type {
 
 export { getVectorizedPayload } from './types.js'
 
-async function ensurePgvectorArtifacts(args: {
-  payload: Payload
-  tableName: string
-  dims: number
-  ivfflatLists: number
-}): Promise<void> {
-  const { payload, tableName, dims, ivfflatLists } = args
-
-  if (!isPostgresPayload(payload)) {
-    throw new Error(
-      '[payloadcms-vectorize] This plugin requires the Postgres adapter. Please configure @payloadcms/db-postgres.',
-    )
-  }
-
-  // Now payload is typed as PostgresPayload
-  const postgresPayload = payload as PostgresPayload
-  const schemaName = postgresPayload.db.schemaName || 'public'
-
-  const sqls: string[] = [
-    `CREATE EXTENSION IF NOT EXISTS vector;`,
-    `ALTER TABLE "${schemaName}"."${tableName}" ADD COLUMN IF NOT EXISTS embedding vector(${dims});`,
-    `CREATE INDEX IF NOT EXISTS ${tableName}_embedding_ivfflat ON "${schemaName}"."${tableName}" USING ivfflat (embedding vector_cosine_ops) WITH (lists = ${ivfflatLists});`,
-  ]
-
-  try {
-    if (postgresPayload.db.pool?.query) {
-      for (const sql of sqls) {
-        await postgresPayload.db.pool.query(sql)
-      }
-    } else if (postgresPayload.db.drizzle?.execute) {
-      for (const sql of sqls) {
-        await postgresPayload.db.drizzle.execute(sql)
-      }
-    }
-    postgresPayload.logger.info('[payloadcms-vectorize] pgvector extension/columns/index ensured')
-  } catch (err) {
-    postgresPayload.logger.error(
-      '[payloadcms-vectorize] Failed ensuring pgvector artifacts',
-      err as Error,
-    )
-    throw new Error(`[payloadcms-vectorize] Failed ensuring pgvector artifacts: ${err}`)
-  }
-}
-
 // ==================
 // Plugin entry point
 // ==================
@@ -240,7 +198,8 @@ export const createVectorizeIntegration =
       }
 
       // Build reverse mapping for hooks
-      for (const collectionSlug of Object.keys(dynamicConfig.collections)) {
+      const collectionSlugs = Object.keys(dynamicConfig.collections)
+      for (const collectionSlug of collectionSlugs) {
         if (!collectionToPools.has(collectionSlug)) {
           collectionToPools.set(collectionSlug, [])
         }
@@ -264,9 +223,11 @@
       }
 
       // Exit early if disabled, but keep embeddings collections present for migrations
-      if (pluginOptions.disabled) return config
+      if (pluginOptions.disabled) {
+        return config
+      }
 
-      // Register a single task using Payload Jobs that can handle any knowledge pool
+      // Register tasks using Payload Jobs
       const incomingJobs = config.jobs || { tasks: [] }
       const tasks = [...(config.jobs?.tasks || [])]
 
       const vectorizeTask = createVectorizeTask({
         knowledgePools: pluginOptions.knowledgePools,
       })
       tasks.push(vectorizeTask)
+
       const prepareBulkEmbedTask = createPrepareBulkEmbeddingTask({
         knowledgePools: pluginOptions.knowledgePools,
         pollOrCompleteQueueName: pluginOptions.bulkQueueNames?.pollOrCompleteQueueName,
       })
       tasks.push(prepareBulkEmbedTask)
+
       const pollOrCompleteBulkEmbedTask = createPollOrCompleteBulkEmbeddingTask({
         knowledgePools: pluginOptions.knowledgePools,
         pollOrCompleteQueueName: pluginOptions.bulkQueueNames?.pollOrCompleteQueueName,
@@ -397,6 +360,7 @@
           const poolConfig = pluginOptions.knowledgePools[knowledgePool]
           return !!poolConfig?.embeddingConfig?.bulkEmbeddingsFns
         },
+        _staticConfigs: staticConfigs,
         search: (params: VectorSearchQuery) =>
           vectorSearchHandlers.vectorSearch(
             payload,
@@ -461,22 +425,17 @@
         createVectorizedPayloadObject,
       }
 
-      const incomingOnInit = config.onInit
-      config.onInit = async (payload) => {
-        if (incomingOnInit) await incomingOnInit(payload)
-        // Ensure pgvector artifacts for each knowledge pool
-        for (const poolName in staticConfigs) {
-          const staticConfig = staticConfigs[poolName]
-          // Drizzle converts camelCase collection slugs to snake_case table names
-          await ensurePgvectorArtifacts({
-            payload,
-            // Drizzle converts camelCase collection slugs to snake_case table names
-            tableName: toSnakeCase(poolName),
-            dims: staticConfig.dims,
-            ivfflatLists: staticConfig.ivfflatLists,
-          })
-        }
-      }
+      // Register bin script for migration helper
+      const __filename = fileURLToPath(import.meta.url)
+      const __dirname = dirname(__filename)
+      const binScriptPath = resolve(__dirname, 'bin/vectorize-migrate.js')
+      config.bin = [
+        ...(config.bin || []),
+        {
+          key: 'vectorize:migrate',
+          scriptPath: binScriptPath,
+        },
+      ]
 
       if (pluginOptions.endpointOverrides?.enabled !== false) {
         const path = pluginOptions.endpointOverrides?.path || '/vector-search'
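With `_staticConfigs` exposed on the runtime object (typed in the types.ts change below), external tooling can read the effective per-pool settings the same way the bin script does; a minimal sketch (assumes `config` is your Payload config with the plugin installed):

```typescript
import type { SanitizedConfig } from 'payload'
import { getPayload } from 'payload'
import { getVectorizedPayload } from 'payloadcms-vectorize'

declare const config: SanitizedConfig // your Payload config (assumed)

const payload = await getPayload({ config })
const vectorized = getVectorizedPayload(payload)
if (vectorized) {
  // _staticConfigs maps pool name -> { dims, ivfflatLists }
  for (const [pool, { dims, ivfflatLists }] of Object.entries(vectorized._staticConfigs)) {
    console.log(`${pool}: vector(${dims}), ivfflat lists = ${ivfflatLists}`)
  }
}
```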
diff --git a/src/types.ts b/src/types.ts
index f211516..9c48a29 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -46,6 +46,8 @@ export type RetryFailedBatchResult =
 export type VectorizedPayload<TPoolNames extends string = string> = {
   /** Check if bulk embedding is enabled for a knowledge pool */
   _isBulkEmbedEnabled: (knowledgePool: TPoolNames) => boolean
+  /** Static configs for migration helper access */
+  _staticConfigs: Record<string, KnowledgePoolStaticConfig>
   search: (params: VectorSearchQuery) => Promise<VectorSearchResult>
   queueEmbed: (
     params: