Skip to content

Conversation

@subtleGradient
Copy link
Member

No description provided.

Three experiments to diagnose PDF input failures via OpenRouter:

- pdf-vs-image: proves image_url format fails for PDFs, file+plugin works
- pdf-message-shape-matrix: tests shape (file-only vs text+file) × format
- pdf-direct-input: compares PDF support across OpenAI/Anthropic/Google

Key finding: 'file' content type with file-parser plugin is the universal
format. AI SDK's image_url approach fails for OpenAI PDFs.
- Add shared request-cache utility that caches API responses to disk
- Update fetch/pdf-direct-input to use caching
- Add ai-sdk-v5/pdf-openai-regression tests with caching
- Cache avoids hitting OpenRouter API repeatedly during development
If response body parses as JSON, store as body.json object.
Otherwise store as body.text string. Makes cache files readable.
@socket-security
Copy link

Review the following changes in direct dependencies. Learn more about Socket for GitHub.

Diff | Package | Supply Chain Security | Vulnerability | Quality | Maintenance | License
Updated | @openrouter/ai-sdk-provider@1.5.3 ⏵ 1.5.4 | 99 (+1) | 100 | 100 | 99 (+1) | 100

View full report

Copy link

@mohamdawad135-oss mohamdawad135-oss left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// utils/request-cache.js
import fs from 'fs/promises';
import path from 'path';
import crypto from 'crypto';

/**
 * Disk-backed cache that stores one JSON file per key inside `cacheDir`.
 *
 * Every entry is persisted as an envelope `{ value, expiresAt }`, where
 * `expiresAt` is a millisecond timestamp. `get` returns the envelope as
 * stored and does NOT enforce expiry itself; callers compare `expiresAt`
 * against `Date.now()`.
 */
export class RequestCache {
  /**
   * @param {string} [cacheDir='./.cache'] Directory that holds the cache files.
   */
  constructor(cacheDir = './.cache') {
    this.cacheDir = cacheDir;
  }

  /**
   * Map a cache key to the path of its backing file.
   *
   * The key is hashed so that arbitrary strings (URLs, serialized options)
   * become fixed-length, filesystem-safe file names. MD5 is used only as a
   * name hash here, not for security.
   *
   * @param {string} key Cache key.
   * @returns {Promise<string>} Path of the form `<cacheDir>/<md5 hex>.json`.
   */
  async getKey(key) {
    const hash = crypto.createHash('md5').update(key).digest('hex');
    return path.join(this.cacheDir, `${hash}.json`);
  }

  /**
   * Read the stored envelope for `key`.
   *
   * @param {string} key Cache key.
   * @returns {Promise<{value: *, expiresAt: number} | null>} The parsed
   *   envelope, or `null` when the entry cannot be read or parsed.
   */
  async get(key) {
    try {
      const filePath = await this.getKey(key);
      const data = await fs.readFile(filePath, 'utf-8');
      return JSON.parse(data);
    } catch {
      // Best-effort cache: a missing, unreadable or corrupt file is a miss.
      return null;
    }
  }

  /**
   * Store `value` under `key`, creating the cache directory if needed.
   *
   * @param {string} key Cache key.
   * @param {*} value JSON-serializable value to store.
   * @param {number} [ttl=3600] Time to live in seconds.
   * @returns {Promise<void>}
   */
  async set(key, value, ttl = 3600) {
    await fs.mkdir(this.cacheDir, { recursive: true });
    const filePath = await this.getKey(key);
    const cacheData = {
      value,
      expiresAt: Date.now() + ttl * 1000, // seconds -> milliseconds
    };
    await fs.writeFile(filePath, JSON.stringify(cacheData));
  }

  /**
   * Delete the whole cache directory. Does not fail if it does not exist.
   *
   * @returns {Promise<void>}
   */
  async clear() {
    await fs.rm(this.cacheDir, { recursive: true, force: true });
  }
}

// fetch/pdf-direct-input.js
import { RequestCache } from '../utils/request-cache.js';

// Module-level cache instance used by every fetchWithCache call; constructed
// with no arguments, so RequestCache's default cache directory applies.
const cache = new RequestCache();

/**
 * Fetch `url` and parse the body as JSON, serving a cached copy when one
 * exists and has not expired.
 *
 * The cache key is built from the URL plus `JSON.stringify(options)`, so
 * option values that JSON cannot represent (functions, `undefined`, etc.)
 * do not contribute to the key.
 *
 * Only successful (2xx) responses are written to the cache, so a transient
 * API error is not replayed for the next hour. The parsed body is still
 * returned to the caller for non-2xx responses.
 *
 * @param {string} url Request URL.
 * @param {object} [options={}] Options passed straight through to `fetch`.
 * @returns {Promise<*>} The parsed JSON body (cached or fresh).
 * @throws If the request fails or the body is not valid JSON.
 */
export async function fetchWithCache(url, options = {}) {
  const cacheKey = `fetch:${url}:${JSON.stringify(options)}`;

  // Try the cache first; the stored envelope carries its own expiry time.
  const cached = await cache.get(cacheKey);
  if (cached && cached.expiresAt > Date.now()) {
    console.log('📦 Using cached response for:', url);
    return cached.value;
  }

  // Cache miss or expired entry: go to the API.
  console.log('🌐 Fetching from API:', url);
  const response = await fetch(url, options);
  const data = await response.json();

  // Persist only successful responses, valid for one hour.
  if (response.ok) {
    await cache.set(cacheKey, data, 3600);
  }

  return data;
}

// tests/ai-sdk-v5/pdf-openai-regression.test.js
import { RequestCache } from '../../utils/request-cache.js';
import { processPDFWithAI } from '../pdf-processor.js';

// Regression suite: processing the same PDF twice with a shared on-disk
// cache must yield the same result both times.
describe('PDF OpenAI Regression Tests with Caching', () => {
  let cache;

  beforeAll(() => {
    // Dedicated directory so the test run never touches the main cache.
    cache = new RequestCache('./.test-cache');
  });

  afterAll(async () => {
    // Remove the test cache directory once the suite is done.
    await cache.clear();
  });

  test('should cache PDF processing requests', async () => {
    const pdfUrl = 'https://example.com/test.pdf';

    // First call: expected to fetch from the API.
    const result1 = await processPDFWithAI(pdfUrl, { cache });
    expect(result1).toBeDefined();

    // Second call: expected to be served from the cache.
    // NOTE(review): this only checks that both results are equal; it does not
    // prove the second call hit the cache. Consider asserting on the cache
    // entry once the key format used by processPDFWithAI is confirmed.
    const result2 = await processPDFWithAI(pdfUrl, { cache });
    expect(result2).toEqual(result1);
  });
});

{
"scripts": {
"dev": "node --experimental-modules server.js",
"test": "jest --testPathPattern=pdf",
"clear-cache": "rm -rf .cache .test-cache",
"dev:cache": "CACHE_ENABLED=true npm run dev"
}
}

// config/cache-config.js
// Time-to-live values, in seconds, per kind of cached content.
const cacheTtlSeconds = {
  pdf: 3600, // one hour for PDF files
  api: 300, // five minutes for general API requests
  test: 60, // one minute for tests
};

// On-disk locations of the main cache and the test cache.
const cacheDirectories = {
  main: './.cache',
  test: './.test-cache',
};

/**
 * Central cache settings.
 *
 * `enabled` is true only when the CACHE_ENABLED environment variable is
 * exactly the string 'true' at module load time.
 */
const { CACHE_ENABLED: cacheEnabledFlag } = process.env;

export const cacheConfig = {
  enabled: cacheEnabledFlag === 'true',
  ttl: cacheTtlSeconds,
  directories: cacheDirectories,
};

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants