-
Notifications
You must be signed in to change notification settings - Fork 46
Tom/unbreak aisdk pdf OpenAI #62
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Three experiments to diagnose PDF input failures via OpenRouter:
- pdf-vs-image: proves the image_url format fails for PDFs, while file + plugin works
- pdf-message-shape-matrix: tests message shape (file-only vs. text + file) × format
- pdf-direct-input: compares PDF support across OpenAI/Anthropic/Google

Key finding: the 'file' content type with the file-parser plugin is the universal format. AI SDK's image_url approach fails for OpenAI PDFs.
- Add shared request-cache utility that caches API responses to disk
- Update fetch/pdf-direct-input to use caching
- Add ai-sdk-v5/pdf-openai-regression tests with caching
- Cache avoids hitting the OpenRouter API repeatedly during development
If response body parses as JSON, store as body.json object. Otherwise store as body.text string. Makes cache files readable.
|
Review the following changes in direct dependencies. Learn more about Socket for GitHub.
|
mohamdawad135-oss
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// utils/request-cache.js
import fs from 'fs/promises';
import path from 'path';
import crypto from 'crypto';
/**
 * Disk-backed cache that stores every entry as its own JSON file inside
 * `cacheDir`. Each file holds `{ value, expiresAt }`; expiry is recorded on
 * write but is NOT enforced by `get` — callers compare `expiresAt` themselves.
 */
export class RequestCache {
  /**
   * @param {string} [cacheDir='./.cache'] Directory that holds the cache files.
   */
  constructor(cacheDir = './.cache') {
    this.cacheDir = cacheDir;
  }

  /**
   * Resolves the file path that backs a cache key.
   *
   * @param {string} key Cache key.
   * @returns {Promise<string>} `<cacheDir>/<md5-hex-of-key>.json`.
   */
  async getKey(key) {
    // The digest only serves as a fixed-length, filesystem-safe file name.
    const hash = crypto.createHash('md5').update(key).digest('hex');
    return path.join(this.cacheDir, `${hash}.json`);
  }

  /**
   * Reads the stored record for a key.
   *
   * @param {string} key Cache key.
   * @returns {Promise<{value: *, expiresAt: number} | null>} The parsed record,
   *   or `null` when the file cannot be read or parsed.
   */
  async get(key) {
    try {
      const filePath = await this.getKey(key);
      const data = await fs.readFile(filePath, 'utf-8');
      return JSON.parse(data);
    } catch {
      // Deliberately best-effort: any read or parse failure counts as a miss.
      return null;
    }
  }

  /**
   * Writes a record for a key, creating the cache directory if needed.
   *
   * @param {string} key Cache key.
   * @param {*} value JSON-serialisable payload.
   * @param {number} [ttl=3600] Lifetime in seconds, stored as `expiresAt`.
   * @returns {Promise<void>}
   */
  async set(key, value, ttl = 3600) {
    await fs.mkdir(this.cacheDir, { recursive: true });
    const filePath = await this.getKey(key);
    const cacheData = {
      value,
      // Absolute expiry timestamp in milliseconds since the epoch.
      expiresAt: Date.now() + (ttl * 1000),
    };
    await fs.writeFile(filePath, JSON.stringify(cacheData));
  }

  /**
   * Deletes the whole cache directory; a missing directory is not an error.
   *
   * @returns {Promise<void>}
   */
  async clear() {
    await fs.rm(this.cacheDir, { recursive: true, force: true });
  }
}
// fetch/pdf-direct-input.js
import { RequestCache } from '../utils/request-cache.js';
// Module-level cache shared by every fetchWithCache call (default directory).
const cache = new RequestCache();

/**
 * Fetches a URL and returns its parsed JSON body, serving a cached copy when
 * an unexpired one exists for the same URL and options.
 *
 * @param {string} url Request URL.
 * @param {object} [options={}] Options passed through to `fetch`; they are
 *   also serialised into the cache key.
 * @returns {Promise<*>} The parsed JSON body (from cache or from the network).
 */
export async function fetchWithCache(url, options = {}) {
  const cacheKey = `fetch:${url}:${JSON.stringify(options)}`;

  // Try the cache first; the record carries its own expiry timestamp.
  const cached = await cache.get(cacheKey);
  if (cached && cached.expiresAt > Date.now()) {
    console.log('📦 Using cached response for:', url);
    return cached.value;
  }

  // Cache miss or expired entry: fetch from the API.
  console.log('🌐 Fetching from API:', url);
  const response = await fetch(url, options);
  const data = await response.json();

  // Only persist successful responses, so an HTTP error payload is not
  // replayed from disk for the next hour. The body is still returned as-is.
  if (response.ok) {
    await cache.set(cacheKey, data, 3600); // valid for one hour
  }
  return data;
}
// tests/ai-sdk-v5/pdf-openai-regression.test.js
import { RequestCache } from '../../utils/request-cache.js';
import { processPDFWithAI } from '../pdf-processor.js';
// Regression suite: processing the same PDF twice with a shared cache must
// yield equal results.
describe('PDF OpenAI Regression Tests with Caching', () => {
  let cache;

  beforeAll(() => {
    // Dedicated directory so the suite never touches the default cache.
    cache = new RequestCache('./.test-cache');
  });

  afterAll(async () => {
    // Remove everything the suite wrote to disk.
    await cache.clear();
  });

  test('should cache PDF processing requests', async () => {
    const pdfUrl = 'https://example.com/test.pdf';

    // First call: expected to fetch the data from the API.
    const result1 = await processPDFWithAI(pdfUrl, { cache });
    expect(result1).toBeDefined();

    // Second call: expected to be served from the cache.
    const result2 = await processPDFWithAI(pdfUrl, { cache });
    expect(result2).toEqual(result1);
  });
});
{
"scripts": {
"dev": "node --experimental-modules server.js",
"test": "jest --testPathPattern=pdf",
"clear-cache": "rm -rf .cache .test-cache",
"dev:cache": "CACHE_ENABLED=true npm run dev"
}
}
// config/cache-config.js
// Entry lifetimes per category.
const cacheTtl = {
  pdf: 3600, // one hour for PDF files
  api: 300, // 5 minutes for general requests
  test: 60, // one minute for tests
};

// Cache directories per category.
const cacheDirectories = {
  main: './.cache',
  test: './.test-cache',
};

/**
 * Cache settings. `enabled` is true only when the CACHE_ENABLED environment
 * variable is exactly the string 'true'.
 */
export const cacheConfig = {
  enabled: process.env.CACHE_ENABLED === 'true',
  ttl: cacheTtl,
  directories: cacheDirectories,
};
No description provided.