diff --git a/src/common/orpc/schemas/providerOptions.ts b/src/common/orpc/schemas/providerOptions.ts index a443d9b69a..c58c4eda22 100644 --- a/src/common/orpc/schemas/providerOptions.ts +++ b/src/common/orpc/schemas/providerOptions.ts @@ -10,6 +10,10 @@ export const MuxProviderOptionsSchema = z.object({ .optional(), openai: z .object({ + serviceTier: z.enum(["auto", "default", "flex", "priority"]).optional().meta({ + description: + "OpenAI service tier: priority (low-latency), flex (50% cheaper, higher latency), auto/default (standard)", + }), disableAutoTruncation: z .boolean() .optional() diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts index 0a05cabbe8..430a2a314f 100644 --- a/src/common/utils/ai/providerOptions.ts +++ b/src/common/utils/ai/providerOptions.ts @@ -217,11 +217,12 @@ export function buildProviderOptions( disableAutoTruncation, }); + const serviceTier = muxProviderOptions?.openai?.serviceTier ?? "priority"; + const options: ProviderOptions = { openai: { parallelToolCalls: true, // Always enable concurrent tool execution - // TODO: allow this to be configured - serviceTier: "auto", // Use "auto" to automatically select the best service tier + serviceTier, // Automatically truncate conversation to fit context window, unless disabled for testing truncation: disableAutoTruncation ? "disabled" : "auto", // Conditionally add reasoning configuration diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts index b47fc44b46..cac12377ea 100644 --- a/src/node/services/aiService.ts +++ b/src/node/services/aiService.ts @@ -498,6 +498,16 @@ export class AIService extends EventEmitter { provider: providerName, }); } + + // Extract serviceTier from config to pass through to buildProviderOptions + const configServiceTier = providerConfig.serviceTier as string | undefined; + if (configServiceTier && muxProviderOptions) { + muxProviderOptions.openai = { + ...muxProviderOptions.openai, + serviceTier: configServiceTier as "auto" | "default" | "flex" | "priority", + }; + } + const baseFetch = getProviderFetch(providerConfig); // Wrap fetch to force truncation: "auto" for OpenAI Responses API calls.