From 4d8d570cacb7440f96dc4f068583dd407025d1c6 Mon Sep 17 00:00:00 2001 From: Ammar Date: Thu, 11 Dec 2025 19:18:38 -0600 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20feat:=20default=20OpenAI=20servi?= =?UTF-8?q?ce=5Ftier=20to=20priority,=20allow=20override=20via=20providers?= =?UTF-8?q?.jsonc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Change default from 'auto' to 'priority' for low-latency responses - Support all OpenAI service tiers: auto, default, flex, priority - Read serviceTier from providers.jsonc config and pass through to API Users can override in ~/.mux/providers.jsonc: { "openai": { "apiKey": "sk-...", "serviceTier": "flex" // 50% cheaper, higher latency } } _Generated with mux_ --- src/common/orpc/schemas/providerOptions.ts | 4 ++++ src/common/utils/ai/providerOptions.ts | 5 +++-- src/node/services/aiService.ts | 10 ++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/common/orpc/schemas/providerOptions.ts b/src/common/orpc/schemas/providerOptions.ts index a443d9b69a..c58c4eda22 100644 --- a/src/common/orpc/schemas/providerOptions.ts +++ b/src/common/orpc/schemas/providerOptions.ts @@ -10,6 +10,10 @@ export const MuxProviderOptionsSchema = z.object({ .optional(), openai: z .object({ + serviceTier: z.enum(["auto", "default", "flex", "priority"]).optional().meta({ + description: + "OpenAI service tier: priority (low-latency), flex (50% cheaper, higher latency), auto/default (standard)", + }), disableAutoTruncation: z .boolean() .optional() diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts index 0a05cabbe8..430a2a314f 100644 --- a/src/common/utils/ai/providerOptions.ts +++ b/src/common/utils/ai/providerOptions.ts @@ -217,11 +217,12 @@ export function buildProviderOptions( disableAutoTruncation, }); + const serviceTier = muxProviderOptions?.openai?.serviceTier ?? "priority"; + const options: ProviderOptions = { openai: { parallelToolCalls: true, // Always enable concurrent tool execution - // TODO: allow this to be configured - serviceTier: "auto", // Use "auto" to automatically select the best service tier + serviceTier, // Automatically truncate conversation to fit context window, unless disabled for testing truncation: disableAutoTruncation ? "disabled" : "auto", // Conditionally add reasoning configuration diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts index b47fc44b46..cac12377ea 100644 --- a/src/node/services/aiService.ts +++ b/src/node/services/aiService.ts @@ -498,6 +498,16 @@ export class AIService extends EventEmitter { provider: providerName, }); } + + // Extract serviceTier from config to pass through to buildProviderOptions + const configServiceTier = providerConfig.serviceTier as string | undefined; + if (configServiceTier && muxProviderOptions) { + muxProviderOptions.openai = { + ...muxProviderOptions.openai, + serviceTier: configServiceTier as "auto" | "default" | "flex" | "priority", + }; + } + const baseFetch = getProviderFetch(providerConfig); // Wrap fetch to force truncation: "auto" for OpenAI Responses API calls.