Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions internal/event/event_with_request_and_response.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,17 @@ import (
"github.com/bricks-cloud/bricksllm/internal/key"
"github.com/bricks-cloud/bricksllm/internal/provider"
"github.com/bricks-cloud/bricksllm/internal/provider/custom"
"github.com/bricks-cloud/bricksllm/internal/provider/openai"
)

type EventWithRequestAndContent struct {
Event *Event
IsEmbeddingsRequest bool
RouteConfig *custom.RouteConfig
Request interface{}
Content string
Response interface{}
Key *key.ResponseKey
CostMap *provider.CostMap
Event *Event
IsEmbeddingsRequest bool
RouteConfig *custom.RouteConfig
Request interface{}
Content string
Response interface{}
Key *key.ResponseKey
CostMap *provider.CostMap
ImageResponseMetadata *openai.ImageResponseMetadata
}
9 changes: 5 additions & 4 deletions internal/message/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/bricks-cloud/bricksllm/internal/provider"
"github.com/bricks-cloud/bricksllm/internal/provider/anthropic"
"github.com/bricks-cloud/bricksllm/internal/provider/custom"
"github.com/bricks-cloud/bricksllm/internal/provider/openai"
"github.com/bricks-cloud/bricksllm/internal/provider/vllm"
"github.com/bricks-cloud/bricksllm/internal/telemetry"
"github.com/bricks-cloud/bricksllm/internal/user"
Expand Down Expand Up @@ -39,7 +40,7 @@ type estimator interface {
EstimateTotalCost(model string, promptTks, completionTks int) (float64, error)
EstimateEmbeddingsInputCost(model string, tks int) (float64, error)
EstimateChatCompletionPromptTokenCounts(model string, r *goopenai.ChatCompletionRequest) (int, error)
EstimateImagesCost(model, quality, resolution string) (float64, error)
EstimateImagesCost(model, quality, resolution string, metadata *openai.ImageResponseMetadata) (float64, error)
}

type azureEstimator interface {
Expand Down Expand Up @@ -428,7 +429,7 @@ func (h *Handler) decorateEvent(m Message) error {
return errors.New("event request data cannot be parsed as openai image request")
}
if e.Event.Status == http.StatusOK {
cost, err := h.e.EstimateImagesCost(string(gir.Model), string(gir.Quality), string(gir.Size))
cost, err := h.e.EstimateImagesCost(string(gir.Model), string(gir.Quality), string(gir.Size), e.ImageResponseMetadata)
if err != nil {
telemetry.Incr("bricksllm.message.handler.decorate_event.estimate_completion_cost_error", nil, 1)
return err
Expand All @@ -445,7 +446,7 @@ func (h *Handler) decorateEvent(m Message) error {
return errors.New("event request data cannot be parsed as openai image edit request")
}
if e.Event.Status == http.StatusOK {
cost, err := h.e.EstimateImagesCost(string(eir.Model), "", string(eir.Size))
cost, err := h.e.EstimateImagesCost(string(eir.Model), "", string(eir.Size), e.ImageResponseMetadata)
if err != nil {
telemetry.Incr("bricksllm.message.handler.decorate_event.estimate_completion_cost_error", nil, 1)
return err
Expand All @@ -462,7 +463,7 @@ func (h *Handler) decorateEvent(m Message) error {
return errors.New("event request data cannot be parsed as openai image variation request")
}
if e.Event.Status == http.StatusOK {
cost, err := h.e.EstimateImagesCost(string(vir.Model), "", string(vir.Size))
cost, err := h.e.EstimateImagesCost(string(vir.Model), "", string(vir.Size), e.ImageResponseMetadata)
if err != nil {
telemetry.Incr("bricksllm.message.handler.decorate_event.estimate_completion_cost_error", nil, 1)
return err
Expand Down
156 changes: 155 additions & 1 deletion internal/provider/openai/cost.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ func parseFinetuneModel(model string) string {

var OpenAiPerThousandTokenCost = map[string]map[string]float64{
"prompt": {
"gpt-image-1.5": 0.005,
"gpt-image-1": 0.005,
"chatgpt-image-latest": 0.005,
"gpt-image-1-mini": 0.002,

"gpt-5.2-chat-latest": 0.001750,
"gpt-5.1-chat-latest": 0.001250,
"gpt-5.1-codex-max": 0.001250,
Expand Down Expand Up @@ -95,6 +100,11 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{
"babbage-002": 0.000400,
},
"cached-prompt": {
"gpt-image-1.5": 0.00125,
"gpt-image-1": 0.00125,
"chatgpt-image-latest": 0.00125,
"gpt-image-1-mini": 0.0002,

"gpt-5.2-chat-latest": 0.000175,
"gpt-5.1-chat-latest": 0.000125,
"gpt-5.1-codex-max": 0.000125,
Expand Down Expand Up @@ -143,6 +153,9 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{
"tts-1-hd": 0.03,
},
"completion": {
"gpt-image-1.5": 0.010,
"chatgpt-image-latest": 0.010,

"gpt-5.2-chat-latest": 0.014000,
"gpt-5.1-chat-latest": 0.010000,
"gpt-5.1-codex-max": 0.010000,
Expand Down Expand Up @@ -212,9 +225,63 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{
"dall-e-3-1792-standart": 0.08,
"dall-e-3-1024-hd": 0.08,
"dall-e-3-1792-hd": 0.12,

"gpt-image-1.5-1536-high": 0.2,
"gpt-image-1.5-1536-medium": 0.05,
"gpt-image-1.5-1536-low": 0.013,
"gpt-image-1.5-1024-high": 0.133,
"gpt-image-1.5-1024-medium": 0.034,
"gpt-image-1.5-1024-low": 0.009,

"chatgpt-image-latest-1536-high": 0.2,
"chatgpt-image-latest-1536-medium": 0.05,
"chatgpt-image-latest-1536-low": 0.013,
"chatgpt-image-latest-1024-high": 0.133,
"chatgpt-image-latest-1024-medium": 0.034,
"chatgpt-image-latest-1024-low": 0.009,

"gpt-image-1-1536-high": 0.25,
"gpt-image-1-1536-medium": 0.063,
"gpt-image-1-1536-low": 0.016,
"gpt-image-1-1024-high": 0.167,
"gpt-image-1-1024-medium": 0.042,
"gpt-image-1-1024-low": 0.011,

"gpt-image-1-mini-1536-high": 0.052,
"gpt-image-1-mini-1536-medium": 0.015,
"gpt-image-1-mini-1536-low": 0.006,
"gpt-image-1-mini-1024-high": 0.036,
"gpt-image-1-mini-1024-medium": 0.011,
"gpt-image-1-mini-1024-low": 0.005,
},
"images-tokens-input": {
"gpt-image-1.5": 0.008,
"gpt-image-1": 0.010,
"chatgpt-image-latest": 0.008,
"gpt-image-1-mini": 0.0025,
},
"images-tokens-cached-input": {
"gpt-image-1.5": 0.002,
"gpt-image-1": 0.0025,
"chatgpt-image-latest": 0.002,
"gpt-image-1-mini": 0.00025,
},
"images-tokens-output": {
"gpt-image-1.5": 0.032,
"gpt-image-1": 0.040,
"chatgpt-image-latest": 0.032,
"gpt-image-1-mini": 0.008,
},
}

// imageModelsWithTokensCost is the set of image models billed by response
// token usage rather than a flat per-image price. It is derived from the
// "images-tokens-input" pricing table during package initialization.
var imageModelsWithTokensCost = func() map[string]interface{} {
	set := make(map[string]interface{}, len(OpenAiPerThousandTokenCost["images-tokens-input"]))
	for model := range OpenAiPerThousandTokenCost["images-tokens-input"] {
		set[model] = struct{}{}
	}
	return set
}()

var OpenAiPerThousandCallsToolCost = map[string]float64{
"web_search": 10.0,
"web_search_preview": 25.0,
Expand Down Expand Up @@ -402,7 +469,47 @@ func (ce *CostEstimator) EstimateCompletionsStreamCostWithTokenCounts(model stri
return tks, cost, nil
}

func (ce *CostEstimator) EstimateImagesCost(model, quality, resolution string) (float64, error) {
// estimateImageByMetadata prices an image request from the token usage
// reported in the response metadata. Per thousand tokens it applies the
// model's "prompt" rate to text input tokens, the "images-tokens-input" rate
// to image input tokens, and the "images-tokens-output" rate to output
// tokens. It returns an error when metadata is nil or the model is not
// token-billed, so the caller can fall back to flat per-image pricing.
func (ce *CostEstimator) estimateImageByMetadata(model string, metadata *ImageResponseMetadata) (float64, error) {
	if metadata == nil {
		return 0, errors.New("metadata is nil")
	}
	if _, ok := imageModelsWithTokensCost[model]; !ok {
		return 0, errors.New("model is not present in the images tokens cost map")
	}

	textInputCostMap, ok := ce.tokenCostMap["prompt"]
	if !ok {
		return 0, errors.New("prompt tokens cost map is not provided")
	}
	imageInputCostMap, ok := ce.tokenCostMap["images-tokens-input"]
	if !ok {
		return 0, errors.New("images input tokens cost map is not provided")
	}
	imageOutputCostMap, ok := ce.tokenCostMap["images-tokens-output"]
	if !ok {
		return 0, errors.New("images output tokens cost map is not provided")
	}

	// A model missing from an individual rate table contributes a zero rate,
	// preserving the original best-effort accumulation.
	usage := metadata.Usage
	var totalCost float64
	totalCost += (float64(usage.InputTokensDetails.TextTokens) / 1000) * textInputCostMap[model]
	totalCost += (float64(usage.InputTokensDetails.ImageTokens) / 1000) * imageInputCostMap[model]
	totalCost += (float64(usage.OutputTokens) / 1000) * imageOutputCostMap[model]

	// NOTE(review): the "images-tokens-cached-input" pricing table is never
	// consulted because InputTokensDetails exposes no cached-token count —
	// confirm against the images API response schema.
	return totalCost, nil
}

func (ce *CostEstimator) EstimateImagesCost(model, quality, resolution string, metadata *ImageResponseMetadata) (float64, error) {
mCost, err := ce.estimateImageByMetadata(model, metadata)
if err == nil {
return mCost, nil
}
simpleRes, err := convertResToSimple(resolution)
if err != nil {
return 0, err
Expand All @@ -419,6 +526,11 @@ func (ce *CostEstimator) EstimateImagesCost(model, quality, resolution string) (
if err != nil {
return 0, err
}
case "gpt-image-1", "gpt-image-1.5", "chatgpt-image-latest", "gpt-image-1-mini":
normalizedModel, err = prepareGptImageModel(quality, simpleRes, model)
if err != nil {
return 0, err
}
default:
return 0, errors.New("model is not present in the images cost map")
}
Expand All @@ -445,6 +557,9 @@ func convertResToSimple(resolution string) (string, error) {
if strings.Contains(resolution, "1792") {
return "1792", nil
}
if strings.Contains(resolution, "1536") {
return "1536", nil
}
if strings.Contains(resolution, "1024") {
return "1024", nil
}
Expand Down Expand Up @@ -494,6 +609,45 @@ func prepareDallE3Quality(quality string) (string, error) {
return quality, nil
}

// Allowed request values for GPT image model pricing-key normalization.
var allowedGptImageResolutions = []string{"1024", "1536", "auto"}
var allowedGptImageQualities = []string{"low", "medium", "high", "auto"}

// prepareGptImageModel builds the per-image pricing-table key for a GPT image
// model, shaped "<model>-<resolution>-<quality>". It validates and normalizes
// both the quality and the resolution before assembling the key.
func prepareGptImageModel(quality, resolution, model string) (string, error) {
	normalizedQuality, err := prepareGptImageQuality(quality)
	if err != nil {
		return "", err
	}
	// Re-simplifying is a no-op for already-simplified resolutions but keeps
	// this helper safe for raw "WIDTHxHEIGHT" inputs as well.
	simplified, err := convertResToSimple(resolution)
	if err != nil {
		return "", err
	}
	normalizedResolution, err := prepareGptImageResolution(simplified)
	if err != nil {
		return "", err
	}
	return fmt.Sprintf("%s-%s-%s", model, normalizedResolution, normalizedQuality), nil
}

// prepareGptImageResolution normalizes a simplified resolution for use in a
// GPT image pricing key. Empty and "auto" resolutions are billed at the
// largest (1536) tier; anything outside the allowed set is rejected.
func prepareGptImageResolution(resolution string) (string, error) {
	switch resolution {
	case "", "auto":
		return "1536", nil
	case "1024", "1536":
		return resolution, nil
	default:
		return "", errors.New("resolution is not valid")
	}
}

// prepareGptImageQuality normalizes a requested quality for use in a GPT
// image pricing key. Empty and "auto" qualities are billed at the "high"
// tier; anything outside the allowed set is rejected.
func prepareGptImageQuality(quality string) (string, error) {
	switch quality {
	case "", "auto":
		return "high", nil
	}
	if !slices.Contains(allowedGptImageQualities, quality) {
		return "", errors.New("quality is not valid")
	}
	return quality, nil
}

func (ce *CostEstimator) EstimateTranscriptionCost(secs float64, model string) (float64, error) {
costMap, ok := ce.tokenCostMap["audio"]
if !ok {
Expand Down
17 changes: 17 additions & 0 deletions internal/provider/openai/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,20 @@ func (u *ResponseRequestToolUnion) GetContainerAsResponseRequestToolContainer()
}
return nil
}

// ImageResponseUsage is the token usage block parsed from an OpenAI images
// API response, used for token-based image cost estimation.
type ImageResponseUsage struct {
	// TotalTokens is the total token count reported for the request.
	TotalTokens int `json:"total_tokens,omitempty"`
	// InputTokens is the number of tokens consumed by the request input.
	InputTokens int `json:"input_tokens,omitempty"`
	// OutputTokens is the number of tokens used to produce the image output.
	OutputTokens int `json:"output_tokens,omitempty"`
	// InputTokensDetails splits the input tokens by modality.
	InputTokensDetails ImageResponseInputTokensDetails `json:"input_tokens_details,omitempty"`
}

// ImageResponseInputTokensDetails breaks input tokens down into text and
// image contributions.
type ImageResponseInputTokensDetails struct {
	TextTokens  int `json:"text_tokens,omitempty"`
	ImageTokens int `json:"image_tokens,omitempty"`
}

// ImageResponseMetadata is the subset of an images API response needed for
// cost estimation.
// NOTE(review): `omitempty` has no effect on struct-typed fields when
// encoding; harmless here since these types are only unmarshalled.
type ImageResponseMetadata struct {
	Quality string             `json:"quality,omitempty"`
	Size    string             `json:"size,omitempty"`
	Usage   ImageResponseUsage `json:"usage,omitempty"`
}
33 changes: 33 additions & 0 deletions internal/server/web/proxy/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package proxy
import (
"encoding/json"

"github.com/bricks-cloud/bricksllm/internal/provider/openai"
"github.com/gin-gonic/gin"
goopenai "github.com/sashabaranov/go-openai"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
Expand Down Expand Up @@ -108,3 +110,34 @@ func logImageResponse(log *zap.Logger, data []byte, prod, private bool) {
log.Info("openai image response", fields...)
}
}

// imageResponseMetadataFromBytes decodes an images API response body into the
// metadata used for token-based cost estimation. It logs and returns nil when
// the payload cannot be parsed.
func imageResponseMetadataFromBytes(log *zap.Logger, data []byte, prod bool) *openai.ImageResponseMetadata {
	var metadata openai.ImageResponseMetadata
	if err := json.Unmarshal(data, &metadata); err != nil {
		logError(log, "error when unmarshalling image response metadata", prod, err)
		return nil
	}
	return &metadata
}

// imageResponseMetadataKey is the gin context key under which image response
// metadata is stashed between the proxy handler and the event middleware.
const imageResponseMetadataKey = "image_response_metadata"

// setCtxImageResponseMetadata stores the image response metadata on the
// request context so getCtxImageResponseMetadata can recover it later.
// No-op when imageResponse is nil.
func setCtxImageResponseMetadata(ctx *gin.Context, imageResponse *openai.ImageResponseMetadata) {
	if imageResponse == nil {
		return
	}
	// Store the whole metadata value rather than imageResponse.Usage: the
	// reader type-asserts openai.ImageResponseMetadata, so storing only the
	// Usage struct makes that assertion fail and silently drops the metadata
	// (and with it, token-based image cost estimation).
	ctx.Set(imageResponseMetadataKey, *imageResponse)
}

// getCtxImageResponseMetadata retrieves image response metadata previously
// stored on the gin context. It returns nil when nothing was stored under the
// key or the stored value has an unexpected type.
func getCtxImageResponseMetadata(ctx *gin.Context) *openai.ImageResponseMetadata {
	stored, exists := ctx.Get(imageResponseMetadataKey)
	if !exists {
		return nil
	}
	metadata, ok := stored.(openai.ImageResponseMetadata)
	if !ok {
		return nil
	}
	return &metadata
}
5 changes: 5 additions & 0 deletions internal/server/web/proxy/middleware.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,11 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag
enrichedEvent.Response = resp
}

imageResponseMetadata := getCtxImageResponseMetadata(c)
if imageResponseMetadata != nil {
enrichedEvent.ImageResponseMetadata = imageResponseMetadata
}

pub.Publish(message.Message{
Type: "event",
Data: enrichedEvent,
Expand Down
6 changes: 6 additions & 0 deletions internal/server/web/proxy/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -685,14 +685,20 @@ func getPassThroughHandler(prod, private bool, client http.Client) gin.HandlerFu
}

if c.FullPath() == "/api/providers/openai/v1/images/generations" && c.Request.Method == http.MethodPost {
metadata := imageResponseMetadataFromBytes(log, bytes, prod)
setCtxImageResponseMetadata(c, metadata)
logImageResponse(log, bytes, prod, private)
}

if c.FullPath() == "/api/providers/openai/v1/images/edits" && c.Request.Method == http.MethodPost {
metadata := imageResponseMetadataFromBytes(log, bytes, prod)
setCtxImageResponseMetadata(c, metadata)
logImageResponse(log, bytes, prod, private)
}

if c.FullPath() == "/api/providers/openai/v1/images/variations" && c.Request.Method == http.MethodPost {
metadata := imageResponseMetadataFromBytes(log, bytes, prod)
setCtxImageResponseMetadata(c, metadata)
logImageResponse(log, bytes, prod, private)
}
}
Expand Down
Loading