diff --git a/assistant.go b/assistant.go
index 33e7bd0..081e5bc 100644
--- a/assistant.go
+++ b/assistant.go
@@ -193,8 +193,8 @@ func handleAssistantConversation(c tele.Context, thread []*tele.Message) error {
         logger.Warnw("failed to cache message", "error", err)
     }
 
-    nBytes := 0                   // Used to estimated number of tokens. For now we treat 3 bytes as 1 token.
-    nBytesMax := (4096 - 512) * 3 // Leave some space for the response
+    nBytes := 0            // Used to estimate the number of tokens. For now we treat 3 bytes as 1 token.
+    nBytesMax := 16384 * 3 // Leave some space for the response
 
     sysMsg := prompts.Assistant()
     chatReqMsgs := []openai.ChatMessage{
@@ -237,7 +237,6 @@ func handleAssistantConversation(c tele.Context, thread []*tele.Message) error {
         Model:       openai.ModelGpt4O,
         Messages:    chatReqMsgs,
         Temperature: lo.ToPtr(0.42),
-        MaxTokens:   2048,
         User:        assistantHashUserId(lastMsg.Sender.ID),
     }
 
diff --git a/openai/chat.go b/openai/chat.go
index 3564ed6..605f43c 100644
--- a/openai/chat.go
+++ b/openai/chat.go
@@ -6,28 +6,40 @@ type ChatRole string
 
 const (
     ChatRoleSystem    ChatRole = "system"
+    ChatRoleDeveloper ChatRole = "developer" // replaces the `system` role for o1 and newer models
+    ChatRoleTool      ChatRole = "tool"      // carries tool/function call results back to the model
     ChatRoleAssistant ChatRole = "assistant"
     ChatRoleUser      ChatRole = "user"
 )
 
+type ReasoningEffort string
+
+const (
+    ReasoningEffortLow    ReasoningEffort = "low"
+    ReasoningEffortMedium ReasoningEffort = "medium"
+    ReasoningEffortHigh   ReasoningEffort = "high"
+)
+
 type ChatMessage struct {
     Role    ChatRole `json:"role"`
     Content string   `json:"content"`
 }
 
 type ChatRequest struct {
-    Model            string             `json:"model"`
-    Messages         []ChatMessage      `json:"messages"`
-    Temperature      *float64           `json:"temperature,omitempty"`       // What sampling temperature to use, between 0 and 2.
-    TopP             *float64           `json:"top_p,omitempty"`             // Nucleus sampling. Specify this or temperature but not both.
-    N                int                `json:"n,omitempty"`                 // How many chat completion choices to generate for each input message.
-    Stream           bool               `json:"stream,omitempty"`            // If set, partial message deltas will be sent as data-only server-sent events as they become available.
-    Stop             []string           `json:"stop,omitempty"`              // Up to 4 sequences where the API will stop generating further tokens.
-    MaxTokens        int                `json:"max_tokens,omitempty"`
-    PresencePenalty  *float64           `json:"presence_penalty,omitempty"`  // Number between -2.0 and 2.0.
-    FrequencyPenalty *float64           `json:"frequency_penalty,omitempty"` // Number between -2.0 and 2.0.
-    LogitBias        map[string]float64 `json:"logit_bias,omitempty"`        // Modify the likelihood of specified tokens appearing in the completion.
-    User             string             `json:"user,omitempty"`              // A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
+    Model               string             `json:"model"`
+    Messages            []ChatMessage      `json:"messages"`
+    Temperature         *float64           `json:"temperature,omitempty"`           // What sampling temperature to use, between 0 and 2.
+    TopP                *float64           `json:"top_p,omitempty"`                 // Nucleus sampling. Specify this or temperature but not both.
+    N                   int                `json:"n,omitempty"`                     // How many chat completion choices to generate for each input message.
+    Stream              bool               `json:"stream,omitempty"`                // If set, partial message deltas will be sent as data-only server-sent events as they become available.
+    Stop                []string           `json:"stop,omitempty"`                  // Up to 4 sequences where the API will stop generating further tokens.
+    MaxTokens           int                `json:"max_tokens,omitempty"`            // Deprecated: use `max_completion_tokens` instead.
+    MaxCompletionTokens int                `json:"max_completion_tokens,omitempty"` // Includes visible output tokens and reasoning tokens.
+    PresencePenalty     *float64           `json:"presence_penalty,omitempty"`      // Number between -2.0 and 2.0.
+    FrequencyPenalty    *float64           `json:"frequency_penalty,omitempty"`     // Number between -2.0 and 2.0.
+    LogitBias           map[string]float64 `json:"logit_bias,omitempty"`            // Modify the likelihood of specified tokens appearing in the completion.
+    User                string             `json:"user,omitempty"`                  // A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
+    ReasoningEffort     ReasoningEffort    `json:"reasoning_effort,omitempty"`      // Constrains effort on reasoning for reasoning models.
 }
 
 type ChatResponseChoice struct {
diff --git a/openai/client.go b/openai/client.go
index 1800cf9..2802647 100644
--- a/openai/client.go
+++ b/openai/client.go
@@ -20,7 +20,7 @@ func NewClient(apiKey string) *Client {
     cli := resty.New().
         SetTransport(&http.Transport{
             Proxy:                 http.ProxyFromEnvironment,
-            ResponseHeaderTimeout: 10 * time.Second,
+            ResponseHeaderTimeout: 90 * time.Second, // reasoning models can take far longer before the first response headers arrive
         }).
         SetBaseURL("https://api.openai.com").
        SetHeader("Authorization", "Bearer "+apiKey).
diff --git a/openai/models.go b/openai/models.go
index 7398247..0307fcc 100644
--- a/openai/models.go
+++ b/openai/models.go
@@ -2,6 +2,7 @@ package openai
 
 const (
     ModelGpt4O     = "gpt-4o"     // Safe default
-    ModelO1Preview = "o1-preview" // Expensive
-    ModelO1Mini    = "o1-mini"
+    ModelO1Preview = "o1-preview" // Expensive reasoning model
+    ModelO1Mini    = "o1-mini"    // Cheaper reasoning model
+    ModelO3Mini    = "o3-mini"    // Cheaper yet powerful reasoning model
 )
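For context on the assistant.go hunk: thread history is budgeted with a rough 3-bytes-per-token estimate, now sized for a 16k-token window instead of the old 4k one. A minimal sketch of how such a budget check might be applied; `appendWithinBudget`, the `texts` slice, and the module path are illustrative assumptions, since the diff does not show the real truncation loop:

```go
package assistant

import "example.com/bot/openai" // placeholder module path; not shown in the diff

// appendWithinBudget sketches the 3-bytes-per-token heuristic from
// assistant.go. Hypothetical helper: the real truncation logic lives
// outside this diff.
func appendWithinBudget(msgs []openai.ChatMessage, texts []string) []openai.ChatMessage {
	nBytes := 0
	nBytesMax := 16384 * 3 // ~16k tokens of history at ~3 bytes per token
	for _, t := range texts {
		if nBytes+len(t) > nBytesMax {
			break // estimated token budget exhausted; drop the rest
		}
		nBytes += len(t)
		// Simplification: every message is attributed to the user role here.
		msgs = append(msgs, openai.ChatMessage{Role: openai.ChatRoleUser, Content: t})
	}
	return msgs
}
```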
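And a sketch of how the new openai/chat.go and openai/models.go pieces might fit together in a caller. `buildReasoningRequest` and the module path are assumptions, not part of the diff; the comments reflect OpenAI's documented behavior that o1-and-newer models take instructions via the `developer` role, do not accept custom `temperature`, require `max_completion_tokens` rather than the deprecated `max_tokens`, and (on `o3-mini`) honor `reasoning_effort`:

```go
package assistant

import "example.com/bot/openai" // placeholder module path; not shown in the diff

// buildReasoningRequest is a hypothetical caller exercising the new fields.
func buildReasoningRequest(history []openai.ChatMessage, userHash string) openai.ChatRequest {
	msgs := append([]openai.ChatMessage{
		// `developer` replaces `system` for o1 and newer models.
		{Role: openai.ChatRoleDeveloper, Content: "You are a helpful assistant."},
	}, history...)

	return openai.ChatRequest{
		Model:    openai.ModelO3Mini,
		Messages: msgs,
		// Reasoning models don't support a custom Temperature; cap output
		// (visible + reasoning tokens) with MaxCompletionTokens instead of
		// the deprecated MaxTokens.
		MaxCompletionTokens: 4096,
		ReasoningEffort:     openai.ReasoningEffortMedium,
		User:                userHash,
	}
}
```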