diff --git a/assistant.go b/assistant.go
index 33e7bd0..081e5bc 100644
--- a/assistant.go
+++ b/assistant.go
@@ -193,8 +193,8 @@ func handleAssistantConversation(c tele.Context, thread []*tele.Message) error {
         logger.Warnw("failed to cache message", "error", err)
     }
 
-    nBytes := 0                   // Used to estimated number of tokens. For now we treat 3 bytes as 1 token.
-    nBytesMax := (4096 - 512) * 3 // Leave some space for the response
+    nBytes := 0            // Used to estimate the number of tokens. For now we treat 3 bytes as 1 token.
+    nBytesMax := 16384 * 3 // Leave some space for the response
 
     sysMsg := prompts.Assistant()
     chatReqMsgs := []openai.ChatMessage{
@@ -237,7 +237,6 @@ func handleAssistantConversation(c tele.Context, thread []*tele.Message) error {
         Model:       openai.ModelGpt4O,
         Messages:    chatReqMsgs,
         Temperature: lo.ToPtr(0.42),
-        MaxTokens:   2048,
         User:        assistantHashUserId(lastMsg.Sender.ID),
     }
 
diff --git a/openai/chat.go b/openai/chat.go
index 3564ed6..605f43c 100644
--- a/openai/chat.go
+++ b/openai/chat.go
@@ -6,28 +6,40 @@ type ChatRole string
 
 const (
     ChatRoleSystem    ChatRole = "system"
+    ChatRoleDeveloper ChatRole = "developer" // replaces the `system` role for o1 and newer models
+    ChatRoleTool      ChatRole = "tool"      // carries tool/function call results back to the model
     ChatRoleAssistant ChatRole = "assistant"
     ChatRoleUser      ChatRole = "user"
 )
 
+type ReasoningEffort string
+
+const (
+    ReasoningEffortLow    ReasoningEffort = "low"
+    ReasoningEffortMedium ReasoningEffort = "medium"
+    ReasoningEffortHigh   ReasoningEffort = "high"
+)
+
 type ChatMessage struct {
     Role    ChatRole `json:"role"`
     Content string   `json:"content"`
 }
 
 type ChatRequest struct {
-    Model            string             `json:"model"`
-    Messages         []ChatMessage      `json:"messages"`
-    Temperature      *float64           `json:"temperature,omitempty"`       // What sampling temperature to use, between 0 and 2.
-    TopP             *float64           `json:"top_p,omitempty"`             // Nucleus sampling. Specify this or temperature but not both.
-    N                int                `json:"n,omitempty"`                 // How many chat completion choices to generate for each input message.
-    Stream           bool               `json:"stream,omitempty"`            // If set, partial message deltas will be sent as data-only server-sent events as they become available.
-    Stop             []string           `json:"stop,omitempty"`              // Up to 4 sequences where the API will stop generating further tokens.
-    MaxTokens        int                `json:"max_tokens,omitempty"`
-    PresencePenalty  *float64           `json:"presence_penalty,omitempty"`  // Number between -2.0 and 2.0.
-    FrequencyPenalty *float64           `json:"frequency_penalty,omitempty"` // Number between -2.0 and 2.0.
-    LogitBias        map[string]float64 `json:"logit_bias,omitempty"`        // Modify the likelihood of specified tokens appearing in the completion.
-    User             string             `json:"user,omitempty"`              // A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
+    Model               string             `json:"model"`
+    Messages            []ChatMessage      `json:"messages"`
+    Temperature         *float64           `json:"temperature,omitempty"`           // What sampling temperature to use, between 0 and 2.
+    TopP                *float64           `json:"top_p,omitempty"`                 // Nucleus sampling. Specify this or temperature but not both.
+    N                   int                `json:"n,omitempty"`                     // How many chat completion choices to generate for each input message.
+    Stream              bool               `json:"stream,omitempty"`                // If set, partial message deltas will be sent as data-only server-sent events as they become available.
+    Stop                []string           `json:"stop,omitempty"`                  // Up to 4 sequences where the API will stop generating further tokens.
+    MaxTokens           int                `json:"max_tokens,omitempty"`            // Deprecated: use `max_completion_tokens` instead.
+    MaxCompletionTokens int                `json:"max_completion_tokens,omitempty"` // Includes visible output tokens and reasoning tokens.
+    PresencePenalty     *float64           `json:"presence_penalty,omitempty"`      // Number between -2.0 and 2.0.
+    FrequencyPenalty    *float64           `json:"frequency_penalty,omitempty"`     // Number between -2.0 and 2.0.
+    LogitBias           map[string]float64 `json:"logit_bias,omitempty"`            // Modify the likelihood of specified tokens appearing in the completion.
+    User                string             `json:"user,omitempty"`                  // A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
+    ReasoningEffort     ReasoningEffort    `json:"reasoning_effort,omitempty"`      // Constrains effort on reasoning for reasoning models.
 }
 
 type ChatResponseChoice struct {
diff --git a/openai/client.go b/openai/client.go
index 1800cf9..2802647 100644
--- a/openai/client.go
+++ b/openai/client.go
@@ -20,7 +20,7 @@ func NewClient(apiKey string) *Client {
     cli := resty.New().
         SetTransport(&http.Transport{
             Proxy:                 http.ProxyFromEnvironment,
-            ResponseHeaderTimeout: 10 * time.Second,
+            ResponseHeaderTimeout: 90 * time.Second, // reasoning models can take far longer before the first response headers arrive
         }).
         SetBaseURL("https://api.openai.com").
        SetHeader("Authorization", "Bearer "+apiKey).
diff --git a/openai/models.go b/openai/models.go
index 7398247..0307fcc 100644
--- a/openai/models.go
+++ b/openai/models.go
@@ -2,6 +2,7 @@ package openai
 
 const (
     ModelGpt4O     = "gpt-4o"     // Safe default
-    ModelO1Preview = "o1-preview" // Expensive
-    ModelO1Mini    = "o1-mini"
+    ModelO1Preview = "o1-preview" // Expensive reasoning model
+    ModelO1Mini    = "o1-mini"    // Cheaper reasoning model
+    ModelO3Mini    = "o3-mini"    // Cheaper yet powerful reasoning model
 )
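For context on the assistant.go hunk: thread history is budgeted with a rough 3-bytes-per-token estimate, now sized for a 16k-token window instead of the old 4k one. A minimal sketch of how such a budget check might be applied; `appendWithinBudget`, the `texts` slice, and the module path are illustrative assumptions, since the diff does not show the real truncation loop:

```go
package assistant

import "example.com/bot/openai" // placeholder module path; not shown in the diff

// appendWithinBudget sketches the 3-bytes-per-token heuristic from
// assistant.go. Hypothetical helper: the real truncation logic lives
// outside this diff.
func appendWithinBudget(msgs []openai.ChatMessage, texts []string) []openai.ChatMessage {
	nBytes := 0
	nBytesMax := 16384 * 3 // ~16k tokens of history at ~3 bytes per token
	for _, t := range texts {
		if nBytes+len(t) > nBytesMax {
			break // estimated token budget exhausted; drop the rest
		}
		nBytes += len(t)
		// Simplification: every message is attributed to the user role here.
		msgs = append(msgs, openai.ChatMessage{Role: openai.ChatRoleUser, Content: t})
	}
	return msgs
}
```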
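And a sketch of how the new openai/chat.go and openai/models.go pieces might fit together in a caller. `buildReasoningRequest` and the module path are assumptions, not part of the diff; the comments reflect OpenAI's documented behavior that o1-and-newer models take instructions via the `developer` role, do not accept custom `temperature`, require `max_completion_tokens` rather than the deprecated `max_tokens`, and (on `o3-mini`) honor `reasoning_effort`:

```go
package assistant

import "example.com/bot/openai" // placeholder module path; not shown in the diff

// buildReasoningRequest is a hypothetical caller exercising the new fields.
func buildReasoningRequest(history []openai.ChatMessage, userHash string) openai.ChatRequest {
	msgs := append([]openai.ChatMessage{
		// `developer` replaces `system` for o1 and newer models.
		{Role: openai.ChatRoleDeveloper, Content: "You are a helpful assistant."},
	}, history...)

	return openai.ChatRequest{
		Model:    openai.ModelO3Mini,
		Messages: msgs,
		// Reasoning models don't support a custom Temperature; cap output
		// (visible + reasoning tokens) with MaxCompletionTokens instead of
		// the deprecated MaxTokens.
		MaxCompletionTokens: 4096,
		ReasoningEffort:     openai.ReasoningEffortMedium,
		User:                userHash,
	}
}
```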