diff --git a/assistant.go b/assistant.go index 342e855..4b4ebdc 100644 --- a/assistant.go +++ b/assistant.go @@ -95,7 +95,7 @@ type assistantStreamedResponseCb func(text string, finished bool) error func assistantStreamedResponse(request openai.ChatRequest, cb assistantStreamedResponseCb) error { logger.Debugw("Openai chat request", "req", request) - ai := openai.NewClient(config.OpenAIApiKey) + ai := openai.NewClientWithBaseUrl(config.OpenAIApiKey, config.OpenAIApiBaseURL) var ( resp *openai.ChatResponseStream @@ -234,23 +234,24 @@ func handleAssistantConversation(c tele.Context, thread []*tele.Message) error { } req := openai.ChatRequest{ - Model: openai.ModelGpt5, - Messages: chatReqMsgs, - Temperature: nil, // lo.ToPtr(0.42), - User: assistantHashUserId(lastMsg.Sender.ID), + Model: openai.ModelGpt5Online, + Messages: chatReqMsgs, + ReasoningEffort: openai.ReasoningEffortMedium, + Temperature: nil, // lo.ToPtr(0.42), + User: assistantHashUserId(lastMsg.Sender.ID), } - typingNotifyCh := setTyping(c) + // typingNotifyCh := setTyping(c) - var replyMsg *tele.Message + replyMsg, err := c.Bot().Reply(lastMsg, reasoningIndicatorMessage, tele.Silent) + if err != nil { + logger.Errorw("assistant: failed to send reasoning indicator message", "error", err) + return c.Reply("Sorry, there's a technical issue. 
😵💫 Please try again later.", tele.Silent) + } reqErr := assistantStreamedResponse(req, func(text string, finished bool) error { var err error - if replyMsg == nil { - <-typingNotifyCh - replyMsg, err = c.Bot().Reply(lastMsg, text, tele.Silent) - } else { - replyMsg, err = c.Bot().Edit(replyMsg, text) - } + replyMsg, err = c.Bot().Edit(replyMsg, text) + if finished && err == nil { replyMsg.ReplyTo = lastMsg // nasty bug if err := cacheMessage(replyMsg); err != nil { diff --git a/cfg.go b/cfg.go index ce9efca..dc03e14 100644 --- a/cfg.go +++ b/cfg.go @@ -24,7 +24,8 @@ type ConfigDef struct { MonthlyTrafficLimitGiB int `env:"TG_MONTHLY_TRAFFIC_LIMIT_GIB" env-default:"1000"` // AI - OpenAIApiKey string `env:"TG_OPENAI_API_KEY"` + OpenAIApiKey string `env:"TG_OPENAI_API_KEY"` + OpenAIApiBaseURL string `env:"TG_OPENAI_API_BASE_URL"` // Parsed fields adminUidLookup map[int64]struct{} diff --git a/msgcache.go b/msgcache.go index 6e64aec..e9a86eb 100644 --- a/msgcache.go +++ b/msgcache.go @@ -5,7 +5,7 @@ import ( "fmt" "git.gensokyo.cafe/kkyy/tgbot_misaka_5882f7/utils" - "github.com/dgraph-io/ristretto" + "github.com/dgraph-io/ristretto/v2" "github.com/eko/gocache/lib/v4/cache" gocache_lib "github.com/eko/gocache/lib/v4/store" ristretto_store "github.com/eko/gocache/store/ristretto/v4" @@ -19,7 +19,7 @@ var ( ) func initMsgCache() error { - ristrettoCache, err := ristretto.NewCache(&ristretto.Config{ + ristrettoCache, err := ristretto.NewCache(&ristretto.Config[string, tele.Message]{ NumCounters: 100_000, MaxCost: 20 << 20, // 20 MiB BufferItems: 64, diff --git a/openai/chat.go b/openai/chat.go index 605f43c..f3f0333 100644 --- a/openai/chat.go +++ b/openai/chat.go @@ -38,7 +38,7 @@ type ChatRequest struct { PresencePenalty *float64 `json:"presence_penalty,omitempty"` // Number between -2.0 and 2.0. FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"` // Number between -2.0 and 2.0. 
LogitBias map[string]float64 `json:"logit_bias,omitempty"` // Modify the likelihood of specified tokens appearing in the completion. - User string `json:"user,omitempty"` // A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + User string `json:"user,omitempty"` // Deprecated: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. ReasoningEffort ReasoningEffort `json:"reasoning_effort,omitempty"` // Constrains effort on reasoning for reasoning models. } diff --git a/openai/client.go b/openai/client.go index 68eea1d..9e1d53a 100644 --- a/openai/client.go +++ b/openai/client.go @@ -29,6 +29,14 @@ func NewClient(apiKey string) *Client { return &Client{rest: cli} } +func NewClientWithBaseUrl(apiKey string, baseURL string) *Client { + cli := NewClient(apiKey) + if baseURL != "" { + cli.rest.SetBaseURL(baseURL) + } + return cli +} + func (c *Client) ChatCompletion(request ChatRequest) (*ChatResponse, error) { // Note: this function might not work due to the header timeout set on the http client. // We should probably not use this anyway. diff --git a/openai/models.go b/openai/models.go index 6555536..f575491 100644 --- a/openai/models.go +++ b/openai/models.go @@ -1,16 +1,8 @@ package openai const ( - ModelGpt5 = "gpt-5" // OpenAI's Flagship model for general use - ModelO3 = "o3" // OpenAI's Flagship reasoning model for daily use - ModelO4Mini = "o4-mini" // OpenAI's faster reasoning model - - // Deprecated: obsolete model - ModelGpt41 = "gpt-4.1" - - // Deprecated: obsolete model - ModelO1 = "o1" // Expensive reasoning model - - // Deprecated: obsolete model - ModelGpt4O = "gpt-4o" // The safe default, balanced model. 
+ ModelGpt5 = "gpt-5" // OpenAI's Flagship model for general use + ModelGpt5Online = "gpt-5:online" // OpenAI's Flagship model for general use (via OpenRouter with web search tool) + ModelO3 = "o3" // OpenAI's Flagship reasoning model for daily use + ModelO4Mini = "o4-mini" // OpenAI's faster reasoning model ) diff --git a/openai/prompts/prompts.go b/openai/prompts/prompts.go index 019e1a4..0da1c58 100644 --- a/openai/prompts/prompts.go +++ b/openai/prompts/prompts.go @@ -11,6 +11,8 @@ func Assistant() string { "Misaka likes to use many cheerful emojis in chat 😝🥹, but she avoids using any in serious contexts, such as when providing technical solutions.", "Most importantly, Misaka is a helpful assistant.", "", + "Despite wanting to talk more, Misaka has to avoid making a single response too long, since Telegram has message length limits (4,096 characters—maybe around 700 English words or 1,000 CJK characters).", + "", "Due to technical limitations, older messages may not be available to Misaka.", "", "We are currently in the second half of 2025.",