20 changes: 20 additions & 0 deletions src/memos/llms/openai.py
@@ -1,4 +1,5 @@
import json
import time

from collections.abc import Generator

@@ -89,10 +90,29 @@ def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, None, None]:
response = self.client.chat.completions.create(**request_body)

reasoning_started = False
first_token_time = None
start_time = time.perf_counter()

for chunk in response:
delta = chunk.choices[0].delta

# Calculate TTFT on first token
if first_token_time is None:
first_token_time = time.perf_counter()
ttft_ms = (first_token_time - start_time) * 1000.0

# 尝试从响应中获取实际模型信息
actual_model = getattr(chunk, "model", None) or self.config.model_name_or_path
requested_model = self.config.model_name_or_path

# Print TTFT info - 显示请求模型和实际模型(如果不一致)
Comment on lines +104 to +108
Copilot AI, Dec 25, 2025

The comment is in Chinese. For consistency with the rest of the codebase, which uses English comments, it should be translated to English.

Suggested change:
- # 尝试从响应中获取实际模型信息
+ # Try to get the actual model information from the response
  actual_model = getattr(chunk, "model", None) or self.config.model_name_or_path
  requested_model = self.config.model_name_or_path
- # Print TTFT info - 显示请求模型和实际模型(如果不一致)
+ # Print TTFT info - show requested model and actual model (if they differ)
if actual_model != requested_model:
logger.info(
f"TTFT: {ttft_ms:.2f}ms | Requested: {requested_model} | Actual: {actual_model}"
)
else:
logger.info(f"TTFT: {ttft_ms:.2f}ms | {requested_model}")
Comment on lines +93 to +114
Copilot AI, Dec 25, 2025

The TTFT (Time To First Token) calculation and logging happen inside the generator loop, but only once, when first_token_time is None. This timing logic does not account for an empty stream: if no chunks are yielded, the loop body never runs, so no TTFT is ever computed or logged. Additionally, since generate_stream is decorated with @timed_with_status, there is now redundant timing logic; the decorator already tracks total time. Consider whether TTFT should be tracked separately from the decorator's timing or whether this overlaps with its functionality.
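A minimal sketch of the empty-stream concern, assuming a generator shaped like generate_stream; stream_with_ttft and its arguments are hypothetical, not the PR's code:

import logging
import time

logger = logging.getLogger(__name__)

def stream_with_ttft(chunks, model="model-x"):  # hypothetical helper
    first_token_time = None
    start_time = time.perf_counter()
    for chunk in chunks:
        if first_token_time is None:
            # First chunk observed: record time to first token exactly once.
            first_token_time = time.perf_counter()
            ttft_ms = (first_token_time - start_time) * 1000.0
            logger.info(f"TTFT: {ttft_ms:.2f}ms | {model}")
        yield chunk
    if first_token_time is None:
        # Empty stream: the loop body never ran, so say so explicitly.
        logger.warning(f"No chunks received; TTFT unavailable | {model}")

Here list(stream_with_ttft(iter([]))) would log the warning, whereas the PR's current code would log nothing for an empty stream.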
Comment on lines +93 to +114
Copilot AI, Dec 25, 2025

The new TTFT timing logic and model-name retrieval in generate_stream lack test coverage. The existing test at lines 46-103 in test_openai.py does not verify the TTFT calculation or the logging of model information. Consider adding tests that verify TTFT is calculated correctly and that model-name logging works as expected.
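A minimal sketch of such a test, assuming pytest's caplog fixture, that the module logs through the standard logging machinery, and a chunk/delta shape matching the OpenAI streaming API; make_llm and the message shape are hypothetical stand-ins for the repository's actual fixtures:

from types import SimpleNamespace
from unittest.mock import MagicMock

def _chunk(content, model="gpt-4o-mini"):
    # Mimic an OpenAI streaming chunk: chunk.model and chunk.choices[0].delta.
    delta = SimpleNamespace(content=content, reasoning_content=None)
    return SimpleNamespace(model=model, choices=[SimpleNamespace(delta=delta)])

def test_generate_stream_logs_ttft(caplog):
    llm = make_llm()  # hypothetical factory for the LLM under test
    llm.client.chat.completions.create = MagicMock(
        return_value=iter([_chunk("Hello"), _chunk(" world")])
    )
    with caplog.at_level("INFO"):
        list(llm.generate_stream([{"role": "user", "content": "hi"}]))
    # One TTFT line should be emitted, tagged with the model name.
    assert "TTFT:" in caplog.text
    assert "gpt-4o-mini" in caplog.text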

# Support for custom 'reasoning_content' (if present in OpenAI-compatible models like Qwen, DeepSeek)
if hasattr(delta, "reasoning_content") and delta.reasoning_content:
if not reasoning_started and not self.config.remove_think_prefix:
34 changes: 28 additions & 6 deletions src/memos/utils.py
@@ -23,6 +23,7 @@ def timed_with_status(
- log_extra_args:
- can be a dict: fixed contextual fields that are always attached to logs;
- or a callable: like `fn(*args, **kwargs) -> dict`, used to dynamically generate contextual fields at runtime.
- target_models: list of target models to filter performance tracking
Copilot AI, Dec 25, 2025

The documentation mentions a new parameter, target_models, but this parameter is not implemented in the function signature or used in the function body. Either remove this documentation line or implement the parameter.

Suggested change:
-    - target_models: list of target models to filter performance tracking
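If the parameter were implemented rather than removed, a minimal sketch of the filtering that the doc line suggests — an assumption about intent, with should_track as a hypothetical helper:

def should_track(model_name, target_models=None):
    # No filter configured: track every model.
    if not target_models:
        return True
    # Otherwise track only models whose name matches a configured entry.
    return any(target in model_name for target in target_models)

assert should_track("gpt-4o-mini", ["gpt-4o"]) is True
assert should_track("qwen-max", ["gpt-4o"]) is False
assert should_track("anything", None) is True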
"""

if isinstance(log_args, str):
@@ -51,6 +52,7 @@ def wrapper(*args, **kwargs):
return result
finally:
elapsed_ms = (time.perf_counter() - start) * 1000.0
total_time = elapsed_ms / 1000.0 # Convert to seconds

ctx_parts = []
# 1) Collect parameters from kwargs by name
@@ -80,12 +82,32 @@
if not success_flag and exc_type is not None:
status_info += f", error: {exc_type.__name__}"

msg = (
f"[TIMER_WITH_STATUS] {log_prefix or fn.__name__} "
f"took {elapsed_ms:.0f} ms{status_info}, args: {ctx_str}"
)

logger.info(msg)
# Enhanced logging with content metrics for LLM calls
if success_flag and result:
# Calculate content metrics
content_length = len(result) if isinstance(result, str) else 0
Comment on lines +86 to +88
Copilot AI, Dec 25, 2025

The logic assumes that when success_flag is True and result is truthy, result is a string whose length is worth measuring. However, this is a generic decorator that could be applied to any function. The isinstance check keeps non-string results (e.g., a list, dict, or custom object) from raising, but it silently reports a content_length of 0 and therefore a speed of 0, which is misleading. Consider computing metrics only for string results, or document that this decorator should only be used with string-returning functions.
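One hedged sketch of metrics that refuse to report misleading zeros — an illustration, not the PR's code:

def content_metrics(result, total_time):
    # Only string results have a meaningful character count.
    if not isinstance(result, str):
        return None, None
    length = len(result)
    speed = length / total_time if total_time > 0 else 0.0
    return length, speed

content_metrics("hello world", 0.5) returns (11, 22.0), while content_metrics([1, 2, 3], 0.5) returns (None, None) rather than a misleading (0, 0.0).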
speed = content_length / total_time if total_time > 0 else 0

# Get model name from self.config
self = args[0] if args else None
model_name = (
getattr(self, "config", {}).get("model_name_or_path", "unknown")
if self
else "unknown"
)
Comment on lines +91 to +97
Copilot AI, Dec 25, 2025

Accessing self.config via getattr on a potentially None object is fragile. If args is empty, model_name silently falls back to "unknown"; if args[0] has a config attribute that is not a dict (for example, a config object without a .get method), the chained .get call raises. This assumes every decorated function is a method with a specific structure. Consider more robust type checking, or document this requirement clearly.

Suggested change:
- # Get model name from self.config
- self = args[0] if args else None
- model_name = (
-     getattr(self, "config", {}).get("model_name_or_path", "unknown")
-     if self
-     else "unknown"
- )
+ # Get model name from self.config, if available and well-formed
+ self_obj = args[0] if args else None
+ model_name = "unknown"
+ if self_obj is not None:
+     config = getattr(self_obj, "config", None)
+     if isinstance(config, dict):
+         model_name = config.get("model_name_or_path", "unknown")

# Console output for target models with performance metrics
logger.info(
f"{model_name}: {log_prefix or fn.__name__} finished | Total time: {total_time:.2f}s | Speed: {speed:.2f} chars/s | Length: {content_length}"
)
elif (
success_flag
): # Standard logging for non-target models or when not tracking metrics
msg = (
f"[TIMER_WITH_STATUS] {log_prefix or fn.__name__} "
f"took {elapsed_ms:.0f} ms{status_info}, args: {ctx_str}"
)
logger.info(msg)
Comment on lines +103 to +110
Copilot AI, Dec 25, 2025

The logging condition is confusing. The elif branch logs the standard format only when success_flag is True but result is falsy (None, empty string, 0, False, etc.), so successful calls with falsy results and successful calls with truthy results use different formats. Note also that when success_flag is False, neither branch runs, so failed executions no longer log anything at all, even though status_info still carries the error name. This logic seems inconsistent; consider what should happen when a function successfully returns an empty string or None, and restore logging on failure.
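A minimal sketch of one way to make the branches consistent — an assumption rather than the PR's code: always emit the standard timing line, for failures too, and append content metrics only when they are meaningful. log_timing is a hypothetical extraction of the decorator's finally block:

import logging

logger = logging.getLogger(__name__)

def log_timing(fn_name, elapsed_ms, status_info, ctx_str, success_flag, result, total_time):
    # One line for every outcome, success or failure.
    msg = f"[TIMER_WITH_STATUS] {fn_name} took {elapsed_ms:.0f} ms{status_info}, args: {ctx_str}"
    if success_flag and isinstance(result, str) and result:
        # Content metrics only for non-empty string results.
        speed = len(result) / total_time if total_time > 0 else 0.0
        msg += f" | Length: {len(result)} chars | Speed: {speed:.2f} chars/s"
    logger.info(msg)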

return wrapper
