diff --git a/src/memos/llms/openai.py b/src/memos/llms/openai.py index ea488329d..a4e3308c6 100644 --- a/src/memos/llms/openai.py +++ b/src/memos/llms/openai.py @@ -1,4 +1,5 @@ import json +import time from collections.abc import Generator @@ -89,10 +90,29 @@ def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, Non response = self.client.chat.completions.create(**request_body) reasoning_started = False + first_token_time = None + start_time = time.perf_counter() for chunk in response: delta = chunk.choices[0].delta + # Calculate TTFT on first token + if first_token_time is None: + first_token_time = time.perf_counter() + ttft_ms = (first_token_time - start_time) * 1000.0 + + # Try to read the actual model name from the response chunk + actual_model = getattr(chunk, "model", None) or self.config.model_name_or_path + requested_model = self.config.model_name_or_path + + # Print TTFT info - show requested vs. actual model when they differ + if actual_model != requested_model: + logger.info( + f"TTFT: {ttft_ms:.2f}ms | Requested: {requested_model} | Actual: {actual_model}" + ) + else: + logger.info(f"TTFT: {ttft_ms:.2f}ms | {requested_model}") + # Support for custom 'reasoning_content' (if present in OpenAI-compatible models like Qwen, DeepSeek) if hasattr(delta, "reasoning_content") and delta.reasoning_content: if not reasoning_started and not self.config.remove_think_prefix: diff --git a/src/memos/utils.py b/src/memos/utils.py index b57967db0..c640e273d 100644 --- a/src/memos/utils.py +++ b/src/memos/utils.py @@ -23,6 +23,7 @@ def timed_with_status( - log_extra_args: - can be a dict: fixed contextual fields that are always attached to logs; - or a callable: like `fn(*args, **kwargs) -> dict`, used to dynamically generate contextual fields at runtime. 
+ - Note: successful calls that return a non-empty string also log content length and chars/sec. """ if isinstance(log_args, str): @@ -51,6 +52,7 @@ def wrapper(*args, **kwargs): return result finally: elapsed_ms = (time.perf_counter() - start) * 1000.0 + total_time = elapsed_ms / 1000.0 # Convert to seconds ctx_parts = [] # 1) Collect parameters from kwargs by name @@ -80,12 +82,30 @@ def wrapper(*args, **kwargs): if not success_flag and exc_type is not None: status_info += f", error: {exc_type.__name__}" - msg = ( - f"[TIMER_WITH_STATUS] {log_prefix or fn.__name__} " - f"took {elapsed_ms:.0f} ms{status_info}, args: {ctx_str}" - ) - - logger.info(msg) + # Enhanced logging with content metrics for LLM calls + if success_flag and isinstance(result, str) and result: + # Calculate content metrics + content_length = len(result) + speed = content_length / total_time if total_time > 0 else 0 + + # Best-effort model name from the wrapped instance's config (attribute access, not dict.get) + instance = args[0] if args else None + model_name = ( + getattr(getattr(instance, "config", None), "model_name_or_path", None) + or "unknown" + ) + + # Console output with performance metrics + logger.info( + f"{model_name}: {log_prefix or fn.__name__} finished | Total time: {total_time:.2f}s | Speed: {speed:.2f} chars/s | Length: {content_length}" + ) + else: + # Standard logging path; also runs on failure so errors stay visible in logs + msg = ( + f"[TIMER_WITH_STATUS] {log_prefix or fn.__name__} " + f"took {elapsed_ms:.0f} ms{status_info}, args: {ctx_str}" + ) + logger.info(msg) return wrapper