20 changes: 20 additions & 0 deletions src/memos/llms/openai.py
@@ -1,4 +1,5 @@
import json
import time

from collections.abc import Generator

@@ -89,10 +90,29 @@ def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, None, None]:
response = self.client.chat.completions.create(**request_body)

reasoning_started = False
first_token_time = None
start_time = time.perf_counter()

for chunk in response:
delta = chunk.choices[0].delta

# Calculate TTFT on first token
if first_token_time is None:
first_token_time = time.perf_counter()
ttft_ms = (first_token_time - start_time) * 1000.0

# 尝试从响应中获取实际模型信息
actual_model = getattr(chunk, "model", None) or self.config.model_name_or_path
requested_model = self.config.model_name_or_path

# Print TTFT info - 显示请求模型和实际模型(如果不一致)
Comment on lines +104 to +108
Copilot AI, Dec 25, 2025

The comment is in Chinese. For consistency with the rest of the codebase, which uses English comments, it should be translated to English.

Suggested change:
- # 尝试从响应中获取实际模型信息
+ # Try to get the actual model information from the response
  actual_model = getattr(chunk, "model", None) or self.config.model_name_or_path
  requested_model = self.config.model_name_or_path
- # Print TTFT info - 显示请求模型和实际模型(如果不一致)
+ # Print TTFT info - show requested model and actual model (if they differ)
if actual_model != requested_model:
logger.info(
f"TTFT: {ttft_ms:.2f}ms | Requested: {requested_model} | Actual: {actual_model}"
)
else:
logger.info(f"TTFT: {ttft_ms:.2f}ms | {requested_model}")
Comment on lines +93 to +114
Copilot AI, Dec 25, 2025

The TTFT (Time To First Token) calculation and logging happen inside the generator loop, but only once, when first_token_time is None. This timing logic does not account for an empty stream: if no chunks are yielded, the loop body never runs, so no TTFT is ever computed or logged. Additionally, since generate_stream is decorated with @timed_with_status, there is now redundant timing logic; the decorator already tracks total time. Consider whether TTFT should be tracked separately from the decorator's timing or whether this overlaps with its functionality.
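A minimal sketch of the empty-stream concern, assuming a generator shaped like generate_stream; stream_with_ttft and its arguments are hypothetical, not the PR's code:

import logging
import time

logger = logging.getLogger(__name__)

def stream_with_ttft(chunks, model="model-x"):  # hypothetical helper
    first_token_time = None
    start_time = time.perf_counter()
    for chunk in chunks:
        if first_token_time is None:
            # First chunk observed: record time to first token exactly once.
            first_token_time = time.perf_counter()
            ttft_ms = (first_token_time - start_time) * 1000.0
            logger.info(f"TTFT: {ttft_ms:.2f}ms | {model}")
        yield chunk
    if first_token_time is None:
        # Empty stream: the loop body never ran, so say so explicitly.
        logger.warning(f"No chunks received; TTFT unavailable | {model}")

Here list(stream_with_ttft(iter([]))) would log the warning, whereas the PR's current code would log nothing for an empty stream.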
Comment on lines +93 to +114
Copilot AI, Dec 25, 2025

The new TTFT timing logic and model-name retrieval in generate_stream lack test coverage. The existing test at lines 46-103 in test_openai.py does not verify the TTFT calculation or the logging of model information. Consider adding tests that verify TTFT is calculated correctly and that model-name logging works as expected.
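A minimal sketch of such a test, assuming pytest's caplog fixture, that the module logs through the standard logging machinery, and a chunk/delta shape matching the OpenAI streaming API; make_llm and the message shape are hypothetical stand-ins for the repository's actual fixtures:

from types import SimpleNamespace
from unittest.mock import MagicMock

def _chunk(content, model="gpt-4o-mini"):
    # Mimic an OpenAI streaming chunk: chunk.model and chunk.choices[0].delta.
    delta = SimpleNamespace(content=content, reasoning_content=None)
    return SimpleNamespace(model=model, choices=[SimpleNamespace(delta=delta)])

def test_generate_stream_logs_ttft(caplog):
    llm = make_llm()  # hypothetical factory for the LLM under test
    llm.client.chat.completions.create = MagicMock(
        return_value=iter([_chunk("Hello"), _chunk(" world")])
    )
    with caplog.at_level("INFO"):
        list(llm.generate_stream([{"role": "user", "content": "hi"}]))
    # One TTFT line should be emitted, tagged with the model name.
    assert "TTFT:" in caplog.text
    assert "gpt-4o-mini" in caplog.text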

# Support for custom 'reasoning_content' (if present in OpenAI-compatible models like Qwen, DeepSeek)
if hasattr(delta, "reasoning_content") and delta.reasoning_content:
if not reasoning_started and not self.config.remove_think_prefix:
34 changes: 28 additions & 6 deletions src/memos/utils.py
@@ -23,6 +23,7 @@ def timed_with_status(
- log_extra_args:
- can be a dict: fixed contextual fields that are always attached to logs;
- or a callable: like `fn(*args, **kwargs) -> dict`, used to dynamically generate contextual fields at runtime.
- target_models: list of target models to filter performance tracking
Copilot AI, Dec 25, 2025

The documentation mentions a new parameter, target_models, but this parameter is not implemented in the function signature or used in the function body. Either remove this documentation line or implement the parameter.

Suggested change:
-    - target_models: list of target models to filter performance tracking
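If the parameter were implemented rather than removed, a minimal sketch of the filtering that the doc line suggests — an assumption about intent, with should_track as a hypothetical helper:

def should_track(model_name, target_models=None):
    # No filter configured: track every model.
    if not target_models:
        return True
    # Otherwise track only models whose name matches a configured entry.
    return any(target in model_name for target in target_models)

assert should_track("gpt-4o-mini", ["gpt-4o"]) is True
assert should_track("qwen-max", ["gpt-4o"]) is False
assert should_track("anything", None) is True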
"""

if isinstance(log_args, str):
@@ -51,6 +52,7 @@ def wrapper(*args, **kwargs):
return result
finally:
elapsed_ms = (time.perf_counter() - start) * 1000.0
total_time = elapsed_ms / 1000.0 # Convert to seconds

ctx_parts = []
# 1) Collect parameters from kwargs by name
@@ -80,12 +82,32 @@
if not success_flag and exc_type is not None:
status_info += f", error: {exc_type.__name__}"

msg = (
f"[TIMER_WITH_STATUS] {log_prefix or fn.__name__} "
f"took {elapsed_ms:.0f} ms{status_info}, args: {ctx_str}"
)

logger.info(msg)
# Enhanced logging with content metrics for LLM calls
if success_flag and result:
# Calculate content metrics
content_length = len(result) if isinstance(result, str) else 0
Comment on lines +86 to +88
Copilot AI, Dec 25, 2025

The logic assumes that when success_flag is True and result is truthy, result is a string whose length is worth measuring. However, this is a generic decorator that could be applied to any function. The isinstance check keeps non-string results (e.g., a list, dict, or custom object) from raising, but it silently reports a content_length of 0 and therefore a speed of 0, which is misleading. Consider computing metrics only for string results, or document that this decorator should only be used with string-returning functions.
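One hedged sketch of metrics that refuse to report misleading zeros — an illustration, not the PR's code:

def content_metrics(result, total_time):
    # Only string results have a meaningful character count.
    if not isinstance(result, str):
        return None, None
    length = len(result)
    speed = length / total_time if total_time > 0 else 0.0
    return length, speed

content_metrics("hello world", 0.5) returns (11, 22.0), while content_metrics([1, 2, 3], 0.5) returns (None, None) rather than a misleading (0, 0.0).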
speed = content_length / total_time if total_time > 0 else 0

# Get model name from self.config
self = args[0] if args else None
model_name = (
getattr(self, "config", {}).get("model_name_or_path", "unknown")
if self
else "unknown"
)
Comment on lines +91 to +97
Copilot AI, Dec 25, 2025

Accessing self.config via getattr on a potentially None object is fragile. If args is empty, model_name silently falls back to "unknown"; if args[0] has a config attribute that is not a dict (for example, a config object without a .get method), the chained .get call raises. This assumes every decorated function is a method with a specific structure. Consider more robust type checking, or document this requirement clearly.

Suggested change:
- # Get model name from self.config
- self = args[0] if args else None
- model_name = (
-     getattr(self, "config", {}).get("model_name_or_path", "unknown")
-     if self
-     else "unknown"
- )
+ # Get model name from self.config, if available and well-formed
+ self_obj = args[0] if args else None
+ model_name = "unknown"
+ if self_obj is not None:
+     config = getattr(self_obj, "config", None)
+     if isinstance(config, dict):
+         model_name = config.get("model_name_or_path", "unknown")

# Console output for target models with performance metrics
logger.info(
f"{model_name}: {log_prefix or fn.__name__} finished | Total time: {total_time:.2f}s | Speed: {speed:.2f} chars/s | Length: {content_length}"
)
elif (
success_flag
): # Standard logging for non-target models or when not tracking metrics
msg = (
f"[TIMER_WITH_STATUS] {log_prefix or fn.__name__} "
f"took {elapsed_ms:.0f} ms{status_info}, args: {ctx_str}"
)
logger.info(msg)
Comment on lines +103 to +110
Copilot AI, Dec 25, 2025

The logging condition is confusing. The elif branch logs the standard format only when success_flag is True but result is falsy (None, empty string, 0, False, etc.), so successful calls with falsy results and successful calls with truthy results use different formats. Note also that when success_flag is False, neither branch runs, so failed executions no longer log anything at all, even though status_info still carries the error name. This logic seems inconsistent; consider what should happen when a function successfully returns an empty string or None, and restore logging on failure.
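A minimal sketch of one way to make the branches consistent — an assumption rather than the PR's code: always emit the standard timing line, for failures too, and append content metrics only when they are meaningful. log_timing is a hypothetical extraction of the decorator's finally block:

import logging

logger = logging.getLogger(__name__)

def log_timing(fn_name, elapsed_ms, status_info, ctx_str, success_flag, result, total_time):
    # One line for every outcome, success or failure.
    msg = f"[TIMER_WITH_STATUS] {fn_name} took {elapsed_ms:.0f} ms{status_info}, args: {ctx_str}"
    if success_flag and isinstance(result, str) and result:
        # Content metrics only for non-empty string results.
        speed = len(result) / total_time if total_time > 0 else 0.0
        msg += f" | Length: {len(result)} chars | Speed: {speed:.2f} chars/s"
    logger.info(msg)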

return wrapper
