-
Notifications
You must be signed in to change notification settings - Fork 104
add new flag to skip hints with the current goal in the hint source t… #310
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,7 @@ | |
| import requests | ||
| from agentlab.llm.chat_api import ChatModel | ||
| import re | ||
| import json | ||
| from agentlab.llm.response_api import APIPayload | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
@@ -25,6 +26,7 @@ def __init__( | |
| hint_db_path: str, | ||
| hint_retrieval_mode: Literal["direct", "llm", "emb"] = "direct", | ||
| skip_hints_for_current_task: bool = False, | ||
| skip_hints_for_current_goal: bool = False, | ||
| top_n: int = 4, | ||
| embedder_model: str = "Qwen/Qwen3-Embedding-0.6B", | ||
| embedder_server: str = "http://localhost:5000", | ||
|
|
@@ -36,6 +38,7 @@ def __init__( | |
| self.hint_db_path = hint_db_path | ||
| self.hint_retrieval_mode = hint_retrieval_mode | ||
| self.skip_hints_for_current_task = skip_hints_for_current_task | ||
| self.skip_hints_for_current_goal = skip_hints_for_current_goal | ||
| self.top_n = top_n | ||
| self.embedder_model = embedder_model | ||
| self.embedder_server = embedder_server | ||
|
|
@@ -45,7 +48,16 @@ def __init__( | |
| self.hint_db_path = Path(hint_db_path).as_posix() | ||
| else: | ||
| self.hint_db_path = (Path(__file__).parent / self.hint_db_path).as_posix() | ||
| self.hint_db = pd.read_csv(self.hint_db_path, header=0, index_col=None, dtype=str) | ||
| self.hint_db = pd.read_csv( | ||
| self.hint_db_path, | ||
| header=0, | ||
| index_col=None, | ||
| dtype=str, | ||
| converters={ | ||
| "trace_paths_json": lambda x: json.loads(x) if pd.notna(x) else [], | ||
| "source_trace_goals": lambda x: json.loads(x) if pd.notna(x) else [], | ||
| }, | ||
|
Comment on lines
+56
to
+59
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Eager JSON parsing during CSV load
Tell me moreWhat is the issue?JSON parsing is performed for every row during CSV loading, even for columns that may not be used frequently. Why this mattersThis eager parsing approach increases memory usage and loading time, especially for large CSV files. If these JSON fields are only accessed occasionally, the parsing overhead is wasted for unused data. Suggested change ∙ Feature PreviewConsider lazy parsing by keeping JSON fields as strings initially and parsing them on-demand: # Load without converters initially
self.hint_db = pd.read_csv(self.hint_db_path, header=0, index_col=None, dtype=str)
# Parse JSON only when needed
def _get_source_trace_goals(self, row_index):
goals_str = self.hint_db.loc[row_index, 'source_trace_goals']
return json.loads(goals_str) if pd.notna(goals_str) else []Provide feedback to improve future suggestions💬 Looking for more details? Reply to this comment to chat with Korbit. |
||
| ) | ||
| logger.info(f"Loaded {len(self.hint_db)} hints from database {self.hint_db_path}") | ||
| if self.hint_retrieval_mode == "emb": | ||
| self.load_hint_vectors() | ||
|
|
@@ -84,7 +96,9 @@ def choose_hints_llm(self, llm, goal: str, task_name: str) -> list[str]: | |
| topic_to_hints = defaultdict(list) | ||
| skip_hints = [] | ||
| if self.skip_hints_for_current_task: | ||
| skip_hints = self.get_current_task_hints(task_name) | ||
| skip_hints += self.get_current_task_hints(task_name) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's a minor issue but if skip_hints_for_current_task is True, there is no need to check for skip_hints_for_current_goal, right? |
||
| if self.skip_hints_for_current_goal: | ||
| skip_hints += self.get_current_goal_hints(goal) | ||
| for _, row in self.hint_db.iterrows(): | ||
| hint = row["hint"] | ||
| if hint in skip_hints: | ||
|
|
@@ -128,7 +142,9 @@ def choose_hints_emb(self, goal: str, task_name: str) -> list[str]: | |
| all_hints = self.uniq_hints["hint"].tolist() | ||
| skip_hints = [] | ||
| if self.skip_hints_for_current_task: | ||
| skip_hints = self.get_current_task_hints(task_name) | ||
| skip_hints += self.get_current_task_hints(task_name) | ||
| if self.skip_hints_for_current_goal: | ||
| skip_hints += self.get_current_goal_hints(goal) | ||
| hint_embeddings = [] | ||
| id_to_hint = {} | ||
| for hint, emb in zip(all_hints, self.hint_embeddings): | ||
|
|
@@ -199,3 +215,7 @@ def get_current_task_hints(self, task_name): | |
| self.hint_db["task_name"].apply(lambda x: fnmatch.fnmatch(x, task_name)) | ||
| ] | ||
| return hints_df["hint"].tolist() | ||
|
|
||
| def get_current_goal_hints(self, goal_str: str): | ||
| mask = self.hint_db["source_trace_goals"].apply(lambda goals: goal_str in goals) | ||
| return self.hint_db.loc[mask, "hint"].tolist() | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing error handling for JSON parsing in CSV converters
Tell me more
What is the issue?
The JSON parsing converter does not handle potential JSON parsing errors that could occur with malformed JSON data in the CSV file.
Why this matters
If the CSV contains invalid JSON in the source_trace_goals column, json.loads() will raise a JSONDecodeError, causing the entire hint database loading to fail and breaking the application startup.
Suggested change ∙ Feature Preview
Add try-except blocks to handle JSON parsing errors gracefully:
Better solution:
Provide feedback to improve future suggestions
💬 Looking for more details? Reply to this comment to chat with Korbit.