Skip to content

Commit 6dce26a

Browse files
asong56WillemJiangCopilot
authored
fix: resolve tool duplication and skill parser YAML inconsistencies (bytedance#1803) (bytedance#2107)
* Refactor tests for SKILL.md parser Updated tests for SKILL.md parser to handle quoted names and descriptions correctly. Added new tests for parsing plain and single-quoted names, and ensured multi-line descriptions are processed properly. * Implement tool name validation and deduplication Add tool name mismatch warning and deduplication logic * Refactor skill file parsing and error handling * Add tests for tool name deduplication Added tests for tool name deduplication in get_available_tools(). Ensured that duplicates are not returned, the first occurrence is kept, and warnings are logged for skipped duplicates. * Apply suggestions from code review Co-authored-by: Copilot Autofix powered by AI <[email protected]> * Update minimal config to include tools list * Update test for nonexistent skill file Ensure the test for nonexistent files checks for None. * Refactor tool loading and add skill management support Refactor tool loading logic to include skill management tools based on configuration and clean up comments. * Enhance code comments for tool loading logic Added comments to clarify the purpose of various code sections related to tool loading and configuration. * Fix assertion for duplicate tool name warning * Fix indentation issues in tools.py * Fix the lint error of test_tool_deduplication * Fix the lint error of tools.py * Fix the lint error * Fix the lint error * make format --------- Co-authored-by: Willem Jiang <[email protected]> Co-authored-by: Copilot Autofix powered by AI <[email protected]>
1 parent fc94e90 commit 6dce26a

4 files changed

Lines changed: 297 additions & 193 deletions

File tree

backend/packages/harness/deerflow/skills/parser.py

Lines changed: 36 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -2,112 +2,67 @@
22
import re
33
from pathlib import Path
44

5+
import yaml
6+
57
from .types import Skill
68

79
logger = logging.getLogger(__name__)
810

911

1012
def parse_skill_file(skill_file: Path, category: str, relative_path: Path | None = None) -> Skill | None:
11-
"""
12-
Parse a SKILL.md file and extract metadata.
13+
"""Parse a SKILL.md file and extract metadata.
1314
1415
Args:
15-
skill_file: Path to the SKILL.md file
16-
category: Category of the skill ('public' or 'custom')
16+
skill_file: Path to the SKILL.md file.
17+
category: Category of the skill ('public' or 'custom').
18+
relative_path: Relative path from the category root to the skill
19+
directory. Defaults to the skill directory name when omitted.
1720
1821
Returns:
19-
Skill object if parsing succeeds, None otherwise
22+
Skill object if parsing succeeds, None otherwise.
2023
"""
2124
if not skill_file.exists() or skill_file.name != "SKILL.md":
2225
return None
2326

2427
try:
2528
content = skill_file.read_text(encoding="utf-8")
2629

27-
# Extract YAML front matter
28-
# Pattern: ---\nkey: value\n---
30+
# Extract YAML front-matter block between leading ``---`` fences.
2931
front_matter_match = re.match(r"^---\s*\n(.*?)\n---\s*\n", content, re.DOTALL)
30-
3132
if not front_matter_match:
3233
return None
3334

34-
front_matter = front_matter_match.group(1)
35-
36-
# Parse YAML front matter with basic multiline string support
37-
metadata = {}
38-
lines = front_matter.split("\n")
39-
current_key = None
40-
current_value = []
41-
is_multiline = False
42-
multiline_style = None
43-
indent_level = None
44-
45-
for line in lines:
46-
if is_multiline:
47-
if not line.strip():
48-
current_value.append("")
49-
continue
50-
51-
current_indent = len(line) - len(line.lstrip())
52-
53-
if indent_level is None:
54-
if current_indent > 0:
55-
indent_level = current_indent
56-
current_value.append(line[indent_level:])
57-
continue
58-
elif current_indent >= indent_level:
59-
current_value.append(line[indent_level:])
60-
continue
61-
62-
# If we reach here, it's either a new key or the end of multiline
63-
if current_key and is_multiline:
64-
if multiline_style == "|":
65-
metadata[current_key] = "\n".join(current_value).rstrip()
66-
else:
67-
text = "\n".join(current_value).rstrip()
68-
# Replace single newlines with spaces for folded blocks
69-
metadata[current_key] = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
70-
71-
current_key = None
72-
current_value = []
73-
is_multiline = False
74-
multiline_style = None
75-
indent_level = None
76-
77-
if not line.strip():
78-
continue
79-
80-
if ":" in line:
81-
# Handle nested dicts simply by ignoring indentation for now,
82-
# or just extracting top-level keys
83-
key, value = line.split(":", 1)
84-
key = key.strip()
85-
value = value.strip()
86-
87-
if value in (">", "|"):
88-
current_key = key
89-
is_multiline = True
90-
multiline_style = value
91-
current_value = []
92-
indent_level = None
93-
else:
94-
metadata[key] = value
95-
96-
if current_key and is_multiline:
97-
if multiline_style == "|":
98-
metadata[current_key] = "\n".join(current_value).rstrip()
99-
else:
100-
text = "\n".join(current_value).rstrip()
101-
metadata[current_key] = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
102-
103-
# Extract required fields
35+
front_matter_text = front_matter_match.group(1)
36+
37+
try:
38+
metadata = yaml.safe_load(front_matter_text)
39+
except yaml.YAMLError as exc:
40+
logger.error("Invalid YAML front-matter in %s: %s", skill_file, exc)
41+
return None
42+
43+
if not isinstance(metadata, dict):
44+
logger.error("Front-matter in %s is not a YAML mapping", skill_file)
45+
return None
46+
47+
# Extract required fields. Both must be non-empty strings.
10448
name = metadata.get("name")
10549
description = metadata.get("description")
10650

51+
if not name or not isinstance(name, str):
52+
return None
53+
if not description or not isinstance(description, str):
54+
return None
55+
56+
# Normalise: strip surrounding whitespace that YAML may preserve.
57+
name = name.strip()
58+
description = description.strip()
59+
10760
if not name or not description:
10861
return None
10962

11063
license_text = metadata.get("license")
64+
if license_text is not None:
65+
license_text = str(license_text).strip() or None
11166

11267
return Skill(
11368
name=name,
@@ -117,9 +72,9 @@ def parse_skill_file(skill_file: Path, category: str, relative_path: Path | None
11772
skill_file=skill_file,
11873
relative_path=relative_path or Path(skill_file.parent.name),
11974
category=category,
120-
enabled=True, # Default to enabled, actual state comes from config file
75+
enabled=True, # Actual state comes from the extensions config file.
12176
)
12277

123-
except Exception as e:
124-
logger.error("Error parsing skill file %s: %s", skill_file, e)
78+
except Exception:
79+
logger.exception("Unexpected error parsing skill file %s", skill_file)
12580
return None

backend/packages/harness/deerflow/tools/tools.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,22 @@ def get_available_tools(
5959
if not is_host_bash_allowed(config):
6060
tool_configs = [tool for tool in tool_configs if not _is_host_bash_tool(tool)]
6161

62-
loaded_tools = [resolve_variable(tool.use, BaseTool) for tool in tool_configs]
62+
loaded_tools_raw = [(cfg, resolve_variable(cfg.use, BaseTool)) for cfg in tool_configs]
63+
64+
# Warn when the config ``name`` field and the tool object's ``.name``
65+
# attribute diverge — this mismatch is the root cause of issue #1803 where
66+
# the LLM receives one name in its tool schema but the runtime router
67+
# recognises a different name, producing "not a valid tool" errors.
68+
for cfg, loaded in loaded_tools_raw:
69+
if cfg.name != loaded.name:
70+
logger.warning(
71+
"Tool name mismatch: config name %r does not match tool .name %r (use: %s). The tool's own .name will be used for binding.",
72+
cfg.name,
73+
loaded.name,
74+
cfg.use,
75+
)
76+
77+
loaded_tools = [t for _, t in loaded_tools_raw]
6378

6479
# Conditionally add tools based on config
6580
builtin_tools = BUILTIN_TOOLS.copy()
@@ -134,4 +149,20 @@ def get_available_tools(
134149
logger.warning(f"Failed to load ACP tool: {e}")
135150

136151
logger.info(f"Total tools loaded: {len(loaded_tools)}, built-in tools: {len(builtin_tools)}, MCP tools: {len(mcp_tools)}, ACP tools: {len(acp_tools)}")
137-
return loaded_tools + builtin_tools + mcp_tools + acp_tools
152+
153+
# Deduplicate by tool name — config-loaded tools take priority, followed by
154+
# built-ins, MCP tools, and ACP tools. Duplicate names cause the LLM to
155+
# receive ambiguous or concatenated function schemas (issue #1803).
156+
all_tools = loaded_tools + builtin_tools + mcp_tools + acp_tools
157+
seen_names: set[str] = set()
158+
unique_tools: list[BaseTool] = []
159+
for t in all_tools:
160+
if t.name not in seen_names:
161+
unique_tools.append(t)
162+
seen_names.add(t.name)
163+
else:
164+
logger.warning(
165+
"Duplicate tool name %r detected and skipped — check your config.yaml and MCP server registrations (issue #1803).",
166+
t.name,
167+
)
168+
return unique_tools

0 commit comments

Comments
 (0)