diff --git a/sdk/nexent/core/tools/datamate_search_tool.py b/sdk/nexent/core/tools/datamate_search_tool.py index 5c2e2525b..626cbbca4 100644 --- a/sdk/nexent/core/tools/datamate_search_tool.py +++ b/sdk/nexent/core/tools/datamate_search_tool.py @@ -85,7 +85,7 @@ def __init__( threshold: float = Field( description="Default similarity threshold for search results", default=0.2), kb_page: int = Field( - description="Page index when listing knowledge bases from DataMate", default=0), + description="Page index when listing knowledge bases from DataMate", default=1), kb_page_size: int = Field( description="Page size when listing knowledge bases from DataMate", default=20), ): diff --git a/sdk/nexent/core/tools/knowledge_base_search_tool.py b/sdk/nexent/core/tools/knowledge_base_search_tool.py index 2ac5562df..c6e76f834 100644 --- a/sdk/nexent/core/tools/knowledge_base_search_tool.py +++ b/sdk/nexent/core/tools/knowledge_base_search_tool.py @@ -36,6 +36,7 @@ class KnowledgeBaseSearchTool(Tool): "description_zh": "要执行的搜索查询词" }, "index_names": { + "type": "array", "description": "The list of index names to search", "description_zh": "要索引的知识库" }, @@ -97,9 +98,9 @@ def __init__( self.running_prompt_en = "Searching the knowledge base..." - def forward(self, query: str, index_names: str) -> str: + def forward(self, query: str, index_names: List[str]) -> str: # Parse index_names from string (always required) - search_index_names = [name.strip() for name in index_names.split(",") if name.strip()] + search_index_names = index_names # Use the instance search_mode search_mode = self.search_mode diff --git a/sdk/nexent/datamate/datamate_client.py b/sdk/nexent/datamate/datamate_client.py index d0894db76..af3065084 100644 --- a/sdk/nexent/datamate/datamate_client.py +++ b/sdk/nexent/datamate/datamate_client.py @@ -134,44 +134,65 @@ def _make_request( def list_knowledge_bases( self, - page: int = 0, + page: int = 1, size: int = 20, authorization: Optional[str] = None ) -> List[Dict[str, Any]]: """ - Get list of knowledge bases from DataMate. + Get list of all knowledge bases from DataMate by paginating through all pages. + + Always starts from page 1, reads the total page count from the first response, + then fetches all remaining pages and aggregates the results. Args: - page: Page index (default: 0) - size: Page size (default: 20) - authorization: Optional authorization header + page: Ignored; pagination always starts from page 1 (kept for backward compat). + size: Page size for each request (default: 20). + authorization: Optional authorization header. Returns: - List of knowledge base dictionaries with their IDs and metadata. + Aggregated list of all knowledge base dictionaries with their IDs and metadata. Raises: - RuntimeError: If the API request fails + RuntimeError: If any API request fails. """ try: url = self._build_url("/api/knowledge-base/list") - payload = {"page": page, "size": size} headers = self._build_headers(authorization) - logger.info( - f"Fetching DataMate knowledge bases from: {url}, page={page}, size={size}") + all_knowledge_bases: List[Dict[str, Any]] = [] - response = self._make_request( - "POST", url, headers, json=payload, error_message="Failed to get knowledge base list") - data = response.json() + # Always start from page 1 to get totalPages + current_page = 1 + total_pages = 1 - # Extract knowledge base list from response - knowledge_bases = [] - if data.get("data"): - knowledge_bases = data.get("data").get("content", []) + while current_page <= total_pages: + payload = {"page": current_page, "size": size} + logger.info( + f"Fetching DataMate knowledge bases from: {url}, page={current_page}, size={size}") + + response = self._make_request( + "POST", url, headers, json=payload, + error_message="Failed to get knowledge base list") + data = response.json() + + page_content: List[Dict[str, Any]] = [] + if data.get("data"): + page_content = data.get("data", {}).get("content", []) + + # Read totalPages from the first page response only + if current_page == 1: + total_pages = data.get("data", {}).get("totalPages", 1) + + all_knowledge_bases.extend(page_content) + logger.info( + f"Fetched page {current_page}/{total_pages} " + f"({len(page_content)} items, cumulative: {len(all_knowledge_bases)})") + current_page += 1 logger.info( - f"Successfully fetched {len(knowledge_bases)} knowledge bases from DataMate") - return knowledge_bases + f"Successfully fetched {len(all_knowledge_bases)} knowledge bases from DataMate " + f"across {total_pages} page(s)") + return all_knowledge_bases except httpx.HTTPError as e: logger.error( diff --git a/test/sdk/core/tools/test_knowledge_base_search_tool.py b/test/sdk/core/tools/test_knowledge_base_search_tool.py index 06f54c298..9ac1d6c51 100644 --- a/test/sdk/core/tools/test_knowledge_base_search_tool.py +++ b/test/sdk/core/tools/test_knowledge_base_search_tool.py @@ -248,10 +248,10 @@ def test_forward_with_custom_index_names(self, knowledge_base_search_tool): mock_results = create_mock_search_result(2) knowledge_base_search_tool.vdb_core.hybrid_search.return_value = mock_results - # Pass index_names as parameter (comma-separated string) - result = knowledge_base_search_tool.forward("test query", index_names="custom_index1,custom_index2") + # Pass index_names as a list parameter (forward expects List[str]) + knowledge_base_search_tool.forward("test query", index_names=["custom_index1", "custom_index2"]) - # Verify vdb_core was called with parsed index names + # Verify vdb_core was called with the index names as-is knowledge_base_search_tool.vdb_core.hybrid_search.assert_called_once_with( index_names=["custom_index1", "custom_index2"], query_text="test query", @@ -329,7 +329,8 @@ def test_forward_single_index_name(self, knowledge_base_search_tool): mock_results = create_mock_search_result(1) knowledge_base_search_tool.vdb_core.hybrid_search.return_value = mock_results - result = knowledge_base_search_tool.forward("test query", index_names="single_index") + # Pass index_names as a list parameter (forward expects List[str]) + knowledge_base_search_tool.forward("test query", index_names=["single_index"]) # Verify vdb_core was called with single index knowledge_base_search_tool.vdb_core.hybrid_search.assert_called_once_with( @@ -345,12 +346,12 @@ def test_forward_with_whitespace_in_index_names(self, knowledge_base_search_tool mock_results = create_mock_search_result(1) knowledge_base_search_tool.vdb_core.hybrid_search.return_value = mock_results - # Pass index_names with extra whitespace - result = knowledge_base_search_tool.forward("test query", index_names=" index1 , index2 ") + # Pass index_names as a list parameter (forward expects List[str]) + knowledge_base_search_tool.forward("test query", index_names=[" index1 ", " index2 "]) - # Verify whitespace is stripped + # Verify vdb_core was called with the index names as-is (no stripping performed) knowledge_base_search_tool.vdb_core.hybrid_search.assert_called_once_with( - index_names=["index1", "index2"], + index_names=[" index1 ", " index2 "], query_text="test query", embedding_model=knowledge_base_search_tool.embedding_model, top_k=5 diff --git a/test/sdk/datamate/test_datamate_client.py b/test/sdk/datamate/test_datamate_client.py index 79f50c96b..793d2a631 100644 --- a/test/sdk/datamate/test_datamate_client.py +++ b/test/sdk/datamate/test_datamate_client.py @@ -368,7 +368,7 @@ def test_default_parameters(self, client: DataMateClient): client._http_client.post.assert_called_once_with( "http://datamate.local:30000/api/knowledge-base/list", - json={"page": 0, "size": 20}, + json={"page": 1, "size": 20}, headers={}, timeout=client.timeout, )