- {t("knowledgeBase.fileDetail.messages.chunkCount", { count: totalElements })},第 {totalElements === 0 ? 0 : (currentPage - 1) * pageSize + 1}-
- {Math.min(currentPage * pageSize, totalElements)} 个
+ {t("knowledgeBase.fileDetail.messages.totalChunks", { count: totalElements })},{t("knowledgeBase.fileDetail.messages.showingRange", { start: totalElements === 0 ? 0 : (currentPage - 1) * pageSize + 1, end: totalElements === 0 ? 0 : Math.min(currentPage * pageSize, totalElements) })}
}
onClick={() => setCurrentPage(Math.max(1, currentPage - 1))}
disabled={currentPage <= 1}
- >
- {t("knowledgeBase.fileDetail.messages.previousPage")}
-
+ />
{totalPages === 0 ? 0 : currentPage} / {totalPages}
+ />
-
+
{currentChunks.map((chunk) => (
-
-
-
-
-
-
{t("knowledgeBase.fileDetail.messages.chunkLabel")} {chunk.id}
- {/* 算子名:从 metadata.sliceOperator 显示 */}
- {chunk.metadata?.sliceOperator && (
-
- {chunk.metadata.sliceOperator}
-
- )}
-
-
- {editingChunk === chunk.id ? (
- <>
-
-
- >
- ) : (
- <>
-
-
-
- >
- )}
-
-
-
- {editingChunk === chunk.id ? (
- setEditChunkContent(e.target.value)}
- rows={3}
- />
- ) : (
- chunk.text
- )}
-
- {/* 元数据展示,保持和召回结果风格一致 */}
-
-
{t("knowledgeBase.fileDetail.modal.metadata")}:
-
- {typeof chunk.metadata === "string"
- ? chunk.metadata
- : JSON.stringify(chunk.metadata ?? {}, null, 2)}
-
-
- {/* 结构化元数据的快捷标签(若可用) */}
-
- {chunk?.metadata?.position && {t("knowledgeBase.fileDetail.columns.position")}: {chunk.metadata.position}}
- {chunk?.metadata?.tokens && Token: {chunk.metadata.tokens}}
- {chunk?.metadata?.page && {t("knowledgeBase.fileDetail.columns.page")}: {chunk.metadata.page}}
- {chunk?.metadata?.section && {t("knowledgeBase.fileDetail.columns.section")}: {chunk.metadata.section}}
-
+
+ {t("knowledgeBase.fileDetail.messages.chunkLabel")} {chunk.id}
+ {chunk.metadata?.sliceOperator && (
+
+ {chunk.metadata.sliceOperator}
+
+ )}
+
+ }
+ extra={
+
+
+
+
+
+
+
+
+
+
+ }
+ style={{ wordBreak: "break-all" }}
+ >
+
+ {chunk.text}
+
+
+ metadata
+
+ {typeof chunk.metadata === "string" ? chunk.metadata : JSON.stringify(chunk.metadata ?? {}, null, 2)}
+
))}
{!loading && currentChunks.length === 0 && (
-
+
+
+
)}
@@ -235,29 +292,26 @@ const KnowledgeBaseFileDetail: React.FC = () => {
items={[
{ title:
{t("knowledgeBase.fileDetail.breadcrumb.kbList")} },
{ title: (
{t("knowledgeBase.fileDetail.breadcrumb.kbDetail")}) },
- { title: fileName || `文件 ${ragFileId}` },
+ { title: fileName || t("knowledgeBase.fileDetail.defaultFileName", { id: ragFileId }) },
]}
/>
- {/* 头部统计使用最简占位,后续可扩展 */}
,
iconColor: "#a27e7e",
- status: { label: t("knowledgeBase.fileDetail.messages.ready"), color: "default" },
- name: fileName || `文件 ${ragFileId}`,
+ name: fileName || t("knowledgeBase.fileDetail.defaultFileName", { id: ragFileId }),
description: `${totalElements} ${t("knowledgeBase.fileDetail.messages.chunkCount", { count: 0 })}`,
createdAt: "",
lastUpdated: "",
}}
statistics={[]}
- operations={[{ key: "download", label: t("knowledgeBase.fileDetail.actions.download"), icon:
, onClick: () => {} }]}
+ operations={[]}
/>
{loading ?
: renderChunks()}
- {/* Slice Trace Modal */}
setShowSliceTraceDialog(null)}
@@ -266,7 +320,6 @@ const KnowledgeBaseFileDetail: React.FC = () => {
width={800}
destroyOnClose
>
- {/* 简化为内容占位,真实数据待后端提供更多字段 */}
{t("knowledgeBase.fileDetail.modal.sliceProcessTitle")}
@@ -286,7 +339,6 @@ const KnowledgeBaseFileDetail: React.FC = () => {
- {/* Chunk Detail Modal */}
setChunkDetailModal(null)}
@@ -317,29 +369,160 @@ const KnowledgeBaseFileDetail: React.FC = () => {
key: "metadata",
label: t("knowledgeBase.fileDetail.modal.metadata"),
children: (
-
-
-
{t("knowledgeBase.fileDetail.modal.position")}
-
c.id === chunkDetailModal)?.metadata?.position || ""} readOnly />
-
-
-
{t("knowledgeBase.fileDetail.modal.tokenCount")}
-
c.id === chunkDetailModal)?.metadata?.tokens || ""} readOnly />
-
-
-
{t("knowledgeBase.fileDetail.modal.pageNumber")}
-
c.id === chunkDetailModal)?.metadata?.page || ""} readOnly />
-
-
-
{t("knowledgeBase.fileDetail.modal.chapter")}
-
c.id === chunkDetailModal)?.metadata?.section || ""} readOnly />
-
+
+
+ {JSON.stringify(
+ currentChunks.find((c) => c.id === chunkDetailModal)?.metadata || {},
+ null,
+ 2
+ ) || "{}"}
+
),
},
]}
/>
+
+
setDeleteConfirmModal(null)}
+ onOk={() => handleDeleteChunk(deleteConfirmModal!)}
+ title={t("knowledgeBase.fileDetail.modal.deleteConfirmTitle")}
+ okText={t("knowledgeBase.fileDetail.actions.confirm")}
+ cancelText={t("knowledgeBase.fileDetail.actions.cancel")}
+ okButtonProps={{ danger: true, loading: deleting }}
+ centered
+ >
+ {t("knowledgeBase.fileDetail.modal.deleteConfirmMessage")}
+
+
+
{
+ setEditingChunk(null);
+ setEditChunkContent("");
+ setEditChunkMetadata("");
+ setMetadataValid(true);
+ setMetadataError(null);
+ }}
+ footer={null}
+ title={
+
+
+ {t("knowledgeBase.fileDetail.modal.editChunkTitle")} - {editingChunk}
+
+ }
+ width={900}
+ destroyOnClose
+ >
+
+
+
+
+
+
setEditChunkContent(e.target.value)}
+ rows={6}
+ placeholder={t("knowledgeBase.fileDetail.placeholders.chunkContent")}
+ className="font-mono"
+ style={{
+ fontFamily: 'ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, monospace',
+ }}
+ />
+
+
+
+
+
+
+ {metadataValid ? (
+
+
+ {t("knowledgeBase.fileDetail.messages.jsonValid")}
+
+ ) : (
+
+
+ {t("knowledgeBase.fileDetail.messages.jsonInvalid")}
+
+ )}
+
}
+ onClick={formatJson}
+ type="default"
+ >
+ {t("knowledgeBase.fileDetail.actions.formatJson")}
+
+
+
+
+
handleMetadataChange(e.target.value)}
+ rows={10}
+ placeholder={t("knowledgeBase.fileDetail.placeholders.metadata")}
+ className="font-mono"
+ style={{
+ fontFamily: 'ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, monospace',
+ borderColor: metadataValid ? '#d9d9d9' : '#ff4d4f',
+ }}
+ status={metadataValid ? undefined : 'error'}
+ />
+ {metadataError && (
+
+ )}
+
+ {t("knowledgeBase.fileDetail.messages.metadataHint")}
+
+
+
+
+
+
+
+
+
+
);
};
diff --git a/frontend/src/pages/KnowledgeBase/components/CreateKnowledgeBase.tsx b/frontend/src/pages/KnowledgeBase/components/CreateKnowledgeBase.tsx
index 035fecf06..c476cec26 100644
--- a/frontend/src/pages/KnowledgeBase/components/CreateKnowledgeBase.tsx
+++ b/frontend/src/pages/KnowledgeBase/components/CreateKnowledgeBase.tsx
@@ -177,7 +177,14 @@ export default function CreateKnowledgeBase({
diff --git a/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts b/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts
index 102520c82..1ac851737 100644
--- a/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts
+++ b/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts
@@ -87,3 +87,21 @@ export function queryKnowledgeBase(data: {
}) {
return post("/api/knowledge-base/query", data);
}
+
+/**
+ * Update the text (and optionally the metadata) of a single chunk.
+ *
+ * Issues `PUT /api/knowledge-base/{knowledgeBaseId}/chunks/{chunkId}`.
+ * Both ids are percent-encoded so that an id containing `/`, `?`, `#` or
+ * whitespace cannot change the shape of the request path.
+ *
+ * @param knowledgeBaseId - Id of the knowledge base that owns the chunk.
+ * @param chunkId - Id of the chunk to update.
+ * @param data - New chunk text plus optional replacement metadata.
+ * @returns The promise produced by the shared `put` helper.
+ */
+export function updateKnowledgeBaseChunk(
+  knowledgeBaseId: string,
+  chunkId: string,
+  data: { text: string; metadata?: Record<string, unknown> }
+) {
+  // NOTE(review): `put` is re-typed through `unknown`; its declared signature
+  // is outside this hunk — confirm whether this cast can be dropped.
+  const typedPut = put as unknown as (url: string, data?: object) => Promise<unknown>;
+  const kbSegment = encodeURIComponent(knowledgeBaseId);
+  const chunkSegment = encodeURIComponent(chunkId);
+  return typedPut(`/api/knowledge-base/${kbSegment}/chunks/${chunkSegment}`, data);
+}
+
+/**
+ * Delete a single chunk from a knowledge base.
+ *
+ * Issues `DELETE /api/knowledge-base/{knowledgeBaseId}/chunks/{chunkId}`.
+ * Both ids are percent-encoded so that an id containing `/`, `?`, `#` or
+ * whitespace cannot change the shape of the request path.
+ *
+ * @param knowledgeBaseId - Id of the knowledge base that owns the chunk.
+ * @param chunkId - Id of the chunk to delete.
+ * @returns The promise produced by the shared `del` helper.
+ */
+export function deleteKnowledgeBaseChunk(
+  knowledgeBaseId: string,
+  chunkId: string
+) {
+  const kbSegment = encodeURIComponent(knowledgeBaseId);
+  const chunkSegment = encodeURIComponent(chunkId);
+  return del(`/api/knowledge-base/${kbSegment}/chunks/${chunkSegment}`);
+}
diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts
index 4ff0c64fe..1c69395cb 100644
--- a/frontend/vite.config.ts
+++ b/frontend/vite.config.ts
@@ -49,9 +49,9 @@ export default defineConfig({
};
// Python 服务: rag, synthesis, annotation, evaluation, models
- const pythonPaths = ["rag", "operators", "categories", "synthesis", "annotation", "knowledge-base", "data-collection", "evaluation", "models"];
+ const pythonPaths = ["rag", "cleaning", "operators", "categories", "synthesis", "annotation", "knowledge-base", "data-collection", "evaluation", "models", "sys-param"];
// Java services: data-management (knowledge-base and sys-param are listed in pythonPaths above)
- const javaPaths = ["data-management", "sys-param"];
+ const javaPaths = ["data-management"];
const proxy: Record = {};
// SSE 端点需要禁用缓冲
diff --git a/runtime/datamate-python/app/module/rag/infra/vectorstore/__init__.py b/runtime/datamate-python/app/module/rag/infra/vectorstore/__init__.py
index 289fdb7aa..9d4101935 100644
--- a/runtime/datamate-python/app/module/rag/infra/vectorstore/__init__.py
+++ b/runtime/datamate-python/app/module/rag/infra/vectorstore/__init__.py
@@ -12,6 +12,8 @@
drop_collection,
get_vector_dimension,
rename_collection,
+ update_chunk_by_id,
+ delete_chunk_by_id,
)
__all__ = [
@@ -23,4 +25,6 @@
"get_vector_dimension",
"delete_chunks_by_rag_file_ids",
"chunks_to_documents",
+ "update_chunk_by_id",
+ "delete_chunk_by_id",
]
diff --git a/runtime/datamate-python/app/module/rag/infra/vectorstore/store.py b/runtime/datamate-python/app/module/rag/infra/vectorstore/store.py
index dd70933e2..58ae56ba1 100644
--- a/runtime/datamate-python/app/module/rag/infra/vectorstore/store.py
+++ b/runtime/datamate-python/app/module/rag/infra/vectorstore/store.py
@@ -13,7 +13,6 @@
"""
from __future__ import annotations
-import json
import logging
from typing import List, Optional
@@ -27,6 +26,47 @@
logger = logging.getLogger(__name__)
+BATCH_DELETE_SIZE = 100
+
+
+def _delete_chunks_by_rag_file_id_batched(client, collection_name: str, rag_file_id: str) -> int:
+    """Delete, in batches, every chunk whose ``metadata["rag_file_id"]`` matches.
+
+    Repeatedly queries up to ``BATCH_DELETE_SIZE`` chunk ids for the file and
+    deletes them by id, until a query returns nothing or a short batch.
+    The operation is best-effort: any exception is logged as a warning and
+    ends the loop, so the returned count may cover only part of the file.
+
+    Args:
+        client: Milvus client exposing ``query`` and ``delete``.
+        collection_name: Name of the collection to delete from.
+        rag_file_id: RAG file id whose chunks should be removed.
+
+    Returns:
+        Number of distinct chunk ids for which a delete was issued.
+    """
+
+    def _quote(value) -> str:
+        # Escape backslashes and double quotes so the value cannot terminate
+        # the string literal inside the filter expression.
+        escaped = str(value).replace("\\", "\\\\").replace('"', '\\"')
+        return f'"{escaped}"'
+
+    filter_expr = f'metadata["rag_file_id"] == {_quote(rag_file_id)}'
+    deleted_ids = set()
+    # A query issued right after a delete may still return the rows that were
+    # just removed. Such rows are neither re-deleted nor re-counted, and the
+    # loop gives up after a few rounds without progress instead of spinning.
+    # NOTE(review): whether reads here are strongly consistent depends on the
+    # client/collection configuration, which is not visible in this hunk.
+    stalled_rounds = 0
+    max_stalled_rounds = 3
+
+    while True:
+        try:
+            results = client.query(
+                collection_name=collection_name,
+                filter=filter_expr,
+                output_fields=["id"],
+                limit=BATCH_DELETE_SIZE,
+            )
+            if not results:
+                break
+
+            batch_ids = [r["id"] for r in results]
+            new_ids = [cid for cid in batch_ids if cid not in deleted_ids]
+
+            if new_ids:
+                stalled_rounds = 0
+                id_list = ", ".join(_quote(cid) for cid in new_ids)
+                client.delete(collection_name=collection_name, filter=f"id in [{id_list}]")
+                deleted_ids.update(new_ids)
+            else:
+                stalled_rounds += 1
+                if stalled_rounds >= max_stalled_rounds:
+                    logger.warning(
+                        "分批删除未取得进展,提前结束: collection=%s rag_file_id=%s",
+                        collection_name,
+                        rag_file_id,
+                    )
+                    break
+
+            # A short batch means the query has exhausted the matching rows.
+            if len(batch_ids) < BATCH_DELETE_SIZE:
+                break
+        except Exception as e:
+            logger.warning("分批删除失败: collection=%s rag_file_id=%s error=%s", collection_name, rag_file_id, e)
+            break
+
+    return len(deleted_ids)
+
+
def drop_collection(collection_name: str) -> None:
"""删除 Milvus 集合
@@ -201,12 +241,8 @@ def delete_chunks_by_rag_file_ids(collection_name: str, rag_file_ids: List[str])
client = get_milvus_client()
for rid in rag_file_ids:
- json_value = json.dumps({"rag_file_id": rid})
- filter_expr = f'JSON_CONTAINS(metadata, \'{json_value}\')'
- try:
- client.delete(collection_name=collection_name, filter=filter_expr)
- except Exception as del_err:
- logger.warning("删除分块时部分失败: collection=%s rag_file_id=%s: %s", collection_name, rid, del_err)
+ deleted = _delete_chunks_by_rag_file_id_batched(client, collection_name, rid)
+ logger.info("删除文件分块: collection=%s rag_file_id=%s deleted=%d", collection_name, rid, deleted)
logger.info("已按 rag_file_id 删除集合 %s 中的分块: %s", collection_name, rag_file_ids)
@@ -238,3 +274,111 @@ def chunks_to_documents(
documents.append(doc)
return documents, ids
+
+
+def update_chunk_by_id(
+    collection_name: str,
+    chunk_id: str,
+    text: str,
+    metadata: Optional[dict] = None,
+    embedding_instance=None,
+) -> None:
+    """Replace the text, metadata and vector of the chunk with the given id.
+
+    The chunk is looked up first; its text is re-embedded and the row is then
+    written back with a single upsert, so a failed write does not leave the
+    chunk deleted.
+
+    Args:
+        collection_name: Name of the collection holding the chunk.
+        chunk_id: Id of the chunk to update.
+        text: New chunk text; it is re-embedded to produce the stored vector.
+        metadata: New metadata. When ``None`` the stored metadata is kept.
+            The stored ``rag_file_id`` (if any) always wins over a value
+            supplied here, so the chunk stays linked to its file.
+        embedding_instance: Object exposing ``embed_query``. When falsy, one
+            is created with ``EmbeddingFactory.create_embeddings()``.
+
+    Raises:
+        BusinessError: ``RAG_CHUNK_NOT_FOUND`` when no chunk has this id, or
+            ``RAG_MILVUS_ERROR`` wrapping any other failure.
+    """
+    try:
+        client = get_milvus_client()
+
+        # Escape backslashes and quotes so the id cannot break out of the
+        # string literal in the filter expression.
+        escaped_id = str(chunk_id).replace("\\", "\\\\").replace('"', '\\"')
+        filter_expr = f'id == "{escaped_id}"'
+        existing = client.query(
+            collection_name=collection_name,
+            filter=filter_expr,
+            output_fields=["metadata"],
+        )
+
+        if not existing:
+            raise BusinessError(
+                ErrorCodes.RAG_CHUNK_NOT_FOUND,
+                f"Chunk not found: {chunk_id}"
+            )
+
+        # The stored value may be missing or null; treat both as empty.
+        existing_metadata = existing[0].get("metadata") or {}
+
+        if metadata is None:
+            metadata = existing_metadata
+        elif isinstance(existing_metadata, dict) and "rag_file_id" in existing_metadata:
+            # Always carry over the stored rag_file_id: per-file deletion and
+            # the file's chunk count both depend on this link staying intact.
+            metadata = {**metadata, "rag_file_id": existing_metadata["rag_file_id"]}
+
+        if embedding_instance:
+            embedding = embedding_instance
+        else:
+            from app.module.rag.infra.embeddings import EmbeddingFactory
+            embedding = EmbeddingFactory.create_embeddings()
+
+        # NOTE(review): this uses embed_query; if the chunks were originally
+        # indexed through a document-embedding call, confirm both produce
+        # compatible vectors for the configured model.
+        vector = embedding.embed_query(text)
+
+        # Upsert replaces the row in one call instead of delete + insert.
+        client.upsert(
+            collection_name=collection_name,
+            data=[{
+                "id": chunk_id,
+                "text": text,
+                "metadata": metadata,
+                "vector": vector,
+            }]
+        )
+
+        logger.info("成功更新分块: collection=%s chunk_id=%s", collection_name, chunk_id)
+
+    except BusinessError:
+        raise
+    except Exception as e:
+        logger.error("更新分块失败: %s", e, exc_info=True)
+        raise BusinessError(ErrorCodes.RAG_MILVUS_ERROR, f"更新分块失败: {str(e)}") from e
+
+
+def delete_chunk_by_id(collection_name: str, chunk_id: str) -> Optional[str]:
+    """Delete the chunk with the given id and report which file it belonged to.
+
+    The chunk's metadata is queried before deletion so the caller can learn
+    its ``rag_file_id``. The delete is issued even when the query finds
+    nothing, so deleting an unknown id succeeds and returns ``None``.
+
+    Args:
+        collection_name: Name of the collection holding the chunk.
+        chunk_id: Id of the chunk to delete.
+
+    Returns:
+        The ``rag_file_id`` stored in the chunk's metadata, or ``None`` when
+        the chunk was not found or its metadata carries no such key.
+
+    Raises:
+        BusinessError: ``RAG_MILVUS_ERROR`` wrapping any failure.
+    """
+    try:
+        client = get_milvus_client()
+
+        # Escape backslashes and quotes so the id cannot break out of the
+        # string literal in the filter expression.
+        escaped_id = str(chunk_id).replace("\\", "\\\\").replace('"', '\\"')
+        filter_expr = f'id == "{escaped_id}"'
+
+        # Look up the metadata first to obtain the rag_file_id.
+        existing = client.query(
+            collection_name=collection_name,
+            filter=filter_expr,
+            output_fields=["metadata"],
+        )
+
+        rag_file_id = None
+        if existing:
+            metadata = existing[0].get("metadata")
+            # Null or non-dict metadata must not prevent the delete below.
+            if isinstance(metadata, dict):
+                rag_file_id = metadata.get("rag_file_id")
+
+        client.delete(collection_name=collection_name, filter=filter_expr)
+
+        logger.info("成功删除分块: collection=%s chunk_id=%s rag_file_id=%s", collection_name, chunk_id, rag_file_id)
+
+        return rag_file_id
+
+    except Exception as e:
+        logger.error("删除分块失败: %s", e, exc_info=True)
+        raise BusinessError(ErrorCodes.RAG_MILVUS_ERROR, f"删除分块失败: {str(e)}") from e
diff --git a/runtime/datamate-python/app/module/rag/interface/knowledge_base.py b/runtime/datamate-python/app/module/rag/interface/knowledge_base.py
index 70f8d4b60..2b212f605 100644
--- a/runtime/datamate-python/app/module/rag/interface/knowledge_base.py
+++ b/runtime/datamate-python/app/module/rag/interface/knowledge_base.py
@@ -21,6 +21,7 @@
RetrieveReq,
PagingQuery,
QueryRequest,
+ ChunkUpdateReq,
)
from app.module.rag.service.knowledge_base_service import KnowledgeBaseService
from app.module.rag.service.unified_retrieval_service import UnifiedRetrievalService
@@ -151,6 +152,39 @@ async def get_file_chunks(
return SuccessResponse(data=result)
+@router.put("/{knowledge_base_id}/chunks/{chunk_id}", response_model=SuccessResponse)
+async def update_chunk(
+    knowledge_base_id: str,
+    chunk_id: str,
+    request: ChunkUpdateReq,
+    db: AsyncSession = Depends(get_db),
+):
+    """Update the text and metadata of one chunk."""
+    # Forward the path ids and the request body fields to the service layer.
+    update_args = {
+        "knowledge_base_id": knowledge_base_id,
+        "chunk_id": chunk_id,
+        "text": request.text,
+        "metadata": request.metadata,
+    }
+    await KnowledgeBaseService(db).update_chunk(**update_args)
+    return SuccessResponse(message="分块更新成功")
+
+
+@router.delete("/{knowledge_base_id}/chunks/{chunk_id}", response_model=SuccessResponse)
+async def delete_chunk(
+    knowledge_base_id: str,
+    chunk_id: str,
+    db: AsyncSession = Depends(get_db),
+):
+    """Delete one chunk from a knowledge base."""
+    # The service layer performs the validation and the actual deletion.
+    kb_service = KnowledgeBaseService(db)
+    await kb_service.delete_chunk(knowledge_base_id=knowledge_base_id, chunk_id=chunk_id)
+    return SuccessResponse(message="分块删除成功")
+
+
@router.post("/retrieve", response_model=SuccessResponse)
async def retrieve_knowledge_base(
request: RetrieveReq,
diff --git a/runtime/datamate-python/app/module/rag/schema/request.py b/runtime/datamate-python/app/module/rag/schema/request.py
index 262fb785c..4463339cc 100644
--- a/runtime/datamate-python/app/module/rag/schema/request.py
+++ b/runtime/datamate-python/app/module/rag/schema/request.py
@@ -342,3 +342,21 @@ class Config:
"query": "什么是机器学习?"
}
}
+
+
+class ChunkUpdateReq(BaseModel):
+ """Request body for updating a chunk: the new text plus optional metadata."""
+ # Required; must contain at least one character (min_length=1).
+ text: str = Field(..., min_length=1, description="分块文本内容")
+ # Optional; defaults to None when the client sends no metadata.
+ metadata: Optional[dict] = Field(default=None, description="元数据")
+
+ class Config:
+ # Example payload attached to the generated JSON schema.
+ json_schema_extra = {
+ "example": {
+ "text": "这是修改后的分块内容...",
+ "metadata": {
+ "fileName": "document.pdf",
+ "chunkIndex": 0,
+ "customField": "custom value"
+ }
+ }
+ }
diff --git a/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py b/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py
index 41105c89d..99027ce67 100644
--- a/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py
+++ b/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py
@@ -16,7 +16,14 @@
from app.db.models.dataset_management import DatasetFiles
from app.db.models.knowledge_gen import KnowledgeBase, RagFile, FileStatus, RagType
from app.db.models.models import Models
-from app.module.rag.infra.vectorstore import drop_collection, rename_collection, delete_chunks_by_rag_file_ids
+from app.module.rag.infra.embeddings import EmbeddingFactory
+from app.module.rag.infra.vectorstore import (
+ drop_collection,
+ rename_collection,
+ delete_chunks_by_rag_file_ids,
+ update_chunk_by_id,
+ delete_chunk_by_id,
+)
from app.module.rag.repository import KnowledgeBaseRepository, RagFileRepository
from app.module.rag.schema.request import (
KnowledgeBaseCreateReq,
@@ -388,3 +395,94 @@ async def delete_files(self, knowledge_base_id: str, request: DeleteFilesReq) ->
await self.db.commit()
logger.info("成功删除 %d 个文件", len(rag_files))
+
+    async def update_chunk(
+        self,
+        knowledge_base_id: str,
+        chunk_id: str,
+        text: str,
+        metadata: dict = None,
+    ) -> None:
+        """Update the text and metadata of one chunk in a document knowledge base.
+
+        Loads the knowledge base, builds an embedding client from the model it
+        references, and runs ``update_chunk_by_id`` in a worker thread against
+        the collection named after the knowledge base.
+
+        Args:
+            knowledge_base_id: Knowledge base id.
+            chunk_id: Chunk id.
+            text: New chunk text.
+            metadata: New metadata (optional).
+
+        Raises:
+            BusinessError: When the knowledge base does not exist, is not of
+                the DOCUMENT type, or its embedding model cannot be found.
+        """
+        kb = await self.kb_repo.get_by_id(knowledge_base_id)
+        if not kb:
+            raise BusinessError(ErrorCodes.RAG_KNOWLEDGE_BASE_NOT_FOUND)
+
+        # Only DOCUMENT-type knowledge bases support chunk updates.
+        if kb.type != RagType.DOCUMENT.value:
+            raise BusinessError(
+                ErrorCodes.RAG_INVALID_REQUEST,
+                f"知识库类型 {kb.type} 不支持分块更新"
+            )
+
+        from app.module.system.service.common_service import get_model_by_id
+        import asyncio
+
+        model_row = await get_model_by_id(self.db, kb.embedding_model)
+        if not model_row:
+            raise BusinessError(ErrorCodes.RAG_MODEL_NOT_FOUND)
+
+        model_name = str(model_row.model_name)
+        base_url = getattr(model_row, "base_url", None)
+        api_key = getattr(model_row, "api_key", None)
+        embedder = EmbeddingFactory.create_embeddings(
+            model_name=model_name,
+            base_url=base_url,
+            api_key=api_key,
+        )
+
+        # update_chunk_by_id is a synchronous function; run it in a worker
+        # thread and await the result.
+        await asyncio.to_thread(
+            update_chunk_by_id,
+            collection_name=str(kb.name),
+            chunk_id=chunk_id,
+            text=text,
+            metadata=metadata,
+            embedding_instance=embedder,
+        )
+
+        logger.info("成功更新分块: kb=%s chunk_id=%s", knowledge_base_id, chunk_id)
+
+    async def delete_chunk(
+        self,
+        knowledge_base_id: str,
+        chunk_id: str,
+    ) -> None:
+        """Delete one chunk from a document knowledge base.
+
+        After the chunk is removed from the vector store, the owning file's
+        ``chunk_count`` is decremented when that file can be resolved and its
+        count is positive.
+
+        Args:
+            knowledge_base_id: Knowledge base id.
+            chunk_id: Chunk id.
+
+        Raises:
+            BusinessError: When the knowledge base does not exist or is not
+                of the DOCUMENT type.
+        """
+        kb = await self.kb_repo.get_by_id(knowledge_base_id)
+        if not kb:
+            raise BusinessError(ErrorCodes.RAG_KNOWLEDGE_BASE_NOT_FOUND)
+
+        # Only DOCUMENT-type knowledge bases support chunk deletion.
+        if kb.type != RagType.DOCUMENT.value:
+            raise BusinessError(
+                ErrorCodes.RAG_INVALID_REQUEST,
+                f"知识库类型 {kb.type} 不支持分块删除"
+            )
+
+        import asyncio
+        # delete_chunk_by_id is synchronous; it returns the rag_file_id read
+        # from the chunk's metadata, or None.
+        owner_file_id = await asyncio.to_thread(
+            delete_chunk_by_id,
+            collection_name=str(kb.name),
+            chunk_id=chunk_id,
+        )
+
+        owner_file = await self.file_repo.get_by_id(owner_file_id) if owner_file_id else None
+        if owner_file and owner_file.chunk_count and owner_file.chunk_count > 0:
+            owner_file.chunk_count -= 1
+            await self.db.commit()
+
+        logger.info("成功删除分块: kb=%s chunk_id=%s", knowledge_base_id, chunk_id)
diff --git a/runtime/datamate-python/app/module/rag/service/strategy/vector_strategy.py b/runtime/datamate-python/app/module/rag/service/strategy/vector_strategy.py
index 5570f51a7..9db26c2bd 100644
--- a/runtime/datamate-python/app/module/rag/service/strategy/vector_strategy.py
+++ b/runtime/datamate-python/app/module/rag/service/strategy/vector_strategy.py
@@ -395,3 +395,78 @@ def _filter_and_clean_chunks(self, chunks: list) -> list:
logger.info("有效分块数量: %d / %d", len(valid_chunks), len(chunks))
return valid_chunks
+
+    async def update_chunk(
+        self,
+        knowledge_base_id: str,
+        chunk_id: str,
+        text: str,
+        metadata: Optional[dict] = None,
+    ) -> None:
+        """Update the text and metadata of one chunk.
+
+        Resolves the knowledge base and its embedding model, then runs
+        ``update_chunk_by_id`` in a worker thread against the collection
+        named after the knowledge base.
+
+        Args:
+            knowledge_base_id: Knowledge base id.
+            chunk_id: Chunk id.
+            text: New chunk text.
+            metadata: New metadata (optional).
+
+        Raises:
+            BusinessError: When the knowledge base or its embedding model
+                cannot be found.
+        """
+        from app.module.rag.infra.vectorstore import update_chunk_by_id
+
+        knowledge_base = await KnowledgeBaseRepository(self.db).get_by_id(knowledge_base_id)
+        if not knowledge_base:
+            raise BusinessError(ErrorCodes.RAG_KNOWLEDGE_BASE_NOT_FOUND)
+
+        model_row = await get_model_by_id(self.db, knowledge_base.embedding_model)
+        if not model_row:
+            raise BusinessError(ErrorCodes.RAG_MODEL_NOT_FOUND)
+
+        model_name = str(model_row.model_name)
+        base_url = getattr(model_row, "base_url", None)
+        api_key = getattr(model_row, "api_key", None)
+        embedder = EmbeddingFactory.create_embeddings(
+            model_name=model_name,
+            base_url=base_url,
+            api_key=api_key,
+        )
+
+        # The vector-store helper is synchronous; run it in a worker thread.
+        await asyncio.to_thread(
+            update_chunk_by_id,
+            collection_name=str(knowledge_base.name),
+            chunk_id=chunk_id,
+            text=text,
+            metadata=metadata,
+            embedding_instance=embedder,
+        )
+
+        logger.info("成功更新分块: kb=%s chunk_id=%s", knowledge_base_id, chunk_id)
+
+    async def delete_chunk(
+        self,
+        knowledge_base_id: str,
+        chunk_id: str,
+    ) -> None:
+        """Delete one chunk from the knowledge base's collection.
+
+        Args:
+            knowledge_base_id: Knowledge base id.
+            chunk_id: Chunk id.
+
+        Raises:
+            BusinessError: When the knowledge base cannot be found.
+        """
+        from app.module.rag.infra.vectorstore import delete_chunk_by_id
+
+        knowledge_base = await KnowledgeBaseRepository(self.db).get_by_id(knowledge_base_id)
+        if not knowledge_base:
+            raise BusinessError(ErrorCodes.RAG_KNOWLEDGE_BASE_NOT_FOUND)
+
+        # NOTE(review): the rag_file_id returned by delete_chunk_by_id is
+        # discarded here, so no file chunk_count is adjusted by this method —
+        # confirm whether callers rely on that being handled elsewhere.
+        collection = str(knowledge_base.name)
+        await asyncio.to_thread(
+            delete_chunk_by_id,
+            collection_name=collection,
+            chunk_id=chunk_id,
+        )
+
+        logger.info("成功删除分块: kb=%s chunk_id=%s", knowledge_base_id, chunk_id)