diff --git a/frontend/src/components/DeleteConfirmModal.tsx b/frontend/src/components/DeleteConfirmModal.tsx index 87f5438d1..1a7a87790 100644 --- a/frontend/src/components/DeleteConfirmModal.tsx +++ b/frontend/src/components/DeleteConfirmModal.tsx @@ -57,6 +57,7 @@ export default function DeleteConfirmModal({ okType="danger" okText={t("components.deleteConfirm.confirm")} cancelText={t("components.deleteConfirm.cancel")} + centered >

{defaultMessage}

diff --git a/frontend/src/components/DetailHeader.tsx b/frontend/src/components/DetailHeader.tsx index fcf7480bb..cbd332619 100644 --- a/frontend/src/components/DetailHeader.tsx +++ b/frontend/src/components/DetailHeader.tsx @@ -1,6 +1,6 @@ import React, { useLayoutEffect, useEffect, useRef, useState, useCallback, useMemo } from "react"; import { Database } from "lucide-react"; -import { Card, Button, Tag, Tooltip, Popconfirm, Popover } from "antd"; +import { Card, Button, Tag, Tooltip, Modal } from "antd"; import type { ItemType } from "antd/es/menu/interface"; import AddTagPopover from "./AddTagPopover"; import ActionDropdown from "./ActionDropdown"; @@ -291,23 +291,30 @@ function DetailHeader({ ); } if (op.confirm) { + const showConfirmModal = () => { + Modal.confirm({ + title: op.confirm?.title, + content: op.confirm?.description, + okText: op.confirm?.okText, + okType: op.danger ? "danger" : "primary", + cancelText: op.confirm?.cancelText, + centered: true, + onOk: () => { + if (op.onClick) { + op.onClick(); + } else { + op?.confirm?.onConfirm?.(); + } + }, + }); + }; return ( - { - if (op.onClick) { - op.onClick() - } else { - op?.confirm?.onConfirm?.(); - } - }} - okType={op.danger ? "danger" : "primary"} - overlayStyle={{ zIndex: 9999 }} - > - + /> {totalPages === 0 ? 0 : currentPage} / {totalPages} + /> -
+
{currentChunks.map((chunk) => ( - -
-
-
-
-

{t("knowledgeBase.fileDetail.messages.chunkLabel")} {chunk.id}

- {/* 算子名:从 metadata.sliceOperator 显示 */} - {chunk.metadata?.sliceOperator && ( - - {chunk.metadata.sliceOperator} - - )} -
-
- {editingChunk === chunk.id ? ( - <> - - - - ) : ( - <> - - - - - )} -
-
-
- {editingChunk === chunk.id ? ( - setEditChunkContent(e.target.value)} - rows={3} - /> - ) : ( - chunk.text - )} -
- {/* 元数据展示,保持和召回结果风格一致 */} -
-
{t("knowledgeBase.fileDetail.modal.metadata")}:
-
-                    {typeof chunk.metadata === "string"
-                      ? chunk.metadata
-                      : JSON.stringify(chunk.metadata ?? {}, null, 2)}
-                  
-
- {/* 结构化元数据的快捷标签(若可用) */} -
- {chunk?.metadata?.position && {t("knowledgeBase.fileDetail.columns.position")}: {chunk.metadata.position}} - {chunk?.metadata?.tokens && Token: {chunk.metadata.tokens}} - {chunk?.metadata?.page && {t("knowledgeBase.fileDetail.columns.page")}: {chunk.metadata.page}} - {chunk?.metadata?.section && {t("knowledgeBase.fileDetail.columns.section")}: {chunk.metadata.section}} -
+ + {t("knowledgeBase.fileDetail.messages.chunkLabel")} {chunk.id} + {chunk.metadata?.sliceOperator && ( + + {chunk.metadata.sliceOperator} + + )} +
+ } + extra={ +
+ + + + + + + + +
+ } + style={{ wordBreak: "break-all" }} + > +
+ {chunk.text} +
+
+ metadata +
+                {typeof chunk.metadata === "string" ? chunk.metadata : JSON.stringify(chunk.metadata ?? {}, null, 2)}
+              
))} {!loading && currentChunks.length === 0 && ( - +
+ +
)}
@@ -235,29 +292,26 @@ const KnowledgeBaseFileDetail: React.FC = () => { items={[ { title: {t("knowledgeBase.fileDetail.breadcrumb.kbList")} }, { title: ({t("knowledgeBase.fileDetail.breadcrumb.kbDetail")}) }, - { title: fileName || `文件 ${ragFileId}` }, + { title: fileName || t("knowledgeBase.fileDetail.defaultFileName", { id: ragFileId }) }, ]} /> - {/* 头部统计使用最简占位,后续可扩展 */} , iconColor: "#a27e7e", - status: { label: t("knowledgeBase.fileDetail.messages.ready"), color: "default" }, - name: fileName || `文件 ${ragFileId}`, + name: fileName || t("knowledgeBase.fileDetail.defaultFileName", { id: ragFileId }), description: `${totalElements} ${t("knowledgeBase.fileDetail.messages.chunkCount", { count: 0 })}`, createdAt: "", lastUpdated: "", }} statistics={[]} - operations={[{ key: "download", label: t("knowledgeBase.fileDetail.actions.download"), icon: , onClick: () => {} }]} + operations={[]} /> {loading ?
: renderChunks()}
- {/* Slice Trace Modal */} setShowSliceTraceDialog(null)} @@ -266,7 +320,6 @@ const KnowledgeBaseFileDetail: React.FC = () => { width={800} destroyOnClose > - {/* 简化为内容占位,真实数据待后端提供更多字段 */}

{t("knowledgeBase.fileDetail.modal.sliceProcessTitle")}

@@ -286,7 +339,6 @@ const KnowledgeBaseFileDetail: React.FC = () => {
- {/* Chunk Detail Modal */} setChunkDetailModal(null)} @@ -317,29 +369,160 @@ const KnowledgeBaseFileDetail: React.FC = () => { key: "metadata", label: t("knowledgeBase.fileDetail.modal.metadata"), children: ( -
-
-
{t("knowledgeBase.fileDetail.modal.position")}
- c.id === chunkDetailModal)?.metadata?.position || ""} readOnly /> -
-
-
{t("knowledgeBase.fileDetail.modal.tokenCount")}
- c.id === chunkDetailModal)?.metadata?.tokens || ""} readOnly /> -
-
-
{t("knowledgeBase.fileDetail.modal.pageNumber")}
- c.id === chunkDetailModal)?.metadata?.page || ""} readOnly /> -
-
-
{t("knowledgeBase.fileDetail.modal.chapter")}
- c.id === chunkDetailModal)?.metadata?.section || ""} readOnly /> -
+
+ + {JSON.stringify( + currentChunks.find((c) => c.id === chunkDetailModal)?.metadata || {}, + null, + 2 + ) || "{}"} +
), }, ]} /> + + setDeleteConfirmModal(null)} + onOk={() => handleDeleteChunk(deleteConfirmModal!)} + title={t("knowledgeBase.fileDetail.modal.deleteConfirmTitle")} + okText={t("knowledgeBase.fileDetail.actions.confirm")} + cancelText={t("knowledgeBase.fileDetail.actions.cancel")} + okButtonProps={{ danger: true, loading: deleting }} + centered + > +

{t("knowledgeBase.fileDetail.modal.deleteConfirmMessage")}

+
+ + { + setEditingChunk(null); + setEditChunkContent(""); + setEditChunkMetadata(""); + setMetadataValid(true); + setMetadataError(null); + }} + footer={null} + title={ +
+ + {t("knowledgeBase.fileDetail.modal.editChunkTitle")} - {editingChunk} +
+ } + width={900} + destroyOnClose + > +
+
+
+ +
+ setEditChunkContent(e.target.value)} + rows={6} + placeholder={t("knowledgeBase.fileDetail.placeholders.chunkContent")} + className="font-mono" + style={{ + fontFamily: 'ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, monospace', + }} + /> +
+ +
+
+ +
+ {metadataValid ? ( + + + {t("knowledgeBase.fileDetail.messages.jsonValid")} + + ) : ( + + + {t("knowledgeBase.fileDetail.messages.jsonInvalid")} + + )} + +
+
+
+ handleMetadataChange(e.target.value)} + rows={10} + placeholder={t("knowledgeBase.fileDetail.placeholders.metadata")} + className="font-mono" + style={{ + fontFamily: 'ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, monospace', + borderColor: metadataValid ? '#d9d9d9' : '#ff4d4f', + }} + status={metadataValid ? undefined : 'error'} + /> + {metadataError && ( +
+ + {metadataError} +
+ )} +
+ {t("knowledgeBase.fileDetail.messages.metadataHint")} +
+
+
+ +
+ + +
+
+
); }; diff --git a/frontend/src/pages/KnowledgeBase/components/CreateKnowledgeBase.tsx b/frontend/src/pages/KnowledgeBase/components/CreateKnowledgeBase.tsx index 035fecf06..c476cec26 100644 --- a/frontend/src/pages/KnowledgeBase/components/CreateKnowledgeBase.tsx +++ b/frontend/src/pages/KnowledgeBase/components/CreateKnowledgeBase.tsx @@ -177,7 +177,14 @@ export default function CreateKnowledgeBase({ diff --git a/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts b/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts index 102520c82..1ac851737 100644 --- a/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts +++ b/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts @@ -87,3 +87,21 @@ export function queryKnowledgeBase(data: { }) { return post("/api/knowledge-base/query", data); } + +export function updateKnowledgeBaseChunk( + knowledgeBaseId: string, + chunkId: string, + data: { text: string; metadata?: Record } +) { + return (put as unknown as (url: string, data?: object) => Promise)( + `/api/knowledge-base/${knowledgeBaseId}/chunks/${chunkId}`, + data + ); +} + +export function deleteKnowledgeBaseChunk( + knowledgeBaseId: string, + chunkId: string +) { + return del(`/api/knowledge-base/${knowledgeBaseId}/chunks/${chunkId}`); +} diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts index 4ff0c64fe..1c69395cb 100644 --- a/frontend/vite.config.ts +++ b/frontend/vite.config.ts @@ -49,9 +49,9 @@ export default defineConfig({ }; // Python 服务: rag, synthesis, annotation, evaluation, models - const pythonPaths = ["rag", "operators", "categories", "synthesis", "annotation", "knowledge-base", "data-collection", "evaluation", "models"]; + const pythonPaths = ["rag", "cleaning", "operators", "categories", "synthesis", "annotation", "knowledge-base", "data-collection", "evaluation", "models", "sys-param"]; // Java 服务: data-management, knowledge-base - const javaPaths = ["data-management", "sys-param"]; + const javaPaths = ["data-management"]; const proxy: Record = {}; // SSE 端点需要禁用缓冲 diff --git a/runtime/datamate-python/app/module/rag/infra/vectorstore/__init__.py b/runtime/datamate-python/app/module/rag/infra/vectorstore/__init__.py index 289fdb7aa..9d4101935 100644 --- a/runtime/datamate-python/app/module/rag/infra/vectorstore/__init__.py +++ b/runtime/datamate-python/app/module/rag/infra/vectorstore/__init__.py @@ -12,6 +12,8 @@ drop_collection, get_vector_dimension, rename_collection, + update_chunk_by_id, + delete_chunk_by_id, ) __all__ = [ @@ -23,4 +25,6 @@ "get_vector_dimension", "delete_chunks_by_rag_file_ids", "chunks_to_documents", + "update_chunk_by_id", + "delete_chunk_by_id", ] diff --git a/runtime/datamate-python/app/module/rag/infra/vectorstore/store.py b/runtime/datamate-python/app/module/rag/infra/vectorstore/store.py index dd70933e2..58ae56ba1 100644 --- a/runtime/datamate-python/app/module/rag/infra/vectorstore/store.py +++ b/runtime/datamate-python/app/module/rag/infra/vectorstore/store.py @@ -13,7 +13,6 @@ """ from __future__ import annotations -import json import logging from typing import List, Optional @@ -27,6 +26,47 @@ logger = logging.getLogger(__name__) +BATCH_DELETE_SIZE = 100 + + +def _delete_chunks_by_rag_file_id_batched(client, collection_name: str, rag_file_id: str) -> int: + """分批删除指定 rag_file_id 的所有 chunks + + Args: + client: Milvus 客户端 + collection_name: 集合名称 + rag_file_id: RAG 文件 ID + + Returns: + 删除的总数量 + """ + filter_expr = f'metadata["rag_file_id"] == "{rag_file_id}"' + total_deleted = 0 + + while True: + try: + results = client.query( + collection_name=collection_name, + filter=filter_expr, + output_fields=["id"], + limit=BATCH_DELETE_SIZE, + ) + if not results: + break + + chunk_ids = [r["id"] for r in results] + id_filter = ' || '.join([f'id == "{cid}"' for cid in chunk_ids]) + client.delete(collection_name=collection_name, filter=f"({id_filter})") + total_deleted += len(chunk_ids) + + if len(chunk_ids) < BATCH_DELETE_SIZE: + break + except Exception as e: + logger.warning("分批删除失败: collection=%s rag_file_id=%s error=%s", collection_name, rag_file_id, e) + break + + return total_deleted + def drop_collection(collection_name: str) -> None: """删除 Milvus 集合 @@ -201,12 +241,8 @@ def delete_chunks_by_rag_file_ids(collection_name: str, rag_file_ids: List[str]) client = get_milvus_client() for rid in rag_file_ids: - json_value = json.dumps({"rag_file_id": rid}) - filter_expr = f'JSON_CONTAINS(metadata, \'{json_value}\')' - try: - client.delete(collection_name=collection_name, filter=filter_expr) - except Exception as del_err: - logger.warning("删除分块时部分失败: collection=%s rag_file_id=%s: %s", collection_name, rid, del_err) + deleted = _delete_chunks_by_rag_file_id_batched(client, collection_name, rid) + logger.info("删除文件分块: collection=%s rag_file_id=%s deleted=%d", collection_name, rid, deleted) logger.info("已按 rag_file_id 删除集合 %s 中的分块: %s", collection_name, rag_file_ids) @@ -238,3 +274,111 @@ def chunks_to_documents( documents.append(doc) return documents, ids + + +def update_chunk_by_id( + collection_name: str, + chunk_id: str, + text: str, + metadata: Optional[dict] = None, + embedding_instance=None, +) -> None: + """更新指定 ID 的分块 + + Args: + collection_name: 集合名称 + chunk_id: 分块 ID + text: 新的文本内容 + metadata: 新的元数据(可选) + embedding_instance: Embeddings 实例 + """ + try: + client = get_milvus_client() + + filter_expr = f'id == "{chunk_id}"' + existing = client.query( + collection_name=collection_name, + filter=filter_expr, + output_fields=["metadata"], + ) + + if not existing: + raise BusinessError( + ErrorCodes.RAG_CHUNK_NOT_FOUND, + f"Chunk not found: {chunk_id}" + ) + + existing_metadata = existing[0].get("metadata", {}) + + if metadata is None: + metadata = existing_metadata + else: + # 确保保留原有的 rag_file_id 字段,防止用户修改时丢失 + if "rag_file_id" in existing_metadata and "rag_file_id" not in metadata: + metadata = {**metadata, "rag_file_id": existing_metadata["rag_file_id"]} + + if embedding_instance: + embedding = embedding_instance + else: + from app.module.rag.infra.embeddings import EmbeddingFactory + embedding = EmbeddingFactory.create_embeddings() + + vector = embedding.embed_query(text) + + client.delete(collection_name=collection_name, filter=filter_expr) + + client.insert( + collection_name=collection_name, + data=[{ + "id": chunk_id, + "text": text, + "metadata": metadata, + "vector": vector, + }] + ) + + logger.info("成功更新分块: collection=%s chunk_id=%s", collection_name, chunk_id) + + except BusinessError: + raise + except Exception as e: + logger.error("更新分块失败: %s", e) + raise BusinessError(ErrorCodes.RAG_MILVUS_ERROR, f"更新分块失败: {str(e)}") from e + + +def delete_chunk_by_id(collection_name: str, chunk_id: str) -> Optional[str]: + """删除指定 ID 的分块 + + Args: + collection_name: 集合名称 + chunk_id: 分块 ID + + Returns: + 被删除分块对应的 rag_file_id(如果存在),否则返回 None + """ + try: + client = get_milvus_client() + + filter_expr = f'id == "{chunk_id}"' + + # 先查询 chunk 的 metadata 获取 rag_file_id + existing = client.query( + collection_name=collection_name, + filter=filter_expr, + output_fields=["metadata"], + ) + + rag_file_id = None + if existing: + metadata = existing[0].get("metadata", {}) + rag_file_id = metadata.get("rag_file_id") + + client.delete(collection_name=collection_name, filter=filter_expr) + + logger.info("成功删除分块: collection=%s chunk_id=%s rag_file_id=%s", collection_name, chunk_id, rag_file_id) + + return rag_file_id + + except Exception as e: + logger.error("删除分块失败: %s", e) + raise BusinessError(ErrorCodes.RAG_MILVUS_ERROR, f"删除分块失败: {str(e)}") from e diff --git a/runtime/datamate-python/app/module/rag/interface/knowledge_base.py b/runtime/datamate-python/app/module/rag/interface/knowledge_base.py index 70f8d4b60..2b212f605 100644 --- a/runtime/datamate-python/app/module/rag/interface/knowledge_base.py +++ b/runtime/datamate-python/app/module/rag/interface/knowledge_base.py @@ -21,6 +21,7 @@ RetrieveReq, PagingQuery, QueryRequest, + ChunkUpdateReq, ) from app.module.rag.service.knowledge_base_service import KnowledgeBaseService from app.module.rag.service.unified_retrieval_service import UnifiedRetrievalService @@ -151,6 +152,39 @@ async def get_file_chunks( return SuccessResponse(data=result) +@router.put("/{knowledge_base_id}/chunks/{chunk_id}", response_model=SuccessResponse) +async def update_chunk( + knowledge_base_id: str, + chunk_id: str, + request: ChunkUpdateReq, + db: AsyncSession = Depends(get_db), +): + """更新指定分块的文本和元数据""" + service = KnowledgeBaseService(db) + await service.update_chunk( + knowledge_base_id=knowledge_base_id, + chunk_id=chunk_id, + text=request.text, + metadata=request.metadata, + ) + return SuccessResponse(message="分块更新成功") + + +@router.delete("/{knowledge_base_id}/chunks/{chunk_id}", response_model=SuccessResponse) +async def delete_chunk( + knowledge_base_id: str, + chunk_id: str, + db: AsyncSession = Depends(get_db), +): + """删除指定分块""" + service = KnowledgeBaseService(db) + await service.delete_chunk( + knowledge_base_id=knowledge_base_id, + chunk_id=chunk_id, + ) + return SuccessResponse(message="分块删除成功") + + @router.post("/retrieve", response_model=SuccessResponse) async def retrieve_knowledge_base( request: RetrieveReq, diff --git a/runtime/datamate-python/app/module/rag/schema/request.py b/runtime/datamate-python/app/module/rag/schema/request.py index 262fb785c..4463339cc 100644 --- a/runtime/datamate-python/app/module/rag/schema/request.py +++ b/runtime/datamate-python/app/module/rag/schema/request.py @@ -342,3 +342,21 @@ class Config: "query": "什么是机器学习?" } } + + +class ChunkUpdateReq(BaseModel): + """Chunk更新请求""" + text: str = Field(..., min_length=1, description="分块文本内容") + metadata: Optional[dict] = Field(default=None, description="元数据") + + class Config: + json_schema_extra = { + "example": { + "text": "这是修改后的分块内容...", + "metadata": { + "fileName": "document.pdf", + "chunkIndex": 0, + "customField": "custom value" + } + } + } diff --git a/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py b/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py index 41105c89d..99027ce67 100644 --- a/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py +++ b/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py @@ -16,7 +16,14 @@ from app.db.models.dataset_management import DatasetFiles from app.db.models.knowledge_gen import KnowledgeBase, RagFile, FileStatus, RagType from app.db.models.models import Models -from app.module.rag.infra.vectorstore import drop_collection, rename_collection, delete_chunks_by_rag_file_ids +from app.module.rag.infra.embeddings import EmbeddingFactory +from app.module.rag.infra.vectorstore import ( + drop_collection, + rename_collection, + delete_chunks_by_rag_file_ids, + update_chunk_by_id, + delete_chunk_by_id, +) from app.module.rag.repository import KnowledgeBaseRepository, RagFileRepository from app.module.rag.schema.request import ( KnowledgeBaseCreateReq, @@ -388,3 +395,94 @@ async def delete_files(self, knowledge_base_id: str, request: DeleteFilesReq) -> await self.db.commit() logger.info("成功删除 %d 个文件", len(rag_files)) + + async def update_chunk( + self, + knowledge_base_id: str, + chunk_id: str, + text: str, + metadata: dict = None, + ) -> None: + """更新指定分块的文本和元数据 + + Args: + knowledge_base_id: 知识库 ID + chunk_id: 分块 ID + text: 新的文本内容 + metadata: 新的元数据(可选) + """ + knowledge_base = await self.kb_repo.get_by_id(knowledge_base_id) + if not knowledge_base: + raise BusinessError(ErrorCodes.RAG_KNOWLEDGE_BASE_NOT_FOUND) + + if knowledge_base.type != RagType.DOCUMENT.value: + raise BusinessError( + ErrorCodes.RAG_INVALID_REQUEST, + f"知识库类型 {knowledge_base.type} 不支持分块更新" + ) + + from app.module.system.service.common_service import get_model_by_id + import asyncio + + embedding_entity = await get_model_by_id(self.db, knowledge_base.embedding_model) + if not embedding_entity: + raise BusinessError(ErrorCodes.RAG_MODEL_NOT_FOUND) + + embedding = EmbeddingFactory.create_embeddings( + model_name=str(embedding_entity.model_name), + base_url=getattr(embedding_entity, "base_url", None), + api_key=getattr(embedding_entity, "api_key", None), + ) + + await asyncio.to_thread( + update_chunk_by_id, + collection_name=str(knowledge_base.name), + chunk_id=chunk_id, + text=text, + metadata=metadata, + embedding_instance=embedding, + ) + + logger.info( + "成功更新分块: kb=%s chunk_id=%s", + knowledge_base_id, chunk_id + ) + + async def delete_chunk( + self, + knowledge_base_id: str, + chunk_id: str, + ) -> None: + """删除指定分块 + + Args: + knowledge_base_id: 知识库 ID + chunk_id: 分块 ID + """ + knowledge_base = await self.kb_repo.get_by_id(knowledge_base_id) + if not knowledge_base: + raise BusinessError(ErrorCodes.RAG_KNOWLEDGE_BASE_NOT_FOUND) + + if knowledge_base.type != RagType.DOCUMENT.value: + raise BusinessError( + ErrorCodes.RAG_INVALID_REQUEST, + f"知识库类型 {knowledge_base.type} 不支持分块删除" + ) + + import asyncio + rag_file_id = await asyncio.to_thread( + delete_chunk_by_id, + collection_name=str(knowledge_base.name), + chunk_id=chunk_id, + ) + + if rag_file_id: + rag_file = await self.file_repo.get_by_id(rag_file_id) + if rag_file and rag_file.chunk_count and rag_file.chunk_count > 0: + rag_file.chunk_count = rag_file.chunk_count - 1 + await self.db.commit() + + logger.info( + "成功删除分块: kb=%s chunk_id=%s", + knowledge_base_id, chunk_id + ) diff --git a/runtime/datamate-python/app/module/rag/service/strategy/vector_strategy.py b/runtime/datamate-python/app/module/rag/service/strategy/vector_strategy.py index 5570f51a7..9db26c2bd 100644 --- a/runtime/datamate-python/app/module/rag/service/strategy/vector_strategy.py +++ b/runtime/datamate-python/app/module/rag/service/strategy/vector_strategy.py @@ -395,3 +395,78 @@ def _filter_and_clean_chunks(self, chunks: list) -> list: logger.info("有效分块数量: %d / %d", len(valid_chunks), len(chunks)) return valid_chunks + + async def update_chunk( + self, + knowledge_base_id: str, + chunk_id: str, + text: str, + metadata: Optional[dict] = None, + ) -> None: + """更新指定分块的文本和元数据 + + Args: + knowledge_base_id: 知识库 ID + chunk_id: 分块 ID + text: 新的文本内容 + metadata: 新的元数据(可选) + """ + from app.module.rag.infra.vectorstore import update_chunk_by_id + + kb_repo = KnowledgeBaseRepository(self.db) + kb = await kb_repo.get_by_id(knowledge_base_id) + if not kb: + raise BusinessError(ErrorCodes.RAG_KNOWLEDGE_BASE_NOT_FOUND) + + embedding_entity = await get_model_by_id(self.db, kb.embedding_model) + if not embedding_entity: + raise BusinessError(ErrorCodes.RAG_MODEL_NOT_FOUND) + + embedding = EmbeddingFactory.create_embeddings( + model_name=str(embedding_entity.model_name), + base_url=getattr(embedding_entity, "base_url", None), + api_key=getattr(embedding_entity, "api_key", None), + ) + + await asyncio.to_thread( + update_chunk_by_id, + collection_name=str(kb.name), + chunk_id=chunk_id, + text=text, + metadata=metadata, + embedding_instance=embedding, + ) + + logger.info( + "成功更新分块: kb=%s chunk_id=%s", + knowledge_base_id, chunk_id + ) + + async def delete_chunk( + self, + knowledge_base_id: str, + chunk_id: str, + ) -> None: + """删除指定分块 + + Args: + knowledge_base_id: 知识库 ID + chunk_id: 分块 ID + """ + from app.module.rag.infra.vectorstore import delete_chunk_by_id + + kb_repo = KnowledgeBaseRepository(self.db) + kb = await kb_repo.get_by_id(knowledge_base_id) + if not kb: + raise BusinessError(ErrorCodes.RAG_KNOWLEDGE_BASE_NOT_FOUND) + + await asyncio.to_thread( + delete_chunk_by_id, + collection_name=str(kb.name), + chunk_id=chunk_id, + ) + + logger.info( + "成功删除分块: kb=%s chunk_id=%s", + knowledge_base_id, chunk_id + )