diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py index ed139f95..219b24e5 100644 --- a/src/memos/mem_reader/multi_modal_struct.py +++ b/src/memos/mem_reader/multi_modal_struct.py @@ -570,6 +570,9 @@ def _process_transfer_multi_modal_data( "session_id": raw_node.metadata.session_id, **(raw_node.metadata.info or {}), } + # Explicitly pass file_ids to preserve file association during transfer + if hasattr(raw_node.metadata, "file_ids") and raw_node.metadata.file_ids: + info["file_ids"] = raw_node.metadata.file_ids fine_memory_items = [] # Part A: call llm diff --git a/src/memos/mem_reader/read_multi_modal/file_content_parser.py b/src/memos/mem_reader/read_multi_modal/file_content_parser.py index cce99e76..4561c10d 100644 --- a/src/memos/mem_reader/read_multi_modal/file_content_parser.py +++ b/src/memos/mem_reader/read_multi_modal/file_content_parser.py @@ -179,6 +179,11 @@ def create_source( "doc_path": file_info.get("filename") or file_info.get("file_id", ""), "content": chunk_content if chunk_content else file_info.get("file_data", ""), } + # Persist file_id in source_dict if available + file_id = file_info.get("file_id") + if file_id: + source_dict["file_id"] = file_id + # Add chunk ordering information if provided if chunk_index is not None: source_dict["chunk_index"] = chunk_index @@ -199,13 +204,18 @@ def rebuild_from_source( source: SourceMessage, ) -> File: """Rebuild file content part from SourceMessage.""" + file_data = { + "filename": source.doc_path or "", + "file_data": source.content or "", + } + # Restore file_id if present in source + if hasattr(source, "file_id") and source.file_id: + file_data["file_id"] = source.file_id + # Rebuild from source fields return { "type": "file", - "file": { - "filename": source.doc_path or "", - "file_data": source.content or "", - }, + "file": file_data, } def _parse_file(self, file_info: dict[str, Any]) -> str: @@ -527,7 +537,18 @@ def parse_fine( session_id = info_.pop("session_id", "") if file_id: info_["file_id"] = file_id - file_ids = [file_id] if file_id else [] + + file_ids = [] + if file_id: + file_ids = [file_id] + elif info.get("file_id"): + file_ids = [info.get("file_id")] + info_["file_id"] = info.get("file_id") + elif info.get("file_ids"): + # Support retrieve from file_ids list + file_ids = info.get("file_ids") + if len(file_ids) > 0: + info_["file_id"] = file_ids[0] # For file content parts, default to LongTermMemory memory_type = "LongTermMemory"