From 47d73256c4ddd427cc093858bbc3bd49ea4b78cb Mon Sep 17 00:00:00 2001 From: "glin1993@outlook.com" <> Date: Mon, 8 Dec 2025 15:42:55 +0800 Subject: [PATCH 1/4] fix: restore file_id fallback in parse_fine from info dict Recent changes to rebuild_from_source removed original_part, causing file_id to be lost during memory transfer. This resulted in null source_doc_id in logs. This fix restores file_id by falling back to the info dictionary. --- .../read_multi_modal/file_content_parser.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/memos/mem_reader/read_multi_modal/file_content_parser.py b/src/memos/mem_reader/read_multi_modal/file_content_parser.py index cce99e76..d20e3ac0 100644 --- a/src/memos/mem_reader/read_multi_modal/file_content_parser.py +++ b/src/memos/mem_reader/read_multi_modal/file_content_parser.py @@ -527,7 +527,18 @@ def parse_fine( session_id = info_.pop("session_id", "") if file_id: info_["file_id"] = file_id - file_ids = [file_id] if file_id else [] + + file_ids = [] + if file_id: + file_ids = [file_id] + elif info.get("file_id"): + file_ids = [info.get("file_id")] + info_["file_id"] = info.get("file_id") + elif info.get("file_ids"): + # Support retrieve from file_ids list + file_ids = info.get("file_ids") + if len(file_ids) > 0: + info_["file_id"] = file_ids[0] # For file content parts, default to LongTermMemory memory_type = "LongTermMemory" From d18197ead391d95ea18f84c10e731aaafd4d72bf Mon Sep 17 00:00:00 2001 From: "glin1993@outlook.com" <> Date: Mon, 8 Dec 2025 15:51:41 +0800 Subject: [PATCH 2/4] fix: pass file_ids from raw_node.metadata to info dict In _process_transfer_multi_modal_data, file_ids were not explicitly passed from raw_node.metadata to the info dictionary. This prevented file_content_parser from accessing the file_ids during memory transfer, leading to null source_doc_id in logs. This commit ensures file_ids are properly propagated. --- src/memos/mem_reader/multi_modal_struct.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py index ed139f95..ea5941b9 100644 --- a/src/memos/mem_reader/multi_modal_struct.py +++ b/src/memos/mem_reader/multi_modal_struct.py @@ -570,6 +570,9 @@ def _process_transfer_multi_modal_data( "session_id": raw_node.metadata.session_id, **(raw_node.metadata.info or {}), } + # Explicitly pass file_ids to preserve file association during transfer + if hasattr(raw_node.metadata, "file_ids") and raw_node.metadata.file_ids: + info["file_ids"] = raw_node.metadata.file_ids fine_memory_items = [] # Part A: call llm From a7d50733483f8f8773deab1b24aa5ed655aaddec Mon Sep 17 00:00:00 2001 From: "glin1993@outlook.com" <> Date: Mon, 8 Dec 2025 15:56:51 +0800 Subject: [PATCH 3/4] fix: robust file_id preservation in memory transfer 1. file_content_parser: Persist file_id in SourceMessage and restore it in rebuild_from_source to prevent data loss during object reconstruction. 2. multi_modal_struct: Fix indentation and ensure file_ids are propagated via info dict as a safety fallback. --- src/memos/mem_reader/multi_modal_struct.py | 2 +- .../read_multi_modal/file_content_parser.py | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py index ea5941b9..219b24e5 100644 --- a/src/memos/mem_reader/multi_modal_struct.py +++ b/src/memos/mem_reader/multi_modal_struct.py @@ -572,7 +572,7 @@ def _process_transfer_multi_modal_data( } # Explicitly pass file_ids to preserve file association during transfer if hasattr(raw_node.metadata, "file_ids") and raw_node.metadata.file_ids: - info["file_ids"] = raw_node.metadata.file_ids + info["file_ids"] = raw_node.metadata.file_ids fine_memory_items = [] # Part A: call llm diff --git a/src/memos/mem_reader/read_multi_modal/file_content_parser.py b/src/memos/mem_reader/read_multi_modal/file_content_parser.py index d20e3ac0..81a04ee0 100644 --- a/src/memos/mem_reader/read_multi_modal/file_content_parser.py +++ b/src/memos/mem_reader/read_multi_modal/file_content_parser.py @@ -179,6 +179,11 @@ def create_source( "doc_path": file_info.get("filename") or file_info.get("file_id", ""), "content": chunk_content if chunk_content else file_info.get("file_data", ""), } + # Persist file_id in source_dict if available + file_id = file_info.get("file_id") + if file_id: + source_dict["file_id"] = file_id + # Add chunk ordering information if provided if chunk_index is not None: source_dict["chunk_index"] = chunk_index @@ -199,13 +204,18 @@ def rebuild_from_source( source: SourceMessage, ) -> File: """Rebuild file content part from SourceMessage.""" + file_data = { + "filename": source.doc_path or "", + "file_data": source.content or "", + } + # Restore file_id if present in source + if hasattr(source, "file_id") and source.file_id: + file_data["file_id"] = source.file_id + # Rebuild from source fields return { "type": "file", - "file": { - "filename": source.doc_path or "", - "file_data": source.content or "", - }, + "file": file_data, } def _parse_file(self, file_info: dict[str, Any]) -> str: From d34f336bdb2f88105bc497432e51c2a0aac16a55 Mon Sep 17 00:00:00 2001 From: "glin1993@outlook.com" <> Date: Mon, 8 Dec 2025 15:58:47 +0800 Subject: [PATCH 4/4] style: ruff format and lint check --- .../read_multi_modal/file_content_parser.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/memos/mem_reader/read_multi_modal/file_content_parser.py b/src/memos/mem_reader/read_multi_modal/file_content_parser.py index 81a04ee0..4561c10d 100644 --- a/src/memos/mem_reader/read_multi_modal/file_content_parser.py +++ b/src/memos/mem_reader/read_multi_modal/file_content_parser.py @@ -183,7 +183,7 @@ def create_source( file_id = file_info.get("file_id") if file_id: source_dict["file_id"] = file_id - + # Add chunk ordering information if provided if chunk_index is not None: source_dict["chunk_index"] = chunk_index @@ -537,7 +537,7 @@ def parse_fine( session_id = info_.pop("session_id", "") if file_id: info_["file_id"] = file_id - + file_ids = [] if file_id: file_ids = [file_id] @@ -545,10 +545,10 @@ def parse_fine( file_ids = [info.get("file_id")] info_["file_id"] = info.get("file_id") elif info.get("file_ids"): - # Support retrieve from file_ids list - file_ids = info.get("file_ids") - if len(file_ids) > 0: - info_["file_id"] = file_ids[0] + # Support retrieve from file_ids list + file_ids = info.get("file_ids") + if len(file_ids) > 0: + info_["file_id"] = file_ids[0] # For file content parts, default to LongTermMemory memory_type = "LongTermMemory"