From 47d73256c4ddd427cc093858bbc3bd49ea4b78cb Mon Sep 17 00:00:00 2001
From: "glin1993@outlook.com" <>
Date: Mon, 8 Dec 2025 15:42:55 +0800
Subject: [PATCH 1/4] fix: restore file_id fallback in parse_fine from info
 dict

Recent changes to rebuild_from_source removed original_part, so file_id
was lost during memory transfer and source_doc_id showed up as null in
logs. parse_fine now falls back to info["file_id"], and then to the
first entry of info["file_ids"], when no file_id is otherwise available.
---
 .../read_multi_modal/file_content_parser.py         | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/memos/mem_reader/read_multi_modal/file_content_parser.py b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
index cce99e76..d20e3ac0 100644
--- a/src/memos/mem_reader/read_multi_modal/file_content_parser.py
+++ b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
@@ -527,7 +527,18 @@ def parse_fine(
         session_id = info_.pop("session_id", "")
         if file_id:
             info_["file_id"] = file_id
-        file_ids = [file_id] if file_id else []
+        
+        file_ids = []
+        if file_id:
+            file_ids = [file_id]
+        elif info.get("file_id"):
+            file_ids = [info.get("file_id")]
+            info_["file_id"] = info.get("file_id")
+        elif info.get("file_ids"):
+             # Support retrieve from file_ids list
+             file_ids = info.get("file_ids")
+             if len(file_ids) > 0:
+                 info_["file_id"] = file_ids[0]
         # For file content parts, default to LongTermMemory
         memory_type = "LongTermMemory"
 

From d18197ead391d95ea18f84c10e731aaafd4d72bf Mon Sep 17 00:00:00 2001
From: "glin1993@outlook.com" <>
Date: Mon, 8 Dec 2025 15:51:41 +0800
Subject: [PATCH 2/4] fix: pass file_ids from raw_node.metadata to info dict

_process_transfer_multi_modal_data did not pass file_ids from
raw_node.metadata into the info dictionary, so file_content_parser could
not see them during memory transfer and source_doc_id was logged as
null. Copy raw_node.metadata.file_ids into info["file_ids"] whenever the
attribute is present and non-empty.
---
 src/memos/mem_reader/multi_modal_struct.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py
index ed139f95..ea5941b9 100644
--- a/src/memos/mem_reader/multi_modal_struct.py
+++ b/src/memos/mem_reader/multi_modal_struct.py
@@ -570,6 +570,9 @@ def _process_transfer_multi_modal_data(
             "session_id": raw_node.metadata.session_id,
             **(raw_node.metadata.info or {}),
         }
+        # Explicitly pass file_ids to preserve file association during transfer
+        if hasattr(raw_node.metadata, "file_ids") and raw_node.metadata.file_ids:
+             info["file_ids"] = raw_node.metadata.file_ids
 
         fine_memory_items = []
         # Part A: call llm

From a7d50733483f8f8773deab1b24aa5ed655aaddec Mon Sep 17 00:00:00 2001
From: "glin1993@outlook.com" <>
Date: Mon, 8 Dec 2025 15:56:51 +0800
Subject: [PATCH 3/4] fix: robust file_id preservation in memory transfer

1. file_content_parser: create_source now persists file_id in
   source_dict, and rebuild_from_source restores it into the rebuilt
   file part, to prevent data loss during object reconstruction.
2. multi_modal_struct: Fix the indentation of the info["file_ids"]
   assignment (no behavior change); file_ids are still propagated via
   the info dict as a safety fallback.
---
 src/memos/mem_reader/multi_modal_struct.py     |  2 +-
 .../read_multi_modal/file_content_parser.py    | 18 ++++++++++++++----
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py
index ea5941b9..219b24e5 100644
--- a/src/memos/mem_reader/multi_modal_struct.py
+++ b/src/memos/mem_reader/multi_modal_struct.py
@@ -572,7 +572,7 @@ def _process_transfer_multi_modal_data(
         }
         # Explicitly pass file_ids to preserve file association during transfer
         if hasattr(raw_node.metadata, "file_ids") and raw_node.metadata.file_ids:
-             info["file_ids"] = raw_node.metadata.file_ids
+            info["file_ids"] = raw_node.metadata.file_ids
 
         fine_memory_items = []
         # Part A: call llm
diff --git a/src/memos/mem_reader/read_multi_modal/file_content_parser.py b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
index d20e3ac0..81a04ee0 100644
--- a/src/memos/mem_reader/read_multi_modal/file_content_parser.py
+++ b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
@@ -179,6 +179,11 @@ def create_source(
                 "doc_path": file_info.get("filename") or file_info.get("file_id", ""),
                 "content": chunk_content if chunk_content else file_info.get("file_data", ""),
             }
+            # Persist file_id in source_dict if available
+            file_id = file_info.get("file_id")
+            if file_id:
+                source_dict["file_id"] = file_id
+            
             # Add chunk ordering information if provided
             if chunk_index is not None:
                 source_dict["chunk_index"] = chunk_index
@@ -199,13 +204,18 @@ def rebuild_from_source(
         source: SourceMessage,
     ) -> File:
         """Rebuild file content part from SourceMessage."""
+        file_data = {
+            "filename": source.doc_path or "",
+            "file_data": source.content or "",
+        }
+        # Restore file_id if present in source
+        if hasattr(source, "file_id") and source.file_id:
+            file_data["file_id"] = source.file_id
+
         # Rebuild from source fields
         return {
             "type": "file",
-            "file": {
-                "filename": source.doc_path or "",
-                "file_data": source.content or "",
-            },
+            "file": file_data,
         }
 
     def _parse_file(self, file_info: dict[str, Any]) -> str:

From d34f336bdb2f88105bc497432e51c2a0aac16a55 Mon Sep 17 00:00:00 2001
From: "glin1993@outlook.com" <>
Date: Mon, 8 Dec 2025 15:58:47 +0800
Subject: [PATCH 4/4] style: ruff format and lint check

---
 .../read_multi_modal/file_content_parser.py          | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/memos/mem_reader/read_multi_modal/file_content_parser.py b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
index 81a04ee0..4561c10d 100644
--- a/src/memos/mem_reader/read_multi_modal/file_content_parser.py
+++ b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
@@ -183,7 +183,7 @@ def create_source(
             file_id = file_info.get("file_id")
             if file_id:
                 source_dict["file_id"] = file_id
-            
+
             # Add chunk ordering information if provided
             if chunk_index is not None:
                 source_dict["chunk_index"] = chunk_index
@@ -537,7 +537,7 @@ def parse_fine(
         session_id = info_.pop("session_id", "")
         if file_id:
             info_["file_id"] = file_id
-        
+
         file_ids = []
         if file_id:
             file_ids = [file_id]
@@ -545,10 +545,10 @@ def parse_fine(
             file_ids = [info.get("file_id")]
             info_["file_id"] = info.get("file_id")
         elif info.get("file_ids"):
-             # Support retrieve from file_ids list
-             file_ids = info.get("file_ids")
-             if len(file_ids) > 0:
-                 info_["file_id"] = file_ids[0]
+            # Support retrieving file_id from the file_ids list
+            file_ids = info.get("file_ids")
+            if len(file_ids) > 0:
+                info_["file_id"] = file_ids[0]
         # For file content parts, default to LongTermMemory
         memory_type = "LongTermMemory"