From becbd94011d0fdc37543a6b096f959ef0025f579 Mon Sep 17 00:00:00 2001 From: hassineabd Date: Tue, 30 Dec 2025 13:18:56 +0100 Subject: [PATCH 1/5] added logging reason for choosing a specific tool --- Agent/tools/mobile/click_element.py | 10 ++++++++-- Agent/tools/mobile/go_back.py | 9 ++++++++- Agent/tools/mobile/hide_keyboard.py | 9 ++++++++- Agent/tools/mobile/input_text.py | 20 +++++++++++++------- Agent/tools/mobile/long_press.py | 6 ++++++ Agent/tools/mobile/scroll_down.py | 11 +++++++++-- Agent/tools/mobile/swipe_left.py | 11 +++++++++-- Agent/tools/mobile/swipe_right.py | 11 +++++++++-- Agent/tools/mobile/swipe_up.py | 11 +++++++++-- 9 files changed, 79 insertions(+), 19 deletions(-) diff --git a/Agent/tools/mobile/click_element.py b/Agent/tools/mobile/click_element.py index c5048f3..6cf4feb 100644 --- a/Agent/tools/mobile/click_element.py +++ b/Agent/tools/mobile/click_element.py @@ -15,11 +15,11 @@ class ClickElementTool(BaseTool): @property def name(self) -> str: - return "tap_element" + return "click_element" @property def description(self) -> str: - return "Tap/click element by INDEX. DO NOT use for text input - use input_text instead." + return "CLICK/TAP on ANY visible element (buttons, links, suggestions, icons, list items). USE THIS for all clicking actions." @property def category(self) -> ToolCategory: @@ -45,6 +45,10 @@ def get_parameters_schema(self) -> Dict[str, Any]: "type": "integer", "description": "The index number of the element from the UI elements list (1-based)", "minimum": 1 + }, + "reasoning": { + "type": "string", + "description": "Brief explanation (1 sentence) of WHY you chose this element and action" } }, "required": ["element_index"] @@ -57,6 +61,7 @@ def execute( context: Dict[str, Any] ) -> None: element_index = arguments["element_index"] + reasoning = arguments.get("reasoning", "No reasoning provided") ui_candidates = context.get("ui_candidates", []) if element_index < 1 or element_index > len(ui_candidates): @@ -67,6 +72,7 @@ def execute( element = ui_candidates[element_index - 1] x, y = get_element_center(element) + logger.info(f"🧠 AI reasoning: {reasoning}") logger.debug(f"Tapping at ({x}, {y}) for element: {element.get('text', '')}") executor.run_keyword("Tap", [x, y]) diff --git a/Agent/tools/mobile/go_back.py b/Agent/tools/mobile/go_back.py index 1a81e1e..90f2538 100644 --- a/Agent/tools/mobile/go_back.py +++ b/Agent/tools/mobile/go_back.py @@ -32,7 +32,12 @@ def works_on_coordinates(self) -> bool: def get_parameters_schema(self) -> Dict[str, Any]: return { "type": "object", - "properties": {}, + "properties": { + "reasoning": { + "type": "string", + "description": "Brief explanation (1 sentence) of WHY you chose this action" + } + }, "required": [] } @@ -42,5 +47,7 @@ def execute( arguments: Dict[str, Any], context: Dict[str, Any] ) -> None: + reasoning = arguments.get("reasoning", "No reasoning provided") + logger.info(f"🧠 AI reasoning: {reasoning}") executor.run_keyword("Go Back") diff --git a/Agent/tools/mobile/hide_keyboard.py b/Agent/tools/mobile/hide_keyboard.py index fffc99c..4575e91 100644 --- a/Agent/tools/mobile/hide_keyboard.py +++ b/Agent/tools/mobile/hide_keyboard.py @@ -33,7 +33,12 @@ def works_on_coordinates(self) -> bool: def get_parameters_schema(self) -> Dict[str, Any]: return { "type": "object", - "properties": {}, + "properties": { + "reasoning": { + "type": "string", + "description": "Brief explanation (1 sentence) of WHY you chose this action" + } + }, "required": [] } @@ -43,6 +48,8 @@ def execute( arguments: Dict[str, 
Any],
         context: Dict[str, Any]
     ) -> None:
+        reasoning = arguments.get("reasoning", "No reasoning provided")
+        logger.info(f"🧠 AI reasoning: {reasoning}")
         # Hide Keyboard without arguments for iOS/Android compatibility
         executor.run_keyword("Hide Keyboard")

diff --git a/Agent/tools/mobile/input_text.py b/Agent/tools/mobile/input_text.py
index c6d9679..caad5ac 100644
--- a/Agent/tools/mobile/input_text.py
+++ b/Agent/tools/mobile/input_text.py
@@ -13,7 +13,7 @@ def name(self) -> str:

     @property
     def description(self) -> str:
-        return "USE THIS when instruction contains 'input', 'type', 'enter', 'fill', 'write', 'saisir', 'taper' or mentions entering text. Types text into a text field."
+        return "USE THIS ONLY when instruction explicitly mentions entering TEXT: 'input', 'type', 'enter', 'fill', 'write', 'saisir', 'taper'. Types text into a text field. DO NOT use this tool to click or tap - use click_element for that."

     @property
     def category(self) -> ToolCategory:
@@ -39,6 +39,10 @@ def get_parameters_schema(self) -> Dict[str, Any]:
                 "text": {
                     "type": "string",
                     "description": "The text to input into the element"
+                },
+                "reasoning": {
+                    "type": "string",
+                    "description": "Brief explanation (1 sentence) of WHY you chose this element and action"
                 }
             },
             "required": ["element_index", "text"]
@@ -50,21 +54,23 @@ def execute(
         arguments: Dict[str, Any],
         context: Dict[str, Any]
     ) -> None:
-        element_index = arguments["element_index"]
-        text = arguments["text"]
+        element_index = arguments.get("element_index")
+        text = arguments.get("text")
+        reasoning = arguments.get("reasoning", "No reasoning provided")
         ui_candidates = context.get("ui_candidates", [])

-        if element_index < 1 or element_index > len(ui_candidates):
+        if not text:
+            raise AssertionError("'input_text' requires text argument. Use click_element to click without entering text.")
+
+        if element_index is None or element_index < 1 or element_index > len(ui_candidates):
             raise AssertionError(
                 f"Invalid element_index: {element_index}. 
Must be 1-{len(ui_candidates)}"
             )

-        if not text:
-            raise AssertionError("'input_text' requires text argument")
-
         element = ui_candidates[element_index - 1]
         x, y = get_element_center(element)

+        logger.info(f"🧠 AI reasoning: {reasoning}")
         logger.debug(f"Tapping at ({x}, {y}) to focus, then input: '{text}'")
         executor.run_keyword("Tap", [x, y])
         executor.run_keyword("Sleep", "1s")

diff --git a/Agent/tools/mobile/long_press.py b/Agent/tools/mobile/long_press.py
index d3bb67c..5478635 100644
--- a/Agent/tools/mobile/long_press.py
+++ b/Agent/tools/mobile/long_press.py
@@ -35,6 +35,10 @@ def get_parameters_schema(self) -> Dict[str, Any]:
                     "type": "integer",
                     "description": "The index number of the element from the UI elements list (1-based)",
                     "minimum": 1
+                },
+                "reasoning": {
+                    "type": "string",
+                    "description": "Brief explanation (1 sentence) of WHY you chose this element and action"
                 }
             },
             "required": ["element_index"]
@@ -47,6 +51,7 @@ def execute(
         context: Dict[str, Any]
     ) -> None:
         element_index = arguments["element_index"]
+        reasoning = arguments.get("reasoning", "No reasoning provided")
         ui_candidates = context.get("ui_candidates", [])

         if element_index < 1 or element_index > len(ui_candidates):
@@ -57,6 +62,7 @@ def execute(
         element = ui_candidates[element_index - 1]
         x, y = get_element_center(element)

+        logger.info(f"🧠 AI reasoning: {reasoning}")
         logger.debug(f"Long pressing at ({x}, {y}) for 2s")
         executor.run_keyword("Tap", [x, y], 1, "2s")

diff --git a/Agent/tools/mobile/scroll_down.py b/Agent/tools/mobile/scroll_down.py
index 618b67d..0829c01 100644
--- a/Agent/tools/mobile/scroll_down.py
+++ b/Agent/tools/mobile/scroll_down.py
@@ -12,7 +12,7 @@ def name(self) -> str:

     @property
     def description(self) -> str:
-        return "Scroll down the mobile screen"
+        return "NAVIGATION ONLY: Scroll content DOWN to reveal elements BELOW. NOT for clicking visible elements - use click_element to click."

     @property
     def category(self) -> ToolCategory:
@@ -29,7 +29,12 @@ def works_on_coordinates(self) -> bool:
     def get_parameters_schema(self) -> Dict[str, Any]:
         return {
             "type": "object",
-            "properties": {},
+            "properties": {
+                "reasoning": {
+                    "type": "string",
+                    "description": "Brief explanation (1 sentence) of WHY you chose this action"
+                }
+            },
             "required": []
         }

@@ -39,5 +44,7 @@ def execute(
         arguments: Dict[str, Any],
         context: Dict[str, Any]
     ) -> None:
+        reasoning = arguments.get("reasoning", "No reasoning provided")
+        logger.info(f"🧠 AI reasoning: {reasoning}")
         executor.run_keyword("Swipe By Percent", 50, 80, 50, 20, "1s")

diff --git a/Agent/tools/mobile/swipe_left.py b/Agent/tools/mobile/swipe_left.py
index 0ecb460..1931173 100644
--- a/Agent/tools/mobile/swipe_left.py
+++ b/Agent/tools/mobile/swipe_left.py
@@ -15,7 +15,7 @@ def name(self) -> str:

     @property
     def description(self) -> str:
-        return "Swipe left on the mobile screen (for carousel, tabs, horizontal scrolling)"
+        return "USE THIS ONLY for horizontal navigation: carousels, image galleries, tabs. Do NOT use to click on visible elements - use click_element instead."
     @property
     def category(self) -> ToolCategory:
@@ -32,7 +32,12 @@ def works_on_coordinates(self) -> bool:
     def get_parameters_schema(self) -> Dict[str, Any]:
         return {
             "type": "object",
-            "properties": {},
+            "properties": {
+                "reasoning": {
+                    "type": "string",
+                    "description": "Brief explanation (1 sentence) of WHY you chose this action"
+                }
+            },
             "required": []
         }

@@ -42,6 +47,8 @@ def execute(
         arguments: Dict[str, Any],
         context: Dict[str, Any]
     ) -> None:
+        reasoning = arguments.get("reasoning", "No reasoning provided")
+        logger.info(f"🧠 AI reasoning: {reasoning}")
         # Swipe from right (80%) to left (20%) horizontally, middle of screen vertically
         executor.run_keyword("Swipe By Percent", 80, 50, 20, 50, "1s")

diff --git a/Agent/tools/mobile/swipe_right.py b/Agent/tools/mobile/swipe_right.py
index 8692619..c5a63b1 100644
--- a/Agent/tools/mobile/swipe_right.py
+++ b/Agent/tools/mobile/swipe_right.py
@@ -15,7 +15,7 @@ def name(self) -> str:

     @property
     def description(self) -> str:
-        return "Swipe right on the mobile screen (for carousel, tabs, horizontal scrolling)"
+        return "USE THIS ONLY for horizontal navigation: carousels, image galleries, tabs. Do NOT use to click on visible elements - use click_element instead."

     @property
     def category(self) -> ToolCategory:
@@ -32,7 +32,12 @@ def works_on_coordinates(self) -> bool:
     def get_parameters_schema(self) -> Dict[str, Any]:
         return {
             "type": "object",
-            "properties": {},
+            "properties": {
+                "reasoning": {
+                    "type": "string",
+                    "description": "Brief explanation (1 sentence) of WHY you chose this action"
+                }
+            },
             "required": []
         }

@@ -42,6 +47,8 @@ def execute(
         arguments: Dict[str, Any],
         context: Dict[str, Any]
     ) -> None:
+        reasoning = arguments.get("reasoning", "No reasoning provided")
+        logger.info(f"🧠 AI reasoning: {reasoning}")
         # Swipe from left (20%) to right (80%) horizontally, middle of screen vertically
         executor.run_keyword("Swipe By Percent", 20, 50, 80, 50, "1s")

diff --git a/Agent/tools/mobile/swipe_up.py b/Agent/tools/mobile/swipe_up.py
index 95df12b..7d40e6c 100644
--- a/Agent/tools/mobile/swipe_up.py
+++ b/Agent/tools/mobile/swipe_up.py
@@ -12,7 +12,7 @@ def name(self) -> str:

     @property
     def description(self) -> str:
-        return "Scroll content UP (reveal content above)"
+        return "NAVIGATION ONLY: Scroll content UP to reveal elements ABOVE. NOT for clicking visible elements - use click_element to click."
@property def category(self) -> ToolCategory: @@ -29,7 +29,12 @@ def works_on_coordinates(self) -> bool: def get_parameters_schema(self) -> Dict[str, Any]: return { "type": "object", - "properties": {}, + "properties": { + "reasoning": { + "type": "string", + "description": "Brief explanation (1 sentence) of WHY you chose this action" + } + }, "required": [] } @@ -39,6 +44,8 @@ def execute( arguments: Dict[str, Any], context: Dict[str, Any] ) -> None: + reasoning = arguments.get("reasoning", "No reasoning provided") + logger.info(f"🧠 AI reasoning: {reasoning}") # Swipe from top (20%) to bottom (80%) vertically - scrolls content UP executor.run_keyword("Swipe By Percent", 50, 20, 50, 80, "1s") From 825737c9a0b4a91fa00256a4c7a7f82979d6517d Mon Sep 17 00:00:00 2001 From: hassineabd Date: Tue, 30 Dec 2025 13:19:13 +0100 Subject: [PATCH 2/5] ask moved as a screen tools --- Agent/tools/screen/__init__.py | 8 ++ Agent/tools/screen/answer_json.py | 52 +++++++++++ Agent/tools/screen/answer_text.py | 50 ++++++++++ Agent/tools/screen/assert_screen.py | 139 ++++++++++++++++++++++++++++ 4 files changed, 249 insertions(+) create mode 100644 Agent/tools/screen/__init__.py create mode 100644 Agent/tools/screen/answer_json.py create mode 100644 Agent/tools/screen/answer_text.py create mode 100644 Agent/tools/screen/assert_screen.py diff --git a/Agent/tools/screen/__init__.py b/Agent/tools/screen/__init__.py new file mode 100644 index 0000000..0ac52af --- /dev/null +++ b/Agent/tools/screen/__init__.py @@ -0,0 +1,8 @@ +from Agent.tools.screen.answer_text import AnswerTextTool +from Agent.tools.screen.answer_json import AnswerJsonTool +from Agent.tools.screen.assert_screen import AssertScreenTool + +SCREEN_TOOLS = [AnswerTextTool, AnswerJsonTool, AssertScreenTool] + +__all__ = ["SCREEN_TOOLS", "AnswerTextTool", "AnswerJsonTool", "AssertScreenTool"] + diff --git a/Agent/tools/screen/answer_json.py b/Agent/tools/screen/answer_json.py new file mode 100644 index 0000000..f4e7361 --- /dev/null +++ b/Agent/tools/screen/answer_json.py @@ -0,0 +1,52 @@ +from typing import Any, Dict +import json +from Agent.tools.base import BaseTool, ExecutorProtocol, ToolCategory +from robot.api import logger + + +class AnswerJsonTool(BaseTool): + """Answer question about the screen with JSON response.""" + + @property + def name(self) -> str: + return "answer_question_json" + + @property + def description(self) -> str: + return "Provide a JSON object answer to the question about the screen content" + + @property + def category(self) -> ToolCategory: + return ToolCategory.SCREEN + + @property + def works_on_locator(self) -> bool: + return False + + @property + def works_on_coordinates(self) -> bool: + return False + + def get_parameters_schema(self) -> Dict[str, Any]: + return { + "type": "object", + "properties": { + "answer": { + "type": "object", + "description": "The JSON object answer to the question based on what you see in the screenshot" + } + }, + "required": ["answer"] + } + + def execute( + self, + executor: ExecutorProtocol, + arguments: Dict[str, Any], + context: Dict[str, Any] + ) -> str: + answer = arguments.get("answer", {}) + answer_str = json.dumps(answer, ensure_ascii=False) + logger.info(f"💬 AI Answer (JSON): {answer_str[:100]}..." 
if len(answer_str) > 100 else f"💬 AI Answer (JSON): {answer_str}") + return answer_str + diff --git a/Agent/tools/screen/answer_text.py b/Agent/tools/screen/answer_text.py new file mode 100644 index 0000000..21dd151 --- /dev/null +++ b/Agent/tools/screen/answer_text.py @@ -0,0 +1,50 @@ +from typing import Any, Dict +from Agent.tools.base import BaseTool, ExecutorProtocol, ToolCategory +from robot.api import logger + + +class AnswerTextTool(BaseTool): + """Answer question about the screen with text response.""" + + @property + def name(self) -> str: + return "answer_question" + + @property + def description(self) -> str: + return "Provide a text answer to the question about the screen content" + + @property + def category(self) -> ToolCategory: + return ToolCategory.SCREEN + + @property + def works_on_locator(self) -> bool: + return False + + @property + def works_on_coordinates(self) -> bool: + return False + + def get_parameters_schema(self) -> Dict[str, Any]: + return { + "type": "object", + "properties": { + "answer": { + "type": "string", + "description": "The text answer to the question based on what you see in the screenshot" + } + }, + "required": ["answer"] + } + + def execute( + self, + executor: ExecutorProtocol, + arguments: Dict[str, Any], + context: Dict[str, Any] + ) -> str: + answer = arguments.get("answer", "") + logger.info(f"💬 AI Answer: {answer[:100]}..." if len(answer) > 100 else f"💬 AI Answer: {answer}") + return answer + diff --git a/Agent/tools/screen/assert_screen.py b/Agent/tools/screen/assert_screen.py new file mode 100644 index 0000000..5a8f2fb --- /dev/null +++ b/Agent/tools/screen/assert_screen.py @@ -0,0 +1,139 @@ +from typing import Any, Dict +from Agent.tools.base import BaseTool, ExecutorProtocol, ToolCategory +from robot.api import logger + + +class AssertScreenTool(BaseTool): + """Screen assertion tool - analyzes screenshots to verify conditions. + + This tool is used by Agent.VisualCheck to verify UI states, presence of elements, + visual appearance, etc. by analyzing screenshots with AI vision models. 
+ """ + + @property + def name(self) -> str: + return "assert_screen" + + @property + def description(self) -> str: + return "Report the results of visual verification against the given instruction" + + @property + def category(self) -> ToolCategory: + return ToolCategory.SCREEN + + def get_parameters_schema(self) -> Dict[str, Any]: + return { + "type": "object", + "properties": { + "verification_result": { + "type": "boolean", + "description": "Whether the screenshot matches the instruction (true) or not (false)" + }, + "confidence_score": { + "type": "number", + "description": "Confidence level of the verification from 0.0 (no confidence) to 1.0 (completely confident)", + "minimum": 0.0, + "maximum": 1.0 + }, + "analysis": { + "type": "string", + "description": "Detailed analysis explaining why the verification passed or failed" + }, + "found_elements": { + "type": "array", + "description": "Optional list of UI elements found in the screenshot", + "items": { + "type": "object", + "properties": { + "element_type": {"type": "string"}, + "description": {"type": "string"}, + "location": {"type": "string"}, + "confidence": {"type": "number"} + } + } + }, + "issues": { + "type": "array", + "description": "Optional list of issues or problems found", + "items": {"type": "string"} + } + }, + "required": ["verification_result", "confidence_score", "analysis"] + } + + def execute( + self, + executor: ExecutorProtocol, + arguments: Dict[str, Any], + context: Dict[str, Any] + ) -> None: + """Execute visual verification - log results and assert if failed. + + Note: Screen tools don't use the executor for actions, they analyze results. + """ + verification_result = arguments.get("verification_result") + confidence_score = arguments.get("confidence_score") + analysis = arguments.get("analysis") + found_elements = arguments.get("found_elements", []) + issues = arguments.get("issues", []) + + min_confidence = context.get("min_confidence", 0.7) + + logger.info(f"👁️ Visual verification results: {arguments}") + + logger.debug("=" * 80) + logger.debug("AI VISUAL VERIFICATION RESPONSE") + logger.debug("=" * 80) + logger.debug(f"Verification Result: {'PASS' if verification_result else 'FAIL'}") + logger.debug(f"Confidence Score: {confidence_score:.2f}") + logger.debug(f"Analysis: {analysis}") + + if found_elements: + logger.debug(f"Found Elements ({len(found_elements)} total):") + for i, element in enumerate(found_elements[:10], 1): + element_type = element.get("element_type", "unknown") + description = element.get("description", "no description") + location = element.get("location", "unknown location") + confidence = element.get("confidence", 0.0) + logger.debug(f" {i}. {element_type}: {description}") + logger.debug(f" Location: {location}") + logger.debug(f" Confidence: {confidence:.2f}") + + if issues: + logger.debug(f"Issues Found ({len(issues)} total):") + for i, issue in enumerate(issues, 1): + logger.debug(f" {i}. {issue}") + + logger.debug("=" * 80) + + logger.debug(f"🔍 Verification result: {verification_result}") + logger.debug(f"📊 Confidence score: {confidence_score}") + logger.debug(f"📝 Analysis: {analysis}") + + if found_elements: + logger.debug(f"🎯 Found elements: {len(found_elements)} elements detected") + for i, element in enumerate(found_elements[:5], 1): + element_type = element.get("element_type", "unknown") + description = element.get("description", "no description") + confidence = element.get("confidence", 0.0) + logger.debug(f" {i}. 
{element_type}: {description} (confidence: {confidence:.2f})") + + if issues: + logger.debug(f"⚠️ Issues found: {len(issues)} issues detected") + for i, issue in enumerate(issues[:3], 1): + logger.debug(f" {i}. {issue}") + + if not verification_result: + error_msg = f"Visual verification failed. Analysis: {analysis}" + if issues: + error_msg += f" Issues: {', '.join(issues[:3])}" + raise AssertionError(error_msg) + elif confidence_score < min_confidence: + raise AssertionError( + f"Confidence score too low: {confidence_score:.2f} < {min_confidence} " + f"(threshold). Analysis: {analysis}" + ) + else: + logger.info(f"✅ Visual verification passed (confidence: {confidence_score:.2f})") + From a903ef7534b0069a0b78e36551b5e381cb6db15c Mon Sep 17 00:00:00 2001 From: hassineabd Date: Tue, 30 Dec 2025 13:19:52 +0100 Subject: [PATCH 3/5] adaption and selection one screen tool programatically to reduce hallucination --- Agent/agent_engine.py | 102 +++++++++------------- Agent/ai/_promptcomposer.py | 30 +++---- Agent/tools/base.py | 4 +- Agent/tools/registry.py | 46 +++++++++- Agent/tools/visual/__init__.py | 10 --- Agent/tools/visual/verify_match.py | 135 ----------------------------- 6 files changed, 99 insertions(+), 228 deletions(-) delete mode 100644 Agent/tools/visual/__init__.py delete mode 100644 Agent/tools/visual/verify_match.py diff --git a/Agent/agent_engine.py b/Agent/agent_engine.py index dbba091..aaaf58b 100644 --- a/Agent/agent_engine.py +++ b/Agent/agent_engine.py @@ -8,7 +8,9 @@ from Agent.tools.base import ToolCategory from Agent.core.keyword_runner import KeywordRunner from Agent.tools.mobile import MOBILE_TOOLS -from Agent.tools.visual import VISUAL_TOOLS +from Agent.tools.screen.answer_text import AnswerTextTool +from Agent.tools.screen.answer_json import AnswerJsonTool +from Agent.tools.screen.assert_screen import AssertScreenTool from robot.api import logger @@ -38,13 +40,16 @@ def __init__( self.executor = KeywordRunner(self.platform) self._register_mobile_tools() - self._register_visual_tools() self.prompt_composer = AgentPromptComposer( tool_registry=self.tool_registry, platform_connector=self.platform ) + self.tool_registry.register(AnswerTextTool()) + self.tool_registry.register(AnswerJsonTool()) + self.tool_registry.register(AssertScreenTool()) + self.element_source = element_source self.llm_input_format = llm_input_format logger.info(f"🎯 Element source: {element_source}, LLM input format: {llm_input_format}") @@ -55,12 +60,6 @@ def _register_mobile_tools(self) -> None: mobile_tools_count = len(self.tool_registry.get_by_category(ToolCategory.MOBILE)) logger.debug(f"📱 Registered {mobile_tools_count} mobile tools") - def _register_visual_tools(self) -> None: - for ToolClass in VISUAL_TOOLS: - self.tool_registry.register(ToolClass()) - visual_tools_count = len(self.tool_registry.get_by_category(ToolCategory.VISUAL)) - logger.debug(f"👁️ Registered {visual_tools_count} visual tools") - # ----------------------- Public API ----------------------- def set_element_source(self, source: str) -> None: @@ -203,44 +202,44 @@ def do(self, instruction: str) -> None: self._execute_do_from_tool_calls(result, context, instruction) logger.info("Agent.Do completed") - def visual_check(self, instruction: str) -> None: + def visual_check(self, instruction: str, min_confidence: float = 0.7) -> None: """Execute visual verification based on natural language instruction. 
Args: instruction: Natural language verification instruction (e.g., "verify the home screen is displayed") + min_confidence: Minimum confidence score required (0.0-1.0, default 0.7) """ - logger.info(f"👁️ Starting Agent.VisualCheck: '{instruction}'") + logger.info(f"👁️ Starting Agent.VisualCheck: '{instruction}' (min_confidence={min_confidence})") if hasattr(self.platform, 'wait_for_page_stable'): self.platform.wait_for_page_stable() screenshot_base64 = self.platform.get_screenshot_base64() - - # Embed screenshot to Robot Framework log self.platform.embed_image_to_log(screenshot_base64) - logger.debug("Screenshot captured and sent to AI for analysis") + image_url = self.image_uploader.upload_from_base64(screenshot_base64) - - # Prepare AI request - messages = self.prompt_composer.compose_visual_check_messages(instruction, image_url) - tools = self.prompt_composer.get_visual_check_tools() - logger.debug(f"Visual check tools: {len(tools)} tools") - if not tools: - raise RuntimeError("No visual tools registered. Check tool registration.") + tool = self.tool_registry.get_tool_for_query("visual_check") + if not tool: + raise AssertionError("visual_check tool not found") + + messages = self.prompt_composer.compose_visual_check_messages(instruction, image_url) - # Call AI result = self.llm.send_ai_request_with_tools( messages=messages, - tools=tools, + tools=[tool.to_tool_spec()], tool_choice="required", temperature=0 ) - logger.debug("Executing visual verification...") - self._execute_visual_check_from_tool_calls(result) - logger.debug("Agent.VisualCheck completed successfully") + tool_call = result.get("tool_calls", [{}])[0] + arguments = tool_call.get("function", {}).get("arguments", {}) + + context = {"min_confidence": min_confidence} + tool.execute(self.executor, arguments, context) + + logger.info("Agent.VisualCheck completed") def ask(self, question: str, response_format: str = "text") -> str: """Ask AI a question about the current screen. @@ -252,8 +251,7 @@ def ask(self, question: str, response_format: str = "text") -> str: Returns: AI response as string (or JSON string if format=json) """ - import json - logger.info(f"❓ Agent.Ask: '{question}'") + logger.info(f"❓ Starting Agent.Ask: '{question}' (format: {response_format})") if hasattr(self.platform, 'wait_for_page_stable'): self.platform.wait_for_page_stable() @@ -261,18 +259,25 @@ def ask(self, question: str, response_format: str = "text") -> str: screenshot_base64 = self.platform.get_screenshot_base64() self.platform.embed_image_to_log(screenshot_base64) - messages = self.prompt_composer.compose_ask_messages( - question, screenshot_base64, response_format + tool = self.tool_registry.get_tool_for_query("ask", response_format=response_format) + if not tool: + raise AssertionError(f"No tool found for response_format: {response_format}") + + messages = self.prompt_composer.compose_ask_messages(question, screenshot_base64, response_format) + + result = self.llm.send_ai_request_with_tools( + messages=messages, + tools=[tool.to_tool_spec()], + tool_choice="required", + temperature=0 ) - if response_format == "json": - response_dict = self.llm.send_ai_request_and_return_response(messages=messages, temperature=0) - response = json.dumps(response_dict, ensure_ascii=False) - else: - response = self.llm.send_ai_request(messages=messages, temperature=0) + tool_call = result.get("tool_calls", [{}])[0] + arguments = tool_call.get("function", {}).get("arguments", {}) - logger.info(f"💬 Response: {response[:100]}..." 
if len(response) > 100 else f"💬 Response: {response}") - return response + answer = tool.execute(self.executor, arguments, {}) + logger.info("Agent.Ask completed") + return answer def find_visual_element(self, description: str, format: str = "center") -> Dict[str, Any]: """Find element visually using OmniParser and return bbox. @@ -358,29 +363,4 @@ def _execute_do_from_tool_calls( # Execute the tool tool.execute(self.executor, arguments, context) - def _execute_visual_check_from_tool_calls(self, result: Dict[str, Any]) -> None: - """Execute visual check from tool calls returned by the LLM using the tool registry.""" - tool_calls = result.get("tool_calls", []) - - if not tool_calls: - logger.error("No tool calls in visual check response") - raise AssertionError("AI did not return any tool calls for visual verification") - - # Extract the first tool call (typically verify_visual_match) - tool_call = tool_calls[0] - function_name = tool_call["function"]["name"] - arguments = tool_call["function"]["arguments"] - - logger.debug(f"⚙️ Executing visual tool: {function_name}") - - # Get tool from registry - tool = self.tool_registry.get(function_name) - if not tool: - raise AssertionError(f"Unknown visual tool: {function_name}") - - # Prepare context for tool execution (visual tools don't need ui_candidates) - context = {} - - # Execute the visual tool (will handle logging and assertions) - tool.execute(self.executor, arguments, context) diff --git a/Agent/ai/_promptcomposer.py b/Agent/ai/_promptcomposer.py index df3324c..dd6d261 100644 --- a/Agent/ai/_promptcomposer.py +++ b/Agent/ai/_promptcomposer.py @@ -202,7 +202,7 @@ def compose_visual_check_messages( system_content = ( "You are a mobile app visual verification engine. " "Analyze the screenshot and verify if it matches the instruction. " - "Use the verify_visual_match function to report your findings." + "Use the assert_screen function to report your findings." ) user_content = [ {"type": "text", "text": f"Verify: {instruction}"}, @@ -214,32 +214,24 @@ def compose_visual_check_messages( {"role": "user", "content": user_content} ] - def get_visual_check_tools(self) -> List[Dict[str, Any]]: - """Return tool definitions for visual check actions from the registry. - - Returns tool specs in standard format (works with OpenAI, Anthropic, Gemini, etc.) - """ - return self.registry.get_tool_specs(category=ToolCategory.VISUAL) def compose_ask_messages( self, question: str, screenshot_base64: str, - response_format: str = "text", + response_format: str = "text" ) -> List[Dict[str, Any]]: - """Build messages for asking AI about current screen.""" + """Build messages for asking AI about current screen using tool calling.""" if response_format == "json": - system_content = ( - "You are a screen analysis assistant. " - "Answer questions about what you see in the screenshot. " - "IMPORTANT: Always respond with valid JSON only, no markdown, no explanation outside JSON." - ) + instruction = "Use the answer_question_json function to provide your answer as a JSON object." else: - system_content = ( - "You are a screen analysis assistant. " - "Answer questions about what you see in the screenshot. " - "Be concise and direct." - ) + instruction = "Use the answer_question function to provide your answer as text." + + system_content = ( + "You are a screen analysis assistant. " + "Answer questions about what you see in the screenshot. 
" + f"{instruction}" + ) user_content = [ {"type": "text", "text": question}, diff --git a/Agent/tools/base.py b/Agent/tools/base.py index ef6337f..77cd27c 100644 --- a/Agent/tools/base.py +++ b/Agent/tools/base.py @@ -7,7 +7,7 @@ class ToolCategory(Enum): MOBILE = "mobile" WEB = "web" - VISUAL = "visual" + SCREEN = "screen" class BaseTool(ABC): @@ -35,7 +35,7 @@ def description(self) -> str: @property @abstractmethod def category(self) -> ToolCategory: - """Tool category: ToolCategory.MOBILE, ToolCategory.WEB, or ToolCategory.VISUAL.""" + """Tool category: ToolCategory.MOBILE, ToolCategory.WEB, or ToolCategory.SCREEN.""" pass @property diff --git a/Agent/tools/registry.py b/Agent/tools/registry.py index b6230e8..ba7284e 100644 --- a/Agent/tools/registry.py +++ b/Agent/tools/registry.py @@ -8,10 +8,21 @@ class ToolRegistry: Tools can be registered dynamically and retrieved by: - name - - category (mobile, web, visual) + - category (mobile, web, screen) - all tools + - action-based selection (filters tools by parameters) """ + SCREEN_TOOL_SELECTION_RULES = { + "ask": { + "text": "answer_question", + "json": "answer_question_json" + }, + "visual_check": { + "default": "assert_screen" + } + } + _instance: Optional['ToolRegistry'] = None _tools: Dict[str, BaseTool] = {} @@ -86,6 +97,39 @@ def get_tools_for_source(self, category: Union[ToolCategory, str], element_sourc if not (tool.works_on_coordinates and not tool.works_on_locator) ] + def get_tool_for_query(self, query_type: str, **kwargs) -> Optional[BaseTool]: + """Get single tool based on query type and parameters. + + Args: + query_type: Query type (e.g., "ask", "visual_check") + **kwargs: Parameters to filter tool (e.g., response_format="json") + + Returns: + Single tool matching criteria or None if not found + + Example: + tool = registry.get_tool_for_query("ask", response_format="json") + tool = registry.get_tool_for_query("visual_check") + """ + rules = self.SCREEN_TOOL_SELECTION_RULES.get(query_type) + if not rules: + logger.warn(f"No selection rules found for query: {query_type}") + return None + + if query_type == "ask": + format_key = kwargs.get("response_format", "text") + tool_name = rules.get(format_key) + elif query_type == "visual_check": + tool_name = rules.get("default") + else: + return None + + if not tool_name: + logger.warn(f"No tool found for query: {query_type} with kwargs: {kwargs}") + return None + + return self.get(tool_name) + def clear(self) -> None: self._tools.clear() diff --git a/Agent/tools/visual/__init__.py b/Agent/tools/visual/__init__.py deleted file mode 100644 index 00bca38..0000000 --- a/Agent/tools/visual/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from Agent.tools.visual.verify_match import VerifyVisualMatchTool - - -# Declarative list of visual tools for registration -VISUAL_TOOLS = [ - VerifyVisualMatchTool, -] - -__all__ = ["VISUAL_TOOLS", "VerifyVisualMatchTool"] - diff --git a/Agent/tools/visual/verify_match.py b/Agent/tools/visual/verify_match.py deleted file mode 100644 index abc41c6..0000000 --- a/Agent/tools/visual/verify_match.py +++ /dev/null @@ -1,135 +0,0 @@ -from typing import Any, Dict -from Agent.tools.base import BaseTool, ExecutorProtocol, ToolCategory -from robot.api import logger - - -class VerifyVisualMatchTool(BaseTool): - """Visual verification tool - analyzes screenshots to verify conditions. - - This tool is used by Agent.VisualCheck to verify UI states, presence of elements, - visual appearance, etc. by analyzing screenshots with AI vision models. 
- """ - - @property - def name(self) -> str: - return "verify_visual_match" - - @property - def description(self) -> str: - return "Report the results of visual verification against the given instruction" - - @property - def category(self) -> ToolCategory: - return ToolCategory.VISUAL - - def get_parameters_schema(self) -> Dict[str, Any]: - return { - "type": "object", - "properties": { - "verification_result": { - "type": "boolean", - "description": "Whether the screenshot matches the instruction (true) or not (false)" - }, - "confidence_score": { - "type": "number", - "description": "Confidence level of the verification from 0.0 (no confidence) to 1.0 (completely confident)", - "minimum": 0.0, - "maximum": 1.0 - }, - "analysis": { - "type": "string", - "description": "Detailed analysis explaining why the verification passed or failed" - }, - "found_elements": { - "type": "array", - "description": "Optional list of UI elements found in the screenshot", - "items": { - "type": "object", - "properties": { - "element_type": {"type": "string"}, - "description": {"type": "string"}, - "location": {"type": "string"}, - "confidence": {"type": "number"} - } - } - }, - "issues": { - "type": "array", - "description": "Optional list of issues or problems found", - "items": {"type": "string"} - } - }, - "required": ["verification_result", "confidence_score", "analysis"] - } - - def execute( - self, - executor: ExecutorProtocol, - arguments: Dict[str, Any], - context: Dict[str, Any] - ) -> None: - """Execute visual verification - log results and assert if failed. - - Note: Visual tools don't use the executor for actions, they analyze results. - """ - verification_result = arguments.get("verification_result") - confidence_score = arguments.get("confidence_score") - analysis = arguments.get("analysis") - found_elements = arguments.get("found_elements", []) - issues = arguments.get("issues", []) - - logger.info(f"👁️ Visual verification results: {arguments}") - - # Log detailed AI response - logger.debug("=" * 80) - logger.debug("AI VISUAL VERIFICATION RESPONSE") - logger.debug("=" * 80) - logger.debug(f"Verification Result: {'PASS' if verification_result else 'FAIL'}") - logger.debug(f"Confidence Score: {confidence_score:.2f}") - logger.debug(f"Analysis: {analysis}") - - if found_elements: - logger.debug(f"Found Elements ({len(found_elements)} total):") - for i, element in enumerate(found_elements[:10], 1): - element_type = element.get("element_type", "unknown") - description = element.get("description", "no description") - location = element.get("location", "unknown location") - confidence = element.get("confidence", 0.0) - logger.debug(f" {i}. {element_type}: {description}") - logger.debug(f" Location: {location}") - logger.debug(f" Confidence: {confidence:.2f}") - - if issues: - logger.debug(f"Issues Found ({len(issues)} total):") - for i, issue in enumerate(issues, 1): - logger.debug(f" {i}. {issue}") - - logger.debug("=" * 80) - - # Compact log for custom logger - logger.debug(f"🔍 Verification result: {verification_result}") - logger.debug(f"📊 Confidence score: {confidence_score}") - logger.debug(f"📝 Analysis: {analysis}") - - if found_elements: - logger.debug(f"🎯 Found elements: {len(found_elements)} elements detected") - for i, element in enumerate(found_elements[:5], 1): - element_type = element.get("element_type", "unknown") - description = element.get("description", "no description") - confidence = element.get("confidence", 0.0) - logger.debug(f" {i}. 
{element_type}: {description} (confidence: {confidence:.2f})") - - if issues: - logger.debug(f"⚠️ Issues found: {len(issues)} issues detected") - for i, issue in enumerate(issues[:3], 1): - logger.debug(f" {i}. {issue}") - - # Assert based on verification result - if verification_result: - logger.info("✅ Visual verification passed") - else: - error_msg = f"Visual verification failed. Analysis: {analysis}" - if issues: - error_msg += f" Issues: {', '.join(issues[:3])}" - raise AssertionError(error_msg) - From 923f6e0db4ca6135538f76575f7587252e4a3a78 Mon Sep 17 00:00:00 2001 From: hassineabd Date: Tue, 30 Dec 2025 13:45:56 +0100 Subject: [PATCH 4/5] improve logging on assert screen tool --- Agent/tools/screen/assert_screen.py | 51 ++++------------------------- 1 file changed, 7 insertions(+), 44 deletions(-) diff --git a/Agent/tools/screen/assert_screen.py b/Agent/tools/screen/assert_screen.py index 5a8f2fb..715e88e 100644 --- a/Agent/tools/screen/assert_screen.py +++ b/Agent/tools/screen/assert_screen.py @@ -80,60 +80,23 @@ def execute( min_confidence = context.get("min_confidence", 0.7) - logger.info(f"👁️ Visual verification results: {arguments}") - - logger.debug("=" * 80) - logger.debug("AI VISUAL VERIFICATION RESPONSE") - logger.debug("=" * 80) - logger.debug(f"Verification Result: {'PASS' if verification_result else 'FAIL'}") - logger.debug(f"Confidence Score: {confidence_score:.2f}") + result_status = "PASS" if verification_result else "FAIL" + logger.info(f"Visual check: {result_status} (confidence: {confidence_score:.2f})") logger.debug(f"Analysis: {analysis}") if found_elements: - logger.debug(f"Found Elements ({len(found_elements)} total):") - for i, element in enumerate(found_elements[:10], 1): - element_type = element.get("element_type", "unknown") - description = element.get("description", "no description") - location = element.get("location", "unknown location") - confidence = element.get("confidence", 0.0) - logger.debug(f" {i}. {element_type}: {description}") - logger.debug(f" Location: {location}") - logger.debug(f" Confidence: {confidence:.2f}") - - if issues: - logger.debug(f"Issues Found ({len(issues)} total):") - for i, issue in enumerate(issues, 1): - logger.debug(f" {i}. {issue}") - - logger.debug("=" * 80) - - logger.debug(f"🔍 Verification result: {verification_result}") - logger.debug(f"📊 Confidence score: {confidence_score}") - logger.debug(f"📝 Analysis: {analysis}") - - if found_elements: - logger.debug(f"🎯 Found elements: {len(found_elements)} elements detected") - for i, element in enumerate(found_elements[:5], 1): - element_type = element.get("element_type", "unknown") - description = element.get("description", "no description") - confidence = element.get("confidence", 0.0) - logger.debug(f" {i}. {element_type}: {description} (confidence: {confidence:.2f})") + logger.debug(f"Found elements: {found_elements}") if issues: - logger.debug(f"⚠️ Issues found: {len(issues)} issues detected") - for i, issue in enumerate(issues[:3], 1): - logger.debug(f" {i}. {issue}") + logger.debug(f"Issues: {', '.join(issues[:3])}") if not verification_result: - error_msg = f"Visual verification failed. Analysis: {analysis}" + error_msg = f"Visual verification failed: {analysis}" if issues: - error_msg += f" Issues: {', '.join(issues[:3])}" + error_msg += f" | Issues: {', '.join(issues[:3])}" raise AssertionError(error_msg) elif confidence_score < min_confidence: raise AssertionError( - f"Confidence score too low: {confidence_score:.2f} < {min_confidence} " - f"(threshold). 
Analysis: {analysis}"
+                f"Confidence too low: {confidence_score:.2f} < {min_confidence}. {analysis}"
             )
-        else:
-            logger.info(f"✅ Visual verification passed (confidence: {confidence_score:.2f})")


From ef23457d1d1ceb5698c000d6cd0f57abfb0db7dc Mon Sep 17 00:00:00 2001
From: hassineabd
Date: Tue, 30 Dec 2025 13:46:18 +0100
Subject: [PATCH 5/5] improve click element tool description

---
 Agent/tools/mobile/click_element.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Agent/tools/mobile/click_element.py b/Agent/tools/mobile/click_element.py
index 6cf4feb..348f05b 100644
--- a/Agent/tools/mobile/click_element.py
+++ b/Agent/tools/mobile/click_element.py
@@ -19,7 +19,7 @@ def name(self) -> str:

     @property
     def description(self) -> str:
-        return "CLICK/TAP on ANY visible element (buttons, links, suggestions, icons, list items). USE THIS for all clicking actions."
+        return "CLICK/TAP on visible elements. PREFER elements that contain or enclose CLEAR TEXT/LABELS over icons when possible. Choose the most explicit element (e.g., text suggestions, labeled buttons) rather than ambiguous icons."

     @property
     def category(self) -> ToolCategory:
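
Taken together, patches 2 and 3 route Agent.Ask and Agent.VisualCheck through a single screen tool selected programmatically from the registry (via SCREEN_TOOL_SELECTION_RULES), so the LLM only ever sees one tool spec per query, while patch 1 lets every mobile tool carry an optional "reasoning" argument that is logged. A minimal usage sketch, assuming only the modules and signatures introduced in this series and an environment where the Agent package and Robot Framework are importable; it is an illustration, not part of the commits:

    from Agent.tools.registry import ToolRegistry
    from Agent.tools.screen import AnswerTextTool, AnswerJsonTool, AssertScreenTool

    # Populate the (singleton) registry with the three screen tools from patch 2.
    registry = ToolRegistry()
    for tool_cls in (AnswerTextTool, AnswerJsonTool, AssertScreenTool):
        registry.register(tool_cls())

    # Deterministic selection from patch 3: one tool per query type / response format.
    ask_tool = registry.get_tool_for_query("ask", response_format="json")  # answer_question_json
    check_tool = registry.get_tool_for_query("visual_check")               # assert_screen

    # Screen tools ignore the executor; assert_screen raises AssertionError when the
    # verification fails or the confidence falls below context["min_confidence"].
    check_tool.execute(
        executor=None,
        arguments={
            "verification_result": True,
            "confidence_score": 0.92,
            "analysis": "Home screen with the search bar is visible.",
        },
        context={"min_confidence": 0.7},
    )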