diff --git a/Agent/agent_engine.py b/Agent/agent_engine.py
index dbba091..0983dcf 100644
--- a/Agent/agent_engine.py
+++ b/Agent/agent_engine.py
@@ -15,6 +15,12 @@
 class AgentEngine:
     """Core engine for AI-driven Android test automation."""
 
+    SOM_CONFIG = {
+        'visual_annotation': True,
+        'text_format': 'compact',
+        'output_type': 'text'
+    }
+
     def __init__(
         self, 
         llm_client: str = "openai", 
@@ -176,6 +182,7 @@ def do(self, instruction: str) -> None:
             llm_input_format=self.llm_input_format,
             screenshot_base64=screenshot_base64,
             annotated_image_path=annotated_image_path,
+            som_config=self.SOM_CONFIG if self.llm_input_format == "som" else None,
         )
         if annotated_image_path:
             logger.info(f"Annotated image: {annotated_image_path}")
diff --git a/Agent/ai/_promptcomposer.py b/Agent/ai/_promptcomposer.py
index df3324c..2cb6132 100644
--- a/Agent/ai/_promptcomposer.py
+++ b/Agent/ai/_promptcomposer.py
@@ -1,6 +1,7 @@
 from typing import List, Dict, Optional, Any
 from Agent.tools.registry import ToolRegistry
 from Agent.tools.base import ToolCategory
+from Agent.platforms.grounding import SomComposer
 from robot.api import logger
 import base64
 import os
@@ -46,6 +47,7 @@ def compose_do_messages(
         llm_input_format: str = "text",
         screenshot_base64: Optional[str] = None,
         annotated_image_path: Optional[str] = None,
+        som_config: Optional[Dict[str, Any]] = None,
     ) -> List[Dict[str, Any]]:
         """Build DO action messages using tool calling approach.
         
@@ -57,6 +59,12 @@ def compose_do_messages(
             llm_input_format: 'text' or 'som'
             screenshot_base64: Screenshot (required for SoM mode)
             annotated_image_path: Pre-annotated image from OmniParser
+            som_config: SoM configuration dict {
+                'visual_annotation': True/False,
+                'text_format': 'compact'/'detailed'/'minimal',
+                'output_type': 'text'/'json',
+                'include_screenshot': True/False
+            }
         """
         # Base system prompt
         is_mobile = platform in ("android", "ios")
@@ -64,6 +72,14 @@ def compose_do_messages(
             system_content = (
                 "You are a MOBILE app test automation engine (Appium).\n"
                 "Your job: analyze the instruction and call the appropriate function to interact with the mobile UI.\n"
+                "\n⚠️ CRITICAL TOOL SELECTION:\n"
+                "- IF instruction says 'click', 'tap', 'select', 'choose' → ALWAYS use tap_element(index)\n"
+                "- scroll/swipe tools are ONLY for navigation - NEVER use them to click/tap\n"
+                "\n⚠️ IMPORTANT:\n"
+                "ALL tools have a 'reasoning' parameter. You MUST provide a brief explanation (1 sentence) of:\n"
+                "- Which element you chose and why (for element-based actions)\n"
+                "- Why this action matches the instruction (for all actions)\n"
+                "Example: {\"element_index\": 5, \"reasoning\": \"Clicking the search icon at the top right to open search\"}\n"
             )
             
             if element_source == "vision":
@@ -75,94 +91,138 @@ def compose_do_messages(
                 )
             else:
                 system_content += (
-                    "\nUSE LOCATOR TOOLS:\n"
-                    "1. FOR TEXT INPUT: input_text(element_index, text) - select from numbered list\n"
-                    "2. FOR CLICKING: tap_element(index) - select from numbered list\n"
-                    "3. OTHER: scroll_down(), swipe_left/right/up(), long_press(index), hide_keyboard(), go_back()\n"
+                    "\n🎯 TOOL SELECTION RULES:\n"
+                    "1. IF element is VISIBLE in the UI list → USE tap_element(index) to click it\n"
+                    "2. IF you need to type text → USE input_text(index, text)\n"
+                    "3. IF target element is NOT in the list → USE scroll_down/swipe_up to reveal it\n"
+                    "4. NEVER use scroll/swipe when the target element is already visible!\n"
+                    "5. scroll_down, swipe_up, swipe_left, swipe_right are ONLY for navigation - NOT for clicking!\n"
+                    "6. To click ANY element from the list, ALWAYS use tap_element(index)\n"
+                    "\nCRITICAL NOTES:\n"
+                    "- The screenshot shows NUMBERED bounding boxes. Use what you SEE in the image!\n"
+                    "- tap_element() clicks by COORDINATES - you CAN tap ANY visible element, even if not marked as clickable\n"
+                    "- If you see the target element on screen, CLICK IT directly with tap_element()\n"
+                    "- Search suggestions, list items, buttons = ALL require tap_element()\n"
                 )
             
             system_content += (
                 "\nIMPORTANT: You are working with MOBILE apps (Android/iOS), NOT web browsers."
             )
-        else:
-            system_content = (
-                "You are a WEB test automation engine.\n"
-                "Your job: analyze the instruction and call the appropriate function to interact with the web page.\n"
-            )
-            
-            if element_source == "vision":
-                system_content += (
-                    "\nUSE VISUAL TOOLS:\n"
-                    "- click_visual_element(description): Click by visual description\n"
-                    "- input_text_visual(description, text): Input text by visual description\n"
-                    "- hover_visual(description): Hover by visual description\n"
-                    "- double_click_visual(description): Double click by visual description\n"
-                    "- Elements were detected using computer vision (OmniParser)\n"
-                )
-            else:
-                system_content += (
-                    "\nUSE LOCATOR TOOLS:\n"
-                    "1. FOR TEXT INPUT: input_text(index, text) for <input> or <textarea> elements\n"
-                    "2. FOR CLICKING: click_element(index) for <button> or <a> elements\n"
-                    "3. FOR DROPDOWN: select_option(index, value) for <select> elements\n"
-                    "4. OTHER: scroll_down(), scroll_up(), press_key(), go_back(), hover(), double_click()\n"
-                )
-
-            system_content += (
-                "\nCRITICAL: Pay attention to element tags when using standard tools:\n"
-                "- <input> or <textarea> = text input fields (use input_text tool)\n"
-                "- <button> or <a> = clickable elements (use click_element tool)\n"
-                "- <select> = dropdown (use select_option tool)\n"
-            )
+        # else:
+        #     system_content = (
+        #         "You are a WEB test automation engine.\n"
+        #         "Your job: analyze the instruction and call the appropriate function to interact with the web page.\n"
+        #     )
+        #     
+        #     if element_source == "vision":
+        #         system_content += (
+        #             "\nUSE VISUAL TOOLS:\n"
+        #             "- click_visual_element(description): Click by visual description\n"
+        #             "- input_text_visual(description, text): Input text by visual description\n"
+        #             "- hover_visual(description): Hover by visual description\n"
+        #             "- double_click_visual(description): Double click by visual description\n"
+        #             "- Elements were detected using computer vision (OmniParser)\n"
+        #         )
+        #     else:
+        #         system_content += (
+        #             "\nUSE LOCATOR TOOLS:\n"
+        #             "1. FOR TEXT INPUT: input_text(index, text) for <input> or <textarea> elements\n"
+        #             "2. FOR CLICKING: click_element(index) for <button> or <a> elements\n"
+        #             "3. FOR DROPDOWN: select_option(index, value) for <select> elements\n"
+        #             "4. OTHER: scroll_down(), scroll_up(), press_key(), go_back(), hover(), double_click()\n"
+        #         )
+        # 
+        #     system_content += (
+        #         "\nCRITICAL: Pay attention to element tags when using standard tools:\n"
+        #         "- <input> or <textarea> = text input fields (use input_text tool)\n"
+        #         "- <button> or <a> = clickable elements (use click_element tool)\n"
+        #         "- <select> = dropdown (use select_option tool)\n"
+        #     )
         
         # Build user content based on llm_input_format
-        ui_label = "Mobile UI Elements" if is_mobile else "Web Elements"
+        # ui_label = "Mobile UI Elements" if is_mobile else "Web Elements"
+        ui_label = "Mobile UI Elements"
         
         if llm_input_format == "som" and ui_elements:
             source_info = "detected via computer vision" if element_source == "vision" else "from accessibility tree"
             
-            legend_lines = []
-            for idx, elem in enumerate(ui_elements, start=1):
-                text = elem.get("text", "").replace("\n", " ").strip()[:40]
-                tag = elem.get("class_name", "")
-                short_tag = tag.split('.')[-1] if '.' in tag else tag
-                desc = text if text else (elem.get("aria_label") or elem.get("content_desc") or elem.get("placeholder") or "")
-                bbox = elem.get("bbox", {})
-                pos_info = ""
-                if bbox:
-                    y = bbox.get("y", 0)
-                    x = bbox.get("x", 0)
-                    pos = "top" if y < 400 else "mid" if y < 1200 else "bot"
-                    side = "L" if x < 300 else "C" if x < 700 else "R"
-                    pos_info = f" @{pos}-{side}"
-                legend_lines.append(f"[{idx}] {short_tag}: {desc}{pos_info}".strip())
-            legend_text = "\n".join(legend_lines)
+            # Get screen dimensions
+            screen_size = self.platform.get_screen_size()
+            screen_width = screen_size['width']
+            screen_height = screen_size['height']
             
-            text_content = (
-                f"Instruction: {instruction}\n\n"
-                f"ANNOTATED SCREENSHOT: Each UI element has a GREEN BOX with its ID NUMBER in a small rectangle at the top-left.\n"
-                f"ELEMENT LIST ({source_info}):\n{legend_text}\n\n"
-                f"IMPORTANT: Select the element by its ID NUMBER that best matches the instruction."
-            )
+            # Default SoM config
+            if som_config is None:
+                som_config = {
+                    'visual_annotation': True,
+                    'text_format': 'compact',
+                    'output_type': 'text'
+                }
+            
+            # Use SomComposer to generate SoM components
+            som_composer = SomComposer(platform, screen_width, screen_height)
             
             # Use pre-annotated image from OmniParser if available (Visual + SoM)
             if annotated_image_path:
                 with open(annotated_image_path, "rb") as img_file:
                     annotated_base64 = base64.b64encode(img_file.read()).decode("utf-8")
                 self._save_annotated_image(annotated_base64, source="omniparser")
+                
+                # Generate text legend using SomComposer
+                som_result = som_composer.compose(
+                    screenshot_base64=None,
+                    elements=ui_elements,
+                    config={**som_config, 'visual_annotation': False}
+                )
+                
+                if som_config.get('output_type') == 'json':
+                    legend_text = som_result.get('elements_json', '')
+                else:
+                    legend_text = som_result.get('text_legend', '')
+                
+                text_content = (
+                    f"Instruction: {instruction}\n\n"
+                    f"ANNOTATED SCREENSHOT: Each UI element has a GREEN BOX with its ID NUMBER in a small rectangle at the top-left.\n"
+                    f"ELEMENT LIST ({source_info}):\n{legend_text}\n\n"
+                    f"IMPORTANT: Select the element by its ID NUMBER that best matches the instruction."
+                )
+                
                 user_content = [
                     {"type": "text", "text": text_content},
                     {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{annotated_base64}"}}
                 ]
             # Otherwise render SoM for DOM elements (DOM + SoM)
             elif screenshot_base64:
-                from Agent.platforms.collectors.som_renderer import render_som
-                annotated_screenshot = render_som(screenshot_base64, ui_elements)
-                self._save_annotated_image(annotated_screenshot, source="dom")
-                user_content = [
-                    {"type": "text", "text": text_content},
-                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{annotated_screenshot}"}}
-                ]
+                som_result = som_composer.compose(
+                    screenshot_base64=screenshot_base64,
+                    elements=ui_elements,
+                    config=som_config
+                )
+                
+                annotated_screenshot = som_result.get('annotated_image_base64', '')
+                
+                if som_config.get('output_type') == 'json':
+                    legend_text = som_result.get('elements_json', '')
+                else:
+                    legend_text = som_result.get('text_legend', '')
+                
+                if annotated_screenshot:
+                    self._save_annotated_image(annotated_screenshot, source="dom")
+                
+                text_content = (
+                    f"Instruction: {instruction}\n\n"
+                    f"ANNOTATED SCREENSHOT: Each UI element has a GREEN BOX with its ID NUMBER in a small rectangle at the top-left.\n"
+                    f"ELEMENT LIST ({source_info}):\n{legend_text}\n\n"
+                    f"IMPORTANT: Select the element by its ID NUMBER that best matches the instruction."
+                )
+                
+                if annotated_screenshot:
+                    user_content = [
+                        {"type": "text", "text": text_content},
+                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{annotated_screenshot}"}}
+                    ]
+                else:
+                    user_content = text_content
             else:
                 user_content = f"Instruction: {instruction}\n\nError: SoM mode requires screenshot"
         else:
diff --git a/Agent/ai/prompts/__init__.py b/Agent/ai/prompts/__init__.py
index 162137d..960b2ff 100644
--- a/Agent/ai/prompts/__init__.py
+++ b/Agent/ai/prompts/__init__.py
@@ -1,4 +1 @@
-from Agent.ai.prompts.renderer import UIRenderer
-
-__all__ = ["UIRenderer"]
-
+# TODO: prompt templates
diff --git a/Agent/ai/prompts/renderer.py b/Agent/ai/prompts/renderer.py
deleted file mode 100644
index 5c8fc7c..0000000
--- a/Agent/ai/prompts/renderer.py
+++ /dev/null
@@ -1,123 +0,0 @@
-from typing import Any, Dict, List
-
-
-class UIRenderer:
-    """Renders UI elements as text for AI prompts."""
-    
-    def render(self, elements: List[Dict[str, Any]], platform: str = "web") -> str:
-        """
-        Render UI elements as numbered text list for AI prompt.
-        
-        Args:
-            elements: List of UI element dictionaries
-            platform: 'web', 'android', or 'ios'
-        Returns:
-            Formatted string with numbered elements
-        """
-        if not elements:
-            return "(no UI elements found)"
-        
-        is_mobile = platform in ("android", "ios")
-        max_items = 50 if is_mobile else 150
-        
-        rendered = []
-        for i, el in enumerate(elements[:max_items], 1):
-            if platform == "ios":
-                line = self._render_ios_element(i, el)
-            elif platform == "android":
-                line = self._render_android_element(i, el)
-            else:
-                line = self._render_web_element(i, el)
-            rendered.append(line)
-        
-        return "\n".join(rendered)
-    
-    def _render_web_element(self, index: int, el: Dict[str, Any]) -> str:
-        """Render a single web element."""
-        parts = []
-        
-        tag = el.get('class_name', '') or el.get('tag', 'unknown')
-        elem_type = el.get('type', '')
-        if elem_type and elem_type not in ['text', '']:
-            parts.append(f"<{tag} type='{elem_type}'>")
-        else:
-            parts.append(f"<{tag}>")
-        
-        aria_label = el.get("aria_label", '')
-        if aria_label:
-            parts.append(f"aria-label='{aria_label}'")
-        
-        placeholder = el.get("placeholder", '')
-        if placeholder:
-            parts.append(f"placeholder='{placeholder}'")
-        
-        if el.get("text"):
-            parts.append(f"text='{el['text']}'")
-        
-        if el.get("resource_id"):
-            parts.append(f"id='{el['resource_id']}'")
-        
-        if el.get("name"):
-            parts.append(f"name='{el['name']}'")
-        
-        return f"{index}. {' | '.join(parts)}"
-    
-    def _render_android_element(self, index: int, el: Dict[str, Any]) -> str:
-        """Render a single Android element."""
-        parts = []
-        
-        class_name = el.get('class_name', 'unknown')
-        short_class = class_name.split('.')[-1] if '.' in class_name else class_name
-        parts.append(f"[{short_class}]")
-        
-        if el.get("text"):
-            parts.append(f"text='{el['text']}'")
-        
-        if el.get("resource_id"):
-            parts.append(f"id='{el['resource_id']}'")
-        
-        content_desc = el.get("accessibility_label", '') or el.get("content_desc", '')
-        if content_desc:
-            parts.append(f"desc='{content_desc}'")
-        
-        bbox = el.get("bbox", {})
-        if bbox:
-            y = bbox.get("y", 0)
-            x = bbox.get("x", 0)
-            w = bbox.get("width", 0)
-            h = bbox.get("height", 0)
-            pos = "top" if y < 400 else "middle" if y < 1200 else "bottom"
-            side = "left" if x < 300 else "center" if x < 700 else "right"
-            parts.append(f"pos={pos}-{side} size={w}x{h}")
-        
-        return f"{index}. {' | '.join(parts)}"
-    
-    def _render_ios_element(self, index: int, el: Dict[str, Any]) -> str:
-        """Render a single iOS element."""
-        parts = []
-        
-        class_name = el.get('class_name', 'unknown')
-        short_class = class_name.replace('XCUIElementType', '') if 'XCUIElementType' in class_name else class_name
-        parts.append(f"[{short_class}]")
-        
-        if el.get("text"):
-            parts.append(f"text='{el['text']}'")
-        
-        if el.get("resource_id"):
-            parts.append(f"name='{el['resource_id']}'")
-        
-        label = el.get("accessibility_label", '') or el.get("label", '')
-        if label:
-            parts.append(f"label='{label}'")
-        
-        bbox = el.get("bbox", {})
-        if bbox:
-            y = bbox.get("y", 0)
-            x = bbox.get("x", 0)
-            w = bbox.get("width", 0)
-            h = bbox.get("height", 0)
-            pos = "top" if y < 400 else "middle" if y < 1200 else "bottom"
-            side = "left" if x < 300 else "center" if x < 700 else "right"
-            parts.append(f"pos={pos}-{side} size={w}x{h}")
-        
-        return f"{index}. {' | '.join(parts)}"
diff --git a/Agent/platforms/__init__.py b/Agent/platforms/__init__.py
index 25a24a8..264ae37 100644
--- a/Agent/platforms/__init__.py
+++ b/Agent/platforms/__init__.py
@@ -1,9 +1,8 @@
 from Agent.platforms._mobileconnector import DeviceConnector
 from Agent.platforms._platformfactory import create_platform
 from Agent.platforms.locators import MobileLocatorBuilder
-from Agent.platforms.collectors import XMLCollector
+from Agent.platforms.collectors import AndroidCollector, IOSCollector
 
-# Placeholder for future web support
 WebConnectorRF = None
 
 __all__ = [
@@ -11,5 +10,6 @@
     "WebConnectorRF",
     "create_platform",
     "MobileLocatorBuilder",
-    "XMLCollector",
+    "AndroidCollector",
+    "IOSCollector",
 ]
diff --git a/Agent/platforms/_mobileconnector.py b/Agent/platforms/_mobileconnector.py
index bc541a7..d794b49 100644
--- a/Agent/platforms/_mobileconnector.py
+++ b/Agent/platforms/_mobileconnector.py
@@ -1,9 +1,7 @@
 from typing import Any, Dict, List
 from robot.api import logger
 from robot.libraries.BuiltIn import BuiltIn
-from Agent.platforms.collectors.xml_collector import XMLCollector
-from Agent.platforms.locators.mobile import MobileLocatorBuilder
-from Agent.ai.prompts.renderer import UIRenderer
+# Lazy import in methods for collectors, renderer, and locator builder
 
 
 class DeviceConnector:
@@ -13,12 +11,13 @@ def __init__(self):
         self._appium_lib = None
         self._driver = None
         self._session_id = None
-        self._collector = XMLCollector()
-        self.locator_builder = MobileLocatorBuilder()
-        self._renderer = UIRenderer()
+        self._platform = None
+        self._collector = None  # Lazy init
+        self._filter_pipeline = None  # Lazy init
+        self._locator_builder = None  # Lazy init
+        self._renderer = None  # Lazy init
 
     def _get_driver(self) -> Any:
-        """Get Appium driver instance."""
         if self._appium_lib is None:
             self._appium_lib = BuiltIn().get_library_instance('AppiumLibrary')
         
@@ -54,31 +53,89 @@ def _get_driver(self) -> Any:
         return self._driver
 
     def get_platform(self) -> str:
-        """Detect platform from driver capabilities."""
-        caps = self._get_driver().capabilities
-        platform = caps.get('platformName', '').lower()
-        return 'ios' if 'ios' in platform else 'android'
+        if self._platform is None:
+            caps = self._get_driver().capabilities
+            platform = caps.get('platformName', '').lower()
+            self._platform = 'ios' if 'ios' in platform else 'android'
+        return self._platform
+
+    def get_screen_size(self) -> Dict[str, int]:
+        """Return the driver window size as {'width', 'height'}, or 1080x1920 if the lookup raises."""
+        try:
+            window = self._get_driver().get_window_size()
+            return {dim: window.get(dim, 0) for dim in ('width', 'height')}
+        except Exception:  # TODO: see if this is really needed and if there is better fallback
+            logger.warn("⚠️ Could not get screen size, using fallback 1080x1920")
+            return {'width': 1080, 'height': 1920}
 
     def get_ui_xml(self) -> str:
         return self._get_driver().page_source
 
     def collect_ui_candidates(self, max_items: int = 50) -> List[Dict[str, Any]]:
-        """Collect interactive UI elements from current screen."""
         xml = self.get_ui_xml()
-        platform = self.get_platform()
-        self._collector.set_platform(platform)
-        return self._collector.parse_xml(xml, max_items=max_items)
+        collector = self._get_collector()
+        pipeline = self._get_filter_pipeline()
+        
+        elements = collector.parse_xml(xml)
+        filtered = pipeline.apply(elements)
+        
+        screen_size = self.get_screen_size()
+        self._add_normalized_bbox(filtered, screen_size)
+        
+        filtered.sort(
+            key=lambda e: (
+                bool(e.get('resource-id', '').strip()),
+                bool(e.get('content-desc', '').strip()),
+                bool(e.get('text', '').strip()),
+                e.get('clickable') == 'true',
+            ),
+            reverse=True
+        )
+        
+        return filtered[:max_items]
 
-    def build_locator_from_element(self, element: Dict[str, Any]) -> str:
-        """Build Appium locator from element attributes."""
-        platform = self.get_platform()
-        self.locator_builder.set_platform(platform)
-        return self.locator_builder.build(element)
+    def collect_all_elements(self) -> List[Dict[str, Any]]:
+        """Parse the current page source into element dicts, unfiltered and untruncated.
+
+        The parsed list is passed through ``_add_normalized_bbox`` before being returned.
+        """
+        page_source = self.get_ui_xml()
+        parsed = self._get_collector().parse_xml(page_source)
+        self._add_normalized_bbox(parsed, self.get_screen_size())
+        return parsed
+    
+    def _add_normalized_bbox(self, elements: List[Dict[str, Any]], screen_size: Dict[str, int]) -> None:
+        """Attach ``bbox_normalized`` (bbox divided by screen size, 4 decimals) in place.
+
+        Does nothing when either screen dimension is missing or not positive;
+        elements whose ``bbox`` is absent or empty are left untouched.
+        """
+        screen_w = screen_size.get('width', 0)
+        screen_h = screen_size.get('height', 0)
+        if screen_w <= 0 or screen_h <= 0:
+            return
+
+        # Each bbox field is scaled by the screen dimension along its own axis.
+        divisors = {'x': screen_w, 'y': screen_h, 'width': screen_w, 'height': screen_h}
+        for element in elements:
+            box = element.get('bbox', {})
+            if box:
+                element['bbox_normalized'] = {k: round(box.get(k, 0) / d, 4) for k, d in divisors.items()}
+
+    def build_locator_from_element(self, element: Dict[str, Any], strategy: str = 'auto') -> str:
+        """Build a locator for ``element`` by delegating to the lazily created locator builder.
+
+        Args:
+            element: Dict with raw XML attributes
+            strategy: 'auto' | 'id_only' | 'bounds' | 'xpath_attrs' | 'xpath_all'
+        Returns:
+            Appium locator string, e.g. 'id=com.android:id/button' for strategy 'id_only'
+        """
+        return self._get_locator_builder().build(element, strategy=strategy)
 
     def render_ui_for_prompt(self, ui_elements: List[Dict[str, Any]]) -> str:
-        """Render UI elements as text for AI prompt."""
         platform = self.get_platform()
-        return self._renderer.render(ui_elements, platform=platform)
+        return self._get_renderer().serialize(ui_elements, platform=platform)
 
     def get_screenshot_base64(self) -> str:
         return self._get_driver().get_screenshot_as_base64()
@@ -90,3 +147,38 @@ def embed_image_to_log(self, base64_screenshot: str, width: int = 400) -> None:
     def wait_for_page_stable(self, delay: float = 1.0) -> None:
         import time
         time.sleep(delay)
+    
+    def _get_locator_builder(self):
+        """Return the cached MobileLocatorBuilder, creating it for the current platform on first use."""
+        if self._locator_builder is None:
+            from Agent.platforms.locators import MobileLocatorBuilder  # deferred import
+            self._locator_builder = MobileLocatorBuilder(platform=self.get_platform())
+        return self._locator_builder
+    
+    def _get_collector(self):
+        """Return the cached collector; first call picks IOSCollector for 'ios', else AndroidCollector."""
+        if self._collector is not None:
+            return self._collector
+        if self.get_platform() == 'ios':
+            from Agent.platforms.collectors import IOSCollector as collector_cls
+        else:
+            from Agent.platforms.collectors import AndroidCollector as collector_cls
+        self._collector = collector_cls()
+        return self._collector
+    
+    def _get_filter_pipeline(self):
+        """Return the cached filter pipeline; 'ios' gets FilterPipeline, others AndroidFilterPipeline."""
+        if self._filter_pipeline is not None:
+            return self._filter_pipeline
+        if self.get_platform() == 'ios':
+            from Agent.platforms.filters.pipeline import FilterPipeline as pipeline_cls
+        else:
+            from Agent.platforms.filters.android import AndroidFilterPipeline as pipeline_cls
+        self._filter_pipeline = pipeline_cls()
+        return self._filter_pipeline
+    
+    def _get_renderer(self):
+        if self._renderer is None:  # first use: build the serializer and cache it on the instance
+            from Agent.platforms.grounding.text.serializer import TextSerializer  # deferred import
+            self._renderer = TextSerializer()
+        return self._renderer
diff --git a/Agent/platforms/collectors/__init__.py b/Agent/platforms/collectors/__init__.py
index 9d0a93e..ad510fd 100644
--- a/Agent/platforms/collectors/__init__.py
+++ b/Agent/platforms/collectors/__init__.py
@@ -1,19 +1,17 @@
 """
 UI Collectors for mobile automation.
 
-This module provides strategies for collecting UI elements:
-- XMLCollector: XML page source parsing (Android/iOS)
+- AndroidCollector: Android XML page source parsing
+- IOSCollector: iOS XML page source parsing (NotImplemented)
 """
 
-from Agent.platforms.collectors.base_collector import BaseUICollector
-from Agent.platforms.collectors.collector_factory import CollectorRegistry
-from Agent.platforms.collectors.xml_collector import XMLCollector
-from Agent.platforms.collectors.som_renderer import render_som, bbox_center
+from Agent.platforms.collectors.android_collector import AndroidCollector
+from Agent.platforms.collectors.ios_collector import IOSCollector
+from Agent.platforms.grounding.som.annotator import annotate_screenshot, bbox_center
 
 __all__ = [
-    'BaseUICollector',
-    'CollectorRegistry',
-    'XMLCollector',
-    'render_som',
+    'AndroidCollector',
+    'IOSCollector',
+    'annotate_screenshot',
     'bbox_center',
 ]
diff --git a/Agent/platforms/collectors/android_collector.py b/Agent/platforms/collectors/android_collector.py
new file mode 100644
index 0000000..6c8dc56
--- /dev/null
+++ b/Agent/platforms/collectors/android_collector.py
@@ -0,0 +1,57 @@
+from typing import Any, Dict, List
+import xml.etree.ElementTree as ET
+
+
+class AndroidCollector:
+    """Collects UI elements from Android XML page source."""
+
+    def get_name(self) -> str:
+        """Return the collector identifier, ``"android"``."""
+        return "android"
+
+    def parse_xml(self, xml_source: str) -> List[Dict[str, Any]]:
+        """Flatten the page source into one dict per XML node.
+
+        Nodes are visited iteratively in document order (parent before children),
+        so deeply nested hierarchies cannot exhaust Python's recursion limit.
+
+        Args:
+            xml_source: Appium page source XML
+        Returns:
+            List of element dicts with raw XML attributes + computed bbox
+        Raises:
+            xml.etree.ElementTree.ParseError: if xml_source is not well-formed XML
+        """
+        root = ET.fromstring(xml_source)
+        return [self._parse_node(node) for node in root.iter()]
+
+    def _parse_node(self, node: Any) -> Dict[str, Any]:
+        """Copy the node's raw attributes and add the parsed ``bbox`` entry.
+
+        ``bbox`` is ``{}`` when the ``bounds`` attribute is missing or malformed.
+        """
+        raw_attrs = dict(node.attrib)
+        return {
+            **raw_attrs,
+            'bbox': self._parse_bounds(raw_attrs.get('bounds', '')),
+        }
+
+    def _parse_bounds(self, bounds_str: str) -> Dict[str, int]:
+        """Convert an Android ``bounds`` attribute into an x/y/width/height dict.
+
+        Args:
+            bounds_str: "[0,72][1080,200]"
+        Returns:
+            {'x': 0, 'y': 72, 'width': 1080, 'height': 128}, or {} when the value
+            is empty or does not contain exactly four integers
+        """
+        if not bounds_str:
+            return {}
+        try:
+            parts = bounds_str.replace('][', ',').strip('[]').split(',')
+            if len(parts) == 4:
+                x1, y1, x2, y2 = map(int, parts)
+                return {'x': x1, 'y': y1, 'width': x2 - x1, 'height': y2 - y1}
+        except (ValueError, AttributeError):
+            pass  # malformed bounds fall through to the empty bbox below
+        return {}
diff --git a/Agent/platforms/collectors/base_collector.py b/Agent/platforms/collectors/base_collector.py
deleted file mode 100644
index c9603d6..0000000
--- a/Agent/platforms/collectors/base_collector.py
+++ /dev/null
@@ -1,57 +0,0 @@
-"""
-Base abstract class for UI element collectors.
-
-All collector strategies must inherit from BaseUICollector and implement
-the required methods.
-"""
-
-from abc import ABC, abstractmethod
-from typing import Any, Dict, List
-
-
-class BaseUICollector(ABC):
-    """
-    Abstract base class for UI element collection strategies.
-    
-    Each collector strategy must implement:
-    1. collect_elements() - to gather UI elements from the page
-    2. get_name() - to identify the strategy
-    """
-    
-    @abstractmethod
-    def collect_elements(self, max_items: int = 500) -> List[Dict[str, Any]]:
-        """
-        Collect interactive UI elements from the current page.
-        
-        Args:
-            max_items: Maximum number of elements to return
-            
-        Returns:
-            List of dictionaries with element attributes:
-            {
-                'text': str,          # Visible text
-                'resource_id': str,   # ID or test-id
-                'content_desc': str,  # aria-label or placeholder
-                'label': str,         # Associated label text
-                'class_name': str,    # Tag name (button, input, etc.)
-                'role': str,          # ARIA role
-                'name': str,          # name attribute
-                'type': str,          # input type
-                'href': str,          # href for links
-                'clickable': bool,    # Is element clickable
-                'enabled': bool,      # Is element enabled
-                'bbox': dict          # Bounding box {'x': int, 'y': int, 'width': int, 'height': int}
-            }
-        """
-        pass
-    
-    @abstractmethod
-    def get_name(self) -> str:
-        """
-        Return the name of this collector strategy.
-        
-        Returns:
-            String identifier (e.g., "js_query")
-        """
-        pass
-
diff --git a/Agent/platforms/collectors/collector_factory.py b/Agent/platforms/collectors/collector_factory.py
deleted file mode 100644
index 1259391..0000000
--- a/Agent/platforms/collectors/collector_factory.py
+++ /dev/null
@@ -1,48 +0,0 @@
-"""
-Factory and Registry for UI Collectors.
-"""
-
-from typing import Dict, List, Type
-from robot.api import logger
-from Agent.platforms.collectors.base_collector import BaseUICollector
-
-
-class CollectorRegistry:
-    """Registry for UI collector strategies."""
-    
-    _collectors: Dict[str, Type[BaseUICollector]] = {}
-    
-    @classmethod
-    def register(cls, name: str, collector_class: Type[BaseUICollector]) -> None:
-        if not issubclass(collector_class, BaseUICollector):
-            raise TypeError(f"{collector_class} must inherit from BaseUICollector")
-        cls._collectors[name] = collector_class
-        logger.debug(f"Registered UI collector: '{name}' -> {collector_class.__name__}")
-    
-    @classmethod
-    def create(cls, strategy: str) -> BaseUICollector:
-        if strategy not in cls._collectors:
-            available = cls.list_available()
-            raise ValueError(f"Unknown strategy: '{strategy}'. Available: {available}")
-        
-        collector_class = cls._collectors[strategy]
-        return collector_class()
-    
-    @classmethod
-    def list_available(cls) -> List[str]:
-        return list(cls._collectors.keys())
-    
-    @classmethod
-    def is_registered(cls, strategy: str) -> bool:
-        return strategy in cls._collectors
-
-
-def _register_builtin_collectors():
-    try:
-        from Agent.platforms.collectors.xml_collector import XMLCollector
-        CollectorRegistry.register("xml", XMLCollector)
-    except ImportError as e:
-        logger.warn(f"Could not register XMLCollector: {e}")
-
-
-_register_builtin_collectors()
diff --git a/Agent/platforms/collectors/ios_collector.py b/Agent/platforms/collectors/ios_collector.py
new file mode 100644
index 0000000..6dd7fdc
--- /dev/null
+++ b/Agent/platforms/collectors/ios_collector.py
@@ -0,0 +1,11 @@
+from typing import Any, Dict, List
+
+
+class IOSCollector:
+    """Placeholder iOS page-source collector; parse_xml always raises NotImplementedError."""
+    
+    def get_name(self) -> str:
+        return "ios"
+    
+    def parse_xml(self, xml_source: str) -> List[Dict[str, Any]]:
+        raise NotImplementedError("iOS collector not implemented yet")
diff --git a/Agent/platforms/collectors/xml_collector.py b/Agent/platforms/collectors/xml_collector.py
deleted file mode 100644
index d4e0ec0..0000000
--- a/Agent/platforms/collectors/xml_collector.py
+++ /dev/null
@@ -1,125 +0,0 @@
-
-
-from typing import Any, Dict, List
-import xml.etree.ElementTree as ET
-from robot.api import logger
-from Agent.platforms.collectors.base_collector import BaseUICollector
-
-
-class XMLCollector(BaseUICollector):
-    """
-    Collects UI elements by parsing Appium XML page source.
-    
-    Supports both Android and iOS XML formats.
-    """
-    
-    def __init__(self, platform: str = "android"):
-        self._platform = platform
-    
-    def get_name(self) -> str:
-        return "xml"
-    
-    def set_platform(self, platform: str) -> None:
-        self._platform = platform
-    
-    def collect_elements(self, max_items: int = 50) -> List[Dict[str, Any]]:
-        raise NotImplementedError("Use parse_xml() with XML source instead")
-    
-    def parse_xml(self, xml_source: str, max_items: int = 50) -> List[Dict[str, Any]]:
-        """
-        Parse XML page source to extract interactive elements.
-        
-        Args:
-            xml_source: Appium page source XML string
-            max_items: Maximum elements to return
-        Returns:
-            List of element dictionaries
-        """
-        root = ET.fromstring(xml_source)
-        candidates = []
-        
-        def walk(node: Any) -> None:
-            if self._platform == 'ios':
-                attrs = self._parse_ios_node(node)
-            else:
-                attrs = self._parse_android_node(node)
-            
-            if attrs['clickable'] and attrs['enabled']:
-                candidates.append(attrs)
-            
-            for child in node:
-                walk(child)
-        
-        walk(root)
-        
-        candidates.sort(
-            key=lambda x: (
-                bool(x.get('text')),
-                bool(x.get('accessibility_label')),
-                bool(x.get('resource_id'))
-            ),
-            reverse=True
-        )
-        
-        logger.debug(f"[{self.get_name()}] Platform: {self._platform}, Found {len(candidates)} interactive elements")
-        return candidates[:max_items]
-    
-    def _parse_android_node(self, node: Any) -> Dict[str, Any]:
-        """Parse Android XML node to element dict."""
-        bbox = self._parse_android_bounds(node.get('bounds', ''))
-        content_desc = node.get('content-desc', '')
-        return {
-            'text': node.get('text', ''),
-            'resource_id': node.get('resource-id', ''),
-            'class_name': node.get('class', ''),
-            'accessibility_label': content_desc,
-            'content_desc': content_desc,  # backward compat
-            'clickable': node.get('clickable', 'false') == 'true',
-            'enabled': node.get('enabled', 'false') == 'true',
-            'bbox': bbox,
-        }
-    
-    def _parse_ios_node(self, node: Any) -> Dict[str, Any]:
-        """Parse iOS XML node to element dict."""
-        try:
-            bbox = {
-                'x': int(node.get('x', 0)),
-                'y': int(node.get('y', 0)),
-                'width': int(node.get('width', 0)),
-                'height': int(node.get('height', 0)),
-            }
-        except (ValueError, TypeError):
-            bbox = {}
-        
-        if bbox.get('width', 0) <= 0:
-            bbox = {}
-        
-        label = node.get('label', '')
-        return {
-            'text': node.get('value', '') or label,
-            'resource_id': node.get('name', ''),
-            'class_name': node.get('type', ''),
-            'accessibility_label': label,
-            'label': label,  # iOS-specific
-            'clickable': node.get('enabled', 'false') == 'true',
-            'enabled': node.get('enabled', 'false') == 'true',
-            'bbox': bbox,
-        }
-    
-    def _parse_android_bounds(self, bounds_str: str) -> Dict[str, int]:
-        """
-        Parse Android bounds string to bbox dict.
-        
-        Format: "[0,72][1080,200]" -> {x, y, width, height}
-        """
-        if not bounds_str:
-            return {}
-        try:
-            parts = bounds_str.replace('][', ',').strip('[]').split(',')
-            if len(parts) == 4:
-                x1, y1, x2, y2 = map(int, parts)
-                return {'x': x1, 'y': y1, 'width': x2 - x1, 'height': y2 - y1}
-        except (ValueError, AttributeError):
-            pass
-        return {}
-
diff --git a/Agent/platforms/filters/__init__.py b/Agent/platforms/filters/__init__.py
new file mode 100644
index 0000000..b885f85
--- /dev/null
+++ b/Agent/platforms/filters/__init__.py
@@ -0,0 +1,7 @@
+from Agent.platforms.filters.pipeline import FilterPipeline
+from Agent.platforms.filters.android import AndroidFilterPipeline
+
+__all__ = [
+    'FilterPipeline',
+    'AndroidFilterPipeline',
+]
diff --git a/Agent/platforms/filters/android/__init__.py b/Agent/platforms/filters/android/__init__.py
new file mode 100644
index 0000000..0239554
--- /dev/null
+++ b/Agent/platforms/filters/android/__init__.py
@@ -0,0 +1,39 @@
+from typing import Any, Dict, List
+from Agent.platforms.filters.android.displayed import DisplayedFilter
+from Agent.platforms.filters.android.bounds import BoundsFilter
+from Agent.platforms.filters.android.interactive import InteractiveFilter
+from Agent.platforms.filters.android.smart_hierarchy import SmartHierarchyFilter
+from Agent.platforms.filters.android.container import ContainerFilter
+from Agent.platforms.filters.pipeline import FilterPipeline
+
+
+class AndroidFilterPipeline:
+    """Pre-configured Android chain: displayed, bounds, interactive, smart-hierarchy, container."""
+
+    def __init__(self, screen_size: Dict[str, int] = None):
+        """Build the chain; without screen_size, BoundsFilter receives 0 x 0."""
+        dims = screen_size if screen_size else {}
+        width, height = dims.get('width', 0), dims.get('height', 0)
+        hierarchy_options = {
+            'prefer_parent_when_clickable': True,
+            'min_relevance_score': 5,
+            'overlap_threshold': 0.9,
+        }
+        stages = [DisplayedFilter(), BoundsFilter(width, height), InteractiveFilter()]
+        stages += [SmartHierarchyFilter(**hierarchy_options), ContainerFilter()]
+        self._pipeline = FilterPipeline(stages)
+
+    def apply(self, elements: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Run elements through every stage in order and return the survivors."""
+        return self._pipeline.apply(elements)
+
+
+__all__ = [
+    'DisplayedFilter',
+    'BoundsFilter',
+    'InteractiveFilter',
+    'SmartHierarchyFilter',
+    'ContainerFilter',
+    'AndroidFilterPipeline',
+]
+
diff --git a/Agent/platforms/filters/android/bounds.py b/Agent/platforms/filters/android/bounds.py
new file mode 100644
index 0000000..c1cc6ee
--- /dev/null
+++ b/Agent/platforms/filters/android/bounds.py
@@ -0,0 +1,34 @@
+from typing import Any, Dict, List
+
+
+class BoundsFilter:
+    """Keep Android elements with valid bounds that intersect the screen."""
+
+    def __init__(self, screen_width: int = 0, screen_height: int = 0):
+        """Store the screen size; a non-positive dimension disables the on-screen check."""
+        self._screen_width = screen_width
+        self._screen_height = screen_height
+
+    def apply(self, elements: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Return, in order, elements whose bbox has positive area and overlaps the screen."""
+        check_screen = self._screen_width > 0 and self._screen_height > 0
+        result = []
+        for e in elements:
+            bbox = e.get('bbox', {})
+            if not bbox:
+                continue
+            x, y = bbox.get('x', 0), bbox.get('y', 0)
+            w, h = bbox.get('width', 0), bbox.get('height', 0)
+            if w <= 0 or h <= 0:
+                continue
+
+            if check_screen:
+                # Boxes cover [x, x + w) x [y, y + h); one that merely touches a
+                # screen edge from outside shows nothing and is dropped as well.
+                if x + w <= 0 or y + h <= 0:
+                    continue
+                if x >= self._screen_width or y >= self._screen_height:
+                    continue
+            result.append(e)
+        return result
+
diff --git a/Agent/platforms/filters/android/container.py b/Agent/platforms/filters/android/container.py
new file mode 100644
index 0000000..c1c1dc6
--- /dev/null
+++ b/Agent/platforms/filters/android/container.py
@@ -0,0 +1,67 @@
+from typing import Any, Dict, List, Set
+
+
+class ContainerFilter:
+    """Remove containers that have interactive children in the list"""
+
+    CONTAINER_CLASSES = {
+        'RecyclerView', 'ScrollView', 'HorizontalScrollView',
+        'LinearLayout', 'RelativeLayout', 'FrameLayout',
+        'ViewGroup', 'ViewPager', 'ConstraintLayout', 'CoordinatorLayout'
+    }
+
+    def apply(self, elements: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """
+        Drop every layout container whose bbox encloses another listed element.
+
+        Args:
+            elements: List of interactive elements
+        Returns:
+            Same elements in the same order, minus containers that enclose
+            at least one other element of the list
+        """
+        return [
+            candidate for candidate in elements
+            if not (
+                self._is_container(candidate)
+                and self._has_interactive_children(candidate, elements)
+            )
+        ]
+
+    def _is_container(self, elem: Dict[str, Any]) -> bool:
+        """Tell whether the 'class' attribute contains a CONTAINER_CLASSES name."""
+        class_name = elem.get('class', '')
+        for marker in self.CONTAINER_CLASSES:
+            if marker in class_name:
+                return True
+        return False
+
+    def _has_interactive_children(self, container, all_elements):
+        """
+        Tell whether any other listed element lies fully inside the container.
+
+        Containment is purely geometric (bbox inside bbox, edges may touch);
+        elements without a bbox are ignored, and a container without a bbox
+        never has children.
+        """
+        outer = container.get('bbox', {})
+        if not outer:
+            return False
+
+        left, top = outer.get('x', 0), outer.get('y', 0)
+        right = left + outer.get('width', 0)
+        bottom = top + outer.get('height', 0)
+
+        for other in all_elements:
+            inner = other.get('bbox', {})
+            if other is container or not inner:
+                continue
+
+            ox, oy = inner.get('x', 0), inner.get('y', 0)
+            fits_horizontally = ox >= left and ox + inner.get('width', 0) <= right
+            fits_vertically = oy >= top and oy + inner.get('height', 0) <= bottom
+            if fits_horizontally and fits_vertically:
+                return True
+
+        return False
+
diff --git a/Agent/platforms/filters/android/displayed.py b/Agent/platforms/filters/android/displayed.py
new file mode 100644
index 0000000..5ab4c0f
--- /dev/null
+++ b/Agent/platforms/filters/android/displayed.py
@@ -0,0 +1,9 @@
+from typing import Any, Dict, List
+
+
+class DisplayedFilter:
+    """Keep Android elements whose 'displayed' attribute is exactly the string 'true'."""
+
+    def apply(self, elements: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        return list(filter(lambda elem: elem.get('displayed') == 'true', elements))
+
diff --git a/Agent/platforms/filters/android/interactive.py b/Agent/platforms/filters/android/interactive.py
new file mode 100644
index 0000000..c68b959
--- /dev/null
+++ b/Agent/platforms/filters/android/interactive.py
@@ -0,0 +1,40 @@
+from typing import Any, Dict, List, Set
+
+
+class InteractiveFilter:
+    """Keep Android elements that are likely interactive."""
+
+    INTERACTIVE_CLASSES: Set[str] = {
+        'Button', 'ImageButton', 'EditText', 'TextView', 'CheckBox',
+        'RadioButton', 'Switch', 'ToggleButton', 'Spinner', 'SeekBar',
+        'ImageView', 'FloatingActionButton', 'Chip', 'Tab',
+    }
+
+    def apply(self, elements: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Return, in order, the elements accepted by _is_interactive."""
+        return list(filter(self._is_interactive, elements))
+
+    def _is_interactive(self, e: Dict[str, Any]) -> bool:
+        """
+        Accept an element when any of these holds, checked in order:
+        a 'true' clickable/focusable/scrollable flag, non-blank text,
+        a non-blank content-desc, or a non-blank resource-id together with
+        a class name containing one of INTERACTIVE_CLASSES.
+        """
+        for flag in ('clickable', 'focusable', 'scrollable'):
+            if e.get(flag) == 'true':
+                return True
+
+        for label_key in ('text', 'content-desc'):
+            label = e.get(label_key, '')
+            if label and str(label).strip():
+                return True
+
+        # resource-id alone is not enough: it only counts together with a
+        # class name containing one of INTERACTIVE_CLASSES.
+        resource_id = e.get('resource-id', '').strip()
+        if not resource_id:
+            return False
+
+        class_name = e.get('class', '')
+        return any(known in class_name for known in self.INTERACTIVE_CLASSES)
diff --git a/Agent/platforms/filters/android/smart_hierarchy.py b/Agent/platforms/filters/android/smart_hierarchy.py
new file mode 100644
index 0000000..d370be6
--- /dev/null
+++ b/Agent/platforms/filters/android/smart_hierarchy.py
@@ -0,0 +1,228 @@
+from typing import Any, Dict, List, Set, Optional
+
+
+class SmartHierarchyFilter:
+    """
+    Collapse groups of heavily overlapping elements into one element each.
+
+    Args:
+        prefer_parent_when_clickable: Keep parent if clickable, ignore children
+        min_relevance_score: Min score to keep element
+        overlap_threshold: Min overlap ratio (0-1) to group elements
+    Example: SmartHierarchyFilter(prefer_parent_when_clickable=True, min_relevance_score=5)
+    """
+
+    CONTAINER_CLASSES = {
+        'RecyclerView', 'ScrollView', 'HorizontalScrollView',
+        'LinearLayout', 'RelativeLayout', 'FrameLayout',
+        'ViewGroup', 'ViewPager', 'ConstraintLayout'
+    }
+
+    def __init__(
+        self,
+        prefer_parent_when_clickable: bool = True,
+        min_relevance_score: int = 0,
+        overlap_threshold: float = 0.9
+    ):
+        """Store the three tuning options unchanged."""
+        self._overlap_threshold = overlap_threshold
+        self._min_relevance_score = min_relevance_score
+        self._prefer_parent_when_clickable = prefer_parent_when_clickable
+
+    def apply(self, elements: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """
+        Pick one representative per overlap group.
+
+        Args:
+            elements: List of elements
+        Returns:
+            The selected elements in their input order; a group whose best
+            score is below min_relevance_score contributes nothing
+        """
+        if not elements:
+            return []
+
+        keep = set()
+        for members in self._build_overlap_groups(elements):
+            winner = self._select_best_from_group(elements, members)
+            if winner is not None:
+                keep.add(winner)
+
+        return [elem for position, elem in enumerate(elements) if position in keep]
+
+    def _build_overlap_groups(self, elements: List[Dict[str, Any]]) -> List[List[int]]:
+        """
+        Partition element indices into groups in a single greedy pass.
+
+        Every index not yet in a group opens a new one and pulls in each later
+        ungrouped index that _should_group pairs with it. Candidates are only
+        compared with the opening element, never with other group members.
+
+        Args:
+            elements: List of elements
+        Returns:
+            List of groups, each group is list of overlapping element indices
+            (opening index first, remaining indices ascending)
+        """
+        total = len(elements)
+        claimed = set()
+        groups = []
+
+        for anchor, anchor_elem in enumerate(elements):
+            if anchor in claimed:
+                continue
+            claimed.add(anchor)
+
+            members = [anchor]
+            anchor_bbox = anchor_elem.get('bbox', {})
+
+            # Without a bbox nothing can overlap: the group stays a singleton.
+            if anchor_bbox:
+                for later in range(anchor + 1, total):
+                    if later in claimed:
+                        continue
+
+                    later_elem = elements[later]
+                    later_bbox = later_elem.get('bbox', {})
+                    if not later_bbox:
+                        continue
+
+                    if self._should_group(anchor_elem, later_elem, anchor_bbox, later_bbox):
+                        members.append(later)
+                        claimed.add(later)
+
+            groups.append(members)
+
+        return groups
+
+    def _should_group(
+        self,
+        elem1: Dict[str, Any],
+        elem2: Dict[str, Any],
+        bbox1: Dict[str, int],
+        bbox2: Dict[str, int]
+    ) -> bool:
+        """
+        Decide whether two elements belong to the same overlap group.
+
+        Layout containers never group, and at least one of the two elements
+        must be interactive; beyond that the bboxes decide.
+        """
+        if self._is_container(elem1) or self._is_container(elem2):
+            return False
+
+        one_is_interactive = self._is_interactive(elem1) or self._is_interactive(elem2)
+        return one_is_interactive and self._has_significant_overlap(bbox1, bbox2)
+
+    def _is_container(self, elem: Dict[str, Any]) -> bool:
+        """Tell whether the 'class' attribute contains a CONTAINER_CLASSES name."""
+        class_name = elem.get('class', '')
+        for marker in self.CONTAINER_CLASSES:
+            if marker in class_name:
+                return True
+        return False
+
+    def _has_significant_overlap(
+        self,
+        bbox1: Dict[str, int],
+        bbox2: Dict[str, int]
+    ) -> bool:
+        """
+        Compare the intersection area with each bbox's own area.
+
+        Returns:
+            True when the intersection covers more than overlap_threshold of
+            at least one of the two boxes; False for disjoint or zero-area boxes
+        """
+        keys = ('x', 'y', 'width', 'height')
+        x1, y1, w1, h1 = [bbox1.get(key, 0) for key in keys]
+        x2, y2, w2, h2 = [bbox2.get(key, 0) for key in keys]
+
+        overlap_w = min(x1 + w1, x2 + w2) - max(x1, x2)
+        overlap_h = min(y1 + h1, y2 + h2) - max(y1, y2)
+        # A negative extent on either axis means the boxes are disjoint.
+        if overlap_w < 0 or overlap_h < 0:
+            return False
+
+        area1, area2 = w1 * h1, w2 * h2
+        if area1 == 0 or area2 == 0:
+            return False
+
+        intersection = overlap_w * overlap_h
+        larger_ratio = max(intersection / area1, intersection / area2)
+        return larger_ratio > self._overlap_threshold
+
+    def _select_best_from_group(
+        self,
+        elements: List[Dict[str, Any]],
+        group_indices: List[int]
+    ) -> Optional[int]:
+        """
+        Choose which member of a group survives.
+
+        Args:
+            elements: All elements
+            group_indices: Indices of parent and children
+        Returns:
+            Index of best element to keep, or None when the group is empty or
+            its best score is below min_relevance_score. An interactive
+            parent (first index) wins outright when
+            prefer_parent_when_clickable is set, regardless of score.
+        """
+        if not group_indices:
+            return None
+
+        parent_idx = group_indices[0]
+        if self._prefer_parent_when_clickable and self._is_interactive(elements[parent_idx]):
+            return parent_idx
+
+        # max() returns the first maximal entry, so ties go to the earliest
+        # index of the group (the parent first).
+        scored = [(self._get_relevance_score(elements[idx]), idx) for idx in group_indices]
+        best_score, best_idx = max(scored, key=lambda pair: pair[0])
+
+        if best_score < self._min_relevance_score:
+            return None
+        return best_idx
+
+    def _is_interactive(self, elem: Dict[str, Any]) -> bool:
+        """Tell whether clickable, focusable or long-clickable equals 'true'."""
+        flags = ('clickable', 'focusable', 'long-clickable')
+        return any(elem.get(flag) == 'true' for flag in flags)
+
+    def _get_relevance_score(self, elem: Dict[str, Any]) -> int:
+        """
+        Sum fixed bonuses and penalties for the element's attributes.
+
+        Args:
+            elem: Element dict
+        Returns:
+            Relevance score (higher = more relevant)
+        """
+        class_name = elem.get('class', '')
+        score = 0
+
+        # Interaction flags carry the largest bonuses.
+        for flag, bonus in (('clickable', 20), ('focusable', 15)):
+            if elem.get(flag) == 'true':
+                score += bonus
+
+        if any(widget in class_name for widget in ('Button', 'EditText', 'ImageButton')):
+            score += 10
+
+        # Non-blank identifying attributes add smaller bonuses.
+        for key, bonus in (('text', 8), ('content-desc', 6), ('resource-id', 4)):
+            if elem.get(key, '').strip():
+                score += bonus
+
+        # Layout-like class names are penalised.
+        if any(layout in class_name for layout in ('Layout', 'ViewGroup', 'FrameLayout')):
+            score -= 10
+
+        # Boxes narrower or shorter than 10 are penalised.
+        bbox = elem.get('bbox', {})
+        if bbox and (bbox.get('width', 0) < 10 or bbox.get('height', 0) < 10):
+            score -= 5
+
+        return score
+
diff --git a/Agent/platforms/filters/pipeline.py b/Agent/platforms/filters/pipeline.py
new file mode 100644
index 0000000..75daef9
--- /dev/null
+++ b/Agent/platforms/filters/pipeline.py
@@ -0,0 +1,18 @@
+from typing import Any, Dict, List
+
+
+class FilterPipeline:
+    """Composable pipeline: apply() feeds the elements through every filter in order."""
+
+    def __init__(self, filters: List = None):
+        self._filters = list(filters) if filters else []  # copy: add() must not touch the caller's list
+
+    def add(self, f) -> 'FilterPipeline':
+        self._filters.append(f)
+        return self  # enables chained add() calls
+
+    def apply(self, elements: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        for f in self._filters:  # each filter sees only what the previous one returned
+            elements = f.apply(elements)
+        return elements
+
diff --git a/Agent/platforms/grounding/__init__.py b/Agent/platforms/grounding/__init__.py
new file mode 100644
index 0000000..f946a42
--- /dev/null
+++ b/Agent/platforms/grounding/__init__.py
@@ -0,0 +1,12 @@
+from Agent.platforms.grounding.som.annotator import annotate_screenshot
+from Agent.platforms.grounding.som.serializer import SomSerializer
+from Agent.platforms.grounding.text.serializer import TextSerializer
+from Agent.platforms.grounding.composer import SomComposer
+
+__all__ = [
+    'annotate_screenshot',
+    'SomSerializer',
+    'TextSerializer',
+    'SomComposer',
+]
+
diff --git a/Agent/platforms/grounding/composer.py b/Agent/platforms/grounding/composer.py
new file mode 100644
index 0000000..99b5fe7
--- /dev/null
+++ b/Agent/platforms/grounding/composer.py
@@ -0,0 +1,63 @@
+from typing import Any, Dict, List, Optional
+from Agent.platforms.grounding.som.serializer import SomSerializer
+from Agent.platforms.grounding.som.annotator import annotate_screenshot
+
+
+class SomComposer:
+    """
+    Orchestrates SoM components (visual annotation + text legend).
+
+    Args:
+        platform: "android" (implemented), "ios"/"web" (future)
+        screen_width: Screen width in pixels
+        screen_height: Screen height in pixels
+    """
+
+    def __init__(
+        self,
+        platform: str = "android",
+        screen_width: int = 1080,
+        screen_height: int = 1920
+    ):
+        self.platform = platform
+        self.serializer = SomSerializer(platform, screen_width, screen_height)
+
+    def compose(
+        self,
+        screenshot_base64: Optional[str],
+        elements: List[Dict[str, Any]],
+        config: Optional[Dict[str, Any]] = None
+    ) -> Dict[str, Any]:
+        """
+        Build the SoM payload selected by config.
+
+        Args:
+            screenshot_base64: Base64 screenshot (required if visual_annotation=True)
+            elements: List of UI elements
+            config: {visual_annotation, text_format, output_type, include_screenshot};
+                missing keys default to True, 'compact', 'text', True
+        Returns:
+            Dict with 'annotated_image_base64' (visual_annotation and
+            include_screenshot both set) plus 'elements_json' or 'text_legend'
+            (output_type 'json' / 'text'); any other output_type adds neither
+        Raises:
+            ValueError: visual_annotation is set but screenshot_base64 is empty
+        """
+        config = {} if config is None else config
+        text_format = config.get('text_format', 'compact')
+        output_type = config.get('output_type', 'text')
+        result = {}
+
+        if config.get('visual_annotation', True):
+            if not screenshot_base64:
+                raise ValueError("screenshot_base64 required when visual_annotation=True")
+            # Annotating is only worth the image work when the result is returned.
+            if config.get('include_screenshot', True):
+                result['annotated_image_base64'] = annotate_screenshot(screenshot_base64, elements)
+
+        if output_type == 'json':
+            result['elements_json'] = self.serializer.serialize(elements, output_type='json')
+        elif output_type == 'text':
+            result['text_legend'] = self.serializer.serialize(elements, format=text_format, output_type='text')
+        return result
+
diff --git a/Agent/platforms/grounding/som/__init__.py b/Agent/platforms/grounding/som/__init__.py
new file mode 100644
index 0000000..ab5575b
--- /dev/null
+++ b/Agent/platforms/grounding/som/__init__.py
@@ -0,0 +1,5 @@
+from Agent.platforms.grounding.som.annotator import annotate_screenshot
+from Agent.platforms.grounding.som.serializer import SomSerializer
+
+__all__ = ['annotate_screenshot', 'SomSerializer']
+
diff --git a/Agent/platforms/collectors/som_renderer.py b/Agent/platforms/grounding/som/annotator.py
similarity index 68%
rename from Agent/platforms/collectors/som_renderer.py
rename to Agent/platforms/grounding/som/annotator.py
index ac7a33a..54aafba 100644
--- a/Agent/platforms/collectors/som_renderer.py
+++ b/Agent/platforms/grounding/som/annotator.py
@@ -1,5 +1,5 @@
 """
-Set-of-Mark (SoM) Renderer.
+SoM Visual Annotator.
 
 Draws numbered bounding boxes on screenshots for visual grounding.
 """
@@ -11,31 +11,23 @@
 from PIL import Image, ImageDraw, ImageFont
 
 
-# Colors for different sources
-COLOR_DOM = (34, 197, 94)        # Green for DOM elements (has locator)
-COLOR_OMNIPARSER = (249, 115, 22)  # Orange for OmniParser (click-only)
-COLOR_DEFAULT = (59, 130, 246)   # Blue default
+COLOR_DOM = (34, 197, 94)
+COLOR_OMNIPARSER = (249, 115, 22)
+COLOR_DEFAULT = (59, 130, 246)
 
 
-def render_som(
+def annotate_screenshot(
     screenshot_base64: str,
     elements: List[Dict[str, Any]],
     source_key: str = "source",
 ) -> str:
     """
-    Draw numbered bounding boxes on screenshot.
-    
     Args:
         screenshot_base64: Base64 encoded PNG/JPEG
-        elements: List with 'bbox' key {x, y, width, height} and optional source
-        source_key: Key to check for source type ("dom" or "omniparser")
-    
+        elements: List with 'bbox' key {x, y, width, height}
+        source_key: Key to check for source type
     Returns:
         Base64 of annotated image
-    
-    Example:
-        >>> elements = [{'text': 'Search', 'bbox': {'x': 10, 'y': 20, 'width': 100, 'height': 30}, 'source': 'dom'}]
-        >>> annotated = render_som(screenshot_b64, elements)
     """
     img_bytes = base64.b64decode(screenshot_base64)
     img = Image.open(io.BytesIO(img_bytes)).convert("RGBA")
@@ -44,10 +36,19 @@ def render_som(
     draw = ImageDraw.Draw(overlay)
     
     try:
-        #TODO: fix this for windows and linux ( pixelized font on those OS )
-        font = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 14)
-        font_large = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 24)
-    except:
+        import platform
+        system = platform.system()
+        
+        if system == "Darwin":
+            font = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 14)
+            font_large = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 24)
+        elif system == "Windows":
+            font = ImageFont.truetype("arial.ttf", 14)
+            font_large = ImageFont.truetype("arial.ttf", 24)
+        else:
+            font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 14)
+            font_large = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 24)
+    except Exception:
         font = ImageFont.load_default()
         font_large = font
     
@@ -67,21 +68,28 @@ def render_som(
         source = element.get(source_key, "dom")
         color = COLOR_DOM if source == "dom" else COLOR_OMNIPARSER if source == "omniparser" else COLOR_DEFAULT
         
-        # Apply margin to create visual spacing between boxes
         margin = 4
+        if w <= 2 * margin:
+            margin = max(0, w // 2 - 1)
+        if h <= 2 * margin:
+            margin = min(margin, max(0, h // 2 - 1))
+        
         box_x1 = x + margin
         box_y1 = y + margin
         box_x2 = x + w - margin
         box_y2 = y + h - margin
         
-        # Draw box with transparency
+        if box_x2 <= box_x1:
+            box_x2 = box_x1 + 1
+        if box_y2 <= box_y1:
+            box_y2 = box_y1 + 1
+        
         draw.rectangle(
             [box_x1, box_y1, box_x2, box_y2],
             outline=color + (255,),
             width=2
         )
         
-        # Draw label background (top-left inside box)
         label = str(idx)
         label_bbox = draw.textbbox((0, 0), label, font=font_large)
         label_w = label_bbox[2] - label_bbox[0] + 12
@@ -95,7 +103,6 @@ def render_som(
             fill=color + (230,)
         )
         
-        # Draw label text with stroke for better contrast
         draw.text(
             (label_x + 6, label_y + 4),
             label,
@@ -121,7 +128,3 @@ def bbox_center(bbox: Dict[str, int]) -> Tuple[int, int]:
     h = bbox.get("height", 0)
     return (x + w // 2, y + h // 2)
 
-
-
-
-
diff --git a/Agent/platforms/grounding/som/serializer.py b/Agent/platforms/grounding/som/serializer.py
new file mode 100644
index 0000000..d0557b5
--- /dev/null
+++ b/Agent/platforms/grounding/som/serializer.py
@@ -0,0 +1,173 @@
+from typing import Any, Dict, List
+import json
+
+
+class SomSerializer:
+    """
+    Serializes UI elements as text or JSON for SoM prompts.
+
+    Elements are numbered from 1 in list order; that number is the mark id
+    used in every output format.
+
+    Args:
+        platform: "android" (implemented), "ios"/"web" (future)
+        screen_width: Screen width in pixels
+        screen_height: Screen height in pixels
+    """
+
+    def __init__(self, platform: str = "android", screen_width: int = 1080, screen_height: int = 1920):
+        self.platform = platform
+        self.screen_width = screen_width
+        self.screen_height = screen_height
+
+    def serialize(
+        self,
+        elements: List[Dict[str, Any]],
+        format: str = "compact",
+        output_type: str = "text"
+    ) -> str:
+        """
+        Args:
+            elements: List of UI elements
+            format: "compact", "detailed", "minimal" (for text mode);
+                any other value is rendered as "compact"
+            output_type: "json" selects JSON; any other value selects text
+        Returns:
+            Formatted string
+        Raises:
+            NotImplementedError: JSON requested for a platform other than "android"
+        """
+        if output_type == "json":
+            return self._to_json(elements)
+        return self._to_text(elements, format)
+
+    def _to_text(self, elements: List[Dict[str, Any]], format: str) -> str:
+        """Render one line per element; an empty list yields '(no elements)'."""
+        if not elements:
+            return "(no elements)"
+
+        # Unknown formats fall back to the compact renderer.
+        if format == "minimal":
+            render = self._minimal
+        elif format == "detailed":
+            render = self._detailed
+        else:
+            render = self._compact
+        return "\n".join(render(idx, elem) for idx, elem in enumerate(elements, start=1))
+
+    def _minimal(self, idx: int, elem: Dict[str, Any]) -> str:
+        """Example: '[3] Login', or '[3] (no text)' when the element has no text."""
+        text = self._get_text(elem)
+        return f"[{idx}] {text}" if text else f"[{idx}] (no text)"
+
+    def _compact(self, idx: int, elem: Dict[str, Any]) -> str:
+        """Example: '[3] Button: Login @top-L'"""
+        position = self._get_position(elem.get("bbox", {}))
+        return f"[{idx}] {self._short_class(elem)}: {self._get_text(elem)} @{position}"
+
+    def _detailed(self, idx: int, elem: Dict[str, Any]) -> str:
+        """Join class, text, id, desc and position/size with ' | ', skipping empty fields."""
+        parts = [f"[{idx}] {self._short_class(elem)}"]
+
+        text = self._get_text(elem)
+        if text:
+            parts.append(f"text='{text}'")
+
+        resource_id = self._get_resource_id(elem)
+        if resource_id:
+            parts.append(f"id='{resource_id}'")
+
+        content_desc = self._get_content_desc(elem)
+        if content_desc:
+            parts.append(f"desc='{content_desc}'")
+
+        bbox = elem.get("bbox", {})
+        if bbox:
+            position = self._get_position(bbox)
+            w = bbox.get("width", 0)
+            h = bbox.get("height", 0)
+            parts.append(f"pos={position} size={w}x{h}")
+
+        return " | ".join(parts)
+
+    def _to_json(self, elements: List[Dict[str, Any]]) -> str:
+        """
+        Returns:
+            JSON with the screen size, "som_version" and one box per element; each box
+            uses the element's "bbox_normalized" when set, else its normalized "bbox"
+        Raises:
+            NotImplementedError: platform is not "android"
+        """
+        if self.platform != "android":
+            raise NotImplementedError(f"JSON not implemented for: {self.platform}")
+
+        boxes = []
+        for idx, elem in enumerate(elements, start=1):
+            bbox_norm = elem.get("bbox_normalized", {})
+            if not bbox_norm:
+                bbox_norm = self._normalize(elem.get("bbox", {}))
+
+            boxes.append({
+                "mark_id": idx,
+                "class_name": self._class_name(elem),
+                "text": self._get_text(elem),
+                "resource_id": self._get_resource_id(elem),
+                "content_desc": self._get_content_desc(elem),
+                "bbox": bbox_norm
+            })
+
+        return json.dumps({
+            "screen": {"width": self.screen_width, "height": self.screen_height},
+            "som_version": "1.0",
+            "boxes": boxes
+        }, indent=2, ensure_ascii=False)
+
+    def _normalize(self, bbox: Dict[str, int]) -> Dict[str, float]:
+        """Divide bbox pixels by the screen size (4 decimals); {} if bbox or size is unusable."""
+        if not bbox or self.screen_width <= 0 or self.screen_height <= 0:
+            return {}
+        return {
+            'x': round(bbox.get('x', 0) / self.screen_width, 4),
+            'y': round(bbox.get('y', 0) / self.screen_height, 4),
+            'width': round(bbox.get('width', 0) / self.screen_width, 4),
+            'height': round(bbox.get('height', 0) / self.screen_height, 4),
+        }
+
+    def _class_name(self, elem: Dict[str, Any]) -> str:
+        """Return 'class_name', else 'class', else an empty string (never None)."""
+        return elem.get("class_name") or elem.get("class") or ""
+
+    def _short_class(self, elem: Dict[str, Any]) -> str:
+        """Return the class name without its package prefix ('a.b.Button' -> 'Button')."""
+        return self._class_name(elem).rsplit(".", 1)[-1]
+
+    def _get_text(self, elem: Dict[str, Any]) -> str:
+        """Return text on one line, stripped and cut to 40 chars; empty for non-strings."""
+        text = elem.get("text", "")
+        if isinstance(text, str):
+            return text.replace("\n", " ").strip()[:40]
+        return ""
+
+    def _get_resource_id(self, elem: Dict[str, Any]) -> str:
+        """Return 'resource_id', else 'resource-id', else an empty string (never None)."""
+        return elem.get("resource_id") or elem.get("resource-id") or ""
+
+    def _get_content_desc(self, elem: Dict[str, Any]) -> str:
+        """Return 'content_desc', else 'content-desc', else an empty string (never None)."""
+        return elem.get("content_desc") or elem.get("content-desc") or ""
+
+    def _get_position(self, bbox: Dict[str, int]) -> str:
+        """
+        Returns:
+            Coarse region such as 'top-L' or 'mid-C'; 'unknown' for an empty bbox
+        """
+        if not bbox:
+            return "unknown"
+        y = bbox.get("y", 0)
+        x = bbox.get("x", 0)
+        # NOTE(review): thresholds are absolute pixels and ignore screen_width /
+        # screen_height; confirm whether they should scale with the screen size.
+        pos = "top" if y < 400 else "mid" if y < 1200 else "bot"
+        side = "L" if x < 300 else "C" if x < 700 else "R"
+        return f"{pos}-{side}"
+
diff --git a/Agent/platforms/grounding/text/__init__.py b/Agent/platforms/grounding/text/__init__.py
new file mode 100644
index 0000000..6e02dc7
--- /dev/null
+++ b/Agent/platforms/grounding/text/__init__.py
@@ -0,0 +1,4 @@
+from Agent.platforms.grounding.text.serializer import TextSerializer
+
+__all__ = ['TextSerializer']
+
diff --git a/Agent/platforms/grounding/text/serializer.py b/Agent/platforms/grounding/text/serializer.py
new file mode 100644
index 0000000..c7407dd
--- /dev/null
+++ b/Agent/platforms/grounding/text/serializer.py
@@ -0,0 +1,122 @@
+from typing import Any, Dict, List
+
+
+class TextSerializer:
+    """Serializes UI elements as numbered text list."""
+
+    def serialize(self, elements: List[Dict[str, Any]], platform: str = "android") -> str:
+        """
+        Render at most the first 50 elements, one numbered line each.
+
+        Args:
+            elements: List of UI element dictionaries
+            platform: 'android', 'ios', or 'web'; currently unused, every element
+                goes through the Android formatter
+        Returns:
+            Formatted numbered text, or "(no elements)" for an empty list
+        """
+        if not elements:
+            return "(no elements)"
+
+        # is_mobile = platform in ("android", "ios")
+        # max_items = 50 if is_mobile else 150
+        max_items = 50
+        shown = elements[:max_items]
+
+        # if platform == "ios":
+        #     line = self._ios(i, el)
+        # elif platform == "android":
+        #     line = self._android(i, el)
+        # else:
+        #     line = self._web(i, el)
+        return "\n".join(self._android(number, item) for number, item in enumerate(shown, 1))
+
+    # def _web(self, idx: int, el: Dict[str, Any]) -> str:
+    #     parts = []
+    #     tag = el.get('class_name', '') or el.get('tag', 'unknown')
+    #     elem_type = el.get('type', '')
+    #     if elem_type and elem_type not in ['text', '']:
+    #         parts.append(f"<{tag} type='{elem_type}'>")
+    #     else:
+    #         parts.append(f"<{tag}>")
+    #
+    #     if el.get("aria_label"):
+    #         parts.append(f"aria-label='{el['aria_label']}'")
+    #     if el.get("placeholder"):
+    #         parts.append(f"placeholder='{el['placeholder']}'")
+    #     if el.get("text"):
+    #         parts.append(f"text='{el['text']}'")
+    #     if el.get("resource_id"):
+    #         parts.append(f"id='{el['resource_id']}'")
+    #     if el.get("name"):
+    #         parts.append(f"name='{el['name']}'")
+    #
+    #     return f"{idx}. {' | '.join(parts)}"
+
+    def _android(self, idx: int, el: Dict[str, Any]) -> str:
+        """Format one element as '<idx>. [Class] | text | id | desc | pos size'; empty fields are omitted."""
+        raw_class = el.get('class_name') or el.get('class', 'unknown')
+        short_class = raw_class.split('.')[-1] if '.' in raw_class else raw_class
+        fields = [f"[{short_class}]"]
+
+        text = el.get("text")
+        if text:
+            fields.append(f"text='{text}'")
+
+        identifier = el.get("resource_id") or el.get("resource-id")
+        if identifier:
+            fields.append(f"id='{identifier}'")
+
+        # First non-empty description wins, in this key order.
+        for key in ("accessibility_label", "content_desc", "content-desc"):
+            description = el.get(key)
+            if description:
+                fields.append(f"desc='{description}'")
+                break
+
+        bbox = el.get("bbox", {})
+        if bbox:
+            top, left = bbox.get("y", 0), bbox.get("x", 0)
+            if top < 400:
+                row = "top"
+            elif top < 1200:
+                row = "middle"
+            else:
+                row = "bottom"
+            if left < 300:
+                column = "left"
+            elif left < 700:
+                column = "center"
+            else:
+                column = "right"
+            size = f"{bbox.get('width', 0)}x{bbox.get('height', 0)}"
+            fields.append(f"pos={row}-{column} size={size}")
+
+        return f"{idx}. {' | '.join(fields)}"
+
+    # def _ios(self, idx: int, el: Dict[str, Any]) -> str:
+    #     parts = []
+    #     class_name = el.get('class_name', 'unknown')
+    #     short_class = class_name.replace('XCUIElementType', '') if 'XCUIElementType' in class_name else class_name
+    #     parts.append(f"[{short_class}]")
+    #
+    #     if el.get("text"):
+    #         parts.append(f"text='{el['text']}'")
+    #     if el.get("resource_id"):
+    #         parts.append(f"name='{el['resource_id']}'")
+    #
+    #     label = el.get("accessibility_label", '') or el.get("label", '')
+    #     if label:
+    #         parts.append(f"label='{label}'")
+    #
+    #     bbox = el.get("bbox", {})
+    #     if bbox:
+    #         y = bbox.get("y", 0)
+    #         x = bbox.get("x", 0)
+    #         w = bbox.get("width", 0)
+    #         h = bbox.get("height", 0)
+    #         pos = "top" if y < 400 else "middle" if y < 1200 else "bottom"
+    #         side = "left" if x < 300 else "center" if x < 700 else "right"
+    #         parts.append(f"pos={pos}-{side} size={w}x{h}")
+    #
+    #     return f"{idx}. {' | '.join(parts)}"
diff --git a/Agent/platforms/locators/__init__.py b/Agent/platforms/locators/__init__.py
index 2bf4991..985b634 100644
--- a/Agent/platforms/locators/__init__.py
+++ b/Agent/platforms/locators/__init__.py
@@ -1,3 +1,3 @@
-from Agent.platforms.locators.mobile import MobileLocatorBuilder
+from Agent.platforms.locators.mobile_locator import MobileLocatorBuilder
 
 __all__ = ["MobileLocatorBuilder"]
diff --git a/Agent/platforms/locators/android_locator.py b/Agent/platforms/locators/android_locator.py
new file mode 100644
index 0000000..6affa63
--- /dev/null
+++ b/Agent/platforms/locators/android_locator.py
@@ -0,0 +1,200 @@
+from typing import Any, Dict
+
+
+class AndroidLocatorBuilder:
+    """Builds Appium locators for Android elements."""
+
+    def build(self, element: Dict[str, Any], strategy: str = 'auto') -> str:
+        """
+        Args:
+            element: Dict with raw XML attributes
+            strategy: 'auto' | 'id_only' | 'bounds' | 'xpath_attrs' | 'xpath_all'
+        Returns:
+            Appium locator string
+        Raises:
+            ValueError: unknown strategy, or the chosen strategy finds nothing usable
+            AssertionError: 'auto' finds no resource-id, content-desc or text
+        Example: build(elem, 'id_only') -> 'id=com.android:id/button'
+        """
+        if strategy == 'auto':
+            return self._build_locator_unique_content(element)
+        elif strategy == 'id_only':
+            return self.build_identifiers_only(element)
+        elif strategy == 'bounds':
+            return self.build_by_bounds(element)
+        elif strategy == 'xpath_attrs':
+            return self.build_xpath_attributes(element)
+        elif strategy == 'xpath_all':
+            return self.build_xpath_all(element)
+        else:
+            raise ValueError(f"Unknown strategy: {strategy}")
+
+    def _build_locator_unique_content(self, element: Dict[str, Any]) -> str:
+        """
+        Returns:
+            First available of: 'id=<resource-id>', 'accessibility_id=<content-desc>',
+            '//*[@text=<escaped text>]'
+        Raises:
+            AssertionError: none of the three attributes is set
+        """
+        resource_id = self._get_str(element, 'resource-id')
+        if resource_id:
+            return f"id={resource_id}"
+
+        content_desc = self._get_str(element, 'content-desc')
+        if content_desc:
+            return f"accessibility_id={content_desc}"
+
+        text = self._get_str(element, 'text')
+        if text:
+            return f"//*[@text={self._escape_xpath(text)}]"
+
+        raise AssertionError("Cannot build locator: no usable attributes")
+
+    #TODO: see if this should be private after adding locator strategies ( build )
+    def build_identifiers_only(self, element: Dict[str, Any]) -> str:
+        """
+        Args:
+            element: Dict with raw XML attributes
+        Returns:
+            Identifiers only: content-desc > resource-id
+        Raises:
+            ValueError: neither attribute is set
+        Example: 'accessibility_id=Navigate up' or 'id=com.android:id/button'
+        """
+        # NOTE(review): the 'auto' strategy prefers resource-id over content-desc;
+        # confirm that the opposite priority here is intended.
+        content_desc = self._get_str(element, 'content-desc')
+        if content_desc:
+            return f"accessibility_id={content_desc}"
+
+        resource_id = self._get_str(element, 'resource-id')
+        if resource_id:
+            return f"id={resource_id}"
+
+        raise ValueError("No ID attributes available")
+
+    def build_by_bounds(self, element: Dict[str, Any]) -> str:
+        """
+        Args:
+            element: Dict with raw XML attributes
+        Returns:
+            XPath on the bounds attribute, scoped to the element class when present
+        Raises:
+            ValueError: element has no bounds
+        Example: "//*[@bounds='[0,72][1080,200]']"
+        """
+        bounds = self._get_str(element, 'bounds')
+        if not bounds:
+            raise ValueError("No bounds attribute available")
+
+        class_name = self._get_str(element, 'class')
+        base = f"//{class_name}" if class_name else "//*"
+
+        return f"{base}[@bounds='{bounds}']"
+
+    def build_xpath_attributes(self, element: Dict[str, Any]) -> str:
+        """
+        Args:
+            element: Dict with raw XML attributes
+        Returns:
+            XPath with content attributes (resource-id, content-desc, text);
+            boolean and numeric metadata (clickable, enabled, index, ...) is left out
+        Example: "//Button[@resource-id='btn' and @text='Login']"
+        """
+        return self._build_full_xpath(element, exclude_metadata=True)
+
+    def build_xpath_all(self, element: Dict[str, Any]) -> str:
+        """
+        Args:
+            element: Dict with raw XML attributes
+        Returns:
+            XPath with ALL attributes including metadata (clickable, enabled, etc.)
+        Example: "//Button[@resource-id='btn' and @clickable='true']"
+        """
+        return self._build_full_xpath(element, exclude_metadata=False)
+
+    def _build_full_xpath(
+        self,
+        element: Dict[str, Any],
+        exclude_metadata: bool = True
+    ) -> str:
+        """
+        Args:
+            element: Dict with raw XML attributes
+            exclude_metadata: If True, skip values that are 'true'/'false' or all digits
+                             (except bounds) on top of the computed keys
+                             (bbox, elementId, package).
+                             If False, skip only the computed keys.
+        Returns:
+            XPath combining the selected attributes in dict order, or just
+            '//<class>' when no attribute qualifies
+        Raises:
+            ValueError: no attribute qualifies and the element has no class
+        """
+        excluded_base = {'bbox', 'elementId', 'package'}
+
+        conditions = []
+        class_name = self._get_str(element, 'class')
+
+        for key, value in element.items():
+            # class becomes the node test, not a predicate.
+            if key == 'class' or key in excluded_base:
+                continue
+
+            val_str = str(value).strip() if value else ''
+            if not val_str:
+                continue
+
+            # Metadata is recognised by value: state flags and plain numbers.
+            # NOTE(review): this also drops a text/content-desc that is purely
+            # numeric or literally 'true'/'false'; confirm that is acceptable.
+            if exclude_metadata and key != 'bounds':
+                if val_str in ('true', 'false') or val_str.isdigit():
+                    continue
+
+            conditions.append(f"@{key}={self._escape_xpath(val_str)}")
+
+        base = f"//{class_name}" if class_name else "//*"
+
+        if not conditions:
+            if class_name:
+                return base
+            raise ValueError("No attributes available")
+
+        return f"{base}[{' and '.join(conditions)}]"
+
+    def _get_str(self, element: Dict[str, Any], key: str) -> str:
+        """Return element[key] as a stripped string; empty when missing or falsy."""
+        val = element.get(key, '')
+        return str(val).strip() if val else ''
+
+    def _escape_xpath(self, value: str) -> str:
+        """
+        Args:
+            value: Raw attribute value
+        Returns:
+            XPath string literal: single-quoted when value has no ', double-quoted
+            when it has no ", otherwise concat() of quoted pieces
+        Example: It's -> "It's" ; say "it's" -> concat('say "it', "'", 's"')
+        """
+        if "'" not in value:
+            return f"'{value}'"
+        if '"' not in value:
+            return f'"{value}"'
+
+        parts = []
+        current = ""
+        for char in value:
+            if char == "'":
+                if current:
+                    parts.append(f"'{current}'")
+                    current = ""
+                parts.append("\"'\"")
+            else:
+                current += char
+        if current:
+            parts.append(f"'{current}'")
+
+        return f"concat({', '.join(parts)})"
+
diff --git a/Agent/platforms/locators/ios_locator.py b/Agent/platforms/locators/ios_locator.py
new file mode 100644
index 0000000..307940b
--- /dev/null
+++ b/Agent/platforms/locators/ios_locator.py
@@ -0,0 +1,30 @@
+from typing import Any, Dict
+
+
+class IOSLocatorBuilder:
+    """Builds Appium locators for iOS elements (placeholder: every method raises)."""
+
+    # Single message shared by all not-yet-implemented entry points.
+    _NOT_IMPLEMENTED = "iOS locator builder not implemented yet"
+
+    def build(self, element: Dict[str, Any], strategy: str = 'auto') -> str:
+        """Raises: NotImplementedError, always."""
+        raise NotImplementedError(self._NOT_IMPLEMENTED)
+
+    #TODO: see if this should be private after adding locator strategies ( build )
+    def build_identifiers_only(self, element: Dict[str, Any]) -> str:
+        """Raises: NotImplementedError, always."""
+        raise NotImplementedError(self._NOT_IMPLEMENTED)
+
+    def build_by_bounds(self, element: Dict[str, Any]) -> str:
+        """Raises: NotImplementedError, always."""
+        raise NotImplementedError(self._NOT_IMPLEMENTED)
+
+    def build_xpath_attributes(self, element: Dict[str, Any]) -> str:
+        """Raises: NotImplementedError, always."""
+        raise NotImplementedError(self._NOT_IMPLEMENTED)
+
+    def build_xpath_all(self, element: Dict[str, Any]) -> str:
+        """Raises: NotImplementedError, always."""
+        raise NotImplementedError(self._NOT_IMPLEMENTED)
+
diff --git a/Agent/platforms/locators/mobile.py b/Agent/platforms/locators/mobile.py
deleted file mode 100644
index a6fc8d3..0000000
--- a/Agent/platforms/locators/mobile.py
+++ /dev/null
@@ -1,81 +0,0 @@
-from typing import Any, Dict
-
-
-class MobileLocatorBuilder:
-    """Builds Appium locators for Android and iOS."""
-    
-    def __init__(self, platform: str = "android"):
-        self._platform = platform
-    
-    def set_platform(self, platform: str) -> None:
-        self._platform = platform
-    
-    def build(self, element: Dict[str, Any]) -> str:
-        """Dispatch to platform-specific method."""
-        if self._platform == "ios":
-            return self.build_ios(element)
-        return self.build_android(element)
-    
-    def build_android(self, element: Dict[str, Any]) -> str:
-        """
-        Build XPath locator combining all available attributes.
-        
-        Returns: "//*[@resource-id='x' and @content-desc='y' and @text='z']"
-        """
-        resource_id = element.get('resource_id', '').strip()
-        acc_label = element.get('accessibility_label', '') or element.get('content_desc', '')
-        acc_label = acc_label.strip() if acc_label else ''
-        text = element.get('text', '').strip()
-        class_name = element.get('class_name', '').strip()
-        
-        conditions = []
-        
-        if resource_id:
-            conditions.append(f"@resource-id='{resource_id}'")
-        
-        if acc_label:
-            conditions.append(f"@content-desc='{acc_label}'")
-        
-        if text:
-            conditions.append(f"@text='{text}'")
-        
-        if not conditions:
-            if class_name:
-                return f"//{class_name}"
-            raise AssertionError("Cannot build locator: element has no usable attributes")
-        
-        base = f"//{class_name}" if class_name else "//*"
-        return f"{base}[{' and '.join(conditions)}]"
-    
-    def build_ios(self, element: Dict[str, Any]) -> str:
-        """
-        Build iOS predicate string combining all available attributes.
-        
-        Returns: "-ios predicate string:name == 'x' AND label == 'y'"
-        """
-        resource_id = element.get('resource_id', '').strip()
-        acc_label = element.get('accessibility_label', '') or element.get('label', '')
-        acc_label = acc_label.strip() if acc_label else ''
-        text = element.get('text', '').strip()
-        class_name = element.get('class_name', '').strip()
-        
-        conditions = []
-        
-        if resource_id:
-            conditions.append(f"name == '{resource_id}'")
-        
-        if acc_label:
-            escaped = acc_label.replace("'", "\\'")
-            conditions.append(f"label == '{escaped}'")
-        
-        if text:
-            escaped = text.replace("'", "\\'")
-            conditions.append(f"value == '{escaped}'")
-        
-        if class_name:
-            conditions.append(f"type == '{class_name}'")
-        
-        if not conditions:
-            raise AssertionError("Cannot build locator: element has no usable attributes")
-        
-        return f"-ios predicate string:{' AND '.join(conditions)}"
diff --git a/Agent/platforms/locators/mobile_locator.py b/Agent/platforms/locators/mobile_locator.py
new file mode 100644
index 0000000..9a08c5b
--- /dev/null
+++ b/Agent/platforms/locators/mobile_locator.py
@@ -0,0 +1,49 @@
+from typing import Any, Dict, Literal
+
+StrategyType = Literal['auto', 'id_only', 'bounds', 'xpath_attrs', 'xpath_all']
+
+
+class MobileLocatorBuilder:
+    """
+    Facade that forwards to a platform-specific locator builder.
+
+    The concrete builder is created on first use and dropped again whenever
+    set_platform() receives a different platform.
+    """
+
+    def __init__(self, platform: str = None):
+        self._builder = None
+        self._platform = platform
+
+    def set_platform(self, platform: str):
+        """Switch platform; the cached builder is kept when the value is unchanged."""
+        if self._platform == platform:
+            return
+        self._platform = platform
+        self._builder = None
+
+    def _get_builder(self):
+        """Return the cached builder, creating it first: 'ios' -> iOS, anything else -> Android."""
+        if self._builder is not None:
+            return self._builder
+
+        # Imported lazily so only the builder that is actually used gets loaded.
+        if self._platform == 'ios':
+            from Agent.platforms.locators.ios_locator import IOSLocatorBuilder as builder_cls
+        else:
+            from Agent.platforms.locators.android_locator import AndroidLocatorBuilder as builder_cls
+        self._builder = builder_cls()
+        return self._builder
+
+    def build(self, element: Dict[str, Any], strategy: StrategyType = 'auto') -> str:
+        """
+        Args:
+            element: Dict with raw XML attributes
+            strategy: 'auto' | 'id_only' | 'bounds' | 'xpath_attrs' | 'xpath_all'
+        Returns:
+            Appium locator string produced by the active platform builder
+        Example: build(elem, 'id_only') -> 'id=com.android:id/button'
+        """
+        builder = self._get_builder()
+        return builder.build(element, strategy=strategy)
+