diff --git a/Dockerfile b/Dockerfile
index 60e0cba4bf..3f9343429f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,6 +12,7 @@ RUN apt-get update && \
apt-get install -y --no-install-recommends $(grep -vE "^\s*#" apt.txt | tr "\n" " ") && \
apt-get install libpq-dev postgresql-client -y --no-install-recommends && \
apt-get install poppler-utils -y && \
+ apt-get install default-jre -y && \
apt-get clean && \
apt-get purge && \
rm -rf /var/lib/apt/lists/*
diff --git a/learning_resources/converters/__init__.py b/learning_resources/converters/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/learning_resources/converters/opendataloader_llm_converter.py b/learning_resources/converters/opendataloader_llm_converter.py
new file mode 100644
index 0000000000..a6df9f5060
--- /dev/null
+++ b/learning_resources/converters/opendataloader_llm_converter.py
@@ -0,0 +1,766 @@
+"""
+PDF to Markdown converter using OpenDataLoader JSON output and LLM-based OCR.
+
+Strategy:
+1. Parse PDF structure.
+2. Calculate a "Math Density Score" for each page.
+3. Decision Logic:
+ - High Score (> Threshold): OCR the entire page (preserves complex layout/formulas).
+ - Low Score: Use standard parsed text and only OCR specific embedded images.
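+
+Example usage (a sketch; assumes Django settings such as OCR_MODEL and
+OCR_MATH_DENSITY_THRESHOLD are configured):
+
+    converter = OpenDataLoaderLLMConverter(Path("lecture_notes.pdf"))
+    markdown = converter.convert_to_markdown()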
+"""
+
+import base64
+import gc
+import json
+import logging
+import re
+import tempfile
+import uuid
+from collections import defaultdict
+from dataclasses import dataclass, field
+from io import BytesIO
+from pathlib import Path
+from typing import Any
+
+import cv2
+import litellm
+import numpy as np
+import opendataloader_pdf
+import pdf2image
+from django.conf import settings
+from litellm import batch_completion
+from PIL import Image
+
+log = logging.getLogger(__name__)
+
+# --- Configuration ---
+MIN_IMAGE_DIMENSION = 32
+MIN_IMAGE_RATIO = 12
+IMAGE_BATCH_SIZE = 10
+PDF_POINTS_PER_INCH = 72
+
+# A score above settings.OCR_MATH_DENSITY_THRESHOLD (default 5) triggers full-page OCR.
+
+
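+# TeX/AMS math font family prefixes (e.g. "cmmi" is Computer Modern math italic);
+# matched as substrings of normalized font names in _calculate_page_math_score.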
+MATH_FONTS = {"cmmi", "cmsy", "cmex", "msbm", "msam", "eufm", "dsrom", "wasy", "stmary"}
+
+
+BASIC_MATH_SYMBOLS = re.compile(r"[+=<>≤≥≠±\-]")
+# Regex for "Complex" math (Integrals, Sums, Keywords)
+COMPLEX_MATH_REGEX = re.compile(
+ r"([∑∏∫∂∇√∞∀∃∈⊂∪→⇒⇔αβγδεθλμπστφωΩΓΛΨ]|\b(lim|sin|cos|tan|log|ln|det|mod)\b)" # noqa: RUF001
+)
+
+
+# --- Content Block Types (from JsonName.java) ---
+class BlockType:
+ IMAGE = "image"
+ LIST_ITEM = "list item"
+ LINE = "line"
+ TABLE = "table"
+ TEXT_BLOCK = "text block"
+ LIST = "list"
+ TABLE_CELL = "table cell"
+ TABLE_ROW = "table row"
+ PARAGRAPH = "paragraph"
+ HEADING = "heading"
+ TEXT_CHUNK = "text chunk"
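+    # Synthetic type used internally for whole-page OCR results (not part of
+    # the OpenDataLoader JSON schema).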
+ FULL_PAGE_OCR = "full_page_ocr"
+
+
+@dataclass
+class TableCell:
+ """Represents a cell in a table."""
+
+ content: str | None = None
+ column_number: int = 0
+ row_number: int = 0
+ column_span: int = 1
+ row_span: int = 1
+ kids: list[Any] = field(default_factory=list)
+
+
+@dataclass
+class TableRow:
+ """Represents a row in a table."""
+
+ row_number: int = 0
+ cells: list[TableCell] = field(default_factory=list)
+
+
+@dataclass
+class ListItem:
+ """Represents an item in a list."""
+
+ content: str | None = None
+ kids: list[Any] = field(default_factory=list)
+
+
+@dataclass
+class ContentBlock:
+ block_type: str
+ block_id: int
+ page_number: int
+ bounding_box: list[float]
+ content: str | None = None
+ heading_level: int | None = None
+ font: str | None = None
+ # Table-specific fields
+ rows: list[TableRow] = field(default_factory=list)
+ number_of_rows: int = 0
+ number_of_columns: int = 0
+ # List-specific fields
+ list_items: list[ListItem] = field(default_factory=list)
+ numbering_style: str | None = None
+ # Nested content (for complex structures)
+ kids: list[Any] = field(default_factory=list)
+
+
+@dataclass
+class ImageForOCR:
+ block_id: int
+ pil_image: Image.Image
+ is_full_page: bool
+
+
+def _image_to_base64_uri(pil_image: Image.Image) -> str:
+ buffer = BytesIO()
+ pil_image.save(buffer, format="JPEG", optimize=True)
+ image_b64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
+ return f"data:image/jpeg;base64,{image_b64}"
+
+
+def _build_ocr_message(image_uri: str, prompt: str) -> list[dict]:
+ return [
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": prompt},
+ {"type": "image_url", "image_url": {"url": image_uri}},
+ ],
+ }
+ ]
+
+
+def _is_valid_image_dimensions(width: int, height: int) -> bool:
+ """Check if image dimensions are valid for processing."""
+ if width < MIN_IMAGE_DIMENSION or height < MIN_IMAGE_DIMENSION:
+ return False
+
+ # Avoid extremely thin strips (often separator lines)
+ if height > 0:
+ aspect_ratio = width / height
+ if aspect_ratio > MIN_IMAGE_RATIO or aspect_ratio < (1 / MIN_IMAGE_RATIO):
+ return False
+
+ return True
+
+
+def _optimize_image(pil_image: Image.Image) -> Image.Image:
+ if pil_image.mode != "RGB":
+ pil_image = pil_image.convert("RGB")
+ np_image = np.array(pil_image)
+ UNIQUE_COLOR_THRESHOLD = 200
+ unique_colors = len(np.unique(np_image.reshape(-1, 3), axis=0))
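+    # Heuristic: many unique colors suggests photo-like content, which is kept
+    # as smoothed grayscale; few colors suggests text/line art, where Otsu
+    # binarization yields a crisper input for OCR.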
+ if unique_colors > UNIQUE_COLOR_THRESHOLD:
+ processed = cv2.cvtColor(np_image, cv2.COLOR_RGB2GRAY)
+ processed = cv2.GaussianBlur(processed, (3, 3), 0)
+ return Image.fromarray(processed)
+ else:
+ gray = cv2.cvtColor(np_image, cv2.COLOR_RGB2GRAY)
+ _, processed = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+ return Image.fromarray(processed)
+
+
+class PDFPageRenderer:
+ def __init__(self, document_path: Path, dpi: int = 150):
+ self.document_path = document_path
+ self.dpi = dpi
+ self._page_cache: dict[int, Image.Image] = {}
+ self._scale = dpi / PDF_POINTS_PER_INCH
+
+ def get_page_image(self, page_number: int) -> Image.Image:
+ """
+ Get a specific page from the pdf as an image
+ """
+ if page_number not in self._page_cache:
+ images = pdf2image.convert_from_path(
+ self.document_path,
+ dpi=self.dpi,
+ first_page=page_number,
+ last_page=page_number,
+ )
+ self._page_cache[page_number] = images[0]
+ return self._page_cache[page_number].copy()
+
+ def extract_region(self, page_number: int, bbox: list[float]) -> Image.Image | None:
+ """
+ Clip a specific region on a page as an image
+ """
+ page_image = self.get_page_image(page_number)
+ page_width, page_height = page_image.size
+
+ pdf_x1, pdf_y1, pdf_x2, pdf_y2 = bbox
+ pil_x1 = int(pdf_x1 * self._scale)
+ pil_x2 = int(pdf_x2 * self._scale)
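+        # PDF coordinates use a bottom-left origin while PIL uses top-left,
+        # so the y-axis is flipped when mapping the bounding box.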
+ pil_y1 = int(page_height - (pdf_y2 * self._scale))
+ pil_y2 = int(page_height - (pdf_y1 * self._scale))
+
+ # Clamp
+ left, right = min(pil_x1, pil_x2), max(pil_x1, pil_x2)
+ upper, lower = min(pil_y1, pil_y2), max(pil_y1, pil_y2)
+
+ left = max(0, min(left, page_width))
+ right = max(0, min(right, page_width))
+ upper = max(0, min(upper, page_height))
+ lower = max(0, min(lower, page_height))
+
+ width = right - left
+ height = lower - upper
+
+ # validate dimensions before cropping
+ if not _is_valid_image_dimensions(width, height):
+ log.debug("Skipping image with invalid dimensions: %dx%d", width, height)
+ return None
+
+ return page_image.crop((left, upper, right, lower))
+
+ def cleanup(self) -> None:
+ """
+ Clean up processed images
+ """
+ for page_image in self._page_cache.values():
+ page_image.close()
+ self._page_cache.clear()
+
+
+class OCRProcessor:
+ def __init__(self, batch_size: int = IMAGE_BATCH_SIZE):
+ self.batch_size = batch_size
+
+ def process_images(self, images: list[ImageForOCR]) -> dict[int, str]:
+ """
+ Batch OCR images
+ """
+ if not images:
+ return {}
+ block_ids = [img.block_id for img in images]
+ messages = self._prepare_messages(images)
+ ocr_texts = self._execute_batch_ocr(messages)
+ return dict(zip(block_ids, ocr_texts, strict=True))
+
+ def _prepare_messages(self, images: list[ImageForOCR]) -> list[list[dict]]:
+ messages = []
+ for img in images:
+ image_uri = _image_to_base64_uri(img.pil_image)
+ img.pil_image.close()
+ messages.append(_build_ocr_message(image_uri, settings.OCR_PROMPT))
+ return messages
+
+ def _execute_batch_ocr(self, messages_list: list[list[dict]]) -> list[str]:
+ all_texts = []
+ for i in range(0, len(messages_list), self.batch_size):
+ batch = messages_list[i : i + self.batch_size]
+ responses = batch_completion(
+ custom_llm_provider=settings.LITELLM_CUSTOM_PROVIDER,
+ api_base=settings.LITELLM_API_BASE,
+ model=settings.OCR_MODEL,
+ messages=batch,
+ )
+ all_texts.extend([resp.choices[0].message.content for resp in responses])
+ gc.collect()
+ return all_texts
+
+
+class MarkdownAssembler:
+ """
+ Assembles markdown from parsed content blocks.
+
+ Handles all content types from OpenDataLoader JSON output:
+ - heading: Section headings with levels
+ - paragraph: Text paragraphs
+ - image: Embedded images (processed via OCR)
+ - table: Tables with rows and cells
+ - list: Ordered/unordered lists with items
+ - text chunk / text block: Raw text content
+ - line: Single lines of text
+ - list item: Individual list items (when standalone)
+ """
+
+ def assemble(self, blocks: list[ContentBlock], ocr_results: dict[int, str]) -> str:
+ formatted_parts = []
+ for block in blocks:
+ # Full Page OCR overrides all individual blocks on that page
+ if block.block_type == BlockType.FULL_PAGE_OCR:
+ formatted_parts.append(ocr_results.get(block.block_id, ""))
+ continue
+
+ # Standard formatting
+ text = self._format_block(block, ocr_results)
+ if text:
+ formatted_parts.append(text)
+ return "\n\n".join(formatted_parts)
+
+ def _format_block(
+ self, block: ContentBlock, ocr_results: dict[int, str]
+ ) -> str | None:
+ """
+ Format a content block to markdown.
+
+ Supports all content types from OpenDataLoader:
+ - heading: Markdown headings with # prefix
+ - paragraph: Plain text paragraphs
+ - image: OCR results or placeholder
+ - table: Markdown tables with | separators
+ - list: Markdown lists with - or 1. prefixes
+ - text chunk / text block / line: Plain text
+ - list item: Single list item (when standalone)
+ """
+ block_type = block.block_type
+ block_content = None
+ # Heading
+ if block_type == BlockType.HEADING and block.content:
+ level = min(block.heading_level or 1, 6)
+ block_content = f"{'#' * level} {block.content}"
+
+ # Paragraph
+ if block_type == BlockType.PARAGRAPH and block.content:
+ block_content = block.content
+
+ # Image - use OCR result
+ if block_type == BlockType.IMAGE:
+ block_content = ocr_results.get(block.block_id)
+
+ # Table
+ if block_type == BlockType.TABLE:
+ block_content = self._format_table(block)
+
+ # List
+ if block_type == BlockType.LIST:
+ block_content = self._format_list(block)
+
+ # Text chunk / text block / line - plain text content
+ if block_type in (BlockType.TEXT_CHUNK, BlockType.TEXT_BLOCK, BlockType.LINE):
+ block_content = block.content if block.content else None
+
+ # Standalone list item
+ if block_type == BlockType.LIST_ITEM and block.content:
+ block_content = f"- {block.content}"
+
+ # Table cell (standalone, unusual but possible)
+ if block_type == BlockType.TABLE_CELL and block.content:
+ block_content = block.content
+
+ # Table row (standalone, unusual but possible)
+ if block_type == BlockType.TABLE_ROW:
+ block_content = self._format_table_row_standalone(block)
+
+ return block_content
+
+ def _format_table(self, block: ContentBlock) -> str | None:
+ """
+ Format a table block to markdown table syntax.
+ """
+ if not block.rows:
+ # Try to extract table from nested kids structure
+ return self._format_table_from_kids(block)
+
+ lines = []
+ num_cols = block.number_of_columns or self._detect_column_count(block.rows)
+
+ for row_idx, row in enumerate(block.rows):
+ # Build row content
+ cells_content = []
+ for cell in row.cells:
+ cell_text = self._get_cell_content(cell)
+ # Replace newlines with HTML breaks for table cells
+                cell_text = cell_text.replace("\n", "<br>") if cell_text else ""
+ cells_content.append(cell_text)
+
+ # Pad if fewer cells than expected
+ while len(cells_content) < num_cols:
+ cells_content.append("")
+
+ row_line = "| " + " | ".join(cells_content) + " |"
+ lines.append(row_line)
+
+ # Add header separator after first row
+ if row_idx == 0:
+ separator = "| " + " | ".join(["---"] * num_cols) + " |"
+ lines.append(separator)
+
+ return "\n".join(lines) if lines else None
+
+ def _format_table_from_kids(self, block: ContentBlock) -> str | None:
+ """
+ Format table when structure is in 'kids' field rather than 'rows'.
+ """
+ if not block.kids:
+ return None
+
+ lines = []
+ num_cols = block.number_of_columns or 0
+ row_idx = 0
+
+ for kid in block.kids:
+ if isinstance(kid, dict) and kid.get("type") == BlockType.TABLE_ROW:
+ cells = kid.get("cells", [])
+ cells_content = []
+
+ for cell_data in cells:
+ if isinstance(cell_data, dict):
+ cell_text = cell_data.get("content", "")
+ # Handle nested kids in cell
+ if not cell_text and "kids" in cell_data:
+ cell_text = self._extract_text_from_kids(cell_data["kids"])
+ else:
+ cell_text = str(cell_data) if cell_data else ""
+
+                    cell_text = cell_text.replace("\n", "<br>") if cell_text else ""
+ cells_content.append(cell_text)
+
+ if not num_cols:
+ num_cols = len(cells_content)
+
+ while len(cells_content) < num_cols:
+ cells_content.append("")
+
+ row_line = "| " + " | ".join(cells_content) + " |"
+ lines.append(row_line)
+
+ if row_idx == 0:
+ separator = "| " + " | ".join(["---"] * num_cols) + " |"
+ lines.append(separator)
+
+ row_idx += 1
+
+ return "\n".join(lines) if lines else None
+
+ def _format_table_row_standalone(self, block: ContentBlock) -> str | None:
+ """Format a standalone table row (unusual case)."""
+ if not hasattr(block, "kids") or not block.kids:
+ return block.content
+
+ cells_content = []
+ for cell in block.kids:
+ if isinstance(cell, dict):
+ cell_text = cell.get("content", "")
+ else:
+ cell_text = str(cell) if cell else ""
+            cells_content.append(cell_text.replace("\n", "<br>") if cell_text else "")
+
+ return "| " + " | ".join(cells_content) + " |" if cells_content else None
+
+ def _get_cell_content(self, cell: TableCell) -> str:
+ """Extract text content from a table cell."""
+ if cell.content:
+ return cell.content
+
+ # Check nested kids for content
+ if cell.kids:
+ return self._extract_text_from_kids(cell.kids)
+
+ return ""
+
+ def _extract_text_from_kids(self, kids: list[Any]) -> str:
+ """Recursively extract text from nested kids structure."""
+ texts = []
+ for kid in kids:
+ if isinstance(kid, dict):
+ if "content" in kid:
+ texts.append(kid["content"])
+ if "kids" in kid:
+ texts.append(self._extract_text_from_kids(kid["kids"]))
+ elif isinstance(kid, str):
+ texts.append(kid)
+ return " ".join(filter(None, texts))
+
+ def _detect_column_count(self, rows: list[TableRow]) -> int:
+ """Detect number of columns from rows."""
+ if not rows:
+ return 0
+ return max(len(row.cells) for row in rows)
+
+ def _format_list(self, block: ContentBlock) -> str | None:
+ """
+ Format a list block to markdown list syntax.
+
+ Supports ordered (numbered) and unordered (bullet) lists.
+ """
+ if not block.list_items and not block.kids:
+ return None
+
+ lines = []
+ is_ordered = self._is_ordered_list(block.numbering_style)
+
+ # Use list_items if available, otherwise parse from kids
+ items = (
+ block.list_items
+ if block.list_items
+ else self._parse_list_items_from_kids(block.kids)
+ )
+
+ for idx, item in enumerate(items, start=1):
+ item_content = self._get_list_item_content(item)
+ if item_content:
+ if is_ordered:
+ lines.append(f"{idx}. {item_content}")
+ else:
+ lines.append(f"- {item_content}")
+
+ return "\n".join(lines) if lines else None
+
+ def _is_ordered_list(self, numbering_style: str | None) -> bool:
+ """Determine if list should be ordered based on numbering style."""
+ if not numbering_style:
+ return False
+ # Common ordered list indicators
+ ordered_indicators = ["decimal", "number", "alpha", "roman", "1", "a", "i"]
+ return any(ind in numbering_style.lower() for ind in ordered_indicators)
+
+ def _parse_list_items_from_kids(self, kids: list[Any]) -> list[ListItem]:
+ """Parse list items from kids structure."""
+
+ return [
+ ListItem(content=kid.get("content"), kids=kid.get("kids", []))
+ for kid in kids
+ if isinstance(kid, dict) and kid.get("type") == BlockType.LIST_ITEM
+ ]
+
+ def _get_list_item_content(self, item: ListItem) -> str:
+ """Extract text content from a list item."""
+ if item.content:
+ return item.content
+
+ if item.kids:
+ return self._extract_text_from_kids(item.kids)
+
+ return ""
+
+
+class OpenDataLoaderLLMConverter:
+ def __init__(
+ self,
+ document_path: Path,
+ output_dir: Path | None = None,
+ pdf_dpi: int = 150,
+ *,
+ debug_mode=False,
+ ):
+ self.document_path = Path(document_path)
+ self.debug_mode = debug_mode
+ self._tempdir = tempfile.TemporaryDirectory()
+ self.output_dir = output_dir or Path(self._tempdir.name)
+ self._page_renderer = PDFPageRenderer(self.document_path, dpi=pdf_dpi)
+ self._ocr_processor = OCRProcessor()
+ self._markdown_assembler = MarkdownAssembler()
+
+ if debug_mode:
+ litellm._turn_on_debug() # noqa: SLF001
+
+ @property
+ def _debug_dir(self) -> Path:
+ d = Path(settings.OCR_DEBUG_DIRECTORY) / self.document_path.stem
+ d.mkdir(parents=True, exist_ok=True)
+ return d
+
+ def _save_debug_image(self, pil_image: Image.Image, prefix: str = "") -> str:
+ fp = self._debug_dir / f"{prefix}{self.document_path.name}-{uuid.uuid4()}.png"
+ pil_image.save(fp)
+ return str(fp)
+
+ def _save_debug_markdown(self, markdown_content: str) -> str:
+ fp = self._debug_dir / f"{self.document_path.name}.md"
+ fp.write_text(markdown_content)
+ return str(fp)
+
+ def _convert_pdf_to_json(self) -> dict[str, Any]:
+ self.output_dir.mkdir(parents=True, exist_ok=True)
+ opendataloader_pdf.convert(
+ input_path=[str(self.document_path)],
+ output_dir=str(self.output_dir),
+ format="json",
+ use_struct_tree=True,
+ keep_line_breaks=True,
+ )
+ return json.loads(
+ (self.output_dir / f"{self.document_path.stem}.json").read_text()
+ )
+
+ def _parse_content_block(self, block_data: dict[str, Any]) -> ContentBlock:
+ """
+ Parse a content block from JSON data.
+
+ Handles all content types including nested structures for tables and lists.
+ """
+ block_type = block_data.get("type", "unknown")
+
+ # Parse table rows if present
+ rows = []
+ if block_type == BlockType.TABLE and "rows" in block_data:
+ rows = self._parse_table_rows(block_data["rows"])
+
+ # Parse list items if present
+ list_items = []
+ if block_type == BlockType.LIST and "list items" in block_data:
+ list_items = self._parse_list_items(block_data["list items"])
+
+ return ContentBlock(
+ block_type=block_type,
+ block_id=block_data.get("id", 0),
+ page_number=block_data.get("page number", 1),
+ bounding_box=block_data.get("bounding box", [0, 0, 0, 0]),
+ content=block_data.get("content"),
+ heading_level=block_data.get("heading level"),
+ font=block_data.get("font"),
+ rows=rows,
+ number_of_rows=block_data.get("number of rows", 0),
+ number_of_columns=block_data.get("number of columns", 0),
+ list_items=list_items,
+ numbering_style=block_data.get("numbering style"),
+ kids=block_data.get("kids", []),
+ )
+
+ def _parse_table_rows(self, rows_data: list[dict]) -> list[TableRow]:
+ """Parse table rows from JSON data."""
+ rows = []
+ for row_data in rows_data:
+ cells = [
+ TableCell(
+ content=cell_data.get("content"),
+ column_number=cell_data.get("column number", 0),
+ row_number=cell_data.get("row number", 0),
+ column_span=cell_data.get("column span", 1),
+ row_span=cell_data.get("row span", 1),
+ kids=cell_data.get("kids", []),
+ )
+ for cell_data in row_data.get("cells", [])
+ ]
+
+ rows.append(
+ TableRow(
+ row_number=row_data.get("row number", 0),
+ cells=cells,
+ )
+ )
+ return rows
+
+ def _parse_list_items(self, items_data: list[dict]) -> list[ListItem]:
+ """Parse list items from JSON data."""
+ return [
+ ListItem(
+ content=item_data.get("content"),
+ kids=item_data.get("kids", []),
+ )
+ for item_data in items_data
+ ]
+
+ def _calculate_page_math_score(self, blocks: list[ContentBlock]) -> int:
+ """
+ Calculate a score representing 'math density' for a page.
+ """
+ score = 0
+ for b in blocks:
+ if b.font:
+ font_base = "".join([c for c in b.font.lower() if c.isalpha()])
+ if any(mf in font_base for mf in MATH_FONTS):
+ score += 1
+
+ if b.content:
+ text = b.content.strip()
+ if COMPLEX_MATH_REGEX.search(text):
+ score += 3
+ elif BASIC_MATH_SYMBOLS.search(text):
+ score += 1
+ return score
+
+ def convert_to_markdown(self) -> str:
+ try:
+ # get document elements and structure as json
+ json_data = self._convert_pdf_to_json()
+ raw_blocks = [
+ self._parse_content_block(k) for k in json_data.get("kids", [])
+ ]
+
+ # Group by Page
+ pages = defaultdict(list)
+ for b in raw_blocks:
+ pages[b.page_number].append(b)
+
+ final_blocks = []
+ images_for_ocr = []
+
+ # Process Per Page
+ for page_num in sorted(pages.keys()):
+ page_blocks = pages[page_num]
+
+ math_score = self._calculate_page_math_score(page_blocks)
+ should_full_ocr = math_score > settings.OCR_MATH_DENSITY_THRESHOLD
+
+ if should_full_ocr:
+ log.info(
+ "Page %d: High Math Score (%f).Strategy: Full Page OCR.",
+ page_num,
+ math_score,
+ )
+
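+                    # Synthetic ID for the full-page OCR block; the 888000
+                    # offset keeps it clear of parser-assigned block IDs.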
+ page_block_id = 888000 + page_num
+
+ full_page_img = self._page_renderer.get_page_image(page_num)
+ optimized_img = _optimize_image(full_page_img)
+
+ images_for_ocr.append(
+ ImageForOCR(page_block_id, optimized_img, is_full_page=True)
+ )
+
+ final_blocks.append(
+ ContentBlock(
+ block_type=BlockType.FULL_PAGE_OCR,
+ block_id=page_block_id,
+ page_number=page_num,
+ bounding_box=[],
+ )
+ )
+
+ if self.debug_mode:
+ self._save_debug_image(
+ optimized_img, prefix=f"FULLPAGE_{page_num}_"
+ )
+
+ else:
+ log.info(
+ "Page %d: Low Math Score (%f).Strategy: Standard Parse.",
+ page_num,
+ math_score,
+ )
+
+ for block in page_blocks:
+ final_blocks.append(block)
+
+ if block.block_type == BlockType.IMAGE:
+ img = self._page_renderer.extract_region(
+ block.page_number, block.bounding_box
+ )
+ if img:
+ opt = _optimize_image(img)
+ images_for_ocr.append(
+ ImageForOCR(block.block_id, opt, is_full_page=False)
+ )
+ if self.debug_mode:
+ self._save_debug_image(
+ opt, prefix=f"IMG_{block.block_id}_"
+ )
+
+ # Batch OCR
+ ocr_results = self._ocr_processor.process_images(images_for_ocr)
+
+ # Assemble
+ final_md = self._markdown_assembler.assemble(final_blocks, ocr_results)
+
+ if self.debug_mode:
+ self._save_debug_markdown(final_md)
+
+ return final_md
+
+ finally:
+ self._page_renderer.cleanup()
diff --git a/learning_resources/converters/opendataloader_llm_converter_test.py b/learning_resources/converters/opendataloader_llm_converter_test.py
new file mode 100644
index 0000000000..8133a63ee7
--- /dev/null
+++ b/learning_resources/converters/opendataloader_llm_converter_test.py
@@ -0,0 +1,106 @@
+from pathlib import Path
+
+import pytest
+
+from learning_resources.converters.opendataloader_llm_converter import (
+ ImageForOCR,
+ OpenDataLoaderLLMConverter,
+)
+
+
+@pytest.fixture
+def fake_renderer(mocker):
+ renderer = mocker.MagicMock()
+    renderer.get_page_image.return_value = mocker.MagicMock(name="page_image")
+ renderer.cleanup = mocker.MagicMock()
+ return renderer
+
+
+@pytest.fixture
+def fake_ocr(mocker):
+ ocr = mocker.MagicMock()
+ ocr.ocr_image.return_value = "OCR_TEXT"
+ return ocr
+
+
+@pytest.fixture(autouse=True)
+def mock_litellm(mocker):
+ mocker.patch(
+ "learning_resources.converters.opendataloader_llm_converter.litellm.completion",
+ return_value={"choices": [{"message": {"content": "OCR TEXT"}}]},
+ )
+ mocker.patch(
+ "learning_resources.converters.opendataloader_llm_converter.OCRProcessor._execute_batch_ocr",
+ return_value=["OCR TEXT"],
+ )
+
+
+def test_basic_conversion(settings, fake_renderer, mocker):
+ """
+ Test a very basic conversion of pdf to markdown
+ """
+ settings.OCR_MODEL = "test"
+ sample_pdf = Path("test_pdfs/notes.pdf")
+ mocker.patch(
+ "learning_resources.converters.opendataloader_llm_converter._optimize_image",
+ return_value=mocker.MagicMock(),
+ )
+ mocker.patch(
+ "learning_resources.converters.opendataloader_llm_converter.PDFPageRenderer",
+ return_value=fake_renderer,
+ )
+ converter = OpenDataLoaderLLMConverter(document_path=sample_pdf, debug_mode=False)
+ markdown = converter.convert_to_markdown()
+ assert isinstance(markdown, str)
+ assert "OCR TEXT" in markdown
+
+
+def test_debug_images_written(tmp_path, mocker, settings, fake_renderer):
+ """
+ Test debug_mode flag outputs debug images
+ """
+ settings.OCR_MODEL = "test"
+ mocker.patch(
+ "learning_resources.converters.opendataloader_llm_converter.settings.OCR_DEBUG_DIRECTORY",
+ tmp_path,
+ )
+ mocker.patch(
+ "learning_resources.converters.opendataloader_llm_converter._optimize_image",
+ return_value=mocker.MagicMock(),
+ )
+ mocker.patch(
+ "learning_resources.converters.opendataloader_llm_converter.PDFPageRenderer",
+ return_value=fake_renderer,
+ )
+ sample_pdf = Path("test_pdfs/notes.pdf")
+ converter = OpenDataLoaderLLMConverter(sample_pdf, debug_mode=True)
+ converter.convert_to_markdown()
+ assert len(list(tmp_path.glob("*notes"))) > 0
+
+
+def test_tiny_images_are_skipped(fake_renderer, fake_ocr, mocker):
+ """
+ Test that small images or images with odd dimensions are skipped
+ """
+    mocker.patch(
+        "learning_resources.converters.opendataloader_llm_converter.PDFPageRenderer",
+        return_value=fake_renderer,
+    )
+    mocker.patch(
+        "learning_resources.converters.opendataloader_llm_converter.OCRProcessor",
+        return_value=fake_ocr,
+    )
+    sample_pdf = Path("test_pdfs/notes.pdf")
+    conv = OpenDataLoaderLLMConverter(sample_pdf)
+
+    tiny = ImageForOCR(
+        pil_image=mocker.MagicMock(size=(41, 5)),
+        is_full_page=False,
+        block_id="tiny",
+    )
+
+    conv._ocr_processor.process_images([tiny])  # noqa: SLF001
+
+    fake_ocr.ocr_image.assert_not_called()
diff --git a/learning_resources/etl/canvas.py b/learning_resources/etl/canvas.py
index 277d27d540..86455005df 100644
--- a/learning_resources/etl/canvas.py
+++ b/learning_resources/etl/canvas.py
@@ -158,7 +158,9 @@ def _generate_content():
else:
log.debug("skipping unpublished file %s", member.filename)
- for content_data in process_olx_path(olx_path, run, overwrite=overwrite):
+ for content_data in process_olx_path(
+ olx_path, run, overwrite=overwrite, use_ocr=True
+ ):
url_path = content_data["source_path"].lstrip(
content_data["source_path"].split("/")[0]
)
@@ -209,6 +211,7 @@ def transform_canvas_problem_files(
overwrite=overwrite,
valid_file_types=VALID_TUTOR_PROBLEM_FILE_TYPES,
is_tutor_problem_file_import=True,
+ use_ocr=True,
):
keys_to_keep = [
"run",
diff --git a/learning_resources/etl/canvas_test.py b/learning_resources/etl/canvas_test.py
index 710f80c853..3340ee2821 100644
--- a/learning_resources/etl/canvas_test.py
+++ b/learning_resources/etl/canvas_test.py
@@ -96,6 +96,11 @@
"""
+@pytest.fixture
+def sample_pdf_content():
+ return Path("test_pdfs/notes.pdf").read_bytes()
+
+
@pytest.fixture(autouse=True)
def canvas_platform():
"""Fixture for the canvas platform"""
@@ -443,8 +448,8 @@ def test_transform_canvas_content_files_removes_unpublished_content(mocker, tmp_
@pytest.mark.parametrize("overwrite", [True, False])
@pytest.mark.parametrize("existing_file", [True, False])
-def test_transform_canvas_problem_files_pdf_calls_pdf_to_markdown(
- tmp_path, mocker, settings, overwrite, existing_file
+def test_transform_canvas_problem_files_pdf_calls_pdf_to_markdown( # noqa: PLR0913
+ tmp_path, mocker, settings, overwrite, existing_file, sample_pdf_content
):
"""
Test that transform_canvas_problem_files calls _pdf_to_markdown for PDF files.
@@ -452,11 +457,12 @@ def test_transform_canvas_problem_files_pdf_calls_pdf_to_markdown(
"""
settings.CANVAS_TUTORBOT_FOLDER = "tutorbot/"
- settings.CANVAS_PDF_TRANSCRIPTION_MODEL = "fake-model"
+ settings.OCR_MODEL = "fake-model"
+
pdf_filename = "problemset1/problem.pdf"
- pdf_content = b"%PDF-1.4 fake pdf content"
+
zip_path = make_canvas_zip(
- tmp_path, files=[(f"tutorbot/{pdf_filename}", pdf_content)]
+ tmp_path, files=[(f"tutorbot/{pdf_filename}", sample_pdf_content)]
)
# return a file with pdf extension
@@ -524,7 +530,7 @@ def test_transform_canvas_problem_files_non_pdf_does_not_call_pdf_to_markdown(
there is an existing file.
"""
settings.CANVAS_TUTORBOT_FOLDER = "tutorbot/"
- settings.CANVAS_PDF_TRANSCRIPTION_MODEL = "fake-model"
+ settings.OCR_MODEL = "fake-model"
csv_filename = "problemset2/problem.csv"
csv_content = "a,b,c\n1,2,3"
zip_path = make_canvas_zip(
@@ -1031,7 +1037,7 @@ def test_parse_files_meta_excludes_tutorbot_folder(tmp_path, settings):
assert result["unpublished"][0]["path"].name == "tutorfile.html"
-def test_embedded_files_from_html(tmp_path, mocker):
+def test_embedded_files_from_html(tmp_path, mocker, sample_pdf_content):
"""
Test that _embedded_files_from_html processes files embedded in HTML content
even if they are not in modules_meta.xml or files_meta.xml.
@@ -1083,7 +1089,7 @@ def test_embedded_files_from_html(tmp_path, mocker):
tmp_path, module_xml=module_xml, manifest_xml=manifest_xml
)
with zipfile.ZipFile(zip_path, "a") as zf:
- zf.writestr("web_resources/file1.pdf", "content of file1")
+ zf.writestr("web_resources/file1.pdf", sample_pdf_content)
zf.writestr("web_resources/file2.html", "content of file2")
zf.writestr("web_resources/html_page.html", html_content)
@@ -1742,7 +1748,9 @@ def test_get_published_items_for_attachment_module(mocker, tmp_path):
assert Path("web_resources/visible_attachment_module.txt").resolve() in published
-def test_ingestion_finishes_with_missing_xml_files(tmp_path, mocker):
+def test_ingestion_finishes_with_missing_xml_files(
+ tmp_path, mocker, sample_pdf_content
+):
"""
Test that canvas course ingestion succeeds even if some config XML files are missing
"""
@@ -1787,7 +1795,7 @@ def test_ingestion_finishes_with_missing_xml_files(tmp_path, mocker):
manifest_xml=manifest_xml,
files=[
("course_settings/files_meta.xml", files_xml),
- ("web_resources/file1.pdf", "content of file1"),
+ ("web_resources/file1.pdf", sample_pdf_content),
("web_resources/file2.html", "content of file2"),
("web_resources/html_page.html", ""),
],
@@ -1804,3 +1812,75 @@ def test_ingestion_finishes_with_missing_xml_files(tmp_path, mocker):
)
assert run is not None
assert len(content_results) > 0
+
+
+@pytest.mark.parametrize("overwrite", [True, False])
+@pytest.mark.parametrize("existing_file", [True, False])
+def test_transform_canvas_problem_files_pdf_calls_llm_for_ocr( # noqa: PLR0913
+ tmp_path, mocker, settings, overwrite, existing_file, sample_pdf_content
+):
+ """
+    Test that transform_canvas_problem_files calls _pdf_to_markdown for PDF files
+    if overwrite is True or there is no existing file. Tika should not be called.
+ """
+
+ settings.CANVAS_TUTORBOT_FOLDER = "tutorbot/"
+ settings.OCR_MODEL = "test model"
+
+ pdf_filename = "problemset1/problem.pdf"
+
+ zip_path = make_canvas_zip(
+ tmp_path, files=[(f"tutorbot/{pdf_filename}", sample_pdf_content)]
+ )
+
+ # return a file with pdf extension
+ fake_file_data = {
+ "run": "run",
+ "content_type": "application/pdf",
+ "archive_checksum": "checksum",
+ "source_path": f"tutorbot/{pdf_filename}",
+ "file_extension": ".pdf",
+ }
+
+ mocker.patch(
+ "learning_resources.etl.utils.documents_from_olx",
+ return_value=iter([[mocker.Mock(), fake_file_data]]),
+ )
+
+ # Patch _pdf_to_markdown to return a known value
+ pdf_to_md = mocker.patch(
+ "learning_resources.etl.utils._pdf_to_markdown",
+ return_value="markdown content from pdf",
+ )
+
+ tika = mocker.patch(
+ "learning_resources.etl.utils.extract_text_metadata",
+ )
+
+ run = LearningResourceRunFactory.create()
+
+ if existing_file:
+ TutorProblemFileFactory.create(
+ run=run,
+ type="problem",
+ archive_checksum="checksum",
+ source_path=f"tutorbot/{pdf_filename}",
+ content="existing content",
+ file_name="problem1.pdf",
+ )
+
+ results = list(transform_canvas_problem_files(zip_path, run, overwrite=overwrite))
+
+ if overwrite or not existing_file:
+ pdf_to_md.assert_called_once()
+ else:
+ pdf_to_md.assert_not_called()
+
+ tika.assert_not_called()
+
+    assert results[0]["content"] == (
+        "markdown content from pdf"
+        if overwrite or not existing_file
+        else "existing content"
+    )
+ assert results[0]["problem_title"] == "problemset1"
diff --git a/learning_resources/etl/utils.py b/learning_resources/etl/utils.py
index 7cbdf0e73c..7c42ad0eb9 100644
--- a/learning_resources/etl/utils.py
+++ b/learning_resources/etl/utils.py
@@ -29,9 +29,9 @@
from django.conf import settings
from django.utils.dateparse import parse_duration
from django.utils.text import slugify
-from litellm import completion
from PIL import Image
from pycountry import currencies
+from pypdf import PdfReader
from tika import parser as tika_parser
from learning_resources.constants import (
@@ -46,6 +46,9 @@
OfferedBy,
RunStatus,
)
+from learning_resources.converters.opendataloader_llm_converter import (
+ OpenDataLoaderLLMConverter,
+)
from learning_resources.etl.constants import (
RESOURCE_DELIVERY_MAPPING,
TIME_INTERVAL_MAPPING,
@@ -534,13 +537,14 @@ def get_video_metadata(olx_path: str, run: LearningResourceRun) -> dict:
return video_transcript_mapping
-def process_olx_path(
+def process_olx_path( # noqa: PLR0913
olx_path: str,
run: LearningResourceRun,
*,
overwrite,
valid_file_types=VALID_TEXT_FILE_TYPES,
is_tutor_problem_file_import=False,
+ use_ocr=False,
) -> Generator[dict, None, None]:
video_srt_metadata = get_video_metadata(olx_path, run)
for document, metadata in documents_from_olx(
@@ -571,8 +575,13 @@ def process_olx_path(
}
elif (
file_extension == ".pdf"
- and is_tutor_problem_file_import
- and settings.CANVAS_PDF_TRANSCRIPTION_MODEL
+ and use_ocr
+ and settings.OCR_MODEL
+ and (
+ len(PdfReader(Path(olx_path) / Path(source_path)).pages)
+ <= settings.OCR_PDF_MAX_PAGE_THRESHOLD
+ or is_tutor_problem_file_import
+ )
):
markdown_content = _pdf_to_markdown(Path(olx_path) / Path(source_path))
content_dict = {
@@ -1072,38 +1081,8 @@ def _pdf_to_markdown(pdf_path):
"""
Convert a PDF file to markdown using an llm
"""
- markdown = ""
- for im in pdf_to_base64_images(pdf_path):
- response = completion(
- api_base=settings.LITELLM_API_BASE,
- custom_llm_provider=settings.LITELLM_CUSTOM_PROVIDER,
- model=settings.CANVAS_PDF_TRANSCRIPTION_MODEL,
- messages=[
- {
- "role": "user",
- "content": [
- {
- "type": "text",
- "text": settings.CANVAS_TRANSCRIPTION_PROMPT,
- },
- {
- "type": "image_url",
- "image_url": {
- "url": f"data:image/jpeg;base64,{im}",
- },
- },
- ],
- }
- ],
- )
- markdown_snippet = (
- response.json()["choices"][0]["message"]["content"]
- .removeprefix("```markdown\n")
- .removesuffix("\n```")
- )
-
- markdown += markdown_snippet
- return markdown
+ converter = OpenDataLoaderLLMConverter(pdf_path)
+ return converter.convert_to_markdown()
def pdf_to_base64_images(pdf_path, fmt="JPEG", max_size=2000, quality=85):
diff --git a/main/settings_course_etl.py b/main/settings_course_etl.py
index 6f70c4d6c4..7b01bd504b 100644
--- a/main/settings_course_etl.py
+++ b/main/settings_course_etl.py
@@ -68,14 +68,27 @@
CANVAS_COURSE_BUCKET_PREFIX = get_string(
"CANVAS_COURSE_BUCKET_PREFIX", "canvas/course_content"
)
-CANVAS_PDF_TRANSCRIPTION_MODEL = get_string(
- name="CANVAS_PDF_TRANSCRIPTION_MODEL", default=None
-)
-CANVAS_TRANSCRIPTION_PROMPT = get_string(
- "CANVAS_TRANSCRIPTION_PROMPT",
- """Transcribe the contents of this file into markdown.
- Do not include anything but the markdown content in your response""",
+
+OCR_MODEL = get_string(name="OCR_MODEL", default=None)
+
+OCR_PROMPT = get_string(
+ "OCR_PROMPT",
+ "Transcribe this image to markdown. Properly format text, formulas, "
+ "tables, and code into markdown format."
+ "Include a markdown comment for elements that cannot be transcribed such as "
+ "photos and other visual elements."
+ "Do not include any indications your output is the result of a transcription."
+ "Do not include extra commentary - "
+ "ONLY include the resulting transcribed markdown.",
)
+# Do not OCR if the PDF exceeds this many pages
+OCR_PDF_MAX_PAGE_THRESHOLD = get_int(name="OCR_PDF_MAX_PAGE_THRESHOLD", default=10)
+
+# OCR the entire page if the density of math formulas exceeds this threshold
+OCR_MATH_DENSITY_THRESHOLD = get_int(name="OCR_MATH_DENSITY_THRESHOLD", default=5)
+OCR_DEBUG_DIRECTORY = get_string(name="OCR_DEBUG_DIRECTORY", default="ocr_debug")
+
+
# More MIT URLs
SEE_API_URL = get_string("SEE_API_URL", None)
SEE_API_ACCESS_TOKEN_URL = get_string("SEE_API_ACCESS_TOKEN_URL", None)
diff --git a/poetry.lock b/poetry.lock
index bd745467ab..b5374f27bc 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -4977,86 +4977,67 @@ six = ">=1.9.0"
[[package]]
name = "numpy"
-version = "2.3.5"
+version = "2.2.6"
description = "Fundamental package for array computing in Python"
optional = false
-python-versions = ">=3.11"
+python-versions = ">=3.10"
groups = ["main"]
files = [
- {file = "numpy-2.3.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:de5672f4a7b200c15a4127042170a694d4df43c992948f5e1af57f0174beed10"},
- {file = "numpy-2.3.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:acfd89508504a19ed06ef963ad544ec6664518c863436306153e13e94605c218"},
- {file = "numpy-2.3.5-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:ffe22d2b05504f786c867c8395de703937f934272eb67586817b46188b4ded6d"},
- {file = "numpy-2.3.5-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:872a5cf366aec6bb1147336480fef14c9164b154aeb6542327de4970282cd2f5"},
- {file = "numpy-2.3.5-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3095bdb8dd297e5920b010e96134ed91d852d81d490e787beca7e35ae1d89cf7"},
- {file = "numpy-2.3.5-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8cba086a43d54ca804ce711b2a940b16e452807acebe7852ff327f1ecd49b0d4"},
- {file = "numpy-2.3.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6cf9b429b21df6b99f4dee7a1218b8b7ffbbe7df8764dc0bd60ce8a0708fed1e"},
- {file = "numpy-2.3.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:396084a36abdb603546b119d96528c2f6263921c50df3c8fd7cb28873a237748"},
- {file = "numpy-2.3.5-cp311-cp311-win32.whl", hash = "sha256:b0c7088a73aef3d687c4deef8452a3ac7c1be4e29ed8bf3b366c8111128ac60c"},
- {file = "numpy-2.3.5-cp311-cp311-win_amd64.whl", hash = "sha256:a414504bef8945eae5f2d7cb7be2d4af77c5d1cb5e20b296c2c25b61dff2900c"},
- {file = "numpy-2.3.5-cp311-cp311-win_arm64.whl", hash = "sha256:0cd00b7b36e35398fa2d16af7b907b65304ef8bb4817a550e06e5012929830fa"},
- {file = "numpy-2.3.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:74ae7b798248fe62021dbf3c914245ad45d1a6b0cb4a29ecb4b31d0bfbc4cc3e"},
- {file = "numpy-2.3.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ee3888d9ff7c14604052b2ca5535a30216aa0a58e948cdd3eeb8d3415f638769"},
- {file = "numpy-2.3.5-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:612a95a17655e213502f60cfb9bf9408efdc9eb1d5f50535cc6eb365d11b42b5"},
- {file = "numpy-2.3.5-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:3101e5177d114a593d79dd79658650fe28b5a0d8abeb8ce6f437c0e6df5be1a4"},
- {file = "numpy-2.3.5-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b973c57ff8e184109db042c842423ff4f60446239bd585a5131cc47f06f789d"},
- {file = "numpy-2.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0d8163f43acde9a73c2a33605353a4f1bc4798745a8b1d73183b28e5b435ae28"},
- {file = "numpy-2.3.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:51c1e14eb1e154ebd80e860722f9e6ed6ec89714ad2db2d3aa33c31d7c12179b"},
- {file = "numpy-2.3.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b46b4ec24f7293f23adcd2d146960559aaf8020213de8ad1909dba6c013bf89c"},
- {file = "numpy-2.3.5-cp312-cp312-win32.whl", hash = "sha256:3997b5b3c9a771e157f9aae01dd579ee35ad7109be18db0e85dbdbe1de06e952"},
- {file = "numpy-2.3.5-cp312-cp312-win_amd64.whl", hash = "sha256:86945f2ee6d10cdfd67bcb4069c1662dd711f7e2a4343db5cecec06b87cf31aa"},
- {file = "numpy-2.3.5-cp312-cp312-win_arm64.whl", hash = "sha256:f28620fe26bee16243be2b7b874da327312240a7cdc38b769a697578d2100013"},
- {file = "numpy-2.3.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d0f23b44f57077c1ede8c5f26b30f706498b4862d3ff0a7298b8411dd2f043ff"},
- {file = "numpy-2.3.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:aa5bc7c5d59d831d9773d1170acac7893ce3a5e130540605770ade83280e7188"},
- {file = "numpy-2.3.5-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:ccc933afd4d20aad3c00bcef049cb40049f7f196e0397f1109dba6fed63267b0"},
- {file = "numpy-2.3.5-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:afaffc4393205524af9dfa400fa250143a6c3bc646c08c9f5e25a9f4b4d6a903"},
- {file = "numpy-2.3.5-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c75442b2209b8470d6d5d8b1c25714270686f14c749028d2199c54e29f20b4d"},
- {file = "numpy-2.3.5-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11e06aa0af8c0f05104d56450d6093ee639e15f24ecf62d417329d06e522e017"},
- {file = "numpy-2.3.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ed89927b86296067b4f81f108a2271d8926467a8868e554eaf370fc27fa3ccaf"},
- {file = "numpy-2.3.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:51c55fe3451421f3a6ef9a9c1439e82101c57a2c9eab9feb196a62b1a10b58ce"},
- {file = "numpy-2.3.5-cp313-cp313-win32.whl", hash = "sha256:1978155dd49972084bd6ef388d66ab70f0c323ddee6f693d539376498720fb7e"},
- {file = "numpy-2.3.5-cp313-cp313-win_amd64.whl", hash = "sha256:00dc4e846108a382c5869e77c6ed514394bdeb3403461d25a829711041217d5b"},
- {file = "numpy-2.3.5-cp313-cp313-win_arm64.whl", hash = "sha256:0472f11f6ec23a74a906a00b48a4dcf3849209696dff7c189714511268d103ae"},
- {file = "numpy-2.3.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:414802f3b97f3c1eef41e530aaba3b3c1620649871d8cb38c6eaff034c2e16bd"},
- {file = "numpy-2.3.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5ee6609ac3604fa7780e30a03e5e241a7956f8e2fcfe547d51e3afa5247ac47f"},
- {file = "numpy-2.3.5-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:86d835afea1eaa143012a2d7a3f45a3adce2d7adc8b4961f0b362214d800846a"},
- {file = "numpy-2.3.5-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:30bc11310e8153ca664b14c5f1b73e94bd0503681fcf136a163de856f3a50139"},
- {file = "numpy-2.3.5-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1062fde1dcf469571705945b0f221b73928f34a20c904ffb45db101907c3454e"},
- {file = "numpy-2.3.5-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce581db493ea1a96c0556360ede6607496e8bf9b3a8efa66e06477267bc831e9"},
- {file = "numpy-2.3.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:cc8920d2ec5fa99875b670bb86ddeb21e295cb07aa331810d9e486e0b969d946"},
- {file = "numpy-2.3.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9ee2197ef8c4f0dfe405d835f3b6a14f5fee7782b5de51ba06fb65fc9b36e9f1"},
- {file = "numpy-2.3.5-cp313-cp313t-win32.whl", hash = "sha256:70b37199913c1bd300ff6e2693316c6f869c7ee16378faf10e4f5e3275b299c3"},
- {file = "numpy-2.3.5-cp313-cp313t-win_amd64.whl", hash = "sha256:b501b5fa195cc9e24fe102f21ec0a44dffc231d2af79950b451e0d99cea02234"},
- {file = "numpy-2.3.5-cp313-cp313t-win_arm64.whl", hash = "sha256:a80afd79f45f3c4a7d341f13acbe058d1ca8ac017c165d3fa0d3de6bc1a079d7"},
- {file = "numpy-2.3.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:bf06bc2af43fa8d32d30fae16ad965663e966b1a3202ed407b84c989c3221e82"},
- {file = "numpy-2.3.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:052e8c42e0c49d2575621c158934920524f6c5da05a1d3b9bab5d8e259e045f0"},
- {file = "numpy-2.3.5-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:1ed1ec893cff7040a02c8aa1c8611b94d395590d553f6b53629a4461dc7f7b63"},
- {file = "numpy-2.3.5-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:2dcd0808a421a482a080f89859a18beb0b3d1e905b81e617a188bd80422d62e9"},
- {file = "numpy-2.3.5-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:727fd05b57df37dc0bcf1a27767a3d9a78cbbc92822445f32cc3436ba797337b"},
- {file = "numpy-2.3.5-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fffe29a1ef00883599d1dc2c51aa2e5d80afe49523c261a74933df395c15c520"},
- {file = "numpy-2.3.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8f7f0e05112916223d3f438f293abf0727e1181b5983f413dfa2fefc4098245c"},
- {file = "numpy-2.3.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2e2eb32ddb9ccb817d620ac1d8dae7c3f641c1e5f55f531a33e8ab97960a75b8"},
- {file = "numpy-2.3.5-cp314-cp314-win32.whl", hash = "sha256:66f85ce62c70b843bab1fb14a05d5737741e74e28c7b8b5a064de10142fad248"},
- {file = "numpy-2.3.5-cp314-cp314-win_amd64.whl", hash = "sha256:e6a0bc88393d65807d751a614207b7129a310ca4fe76a74e5c7da5fa5671417e"},
- {file = "numpy-2.3.5-cp314-cp314-win_arm64.whl", hash = "sha256:aeffcab3d4b43712bb7a60b65f6044d444e75e563ff6180af8f98dd4b905dfd2"},
- {file = "numpy-2.3.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:17531366a2e3a9e30762c000f2c43a9aaa05728712e25c11ce1dbe700c53ad41"},
- {file = "numpy-2.3.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d21644de1b609825ede2f48be98dfde4656aefc713654eeee280e37cadc4e0ad"},
- {file = "numpy-2.3.5-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:c804e3a5aba5460c73955c955bdbd5c08c354954e9270a2c1565f62e866bdc39"},
- {file = "numpy-2.3.5-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:cc0a57f895b96ec78969c34f682c602bf8da1a0270b09bc65673df2e7638ec20"},
- {file = "numpy-2.3.5-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:900218e456384ea676e24ea6a0417f030a3b07306d29d7ad843957b40a9d8d52"},
- {file = "numpy-2.3.5-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:09a1bea522b25109bf8e6f3027bd810f7c1085c64a0c7ce050c1676ad0ba010b"},
- {file = "numpy-2.3.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:04822c00b5fd0323c8166d66c701dc31b7fbd252c100acd708c48f763968d6a3"},
- {file = "numpy-2.3.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d6889ec4ec662a1a37eb4b4fb26b6100841804dac55bd9df579e326cdc146227"},
- {file = "numpy-2.3.5-cp314-cp314t-win32.whl", hash = "sha256:93eebbcf1aafdf7e2ddd44c2923e2672e1010bddc014138b229e49725b4d6be5"},
- {file = "numpy-2.3.5-cp314-cp314t-win_amd64.whl", hash = "sha256:c8a9958e88b65c3b27e22ca2a076311636850b612d6bbfb76e8d156aacde2aaf"},
- {file = "numpy-2.3.5-cp314-cp314t-win_arm64.whl", hash = "sha256:6203fdf9f3dc5bdaed7319ad8698e685c7a3be10819f41d32a0723e611733b42"},
- {file = "numpy-2.3.5-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:f0963b55cdd70fad460fa4c1341f12f976bb26cb66021a5580329bd498988310"},
- {file = "numpy-2.3.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:f4255143f5160d0de972d28c8f9665d882b5f61309d8362fdd3e103cf7bf010c"},
- {file = "numpy-2.3.5-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:a4b9159734b326535f4dd01d947f919c6eefd2d9827466a696c44ced82dfbc18"},
- {file = "numpy-2.3.5-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:2feae0d2c91d46e59fcd62784a3a83b3fb677fead592ce51b5a6fbb4f95965ff"},
- {file = "numpy-2.3.5-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ffac52f28a7849ad7576293c0cb7b9f08304e8f7d738a8cb8a90ec4c55a998eb"},
- {file = "numpy-2.3.5-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63c0e9e7eea69588479ebf4a8a270d5ac22763cc5854e9a7eae952a3908103f7"},
- {file = "numpy-2.3.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:f16417ec91f12f814b10bafe79ef77e70113a2f5f7018640e7425ff979253425"},
- {file = "numpy-2.3.5.tar.gz", hash = "sha256:784db1dcdab56bf0517743e746dfb0f885fc68d948aba86eeec2cba234bdf1c0"},
+ {file = "numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb"},
+ {file = "numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90"},
+ {file = "numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:37e990a01ae6ec7fe7fa1c26c55ecb672dd98b19c3d0e1d1f326fa13cb38d163"},
+ {file = "numpy-2.2.6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:5a6429d4be8ca66d889b7cf70f536a397dc45ba6faeb5f8c5427935d9592e9cf"},
+ {file = "numpy-2.2.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efd28d4e9cd7d7a8d39074a4d44c63eda73401580c5c76acda2ce969e0a38e83"},
+ {file = "numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc7b73d02efb0e18c000e9ad8b83480dfcd5dfd11065997ed4c6747470ae8915"},
+ {file = "numpy-2.2.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74d4531beb257d2c3f4b261bfb0fc09e0f9ebb8842d82a7b4209415896adc680"},
+ {file = "numpy-2.2.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8fc377d995680230e83241d8a96def29f204b5782f371c532579b4f20607a289"},
+ {file = "numpy-2.2.6-cp310-cp310-win32.whl", hash = "sha256:b093dd74e50a8cba3e873868d9e93a85b78e0daf2e98c6797566ad8044e8363d"},
+ {file = "numpy-2.2.6-cp310-cp310-win_amd64.whl", hash = "sha256:f0fd6321b839904e15c46e0d257fdd101dd7f530fe03fd6359c1ea63738703f3"},
+ {file = "numpy-2.2.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f9f1adb22318e121c5c69a09142811a201ef17ab257a1e66ca3025065b7f53ae"},
+ {file = "numpy-2.2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c820a93b0255bc360f53eca31a0e676fd1101f673dda8da93454a12e23fc5f7a"},
+ {file = "numpy-2.2.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3d70692235e759f260c3d837193090014aebdf026dfd167834bcba43e30c2a42"},
+ {file = "numpy-2.2.6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:481b49095335f8eed42e39e8041327c05b0f6f4780488f61286ed3c01368d491"},
+ {file = "numpy-2.2.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b64d8d4d17135e00c8e346e0a738deb17e754230d7e0810ac5012750bbd85a5a"},
+ {file = "numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba10f8411898fc418a521833e014a77d3ca01c15b0c6cdcce6a0d2897e6dbbdf"},
+ {file = "numpy-2.2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bd48227a919f1bafbdda0583705e547892342c26fb127219d60a5c36882609d1"},
+ {file = "numpy-2.2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9551a499bf125c1d4f9e250377c1ee2eddd02e01eac6644c080162c0c51778ab"},
+ {file = "numpy-2.2.6-cp311-cp311-win32.whl", hash = "sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47"},
+ {file = "numpy-2.2.6-cp311-cp311-win_amd64.whl", hash = "sha256:e8213002e427c69c45a52bbd94163084025f533a55a59d6f9c5b820774ef3303"},
+ {file = "numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff"},
+ {file = "numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c"},
+ {file = "numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3"},
+ {file = "numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282"},
+ {file = "numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87"},
+ {file = "numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249"},
+ {file = "numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49"},
+ {file = "numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de"},
+ {file = "numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4"},
+ {file = "numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2"},
+ {file = "numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84"},
+ {file = "numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b"},
+ {file = "numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d"},
+ {file = "numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566"},
+ {file = "numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f"},
+ {file = "numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f"},
+ {file = "numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868"},
+ {file = "numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d"},
+ {file = "numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd"},
+ {file = "numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c"},
+ {file = "numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6"},
+ {file = "numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda"},
+ {file = "numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40"},
+ {file = "numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8"},
+ {file = "numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f"},
+ {file = "numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa"},
+ {file = "numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571"},
+ {file = "numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1"},
+ {file = "numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff"},
+ {file = "numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06"},
+ {file = "numpy-2.2.6-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0b605b275d7bd0c640cad4e5d30fa701a8d59302e127e5f79138ad62762c3e3d"},
+ {file = "numpy-2.2.6-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:7befc596a7dc9da8a337f79802ee8adb30a552a94f792b9c9d18c840055907db"},
+ {file = "numpy-2.2.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce47521a4754c8f4593837384bd3424880629f718d87c5d44f8ed763edd63543"},
+ {file = "numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00"},
+ {file = "numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd"},
]
[[package]]
@@ -5158,6 +5139,37 @@ datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
realtime = ["websockets (>=13,<16)"]
voice-helpers = ["numpy (>=2.0.2)", "sounddevice (>=0.5.1)"]
+[[package]]
+name = "opencv-python"
+version = "4.12.0.88"
+description = "Wrapper package for OpenCV python bindings."
+optional = false
+python-versions = ">=3.6"
+groups = ["main"]
+files = [
+ {file = "opencv-python-4.12.0.88.tar.gz", hash = "sha256:8b738389cede219405f6f3880b851efa3415ccd674752219377353f017d2994d"},
+ {file = "opencv_python-4.12.0.88-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:f9a1f08883257b95a5764bf517a32d75aec325319c8ed0f89739a57fae9e92a5"},
+ {file = "opencv_python-4.12.0.88-cp37-abi3-macosx_13_0_x86_64.whl", hash = "sha256:812eb116ad2b4de43ee116fcd8991c3a687f099ada0b04e68f64899c09448e81"},
+ {file = "opencv_python-4.12.0.88-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:51fd981c7df6af3e8f70b1556696b05224c4e6b6777bdd2a46b3d4fb09de1a92"},
+ {file = "opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:092c16da4c5a163a818f120c22c5e4a2f96e0db4f24e659c701f1fe629a690f9"},
+ {file = "opencv_python-4.12.0.88-cp37-abi3-win32.whl", hash = "sha256:ff554d3f725b39878ac6a2e1fa232ec509c36130927afc18a1719ebf4fbf4357"},
+ {file = "opencv_python-4.12.0.88-cp37-abi3-win_amd64.whl", hash = "sha256:d98edb20aa932fd8ebd276a72627dad9dc097695b3d435a4257557bbb49a79d2"},
+]
+
+[package.dependencies]
+numpy = {version = ">=2,<2.3.0", markers = "python_version >= \"3.9\""}
+
+[[package]]
+name = "opendataloader-pdf"
+version = "1.3.0"
+description = "A Python wrapper for the opendataloader-pdf Java CLI."
+optional = false
+python-versions = "<4.0,>=3.9"
+groups = ["main"]
+files = [
+ {file = "opendataloader_pdf-1.3.0-py3-none-any.whl", hash = "sha256:a6b80d8a6d11e21f0712c3d5f2b4fef8a62c3d57eff8e9c5c7b3deefcb411a07"},
+]
+
[[package]]
name = "opensearch-dsl"
version = "2.1.0"
@@ -5739,6 +5751,21 @@ pygments = "*"
[package.extras]
testing = ["ipython", "pexpect", "pytest", "pytest-cov"]
+[[package]]
+name = "pdf2image"
+version = "1.17.0"
+description = "A wrapper around the pdftoppm and pdftocairo command line tools to convert PDF to a PIL Image list."
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+ {file = "pdf2image-1.17.0-py3-none-any.whl", hash = "sha256:ecdd58d7afb810dffe21ef2b1bbc057ef434dabbac6c33778a38a3f7744a27e2"},
+ {file = "pdf2image-1.17.0.tar.gz", hash = "sha256:eaa959bc116b420dd7ec415fcae49b98100dda3dd18cd2fdfa86d09f112f6d57"},
+]
+
+[package.dependencies]
+pillow = "*"
+
[[package]]
name = "pexpect"
version = "4.9.0"
@@ -6606,6 +6633,23 @@ dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pyte
docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"]
tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"]
+[[package]]
+name = "pymupdf"
+version = "1.26.6"
+description = "A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents."
+optional = false
+python-versions = ">=3.10"
+groups = ["main"]
+files = [
+ {file = "pymupdf-1.26.6-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:e46f320a136ad55e5219e8f0f4061bdf3e4c12b126d2740d5a49f73fae7ea176"},
+ {file = "pymupdf-1.26.6-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:6844cd2396553c0fa06de4869d5d5ecb1260e6fc3b9d85abe8fa35f14dd9d688"},
+ {file = "pymupdf-1.26.6-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:617ba69e02c44f0da1c0e039ea4a26cf630849fd570e169c71daeb8ac52a81d6"},
+ {file = "pymupdf-1.26.6-cp310-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:7777d0b7124c2ebc94849536b6a1fb85d158df3b9d873935e63036559391534c"},
+ {file = "pymupdf-1.26.6-cp310-abi3-win32.whl", hash = "sha256:8f3ef05befc90ca6bb0f12983200a7048d5bff3e1c1edef1bb3de60b32cb5274"},
+ {file = "pymupdf-1.26.6-cp310-abi3-win_amd64.whl", hash = "sha256:ce02ca96ed0d1acfd00331a4d41a34c98584d034155b06fd4ec0f051718de7ba"},
+ {file = "pymupdf-1.26.6.tar.gz", hash = "sha256:a2b4531cd4ab36d6f1f794bb6d3c33b49bda22f36d58bb1f3e81cbc10183bd2b"},
+]
+
[[package]]
name = "pynacl"
version = "1.5.0"
@@ -9392,4 +9436,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.1"
python-versions = "~3.12"
-content-hash = "4eabef5281cd51058e626275a93902503c902070bc6c2e4d26616f44e814fe3d"
+content-hash = "d9d5fa4f85de593da403f14418d80173a0e81a0f53336065eaf744c23c3ee1c6"
diff --git a/pyproject.toml b/pyproject.toml
index a685c117ce..b0184716bb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -116,6 +116,10 @@ youtube-transcript-api = "^1.0.0"
pypdfium2 = "^4.30.0"
pyarrow = "^21.0.0"
django-zeal = "^2.0.4"
+pdf2image = "^1.17.0"
+opendataloader-pdf = "^1.3.0"
+pymupdf = "^1.26.6"
+opencv-python = "^4.12.0.88"
diff --git a/test_pdfs/notes.pdf b/test_pdfs/notes.pdf
new file mode 100644
index 0000000000..c5f99c95a8
Binary files /dev/null and b/test_pdfs/notes.pdf differ