From 23ca582e09291f05279664df8eab3ef45f64a73f Mon Sep 17 00:00:00 2001 From: Ramarao Susil Kunka Date: Mon, 24 Nov 2025 11:52:00 +0530 Subject: [PATCH 1/3] compatibility for python 3.13 --- aifc.py | 168 +++++++++++++++++++++++++++++++++++++++++ audioop.py | 190 +++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 4 +- 3 files changed, 360 insertions(+), 2 deletions(-) create mode 100644 aifc.py create mode 100644 audioop.py diff --git a/aifc.py b/aifc.py new file mode 100644 index 0000000..d35d486 --- /dev/null +++ b/aifc.py @@ -0,0 +1,168 @@ +""" +Compatibility module for aifc functionality on Python 3.13+ +This provides minimal AIFF file support to replace the removed aifc module. +""" + +import struct + + +class Error(Exception): + """Exception raised for aifc-related errors.""" + pass + + +class Aifc_read: + """Simple AIFF reader that mimics the aifc module interface.""" + + def __init__(self, file): + self._file = file + self._nchannels = 1 + self._sampwidth = 2 + self._framerate = 44100 + self._nframes = 0 + self._parseheader() + + def _parseheader(self): + """Parse AIFF header (simplified implementation).""" + try: + # Read FORM header + chunk_type = self._file.read(4) + if chunk_type != b'FORM': + raise Error("Not an AIFF file") + + # Skip chunk size + self._file.read(4) + + # Read format type + fmt_type = self._file.read(4) + if fmt_type not in (b'AIFF', b'AIFC'): + raise Error("Not an AIFF file") + + # Look for COMM chunk + while True: + chunk_id = self._file.read(4) + if not chunk_id: + break + + chunk_size = struct.unpack('>L', self._file.read(4))[0] + + if chunk_id == b'COMM': + self._nchannels = struct.unpack('>H', self._file.read(2))[0] + self._nframes = struct.unpack('>L', self._file.read(4))[0] + self._sampwidth = struct.unpack('>H', self._file.read(2))[0] // 8 + + # Read 80-bit IEEE extended precision number (sample rate) + self._file.read(10) # Skip IEEE float + # Simplified conversion - just use common rates + self._framerate = 
44100 + + if chunk_size > 18: # AIFC format + self._file.read(chunk_size - 18) + break + else: + # Skip other chunks + if chunk_size % 2: + chunk_size += 1 + self._file.read(chunk_size) + except (struct.error, IOError): + raise Error("Invalid AIFF file") + + def getnchannels(self): + return self._nchannels + + def getsampwidth(self): + return self._sampwidth + + def getframerate(self): + return self._framerate + + def getnframes(self): + return self._nframes + + def readframes(self, nframes): + # Simplified - just return empty bytes for now + # This would need proper implementation for real AIFF reading + return b'' + + def close(self): + if hasattr(self._file, 'close'): + self._file.close() + + +class Aifc_write: + """Simple AIFF writer that mimics the aifc module interface.""" + + def __init__(self, file): + self._file = file + self._nchannels = 1 + self._sampwidth = 2 + self._framerate = 44100 + self._nframes = 0 + self._frames = [] + + def setnchannels(self, nchannels): + self._nchannels = nchannels + + def setsampwidth(self, sampwidth): + self._sampwidth = sampwidth + + def setframerate(self, framerate): + self._framerate = int(framerate) + + def writeframes(self, data): + self._frames.append(data) + + def close(self): + # Write AIFF header and data + frames_data = b''.join(self._frames) + self._nframes = len(frames_data) // (self._nchannels * self._sampwidth) + + # FORM chunk + self._file.write(b'FORM') + # We'll write the size later + size_pos = self._file.tell() + self._file.write(b'\x00\x00\x00\x00') + self._file.write(b'AIFF') + + # COMM chunk + self._file.write(b'COMM') + self._file.write(struct.pack('>L', 18)) # COMM chunk size + self._file.write(struct.pack('>H', self._nchannels)) + self._file.write(struct.pack('>L', self._nframes)) + self._file.write(struct.pack('>H', self._sampwidth * 8)) + + # Sample rate as 80-bit IEEE extended (simplified) + self._file.write(struct.pack('>HQ', 0x400E, self._framerate << 19)) + + # SSND chunk + 
self._file.write(b'SSND') + self._file.write(struct.pack('>L', len(frames_data) + 8)) + self._file.write(b'\x00\x00\x00\x00') # offset + self._file.write(b'\x00\x00\x00\x00') # block size + self._file.write(frames_data) + + # Write total size + current_pos = self._file.tell() + self._file.seek(size_pos) + self._file.write(struct.pack('>L', current_pos - 8)) + self._file.seek(current_pos) + + if hasattr(self._file, 'close'): + self._file.close() + + +def open(filename, mode='rb'): + """Open an AIFF file for reading or writing.""" + if 'r' in mode: + if hasattr(filename, 'read'): + return Aifc_read(filename) + else: + with open(filename, 'rb') as f: + return Aifc_read(f) + elif 'w' in mode: + if hasattr(filename, 'write'): + return Aifc_write(filename) + else: + return Aifc_write(open(filename, 'wb')) + else: + raise ValueError(f"Invalid mode: {mode}") \ No newline at end of file diff --git a/audioop.py b/audioop.py new file mode 100644 index 0000000..a1ee524 --- /dev/null +++ b/audioop.py @@ -0,0 +1,190 @@ +""" +Compatibility module for audioop functionality on Python 3.13+ +This provides audio processing functions to replace the removed audioop module. 
+""" + +import struct + + +class error(Exception): + """Exception raised for audioop-related errors.""" + pass + + +def _get_samples(data, width): + """Convert byte data to samples based on width.""" + if width == 1: + return list(struct.unpack('B' * len(data), data)) + elif width == 2: + return list(struct.unpack('<' + 'h' * (len(data) // 2), data)) + elif width == 4: + return list(struct.unpack('<' + 'i' * (len(data) // 4), data)) + else: + raise error(f"Unsupported sample width: {width}") + + +def _samples_to_bytes(samples, width): + """Convert samples to byte data based on width.""" + if width == 1: + return struct.pack('B' * len(samples), *[max(0, min(255, s + 128)) for s in samples]) + elif width == 2: + return struct.pack('<' + 'h' * len(samples), *[max(-32768, min(32767, s)) for s in samples]) + elif width == 4: + return struct.pack('<' + 'i' * len(samples), *[max(-2147483648, min(2147483647, s)) for s in samples]) + else: + raise error(f"Unsupported sample width: {width}") + + +def rms(data, width): + """Return the RMS (root mean square) of the audio data.""" + if len(data) == 0: + return 0 + + samples = _get_samples(data, width) + if width == 1: + # Convert unsigned to signed for RMS calculation + samples = [s - 128 for s in samples] + + sum_squares = sum(s * s for s in samples) + mean_square = sum_squares / len(samples) + return int(mean_square ** 0.5) + + +def add(data1, data2, width): + """Add two audio data streams sample by sample.""" + min_len = min(len(data1), len(data2)) + data1_trimmed = data1[:min_len] + data2_trimmed = data2[:min_len] + + samples1 = _get_samples(data1_trimmed, width) + samples2 = _get_samples(data2_trimmed, width) + + if width == 1: + samples1 = [s - 128 for s in samples1] + samples2 = [s - 128 for s in samples2] + + result_samples = [s1 + s2 for s1, s2 in zip(samples1, samples2)] + return _samples_to_bytes(result_samples, width) + + +def bias(data, width, bias_value): + """Add a bias to all samples in the audio data.""" + if 
len(data) == 0: + return data + + samples = _get_samples(data, width) + if width == 1: + samples = [s - 128 for s in samples] + + biased_samples = [s + bias_value for s in samples] + return _samples_to_bytes(biased_samples, width) + + +def byteswap(data, width): + """Swap the byte order of audio samples.""" + if width == 1: + return data # No swapping needed for 1-byte samples + + result = bytearray() + for i in range(0, len(data), width): + sample_bytes = data[i:i+width] + result.extend(sample_bytes[::-1]) + + return bytes(result) + + +def tomono(data, width, left_gain, right_gain): + """Convert stereo audio to mono by mixing channels.""" + if len(data) % (width * 2) != 0: + raise error("Data length not compatible with stereo format") + + mono_data = bytearray() + sample_format = {1: 'B', 2: 'h', 4: 'i'}[width] + + for i in range(0, len(data), width * 2): + left_bytes = data[i:i+width] + right_bytes = data[i+width:i+width*2] + + left_sample = struct.unpack('<' + sample_format, left_bytes)[0] + right_sample = struct.unpack('<' + sample_format, right_bytes)[0] + + if width == 1: + left_sample -= 128 + right_sample -= 128 + + mono_sample = int((left_sample * left_gain + right_sample * right_gain) / 2) + + if width == 1: + mono_sample += 128 + mono_sample = max(0, min(255, mono_sample)) + elif width == 2: + mono_sample = max(-32768, min(32767, mono_sample)) + elif width == 4: + mono_sample = max(-2147483648, min(2147483647, mono_sample)) + + mono_data.extend(struct.pack('<' + sample_format, mono_sample)) + + return bytes(mono_data) + + +def ratecv(data, width, nchannels, inrate, outrate, state, weightA=1, weightB=0): + """Convert the sample rate of audio data.""" + if state is None: + state = {} + + if inrate == outrate: + return data, state + + # Simple linear interpolation resampling + samples = _get_samples(data, width) + if width == 1: + samples = [s - 128 for s in samples] + + # Calculate the ratio and new length + ratio = outrate / inrate + new_length = 
int(len(samples) * ratio / nchannels) * nchannels + + new_samples = [] + for i in range(0, new_length, nchannels): + old_index = i / ratio + old_index_int = int(old_index) + + if old_index_int < len(samples) - nchannels: + # Linear interpolation + frac = old_index - old_index_int + for ch in range(nchannels): + if old_index_int + ch < len(samples) and old_index_int + nchannels + ch < len(samples): + sample1 = samples[old_index_int + ch] + sample2 = samples[old_index_int + nchannels + ch] + interpolated = int(sample1 + frac * (sample2 - sample1)) + new_samples.append(interpolated) + else: + new_samples.append(samples[min(old_index_int + ch, len(samples) - 1)]) + else: + # Use the last available samples + for ch in range(nchannels): + new_samples.append(samples[min(old_index_int + ch, len(samples) - 1)]) + + return _samples_to_bytes(new_samples, width), state + + +def lin2lin(data, width, new_width): + """Convert between different sample widths.""" + if width == new_width: + return data + + samples = _get_samples(data, width) + if width == 1: + samples = [s - 128 for s in samples] + + # Scale samples to new width + if width < new_width: + # Upscaling + scale_factor = (2 ** (new_width * 8 - 1)) / (2 ** (width * 8 - 1)) + new_samples = [int(s * scale_factor) for s in samples] + else: + # Downscaling + scale_factor = (2 ** (new_width * 8 - 1)) / (2 ** (width * 8 - 1)) + new_samples = [int(s * scale_factor) for s in samples] + + return _samples_to_bytes(new_samples, new_width) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 5c2ea6b..cff3695 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,5 +4,5 @@ Wave openai customtkinter PyAudioWPatch -torch>=2.2.0 --extra-index-url https://download.pytorch.org/whl/cu121 --no-cache-dir -ctranslate2==3.24.0 \ No newline at end of file +torch>=2.2.0 --extra-index-url https://download.pytorch.org/whl/cu121 +ctranslate2>=4.6.0 \ No newline at end of file From 
0e828110d5475df279ba4fe09bf916e127f00d65 Mon Sep 17 00:00:00 2001 From: Ramarao Susil Kunka Date: Mon, 24 Nov 2025 15:04:41 +0530 Subject: [PATCH 2/3] made it into web app but only microphone is allowed. --- Dockerfile.web | 32 +++++ WEB_README.md | 106 +++++++++++++++ templates/index.html | 307 +++++++++++++++++++++++++++++++++++++++++++ web_app.py | 119 +++++++++++++++++ web_requirements.txt | 4 + 5 files changed, 568 insertions(+) create mode 100644 Dockerfile.web create mode 100644 WEB_README.md create mode 100644 templates/index.html create mode 100644 web_app.py create mode 100644 web_requirements.txt diff --git a/Dockerfile.web b/Dockerfile.web new file mode 100644 index 0000000..9242465 --- /dev/null +++ b/Dockerfile.web @@ -0,0 +1,32 @@ +FROM python:3.9-slim + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + ffmpeg \ + && rm -rf /var/lib/apt/lists/* + +# Set working directory +WORKDIR /app + +# Copy requirements first for better caching +COPY requirements.txt web_requirements.txt ./ + +# Install Python dependencies +RUN pip install --no-cache-dir -r requirements.txt +RUN pip install --no-cache-dir -r web_requirements.txt + +# Copy application files +COPY . . + +# Create necessary directories +RUN mkdir -p templates static + +# Expose port +EXPOSE 5000 + +# Set environment variables +ENV FLASK_APP=web_app.py +ENV FLASK_ENV=production + +# Run the web application +CMD ["python", "web_app.py"] \ No newline at end of file diff --git a/WEB_README.md b/WEB_README.md new file mode 100644 index 0000000..4442516 --- /dev/null +++ b/WEB_README.md @@ -0,0 +1,106 @@ +# Ecoute Web Version + +A web-based audio transcription application that works in browsers. 
+ +## Features + +- 🎤 **Microphone Recording**: Real-time audio capture from microphone +- 🔄 **Live Transcription**: Real-time speech-to-text using Whisper +- 🌐 **Web Interface**: Modern, responsive web UI +- ☁️ **Cloud Ready**: Deployable to web hosting services like Hostinger + +## Important Notes + +⚠️ **Browser Limitations**: +- ✅ **Microphone audio**: Supported +- ❌ **System/Speaker audio**: Not supported (browser security restriction) + +## Local Development + +### Prerequisites + +1. Python 3.9+ +2. FFmpeg installed on your system + +### Installation + +1. Install dependencies: +```bash +pip install -r requirements.txt +pip install -r web_requirements.txt +``` + +2. Run the web application: +```bash +python web_app.py +``` + +3. Open your browser and go to: `http://localhost:5000` + +## Deployment to Hostinger (Docker) + +### Method 1: Using Dockerfile.web + +1. Build the Docker image: +```bash +docker build -f Dockerfile.web -t ecoute-web . +``` + +2. Run the container: +```bash +docker run -p 5000:5000 ecoute-web +``` + +### Method 2: Deploy to Hostinger + +1. Upload your project files to Hostinger +2. Use the Dockerfile.web for containerization +3. Make sure the container exposes port 5000 +4. Set environment variables if needed + +## Usage Instructions + +1. **Start Recording**: Click the "Start Recording" button +2. **Allow Microphone Access**: Grant permission when browser prompts +3. **Speak Clearly**: Talk into your microphone +4. **View Transcript**: See real-time transcription in the text area +5. 
**Clear Transcript**: Use "Clear Transcript" to start fresh + +## File Structure for Web Version + +``` +ecoute/ +├── web_app.py # Flask web application +├── templates/ +│ └── index.html # Web interface +├── web_requirements.txt # Web-specific dependencies +├── Dockerfile.web # Docker configuration for web +├── AudioTranscriber.py # Original transcriber (adapted) +├── TranscriberModels.py # Model management +└── requirements.txt # Core dependencies +``` + +## Technical Details + +- **Frontend**: HTML5, CSS3, JavaScript with WebRTC +- **Backend**: Flask + SocketIO for real-time communication +- **Audio Processing**: Whisper for speech recognition +- **Containerization**: Docker for easy deployment + +## Limitations + +1. **No System Audio**: Web browsers cannot capture system/speaker audio for security reasons +2. **Internet Required**: Initial model download may require internet connection +3. **Browser Compatibility**: Requires modern browsers with WebRTC support + +## Troubleshooting + +### Microphone Access Issues +- Ensure HTTPS is used in production (required for microphone access) +- Check browser permissions for microphone access +- Try different browsers if issues persist + +### Docker Deployment Issues +- Ensure port 5000 is properly exposed +- Check that FFmpeg is installed in container +- Verify all dependencies are in requirements files \ No newline at end of file diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..4d89f2a --- /dev/null +++ b/templates/index.html @@ -0,0 +1,307 @@ + + + + + + Ecoute - Web Audio Transcriber + + + + +
+

🎤 Ecoute - Audio Transcriber

+ +
+ ⚠️ Note: This web app can only access microphone audio due to browser security restrictions. + System/speaker audio capture is not available in web browsers. +
+ +
+ + Connecting... +
+ +
+ + + +
+ +
+
+ Click "Start Recording" to begin transcribing your microphone audio... +
+
+ +
+ 📝 Instructions: +
• Click "Start Recording" and allow microphone access when prompted +
• Speak clearly into your microphone +
• The transcript will appear in real-time above +
• Use "Clear Transcript" to start fresh +
+
+ + + + \ No newline at end of file diff --git a/web_app.py b/web_app.py new file mode 100644 index 0000000..b1f0c16 --- /dev/null +++ b/web_app.py @@ -0,0 +1,119 @@ +from flask import Flask, render_template, request, session +from flask_socketio import SocketIO, emit +import os +import tempfile +import time +import TranscriberModels +import base64 + +app = Flask(__name__) +app.config['SECRET_KEY'] = 'your-secret-key-here' +socketio = SocketIO(app, cors_allowed_origins="*") + +# Global variables for transcription +active_transcribers = {} +audio_queues = {} + +class WebAudioTranscriber: + def __init__(self, session_id): + self.session_id = session_id + self.transcript_data = [] + self.model = TranscriberModels.get_model(False) # Use local model + + def add_audio_data(self, audio_data): + """Add audio data to be transcribed""" + try: + # Create temporary audio file + with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file: + temp_file.write(audio_data) + temp_path = temp_file.name + + # Transcribe audio + transcript_text = self.model.get_transcription(temp_path) + if transcript_text and transcript_text.strip(): + timestamp = time.strftime('%H:%M:%S') + + self.transcript_data.append({ + 'timestamp': timestamp, + 'text': transcript_text.strip(), + 'source': 'microphone' + }) + + # Clean up temp file + os.unlink(temp_path) + + return transcript_text.strip() + + except Exception as e: + print(f"Transcription error: {e}") + if 'temp_path' in locals(): + try: + os.unlink(temp_path) + except Exception: + pass + + return None + + def get_transcript(self): + """Get formatted transcript""" + if not self.transcript_data: + return "Listening for audio..." 
+ + transcript_lines = [] + for entry in self.transcript_data[-10:]: # Show last 10 entries + transcript_lines.append(f"[{entry['timestamp']}] {entry['text']}") + + return '\n'.join(transcript_lines) + +@app.route('/') +def index(): + return render_template('index.html') + +@socketio.on('connect') +def handle_connect(): + session_id = session.get('session_id', request.sid) + session['session_id'] = session_id + active_transcribers[session_id] = WebAudioTranscriber(session_id) + emit('connected', {'status': 'Connected to transcription service'}) + +@socketio.on('disconnect') +def handle_disconnect(): + session_id = session.get('session_id') + if session_id in active_transcribers: + del active_transcribers[session_id] + +@socketio.on('audio_data') +def handle_audio_data(data): + session_id = session.get('session_id') + if session_id not in active_transcribers: + return + + try: + # Decode base64 audio data + audio_data = base64.b64decode(data['audio']) + + transcriber = active_transcribers[session_id] + transcript_text = transcriber.add_audio_data(audio_data) + + if transcript_text: + full_transcript = transcriber.get_transcript() + emit('transcript_update', { + 'transcript': full_transcript, + 'new_text': transcript_text + }) + + except Exception as e: + print(f"Error processing audio: {e}") + emit('error', {'message': 'Error processing audio'}) + +@socketio.on('clear_transcript') +def handle_clear_transcript(): + session_id = session.get('session_id') + if session_id in active_transcribers: + active_transcribers[session_id].transcript_data.clear() + emit('transcript_update', {'transcript': 'Transcript cleared. 
Listening for audio...'}) + +if __name__ == '__main__': + # Create templates directory if it doesn't exist + os.makedirs('templates', exist_ok=True) + socketio.run(app, host='0.0.0.0', port=5000, debug=True) \ No newline at end of file diff --git a/web_requirements.txt b/web_requirements.txt new file mode 100644 index 0000000..3a058fd --- /dev/null +++ b/web_requirements.txt @@ -0,0 +1,4 @@ +Flask==2.3.3 +Flask-SocketIO==5.3.6 +python-socketio==5.8.0 +eventlet==0.33.3 \ No newline at end of file From 8dbbb0b16d1c107dc6884a525b76b2171115f71f Mon Sep 17 00:00:00 2001 From: Susil Vignesh Date: Mon, 24 Nov 2025 21:57:01 +0530 Subject: [PATCH 3/3] reverting to 3.11 changes only. Removing web-app related stuff as it is irrelevant. --- Dockerfile.web | 32 ----- WEB_README.md | 106 --------------- templates/index.html | 307 ------------------------------------------- web_app.py | 119 ----------------- web_requirements.txt | 4 - 5 files changed, 568 deletions(-) delete mode 100644 Dockerfile.web delete mode 100644 WEB_README.md delete mode 100644 templates/index.html delete mode 100644 web_app.py delete mode 100644 web_requirements.txt diff --git a/Dockerfile.web b/Dockerfile.web deleted file mode 100644 index 9242465..0000000 --- a/Dockerfile.web +++ /dev/null @@ -1,32 +0,0 @@ -FROM python:3.9-slim - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - ffmpeg \ - && rm -rf /var/lib/apt/lists/* - -# Set working directory -WORKDIR /app - -# Copy requirements first for better caching -COPY requirements.txt web_requirements.txt ./ - -# Install Python dependencies -RUN pip install --no-cache-dir -r requirements.txt -RUN pip install --no-cache-dir -r web_requirements.txt - -# Copy application files -COPY . . 
- -# Create necessary directories -RUN mkdir -p templates static - -# Expose port -EXPOSE 5000 - -# Set environment variables -ENV FLASK_APP=web_app.py -ENV FLASK_ENV=production - -# Run the web application -CMD ["python", "web_app.py"] \ No newline at end of file diff --git a/WEB_README.md b/WEB_README.md deleted file mode 100644 index 4442516..0000000 --- a/WEB_README.md +++ /dev/null @@ -1,106 +0,0 @@ -# Ecoute Web Version - -A web-based audio transcription application that works in browsers. - -## Features - -- 🎤 **Microphone Recording**: Real-time audio capture from microphone -- 🔄 **Live Transcription**: Real-time speech-to-text using Whisper -- 🌐 **Web Interface**: Modern, responsive web UI -- ☁️ **Cloud Ready**: Deployable to web hosting services like Hostinger - -## Important Notes - -⚠️ **Browser Limitations**: -- ✅ **Microphone audio**: Supported -- ❌ **System/Speaker audio**: Not supported (browser security restriction) - -## Local Development - -### Prerequisites - -1. Python 3.9+ -2. FFmpeg installed on your system - -### Installation - -1. Install dependencies: -```bash -pip install -r requirements.txt -pip install -r web_requirements.txt -``` - -2. Run the web application: -```bash -python web_app.py -``` - -3. Open your browser and go to: `http://localhost:5000` - -## Deployment to Hostinger (Docker) - -### Method 1: Using Dockerfile.web - -1. Build the Docker image: -```bash -docker build -f Dockerfile.web -t ecoute-web . -``` - -2. Run the container: -```bash -docker run -p 5000:5000 ecoute-web -``` - -### Method 2: Deploy to Hostinger - -1. Upload your project files to Hostinger -2. Use the Dockerfile.web for containerization -3. Make sure the container exposes port 5000 -4. Set environment variables if needed - -## Usage Instructions - -1. **Start Recording**: Click the "Start Recording" button -2. **Allow Microphone Access**: Grant permission when browser prompts -3. **Speak Clearly**: Talk into your microphone -4. 
**View Transcript**: See real-time transcription in the text area -5. **Clear Transcript**: Use "Clear Transcript" to start fresh - -## File Structure for Web Version - -``` -ecoute/ -├── web_app.py # Flask web application -├── templates/ -│ └── index.html # Web interface -├── web_requirements.txt # Web-specific dependencies -├── Dockerfile.web # Docker configuration for web -├── AudioTranscriber.py # Original transcriber (adapted) -├── TranscriberModels.py # Model management -└── requirements.txt # Core dependencies -``` - -## Technical Details - -- **Frontend**: HTML5, CSS3, JavaScript with WebRTC -- **Backend**: Flask + SocketIO for real-time communication -- **Audio Processing**: Whisper for speech recognition -- **Containerization**: Docker for easy deployment - -## Limitations - -1. **No System Audio**: Web browsers cannot capture system/speaker audio for security reasons -2. **Internet Required**: Initial model download may require internet connection -3. **Browser Compatibility**: Requires modern browsers with WebRTC support - -## Troubleshooting - -### Microphone Access Issues -- Ensure HTTPS is used in production (required for microphone access) -- Check browser permissions for microphone access -- Try different browsers if issues persist - -### Docker Deployment Issues -- Ensure port 5000 is properly exposed -- Check that FFmpeg is installed in container -- Verify all dependencies are in requirements files \ No newline at end of file diff --git a/templates/index.html b/templates/index.html deleted file mode 100644 index 4d89f2a..0000000 --- a/templates/index.html +++ /dev/null @@ -1,307 +0,0 @@ - - - - - - Ecoute - Web Audio Transcriber - - - - -
-

🎤 Ecoute - Audio Transcriber

- -
- ⚠️ Note: This web app can only access microphone audio due to browser security restrictions. - System/speaker audio capture is not available in web browsers. -
- -
- - Connecting... -
- -
- - - -
- -
-
- Click "Start Recording" to begin transcribing your microphone audio... -
-
- -
- 📝 Instructions: -
• Click "Start Recording" and allow microphone access when prompted -
• Speak clearly into your microphone -
• The transcript will appear in real-time above -
• Use "Clear Transcript" to start fresh -
-
- - - - \ No newline at end of file diff --git a/web_app.py b/web_app.py deleted file mode 100644 index b1f0c16..0000000 --- a/web_app.py +++ /dev/null @@ -1,119 +0,0 @@ -from flask import Flask, render_template, request, session -from flask_socketio import SocketIO, emit -import os -import tempfile -import time -import TranscriberModels -import base64 - -app = Flask(__name__) -app.config['SECRET_KEY'] = 'your-secret-key-here' -socketio = SocketIO(app, cors_allowed_origins="*") - -# Global variables for transcription -active_transcribers = {} -audio_queues = {} - -class WebAudioTranscriber: - def __init__(self, session_id): - self.session_id = session_id - self.transcript_data = [] - self.model = TranscriberModels.get_model(False) # Use local model - - def add_audio_data(self, audio_data): - """Add audio data to be transcribed""" - try: - # Create temporary audio file - with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file: - temp_file.write(audio_data) - temp_path = temp_file.name - - # Transcribe audio - transcript_text = self.model.get_transcription(temp_path) - if transcript_text and transcript_text.strip(): - timestamp = time.strftime('%H:%M:%S') - - self.transcript_data.append({ - 'timestamp': timestamp, - 'text': transcript_text.strip(), - 'source': 'microphone' - }) - - # Clean up temp file - os.unlink(temp_path) - - return transcript_text.strip() - - except Exception as e: - print(f"Transcription error: {e}") - if 'temp_path' in locals(): - try: - os.unlink(temp_path) - except Exception: - pass - - return None - - def get_transcript(self): - """Get formatted transcript""" - if not self.transcript_data: - return "Listening for audio..." 
- - transcript_lines = [] - for entry in self.transcript_data[-10:]: # Show last 10 entries - transcript_lines.append(f"[{entry['timestamp']}] {entry['text']}") - - return '\n'.join(transcript_lines) - -@app.route('/') -def index(): - return render_template('index.html') - -@socketio.on('connect') -def handle_connect(): - session_id = session.get('session_id', request.sid) - session['session_id'] = session_id - active_transcribers[session_id] = WebAudioTranscriber(session_id) - emit('connected', {'status': 'Connected to transcription service'}) - -@socketio.on('disconnect') -def handle_disconnect(): - session_id = session.get('session_id') - if session_id in active_transcribers: - del active_transcribers[session_id] - -@socketio.on('audio_data') -def handle_audio_data(data): - session_id = session.get('session_id') - if session_id not in active_transcribers: - return - - try: - # Decode base64 audio data - audio_data = base64.b64decode(data['audio']) - - transcriber = active_transcribers[session_id] - transcript_text = transcriber.add_audio_data(audio_data) - - if transcript_text: - full_transcript = transcriber.get_transcript() - emit('transcript_update', { - 'transcript': full_transcript, - 'new_text': transcript_text - }) - - except Exception as e: - print(f"Error processing audio: {e}") - emit('error', {'message': 'Error processing audio'}) - -@socketio.on('clear_transcript') -def handle_clear_transcript(): - session_id = session.get('session_id') - if session_id in active_transcribers: - active_transcribers[session_id].transcript_data.clear() - emit('transcript_update', {'transcript': 'Transcript cleared. 
Listening for audio...'}) - -if __name__ == '__main__': - # Create templates directory if it doesn't exist - os.makedirs('templates', exist_ok=True) - socketio.run(app, host='0.0.0.0', port=5000, debug=True) \ No newline at end of file diff --git a/web_requirements.txt b/web_requirements.txt deleted file mode 100644 index 3a058fd..0000000 --- a/web_requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -Flask==2.3.3 -Flask-SocketIO==5.3.6 -python-socketio==5.8.0 -eventlet==0.33.3 \ No newline at end of file