diff --git a/.azure.env b/.azure.env new file mode 100644 index 00000000..6cdc9f61 --- /dev/null +++ b/.azure.env @@ -0,0 +1,14 @@ +SENTRIUS_VERSION=1.1.51 +SENTRIUS_SSH_VERSION=1.1.10 +SENTRIUS_KEYCLOAK_VERSION=1.1.13 +SENTRIUS_AGENT_VERSION=1.1.22 +SENTRIUS_AI_AGENT_VERSION=1.1.3 +LLMPROXY_VERSION=1.1.3 +LAUNCHER_VERSION=1.1.3 +AGENTPROXY_VERSION=1.1.3 +SSHPROXY_VERSION=1.1.3 +RDPPROXY_VERSION=1.1.3 +GITHUB_MCP_VERSION=1.1.3 +PROMPT_ADVISOR_VERSION=1.1.6 +MONITORING_AGENT_VERSION=1.1.21 +SSH_AGENT_VERSION=1.1.3 diff --git a/CUSTOM_AGENTS.md b/CUSTOM_AGENTS.md new file mode 100644 index 00000000..538263c6 --- /dev/null +++ b/CUSTOM_AGENTS.md @@ -0,0 +1,582 @@ +# Custom Agents + +Sentrius supports both Java and Python-based custom agents that can extend the platform's functionality for monitoring, automation, and user assistance. + +## Table of Contents + +- [Overview](#overview) +- [Java Agents](#java-agents) +- [Python Agents](#python-agents) +- [Agent Development Best Practices](#agent-development-best-practices) + +## Overview + +Custom agents in Sentrius can: +- Monitor SSH sessions and system activity +- Provide user assistance and automation +- Integrate with external services via zero trust access +- Execute custom business logic +- Submit provenance events for audit trails + +## Java Agents + +Java agents are built using the Spring Boot framework and integrate with the Sentrius ecosystem through the agent launcher service. + +### Creating a Custom Java Agent + +#### 1. Create Module Structure + +``` +my-custom-agent/ +├── src/main/java/ +│ └── io/sentrius/agent/mycustom/ +│ ├── MyCustomAgent.java +│ └── MyCustomAgentConfig.java +└── pom.xml +``` + +#### 2. 
Implement the Agent Interface + +```java +@Component +@ConditionalOnProperty(name = "agents.mycustom.enabled", havingValue = "true") +public class MyCustomAgent implements ApplicationListener<ApplicationReadyEvent> { + + @Autowired + private AgentService agentService; + + @Override + public void onApplicationEvent(ApplicationReadyEvent event) { + // Register agent and start processing + agentService.register(this); + } + + @Scheduled(fixedDelay = 60000) // Run every minute + public void processTask() { + // Your agent logic here + logger.info("Processing custom agent task"); + } +} +``` + +#### 3. Configuration Properties + +```java +@ConfigurationProperties(prefix = "agents.mycustom") +@Data +public class MyCustomAgentConfig { + private boolean enabled = false; + private String name = "my-custom-agent"; + private String description = "Custom agent for specialized tasks"; + private int pollInterval = 60000; +} +``` + +#### 4. Add to application.properties + +```properties +agents.mycustom.enabled=true +agents.mycustom.name=my-custom-agent +agents.mycustom.description=Custom agent for specialized tasks +agents.mycustom.pollInterval=60000 +``` + +#### 5. 
Deploy with Helm Chart + +Add to `values.yaml`: + +```yaml +mycustomagent: + image: + repository: my-custom-agent + tag: latest + oauth2: + client_id: java-agents + client_secret: your-secret + resources: + requests: + memory: "256Mi" + cpu: "100m" + limits: + memory: "512Mi" + cpu: "500m" +``` + +### Java Agent Features + +- **Zero Trust Integration**: Automatic ZTAT (Zero Trust Access Token) handling +- **Provenance Tracking**: Built-in event logging and audit trails +- **LLM Integration**: Access to language models through the LLM proxy +- **Session Monitoring**: Real-time SSH session monitoring capabilities +- **RESTful APIs**: Full access to Sentrius APIs and data + +### Example: Session Monitoring Agent + +```java +@Component +public class SessionMonitorAgent implements ApplicationListener { + + @Autowired + private SshSessionService sessionService; + + @Autowired + private ProvenanceService provenanceService; + + @Scheduled(fixedDelay = 30000) + public void monitorSessions() { + List activeSessions = sessionService.getActiveSessions(); + + for (SshSession session : activeSessions) { + if (isAnomalous(session)) { + provenanceService.submit(ProvenanceEvent.builder() + .eventType("ANOMALOUS_SESSION_DETECTED") + .sessionId(session.getId()) + .details(Map.of( + "user", session.getUsername(), + "reason", "Suspicious command pattern detected" + )) + .build()); + + // Take action + sessionService.flagSession(session.getId()); + } + } + } + + private boolean isAnomalous(SshSession session) { + // Your anomaly detection logic + return false; + } +} +``` + +## Python Agents + +Python agents provide a flexible framework for creating custom automation and user assistance tools. + +### Creating a Custom Python Agent + +#### 1. 
Set up the Agent Structure + +```python +# agents/my_custom/my_custom_agent.py +from agents.base import BaseAgent + +class MyCustomAgent(BaseAgent): + def __init__(self, config_manager): + super().__init__(config_manager, name="my-custom-agent") + self.agent_definition = config_manager.get_agent_definition('my.custom') + + def execute_task(self, task_data=None): + """Execute the agent's main task""" + self.logger.info(f"Executing custom task with data: {task_data}") + + # Your custom logic here + result = self.process_data(task_data) + + # Submit provenance event + self.submit_provenance( + event_type="CUSTOM_TASK", + details={ + "task": "custom_operation", + "data": task_data, + "result": result + } + ) + + return { + "status": "completed", + "result": result + } + + def process_data(self, data): + """Process the task data""" + # Implement your logic + return "processed_result" +``` + +#### 2. Create Agent Configuration + +Create `my-custom.yaml`: + +```yaml +description: "Custom agent that performs specialized tasks" +context: | + You are a custom agent designed to handle specific business logic. + Process requests according to your specialized capabilities. + + Your responsibilities include: + - Processing custom data + - Submitting provenance events + - Integrating with external services +``` + +#### 3. Add to application.properties + +```properties +agent.my.custom.config=my-custom.yaml +agent.my.custom.enabled=true +agent.my.custom.poll.interval=60000 +``` + +#### 4. Register in main.py + +```python +from agents.my_custom.my_custom_agent import MyCustomAgent + +AVAILABLE_AGENTS = { + 'chat-helper': ChatHelperAgent, + 'my-custom': MyCustomAgent, # Add your agent here + 'mcp': MCPAgent, +} +``` + +#### 5. 
Run Your Custom Agent + +```bash +# Test mode (no external services) +TEST_MODE=true python main.py my-custom --task-data '{"operation": "process_data"}' + +# With properties configuration +python main.py my-custom --config my-app.properties + +# With environment variables +export KEYCLOAK_BASE_URL=http://localhost:8180 +export KEYCLOAK_CLIENT_ID=python-agents +python main.py my-custom +``` + +### Python Agent Features + +- **API Integration**: Full access to Sentrius APIs using JWT authentication +- **Configuration Management**: Support for properties files and YAML configurations +- **LLM Proxy Access**: Integration with language models for AI-powered tasks +- **Provenance Submission**: Automatic event tracking and audit logging +- **Keycloak Authentication**: Built-in OAuth2/JWT token management + +### Example: Data Processing Agent + +```python +from agents.base import BaseAgent +import requests + +class DataProcessingAgent(BaseAgent): + def __init__(self, config_manager): + super().__init__(config_manager, name="data-processor") + self.api_endpoint = config_manager.get_property('api.endpoint') + + def execute_task(self, task_data=None): + """Process data from external sources""" + + # Fetch data from API + headers = self.get_auth_headers() + response = requests.get( + f"{self.api_endpoint}/data", + headers=headers + ) + + if response.status_code == 200: + data = response.json() + processed = self.process(data) + + # Submit results + self.submit_results(processed) + + # Track in provenance + self.submit_provenance( + event_type="DATA_PROCESSED", + details={ + "records": len(processed), + "status": "success" + } + ) + + return {"status": "completed", "records": len(processed)} + else: + self.logger.error(f"Failed to fetch data: {response.status_code}") + return {"status": "failed", "error": response.text} + + def process(self, data): + """Process the data""" + # Your processing logic + return [item for item in data if self.is_valid(item)] + + def is_valid(self, 
item): + """Validate data item""" + return item.get('status') == 'active' + + def submit_results(self, processed_data): + """Submit processed data back to API""" + headers = self.get_auth_headers() + requests.post( + f"{self.api_endpoint}/results", + headers=headers, + json=processed_data + ) +``` + +## Agent Development Best Practices + +### 1. Authentication + +Always use proper OAuth2/JWT authentication: + +**Java:** +```java +@Autowired +private OAuth2ClientService oauth2Client; + +public String getAccessToken() { + return oauth2Client.getAccessToken("java-agents"); +} +``` + +**Python:** +```python +def get_auth_headers(self): + token = self.auth_manager.get_access_token() + return { + 'Authorization': f'Bearer {token}', + 'Content-Type': 'application/json' + } +``` + +### 2. Provenance Tracking + +Submit detailed provenance events for audit trails: + +**Java:** +```java +provenanceService.submit(ProvenanceEvent.builder() + .eventType("AGENT_ACTION") + .agentName("my-agent") + .action("process_data") + .details(Map.of( + "records_processed", count, + "duration_ms", duration + )) + .build()); +``` + +**Python:** +```python +self.submit_provenance( + event_type="AGENT_ACTION", + details={ + "action": "process_data", + "records_processed": count, + "duration_ms": duration + } +) +``` + +### 3. Error Handling + +Implement robust error handling and logging: + +**Java:** +```java +try { + processData(); +} catch (Exception e) { + logger.error("Failed to process data", e); + provenanceService.submit(ProvenanceEvent.builder() + .eventType("AGENT_ERROR") + .error(e.getMessage()) + .build()); + throw new AgentException("Processing failed", e); +} +``` + +**Python:** +```python +try: + self.process_data() +except Exception as e: + self.logger.error(f"Failed to process data: {e}") + self.submit_provenance( + event_type="AGENT_ERROR", + details={"error": str(e)} + ) + raise +``` + +### 4. 
Configuration Management + +Use environment-specific configurations: + +**Java:** +```java +@ConfigurationProperties(prefix = "agents.mycustom") +public class MyAgentConfig { + private String apiEndpoint; + private int timeout = 30000; + private boolean enableRetry = true; + // Getters and setters +} +``` + +**Python:** +```python +class MyAgentConfig: + def __init__(self, config_manager): + self.api_endpoint = config_manager.get_property('api.endpoint') + self.timeout = int(config_manager.get_property('api.timeout', '30')) + self.enable_retry = config_manager.get_property('api.retry', 'true') == 'true' +``` + +### 5. Testing + +Test agents in isolation before integration: + +**Java:** +```java +@SpringBootTest +public class MyCustomAgentTest { + @Autowired + private MyCustomAgent agent; + + @Test + public void testProcessTask() { + // Arrange + TaskData data = new TaskData(); + + // Act + Result result = agent.processTask(data); + + // Assert + assertNotNull(result); + assertEquals("completed", result.getStatus()); + } +} +``` + +**Python:** +```bash +# Test mode (no external services) +TEST_MODE=true python main.py my-custom --task-data '{"test": true}' + +# Unit tests +python -m pytest tests/test_my_custom_agent.py +``` + +### 6. Resource Management + +Be mindful of resource usage: + +**Java:** +```yaml +mycustomagent: + resources: + requests: + memory: "256Mi" + cpu: "100m" + limits: + memory: "512Mi" + cpu: "500m" +``` + +**Python:** +- Use connection pooling for database connections +- Close resources properly in finally blocks +- Implement timeouts for external API calls + +### 7. Documentation + +Document agent capabilities and configuration: + +```markdown +# My Custom Agent + +## Purpose +Brief description of what the agent does. + +## Configuration +List of configuration properties and their defaults. + +## API Endpoints +List of API endpoints the agent uses. + +## Provenance Events +List of events the agent submits. 
+ +## Dependencies +External services or libraries required. +``` + +## Advanced Topics + +### LLM Integration + +Agents can leverage language models for AI-powered functionality: + +**Java:** +```java +@Autowired +private LLMProxyService llmProxy; + +public String analyzeText(String text) { + LLMRequest request = LLMRequest.builder() + .prompt("Analyze the following text: " + text) + .maxTokens(500) + .build(); + + return llmProxy.complete(request).getContent(); +} +``` + +**Python:** +```python +def analyze_text(self, text): + response = self.llm_client.complete( + prompt=f"Analyze the following text: {text}", + max_tokens=500 + ) + return response['content'] +``` + +### Dynamic Agent Deployment + +Use the agent-launcher service for dynamic deployment: + +```bash +curl -X POST http://agent-launcher:8080/api/v1/agents/launch \ + -H "Authorization: Bearer " \ + -H "Content-Type: application/json" \ + -d '{ + "agentType": "my-custom-agent", + "configuration": { + "task": "process_data", + "schedule": "0 */5 * * *" + } + }' +``` + +### Session Interception + +Agents can intercept and monitor SSH sessions: + +```java +@Component +public class SessionInterceptor implements SshSessionListener { + + @Override + public void onCommand(SshSession session, String command) { + if (isDangerous(command)) { + session.block(); + notifyAdmin(session, command); + } + } + + private boolean isDangerous(String command) { + return command.contains("rm -rf") || command.contains("dd if="); + } +} +``` + +## Next Steps + +- Review [DEVELOPMENT.md](DEVELOPMENT.md) for development workflows +- See [DEPLOYMENT.md](DEPLOYMENT.md) for deployment options +- Check [INTEGRATIONS.md](INTEGRATIONS.md) for external service integrations +- Read [python-agent/README.md](python-agent/README.md) for Python agent specifics diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md new file mode 100644 index 00000000..86e3bf73 --- /dev/null +++ b/DEPLOYMENT.md @@ -0,0 +1,374 @@ +# Deployment Guide + +This guide covers 
deployment options for Sentrius across different environments. + +## Table of Contents + +- [Prerequisites](#prerequisites) +- [Local Development](#local-development) +- [Kubernetes Deployment](#kubernetes-deployment) +- [Cloud Deployments](#cloud-deployments) +- [Configuration](#configuration) + +## Prerequisites + +### Required +- **Java 17** or later +- **Apache Maven 3.6+** +- **PostgreSQL** database for storing session and configuration data +- **Keycloak** for user authentication and authorization +- **OpenTelemetry** endpoint for observability + +### Optional +- **Docker & Kubernetes** for containerized deployments +- **Neo4j** for graph-based analysis +- **Kafka** for event streaming +- **Python 3.12+** for Python agents + +## Local Development + +### Quick Start with Script + +For convenience, use the `run-sentrius.sh` script which starts the core and API modules: + +```bash +# Build the project first +mvn clean install + +# Run Sentrius locally (requires PostgreSQL and Keycloak) +./ops-scripts/local/run-sentrius.sh --build +``` + +### Manual Start + +```bash +# Build the project +mvn clean install + +# Start the API server +cd api +mvn spring-boot:run +``` + +### Environment Variables + +Configure using environment variables: + +```bash +export KEYCLOAK_BASE_URL=http://localhost:8180 +export DATABASE_PASSWORD=password +export KEYSTORE_PASSWORD=keystorepassword +cd api +mvn spring-boot:run +``` + +## Kubernetes Deployment + +### Build Docker Images + +#### Local Kubernetes +Build all images sequentially: +```bash +./ops-scripts/base/build-images.sh --all --no-cache +``` + +Or build concurrently for faster builds (recommended): +```bash +./ops-scripts/base/build-all-images-concurrent.sh --all --no-cache +``` + +#### GCP Container Registry +```bash +# Build and push to GCP Container Registry +./ops-scripts/base/build-images.sh gcp --all +``` + +#### Azure Container Registry +```bash +# Login to Azure Container Registry +az acr login --name sentriusacr + +# 
Build and push to Azure Container Registry +./ops-scripts/base/build-images.sh azure --all +``` + +### Local Kubernetes Deployment + +#### HTTP Deployment (Recommended for Development) + +```bash +# Deploy to local Kubernetes cluster +./ops-scripts/local/deploy-helm.sh + +# Forward ports for local access +kubectl port-forward -n dev service/sentrius-sentrius 8080:8080 +kubectl port-forward -n dev service/sentrius-keycloak 8081:8081 +``` + +Add to `/etc/hosts`: +``` +127.0.0.1 sentrius-sentrius +127.0.0.1 sentrius-keycloak +``` + +Access at: +- Sentrius UI: http://localhost:8080 +- Keycloak: http://localhost:8081 + +#### TLS Deployment + +```bash +# Deploy with TLS and auto-install cert-manager +./ops-scripts/local/deploy-helm.sh --tls --install-cert-manager +``` + +Add to `/etc/hosts`: +``` +127.0.0.1 sentrius-dev.local +127.0.0.1 keycloak-dev.local +``` + +Access at: +- Sentrius UI: https://sentrius-dev.local +- Keycloak: https://keycloak-dev.local + +**Note**: Self-signed certificates will be automatically generated. + +## Cloud Deployments + +### GCP/GKE Deployment + +```bash +# Deploy to GKE cluster +./ops-scripts/gcp/deploy-helm.sh --tenant <tenant-name> +``` + +**Note**: Ensure you're connected to your GKE cluster and have the necessary permissions. + +For detailed GCP deployment documentation, see [ops-scripts/gcp/README.md](ops-scripts/gcp/README.md). + +### Azure/AKS Deployment + +```bash +# Deploy to AKS cluster (default domain: trustpolicy.ai) +./ops-scripts/azure/deploy-helm.sh --tenant <tenant-name> + +# Deploy with custom domain +./ops-scripts/azure/deploy-helm.sh --tenant <tenant-name> --domain mycompany.com +``` + +**Prerequisites:** +- Azure CLI configured: `az login && az aks get-credentials --resource-group sentrius-rg --name sentrius-aks-cluster` +- Docker images pushed to Azure Container Registry +- DNS zone configured in Azure DNS (default: trustpolicy.ai) + +**Default Domain**: Azure deployments use `trustpolicy.ai` by default. 
You can specify a custom domain with the `--domain` parameter. + +For detailed Azure deployment documentation, see [ops-scripts/azure/README.md](ops-scripts/azure/README.md). + +### AWS Deployment + +Sentrius Helm charts support AWS EKS deployments. See [Helm Chart Configuration](#helm-chart-configuration) for environment-specific settings. + +## Helm Chart Configuration + +### Available Charts + +1. **sentrius-chart** - Complete Sentrius deployment with all services +2. **sentrius-chart-launcher** - Lightweight chart focused on the launcher service + +### Key Configuration Options + +#### Environment Settings + +```yaml +environment: "local" # Options: local, gke, aws, azure +tenant: "my-company" +subdomain: "my-company.sentrius.cloud" +``` + +#### Core Services + +```yaml +sentrius: + image: + repository: sentrius + tag: latest + +llmproxy: + image: + repository: sentrius-llmproxy + tag: latest + +postgres: + storageSize: "10Gi" +``` + +#### Ingress Configuration + +```yaml +ingress: + enabled: true + class: "nginx" # or "gce" for GKE, "alb" for AWS + tlsEnabled: true + annotations: {} +``` + +#### TLS/SSL Configuration + +For production with Let's Encrypt: +```yaml +certificates: + enabled: true + issuer: "letsencrypt-prod" + +ingress: + tlsEnabled: true +``` + +For local development with self-signed certificates: +```yaml +environment: local +certificates: + enabled: true +ingress: + tlsEnabled: true +``` + +### Custom Values Example + +Create a `my-values.yaml` file: + +```yaml +environment: "gke" +tenant: "my-company" +subdomain: "my-company.sentrius.cloud" + +sentrius: + image: + repository: "my-registry/sentrius" + tag: "v1.0.0" + +postgres: + storageSize: "20Gi" + +ingress: + enabled: true + tlsEnabled: true + class: "gce" +``` + +Deploy with custom values: +```bash +helm install my-sentrius sentrius-chart -f my-values.yaml +``` + +### Multi-Environment Support + +The charts support multiple deployment environments with different configurations: + +**Local 
Development:** +- Uses NodePort services +- Minimal resource requirements +- In-memory storage options + +**GKE (Google Cloud):** +- Uses LoadBalancer services +- Managed certificates +- Persistent storage + +**AWS:** +- ALB ingress support +- EBS storage classes +- AWS-specific annotations + +**Azure:** +- Azure Load Balancer integration +- Azure disk storage +- Azure-specific networking + +## Configuration + +### Database Configuration + +```properties +spring.datasource.url=jdbc:postgresql://localhost:5432/sentrius +spring.datasource.username=postgres +spring.datasource.password=postgres +spring.jpa.hibernate.ddl-auto=update +``` + +### Keycloak Authentication + +```properties +keycloak.realm=sentrius +keycloak.base-url=${KEYCLOAK_BASE_URL:http://localhost:8180} +spring.security.oauth2.client.registration.keycloak.client-secret=${KEYCLOAK_SECRET:defaultSecret} +spring.security.oauth2.client.registration.keycloak.client-id=sentrius-api +spring.security.oauth2.client.registration.keycloak.authorization-grant-type=authorization_code +spring.security.oauth2.client.registration.keycloak.redirect-uri=${BASE_URL:http://localhost:8080}/login/oauth2/code/keycloak +spring.security.oauth2.client.registration.keycloak.scope=openid,profile,email +spring.security.oauth2.resourceserver.jwt.issuer-uri=${KEYCLOAK_BASE_URL:http://localhost:8180}/realms/sentrius +spring.security.oauth2.client.provider.keycloak.issuer-uri=${KEYCLOAK_BASE_URL:http://localhost:8180}/realms/sentrius +``` + +### SSH Settings + +```properties +sentrius.ssh.port=22 +sentrius.ssh.connection-timeout=30000 +``` + +## Testing Deployments + +### Helm Chart Testing + +Test Helm charts locally before deployment: + +```bash +# Test all charts +./ops-scripts/test-helm-charts.sh + +# Test specific aspects +./ops-scripts/test-helm-charts.sh lint # Lint charts +./ops-scripts/test-helm-charts.sh template # Test rendering +./ops-scripts/test-helm-charts.sh config # Test configurations +``` + +For detailed testing 
documentation, see [docs/helm-testing.md](docs/helm-testing.md). + +## Troubleshooting + +### Build Failures + +```bash +# Clear Maven cache if build issues occur +rm -rf ~/.m2/repository +mvn clean install + +# Check Java and Maven versions +java -version # Should be 17+ +mvn -version # Should be 3.6+ +``` + +### Runtime Issues + +```bash +# Check required services +curl http://localhost:8180 # Keycloak health +psql -h localhost -U postgres -d sentrius # Database connectivity +``` + +### Container Issues + +```bash +# Point this shell's Docker CLI at Minikube's Docker daemon, then list the Sentrius images +eval $(minikube docker-env) +docker images | grep sentrius +``` + +## Next Steps + +- Review [DEVELOPMENT.md](DEVELOPMENT.md) for development workflows +- See [INTEGRATIONS.md](INTEGRATIONS.md) for external service integrations +- Check [CUSTOM_AGENTS.md](CUSTOM_AGENTS.md) for creating custom agents diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md new file mode 100644 index 00000000..42239f69 --- /dev/null +++ b/DEVELOPMENT.md @@ -0,0 +1,326 @@ +# Development Guide + +This guide covers development workflows, building, and testing Sentrius. 
+ +## Table of Contents + +- [Project Structure](#project-structure) +- [Building](#building) +- [Testing](#testing) +- [Development Workflow](#development-workflow) +- [Contributing](#contributing) + +## Project Structure + +Sentrius consists of multiple Maven sub-projects: + +``` +sentrius/ +├── core/ # Core business logic and SSH session management +├── api/ # REST API layer and web interface +├── dataplane/ # Secure data transfer and processing +├── llm-core/ # Language model integration core +├── llm-dataplane/ # LLM data processing layer +├── integration-proxy/ # LLM proxy service for AI integration +├── agent-proxy/ # Agent communication proxy +├── analytics/ # Java-based monitoring agent +├── ai-agent/ # Intelligent monitoring and automation agent +├── agent-launcher/ # Dynamic agent lifecycle management +├── provenance-core/ # Event tracking and audit framework +├── provenance-ingestor/ # Event ingestion and processing +├── python-agent/ # Python-based agent framework +├── ops-scripts/ # Operational scripts for deployment +├── sentrius-chart/ # Helm chart for full deployment +├── sentrius-chart-launcher/# Helm chart for launcher service +└── pom.xml # Root Maven POM +``` + +### Core Module + +Contains business logic, including: +- Enclave management +- Zero trust policy enforcement +- Secure SSH connection handling + +### API Module + +A RESTful interface for interacting with the core functionalities. 
The API module exposes endpoints that let you: +- Create and manage enclaves +- Configure security rules +- Visualize SSH sessions and logs +- Handle user access and authentication + +## Building + +### Prerequisites + +- **Java 17** or later +- **Apache Maven 3.6+** + +### Full Build + +Build the entire project including all modules: + +```bash +mvn clean install +``` + +**Build Performance:** +- Initial build: ~7 minutes (downloads dependencies) +- Subsequent builds: 3-5 minutes (cached dependencies) +- Test execution: ~1 minute + +### Build Without Tests + +To speed up builds during development: + +```bash +mvn clean install -DskipTests +``` + +### Build Specific Modules + +Build only specific modules with dependencies: + +```bash +# Build core modules +mvn clean install -pl core,api,dataplane -am + +# Build specific module with dependencies +mvn clean install -pl api -am +``` + +### Maven Warnings + +The build produces these warnings which are **expected and safe to ignore**: + +``` +'dependencyManagement.dependencies.dependency' must be unique: org.projectlombok:lombok:jar +'dependencyManagement.dependencies.dependency' must be unique: org.springframework.boot:spring-boot-starter-web:jar +'dependencies.dependency' must be unique: org.springframework.boot:spring-boot-starter-actuator:jar +``` + +## Testing + +### Running Tests + +Run all tests: + +```bash +mvn test +``` + +Run tests for specific module: + +```bash +cd api +mvn test +``` + +### CI/CD Testing + +Sentrius includes comprehensive CI/CD testing: + +- **Automated testing** runs on every push and pull request via GitHub Actions +- **Helm chart validation** including linting, template rendering, and schema validation +- **Integration testing** with Kubernetes clusters for deployment validation + +### Local Helm Chart Testing + +Test Helm charts locally before deployment: + +```bash +# Test all charts +./ops-scripts/test-helm-charts.sh + +# Test specific aspects +./ops-scripts/test-helm-charts.sh lint # 
Lint charts +./ops-scripts/test-helm-charts.sh template # Test rendering +./ops-scripts/test-helm-charts.sh config # Test configurations +``` + +For detailed testing documentation, see [docs/helm-testing.md](docs/helm-testing.md). + +## Development Workflow + +### Setting Up Development Environment + +1. **Clone the repository:** + ```bash + git clone https://github.com/SentriusLLC/Sentrius-private.git + cd Sentrius-private + ``` + +2. **Build the project:** + ```bash + mvn clean install -DskipTests + ``` + +3. **Set up required services:** + - PostgreSQL database + - Keycloak authentication server + - OpenTelemetry endpoint (optional for development) + +4. **Configure application properties:** + - Copy `application.properties.example` to `application.properties` + - Update database and Keycloak connection settings + +### Running in Development Mode + +#### Using the Convenience Script + +```bash +./ops-scripts/local/run-sentrius.sh --build +``` + +#### Manual Start + +```bash +cd api +mvn spring-boot:run +``` + +#### With Custom Configuration + +```bash +export KEYCLOAK_BASE_URL=http://localhost:8180 +export DATABASE_PASSWORD=password +export KEYSTORE_PASSWORD=keystorepassword +cd api +mvn spring-boot:run +``` + +### Docker Image Development + +Build Docker images for testing: + +```bash +# Build all images sequentially +./ops-scripts/base/build-images.sh --all --no-cache + +# Build all images concurrently (faster) +./ops-scripts/base/build-all-images-concurrent.sh --all --no-cache + +# Build specific images +./ops-scripts/base/build-images.sh --sentrius --sentrius-keycloak +``` + +Build with development certificates: + +```bash +./ops-scripts/base/build-images.sh --all --include-dev-certs +``` + +### Python Agent Development + +Python agents require Python 3.12+ and dependencies: + +```bash +cd python-agent + +# Install dependencies +pip3 install -r requirements.txt + +# Test mode (no external services required) +TEST_MODE=true python3 main.py chat-helper 
--task-data '{"test": "message"}' + +# Production mode +python3 main.py chat-helper --config application.properties +``` + +See [CUSTOM_AGENTS.md](CUSTOM_AGENTS.md) for a detailed agent development guide. + +## Contributing + +### Getting Started + +1. Fork the repository +2. Create a feature branch for your changes +3. Make your changes following the coding standards +4. Write tests for your changes +5. Run the full test suite +6. Open a pull request with a clear description + +### Coding Standards + +- Follow existing code style and patterns +- Write meaningful commit messages +- Add tests for new functionality +- Update documentation as needed +- Keep changes focused and minimal + +### Pull Request Process + +1. Ensure all tests pass +2. Update documentation if needed +3. Add a clear description of changes +4. Link to relevant issues +5. Wait for code review +6. Address review feedback + +### Reporting Issues + +If you encounter any issues or have requests: + +1. Check existing issues first +2. Provide clear reproduction steps +3. Include relevant logs and error messages +4. Specify your environment (OS, Java version, etc.) + +## Development Tips + +### IDE Setup + +**IntelliJ IDEA:** +- Import as Maven project +- Enable annotation processing for Lombok +- Configure Java 17 SDK + +**Eclipse:** +- Import as Existing Maven Project +- Install Lombok plugin +- Set compiler compliance to Java 17 + +### Debugging + +**Local Debugging:** +```bash +cd api +mvn spring-boot:run -Dspring-boot.run.jvmArguments="-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=5005" +``` + +Then attach your IDE debugger to port 5005. + +**Kubernetes Debugging:** +```bash +kubectl port-forward -n dev pod/<pod-name> 5005:5005 +``` + +### Hot Reload + +Spring Boot DevTools is included for automatic restart on code changes: + +```bash +cd api +mvn spring-boot:run +``` + +Changes to Java classes will trigger an automatic restart. 
+ +## Performance Expectations + +| Operation | Time | Notes | +|-----------|------|-------| +| Maven build (clean install) | 7m24s | First build, downloads dependencies | +| Maven build (cached) | 3-5m | Subsequent builds | +| Maven test execution | 1m3s | Full test suite | +| Docker image build | 5-10m | All images, sequential | +| Docker image build (concurrent) | 3-7m | All images, parallel | +| Python dependency install | <1m | Initial setup | + +## Next Steps + +- Review [DEPLOYMENT.md](DEPLOYMENT.md) for deployment options +- See [INTEGRATIONS.md](INTEGRATIONS.md) for external service integrations +- Check [CUSTOM_AGENTS.md](CUSTOM_AGENTS.md) for creating custom agents +- Read [API_DOCUMENTATION.md](docs/api-documentation.md) for API reference diff --git a/INTEGRATIONS.md b/INTEGRATIONS.md new file mode 100644 index 00000000..cac2a2e3 --- /dev/null +++ b/INTEGRATIONS.md @@ -0,0 +1,346 @@ +# Integrations + +Sentrius supports external service integrations through the integration-proxy module, providing secure, zero-trust access to external APIs and services. + +## Table of Contents + +- [GitHub Integration](#github-integration) +- [JIRA Integration](#jira-integration) +- [LLM Integration](#llm-integration) +- [Self-Healing System](#self-healing-system) + +## GitHub Integration + +The GitHub MCP (Model Context Protocol) integration enables secure access to GitHub repositories, issues, and pull requests through dynamically launched MCP server containers. + +### Features + +- Query GitHub issues and pull requests +- Access repository information +- Clone and interact with repositories +- All operations use zero-trust security model + +### Setup + +#### 1. 
Store GitHub Token + +Create an `IntegrationSecurityToken` with: +- `connectionType`: "github" +- `connectionInfo`: Your GitHub Personal Access Token + +Via API: +```bash +curl -X POST http://localhost:8080/api/v1/integration/tokens \ + -H "Authorization: Bearer <token>" \ + -H "Content-Type: application/json" \ + -d '{ + "connectionType": "github", + "connectionInfo": "<github-personal-access-token>", + "description": "GitHub integration token" + }' +``` + +Via UI: +1. Navigate to Integration Settings +2. Click "Add Integration Token" +3. Select "GitHub" as connection type +4. Enter your GitHub Personal Access Token +5. Save + +#### 2. Launch MCP Server + +```bash +curl -X POST "http://integration-proxy:8080/api/v1/github/mcp/launch?tokenId=<token-id>" \ + -H "Authorization: Bearer <token>" +``` + +#### 3. Access via Service URL + +The response includes a `serviceUrl` for accessing the GitHub MCP server within the cluster. + +### Usage Examples + +**Query Issues:** +```bash +curl http://<service-url>/issues?repo=owner/repo \ + -H "Authorization: Bearer <token>" +``` + +**Get Pull Request:** +```bash +curl http://<service-url>/pulls/123?repo=owner/repo \ + -H "Authorization: Bearer <token>" +``` + +For detailed documentation, see [integration-proxy/GITHUB_INTEGRATION.md](integration-proxy/GITHUB_INTEGRATION.md). + +## JIRA Integration + +The JIRA integration provides secure proxy access to JIRA APIs for ticket management and tracking. 
+ +### Features + +- Search for JIRA issues +- Get issue details +- Manage issue comments +- Assign issues to users + +### Available Endpoints + +#### Search Issues +```bash +curl -X GET "http://integration-proxy:8080/api/v1/jira/rest/api/3/search?jql=project=PROJ" \ + -H "Authorization: Bearer <token>" +``` + +#### Get Issue Details +```bash +curl -X GET "http://integration-proxy:8080/api/v1/jira/rest/api/3/issue/PROJ-123" \ + -H "Authorization: Bearer <token>" +``` + +#### Add Comment +```bash +curl -X POST "http://integration-proxy:8080/api/v1/jira/rest/api/3/issue/PROJ-123/comment" \ + -H "Authorization: Bearer <token>" \ + -H "Content-Type: application/json" \ + -d '{ + "body": "This is a comment" + }' +``` + +#### Assign Issue +```bash +curl -X PUT "http://integration-proxy:8080/api/v1/jira/rest/api/3/issue/PROJ-123/assignee" \ + -H "Authorization: Bearer <token>" \ + -H "Content-Type: application/json" \ + -d '{ + "accountId": "user-account-id" + }' +``` + +### Authentication + +All JIRA requests are authenticated through Keycloak and validated against the user's permissions. + +## LLM Integration + +Sentrius includes a proxy service for integrating with Large Language Models (LLMs) while maintaining zero-trust security. 
+ +### Features + +- Secure access to LLM APIs +- Request/response logging +- Usage tracking +- Cost management + +### Supported Models + +- OpenAI GPT models +- Anthropic Claude models +- Custom model endpoints + +### Configuration + +Configure in `application.properties`: + +```properties +llm.proxy.openai.api-key=${OPENAI_API_KEY} +llm.proxy.anthropic.api-key=${ANTHROPIC_API_KEY} +llm.proxy.default-model=gpt-4 +llm.proxy.max-tokens=2000 +``` + +### Usage + +```bash +curl -X POST http://llm-proxy:8080/api/v1/llm/complete \ + -H "Authorization: Bearer <token>" \ + -H "Content-Type: application/json" \ + -d '{ + "prompt": "Analyze this SSH session for anomalies", + "model": "gpt-4", + "maxTokens": 500 + }' +``` + +## Self-Healing System + +Sentrius includes an intelligent self-healing system that automatically detects, analyzes, and repairs errors in your infrastructure. + +### Key Features + +- **Automatic Error Detection**: Continuously monitors error output and OpenTelemetry data +- **Security Analysis**: Analyzes errors for security concerns before attempting repairs +- **Flexible Patching Policies**: Configure when repairs should be applied +- **Coding Agent Deployment**: Automatically launches agents to analyze and fix errors +- **Docker Image Building**: Builds and deploys fixed images automatically +- **GitHub Integration**: Creates pull requests with fixes (requires GitHub integration) + +### Configuration + +#### Web UI Configuration + +1. Navigate to **Self-Healing Configuration** (`/sso/v1/self-healing/config`) +2. Click **Add Pod Configuration** +3. Set the pod name, type, and patching policy +4. 
Enable or disable self-healing for the pod + +#### API Configuration + +```bash +# Create or update configuration +curl -X POST http://localhost:8080/api/v1/self-healing/config \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer <token>" \ + -d '{ + "podName": "sentrius-api", + "podType": "api", + "patchingPolicy": "OFF_HOURS", + "enabled": true + }' +``` + +#### Patching Policies + +- **Immediate**: Apply fixes as soon as errors are detected +- **Off-Hours**: Queue fixes for maintenance windows (default: 10 PM - 6 AM) +- **Never**: Disable self-healing (manual intervention required) + +#### Helm Configuration + +Update `values.yaml`: + +```yaml +selfHealing: + enabled: true + offHours: + start: 22 # 10 PM + end: 6 # 6 AM + codingAgent: + clientId: "coding-agents" + clientSecret: "" # Set in secrets + agentLauncher: + url: "http://sentrius-agents-launcherservice:8080" + builder: + namespace: "dev" + autoBuild: true + github: + enabled: false # Auto-enabled if GitHub integration exists +``` + +**Important**: Self-healing requires GitHub integration to be configured. The system will automatically detect if a GitHub token exists. + +### Viewing Healing Sessions + +Monitor healing sessions via: + +1. Navigate to **Self-Healing Sessions** (`/sso/v1/self-healing/sessions`) +2. Filter by status: All, Active, or Completed +3. View detailed information: + - Agent activity and logs + - Security analysis results + - Docker build status + - GitHub PR links + +### How It Works + +1. **Error Detection**: Scans the `error_output` table every 5 minutes +2. **Policy Check**: Determines if healing is enabled for the affected pod +3. **Security Analysis**: Analyzes error logs for security keywords +4. **Agent Launch**: Launches a coding agent pod if safe to proceed +5. **Code Repair**: Agent examines the error and generates fixes +6. **Docker Build**: Creates a new Docker image with fixes +7. **GitHub PR**: Creates a pull request with changes (if configured) +8. 
**Completion**: Updates the healing session with results + +### Security Considerations + +- **GitHub Integration Required**: Self-healing only proceeds if GitHub integration is configured +- **Security Analysis**: Security-related errors require manual review +- **Audit Trail**: All healing attempts are logged +- **Isolated Execution**: Agents run in isolated Kubernetes pods + +### Manual Triggering + +Trigger self-healing for specific errors: + +Via UI: +1. Navigate to **Error Logs** (`/sso/v1/notifications/error/log/get`) +2. Click **Trigger Self-Healing** on any error + +Via API: +```bash +curl -X POST http://localhost:8080/api/v1/self-healing/trigger/{errorId} \ + -H "Authorization: Bearer <token>" +``` + +### Database Schema + +The system uses three main tables: +- `self_healing_config`: Patching policies per pod/service +- `self_healing_session`: Tracks each healing attempt +- `error_output`: Extended with healing status fields + +## Creating Custom Integrations + +### Integration Proxy Pattern + +To add a new integration: + +1. **Create Integration Controller:** + ```java + @RestController + @RequestMapping("/api/v1/myservice") + public class MyServiceIntegrationController { + + @Autowired + private IntegrationTokenService tokenService; + + @GetMapping("/data") + public ResponseEntity<String> getData( + @RequestHeader("Authorization") String auth, + @RequestParam Long tokenId + ) { + // Validate user has access + IntegrationToken token = tokenService.getToken(tokenId); + + // Call external service + String result = callExternalService(token); + + return ResponseEntity.ok(result); + } + } + ``` + +2. **Add Token Type:** + ```java + public enum IntegrationConnectionType { + GITHUB, + JIRA, + MYSERVICE // Add your integration + } + ``` + +3. **Configure Security:** + ```java + @Configuration + public class MyServiceSecurityConfig { + // Configure authentication and authorization + } + ``` + +### MCP Server Integration + +For services supporting Model Context Protocol: + +1. 
Create MCP server Docker image +2. Add launcher endpoint in integration-proxy +3. Configure Kubernetes service for dynamic containers +4. Implement token-based authentication + +## Next Steps + +- Review [DEPLOYMENT.md](DEPLOYMENT.md) for deployment options +- See [DEVELOPMENT.md](DEVELOPMENT.md) for development workflows +- Check [CUSTOM_AGENTS.md](CUSTOM_AGENTS.md) for creating custom agents diff --git a/README.md b/README.md index b6b32fb4..481f09ee 100644 --- a/README.md +++ b/README.md @@ -1,782 +1,123 @@ -Sentrius - -![image](docs/images/dashboard.png) - -Sentrius is zero trust (and if you want AI assisted) management system. to protect your infrastructure. It is split -into several maven projects. Agents can be leveraged to monitor and control infra ( SSH, APIs, RDP eventually), ensuring that all connections are secure and compliant with your organization's policies. -Agents can access external resources ( like LLMs or integrations ) via a zero trust assisted access token. -sub-projects: - - core – Handles the core functionalities (e.g., SSH session management, zero trust policy enforcement). - api – Provides a RESTful API layer to interface with the core module. - dataplane – Offers dataplane functionality for secure data transfer and processing. - integration-proxy – A proxy service that integrates with large language models (LLMs) and external services (like GitHub, JIRA) to enhance security and compliance. Supports dynamic MCP (Model Context Protocol) server management for GitHub integrations. - llm-dataplane – A data processing layer that leverages LLMs for advanced analysis and decision-making in SSH sessions. - ops-scripts – Contains operational scripts for deployment and management tasks. - ai-agent – Java-based intelligent agent framework for monitoring and controlling SSH sessions. - agent-launcher – Service for dynamically launching and managing agents. - python-agent – Python-based agent framework for SSH session monitoring and user assistance. 
- -Internally, Sentrius may still be referenced by its former name, SSO (SecureShellOps), in certain scripts or configurations. -Table of Contents - - Key Features - Project Structure - Prerequisites - Installation - Configuration - Running Sentrius - Helm Chart Deployment - Testing - Integrations - Custom Agents - Usage - API Documentation - Contributing - License - Contact - -Key Features - - Zero Trust Security - Sentrius enforces zero trust policies, ensuring that every SSH connection is authenticated, authorized, and constantly monitored. - - Enclaves - Group hosts into logical enclaves and apply role-based access control for fine-grained permissions. Simplify security oversight by separating and organizing your infrastructure. - - Dynamic Rules Enforcement - Define flexible, context-aware rules that adapt to real-time changes in your environment (e.g., user risk score, time of day, IP ranges). - - REST API - Manage your SSH configurations, enclaves, security rules, and sessions programmatically using a well-documented REST API. - - Self-Healing System - Automatically detects, analyzes, and repairs system errors through intelligent coding agents. Configure patching policies (immediate, off-hours, or never) per pod/service, with built-in security analysis to prevent healing of security-sensitive errors without manual review. When configured, the system can automatically create GitHub pull requests with fixes. - -Custom SSH Server responds via Sentrius UI or terminals -![image](docs/images/ssh.png) - -Agent Designer supports natural language prompts to create custom agents that can monitor and control SSH sessions, automate tasks, and provide user assistance. The Agent Designer allows you to define agent behavior, capabilities, and interactions with the Sentrius platform. 
-![image](docs/images/agentdesigner.png) - -Project Structure - -Sentrius consists of multiple sub-projects: - - core - Contains business logic, including: - Enclave management - Zero trust policy enforcement - Secure SSH connection handling - - api - A RESTful interface for interacting with the core functionalities. The api module exposes endpoints that let you: - Create and manage enclaves - Configure security rules - Visualize SSH sessions and logs - Handle user access and authentication +# Sentrius -sentrius/ -├── core/ -│ ├── src/ -│ └── pom.xml -├── api/ -│ ├── src/ -│ └── pom.xml -├── ops-scripts/ -│ └── gcp/ -│ └── deploy-helm.sh -├── pom.xml -└── ... - -Prerequisites - - Java 17 or later - Apache Maven 3.6+ - Database (PostgreSQL, MySQL, etc.) for storing session and configuration data - Keycloak for user authentication and authorization - (Optional) Docker & Kubernetes if you plan to deploy on a containerized environment - (Optional) python 3.6+ for the python agent - -Installation - - Clone the Repository - -git clone https://github.com/your-organization/sentrius.git -cd sentrius - -#Running Sentrius - -Build the projects from root ( mvn clean install ) to ensure all dependencies are resolved and the modules are compiled. - -For convenience the ops/local directory contains a "run-sentrius.sh" script which will start the core and api -modules. You can run this script from the project root. -This assumes you have a database available, keycloak running, and the necessary configurations. We now require an -OTEL endpoint, along with neo4j and kafka (but these are optional).: - - ./ops/local/run-sentrius.sh - -It is simpler to run a kubernetes deployment, which is described in the Deployment. To do this, build as you would -above. 
- -Build the images in your local Docker registry (note this builds all images, including core, api, and any other modules): - - /build-images.sh --all --no-cache - -For faster builds, you can use the concurrent build script which builds all images in parallel: - - ./ops-scripts/base/build-all-images-concurrent.sh --all --no-cache - -Run the Helm deployment script to deploy Sentrius to your local Kubernetes cluster: - - ./ops-scripts/local/deploy-helm.sh - - -## If Not using TLS -You may wish to forward ports so you can access the services locally. The following commands will forward the necessary ports for the core and api modules: - kubectl port-forward -n dev service/sentrius-sentrius 8080:8080 - kubectl port-forward -n dev service/sentrius-keycloak 8081:8081 - -This will require that you either change the hostnames in the deploy-helm script or add entries to your /etc/hosts file to point to localhost for the services. - 127.0.0.1 sentrius-sentrius - 127.0.0.1 sentrius-keycloak - -## If Using TLS -The deploy script will automatically install cert-manager and create self-signed certificates for the services. You can access the services via: - - https://sentrius-dev.local - https://keycloak-dev.local - -Add these to /etc/hosts file pointing to your minikube or local cluster IP. - - -There is a GCP deployment that is hasn't been tested in some time. You can find it in the ops-scripts/gcp directory. - -You will need to ensure you link to your GKE cluster and have the necessary permissions to deploy resources. - - ./ops-scripts/gcp/deploy-helm.sh - -You are welcome to run the core and api modules separately, as needed. You can start the core module by running: - - mvn install - cd api - mvn spring-boot:run - -## Testing - -### CI/CD Testing +![Sentrius Dashboard](docs/images/dashboard.png) -Sentrius includes comprehensive CI/CD testing for Helm charts and Java builds: +**Sentrius** is a zero trust security platform for protecting your infrastructure. 
Monitor and control SSH connections, APIs, and RDP sessions with AI-powered agents, ensuring all access is secure and compliant with your organization's policies. -- **Automated testing** runs on every push and pull request via GitHub Actions -- **Helm chart validation** including linting, template rendering, and schema validation -- **Integration testing** with Kubernetes clusters for deployment validation +## 🚀 Quick Start -### Local Testing - -Test Helm charts locally before deployment: - - # Test all charts - ./ops-scripts/test-helm-charts.sh - - # Test specific aspects - ./ops-scripts/test-helm-charts.sh lint # Lint charts - ./ops-scripts/test-helm-charts.sh template # Test rendering - ./ops-scripts/test-helm-charts.sh config # Test configurations - -For detailed testing documentation, see [docs/helm-testing.md](docs/helm-testing.md). - -Build the Project - -Sentrius uses Maven for its build process. Ensure Maven is installed and then run: - - mvn clean install - - This command will build both the core and api sub-projects, downloading any required dependencies. - -Configuration - -Sentrius requires properties in order to connect to databases, authenticate users, and configure SSH session parameters. You can supply them in src/main/resources/application.properties or via external configuration (e.g., environment variables or config files). 
- -Typical settings include: - - Database Configuration - -spring.datasource.url=jdbc:postgresql://localhost:5432/sentrius -spring.datasource.username=postgres -spring.datasource.password=postgres -spring.jpa.hibernate.ddl-auto=update - -Security & Authentication - -# JWT or OAuth -To configure Keycloak, you can use the following properties: - - keycloak.realm=sentrius - keycloak.base-url=${KEYCLOAK_BASE_URL:http://localhost:8180} - spring.security.oauth2.client.registration.keycloak.client-secret=${KEYCLOAK_SECRET:defaultSecret} - - spring.security.oauth2.client.registration.keycloak.client-id=sentrius-api - spring.security.oauth2.client.registration.keycloak.authorization-grant-type=authorization_code - spring.security.oauth2.client.registration.keycloak.redirect-uri=${BASE_URL:http://localhost:8080}/login/oauth2/code/keycloak - spring.security.oauth2.client.registration.keycloak.scope=openid,profile,email - - spring.security.oauth2.resourceserver.jwt.issuer-uri=${KEYCLOAK_BASE_URL:http://localhost:8180}/realms/sentrius - spring.security.oauth2.client.provider.keycloak.issuer-uri=${KEYCLOAK_BASE_URL:http://localhost:8180}/realms/sentrius - - -SSH Settings - - sentrius.ssh.port=22 - sentrius.ssh.connection-timeout=30000 - - Core and API Specifics - Core might need additional application-specific properties (e.g., caching, logging). - The API often needs separate configurations for its own port, API versioning, or logging settings. - -Feel free to structure your configs based on your environment (dev/test/prod). For large-scale deployments, we recommend using a secure secrets manager (HashiCorp Vault, AWS Secrets Manager, etc.) to avoid storing sensitive information in plain text. - -## Helm Chart Deployment - -Sentrius provides comprehensive Helm charts for Kubernetes deployment across multiple environments. There are two main charts available: - -### Available Charts - -1. **sentrius-chart** - Complete Sentrius deployment with all services -2. 
**sentrius-chart-launcher** - Lightweight chart focused on the launcher service - -### Quick Start - -#### Local Deployment +### Deploy with Kubernetes (Recommended) ```bash -# Build all images (sequential) -./build-images.sh --all --no-cache - -# OR build all images concurrently (faster) +# Build Docker images (3-7 minutes) ./ops-scripts/base/build-all-images-concurrent.sh --all --no-cache -# Deploy to local Kubernetes cluster (HTTP) +# Deploy to local cluster ./ops-scripts/local/deploy-helm.sh -# OR deploy with TLS enabled for secure transport -./ops-scripts/local/deploy-helm.sh --tls - -# OR deploy with TLS and auto-install cert-manager -./ops-scripts/local/deploy-helm.sh --tls --install-cert-manager - -# Forward ports for local access (HTTP deployment) +# Access services kubectl port-forward -n dev service/sentrius-sentrius 8080:8080 -kubectl port-forward -n dev service/sentrius-keycloak 8081:8081 ``` -**For HTTP deployment**, add to `/etc/hosts`: -``` -127.0.0.1 sentrius-sentrius -127.0.0.1 sentrius-keycloak -``` - -**For TLS deployment**, add to `/etc/hosts`: -``` -127.0.0.1 sentrius-dev.local -127.0.0.1 keycloak-dev.local -``` +Open http://localhost:8080 in your browser. -**TLS Requirements:** -- cert-manager must be installed in your cluster. 
You can: - - Install manually: `kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.0/cert-manager.yaml` - - Use auto-install flag: `./ops-scripts/local/deploy-helm.sh --tls --install-cert-manager` -- Access via: `https://sentrius-dev.local` and `https://keycloak-dev.local` -- Self-signed certificates will be automatically generated - -#### GCP/GKE Deployment +### Run Locally (Development) ```bash -# Deploy to GKE cluster -./ops-scripts/gcp/deploy-helm.sh -``` - -### Chart Configuration - -#### Key Configuration Options - -**Environment Settings:** -- `environment`: Set to "local", "gke", "aws", or "azure" -- `tenant`: Your tenant identifier -- `subdomain`: Domain for your deployment - -**Core Services:** -- `sentrius.image.repository`: Core Sentrius image repository -- `llmproxy.image.repository`: LLM proxy image repository -- `postgres.storageSize`: Database storage allocation - -**Ingress Configuration:** -```yaml -ingress: - enabled: true - class: "nginx" # or "gce" for GKE - tlsEnabled: true - annotations: - gke: - kubernetes.io/ingress.class: gce - networking.gke.io/managed-certificates: wildcard-cert -``` - -**TLS/SSL Configuration:** -```yaml -certificates: - enabled: true # Enable certificate generation - issuer: "letsencrypt-prod" # For AWS/Azure (cert-manager) - -# For local development with self-signed certificates: -environment: local -certificates: - enabled: true -ingress: - tlsEnabled: true -``` - -**Agent Configuration:** -```yaml -sentriusagent: - image: - repository: sentrius-agent - oauth2: - client_id: java-agents - client_secret: your-secret - -sentriusaiagent: - image: - repository: sentrius-ai-agent - oauth2: - client_id: java-agents -``` - -#### Custom Values Example - -Create a `my-values.yaml` file: -```yaml -environment: "gke" -tenant: "my-company" -subdomain: "my-company.sentrius.cloud" - -sentrius: - image: - repository: "my-registry/sentrius" - tag: "v1.0.0" +# Build project +mvn clean install 
-postgres: - storageSize: "20Gi" - -ingress: - enabled: true - tlsEnabled: true - class: "gce" -``` - -Deploy with custom values: -```bash -helm install my-sentrius sentrius-chart -f my-values.yaml +# Start services (requires PostgreSQL and Keycloak) +./ops-scripts/local/run-sentrius.sh --build ``` -### Multi-Environment Support - -The charts support multiple deployment environments with different configurations: - -**Local Development:** -- Uses NodePort services -- Minimal resource requirements -- In-memory storage options - -**GKE (Google Cloud):** -- Uses LoadBalancer services -- Managed certificates -- Persistent storage - -**AWS:** -- ALB ingress support -- EBS storage classes -- AWS-specific annotations - -**Azure:** -- Azure Load Balancer integration -- Azure disk storage -- Azure-specific networking - -### Helm Testing - -For comprehensive testing documentation including CI/CD testing, local testing, and troubleshooting, see [docs/helm-testing.md](docs/helm-testing.md). - -## Integrations - -Sentrius supports external service integrations through the integration-proxy module, providing secure, zero-trust access to external APIs and services. - -### GitHub Integration - -The GitHub MCP (Model Context Protocol) integration enables secure access to GitHub repositories, issues, and pull requests through dynamically launched MCP server containers. +See [DEPLOYMENT.md](DEPLOYMENT.md) for detailed deployment options. -**Features:** -- Query GitHub issues and pull requests -- Access repository information -- Clone and interact with repositories -- All operations use zero-trust security model +## ✨ Key Features -**Setup:** +### Zero Trust Security +Enforce zero trust policies with continuous authentication, authorization, and monitoring for every connection. -1. 
**Store GitHub Token:** - Create an `IntegrationSecurityToken` with: - - `connectionType`: "github" - - `connectionInfo`: Your GitHub Personal Access Token +### SSH Session Management +![SSH Session Management](docs/images/ssh.png) -2. **Launch MCP Server:** - ```bash - curl -X POST "http://integration-proxy:8080/api/v1/github/mcp/launch?tokenId=" \ - -H "Authorization: Bearer " - ``` +Secure SSH connections with real-time monitoring, command filtering, and session recording. Access through the web UI or terminal. -3. **Access via Service URL:** - The response includes a `serviceUrl` for accessing the GitHub MCP server within the cluster. +### AI-Powered Agent Designer +![Agent Designer](docs/images/agentdesigner.png) -For detailed documentation, see [integration-proxy/GITHUB_INTEGRATION.md](integration-proxy/GITHUB_INTEGRATION.md). +Create custom agents using natural language prompts. Agents can monitor sessions, automate tasks, and provide user assistance. -### JIRA Integration +### Enclaves & Access Control +Group hosts into logical enclaves with role-based access control for fine-grained permissions and simplified security oversight. -The JIRA integration provides secure proxy access to JIRA APIs for ticket management and tracking. +### Dynamic Rules Enforcement +Define flexible, context-aware rules that adapt to real-time changes (user risk score, time of day, IP ranges). -**Available Endpoints:** -- `/api/v1/jira/rest/api/3/search` - Search for JIRA issues -- `/api/v1/jira/rest/api/3/issue` - Get issue details -- `/api/v1/jira/rest/api/3/issue/comment` - Manage issue comments -- `/api/v1/jira/rest/api/3/issue/assignee` - Assign issues +### Self-Healing System +Automatically detect, analyze, and repair system errors through intelligent coding agents. Configure patching policies per service with built-in security analysis. -All JIRA requests are authenticated through Keycloak and validated against the user's permissions. 
+### External Integrations +Integrate with GitHub, JIRA, and LLMs through secure zero-trust proxies. All integrations use access tokens with granular permissions. -## Self-Healing System +## 📋 Prerequisites -Sentrius includes an intelligent self-healing system that automatically detects, analyzes, and repairs errors in your infrastructure. +**Required:** +- Java 17+ +- Maven 3.6+ +- PostgreSQL database +- Keycloak authentication server +- Docker & Kubernetes (for containerized deployment) -### Key Features +**Optional:** +- Neo4j (graph analysis) +- Kafka (event streaming) +- Python 3.12+ (Python agents) -- **Automatic Error Detection**: Continuously monitors the error output table and OpenTelemetry data for system errors -- **Security Analysis**: Automatically analyzes errors to determine if they pose security concerns before attempting repairs -- **Flexible Patching Policies**: Configure per-pod/service policies for when repairs should be applied: - - **Immediate**: Apply fixes as soon as errors are detected - - **Off-Hours**: Queue fixes to apply during configured maintenance windows (default: 10 PM - 6 AM) - - **Never**: Disable self-healing for critical services that require manual intervention -- **Coding Agent Deployment**: Automatically launches isolated coding agent pods to analyze errors and generate fixes -- **Docker Image Building**: Spins up Kubernetes Jobs using Kaniko to build and push Docker images with the fixes -- **Complete Workflow Automation**: Coordinates agent launch, monitoring, image building, and optional GitHub PR creation -- **Read-Only Agent Monitoring**: View real-time agent activity and healing progress through the UI (non-security errors only) -- **GitHub Integration**: Optionally create pull requests with fixes when GitHub credentials are configured +## 📚 Documentation -### Configuration +- **[Deployment Guide](DEPLOYMENT.md)** - Deploy Sentrius locally, on Kubernetes, or cloud platforms +- **[Development Guide](DEVELOPMENT.md)** - 
Build, test, and contribute to Sentrius +- **[Custom Agents](CUSTOM_AGENTS.md)** - Create Java and Python agents for monitoring and automation +- **[Integrations](INTEGRATIONS.md)** - Connect with GitHub, JIRA, LLMs, and self-healing system +- **[API Documentation](docs/)** - REST API reference and guides -Self-healing can be configured through the web UI or via API: +## 🏗️ Architecture -#### Web UI Configuration +Sentrius consists of 12+ Maven modules organized for zero trust security: -1. Navigate to **Self-Healing Configuration** (`/sso/v1/self-healing/config`) -2. Click **Add Pod Configuration** to create a new policy -3. Set the pod name, type, and patching policy using the slider control -4. Enable or disable self-healing for the pod - -#### API Configuration - -```bash -# Create or update a self-healing configuration -curl -X POST http://localhost:8080/api/v1/self-healing/config \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer " \ - -d '{ - "podName": "sentrius-api", - "podType": "api", - "patchingPolicy": "OFF_HOURS", - "enabled": true - }' - -# Get all configurations -curl http://localhost:8080/api/v1/self-healing/config \ - -H "Authorization: Bearer " -``` - -#### Application Properties - -Self-healing configuration is managed through Helm values and automatically populated into the ConfigMap. 
Update `values.yaml`: - -```yaml -selfHealing: - enabled: true - offHours: - start: 22 # 10 PM - end: 6 # 6 AM - codingAgent: - clientId: "coding-agents" - clientSecret: "" # Set in secrets - agentLauncher: - url: "http://sentrius-agents-launcherservice:8080" - builder: - namespace: "dev" - image: "gcr.io/kaniko-project/executor:latest" - timeoutSeconds: 1800 - autoBuild: true - docker: - registry: "" # Leave empty for local registry - github: - enabled: false # Auto-enabled if GitHub integration exists - apiUrl: "https://api.github.com" - owner: "" - repo: "" -``` - -**Important**: Self-healing requires GitHub integration to be configured in the integration tokens table. The system will automatically detect if a GitHub token exists and only proceed if configured. To add a GitHub integration token, navigate to the Integration Settings in the UI and add a token with `connectionType: "github"`. - -### Viewing Healing Sessions - -Monitor active and completed healing sessions: - -1. Navigate to **Self-Healing Sessions** (`/sso/v1/self-healing/sessions`) -2. Filter by status: All, Active, or Completed -3. View detailed information about each session including: - - Agent activity and logs - - Security analysis results - - Docker build status - - GitHub PR links (if created) - - Error details and resolution - -### How It Works - -The self-healing workflow consists of several automated steps: - -1. **Error Detection**: The system scans the error_output table every 5 minutes for new errors -2. **Policy Check**: Determines if healing is enabled for the affected pod and checks the patching policy -3. **Security Analysis**: Analyzes error logs for security-related keywords -4. **Agent Launch**: If not a security concern, launches a coding agent pod to analyze and fix the error -5. **Code Repair**: The coding agent examines the error, generates fixes, and commits changes -6. 
**Docker Build**: A Kubernetes Job is created to build a new Docker image with the fixes using Kaniko -7. **GitHub PR**: If configured, creates a pull request with the changes -8. **Completion**: Updates the healing session with results and status - -The entire workflow is asynchronous and can handle multiple concurrent healing sessions. - -### Security Considerations - -The self-healing system includes built-in safety mechanisms: - -- **GitHub Integration Required**: Self-healing only proceeds if a GitHub integration token is configured in the system. This ensures all fixes can be tracked via pull requests. -- **Security Analysis**: Errors containing security-related keywords (authentication, authorization, vulnerability, etc.) are flagged and require manual review before healing proceeds -- **No Visibility Restriction**: Security-flagged errors are hidden from general users until cleared by administrators -- **Audit Trail**: All healing attempts are logged and tracked in the `self_healing_session` table -- **Isolated Execution**: Healing agents run in isolated Kubernetes pods with limited permissions - -### Manual Triggering - -You can manually trigger self-healing for specific errors (requires GitHub integration to be configured): - -1. Navigate to **Error Logs** (`/sso/v1/notifications/error/log/get`) -2. Click **Trigger Self-Healing** on any error -3. Monitor progress in the Self-Healing Sessions view - -Or via API: - -```bash -curl -X POST http://localhost:8080/api/v1/self-healing/trigger/{errorId} \ - -H "Authorization: Bearer " ``` - -**Note**: If GitHub integration is not configured, the trigger will fail with a message prompting you to add a GitHub integration token first. 
- -### Database Schema - -The self-healing system uses three main tables: - -- `self_healing_config`: Stores patching policies per pod/service -- `self_healing_session`: Tracks each healing attempt and its status -- `error_output`: Extended with healing status and security analysis fields - -## Custom Agents - -Sentrius supports both Java and Python-based custom agents that can extend the platform's functionality for monitoring, automation, and user assistance. - -### Java Agents - -Java agents are built using the Spring Boot framework and integrate with the Sentrius ecosystem through the agent launcher service. - -#### Creating a Custom Java Agent - -1. **Create a new Spring Boot module** following the pattern of existing agents: - ``` - my-custom-agent/ - ├── src/main/java/ - │ └── io/sentrius/agent/mycustom/ - │ ├── MyCustomAgent.java - │ └── MyCustomAgentConfig.java - └── pom.xml - ``` - -2. **Implement the Agent Interface:** - ```java - @Component - @ConditionalOnProperty(name = "agents.mycustom.enabled", havingValue = "true") - public class MyCustomAgent implements ApplicationListener { - - @Autowired - private AgentService agentService; - - @Override - public void onApplicationEvent(ApplicationReadyEvent event) { - // Register agent and start processing - agentService.register(this); - } - } - ``` - -3. **Configuration Properties:** - ```java - @ConfigurationProperties(prefix = "agents.mycustom") - @Data - public class MyCustomAgentConfig { - private boolean enabled = false; - private String name = "my-custom-agent"; - private String description = "Custom agent for specialized tasks"; - } - ``` - -4. **Add to application.properties:** - ```properties - agents.mycustom.enabled=true - agents.mycustom.name=my-custom-agent - agents.mycustom.description=Custom agent for specialized tasks - ``` - -5. 
**Deploy with Helm Chart:** - ```yaml - # Add to values.yaml - mycustomagent: - image: - repository: my-custom-agent - tag: latest - oauth2: - client_id: java-agents - client_secret: your-secret - ``` - -#### Java Agent Features - -- **Zero Trust Integration**: Automatic ZTAT (Zero Trust Access Token) handling -- **Provenance Tracking**: Built-in event logging and audit trails -- **LLM Integration**: Access to language models through the LLM proxy -- **Session Monitoring**: Real-time SSH session monitoring capabilities -- **RESTful APIs**: Full access to Sentrius APIs and data - -### Python Agents - -Python agents provide a flexible framework for creating custom automation and user assistance tools. - -#### Creating a Custom Python Agent - -1. **Set up the agent structure:** - ```python - # agents/my_custom/my_custom_agent.py - from agents.base import BaseAgent - - class MyCustomAgent(BaseAgent): - def __init__(self, config_manager): - super().__init__(config_manager, name="my-custom-agent") - self.agent_definition = config_manager.get_agent_definition('my.custom') - - def execute_task(self, task_data=None): - # Your custom logic here - self.submit_provenance( - event_type="CUSTOM_TASK", - details={"task": "custom_operation", "data": task_data} - ) - - return { - "status": "completed", - "result": "Custom task executed successfully" - } - ``` - -2. **Create agent configuration:** - ```yaml - # my-custom.yaml - description: "Custom agent that performs specialized tasks" - context: | - You are a custom agent designed to handle specific business logic. - Process requests according to your specialized capabilities. - ``` - -3. **Add to application.properties:** - ```properties - agent.my.custom.config=my-custom.yaml - agent.my.custom.enabled=true - ``` - -4. 
**Register in main.py:** - ```python - from agents.my_custom.my_custom_agent import MyCustomAgent - - AVAILABLE_AGENTS = { - 'chat-helper': ChatHelperAgent, - 'my-custom': MyCustomAgent, # Add your agent here - } - ``` - -5. **Run your custom agent:** - ```bash - python main.py my-custom --task-data '{"operation": "process_data"}' - ``` - -#### Python Agent Features - -- **API Integration**: Full access to Sentrius APIs using JWT authentication -- **Configuration Management**: Support for properties files and YAML configurations -- **LLM Proxy Access**: Integration with language models for AI-powered tasks -- **Provenance Submission**: Automatic event tracking and audit logging -- **Keycloak Authentication**: Built-in OAuth2/JWT token management - -#### Running Python Agents - -```bash -# With properties configuration -python main.py my-custom --config my-app.properties - -# With environment variables -export KEYCLOAK_BASE_URL=http://localhost:8180 -export KEYCLOAK_CLIENT_ID=python-agents -python main.py my-custom - -# Test mode (no external services) -TEST_MODE=true python main.py my-custom +sentrius/ +├── core/ # Business logic, enclave management, policy enforcement +├── api/ # REST API and web interface +├── dataplane/ # Secure data transfer and processing +├── llm-core/ # Language model integration +├── integration-proxy/ # External service integrations (GitHub, JIRA, LLMs) +├── agent-launcher/ # Dynamic agent lifecycle management +├── provenance-core/ # Event tracking and audit framework +└── ... ``` -### Agent Development Best Practices +See [DEVELOPMENT.md](DEVELOPMENT.md) for complete project structure. -1. **Authentication**: Always use proper OAuth2/JWT authentication -2. **Provenance**: Submit detailed provenance events for audit trails -3. **Error Handling**: Implement robust error handling and logging -4. **Configuration**: Use environment-specific configurations -5. **Testing**: Test agents in isolation before integration -6. 
**Documentation**: Document agent capabilities and configuration options +## 🤝 Contributing -For detailed Python agent documentation, see [python-agent/README.md](python-agent/README.md). +Contributions are welcome! To get started: -Contributing +1. Fork the repository +2. Create a feature branch for your changes +3. Open a pull request with a clear description -Contributions of all forms are welcome! To get started: +See [DEVELOPMENT.md](DEVELOPMENT.md) for detailed development guidelines. - Fork the repository. - Create a feature branch for your changes. - Open a pull request back into the main branch, describing your changes and rationale. +## 📄 License -If you encounter any issues or have requests, feel free to open a GitHub Issue. We actively review and address bug reports, feature requests, and general improvements. -License +Sentrius is licensed under the MIT License. See the [LICENSE](LICENSE) file for details. -Sentrius is licensed under the MIT License. For more details, please see the LICENSE file. -Contact +## 📧 Contact -Questions, feedback, or need commercial support? Reach out to the project maintainers: +Questions or need commercial support? -Email: marc@sentrius.io +**Email:** marc@sentrius.io -We’re always happy to help you secure your infrastructure with Sentrius! +We're here to help you secure your infrastructure with Sentrius! 
diff --git a/ai-agent/src/main/java/io/sentrius/agent/analysis/agents/verbs/AgentVerbs.java b/ai-agent/src/main/java/io/sentrius/agent/analysis/agents/verbs/AgentVerbs.java new file mode 100644 index 00000000..e69de29b diff --git a/api/src/main/java/io/sentrius/sso/controllers/api/agents/AgentTemplateController.java b/api/src/main/java/io/sentrius/sso/controllers/api/agents/AgentTemplateController.java index 69b23759..fa5fe035 100644 --- a/api/src/main/java/io/sentrius/sso/controllers/api/agents/AgentTemplateController.java +++ b/api/src/main/java/io/sentrius/sso/controllers/api/agents/AgentTemplateController.java @@ -1,15 +1,22 @@ package io.sentrius.sso.controllers.api.agents; import io.sentrius.sso.config.ApiPaths; +import io.sentrius.sso.config.AppConfig; import io.sentrius.sso.core.annotations.LimitAccess; import io.sentrius.sso.core.config.SystemOptions; import io.sentrius.sso.core.controllers.BaseController; import io.sentrius.sso.core.dto.AgentRegistrationDTO; import io.sentrius.sso.core.dto.agents.AgentTemplateDTO; +import io.sentrius.sso.core.exceptions.ZtatException; import io.sentrius.sso.core.model.security.enums.ApplicationAccessEnum; +import io.sentrius.sso.core.services.ATPLPolicyService; import io.sentrius.sso.core.services.ErrorOutputService; import io.sentrius.sso.core.services.UserService; +import io.sentrius.sso.core.services.agents.AgentClientService; +import io.sentrius.sso.core.services.agents.AgentContextService; +import io.sentrius.sso.core.services.agents.AgentLaunchService; import io.sentrius.sso.core.services.agents.AgentTemplateService; +import io.sentrius.sso.core.services.agents.ZeroTrustClientService; import jakarta.servlet.http.HttpServletRequest; import jakarta.servlet.http.HttpServletResponse; import lombok.extern.slf4j.Slf4j; @@ -26,15 +33,33 @@ public class AgentTemplateController extends BaseController { private final AgentTemplateService templateService; + private final ZeroTrustClientService zeroTrustClientService; + private 
final AppConfig appConfig; + private final ATPLPolicyService atplPolicyService; + private final AgentLaunchService agentLaunchService; + private final AgentContextService agentContextService; + private final AgentClientService agentClientService; public AgentTemplateController( UserService userService, SystemOptions systemOptions, ErrorOutputService errorOutputService, - AgentTemplateService templateService + AgentTemplateService templateService, + ZeroTrustClientService zeroTrustClientService, + AppConfig appConfig, + ATPLPolicyService atplPolicyService, + AgentLaunchService agentLaunchService, + AgentContextService agentContextService, + AgentClientService agentClientService ) { super(userService, systemOptions, errorOutputService); this.templateService = templateService; + this.zeroTrustClientService = zeroTrustClientService; + this.appConfig = appConfig; + this.atplPolicyService = atplPolicyService; + this.agentLaunchService = agentLaunchService; + this.agentContextService = agentContextService; + this.agentClientService = agentClientService; } /** @@ -241,7 +266,7 @@ public ResponseEntity prepareLaunch( /** * Launch an agent from a template - * This endpoint creates an agent registration and triggers the launcher service + * This endpoint creates an agent registration and triggers the launcher service automatically * * @param id Template ID * @param agentName Name for the new agent @@ -269,22 +294,105 @@ public ResponseEntity launchFromTemplate( log.info("User {} launching agent '{}' from template '{}'", operatingUser.getUsername(), agentName, template.getName()); - // Build launch response with template information - // The actual launcher integration will be handled by the frontend calling the launcher service - Map launchInfo = Map.of( - "status", "prepared", + // Check if agent is already running + try { + String status = agentClientService.getAgentPodStatus( + appConfig.getSentriusLauncherService(), + agentName + ); + if ("Running".equals(status) || 
"Pending".equals(status)) { + log.info("Agent {} is already running or pending", agentName); + return ResponseEntity.ok(Map.of( + "status", "already_exists", + "message", "Agent is already running or pending", + "agentName", agentName + )); + } + } catch (Exception e) { + log.debug("Agent status check failed (agent may not exist yet): {}", e.getMessage()); + } + + // Build AgentRegistrationDTO with full template configuration + AgentRegistrationDTO agentDto = AgentRegistrationDTO.builder() + .agentName(agentName) + .agentType(template.getAgentType()) + .agentCallbackUrl("") + .clientId(agentName) // Set clientId to match agentName for policy caching + .agentTemplateId(id.toString()) + .agentContextId(agentContextId) + .templateConfiguration(template.getDefaultConfiguration()) + .templateIdentity(template.getIdentity()) + .templatePurpose(template.getPurpose()) + .templateGoals(template.getGoals()) + .templateGuardrails(template.getGuardrails()) + .templateTrustPolicyId(template.getTrustPolicyId()) + .templateLaunchConfiguration(template.getLaunchConfiguration()) + .agentPolicyId(template.getTrustPolicyId() != null ? 
template.getTrustPolicyId() : "") + .build(); + + // Cache the policy if it exists + if (template.getTrustPolicyId() != null && !template.getTrustPolicyId().isEmpty()) { + var latest = atplPolicyService.getLatestPolicyEntity(template.getTrustPolicyId()); + if (latest.isPresent()) { + log.info("Caching policy {} for agent {}", template.getTrustPolicyId(), agentName); + atplPolicyService.cachePolicy(agentDto.getClientId(), template.getTrustPolicyId()); + } else { + log.warn("Policy {} not found, skipping cache", template.getTrustPolicyId()); + } + } + + // Call the launcher service + zeroTrustClientService.callAuthenticatedPostOnApi( + appConfig.getSentriusLauncherService(), + "agent/launcher/create", + agentDto + ); + + // Record the agent launch if agentContextId is provided + if (agentContextId != null && !agentContextId.isEmpty()) { + try { + UUID contextId = UUID.fromString(agentContextId); + String launchedBy = operatingUser.getUserId(); + String parameters = String.format( + "agentType=%s,templateId=%s,policyId=%s", + template.getAgentType(), + id.toString(), + template.getTrustPolicyId() != null ? 
template.getTrustPolicyId() : "none" + ); + + UUID launchId = agentLaunchService.recordLaunch( + agentName, + contextId, + launchedBy, + parameters + ); + + log.info("Recorded agent launch: launchId={}, contextId={}, agentName={}", + launchId, contextId, agentName); + } catch (IllegalArgumentException e) { + log.warn("Invalid agentContextId '{}', skipping launch record: {}", agentContextId, e.getMessage()); + } catch (Exception e) { + log.warn("Failed to record agent launch (non-critical): {}", e.getMessage()); + } + } + + log.info("Successfully launched agent '{}' from template '{}'", agentName, template.getName()); + + return ResponseEntity.ok(Map.of( + "status", "success", + "message", "Agent launched successfully", "agentName", agentName, "templateId", id.toString(), "templateName", template.getName(), - "agentType", template.getAgentType(), - "trustPolicyId", template.getTrustPolicyId() != null ? template.getTrustPolicyId() : "", - "message", "Agent launch prepared. Use the prepare-launch endpoint to get full configuration for launcher service.", - "nextStep", String.format("/api/v1/agent/templates/%s/prepare-launch?agentName=%s", id, agentName) - ); + "agentType", template.getAgentType() + )); - return ResponseEntity.ok(launchInfo); } catch (IllegalArgumentException e) { return ResponseEntity.notFound().build(); + } catch (ZtatException e) { + log.error("Error calling launcher service", e); + return ResponseEntity.status(503) + .body(Map.of("error", "Failed to contact launcher service: " + e.getMessage())); } catch (Exception e) { log.error("Error launching agent from template", e); return ResponseEntity.badRequest() diff --git a/api/src/main/resources/templates/sso/agents/agent_templates.html b/api/src/main/resources/templates/sso/agents/agent_templates.html index ea73a954..e86e40ad 100644 --- a/api/src/main/resources/templates/sso/agents/agent_templates.html +++ b/api/src/main/resources/templates/sso/agents/agent_templates.html @@ -314,6 +314,13 @@
${template.name} return; } + // Show loading indicator + const launchBtn = document.querySelector(`button[data-template-id="${templateId}"]`); + if (launchBtn) { + launchBtn.disabled = true; + launchBtn.innerHTML = ' Launching...'; + } + fetch(`/api/v1/agent/templates/${templateId}/launch?agentName=${encodeURIComponent(agentName)}`, { method: 'POST', headers: { @@ -322,8 +329,12 @@
${template.name} }) .then(response => response.json()) .then(data => { - if (data.status === 'prepared') { - alert(`Agent launch prepared!\n\nAgent: ${data.agentName}\nTemplate: ${data.templateName}\nType: ${data.agentType}\n\n${data.message}`); + if (data.status === 'success') { + alert(`Agent launched successfully!\n\nAgent: ${data.agentName}\nTemplate: ${data.templateName}\nType: ${data.agentType}\n\nThe agent is being deployed. Check the agent list for status.`); + } else if (data.status === 'already_exists') { + alert(`Agent already exists!\n\nAgent: ${data.agentName}\n\n${data.message}`); + } else if (data.error) { + alert(`Failed to launch agent: ${data.error}`); } else { alert('Agent launch initiated. Check the agent list for status.'); } @@ -331,6 +342,13 @@
${template.name} .catch(error => { console.error('Error launching agent:', error); alert('Failed to launch agent. Please check the logs for details.'); + }) + .finally(() => { + // Restore button state + if (launchBtn) { + launchBtn.disabled = false; + launchBtn.innerHTML = ' Launch'; + } }); } diff --git a/docs/SCREENSHOT_SUGGESTIONS.md b/docs/SCREENSHOT_SUGGESTIONS.md new file mode 100644 index 00000000..8582c6bc --- /dev/null +++ b/docs/SCREENSHOT_SUGGESTIONS.md @@ -0,0 +1,141 @@ +# Screenshot Suggestions for Sentrius Documentation + +This document outlines suggested screenshots to enhance the Sentrius documentation and improve user understanding. + +## Currently Used Screenshots + +1. **dashboard.png** (3746 x 1961) - Main dashboard view, used in README header +2. **mainscreen.png** (3813 x 1913) - Main screen view (NOT currently used in new README) +3. **ssh.png** (608 x 123) - SSH session interface +4. **agentdesigner.png** (2760 x 1931) - Agent Designer interface + +## Recommended Additional Screenshots + +### High Priority + +1. **Quick Start Deployment** + - **File:** `docs/images/kubernetes-deployment.png` + - **Content:** Screenshot showing successful Kubernetes deployment with port-forward commands + - **Usage:** In README Quick Start section and DEPLOYMENT.md + - **Purpose:** Help users visualize successful deployment + +2. **Enclave Management** + - **File:** `docs/images/enclave-management.png` + - **Content:** Screenshot of the enclave management interface showing host groups and access controls + - **Usage:** In README Key Features section + - **Purpose:** Showcase the enclave feature visually + +3. **Self-Healing Configuration** + - **File:** `docs/images/self-healing-config.png` + - **Content:** Self-healing configuration UI showing patching policies + - **Usage:** In INTEGRATIONS.md Self-Healing section + - **Purpose:** Help users understand self-healing configuration options + +4. 
**Self-Healing Session View** + - **File:** `docs/images/self-healing-session.png` + - **Content:** Active healing session showing agent logs and status + - **Usage:** In INTEGRATIONS.md Self-Healing section + - **Purpose:** Show users what to expect during healing process + +### Medium Priority + +5. **Integration Settings** + - **File:** `docs/images/integration-settings.png` + - **Content:** Integration settings page showing GitHub/JIRA token configuration + - **Usage:** In INTEGRATIONS.md + - **Purpose:** Guide users through integration setup + +6. **Rules Engine** + - **File:** `docs/images/rules-engine.png` + - **Content:** Dynamic rules configuration interface + - **Usage:** In README or dedicated rules documentation + - **Purpose:** Showcase dynamic rule enforcement capabilities + +7. **Session Monitoring** + - **File:** `docs/images/session-monitoring.png` + - **Content:** Real-time SSH session monitoring view with active sessions + - **Usage:** In README or dedicated monitoring documentation + - **Purpose:** Show live monitoring capabilities + +### Low Priority + +8. **Python Agent Console** + - **File:** `docs/images/python-agent-console.png` + - **Content:** Terminal showing Python agent running in test mode + - **Usage:** In CUSTOM_AGENTS.md + - **Purpose:** Help developers understand agent development workflow + +9. **Helm Chart Testing** + - **File:** `docs/images/helm-testing.png` + - **Content:** Terminal output showing successful helm chart tests + - **Usage:** In DEPLOYMENT.md and DEVELOPMENT.md + - **Purpose:** Show testing workflow + +10. **Build Process** + - **File:** `docs/images/maven-build.png` + - **Content:** Terminal showing successful Maven build + - **Usage:** In DEVELOPMENT.md + - **Purpose:** Help new developers understand build process + +## Suggestions for Existing Screenshots + +### Potentially Replace/Update + +- **mainscreen.png** is currently unused in the new README. 
Consider: + - Replace with more specific feature screenshots + - OR use it to show main navigation/menu structure + - OR update README to include it as an overview screenshot + +### Image Optimization + +All PNG files are quite large (15KB - 223KB). Consider: +- Optimizing images for web (reduce resolution for documentation) +- Using compressed PNGs or WebP format +- Keeping originals in a separate folder + +## Implementation Priority + +**Phase 1 (Immediate):** +- Add mainscreen.png to README or document where it should be used +- Create Quick Start Deployment screenshot + +**Phase 2 (Near-term):** +- Enclave Management screenshot +- Self-Healing Configuration and Session screenshots +- Integration Settings screenshot + +**Phase 3 (As needed):** +- Rules Engine, Session Monitoring +- Development workflow screenshots + +## Screenshot Guidelines + +When creating new screenshots: + +1. **Resolution:** Use 1920x1080 or another 16:9 resolution +2. **Content:** Show realistic data (no empty states unless demonstrating initial setup) +3. **Annotations:** Consider adding arrows or highlights for key UI elements +4. **Consistency:** Use the same theme/color scheme across all screenshots +5. **Accessibility:** Ensure text is readable at various sizes +6.
**Privacy:** Remove any sensitive information (real usernames, IPs, tokens) + +## Integration with Documentation + +Update the following files when adding new screenshots: + +- `README.md` - Feature highlights, Quick Start +- `DEPLOYMENT.md` - Deployment process, configuration +- `DEVELOPMENT.md` - Build process, testing +- `CUSTOM_AGENTS.md` - Agent development workflow +- `INTEGRATIONS.md` - Integration setup, self-healing + +## Maintenance + +- Review screenshots quarterly for accuracy with current UI +- Update screenshots when major UI changes occur +- Keep a changelog of screenshot updates in this file + +--- + +**Last Updated:** 2025-12-23 +**Maintainer:** Sentrius Documentation Team diff --git a/feature.patch b/feature.patch new file mode 100644 index 00000000..9c260819 --- /dev/null +++ b/feature.patch @@ -0,0 +1,5108 @@ +diff --git a/.azure.env b/.azure.env +new file mode 100644 +index 00000000..6cdc9f61 +--- /dev/null ++++ b/.azure.env +@@ -0,0 +1,14 @@ ++SENTRIUS_VERSION=1.1.51 ++SENTRIUS_SSH_VERSION=1.1.10 ++SENTRIUS_KEYCLOAK_VERSION=1.1.13 ++SENTRIUS_AGENT_VERSION=1.1.22 ++SENTRIUS_AI_AGENT_VERSION=1.1.3 ++LLMPROXY_VERSION=1.1.3 ++LAUNCHER_VERSION=1.1.3 ++AGENTPROXY_VERSION=1.1.3 ++SSHPROXY_VERSION=1.1.3 ++RDPPROXY_VERSION=1.1.3 ++GITHUB_MCP_VERSION=1.1.3 ++PROMPT_ADVISOR_VERSION=1.1.6 ++MONITORING_AGENT_VERSION=1.1.21 ++SSH_AGENT_VERSION=1.1.3 +diff --git a/CUSTOM_AGENTS.md b/CUSTOM_AGENTS.md +new file mode 100644 +index 00000000..538263c6 +--- /dev/null ++++ b/CUSTOM_AGENTS.md +@@ -0,0 +1,582 @@ ++# Custom Agents ++ ++Sentrius supports both Java and Python-based custom agents that can extend the platform's functionality for monitoring, automation, and user assistance. 
++ ++## Table of Contents ++ ++- [Overview](#overview) ++- [Java Agents](#java-agents) ++- [Python Agents](#python-agents) ++- [Agent Development Best Practices](#agent-development-best-practices) ++ ++## Overview ++ ++Custom agents in Sentrius can: ++- Monitor SSH sessions and system activity ++- Provide user assistance and automation ++- Integrate with external services via zero trust access ++- Execute custom business logic ++- Submit provenance events for audit trails ++ ++## Java Agents ++ ++Java agents are built using the Spring Boot framework and integrate with the Sentrius ecosystem through the agent launcher service. ++ ++### Creating a Custom Java Agent ++ ++#### 1. Create Module Structure ++ ++``` ++my-custom-agent/ ++├── src/main/java/ ++│ └── io/sentrius/agent/mycustom/ ++│ ├── MyCustomAgent.java ++│ └── MyCustomAgentConfig.java ++└── pom.xml ++``` ++ ++#### 2. Implement the Agent Interface ++ ++```java ++@Component ++@ConditionalOnProperty(name = "agents.mycustom.enabled", havingValue = "true") ++public class MyCustomAgent implements ApplicationListener { ++ ++ @Autowired ++ private AgentService agentService; ++ ++ @Override ++ public void onApplicationEvent(ApplicationReadyEvent event) { ++ // Register agent and start processing ++ agentService.register(this); ++ } ++ ++ @Scheduled(fixedDelay = 60000) // Run every minute ++ public void processTask() { ++ // Your agent logic here ++ logger.info("Processing custom agent task"); ++ } ++} ++``` ++ ++#### 3. Configuration Properties ++ ++```java ++@ConfigurationProperties(prefix = "agents.mycustom") ++@Data ++public class MyCustomAgentConfig { ++ private boolean enabled = false; ++ private String name = "my-custom-agent"; ++ private String description = "Custom agent for specialized tasks"; ++ private int pollInterval = 60000; ++} ++``` ++ ++#### 4. 
Add to application.properties ++ ++```properties ++agents.mycustom.enabled=true ++agents.mycustom.name=my-custom-agent ++agents.mycustom.description=Custom agent for specialized tasks ++agents.mycustom.pollInterval=60000 ++``` ++ ++#### 5. Deploy with Helm Chart ++ ++Add to `values.yaml`: ++ ++```yaml ++mycustomagent: ++ image: ++ repository: my-custom-agent ++ tag: latest ++ oauth2: ++ client_id: java-agents ++ client_secret: your-secret ++ resources: ++ requests: ++ memory: "256Mi" ++ cpu: "100m" ++ limits: ++ memory: "512Mi" ++ cpu: "500m" ++``` ++ ++### Java Agent Features ++ ++- **Zero Trust Integration**: Automatic ZTAT (Zero Trust Access Token) handling ++- **Provenance Tracking**: Built-in event logging and audit trails ++- **LLM Integration**: Access to language models through the LLM proxy ++- **Session Monitoring**: Real-time SSH session monitoring capabilities ++- **RESTful APIs**: Full access to Sentrius APIs and data ++ ++### Example: Session Monitoring Agent ++ ++```java ++@Component ++public class SessionMonitorAgent implements ApplicationListener { ++ ++ @Autowired ++ private SshSessionService sessionService; ++ ++ @Autowired ++ private ProvenanceService provenanceService; ++ ++ @Scheduled(fixedDelay = 30000) ++ public void monitorSessions() { ++ List activeSessions = sessionService.getActiveSessions(); ++ ++ for (SshSession session : activeSessions) { ++ if (isAnomalous(session)) { ++ provenanceService.submit(ProvenanceEvent.builder() ++ .eventType("ANOMALOUS_SESSION_DETECTED") ++ .sessionId(session.getId()) ++ .details(Map.of( ++ "user", session.getUsername(), ++ "reason", "Suspicious command pattern detected" ++ )) ++ .build()); ++ ++ // Take action ++ sessionService.flagSession(session.getId()); ++ } ++ } ++ } ++ ++ private boolean isAnomalous(SshSession session) { ++ // Your anomaly detection logic ++ return false; ++ } ++} ++``` ++ ++## Python Agents ++ ++Python agents provide a flexible framework for creating custom automation and user 
assistance tools. ++ ++### Creating a Custom Python Agent ++ ++#### 1. Set up the Agent Structure ++ ++```python ++# agents/my_custom/my_custom_agent.py ++from agents.base import BaseAgent ++ ++class MyCustomAgent(BaseAgent): ++ def __init__(self, config_manager): ++ super().__init__(config_manager, name="my-custom-agent") ++ self.agent_definition = config_manager.get_agent_definition('my.custom') ++ ++ def execute_task(self, task_data=None): ++ """Execute the agent's main task""" ++ self.logger.info(f"Executing custom task with data: {task_data}") ++ ++ # Your custom logic here ++ result = self.process_data(task_data) ++ ++ # Submit provenance event ++ self.submit_provenance( ++ event_type="CUSTOM_TASK", ++ details={ ++ "task": "custom_operation", ++ "data": task_data, ++ "result": result ++ } ++ ) ++ ++ return { ++ "status": "completed", ++ "result": result ++ } ++ ++ def process_data(self, data): ++ """Process the task data""" ++ # Implement your logic ++ return "processed_result" ++``` ++ ++#### 2. Create Agent Configuration ++ ++Create `my-custom.yaml`: ++ ++```yaml ++description: "Custom agent that performs specialized tasks" ++context: | ++ You are a custom agent designed to handle specific business logic. ++ Process requests according to your specialized capabilities. ++ ++ Your responsibilities include: ++ - Processing custom data ++ - Submitting provenance events ++ - Integrating with external services ++``` ++ ++#### 3. Add to application.properties ++ ++```properties ++agent.my.custom.config=my-custom.yaml ++agent.my.custom.enabled=true ++agent.my.custom.poll.interval=60000 ++``` ++ ++#### 4. Register in main.py ++ ++```python ++from agents.my_custom.my_custom_agent import MyCustomAgent ++ ++AVAILABLE_AGENTS = { ++ 'chat-helper': ChatHelperAgent, ++ 'my-custom': MyCustomAgent, # Add your agent here ++ 'mcp': MCPAgent, ++} ++``` ++ ++#### 5. 
Run Your Custom Agent ++ ++```bash ++# Test mode (no external services) ++TEST_MODE=true python main.py my-custom --task-data '{"operation": "process_data"}' ++ ++# With properties configuration ++python main.py my-custom --config my-app.properties ++ ++# With environment variables ++export KEYCLOAK_BASE_URL=http://localhost:8180 ++export KEYCLOAK_CLIENT_ID=python-agents ++python main.py my-custom ++``` ++ ++### Python Agent Features ++ ++- **API Integration**: Full access to Sentrius APIs using JWT authentication ++- **Configuration Management**: Support for properties files and YAML configurations ++- **LLM Proxy Access**: Integration with language models for AI-powered tasks ++- **Provenance Submission**: Automatic event tracking and audit logging ++- **Keycloak Authentication**: Built-in OAuth2/JWT token management ++ ++### Example: Data Processing Agent ++ ++```python ++from agents.base import BaseAgent ++import requests ++ ++class DataProcessingAgent(BaseAgent): ++ def __init__(self, config_manager): ++ super().__init__(config_manager, name="data-processor") ++ self.api_endpoint = config_manager.get_property('api.endpoint') ++ ++ def execute_task(self, task_data=None): ++ """Process data from external sources""" ++ ++ # Fetch data from API ++ headers = self.get_auth_headers() ++ response = requests.get( ++ f"{self.api_endpoint}/data", ++ headers=headers ++ ) ++ ++ if response.status_code == 200: ++ data = response.json() ++ processed = self.process(data) ++ ++ # Submit results ++ self.submit_results(processed) ++ ++ # Track in provenance ++ self.submit_provenance( ++ event_type="DATA_PROCESSED", ++ details={ ++ "records": len(processed), ++ "status": "success" ++ } ++ ) ++ ++ return {"status": "completed", "records": len(processed)} ++ else: ++ self.logger.error(f"Failed to fetch data: {response.status_code}") ++ return {"status": "failed", "error": response.text} ++ ++ def process(self, data): ++ """Process the data""" ++ # Your processing logic ++ return 
[item for item in data if self.is_valid(item)] ++ ++ def is_valid(self, item): ++ """Validate data item""" ++ return item.get('status') == 'active' ++ ++ def submit_results(self, processed_data): ++ """Submit processed data back to API""" ++ headers = self.get_auth_headers() ++ requests.post( ++ f"{self.api_endpoint}/results", ++ headers=headers, ++ json=processed_data ++ ) ++``` ++ ++## Agent Development Best Practices ++ ++### 1. Authentication ++ ++Always use proper OAuth2/JWT authentication: ++ ++**Java:** ++```java ++@Autowired ++private OAuth2ClientService oauth2Client; ++ ++public String getAccessToken() { ++ return oauth2Client.getAccessToken("java-agents"); ++} ++``` ++ ++**Python:** ++```python ++def get_auth_headers(self): ++ token = self.auth_manager.get_access_token() ++ return { ++ 'Authorization': f'Bearer {token}', ++ 'Content-Type': 'application/json' ++ } ++``` ++ ++### 2. Provenance Tracking ++ ++Submit detailed provenance events for audit trails: ++ ++**Java:** ++```java ++provenanceService.submit(ProvenanceEvent.builder() ++ .eventType("AGENT_ACTION") ++ .agentName("my-agent") ++ .action("process_data") ++ .details(Map.of( ++ "records_processed", count, ++ "duration_ms", duration ++ )) ++ .build()); ++``` ++ ++**Python:** ++```python ++self.submit_provenance( ++ event_type="AGENT_ACTION", ++ details={ ++ "action": "process_data", ++ "records_processed": count, ++ "duration_ms": duration ++ } ++) ++``` ++ ++### 3. 
Error Handling ++ ++Implement robust error handling and logging: ++ ++**Java:** ++```java ++try { ++ processData(); ++} catch (Exception e) { ++ logger.error("Failed to process data", e); ++ provenanceService.submit(ProvenanceEvent.builder() ++ .eventType("AGENT_ERROR") ++ .error(e.getMessage()) ++ .build()); ++ throw new AgentException("Processing failed", e); ++} ++``` ++ ++**Python:** ++```python ++try: ++ self.process_data() ++except Exception as e: ++ self.logger.error(f"Failed to process data: {e}") ++ self.submit_provenance( ++ event_type="AGENT_ERROR", ++ details={"error": str(e)} ++ ) ++ raise ++``` ++ ++### 4. Configuration Management ++ ++Use environment-specific configurations: ++ ++**Java:** ++```java ++@ConfigurationProperties(prefix = "agents.mycustom") ++public class MyAgentConfig { ++ private String apiEndpoint; ++ private int timeout = 30000; ++ private boolean enableRetry = true; ++ // Getters and setters ++} ++``` ++ ++**Python:** ++```python ++class MyAgentConfig: ++ def __init__(self, config_manager): ++ self.api_endpoint = config_manager.get_property('api.endpoint') ++ self.timeout = int(config_manager.get_property('api.timeout', '30')) ++ self.enable_retry = config_manager.get_property('api.retry', 'true') == 'true' ++``` ++ ++### 5. Testing ++ ++Test agents in isolation before integration: ++ ++**Java:** ++```java ++@SpringBootTest ++public class MyCustomAgentTest { ++ @Autowired ++ private MyCustomAgent agent; ++ ++ @Test ++ public void testProcessTask() { ++ // Arrange ++ TaskData data = new TaskData(); ++ ++ // Act ++ Result result = agent.processTask(data); ++ ++ // Assert ++ assertNotNull(result); ++ assertEquals("completed", result.getStatus()); ++ } ++} ++``` ++ ++**Python:** ++```bash ++# Test mode (no external services) ++TEST_MODE=true python main.py my-custom --task-data '{"test": true}' ++ ++# Unit tests ++python -m pytest tests/test_my_custom_agent.py ++``` ++ ++### 6. 
Resource Management ++ ++Be mindful of resource usage: ++ ++**Java:** ++```yaml ++mycustomagent: ++ resources: ++ requests: ++ memory: "256Mi" ++ cpu: "100m" ++ limits: ++ memory: "512Mi" ++ cpu: "500m" ++``` ++ ++**Python:** ++- Use connection pooling for database connections ++- Close resources properly in finally blocks ++- Implement timeouts for external API calls ++ ++### 7. Documentation ++ ++Document agent capabilities and configuration: ++ ++```markdown ++# My Custom Agent ++ ++## Purpose ++Brief description of what the agent does. ++ ++## Configuration ++List of configuration properties and their defaults. ++ ++## API Endpoints ++List of API endpoints the agent uses. ++ ++## Provenance Events ++List of events the agent submits. ++ ++## Dependencies ++External services or libraries required. ++``` ++ ++## Advanced Topics ++ ++### LLM Integration ++ ++Agents can leverage language models for AI-powered functionality: ++ ++**Java:** ++```java ++@Autowired ++private LLMProxyService llmProxy; ++ ++public String analyzeText(String text) { ++ LLMRequest request = LLMRequest.builder() ++ .prompt("Analyze the following text: " + text) ++ .maxTokens(500) ++ .build(); ++ ++ return llmProxy.complete(request).getContent(); ++} ++``` ++ ++**Python:** ++```python ++def analyze_text(self, text): ++ response = self.llm_client.complete( ++ prompt=f"Analyze the following text: {text}", ++ max_tokens=500 ++ ) ++ return response['content'] ++``` ++ ++### Dynamic Agent Deployment ++ ++Use the agent-launcher service for dynamic deployment: ++ ++```bash ++curl -X POST http://agent-launcher:8080/api/v1/agents/launch \ ++ -H "Authorization: Bearer " \ ++ -H "Content-Type: application/json" \ ++ -d '{ ++ "agentType": "my-custom-agent", ++ "configuration": { ++ "task": "process_data", ++ "schedule": "0 */5 * * *" ++ } ++ }' ++``` ++ ++### Session Interception ++ ++Agents can intercept and monitor SSH sessions: ++ ++```java ++@Component ++public class SessionInterceptor implements 
SshSessionListener { ++ ++ @Override ++ public void onCommand(SshSession session, String command) { ++ if (isDangerous(command)) { ++ session.block(); ++ notifyAdmin(session, command); ++ } ++ } ++ ++ private boolean isDangerous(String command) { ++ return command.contains("rm -rf") || command.contains("dd if="); ++ } ++} ++``` ++ ++## Next Steps ++ ++- Review [DEVELOPMENT.md](DEVELOPMENT.md) for development workflows ++- See [DEPLOYMENT.md](DEPLOYMENT.md) for deployment options ++- Check [INTEGRATIONS.md](INTEGRATIONS.md) for external service integrations ++- Read [python-agent/README.md](python-agent/README.md) for Python agent specifics +diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md +new file mode 100644 +index 00000000..86e3bf73 +--- /dev/null ++++ b/DEPLOYMENT.md +@@ -0,0 +1,374 @@ ++# Deployment Guide ++ ++This guide covers deployment options for Sentrius across different environments. ++ ++## Table of Contents ++ ++- [Prerequisites](#prerequisites) ++- [Local Development](#local-development) ++- [Kubernetes Deployment](#kubernetes-deployment) ++- [Cloud Deployments](#cloud-deployments) ++- [Configuration](#configuration) ++ ++## Prerequisites ++ ++### Required ++- **Java 17** or later ++- **Apache Maven 3.6+** ++- **PostgreSQL** database for storing session and configuration data ++- **Keycloak** for user authentication and authorization ++- **OpenTelemetry** endpoint for observability ++ ++### Optional ++- **Docker & Kubernetes** for containerized deployments ++- **Neo4j** for graph-based analysis ++- **Kafka** for event streaming ++- **Python 3.12+** for Python agents ++ ++## Local Development ++ ++### Quick Start with Script ++ ++For convenience, use the `run-sentrius.sh` script which starts the core and API modules: ++ ++```bash ++# Build the project first ++mvn clean install ++ ++# Run Sentrius locally (requires PostgreSQL and Keycloak) ++./ops-scripts/local/run-sentrius.sh --build ++``` ++ ++### Manual Start ++ ++```bash ++# Build the project ++mvn clean 
install ++ ++# Start the API server ++cd api ++mvn spring-boot:run ++``` ++ ++### Environment Variables ++ ++Configure using environment variables: ++ ++```bash ++export KEYCLOAK_BASE_URL=http://localhost:8180 ++export DATABASE_PASSWORD=password ++export KEYSTORE_PASSWORD=keystorepassword ++cd api ++mvn spring-boot:run ++``` ++ ++## Kubernetes Deployment ++ ++### Build Docker Images ++ ++#### Local Kubernetes ++Build all images sequentially: ++```bash ++./ops-scripts/base/build-images.sh --all --no-cache ++``` ++ ++Or build concurrently for faster builds (recommended): ++```bash ++./ops-scripts/base/build-all-images-concurrent.sh --all --no-cache ++``` ++ ++#### GCP Container Registry ++```bash ++# Build and push to GCP Container Registry ++./ops-scripts/base/build-images.sh gcp --all ++``` ++ ++#### Azure Container Registry ++```bash ++# Login to Azure Container Registry ++az acr login --name sentriusacr ++ ++# Build and push to Azure Container Registry ++./ops-scripts/base/build-images.sh azure --all ++``` ++ ++### Local Kubernetes Deployment ++ ++#### HTTP Deployment (Recommended for Development) ++ ++```bash ++# Deploy to local Kubernetes cluster ++./ops-scripts/local/deploy-helm.sh ++ ++# Forward ports for local access ++kubectl port-forward -n dev service/sentrius-sentrius 8080:8080 ++kubectl port-forward -n dev service/sentrius-keycloak 8081:8081 ++``` ++ ++Add to `/etc/hosts`: ++``` ++127.0.0.1 sentrius-sentrius ++127.0.0.1 sentrius-keycloak ++``` ++ ++Access at: ++- Sentrius UI: http://localhost:8080 ++- Keycloak: http://localhost:8081 ++ ++#### TLS Deployment ++ ++```bash ++# Deploy with TLS and auto-install cert-manager ++./ops-scripts/local/deploy-helm.sh --tls --install-cert-manager ++``` ++ ++Add to `/etc/hosts`: ++``` ++127.0.0.1 sentrius-dev.local ++127.0.0.1 keycloak-dev.local ++``` ++ ++Access at: ++- Sentrius UI: https://sentrius-dev.local ++- Keycloak: https://keycloak-dev.local ++ ++**Note**: Self-signed certificates will be automatically 
generated. ++ ++## Cloud Deployments ++ ++### GCP/GKE Deployment ++ ++```bash ++# Deploy to GKE cluster ++./ops-scripts/gcp/deploy-helm.sh --tenant ++``` ++ ++**Note**: Ensure you're connected to your GKE cluster and have the necessary permissions. ++ ++For detailed GCP deployment documentation, see [ops-scripts/gcp/README.md](ops-scripts/gcp/README.md). ++ ++### Azure/AKS Deployment ++ ++```bash ++# Deploy to AKS cluster (default domain: trustpolicy.ai) ++./ops-scripts/azure/deploy-helm.sh --tenant ++ ++# Deploy with custom domain ++./ops-scripts/azure/deploy-helm.sh --tenant --domain mycompany.com ++``` ++ ++**Prerequisites:** ++- Azure CLI configured: `az login && az aks get-credentials --resource-group sentrius-rg --name sentrius-aks-cluster` ++- Docker images pushed to Azure Container Registry ++- DNS zone configured in Azure DNS (default: trustpolicy.ai) ++ ++**Default Domain**: Azure deployments use `trustpolicy.ai` by default. You can specify a custom domain with the `--domain` parameter. ++ ++For detailed Azure deployment documentation, see [ops-scripts/azure/README.md](ops-scripts/azure/README.md). ++ ++### AWS Deployment ++ ++Sentrius Helm charts support AWS EKS deployments. See [Helm Chart Configuration](#helm-chart-configuration) for environment-specific settings. ++ ++## Helm Chart Configuration ++ ++### Available Charts ++ ++1. **sentrius-chart** - Complete Sentrius deployment with all services ++2. 
**sentrius-chart-launcher** - Lightweight chart focused on the launcher service ++ ++### Key Configuration Options ++ ++#### Environment Settings ++ ++```yaml ++environment: "local" # Options: local, gke, aws, azure ++tenant: "my-company" ++subdomain: "my-company.sentrius.cloud" ++``` ++ ++#### Core Services ++ ++```yaml ++sentrius: ++ image: ++ repository: sentrius ++ tag: latest ++ ++llmproxy: ++ image: ++ repository: sentrius-llmproxy ++ tag: latest ++ ++postgres: ++ storageSize: "10Gi" ++``` ++ ++#### Ingress Configuration ++ ++```yaml ++ingress: ++ enabled: true ++ class: "nginx" # or "gce" for GKE, "alb" for AWS ++ tlsEnabled: true ++ annotations: {} ++``` ++ ++#### TLS/SSL Configuration ++ ++For production with Let's Encrypt: ++```yaml ++certificates: ++ enabled: true ++ issuer: "letsencrypt-prod" ++ ++ingress: ++ tlsEnabled: true ++``` ++ ++For local development with self-signed certificates: ++```yaml ++environment: local ++certificates: ++ enabled: true ++ingress: ++ tlsEnabled: true ++``` ++ ++### Custom Values Example ++ ++Create a `my-values.yaml` file: ++ ++```yaml ++environment: "gke" ++tenant: "my-company" ++subdomain: "my-company.sentrius.cloud" ++ ++sentrius: ++ image: ++ repository: "my-registry/sentrius" ++ tag: "v1.0.0" ++ ++postgres: ++ storageSize: "20Gi" ++ ++ingress: ++ enabled: true ++ tlsEnabled: true ++ class: "gce" ++``` ++ ++Deploy with custom values: ++```bash ++helm install my-sentrius sentrius-chart -f my-values.yaml ++``` ++ ++### Multi-Environment Support ++ ++The charts support multiple deployment environments with different configurations: ++ ++**Local Development:** ++- Uses NodePort services ++- Minimal resource requirements ++- In-memory storage options ++ ++**GKE (Google Cloud):** ++- Uses LoadBalancer services ++- Managed certificates ++- Persistent storage ++ ++**AWS:** ++- ALB ingress support ++- EBS storage classes ++- AWS-specific annotations ++ ++**Azure:** ++- Azure Load Balancer integration ++- Azure disk storage ++- 
Azure-specific networking ++ ++## Configuration ++ ++### Database Configuration ++ ++```properties ++spring.datasource.url=jdbc:postgresql://localhost:5432/sentrius ++spring.datasource.username=postgres ++spring.datasource.password=postgres ++spring.jpa.hibernate.ddl-auto=update ++``` ++ ++### Keycloak Authentication ++ ++```properties ++keycloak.realm=sentrius ++keycloak.base-url=${KEYCLOAK_BASE_URL:http://localhost:8180} ++spring.security.oauth2.client.registration.keycloak.client-secret=${KEYCLOAK_SECRET:defaultSecret} ++spring.security.oauth2.client.registration.keycloak.client-id=sentrius-api ++spring.security.oauth2.client.registration.keycloak.authorization-grant-type=authorization_code ++spring.security.oauth2.client.registration.keycloak.redirect-uri=${BASE_URL:http://localhost:8080}/login/oauth2/code/keycloak ++spring.security.oauth2.client.registration.keycloak.scope=openid,profile,email ++spring.security.oauth2.resourceserver.jwt.issuer-uri=${KEYCLOAK_BASE_URL:http://localhost:8180}/realms/sentrius ++spring.security.oauth2.client.provider.keycloak.issuer-uri=${KEYCLOAK_BASE_URL:http://localhost:8180}/realms/sentrius ++``` ++ ++### SSH Settings ++ ++```properties ++sentrius.ssh.port=22 ++sentrius.ssh.connection-timeout=30000 ++``` ++ ++## Testing Deployments ++ ++### Helm Chart Testing ++ ++Test Helm charts locally before deployment: ++ ++```bash ++# Test all charts ++./ops-scripts/test-helm-charts.sh ++ ++# Test specific aspects ++./ops-scripts/test-helm-charts.sh lint # Lint charts ++./ops-scripts/test-helm-charts.sh template # Test rendering ++./ops-scripts/test-helm-charts.sh config # Test configurations ++``` ++ ++For detailed testing documentation, see [docs/helm-testing.md](docs/helm-testing.md). 
++ ++## Troubleshooting ++ ++### Build Failures ++ ++```bash ++# Clear Maven cache if build issues occur ++rm -rf ~/.m2/repository ++mvn clean install ++ ++# Check Java version ++java -version # Should be 17+ ++mvn -version # Should be 3.6+ ++``` ++ ++### Runtime Issues ++ ++```bash ++# Check required services ++curl http://localhost:8180 # Keycloak health ++psql -h localhost -U postgres -d sentrius # Database connectivity ++``` ++ ++### Container Issues ++ ++```bash ++# Reset Docker environment for local development ++eval $(minikube docker-env) ++docker images | grep sentrius ++``` ++ ++## Next Steps ++ ++- Review [DEVELOPMENT.md](DEVELOPMENT.md) for development workflows ++- See [INTEGRATIONS.md](INTEGRATIONS.md) for external service integrations ++- Check [CUSTOM_AGENTS.md](CUSTOM_AGENTS.md) for creating custom agents +diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md +new file mode 100644 +index 00000000..42239f69 +--- /dev/null ++++ b/DEVELOPMENT.md +@@ -0,0 +1,326 @@ ++# Development Guide ++ ++This guide covers development workflows, building, and testing Sentrius. 
++ ++## Table of Contents ++ ++- [Project Structure](#project-structure) ++- [Building](#building) ++- [Testing](#testing) ++- [Development Workflow](#development-workflow) ++- [Contributing](#contributing) ++ ++## Project Structure ++ ++Sentrius consists of multiple Maven sub-projects: ++ ++``` ++sentrius/ ++├── core/ # Core business logic and SSH session management ++├── api/ # REST API layer and web interface ++├── dataplane/ # Secure data transfer and processing ++├── llm-core/ # Language model integration core ++├── llm-dataplane/ # LLM data processing layer ++├── integration-proxy/ # LLM proxy service for AI integration ++├── agent-proxy/ # Agent communication proxy ++├── analytics/ # Java-based monitoring agent ++├── ai-agent/ # Intelligent monitoring and automation agent ++├── agent-launcher/ # Dynamic agent lifecycle management ++├── provenance-core/ # Event tracking and audit framework ++├── provenance-ingestor/ # Event ingestion and processing ++├── python-agent/ # Python-based agent framework ++├── ops-scripts/ # Operational scripts for deployment ++├── sentrius-chart/ # Helm chart for full deployment ++├── sentrius-chart-launcher/# Helm chart for launcher service ++└── pom.xml # Root Maven POM ++``` ++ ++### Core Module ++ ++Contains business logic, including: ++- Enclave management ++- Zero trust policy enforcement ++- Secure SSH connection handling ++ ++### API Module ++ ++A RESTful interface for interacting with the core functionalities. 
The API module exposes endpoints that let you: ++- Create and manage enclaves ++- Configure security rules ++- Visualize SSH sessions and logs ++- Handle user access and authentication ++ ++## Building ++ ++### Prerequisites ++ ++- **Java 17** or later ++- **Apache Maven 3.6+** ++ ++### Full Build ++ ++Build the entire project including all modules: ++ ++```bash ++mvn clean install ++``` ++ ++**Build Performance:** ++- Initial build: ~7 minutes (downloads dependencies) ++- Subsequent builds: 3-5 minutes (cached dependencies) ++- Test execution: ~1 minute ++ ++### Build Without Tests ++ ++To speed up builds during development: ++ ++```bash ++mvn clean install -DskipTests ++``` ++ ++### Build Specific Modules ++ ++Build only specific modules with dependencies: ++ ++```bash ++# Build core modules ++mvn clean install -pl core,api,dataplane -am ++ ++# Build specific module with dependencies ++mvn clean install -pl api -am ++``` ++ ++### Maven Warnings ++ ++The build produces these warnings which are **expected and safe to ignore**: ++ ++``` ++'dependencyManagement.dependencies.dependency' must be unique: org.projectlombok:lombok:jar ++'dependencyManagement.dependencies.dependency' must be unique: org.springframework.boot:spring-boot-starter-web:jar ++'dependencies.dependency' must be unique: org.springframework.boot:spring-boot-starter-actuator:jar ++``` ++ ++## Testing ++ ++### Running Tests ++ ++Run all tests: ++ ++```bash ++mvn test ++``` ++ ++Run tests for specific module: ++ ++```bash ++cd api ++mvn test ++``` ++ ++### CI/CD Testing ++ ++Sentrius includes comprehensive CI/CD testing: ++ ++- **Automated testing** runs on every push and pull request via GitHub Actions ++- **Helm chart validation** including linting, template rendering, and schema validation ++- **Integration testing** with Kubernetes clusters for deployment validation ++ ++### Local Helm Chart Testing ++ ++Test Helm charts locally before deployment: ++ ++```bash ++# Test all charts 
++./ops-scripts/test-helm-charts.sh ++ ++# Test specific aspects ++./ops-scripts/test-helm-charts.sh lint # Lint charts ++./ops-scripts/test-helm-charts.sh template # Test rendering ++./ops-scripts/test-helm-charts.sh config # Test configurations ++``` ++ ++For detailed testing documentation, see [docs/helm-testing.md](docs/helm-testing.md). ++ ++## Development Workflow ++ ++### Setting Up Development Environment ++ ++1. **Clone the repository:** ++ ```bash ++ git clone https://github.com/SentriusLLC/Sentrius-private.git ++ cd Sentrius-private ++ ``` ++ ++2. **Build the project:** ++ ```bash ++ mvn clean install -DskipTests ++ ``` ++ ++3. **Set up required services:** ++ - PostgreSQL database ++ - Keycloak authentication server ++ - OpenTelemetry endpoint (optional for development) ++ ++4. **Configure application properties:** ++ - Copy `application.properties.example` to `application.properties` ++ - Update database and Keycloak connection settings ++ ++### Running in Development Mode ++ ++#### Using the Convenience Script ++ ++```bash ++./ops-scripts/local/run-sentrius.sh --build ++``` ++ ++#### Manual Start ++ ++```bash ++cd api ++mvn spring-boot:run ++``` ++ ++#### With Custom Configuration ++ ++```bash ++export KEYCLOAK_BASE_URL=http://localhost:8180 ++export DATABASE_PASSWORD=password ++export KEYSTORE_PASSWORD=keystorepassword ++cd api ++mvn spring-boot:run ++``` ++ ++### Docker Image Development ++ ++Build Docker images for testing: ++ ++```bash ++# Build all images sequentially ++./ops-scripts/base/build-images.sh --all --no-cache ++ ++# Build all images concurrently (faster) ++./ops-scripts/base/build-all-images-concurrent.sh --all --no-cache ++ ++# Build specific images ++./ops-scripts/base/build-images.sh --sentrius --sentrius-keycloak ++``` ++ ++Build with development certificates: ++ ++```bash ++./ops-scripts/base/build-images.sh --all --include-dev-certs ++``` ++ ++### Python Agent Development ++ ++Python agents require Python 3.12+ and dependencies: 
++ ++```bash ++cd python-agent ++ ++# Install dependencies ++pip3 install -r requirements.txt ++ ++# Test mode (no external services required) ++TEST_MODE=true python3 main.py chat-helper --task-data '{"test": "message"}' ++ ++# Production mode ++python3 main.py chat-helper --config application.properties ++``` ++ ++See [CUSTOM_AGENTS.md](CUSTOM_AGENTS.md) for detailed agent development guide. ++ ++## Contributing ++ ++### Getting Started ++ ++1. Fork the repository ++2. Create a feature branch for your changes ++3. Make your changes following the coding standards ++4. Write tests for your changes ++5. Run the full test suite ++6. Open a pull request with a clear description ++ ++### Coding Standards ++ ++- Follow existing code style and patterns ++- Write meaningful commit messages ++- Add tests for new functionality ++- Update documentation as needed ++- Keep changes focused and minimal ++ ++### Pull Request Process ++ ++1. Ensure all tests pass ++2. Update documentation if needed ++3. Add a clear description of changes ++4. Link to relevant issues ++5. Wait for code review ++6. Address review feedback ++ ++### Reporting Issues ++ ++If you encounter any issues or have requests: ++ ++1. Check existing issues first ++2. Provide clear reproduction steps ++3. Include relevant logs and error messages ++4. Specify your environment (OS, Java version, etc.) ++ ++## Development Tips ++ ++### IDE Setup ++ ++**IntelliJ IDEA:** ++- Import as Maven project ++- Enable annotation processing for Lombok ++- Configure Java 17 SDK ++ ++**Eclipse:** ++- Import as Existing Maven Project ++- Install Lombok plugin ++- Set compiler compliance to Java 17 ++ ++### Debugging ++ ++**Local Debugging:** ++```bash ++cd api ++mvn spring-boot:run -Dspring-boot.run.jvmArguments="-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=5005" ++``` ++ ++Then attach your IDE debugger to port 5005. 
++ ++**Kubernetes Debugging:** ++```bash ++kubectl port-forward -n dev pod/<pod-name> 5005:5005 ++``` ++ ++### Hot Reload ++ ++Spring Boot DevTools is included for automatic restart on code changes: ++ ++```bash ++cd api ++mvn spring-boot:run ++``` ++ ++Changes to Java classes will trigger automatic restart. ++ ++## Performance Expectations ++ ++| Operation | Time | Notes | ++|-----------|------|-------| ++| Maven build (clean install) | 7m24s | First build, downloads dependencies | ++| Maven build (cached) | 3-5m | Subsequent builds | ++| Maven test execution | 1m3s | Full test suite | ++| Docker image build | 5-10m | All images, sequential | ++| Docker image build (concurrent) | 3-7m | All images, parallel | ++| Python dependency install | <1m | Initial setup | ++ ++## Next Steps ++ ++- Review [DEPLOYMENT.md](DEPLOYMENT.md) for deployment options ++- See [INTEGRATIONS.md](INTEGRATIONS.md) for external service integrations ++- Check [CUSTOM_AGENTS.md](CUSTOM_AGENTS.md) for creating custom agents ++- Read [API_DOCUMENTATION.md](docs/api-documentation.md) for API reference +diff --git a/INTEGRATIONS.md b/INTEGRATIONS.md +new file mode 100644 +index 00000000..cac2a2e3 +--- /dev/null ++++ b/INTEGRATIONS.md +@@ -0,0 +1,346 @@ ++# Integrations ++ ++Sentrius supports external service integrations through the integration-proxy module, providing secure, zero-trust access to external APIs and services. ++ ++## Table of Contents ++ ++- [GitHub Integration](#github-integration) ++- [JIRA Integration](#jira-integration) ++- [LLM Integration](#llm-integration) ++- [Self-Healing System](#self-healing-system) ++ ++## GitHub Integration ++ ++The GitHub MCP (Model Context Protocol) integration enables secure access to GitHub repositories, issues, and pull requests through dynamically launched MCP server containers.
++ ++### Features ++ ++- Query GitHub issues and pull requests ++- Access repository information ++- Clone and interact with repositories ++- All operations use a zero-trust security model ++ ++### Setup ++ ++#### 1. Store GitHub Token ++ ++Create an `IntegrationSecurityToken` with: ++- `connectionType`: "github" ++- `connectionInfo`: Your GitHub Personal Access Token ++ ++Via API: ++```bash ++curl -X POST http://localhost:8080/api/v1/integration/tokens \ ++ -H "Authorization: Bearer <token>" \ ++ -H "Content-Type: application/json" \ ++ -d '{ ++ "connectionType": "github", ++ "connectionInfo": "<github-personal-access-token>", ++ "description": "GitHub integration token" ++ }' ++``` ++ ++Via UI: ++1. Navigate to Integration Settings ++2. Click "Add Integration Token" ++3. Select "GitHub" as connection type ++4. Enter your GitHub Personal Access Token ++5. Save ++ ++#### 2. Launch MCP Server ++ ++```bash ++curl -X POST "http://integration-proxy:8080/api/v1/github/mcp/launch?tokenId=<token-id>" \ ++ -H "Authorization: Bearer <token>" ++``` ++ ++#### 3. Access via Service URL ++ ++The response includes a `serviceUrl` for accessing the GitHub MCP server within the cluster. ++ ++### Usage Examples ++ ++**Query Issues:** ++```bash ++curl http://<service-host>/issues?repo=owner/repo \ ++ -H "Authorization: Bearer <token>" ++``` ++ ++**Get Pull Request:** ++```bash ++curl http://<service-host>/pulls/123?repo=owner/repo \ ++ -H "Authorization: Bearer <token>" ++``` ++ ++For detailed documentation, see [integration-proxy/GITHUB_INTEGRATION.md](integration-proxy/GITHUB_INTEGRATION.md). ++ ++## JIRA Integration ++ ++The JIRA integration provides secure proxy access to JIRA APIs for ticket management and tracking.
++ ++### Features ++ ++- Search for JIRA issues ++- Get issue details ++- Manage issue comments ++- Assign issues to users ++ ++### Available Endpoints ++ ++#### Search Issues ++```bash ++curl -X GET "http://integration-proxy:8080/api/v1/jira/rest/api/3/search?jql=project=PROJ" \ ++ -H "Authorization: Bearer <token>" ++``` ++ ++#### Get Issue Details ++```bash ++curl -X GET "http://integration-proxy:8080/api/v1/jira/rest/api/3/issue/PROJ-123" \ ++ -H "Authorization: Bearer <token>" ++``` ++ ++#### Add Comment ++```bash ++curl -X POST "http://integration-proxy:8080/api/v1/jira/rest/api/3/issue/PROJ-123/comment" \ ++ -H "Authorization: Bearer <token>" \ ++ -H "Content-Type: application/json" \ ++ -d '{ ++ "body": "This is a comment" ++ }' ++``` ++ ++#### Assign Issue ++```bash ++curl -X PUT "http://integration-proxy:8080/api/v1/jira/rest/api/3/issue/PROJ-123/assignee" \ ++ -H "Authorization: Bearer <token>" \ ++ -H "Content-Type: application/json" \ ++ -d '{ ++ "accountId": "user-account-id" ++ }' ++``` ++ ++### Authentication ++ ++All JIRA requests are authenticated through Keycloak and validated against the user's permissions. ++ ++## LLM Integration ++ ++Sentrius includes a proxy service for integrating with Large Language Models (LLMs) while maintaining zero-trust security.
++ ++### Features ++ ++- Secure access to LLM APIs ++- Request/response logging ++- Usage tracking ++- Cost management ++ ++### Supported Models ++ ++- OpenAI GPT models ++- Anthropic Claude models ++- Custom model endpoints ++ ++### Configuration ++ ++Configure in `application.properties`: ++ ++```properties ++llm.proxy.openai.api-key=${OPENAI_API_KEY} ++llm.proxy.anthropic.api-key=${ANTHROPIC_API_KEY} ++llm.proxy.default-model=gpt-4 ++llm.proxy.max-tokens=2000 ++``` ++ ++### Usage ++ ++```bash ++curl -X POST http://llm-proxy:8080/api/v1/llm/complete \ ++ -H "Authorization: Bearer " \ ++ -H "Content-Type: application/json" \ ++ -d '{ ++ "prompt": "Analyze this SSH session for anomalies", ++ "model": "gpt-4", ++ "maxTokens": 500 ++ }' ++``` ++ ++## Self-Healing System ++ ++Sentrius includes an intelligent self-healing system that automatically detects, analyzes, and repairs errors in your infrastructure. ++ ++### Key Features ++ ++- **Automatic Error Detection**: Continuously monitors error output and OpenTelemetry data ++- **Security Analysis**: Analyzes errors for security concerns before attempting repairs ++- **Flexible Patching Policies**: Configure when repairs should be applied ++- **Coding Agent Deployment**: Automatically launches agents to analyze and fix errors ++- **Docker Image Building**: Builds and deploys fixed images automatically ++- **GitHub Integration**: Creates pull requests with fixes (requires GitHub integration) ++ ++### Configuration ++ ++#### Web UI Configuration ++ ++1. Navigate to **Self-Healing Configuration** (`/sso/v1/self-healing/config`) ++2. Click **Add Pod Configuration** ++3. Set the pod name, type, and patching policy ++4. 
Enable or disable self-healing for the pod ++ ++#### API Configuration ++ ++```bash ++# Create or update configuration ++curl -X POST http://localhost:8080/api/v1/self-healing/config \ ++ -H "Content-Type: application/json" \ ++ -H "Authorization: Bearer " \ ++ -d '{ ++ "podName": "sentrius-api", ++ "podType": "api", ++ "patchingPolicy": "OFF_HOURS", ++ "enabled": true ++ }' ++``` ++ ++#### Patching Policies ++ ++- **Immediate**: Apply fixes as soon as errors are detected ++- **Off-Hours**: Queue fixes for maintenance windows (default: 10 PM - 6 AM) ++- **Never**: Disable self-healing (manual intervention required) ++ ++#### Helm Configuration ++ ++Update `values.yaml`: ++ ++```yaml ++selfHealing: ++ enabled: true ++ offHours: ++ start: 22 # 10 PM ++ end: 6 # 6 AM ++ codingAgent: ++ clientId: "coding-agents" ++ clientSecret: "" # Set in secrets ++ agentLauncher: ++ url: "http://sentrius-agents-launcherservice:8080" ++ builder: ++ namespace: "dev" ++ autoBuild: true ++ github: ++ enabled: false # Auto-enabled if GitHub integration exists ++``` ++ ++**Important**: Self-healing requires GitHub integration to be configured. The system will automatically detect if a GitHub token exists. ++ ++### Viewing Healing Sessions ++ ++Monitor healing sessions via: ++ ++1. Navigate to **Self-Healing Sessions** (`/sso/v1/self-healing/sessions`) ++2. Filter by status: All, Active, or Completed ++3. View detailed information: ++ - Agent activity and logs ++ - Security analysis results ++ - Docker build status ++ - GitHub PR links ++ ++### How It Works ++ ++1. **Error Detection**: Scans error_output table every 5 minutes ++2. **Policy Check**: Determines if healing is enabled for the affected pod ++3. **Security Analysis**: Analyzes error logs for security keywords ++4. **Agent Launch**: Launches coding agent pod if safe to proceed ++5. **Code Repair**: Agent examines error and generates fixes ++6. **Docker Build**: Creates new Docker image with fixes ++7. 
**GitHub PR**: Creates pull request with changes (if configured) ++8. **Completion**: Updates healing session with results ++ ++### Security Considerations ++ ++- **GitHub Integration Required**: Self-healing only proceeds if GitHub integration is configured ++- **Security Analysis**: Security-related errors require manual review ++- **Audit Trail**: All healing attempts are logged ++- **Isolated Execution**: Agents run in isolated Kubernetes pods ++ ++### Manual Triggering ++ ++Trigger self-healing for specific errors: ++ ++Via UI: ++1. Navigate to **Error Logs** (`/sso/v1/notifications/error/log/get`) ++2. Click **Trigger Self-Healing** on any error ++ ++Via API: ++```bash ++curl -X POST http://localhost:8080/api/v1/self-healing/trigger/{errorId} \ ++ -H "Authorization: Bearer " ++``` ++ ++### Database Schema ++ ++The system uses three main tables: ++- `self_healing_config`: Patching policies per pod/service ++- `self_healing_session`: Tracks each healing attempt ++- `error_output`: Extended with healing status fields ++ ++## Creating Custom Integrations ++ ++### Integration Proxy Pattern ++ ++To add a new integration: ++ ++1. **Create Integration Controller:** ++ ```java ++ @RestController ++ @RequestMapping("/api/v1/myservice") ++ public class MyServiceIntegrationController { ++ ++ @Autowired ++ private IntegrationTokenService tokenService; ++ ++ @GetMapping("/data") ++ public ResponseEntity getData( ++ @RequestHeader("Authorization") String auth, ++ @RequestParam Long tokenId ++ ) { ++ // Validate user has access ++ IntegrationToken token = tokenService.getToken(tokenId); ++ ++ // Call external service ++ String result = callExternalService(token); ++ ++ return ResponseEntity.ok(result); ++ } ++ } ++ ``` ++ ++2. **Add Token Type:** ++ ```java ++ public enum IntegrationConnectionType { ++ GITHUB, ++ JIRA, ++ MYSERVICE // Add your integration ++ } ++ ``` ++ ++3. 
**Configure Security:** ++ ```java ++ @Configuration ++ public class MyServiceSecurityConfig { ++ // Configure authentication and authorization ++ } ++ ``` ++ ++### MCP Server Integration ++ ++For services supporting Model Context Protocol: ++ ++1. Create MCP server Docker image ++2. Add launcher endpoint in integration-proxy ++3. Configure Kubernetes service for dynamic containers ++4. Implement token-based authentication ++ ++## Next Steps ++ ++- Review [DEPLOYMENT.md](DEPLOYMENT.md) for deployment options ++- See [DEVELOPMENT.md](DEVELOPMENT.md) for development workflows ++- Check [CUSTOM_AGENTS.md](CUSTOM_AGENTS.md) for creating custom agents +diff --git a/README.md b/README.md +index b6b32fb4..481f09ee 100644 +--- a/README.md ++++ b/README.md +@@ -1,782 +1,123 @@ +-Sentrius +- +-![image](docs/images/dashboard.png) +- +-Sentrius is zero trust (and if you want AI assisted) management system. to protect your infrastructure. It is split +-into several maven projects. Agents can be leveraged to monitor and control infra ( SSH, APIs, RDP eventually), ensuring that all connections are secure and compliant with your organization's policies. +-Agents can access external resources ( like LLMs or integrations ) via a zero trust assisted access token. +-sub-projects: +- +- core – Handles the core functionalities (e.g., SSH session management, zero trust policy enforcement). +- api – Provides a RESTful API layer to interface with the core module. +- dataplane – Offers dataplane functionality for secure data transfer and processing. +- integration-proxy – A proxy service that integrates with large language models (LLMs) and external services (like GitHub, JIRA) to enhance security and compliance. Supports dynamic MCP (Model Context Protocol) server management for GitHub integrations. +- llm-dataplane – A data processing layer that leverages LLMs for advanced analysis and decision-making in SSH sessions. 
+- ops-scripts – Contains operational scripts for deployment and management tasks. +- ai-agent – Java-based intelligent agent framework for monitoring and controlling SSH sessions. +- agent-launcher – Service for dynamically launching and managing agents. +- python-agent – Python-based agent framework for SSH session monitoring and user assistance. +- +-Internally, Sentrius may still be referenced by its former name, SSO (SecureShellOps), in certain scripts or configurations. +-Table of Contents +- +- Key Features +- Project Structure +- Prerequisites +- Installation +- Configuration +- Running Sentrius +- Helm Chart Deployment +- Testing +- Integrations +- Custom Agents +- Usage +- API Documentation +- Contributing +- License +- Contact +- +-Key Features +- +- Zero Trust Security +- Sentrius enforces zero trust policies, ensuring that every SSH connection is authenticated, authorized, and constantly monitored. +- +- Enclaves +- Group hosts into logical enclaves and apply role-based access control for fine-grained permissions. Simplify security oversight by separating and organizing your infrastructure. +- +- Dynamic Rules Enforcement +- Define flexible, context-aware rules that adapt to real-time changes in your environment (e.g., user risk score, time of day, IP ranges). +- +- REST API +- Manage your SSH configurations, enclaves, security rules, and sessions programmatically using a well-documented REST API. +- +- Self-Healing System +- Automatically detects, analyzes, and repairs system errors through intelligent coding agents. Configure patching policies (immediate, off-hours, or never) per pod/service, with built-in security analysis to prevent healing of security-sensitive errors without manual review. When configured, the system can automatically create GitHub pull requests with fixes. 
+- +-Custom SSH Server responds via Sentrius UI or terminals +-![image](docs/images/ssh.png) +- +-Agent Designer supports natural language prompts to create custom agents that can monitor and control SSH sessions, automate tasks, and provide user assistance. The Agent Designer allows you to define agent behavior, capabilities, and interactions with the Sentrius platform. +-![image](docs/images/agentdesigner.png) +- +-Project Structure +- +-Sentrius consists of multiple sub-projects: +- +- core +- Contains business logic, including: +- Enclave management +- Zero trust policy enforcement +- Secure SSH connection handling +- +- api +- A RESTful interface for interacting with the core functionalities. The api module exposes endpoints that let you: +- Create and manage enclaves +- Configure security rules +- Visualize SSH sessions and logs +- Handle user access and authentication ++# Sentrius + +-sentrius/ +-├── core/ +-│ ├── src/ +-│ └── pom.xml +-├── api/ +-│ ├── src/ +-│ └── pom.xml +-├── ops-scripts/ +-│ └── gcp/ +-│ └── deploy-helm.sh +-├── pom.xml +-└── ... +- +-Prerequisites +- +- Java 17 or later +- Apache Maven 3.6+ +- Database (PostgreSQL, MySQL, etc.) for storing session and configuration data +- Keycloak for user authentication and authorization +- (Optional) Docker & Kubernetes if you plan to deploy on a containerized environment +- (Optional) python 3.6+ for the python agent +- +-Installation +- +- Clone the Repository +- +-git clone https://github.com/your-organization/sentrius.git +-cd sentrius +- +-#Running Sentrius +- +-Build the projects from root ( mvn clean install ) to ensure all dependencies are resolved and the modules are compiled. +- +-For convenience the ops/local directory contains a "run-sentrius.sh" script which will start the core and api +-modules. You can run this script from the project root. +-This assumes you have a database available, keycloak running, and the necessary configurations. 
We now require an +-OTEL endpoint, along with neo4j and kafka (but these are optional).: +- +- ./ops/local/run-sentrius.sh +- +-It is simpler to run a kubernetes deployment, which is described in the Deployment. To do this, build as you would +-above. +- +-Build the images in your local Docker registry (note this builds all images, including core, api, and any other modules): +- +- /build-images.sh --all --no-cache +- +-For faster builds, you can use the concurrent build script which builds all images in parallel: +- +- ./ops-scripts/base/build-all-images-concurrent.sh --all --no-cache +- +-Run the Helm deployment script to deploy Sentrius to your local Kubernetes cluster: +- +- ./ops-scripts/local/deploy-helm.sh +- +- +-## If Not using TLS +-You may wish to forward ports so you can access the services locally. The following commands will forward the necessary ports for the core and api modules: +- kubectl port-forward -n dev service/sentrius-sentrius 8080:8080 +- kubectl port-forward -n dev service/sentrius-keycloak 8081:8081 +- +-This will require that you either change the hostnames in the deploy-helm script or add entries to your /etc/hosts file to point to localhost for the services. +- 127.0.0.1 sentrius-sentrius +- 127.0.0.1 sentrius-keycloak +- +-## If Using TLS +-The deploy script will automatically install cert-manager and create self-signed certificates for the services. You can access the services via: +- +- https://sentrius-dev.local +- https://keycloak-dev.local +- +-Add these to /etc/hosts file pointing to your minikube or local cluster IP. +- +- +-There is a GCP deployment that is hasn't been tested in some time. You can find it in the ops-scripts/gcp directory. +- +-You will need to ensure you link to your GKE cluster and have the necessary permissions to deploy resources. +- +- ./ops-scripts/gcp/deploy-helm.sh +- +-You are welcome to run the core and api modules separately, as needed. 
You can start the core module by running: +- +- mvn install +- cd api +- mvn spring-boot:run +- +-## Testing +- +-### CI/CD Testing ++![Sentrius Dashboard](docs/images/dashboard.png) + +-Sentrius includes comprehensive CI/CD testing for Helm charts and Java builds: ++**Sentrius** is a zero trust security platform for protecting your infrastructure. Monitor and control SSH connections, APIs, and RDP sessions with AI-powered agents, ensuring all access is secure and compliant with your organization's policies. + +-- **Automated testing** runs on every push and pull request via GitHub Actions +-- **Helm chart validation** including linting, template rendering, and schema validation +-- **Integration testing** with Kubernetes clusters for deployment validation ++## 🚀 Quick Start + +-### Local Testing +- +-Test Helm charts locally before deployment: +- +- # Test all charts +- ./ops-scripts/test-helm-charts.sh +- +- # Test specific aspects +- ./ops-scripts/test-helm-charts.sh lint # Lint charts +- ./ops-scripts/test-helm-charts.sh template # Test rendering +- ./ops-scripts/test-helm-charts.sh config # Test configurations +- +-For detailed testing documentation, see [docs/helm-testing.md](docs/helm-testing.md). +- +-Build the Project +- +-Sentrius uses Maven for its build process. Ensure Maven is installed and then run: +- +- mvn clean install +- +- This command will build both the core and api sub-projects, downloading any required dependencies. +- +-Configuration +- +-Sentrius requires properties in order to connect to databases, authenticate users, and configure SSH session parameters. You can supply them in src/main/resources/application.properties or via external configuration (e.g., environment variables or config files). 
+- +-Typical settings include: +- +- Database Configuration +- +-spring.datasource.url=jdbc:postgresql://localhost:5432/sentrius +-spring.datasource.username=postgres +-spring.datasource.password=postgres +-spring.jpa.hibernate.ddl-auto=update +- +-Security & Authentication +- +-# JWT or OAuth +-To configure Keycloak, you can use the following properties: +- +- keycloak.realm=sentrius +- keycloak.base-url=${KEYCLOAK_BASE_URL:http://localhost:8180} +- spring.security.oauth2.client.registration.keycloak.client-secret=${KEYCLOAK_SECRET:defaultSecret} +- +- spring.security.oauth2.client.registration.keycloak.client-id=sentrius-api +- spring.security.oauth2.client.registration.keycloak.authorization-grant-type=authorization_code +- spring.security.oauth2.client.registration.keycloak.redirect-uri=${BASE_URL:http://localhost:8080}/login/oauth2/code/keycloak +- spring.security.oauth2.client.registration.keycloak.scope=openid,profile,email +- +- spring.security.oauth2.resourceserver.jwt.issuer-uri=${KEYCLOAK_BASE_URL:http://localhost:8180}/realms/sentrius +- spring.security.oauth2.client.provider.keycloak.issuer-uri=${KEYCLOAK_BASE_URL:http://localhost:8180}/realms/sentrius +- +- +-SSH Settings +- +- sentrius.ssh.port=22 +- sentrius.ssh.connection-timeout=30000 +- +- Core and API Specifics +- Core might need additional application-specific properties (e.g., caching, logging). +- The API often needs separate configurations for its own port, API versioning, or logging settings. +- +-Feel free to structure your configs based on your environment (dev/test/prod). For large-scale deployments, we recommend using a secure secrets manager (HashiCorp Vault, AWS Secrets Manager, etc.) to avoid storing sensitive information in plain text. +- +-## Helm Chart Deployment +- +-Sentrius provides comprehensive Helm charts for Kubernetes deployment across multiple environments. There are two main charts available: +- +-### Available Charts +- +-1. 
**sentrius-chart** - Complete Sentrius deployment with all services +-2. **sentrius-chart-launcher** - Lightweight chart focused on the launcher service +- +-### Quick Start +- +-#### Local Deployment ++### Deploy with Kubernetes (Recommended) + + ```bash +-# Build all images (sequential) +-./build-images.sh --all --no-cache +- +-# OR build all images concurrently (faster) ++# Build Docker images (3-7 minutes) + ./ops-scripts/base/build-all-images-concurrent.sh --all --no-cache + +-# Deploy to local Kubernetes cluster (HTTP) ++# Deploy to local cluster + ./ops-scripts/local/deploy-helm.sh + +-# OR deploy with TLS enabled for secure transport +-./ops-scripts/local/deploy-helm.sh --tls +- +-# OR deploy with TLS and auto-install cert-manager +-./ops-scripts/local/deploy-helm.sh --tls --install-cert-manager +- +-# Forward ports for local access (HTTP deployment) ++# Access services + kubectl port-forward -n dev service/sentrius-sentrius 8080:8080 +-kubectl port-forward -n dev service/sentrius-keycloak 8081:8081 + ``` + +-**For HTTP deployment**, add to `/etc/hosts`: +-``` +-127.0.0.1 sentrius-sentrius +-127.0.0.1 sentrius-keycloak +-``` +- +-**For TLS deployment**, add to `/etc/hosts`: +-``` +-127.0.0.1 sentrius-dev.local +-127.0.0.1 keycloak-dev.local +-``` ++Open http://localhost:8080 in your browser. + +-**TLS Requirements:** +-- cert-manager must be installed in your cluster. 
You can: +- - Install manually: `kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.0/cert-manager.yaml` +- - Use auto-install flag: `./ops-scripts/local/deploy-helm.sh --tls --install-cert-manager` +-- Access via: `https://sentrius-dev.local` and `https://keycloak-dev.local` +-- Self-signed certificates will be automatically generated +- +-#### GCP/GKE Deployment ++### Run Locally (Development) + + ```bash +-# Deploy to GKE cluster +-./ops-scripts/gcp/deploy-helm.sh +-``` +- +-### Chart Configuration +- +-#### Key Configuration Options +- +-**Environment Settings:** +-- `environment`: Set to "local", "gke", "aws", or "azure" +-- `tenant`: Your tenant identifier +-- `subdomain`: Domain for your deployment +- +-**Core Services:** +-- `sentrius.image.repository`: Core Sentrius image repository +-- `llmproxy.image.repository`: LLM proxy image repository +-- `postgres.storageSize`: Database storage allocation +- +-**Ingress Configuration:** +-```yaml +-ingress: +- enabled: true +- class: "nginx" # or "gce" for GKE +- tlsEnabled: true +- annotations: +- gke: +- kubernetes.io/ingress.class: gce +- networking.gke.io/managed-certificates: wildcard-cert +-``` +- +-**TLS/SSL Configuration:** +-```yaml +-certificates: +- enabled: true # Enable certificate generation +- issuer: "letsencrypt-prod" # For AWS/Azure (cert-manager) +- +-# For local development with self-signed certificates: +-environment: local +-certificates: +- enabled: true +-ingress: +- tlsEnabled: true +-``` +- +-**Agent Configuration:** +-```yaml +-sentriusagent: +- image: +- repository: sentrius-agent +- oauth2: +- client_id: java-agents +- client_secret: your-secret +- +-sentriusaiagent: +- image: +- repository: sentrius-ai-agent +- oauth2: +- client_id: java-agents +-``` +- +-#### Custom Values Example +- +-Create a `my-values.yaml` file: +-```yaml +-environment: "gke" +-tenant: "my-company" +-subdomain: "my-company.sentrius.cloud" +- +-sentrius: +- image: +- repository: 
"my-registry/sentrius" +- tag: "v1.0.0" ++# Build project ++mvn clean install + +-postgres: +- storageSize: "20Gi" +- +-ingress: +- enabled: true +- tlsEnabled: true +- class: "gce" +-``` +- +-Deploy with custom values: +-```bash +-helm install my-sentrius sentrius-chart -f my-values.yaml ++# Start services (requires PostgreSQL and Keycloak) ++./ops-scripts/local/run-sentrius.sh --build + ``` + +-### Multi-Environment Support +- +-The charts support multiple deployment environments with different configurations: +- +-**Local Development:** +-- Uses NodePort services +-- Minimal resource requirements +-- In-memory storage options +- +-**GKE (Google Cloud):** +-- Uses LoadBalancer services +-- Managed certificates +-- Persistent storage +- +-**AWS:** +-- ALB ingress support +-- EBS storage classes +-- AWS-specific annotations +- +-**Azure:** +-- Azure Load Balancer integration +-- Azure disk storage +-- Azure-specific networking +- +-### Helm Testing +- +-For comprehensive testing documentation including CI/CD testing, local testing, and troubleshooting, see [docs/helm-testing.md](docs/helm-testing.md). +- +-## Integrations +- +-Sentrius supports external service integrations through the integration-proxy module, providing secure, zero-trust access to external APIs and services. +- +-### GitHub Integration +- +-The GitHub MCP (Model Context Protocol) integration enables secure access to GitHub repositories, issues, and pull requests through dynamically launched MCP server containers. ++See [DEPLOYMENT.md](DEPLOYMENT.md) for detailed deployment options. + +-**Features:** +-- Query GitHub issues and pull requests +-- Access repository information +-- Clone and interact with repositories +-- All operations use zero-trust security model ++## ✨ Key Features + +-**Setup:** ++### Zero Trust Security ++Enforce zero trust policies with continuous authentication, authorization, and monitoring for every connection. + +-1. 
**Store GitHub Token:** +- Create an `IntegrationSecurityToken` with: +- - `connectionType`: "github" +- - `connectionInfo`: Your GitHub Personal Access Token ++### SSH Session Management ++![SSH Session Management](docs/images/ssh.png) + +-2. **Launch MCP Server:** +- ```bash +- curl -X POST "http://integration-proxy:8080/api/v1/github/mcp/launch?tokenId=" \ +- -H "Authorization: Bearer " +- ``` ++Secure SSH connections with real-time monitoring, command filtering, and session recording. Access through the web UI or terminal. + +-3. **Access via Service URL:** +- The response includes a `serviceUrl` for accessing the GitHub MCP server within the cluster. ++### AI-Powered Agent Designer ++![Agent Designer](docs/images/agentdesigner.png) + +-For detailed documentation, see [integration-proxy/GITHUB_INTEGRATION.md](integration-proxy/GITHUB_INTEGRATION.md). ++Create custom agents using natural language prompts. Agents can monitor sessions, automate tasks, and provide user assistance. + +-### JIRA Integration ++### Enclaves & Access Control ++Group hosts into logical enclaves with role-based access control for fine-grained permissions and simplified security oversight. + +-The JIRA integration provides secure proxy access to JIRA APIs for ticket management and tracking. ++### Dynamic Rules Enforcement ++Define flexible, context-aware rules that adapt to real-time changes (user risk score, time of day, IP ranges). + +-**Available Endpoints:** +-- `/api/v1/jira/rest/api/3/search` - Search for JIRA issues +-- `/api/v1/jira/rest/api/3/issue` - Get issue details +-- `/api/v1/jira/rest/api/3/issue/comment` - Manage issue comments +-- `/api/v1/jira/rest/api/3/issue/assignee` - Assign issues ++### Self-Healing System ++Automatically detect, analyze, and repair system errors through intelligent coding agents. Configure patching policies per service with built-in security analysis. 
+ +-All JIRA requests are authenticated through Keycloak and validated against the user's permissions. ++### External Integrations ++Integrate with GitHub, JIRA, and LLMs through secure zero-trust proxies. All integrations use access tokens with granular permissions. + +-## Self-Healing System ++## 📋 Prerequisites + +-Sentrius includes an intelligent self-healing system that automatically detects, analyzes, and repairs errors in your infrastructure. ++**Required:** ++- Java 17+ ++- Maven 3.6+ ++- PostgreSQL database ++- Keycloak authentication server ++- Docker & Kubernetes (for containerized deployment) + +-### Key Features ++**Optional:** ++- Neo4j (graph analysis) ++- Kafka (event streaming) ++- Python 3.12+ (Python agents) + +-- **Automatic Error Detection**: Continuously monitors the error output table and OpenTelemetry data for system errors +-- **Security Analysis**: Automatically analyzes errors to determine if they pose security concerns before attempting repairs +-- **Flexible Patching Policies**: Configure per-pod/service policies for when repairs should be applied: +- - **Immediate**: Apply fixes as soon as errors are detected +- - **Off-Hours**: Queue fixes to apply during configured maintenance windows (default: 10 PM - 6 AM) +- - **Never**: Disable self-healing for critical services that require manual intervention +-- **Coding Agent Deployment**: Automatically launches isolated coding agent pods to analyze errors and generate fixes +-- **Docker Image Building**: Spins up Kubernetes Jobs using Kaniko to build and push Docker images with the fixes +-- **Complete Workflow Automation**: Coordinates agent launch, monitoring, image building, and optional GitHub PR creation +-- **Read-Only Agent Monitoring**: View real-time agent activity and healing progress through the UI (non-security errors only) +-- **GitHub Integration**: Optionally create pull requests with fixes when GitHub credentials are configured ++## 📚 Documentation + +-### Configuration ++- 
**[Deployment Guide](DEPLOYMENT.md)** - Deploy Sentrius locally, on Kubernetes, or cloud platforms ++- **[Development Guide](DEVELOPMENT.md)** - Build, test, and contribute to Sentrius ++- **[Custom Agents](CUSTOM_AGENTS.md)** - Create Java and Python agents for monitoring and automation ++- **[Integrations](INTEGRATIONS.md)** - Connect with GitHub, JIRA, LLMs, and self-healing system ++- **[API Documentation](docs/)** - REST API reference and guides + +-Self-healing can be configured through the web UI or via API: ++## 🏗️ Architecture + +-#### Web UI Configuration ++Sentrius consists of 12+ Maven modules organized for zero trust security: + +-1. Navigate to **Self-Healing Configuration** (`/sso/v1/self-healing/config`) +-2. Click **Add Pod Configuration** to create a new policy +-3. Set the pod name, type, and patching policy using the slider control +-4. Enable or disable self-healing for the pod +- +-#### API Configuration +- +-```bash +-# Create or update a self-healing configuration +-curl -X POST http://localhost:8080/api/v1/self-healing/config \ +- -H "Content-Type: application/json" \ +- -H "Authorization: Bearer " \ +- -d '{ +- "podName": "sentrius-api", +- "podType": "api", +- "patchingPolicy": "OFF_HOURS", +- "enabled": true +- }' +- +-# Get all configurations +-curl http://localhost:8080/api/v1/self-healing/config \ +- -H "Authorization: Bearer " +-``` +- +-#### Application Properties +- +-Self-healing configuration is managed through Helm values and automatically populated into the ConfigMap. 
Update `values.yaml`: +- +-```yaml +-selfHealing: +- enabled: true +- offHours: +- start: 22 # 10 PM +- end: 6 # 6 AM +- codingAgent: +- clientId: "coding-agents" +- clientSecret: "" # Set in secrets +- agentLauncher: +- url: "http://sentrius-agents-launcherservice:8080" +- builder: +- namespace: "dev" +- image: "gcr.io/kaniko-project/executor:latest" +- timeoutSeconds: 1800 +- autoBuild: true +- docker: +- registry: "" # Leave empty for local registry +- github: +- enabled: false # Auto-enabled if GitHub integration exists +- apiUrl: "https://api.github.com" +- owner: "" +- repo: "" +-``` +- +-**Important**: Self-healing requires GitHub integration to be configured in the integration tokens table. The system will automatically detect if a GitHub token exists and only proceed if configured. To add a GitHub integration token, navigate to the Integration Settings in the UI and add a token with `connectionType: "github"`. +- +-### Viewing Healing Sessions +- +-Monitor active and completed healing sessions: +- +-1. Navigate to **Self-Healing Sessions** (`/sso/v1/self-healing/sessions`) +-2. Filter by status: All, Active, or Completed +-3. View detailed information about each session including: +- - Agent activity and logs +- - Security analysis results +- - Docker build status +- - GitHub PR links (if created) +- - Error details and resolution +- +-### How It Works +- +-The self-healing workflow consists of several automated steps: +- +-1. **Error Detection**: The system scans the error_output table every 5 minutes for new errors +-2. **Policy Check**: Determines if healing is enabled for the affected pod and checks the patching policy +-3. **Security Analysis**: Analyzes error logs for security-related keywords +-4. **Agent Launch**: If not a security concern, launches a coding agent pod to analyze and fix the error +-5. **Code Repair**: The coding agent examines the error, generates fixes, and commits changes +-6. 
**Docker Build**: A Kubernetes Job is created to build a new Docker image with the fixes using Kaniko +-7. **GitHub PR**: If configured, creates a pull request with the changes +-8. **Completion**: Updates the healing session with results and status +- +-The entire workflow is asynchronous and can handle multiple concurrent healing sessions. +- +-### Security Considerations +- +-The self-healing system includes built-in safety mechanisms: +- +-- **GitHub Integration Required**: Self-healing only proceeds if a GitHub integration token is configured in the system. This ensures all fixes can be tracked via pull requests. +-- **Security Analysis**: Errors containing security-related keywords (authentication, authorization, vulnerability, etc.) are flagged and require manual review before healing proceeds +-- **No Visibility Restriction**: Security-flagged errors are hidden from general users until cleared by administrators +-- **Audit Trail**: All healing attempts are logged and tracked in the `self_healing_session` table +-- **Isolated Execution**: Healing agents run in isolated Kubernetes pods with limited permissions +- +-### Manual Triggering +- +-You can manually trigger self-healing for specific errors (requires GitHub integration to be configured): +- +-1. Navigate to **Error Logs** (`/sso/v1/notifications/error/log/get`) +-2. Click **Trigger Self-Healing** on any error +-3. Monitor progress in the Self-Healing Sessions view +- +-Or via API: +- +-```bash +-curl -X POST http://localhost:8080/api/v1/self-healing/trigger/{errorId} \ +- -H "Authorization: Bearer " + ``` +- +-**Note**: If GitHub integration is not configured, the trigger will fail with a message prompting you to add a GitHub integration token first. 
+- +-### Database Schema +- +-The self-healing system uses three main tables: +- +-- `self_healing_config`: Stores patching policies per pod/service +-- `self_healing_session`: Tracks each healing attempt and its status +-- `error_output`: Extended with healing status and security analysis fields +- +-## Custom Agents +- +-Sentrius supports both Java and Python-based custom agents that can extend the platform's functionality for monitoring, automation, and user assistance. +- +-### Java Agents +- +-Java agents are built using the Spring Boot framework and integrate with the Sentrius ecosystem through the agent launcher service. +- +-#### Creating a Custom Java Agent +- +-1. **Create a new Spring Boot module** following the pattern of existing agents: +- ``` +- my-custom-agent/ +- ├── src/main/java/ +- │ └── io/sentrius/agent/mycustom/ +- │ ├── MyCustomAgent.java +- │ └── MyCustomAgentConfig.java +- └── pom.xml +- ``` +- +-2. **Implement the Agent Interface:** +- ```java +- @Component +- @ConditionalOnProperty(name = "agents.mycustom.enabled", havingValue = "true") +- public class MyCustomAgent implements ApplicationListener { +- +- @Autowired +- private AgentService agentService; +- +- @Override +- public void onApplicationEvent(ApplicationReadyEvent event) { +- // Register agent and start processing +- agentService.register(this); +- } +- } +- ``` +- +-3. **Configuration Properties:** +- ```java +- @ConfigurationProperties(prefix = "agents.mycustom") +- @Data +- public class MyCustomAgentConfig { +- private boolean enabled = false; +- private String name = "my-custom-agent"; +- private String description = "Custom agent for specialized tasks"; +- } +- ``` +- +-4. **Add to application.properties:** +- ```properties +- agents.mycustom.enabled=true +- agents.mycustom.name=my-custom-agent +- agents.mycustom.description=Custom agent for specialized tasks +- ``` +- +-5. 
**Deploy with Helm Chart:** +- ```yaml +- # Add to values.yaml +- mycustomagent: +- image: +- repository: my-custom-agent +- tag: latest +- oauth2: +- client_id: java-agents +- client_secret: your-secret +- ``` +- +-#### Java Agent Features +- +-- **Zero Trust Integration**: Automatic ZTAT (Zero Trust Access Token) handling +-- **Provenance Tracking**: Built-in event logging and audit trails +-- **LLM Integration**: Access to language models through the LLM proxy +-- **Session Monitoring**: Real-time SSH session monitoring capabilities +-- **RESTful APIs**: Full access to Sentrius APIs and data +- +-### Python Agents +- +-Python agents provide a flexible framework for creating custom automation and user assistance tools. +- +-#### Creating a Custom Python Agent +- +-1. **Set up the agent structure:** +- ```python +- # agents/my_custom/my_custom_agent.py +- from agents.base import BaseAgent +- +- class MyCustomAgent(BaseAgent): +- def __init__(self, config_manager): +- super().__init__(config_manager, name="my-custom-agent") +- self.agent_definition = config_manager.get_agent_definition('my.custom') +- +- def execute_task(self, task_data=None): +- # Your custom logic here +- self.submit_provenance( +- event_type="CUSTOM_TASK", +- details={"task": "custom_operation", "data": task_data} +- ) +- +- return { +- "status": "completed", +- "result": "Custom task executed successfully" +- } +- ``` +- +-2. **Create agent configuration:** +- ```yaml +- # my-custom.yaml +- description: "Custom agent that performs specialized tasks" +- context: | +- You are a custom agent designed to handle specific business logic. +- Process requests according to your specialized capabilities. +- ``` +- +-3. **Add to application.properties:** +- ```properties +- agent.my.custom.config=my-custom.yaml +- agent.my.custom.enabled=true +- ``` +- +-4. 
**Register in main.py:** +- ```python +- from agents.my_custom.my_custom_agent import MyCustomAgent +- +- AVAILABLE_AGENTS = { +- 'chat-helper': ChatHelperAgent, +- 'my-custom': MyCustomAgent, # Add your agent here +- } +- ``` +- +-5. **Run your custom agent:** +- ```bash +- python main.py my-custom --task-data '{"operation": "process_data"}' +- ``` +- +-#### Python Agent Features +- +-- **API Integration**: Full access to Sentrius APIs using JWT authentication +-- **Configuration Management**: Support for properties files and YAML configurations +-- **LLM Proxy Access**: Integration with language models for AI-powered tasks +-- **Provenance Submission**: Automatic event tracking and audit logging +-- **Keycloak Authentication**: Built-in OAuth2/JWT token management +- +-#### Running Python Agents +- +-```bash +-# With properties configuration +-python main.py my-custom --config my-app.properties +- +-# With environment variables +-export KEYCLOAK_BASE_URL=http://localhost:8180 +-export KEYCLOAK_CLIENT_ID=python-agents +-python main.py my-custom +- +-# Test mode (no external services) +-TEST_MODE=true python main.py my-custom ++sentrius/ ++├── core/ # Business logic, enclave management, policy enforcement ++├── api/ # REST API and web interface ++├── dataplane/ # Secure data transfer and processing ++├── llm-core/ # Language model integration ++├── integration-proxy/ # External service integrations (GitHub, JIRA, LLMs) ++├── agent-launcher/ # Dynamic agent lifecycle management ++├── provenance-core/ # Event tracking and audit framework ++└── ... + ``` + +-### Agent Development Best Practices ++See [DEVELOPMENT.md](DEVELOPMENT.md) for complete project structure. + +-1. **Authentication**: Always use proper OAuth2/JWT authentication +-2. **Provenance**: Submit detailed provenance events for audit trails +-3. **Error Handling**: Implement robust error handling and logging +-4. **Configuration**: Use environment-specific configurations +-5. 
**Testing**: Test agents in isolation before integration +-6. **Documentation**: Document agent capabilities and configuration options ++## 🤝 Contributing + +-For detailed Python agent documentation, see [python-agent/README.md](python-agent/README.md). ++Contributions are welcome! To get started: + +-Contributing ++1. Fork the repository ++2. Create a feature branch for your changes ++3. Open a pull request with a clear description + +-Contributions of all forms are welcome! To get started: ++See [DEVELOPMENT.md](DEVELOPMENT.md) for detailed development guidelines. + +- Fork the repository. +- Create a feature branch for your changes. +- Open a pull request back into the main branch, describing your changes and rationale. ++## 📄 License + +-If you encounter any issues or have requests, feel free to open a GitHub Issue. We actively review and address bug reports, feature requests, and general improvements. +-License ++Sentrius is licensed under the MIT License. See the [LICENSE](LICENSE) file for details. + +-Sentrius is licensed under the MIT License. For more details, please see the LICENSE file. +-Contact ++## 📧 Contact + +-Questions, feedback, or need commercial support? Reach out to the project maintainers: ++Questions or need commercial support? + +-Email: marc@sentrius.io ++**Email:** marc@sentrius.io + +-We’re always happy to help you secure your infrastructure with Sentrius! ++We're here to help you secure your infrastructure with Sentrius! 
+diff --git a/api/src/main/java/io/sentrius/sso/controllers/api/agents/AgentTemplateController.java b/api/src/main/java/io/sentrius/sso/controllers/api/agents/AgentTemplateController.java +index 69b23759..fa5fe035 100644 +--- a/api/src/main/java/io/sentrius/sso/controllers/api/agents/AgentTemplateController.java ++++ b/api/src/main/java/io/sentrius/sso/controllers/api/agents/AgentTemplateController.java +@@ -1,15 +1,22 @@ + package io.sentrius.sso.controllers.api.agents; + + import io.sentrius.sso.config.ApiPaths; ++import io.sentrius.sso.config.AppConfig; + import io.sentrius.sso.core.annotations.LimitAccess; + import io.sentrius.sso.core.config.SystemOptions; + import io.sentrius.sso.core.controllers.BaseController; + import io.sentrius.sso.core.dto.AgentRegistrationDTO; + import io.sentrius.sso.core.dto.agents.AgentTemplateDTO; ++import io.sentrius.sso.core.exceptions.ZtatException; + import io.sentrius.sso.core.model.security.enums.ApplicationAccessEnum; ++import io.sentrius.sso.core.services.ATPLPolicyService; + import io.sentrius.sso.core.services.ErrorOutputService; + import io.sentrius.sso.core.services.UserService; ++import io.sentrius.sso.core.services.agents.AgentClientService; ++import io.sentrius.sso.core.services.agents.AgentContextService; ++import io.sentrius.sso.core.services.agents.AgentLaunchService; + import io.sentrius.sso.core.services.agents.AgentTemplateService; ++import io.sentrius.sso.core.services.agents.ZeroTrustClientService; + import jakarta.servlet.http.HttpServletRequest; + import jakarta.servlet.http.HttpServletResponse; + import lombok.extern.slf4j.Slf4j; +@@ -26,15 +33,33 @@ import java.util.UUID; + public class AgentTemplateController extends BaseController { + + private final AgentTemplateService templateService; ++ private final ZeroTrustClientService zeroTrustClientService; ++ private final AppConfig appConfig; ++ private final ATPLPolicyService atplPolicyService; ++ private final AgentLaunchService agentLaunchService; ++ 
private final AgentContextService agentContextService; ++ private final AgentClientService agentClientService; + + public AgentTemplateController( + UserService userService, + SystemOptions systemOptions, + ErrorOutputService errorOutputService, +- AgentTemplateService templateService ++ AgentTemplateService templateService, ++ ZeroTrustClientService zeroTrustClientService, ++ AppConfig appConfig, ++ ATPLPolicyService atplPolicyService, ++ AgentLaunchService agentLaunchService, ++ AgentContextService agentContextService, ++ AgentClientService agentClientService + ) { + super(userService, systemOptions, errorOutputService); + this.templateService = templateService; ++ this.zeroTrustClientService = zeroTrustClientService; ++ this.appConfig = appConfig; ++ this.atplPolicyService = atplPolicyService; ++ this.agentLaunchService = agentLaunchService; ++ this.agentContextService = agentContextService; ++ this.agentClientService = agentClientService; + } + + /** +@@ -241,7 +266,7 @@ public class AgentTemplateController extends BaseController { + + /** + * Launch an agent from a template +- * This endpoint creates an agent registration and triggers the launcher service ++ * This endpoint creates an agent registration and triggers the launcher service automatically + * + * @param id Template ID + * @param agentName Name for the new agent +@@ -269,22 +294,105 @@ public class AgentTemplateController extends BaseController { + log.info("User {} launching agent '{}' from template '{}'", + operatingUser.getUsername(), agentName, template.getName()); + +- // Build launch response with template information +- // The actual launcher integration will be handled by the frontend calling the launcher service +- Map launchInfo = Map.of( +- "status", "prepared", ++ // Check if agent is already running ++ try { ++ String status = agentClientService.getAgentPodStatus( ++ appConfig.getSentriusLauncherService(), ++ agentName ++ ); ++ if ("Running".equals(status) || "Pending".equals(status)) { 
++ log.info("Agent {} is already running or pending", agentName); ++ return ResponseEntity.ok(Map.of( ++ "status", "already_exists", ++ "message", "Agent is already running or pending", ++ "agentName", agentName ++ )); ++ } ++ } catch (Exception e) { ++ log.debug("Agent status check failed (agent may not exist yet): {}", e.getMessage()); ++ } ++ ++ // Build AgentRegistrationDTO with full template configuration ++ AgentRegistrationDTO agentDto = AgentRegistrationDTO.builder() ++ .agentName(agentName) ++ .agentType(template.getAgentType()) ++ .agentCallbackUrl("") ++ .clientId(agentName) // Set clientId to match agentName for policy caching ++ .agentTemplateId(id.toString()) ++ .agentContextId(agentContextId) ++ .templateConfiguration(template.getDefaultConfiguration()) ++ .templateIdentity(template.getIdentity()) ++ .templatePurpose(template.getPurpose()) ++ .templateGoals(template.getGoals()) ++ .templateGuardrails(template.getGuardrails()) ++ .templateTrustPolicyId(template.getTrustPolicyId()) ++ .templateLaunchConfiguration(template.getLaunchConfiguration()) ++ .agentPolicyId(template.getTrustPolicyId() != null ? 
template.getTrustPolicyId() : "") ++ .build(); ++ ++ // Cache the policy if it exists ++ if (template.getTrustPolicyId() != null && !template.getTrustPolicyId().isEmpty()) { ++ var latest = atplPolicyService.getLatestPolicyEntity(template.getTrustPolicyId()); ++ if (latest.isPresent()) { ++ log.info("Caching policy {} for agent {}", template.getTrustPolicyId(), agentName); ++ atplPolicyService.cachePolicy(agentDto.getClientId(), template.getTrustPolicyId()); ++ } else { ++ log.warn("Policy {} not found, skipping cache", template.getTrustPolicyId()); ++ } ++ } ++ ++ // Call the launcher service ++ zeroTrustClientService.callAuthenticatedPostOnApi( ++ appConfig.getSentriusLauncherService(), ++ "agent/launcher/create", ++ agentDto ++ ); ++ ++ // Record the agent launch if agentContextId is provided ++ if (agentContextId != null && !agentContextId.isEmpty()) { ++ try { ++ UUID contextId = UUID.fromString(agentContextId); ++ String launchedBy = operatingUser.getUserId(); ++ String parameters = String.format( ++ "agentType=%s,templateId=%s,policyId=%s", ++ template.getAgentType(), ++ id.toString(), ++ template.getTrustPolicyId() != null ? 
template.getTrustPolicyId() : "none" ++ ); ++ ++ UUID launchId = agentLaunchService.recordLaunch( ++ agentName, ++ contextId, ++ launchedBy, ++ parameters ++ ); ++ ++ log.info("Recorded agent launch: launchId={}, contextId={}, agentName={}", ++ launchId, contextId, agentName); ++ } catch (IllegalArgumentException e) { ++ log.warn("Invalid agentContextId '{}', skipping launch record: {}", agentContextId, e.getMessage()); ++ } catch (Exception e) { ++ log.warn("Failed to record agent launch (non-critical): {}", e.getMessage()); ++ } ++ } ++ ++ log.info("Successfully launched agent '{}' from template '{}'", agentName, template.getName()); ++ ++ return ResponseEntity.ok(Map.of( ++ "status", "success", ++ "message", "Agent launched successfully", + "agentName", agentName, + "templateId", id.toString(), + "templateName", template.getName(), +- "agentType", template.getAgentType(), +- "trustPolicyId", template.getTrustPolicyId() != null ? template.getTrustPolicyId() : "", +- "message", "Agent launch prepared. 
Use the prepare-launch endpoint to get full configuration for launcher service.", +- "nextStep", String.format("/api/v1/agent/templates/%s/prepare-launch?agentName=%s", id, agentName) +- ); ++ "agentType", template.getAgentType() ++ )); + +- return ResponseEntity.ok(launchInfo); + } catch (IllegalArgumentException e) { + return ResponseEntity.notFound().build(); ++ } catch (ZtatException e) { ++ log.error("Error calling launcher service", e); ++ return ResponseEntity.status(503) ++ .body(Map.of("error", "Failed to contact launcher service: " + e.getMessage())); + } catch (Exception e) { + log.error("Error launching agent from template", e); + return ResponseEntity.badRequest() +diff --git a/api/src/main/resources/templates/sso/agents/agent_templates.html b/api/src/main/resources/templates/sso/agents/agent_templates.html +index ea73a954..e86e40ad 100644 +--- a/api/src/main/resources/templates/sso/agents/agent_templates.html ++++ b/api/src/main/resources/templates/sso/agents/agent_templates.html +@@ -314,6 +314,13 @@ document.addEventListener('DOMContentLoaded', function() { + return; + } + ++ // Show loading indicator ++ const launchBtn = document.querySelector(`button[data-template-id="${templateId}"]`); ++ if (launchBtn) { ++ launchBtn.disabled = true; ++ launchBtn.innerHTML = ' Launching...'; ++ } ++ + fetch(`/api/v1/agent/templates/${templateId}/launch?agentName=${encodeURIComponent(agentName)}`, { + method: 'POST', + headers: { +@@ -322,8 +329,12 @@ document.addEventListener('DOMContentLoaded', function() { + }) + .then(response => response.json()) + .then(data => { +- if (data.status === 'prepared') { +- alert(`Agent launch prepared!\n\nAgent: ${data.agentName}\nTemplate: ${data.templateName}\nType: ${data.agentType}\n\n${data.message}`); ++ if (data.status === 'success') { ++ alert(`Agent launched successfully!\n\nAgent: ${data.agentName}\nTemplate: ${data.templateName}\nType: ${data.agentType}\n\nThe agent is being deployed. 
Check the agent list for status.`); ++ } else if (data.status === 'already_exists') { ++ alert(`Agent already exists!\n\nAgent: ${data.agentName}\n\n${data.message}`); ++ } else if (data.error) { ++ alert(`Failed to launch agent: ${data.error}`); + } else { + alert('Agent launch initiated. Check the agent list for status.'); + } +@@ -331,6 +342,13 @@ document.addEventListener('DOMContentLoaded', function() { + .catch(error => { + console.error('Error launching agent:', error); + alert('Failed to launch agent. Please check the logs for details.'); ++ }) ++ .finally(() => { ++ // Restore button state ++ if (launchBtn) { ++ launchBtn.disabled = false; ++ launchBtn.innerHTML = ' Launch'; ++ } + }); + } + +diff --git a/docs/SCREENSHOT_SUGGESTIONS.md b/docs/SCREENSHOT_SUGGESTIONS.md +new file mode 100644 +index 00000000..8582c6bc +--- /dev/null ++++ b/docs/SCREENSHOT_SUGGESTIONS.md +@@ -0,0 +1,141 @@ ++# Screenshot Suggestions for Sentrius Documentation ++ ++This document outlines suggested screenshots to enhance the Sentrius documentation and improve user understanding. ++ ++## Currently Used Screenshots ++ ++1. **dashboard.png** (3746 x 1961) - Main dashboard view, used in README header ++2. **mainscreen.png** (3813 x 1913) - Main screen view (NOT currently used in new README) ++3. **ssh.png** (608 x 123) - SSH session interface ++4. **agentdesigner.png** (2760 x 1931) - Agent Designer interface ++ ++## Recommended Additional Screenshots ++ ++### High Priority ++ ++1. **Quick Start Deployment** ++ - **File:** `docs/images/kubernetes-deployment.png` ++ - **Content:** Screenshot showing successful Kubernetes deployment with port-forward commands ++ - **Usage:** In README Quick Start section and DEPLOYMENT.md ++ - **Purpose:** Help users visualize successful deployment ++ ++2. 
**Enclave Management** ++ - **File:** `docs/images/enclave-management.png` ++ - **Content:** Screenshot of the enclave management interface showing host groups and access controls ++ - **Usage:** In README Key Features section ++ - **Purpose:** Showcase the enclave feature visually ++ ++3. **Self-Healing Configuration** ++ - **File:** `docs/images/self-healing-config.png` ++ - **Content:** Self-healing configuration UI showing patching policies ++ - **Usage:** In INTEGRATIONS.md Self-Healing section ++ - **Purpose:** Help users understand self-healing configuration options ++ ++4. **Self-Healing Session View** ++ - **File:** `docs/images/self-healing-session.png` ++ - **Content:** Active healing session showing agent logs and status ++ - **Usage:** In INTEGRATIONS.md Self-Healing section ++ - **Purpose:** Show users what to expect during healing process ++ ++### Medium Priority ++ ++5. **Integration Settings** ++ - **File:** `docs/images/integration-settings.png` ++ - **Content:** Integration settings page showing GitHub/JIRA token configuration ++ - **Usage:** In INTEGRATIONS.md ++ - **Purpose:** Guide users through integration setup ++ ++6. **Rules Engine** ++ - **File:** `docs/images/rules-engine.png` ++ - **Content:** Dynamic rules configuration interface ++ - **Usage:** In README or dedicated rules documentation ++ - **Purpose:** Showcase dynamic rule enforcement capabilities ++ ++7. **Session Monitoring** ++ - **File:** `docs/images/session-monitoring.png` ++ - **Content:** Real-time SSH session monitoring view with active sessions ++ - **Usage:** In README or dedicated monitoring documentation ++ - **Purpose:** Show live monitoring capabilities ++ ++### Low Priority ++ ++8. **Python Agent Console** ++ - **File:** `docs/images/python-agent-console.png` ++ - **Content:** Terminal showing Python agent running in test mode ++ - **Usage:** In CUSTOM_AGENTS.md ++ - **Purpose:** Help developers understand agent development workflow ++ ++9. 
**Helm Chart Testing** ++ - **File:** `docs/images/helm-testing.png` ++ - **Content:** Terminal output showing successful helm chart tests ++ - **Usage:** In DEPLOYMENT.md and DEVELOPMENT.md ++ - **Purpose:** Show testing workflow ++ ++10. **Build Process** ++ - **File:** `docs/images/maven-build.png` ++ - **Content:** Terminal showing successful Maven build ++ - **Usage:** In DEVELOPMENT.md ++ - **Purpose:** Help new developers understand build process ++ ++## Suggestions for Existing Screenshots ++ ++### Potentially Replace/Update ++ ++- **mainscreen.png** is currently unused in the new README. Consider: ++ - Replace with more specific feature screenshots ++ - OR use it to show main navigation/menu structure ++ - OR update README to include it as an overview screenshot ++ ++### Image Optimization ++ ++All PNG files are quite large (15KB - 223KB). Consider: ++- Optimizing images for web (reduce resolution for documentation) ++- Using compressed PNGs or WebP format ++- Keeping originals in a separate folder ++ ++## Implementation Priority ++ ++**Phase 1 (Immediate):** ++- Add mainscreen.png to README or document where it should be used ++- Create Quick Start Deployment screenshot ++ ++**Phase 2 (Near-term):** ++- Enclave Management screenshot ++- Self-Healing Configuration and Session screenshots ++- Integration Settings screenshot ++ ++**Phase 3 (As needed):** ++- Rules Engine, Session Monitoring ++- Development workflow screenshots ++ ++## Screenshot Guidelines ++ ++When creating new screenshots: ++ ++1. **Resolution:** Use 1920x1080 or similar 16:9 aspect ratio ++2. **Content:** Show realistic data (no empty states unless demonstrating initial setup) ++3. **Annotations:** Consider adding arrows or highlights for key UI elements ++4. **Consistency:** Use same theme/color scheme across all screenshots ++5. **Accessibility:** Ensure text is readable at various sizes ++6. 
**Privacy:** Remove any sensitive information (real usernames, IPs, tokens) ++ ++## Integration with Documentation ++ ++Update the following files when adding new screenshots: ++ ++- `README.md` - Feature highlights, Quick Start ++- `DEPLOYMENT.md` - Deployment process, configuration ++- `DEVELOPMENT.md` - Build process, testing ++- `CUSTOM_AGENTS.md` - Agent development workflow ++- `INTEGRATIONS.md` - Integration setup, self-healing ++ ++## Maintenance ++ ++- Review screenshots quarterly for accuracy with current UI ++- Update screenshots when major UI changes occur ++- Keep a changelog of screenshot updates in this file ++ ++--- ++ ++**Last Updated:** 2025-12-23 ++**Maintainer:** Sentrius Documentation Team +diff --git a/ops-scripts/azure/QUICKREF.md b/ops-scripts/azure/QUICKREF.md +new file mode 100644 +index 00000000..5b36277f +--- /dev/null ++++ b/ops-scripts/azure/QUICKREF.md +@@ -0,0 +1,413 @@ ++# Azure/AKS Deployment Quick Reference ++ ++Quick reference guide for common Sentrius Azure/AKS deployment tasks. 
++ ++## Initial Setup ++ ++```bash ++# Login to Azure ++az login ++az account set --subscription ++ ++# Configure kubectl for AKS ++az aks get-credentials --resource-group sentrius-rg --name sentrius-aks-cluster ++ ++# Login to Azure Container Registry ++az acr login --name sentriusacr ++ ++# Set Azure Container Registry environment variable ++export AZURE_REGISTRY=sentriusacr.azurecr.io ++``` ++ ++## Common Commands ++ ++### Deploy New Tenant ++ ++```bash ++# Deploy with TLS and default domain (trustpolicy.ai) ++./ops-scripts/azure/deploy-helm.sh --tenant production ++ ++# Deploy with custom domain ++./ops-scripts/azure/deploy-helm.sh --tenant production --domain mycompany.com ++ ++# Deploy without TLS (development only) ++./ops-scripts/azure/deploy-helm.sh --tenant dev --no-tls ++``` ++ ++### Build and Push Images ++ ++```bash ++# Build all images for Azure ++./ops-scripts/base/build-images.sh azure --all ++ ++# Build specific image ++./ops-scripts/base/build-images.sh azure --sentrius ++ ++# Build with no cache ++./ops-scripts/base/build-images.sh azure --all --no-cache ++``` ++ ++### Start/Stop Deployments ++ ++```bash ++# Stop all pods (saves costs, keeps config) ++./ops-scripts/azure/spindown.sh --tenant production ++ ++# Start pods again ++./ops-scripts/azure/spinup.sh --tenant production ++ ++# Restart with updated config ++./ops-scripts/azure/restart.sh ++``` ++ ++### Completely Remove Tenant ++ ++```bash ++# Remove everything (destructive!) 
++./ops-scripts/azure/shutdown.sh --tenant old-tenant ++# OR ++./ops-scripts/azure/destroy-tenant.sh old-tenant ++``` ++ ++### DNS Management ++ ++```bash ++# Get ingress IP ++INGRESS_IP=$(kubectl get ingress apps-ingress-production -n production -o jsonpath='{.status.loadBalancer.ingress[0].ip}') ++ ++# Create DNS records manually ++./ops-scripts/azure/create-subdomain.sh production $INGRESS_IP ++ ++# Remove DNS records ++./ops-scripts/azure/remove-subdomain.sh production ++``` ++ ++### Testing ++ ++```bash ++# Test Helm chart rendering ++./ops-scripts/azure/test-helm.sh production ++ ++# Lint Helm charts ++helm lint sentrius-chart ++helm lint sentrius-chart-launcher ++``` ++ ++## Monitoring and Debugging ++ ++### Check Deployment Status ++ ++```bash ++# Check all resources ++kubectl get all -n production ++ ++# Check deployments ++kubectl get deployments -n production ++kubectl get deployments -n production-agents ++ ++# Check pods ++kubectl get pods -n production ++kubectl get pods -n production-agents ++ ++# Check ingress ++kubectl get ingress -n production ++ ++# Check services ++kubectl get services -n production ++``` ++ ++### View Logs ++ ++```bash ++# Sentrius API logs ++kubectl logs -n production deployment/sentrius-sentrius --tail=100 -f ++ ++# Keycloak logs ++kubectl logs -n production deployment/sentrius-keycloak --tail=100 -f ++ ++# Agent logs ++kubectl logs -n production-agents deployment/sentrius-agents-launcherservice --tail=100 -f ++ ++# All logs from a pod ++kubectl logs -n production --all-containers=true ++``` ++ ++### Describe Resources ++ ++```bash ++# Describe pod (shows events) ++kubectl describe pod -n production ++ ++# Describe ingress ++kubectl describe ingress -n production apps-ingress-production ++ ++# Describe deployment ++kubectl describe deployment -n production sentrius-sentrius ++``` ++ ++### Execute Commands in Pods ++ ++```bash ++# Get shell in pod ++kubectl exec -it -n production -- /bin/bash ++ ++# Run single command 
++kubectl exec -n production -- ls -la /app ++``` ++ ++## Azure-Specific Commands ++ ++### Check AKS Cluster ++ ++```bash ++# Get cluster info ++az aks show --resource-group sentrius-rg --name sentrius-aks-cluster ++ ++# List node pools ++az aks nodepool list --resource-group sentrius-rg --cluster-name sentrius-aks-cluster ++ ++# Scale node pool ++az aks nodepool scale --resource-group sentrius-rg --cluster-name sentrius-aks-cluster --name default --node-count 3 ++``` ++ ++### Check Container Registry ++ ++```bash ++# List repositories ++az acr repository list --name sentriusacr --output table ++ ++# List tags for image ++az acr repository show-tags --name sentriusacr --repository sentrius --output table ++ ++# Delete old image ++az acr repository delete --name sentriusacr --image sentrius:old-tag ++``` ++ ++### Check DNS Records ++ ++```bash ++# List all DNS records ++az network dns record-set a list --resource-group sentrius-rg --zone-name sentrius.cloud --output table ++ ++# Show specific record ++az network dns record-set a show --resource-group sentrius-rg --zone-name sentrius.cloud --name production ++ ++# Delete DNS record ++az network dns record-set a delete --resource-group sentrius-rg --zone-name sentrius.cloud --name old-tenant --yes ++``` ++ ++### Check Load Balancers ++ ++```bash ++# List public IPs ++az network public-ip list --resource-group sentrius-rg --output table ++ ++# Show load balancer ++az network lb list --resource-group sentrius-rg --output table ++``` ++ ++## Troubleshooting ++ ++### Pods Not Starting ++ ++```bash ++# Check pod status ++kubectl get pods -n production ++ ++# View pod events ++kubectl describe pod -n production ++ ++# Check logs ++kubectl logs -n production --previous ++ ++# Check resource limits ++kubectl top nodes ++kubectl top pods -n production ++``` ++ ++### Image Pull Errors ++ ++```bash ++# Check if ACR is attached to AKS ++az aks show --resource-group sentrius-rg --name sentrius-aks-cluster --query "identity" ++ ++# 
Attach ACR to AKS ++az aks update -n sentrius-aks-cluster -g sentrius-rg --attach-acr sentriusacr ++ ++# Verify image exists ++az acr repository show --name sentriusacr --repository sentrius --image sentrius:1.1.51 ++``` ++ ++### DNS Not Resolving ++ ++```bash ++# Check DNS record exists ++az network dns record-set a show --resource-group sentrius-rg --zone-name sentrius.cloud --name production ++ ++# Check ingress has IP ++kubectl get ingress -n production ++ ++# Test DNS resolution ++nslookup production.sentrius.cloud ++dig production.sentrius.cloud ++``` ++ ++### Certificate Issues ++ ++```bash ++# Check cert-manager ++kubectl get pods -n cert-manager ++ ++# Check certificates ++kubectl get certificate -n production ++ ++# Check certificate status ++kubectl describe certificate -n production ++ ++# Check certificate secret ++kubectl get secret -n production -o yaml ++``` ++ ++### Ingress Not Working ++ ++```bash ++# Check ingress controller ++kubectl get pods -n kube-system | grep ingress ++ ++# Check ingress resource ++kubectl describe ingress -n production apps-ingress-production ++ ++# Check Application Gateway ++az network application-gateway show --resource-group sentrius-rg --name sentrius-appgw ++``` ++ ++## Version Management ++ ++### Update Versions ++ ++```bash ++# Edit version file ++vim .azure.env ++ ++# Update version number ++SENTRIUS_VERSION=1.1.52 ++``` ++ ++### Deploy New Version ++ ++```bash ++# Build and push new images ++./ops-scripts/base/build-images.sh azure --all ++ ++# Deploy updated version ++./ops-scripts/azure/deploy-helm.sh --tenant production ++``` ++ ++### Rollback ++ ++```bash ++# View Helm history ++helm history sentrius -n production ++ ++# Rollback to previous version ++helm rollback sentrius -n production ++ ++# Rollback to specific revision ++helm rollback sentrius 3 -n production ++``` ++ ++## Secrets Management ++ ++### View Secrets ++ ++```bash ++# List secrets ++kubectl get secrets -n production ++ ++# View secret data 
(base64 encoded) ++kubectl get secret production-keycloak-secrets -n production -o yaml ++ ++# Decode secret value ++kubectl get secret production-keycloak-secrets -n production -o jsonpath="{.data.db-password}" | base64 --decode ++``` ++ ++### Regenerate Secrets ++ ++```bash ++# Delete existing secret ++kubectl delete secret production-keycloak-secrets -n production ++ ++# Redeploy (will generate new secret) ++./ops-scripts/azure/deploy-helm.sh --tenant production ++``` ++ ++## Backup and Restore ++ ++### Backup Resources ++ ++```bash ++# Backup namespace resources ++kubectl get all -n production -o yaml > production-backup.yaml ++ ++# Backup secrets ++kubectl get secrets -n production -o yaml > production-secrets-backup.yaml ++ ++# Backup configmaps ++kubectl get configmaps -n production -o yaml > production-configmaps-backup.yaml ++``` ++ ++### Export Helm Values ++ ++```bash ++# Get current Helm values ++helm get values sentrius -n production > production-values.yaml ++ ++# Get all values including defaults ++helm get values sentrius -n production --all > production-all-values.yaml ++``` ++ ++## Performance Optimization ++ ++### Scale Deployments ++ ++```bash ++# Scale specific deployment ++kubectl scale deployment sentrius-sentrius -n production --replicas=3 ++ ++# Scale all deployments ++kubectl scale deployment --all -n production --replicas=2 ++``` ++ ++### Resource Monitoring ++ ++```bash ++# Check node resources ++kubectl top nodes ++ ++# Check pod resources ++kubectl top pods -n production ++ ++# Check pod resource limits ++kubectl describe pod -n production | grep -A 5 "Limits:" ++``` ++ ++## Quick Links ++ ++- **Main README**: [README.md](README.md) ++- **Deployment Guide**: [../../DEPLOYMENT.md](../../DEPLOYMENT.md) ++- **Helm Charts**: [../../sentrius-chart](../../sentrius-chart) ++- **Azure Docs**: https://docs.microsoft.com/en-us/azure/aks/ ++ ++## Environment Files ++ ++- **`.azure.env`** - Version numbers for all services ++- **`base.sh`** - 
Cluster and DNS configuration ++- **`.generated.env`** - Auto-generated secrets (not in git) ++ ++## Support ++ ++For issues: ++1. Check logs: `kubectl logs -n ` ++2. Check events: `kubectl get events -n --sort-by='.lastTimestamp'` ++3. Check Helm status: `helm status sentrius -n ` ++4. Run test: `./ops-scripts/azure/test-helm.sh ` +diff --git a/ops-scripts/azure/README.md b/ops-scripts/azure/README.md +new file mode 100644 +index 00000000..358f02d9 +--- /dev/null ++++ b/ops-scripts/azure/README.md +@@ -0,0 +1,495 @@ ++# Sentrius Azure/AKS Deployment Scripts ++ ++This directory contains scripts for deploying Sentrius to Azure Kubernetes Service (AKS). ++ ++## Prerequisites ++ ++1. **Azure CLI** installed and configured ++ ```bash ++ az login ++ az account set --subscription ++ ``` ++ ++2. **kubectl** configured to access your AKS cluster ++ ```bash ++ az aks get-credentials --resource-group sentrius-rg --name sentrius-aks-cluster ++ ``` ++ ++3. **Helm 3.x** installed ++ ++4. **Docker images** built and pushed to Azure Container Registry ++ ```bash ++ # From repository root, build and push all images ++ cd /path/to/Sentrius-private ++ ./ops-scripts/base/build-images.sh azure --all ++ ``` ++ ++## Configuration ++ ++The deployment is configured through the following files: ++ ++- **`.azure.env`** - Contains version numbers for all services ++- **`ops-scripts/azure/base.sh`** - Contains cluster, resource group, and DNS zone configuration ++ ++### Environment Variables (.azure.env) ++ ++```bash ++SENTRIUS_VERSION=1.1.51 ++SENTRIUS_SSH_VERSION=1.1.10 ++SENTRIUS_KEYCLOAK_VERSION=1.1.13 ++SENTRIUS_AGENT_VERSION=1.1.22 ++SENTRIUS_AI_AGENT_VERSION=1.1.3 ++LLMPROXY_VERSION=1.1.3 ++LAUNCHER_VERSION=1.1.3 ++AGENTPROXY_VERSION=1.1.3 ++SSHPROXY_VERSION=1.1.3 ++RDPPROXY_VERSION=1.1.3 ++GITHUB_MCP_VERSION=1.1.3 ++MONITORING_AGENT_VERSION=1.1.21 ++SSH_AGENT_VERSION=1.1.3 ++``` ++ ++### Cluster Configuration (base.sh) ++ ++```bash ++NAMESPACE=august ++CLUSTER=sentrius-aks-cluster 
++REGION=eastus ++RESOURCE_GROUP=sentrius-rg ++DNS_ZONE=trustpolicy.ai ++``` ++ ++### Azure Container Registry ++ ++Set the `AZURE_REGISTRY` environment variable to your Azure Container Registry: ++ ++```bash ++export AZURE_REGISTRY=sentriusacr.azurecr.io ++``` ++ ++## Scripts ++ ++### deploy-helm.sh ++ ++Deploys Sentrius to AKS with all components. ++ ++**Usage:** ++```bash ++./ops-scripts/azure/deploy-helm.sh --tenant <tenant> [--domain <domain>] [--no-tls] ++``` ++ ++**Options:** ++- `--tenant TENANT_NAME` - (Required) Name of the tenant to deploy ++- `--domain DOMAIN` - (Optional) Domain name for services (default: trustpolicy.ai) ++- `--no-tls` - (Optional) Disable TLS/SSL (not recommended for production) ++ ++**Example:** ++```bash ++# Deploy production tenant with default domain (trustpolicy.ai) ++./ops-scripts/azure/deploy-helm.sh --tenant production ++ ++# Deploy with custom domain ++./ops-scripts/azure/deploy-helm.sh --tenant production --domain sentrius.cloud ++ ++# Deploy test tenant without TLS ++./ops-scripts/azure/deploy-helm.sh --tenant test --no-tls ++``` ++ ++**What it does:** ++1. Sources environment variables and secrets ++2. Creates Kubernetes namespaces (`<tenant>` and `<tenant>-agents`) ++3. Generates or retrieves secrets from Kubernetes ++4. Deploys main Sentrius chart to `<tenant>` namespace ++5. Deploys launcher chart to `<tenant>-agents` namespace ++6. Waits for LoadBalancer IP to be assigned ++7. Creates DNS records for: ++ - `<tenant>.<domain>` (main application, default: trustpolicy.ai) ++ - `keycloak.<tenant>.<domain>` (authentication) ++ - `agentproxy.<tenant>.<domain>` (agent proxy) ++ - `rdpproxy.<tenant>.<domain>` (RDP proxy) ++ ++**Note:** The default domain is `trustpolicy.ai`. You can specify a custom domain with `--domain`. ++ ++### spinup.sh ++ ++Scales up all deployments to 1 replica (for resuming after spindown). 
++ ++**Usage:** ++```bash ++./ops-scripts/azure/spinup.sh --tenant <tenant> ++``` ++ ++**Example:** ++```bash ++./ops-scripts/azure/spinup.sh --tenant production ++``` ++ ++### spindown.sh ++ ++Scales down all deployments to 0 replicas to save costs while preserving configuration. ++ ++**Usage:** ++```bash ++./ops-scripts/azure/spindown.sh --tenant <tenant> ++``` ++ ++**Example:** ++```bash ++./ops-scripts/azure/spindown.sh --tenant test ++``` ++ ++**What it preserves:** ++- Configurations and secrets ++- Ingresses and load balancers ++- DNS records ++- Certificates ++ ++### restart.sh ++ ++Restarts all deployments in the default namespace and upgrades the Helm release. ++ ++**Usage:** ++```bash ++./ops-scripts/azure/restart.sh ++``` ++ ++### shutdown.sh ++ ++Completely removes a tenant deployment including namespaces and DNS records. ++ ++**Usage:** ++```bash ++./ops-scripts/azure/shutdown.sh --tenant <tenant> ++``` ++ ++**Example:** ++```bash ++./ops-scripts/azure/shutdown.sh --tenant test-tenant ++``` ++ ++**Warning:** This is destructive and cannot be undone. It will: ++1. Uninstall Helm releases ++2. Delete Kubernetes namespaces (`<tenant>` and `<tenant>-agents`) ++3. Remove all DNS records ++ ++### destroy-tenant.sh ++ ++Alternative name for shutdown.sh - completely removes a tenant deployment. ++ ++**Usage:** ++```bash ++./ops-scripts/azure/destroy-tenant.sh <tenant> ++``` ++ ++**Example:** ++```bash ++./ops-scripts/azure/destroy-tenant.sh old-tenant ++``` ++ ++### test-helm.sh ++ ++Tests Helm chart rendering without deploying. ++ ++**Usage:** ++```bash ++./ops-scripts/azure/test-helm.sh [tenant-name] ++``` ++ ++**Example:** ++```bash ++./ops-scripts/azure/test-helm.sh my-tenant ++``` ++ ++### create-subdomain.sh ++ ++Manually creates DNS records for a tenant (useful if deploy-helm.sh DNS creation fails). 
++ ++**Usage:** ++```bash ++./ops-scripts/azure/create-subdomain.sh <tenant> <ingress-ip> ++``` ++ ++**Example:** ++```bash ++# Get the ingress IP first ++INGRESS_IP=$(kubectl get ingress apps-ingress-production -n production -o jsonpath='{.status.loadBalancer.ingress[0].ip}') ++ ++# Create DNS records ++./ops-scripts/azure/create-subdomain.sh production $INGRESS_IP ++``` ++ ++**What it creates:** ++- `<tenant>.sentrius.cloud` → Ingress IP ++- `keycloak.<tenant>.sentrius.cloud` → Ingress IP ++- `agentproxy.<tenant>.sentrius.cloud` → Ingress IP ++- `rdpproxy.<tenant>.sentrius.cloud` → Ingress IP ++ ++### remove-subdomain.sh ++ ++Removes DNS records for a tenant. ++ ++**Usage:** ++```bash ++./ops-scripts/azure/remove-subdomain.sh <tenant> ++``` ++ ++**Example:** ++```bash ++./ops-scripts/azure/remove-subdomain.sh old-tenant ++``` ++ ++**Note:** This is also called automatically by `destroy-tenant.sh` and `shutdown.sh`. ++ ++## Deployment Architecture ++ ++### Namespaces ++ ++Each tenant deployment creates two namespaces: ++ ++1. **`<tenant>`** - Main application namespace containing: ++ - Sentrius API (`sentrius-sentrius`) ++ - Keycloak (`sentrius-keycloak`) ++ - PostgreSQL databases ++ - Integration Proxy (`sentrius-integrationproxy`) ++ - Agent Proxy (`sentrius-agentproxy`) ++ - SSH/RDP Proxies ++ - Neo4j, Kafka (optional) ++ ++2. 
**`<tenant>-agents`** - Agent launcher namespace containing: ++ - Launcher Service (`sentrius-agents-launcherservice`) ++ - Dynamic agent deployments ++ ++### Services Deployed ++ ++| Service | Image | Purpose | ++|---------|-------|---------| ++| Sentrius API | `sentrius` | Main application and REST API | ++| Keycloak | `sentrius-keycloak` | Authentication and authorization | ++| Integration Proxy | `sentrius-integration-proxy` | LLM and external service integration | ++| Agent Proxy | `sentrius-agent-proxy` | Agent communication proxy | ++| Launcher Service | `sentrius-launcher-service` | Dynamic agent lifecycle management | ++| SSH Proxy | `sentrius-ssh-proxy` | SSH session proxy | ++| RDP Proxy | `sentrius-rdp-proxy` | RDP session proxy | ++| Java Agent | `sentrius-agent` | Java-based monitoring agent | ++| AI Agent | `sentrius-ai-agent` | AI-powered monitoring agent | ++| Monitoring Agent | `sentrius-monitoring-agent` | System monitoring agent | ++| SSH Agent | `sentrius-ssh-agent` | SSH monitoring agent | ++ ++### DNS Configuration ++ ++The deployment automatically creates DNS records in Azure DNS: ++ ++- `<tenant>.<domain>` → Main application (default domain: trustpolicy.ai) ++- `keycloak.<tenant>.<domain>` → Keycloak authentication ++- `agentproxy.<tenant>.<domain>` → Agent proxy service ++- `rdpproxy.<tenant>.<domain>` → RDP proxy service ++ ++All records point to the AKS Ingress LoadBalancer IP. ++ ++**Custom Domains**: To use a different domain, specify it with the `--domain` parameter: ++```bash ++./ops-scripts/azure/deploy-helm.sh --tenant production --domain mycompany.com ++``` ++ ++## Secret Management ++ ++Secrets are automatically generated and stored in Kubernetes secrets: ++ ++- **`<tenant>-keycloak-secrets`** - Keycloak database and client secrets ++- **`<tenant>-db-secret`** - Application database credentials ++- **`<tenant>-oauth2-secrets`** - OAuth2 client secrets for services ++ ++Secrets are persisted across deployments. On first deployment, new secrets are generated. On subsequent deployments, existing secrets are reused. 
++ ++## Building and Pushing Images ++ ++To build and push all images to Azure Container Registry: ++ ++```bash ++# Login to Azure Container Registry ++az acr login --name sentriusacr ++ ++# Build all images for Azure ++./ops-scripts/base/build-images.sh azure --all ++ ++# Build specific images ++./ops-scripts/base/build-images.sh azure --sentrius ++./ops-scripts/base/build-images.sh azure --sentrius-keycloak ++./ops-scripts/base/build-images.sh azure --sentrius-launcher-service ++ ++# Build with no cache (clean build) ++./ops-scripts/base/build-images.sh azure --all --no-cache ++``` ++ ++The build script automatically: ++1. Increments patch version in `.azure.env` ++2. Builds Docker images ++3. Tags images with version number ++4. Pushes to your Azure Container Registry ++ ++## Monitoring Deployment ++ ++After deployment, monitor the status: ++ ++```bash ++# Check deployment status ++kubectl get deployments -n <tenant> ++kubectl get deployments -n <tenant>-agents ++ ++# Check pod status ++kubectl get pods -n <tenant> ++kubectl get pods -n <tenant>-agents ++ ++# Check services ++kubectl get services -n <tenant> ++ ++# Check ingress ++kubectl get ingress -n <tenant> ++ ++# View logs ++kubectl logs -n <tenant> deployment/sentrius-sentrius ++kubectl logs -n <tenant> deployment/sentrius-keycloak ++``` ++ ++## Troubleshooting ++ ++### DNS Records Not Created ++ ++If DNS records are not created automatically: ++ ++```bash ++# Check if LoadBalancer IP is assigned ++kubectl get ingress apps-ingress-<tenant> -n <tenant> ++ ++# Manually create DNS records ++./ops-scripts/azure/create-subdomain.sh <tenant> <ingress-ip> ++``` ++ ++### Secret Issues ++ ++If secrets are corrupted or need to be regenerated: ++ ++```bash ++# Delete existing secrets ++kubectl delete secret <tenant>-keycloak-secrets -n <tenant> ++kubectl delete secret <tenant>-db-secret -n <tenant> ++ ++# Redeploy (new secrets will be generated) ++./ops-scripts/azure/deploy-helm.sh --tenant <tenant> ++``` ++ ++### Image Pull Issues ++ ++Ensure images are pushed to Azure Container Registry: ++ ++```bash ++# List images in registry ++az acr repository list --name 
sentriusacr --output table ++ ++# Check specific image tags ++az acr repository show-tags --name sentriusacr --repository sentrius --output table ++``` ++ ++Ensure AKS has permission to pull from ACR: ++ ++```bash ++# Grant AKS pull permissions to ACR ++az aks update -n sentrius-aks-cluster -g sentrius-rg --attach-acr sentriusacr ++``` ++ ++### Ingress Controller Issues ++ ++Ensure Application Gateway Ingress Controller (AGIC) is installed: ++ ++```bash ++# Check if AGIC is installed ++kubectl get pods -n kube-system | grep ingress ++ ++# Install AGIC using Helm ++helm repo add application-gateway-kubernetes-ingress https://appgwingress.blob.core.windows.net/ingress-azure-helm-package/ ++helm install ingress-azure application-gateway-kubernetes-ingress/ingress-azure ++``` ++ ++## Upgrading Deployments ++ ++To upgrade an existing deployment: ++ ++1. Update version numbers in `.azure.env` ++2. Build and push new images ++3. Redeploy using `deploy-helm.sh` ++ ++```bash ++# Edit .azure.env to update versions ++vim .azure.env ++ ++# Build and push updated images ++./ops-scripts/base/build-images.sh azure --all ++ ++# Upgrade deployment ++./ops-scripts/azure/deploy-helm.sh --tenant <tenant> ++``` ++ ++## Cost Optimization ++ ++To reduce costs when not in use: ++ ++```bash ++# Scale down to zero replicas (preserves configuration) ++./ops-scripts/azure/spindown.sh --tenant <tenant> ++ ++# Scale back up when needed ++./ops-scripts/azure/spinup.sh --tenant <tenant> ++ ++# Or delete specific tenant completely ++./ops-scripts/azure/destroy-tenant.sh <tenant> ++``` ++ ++## Security Considerations ++ ++1. **TLS/SSL**: Always use TLS in production (default behavior) ++2. **Secrets**: Secrets are auto-generated and stored in Kubernetes ++3. **DNS**: Uses Azure DNS for managed DNS records ++4. **Network**: Services are exposed via AKS Ingress with LoadBalancer ++5. **Authentication**: Keycloak provides OAuth2/OIDC authentication ++6. 
**Container Registry**: Use Azure Container Registry with managed identities ++ ++## Azure-Specific Configuration ++ ++### Storage Classes ++ ++AKS provides several storage classes: ++ ++- `managed-premium` - Premium SSD (default for Sentrius) ++- `managed-standard` - Standard HDD ++- `azurefile` - Azure Files (for ReadWriteMany) ++ ++Configure in Helm values: ++```yaml ++config: ++ storageClassName: "managed-premium" ++``` ++ ++### Ingress Classes ++ ++For AKS, use Application Gateway Ingress Controller: ++ ++```yaml ++ingress: ++ class: "azure/application-gateway" ++``` ++ ++### Load Balancer Annotations ++ ++Azure-specific annotations are automatically applied: ++ ++```yaml ++service.beta.kubernetes.io/azure-load-balancer-resource-group: sentrius-rg ++``` ++ ++## Support ++ ++For issues or questions: ++1. Check logs: `kubectl logs -n <tenant> <pod-name>` ++2. Review Helm values: `helm get values sentrius -n <tenant>` ++3. Test chart rendering: `./ops-scripts/azure/test-helm.sh <tenant>` ++4. Check Azure resources: `az resource list --resource-group sentrius-rg` +diff --git a/ops-scripts/azure/base.sh b/ops-scripts/azure/base.sh +new file mode 100755 +index 00000000..02fa87da +--- /dev/null ++++ b/ops-scripts/azure/base.sh +@@ -0,0 +1,6 @@ ++#!/bin/bash ++NAMESPACE=august ++CLUSTER=sentrius-aks-cluster ++REGION=eastus ++RESOURCE_GROUP=sentrius-rg ++DNS_ZONE=trustpolicy.ai +diff --git a/ops-scripts/azure/create-subdomain.sh b/ops-scripts/azure/create-subdomain.sh +new file mode 100755 +index 00000000..b34cb858 +--- /dev/null ++++ b/ops-scripts/azure/create-subdomain.sh +@@ -0,0 +1,54 @@ ++#!/bin/bash ++SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ++# Adds A records for <tenant> and its keycloak/agentproxy/rdpproxy subdomains to ${DNS_ZONE} (RESOURCE_GROUP and DNS_ZONE come from base.sh). ++source "${SCRIPT_DIR}/base.sh" ++ ++TENANT=$1 ++INGRESS_IP=$2 ++ ++if [[ -z "$TENANT" ]]; then ++ echo "Usage: $0 <tenant> <ingress-ip>" 1>&2 ++ exit 1 ++fi ++ ++if [[ -z "$INGRESS_IP" ]]; then ++ echo "Usage: $0 <tenant> <ingress-ip>" 1>&2 ++ echo "To get ingress IP: kubectl get ingress apps-ingress-${TENANT} -n ${TENANT} -o jsonpath='{.status.loadBalancer.ingress[0].ip}'" 1>&2 ++ exit 
1 ++fi ++ ++echo "Creating DNS records for tenant ${TENANT} with IP ${INGRESS_IP}..." ++ ++# Add main tenant domain ++az network dns record-set a add-record \ ++ --resource-group "${RESOURCE_GROUP}" \ ++ --zone-name "${DNS_ZONE}" \ ++ --record-set-name "${TENANT}" \ ++ --ipv4-address "${INGRESS_IP}" ++ ++# Add Keycloak subdomain ++az network dns record-set a add-record \ ++ --resource-group "${RESOURCE_GROUP}" \ ++ --zone-name "${DNS_ZONE}" \ ++ --record-set-name "keycloak.${TENANT}" \ ++ --ipv4-address "${INGRESS_IP}" ++ ++# Add Agent Proxy subdomain ++az network dns record-set a add-record \ ++ --resource-group "${RESOURCE_GROUP}" \ ++ --zone-name "${DNS_ZONE}" \ ++ --record-set-name "agentproxy.${TENANT}" \ ++ --ipv4-address "${INGRESS_IP}" ++ ++# Add RDP Proxy subdomain ++az network dns record-set a add-record \ ++ --resource-group "${RESOURCE_GROUP}" \ ++ --zone-name "${DNS_ZONE}" \ ++ --record-set-name "rdpproxy.${TENANT}" \ ++ --ipv4-address "${INGRESS_IP}" ++ ++echo "✅ DNS records created successfully!" ++echo " ${TENANT}.${DNS_ZONE} → ${INGRESS_IP}" ++echo " keycloak.${TENANT}.${DNS_ZONE} → ${INGRESS_IP}" ++echo " agentproxy.${TENANT}.${DNS_ZONE} → ${INGRESS_IP}" ++echo " rdpproxy.${TENANT}.${DNS_ZONE} → ${INGRESS_IP}" +diff --git a/ops-scripts/azure/deploy-helm.sh b/ops-scripts/azure/deploy-helm.sh +new file mode 100755 +index 00000000..58547733 +--- /dev/null ++++ b/ops-scripts/azure/deploy-helm.sh +@@ -0,0 +1,504 @@ ++#!/bin/bash ++ ++SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ++ ++source ${SCRIPT_DIR}/base.sh ++source ${SCRIPT_DIR}/../base/base.sh ++source ${SCRIPT_DIR}/../../.azure.env ++ ++# For AKS deployments, use versioned tags from .azure.env ++# Default to 'latest' if .azure.env is not sourced or variables are not set ++SENTRIUS_VERSION="${SENTRIUS_VERSION:-latest}" ++SENTRIUS_SSH_VERSION="${SENTRIUS_SSH_VERSION:-latest}" ++SENTRIUS_KEYCLOAK_VERSION="${SENTRIUS_KEYCLOAK_VERSION:-latest}" ++SENTRIUS_AGENT_VERSION="${SENTRIUS_AGENT_VERSION:-latest}" 
++SENTRIUS_AI_AGENT_VERSION="${SENTRIUS_AI_AGENT_VERSION:-latest}" ++LLMPROXY_VERSION="${LLMPROXY_VERSION:-latest}" ++LAUNCHER_VERSION="${LAUNCHER_VERSION:-latest}" ++AGENTPROXY_VERSION="${AGENTPROXY_VERSION:-latest}" ++SSHPROXY_VERSION="${SSHPROXY_VERSION:-latest}" ++RDPPROXY_VERSION="${RDPPROXY_VERSION:-latest}" ++GITHUB_MCP_VERSION="${GITHUB_MCP_VERSION:-latest}" ++MONITORING_AGENT_VERSION="${MONITORING_AGENT_VERSION:-latest}" ++SSH_AGENT_VERSION="${SSH_AGENT_VERSION:-latest}" ++ ++TENANT="" ++ENV_TARGET="aks" ++CERTIFICATES_ENABLED="true" ++INGRESS_TLS_ENABLED="true" ++ENVIRONMENT="aks" ++DEPLOY_ADMINER=${DEPLOY_ADMINER:-false} ++ENABLE_RDP_CONTAINER=${ENABLE_RDP_CONTAINER:-true} ++ ++# Azure Container Registry ++AZURE_REGISTRY="${AZURE_REGISTRY:-sentriusacr.azurecr.io}" ++ ++# Generate secrets using the shared script ++(source ${SCRIPT_DIR}/../base/generate-secrets.sh) ++ ++GENERATED_ENV_PATH="${SCRIPT_DIR}/../../.generated.env" ++if [[ -f "$GENERATED_ENV_PATH" ]]; then ++ source "$GENERATED_ENV_PATH" ++fi ++ ++DOMAIN_NAME="${DNS_ZONE:-trustpolicy.ai}" # Default domain for Azure: the DNS zone from base.sh ++ ++# Parse command line arguments ++while [[ $# -gt 0 ]]; do ++ case $1 in ++ --tenant) ++ TENANT="$2" ++ shift 2 || { echo "--tenant requires a value" 1>&2; exit 1; } # a failed shift consumes nothing and would loop forever ++ ;; ++ --domain) ++ DOMAIN_NAME="$2" ++ shift 2 || { echo "--domain requires a value" 1>&2; exit 1; } # a failed shift consumes nothing and would loop forever ++ ;; ++ --no-tls) ++ CERTIFICATES_ENABLED="false" ++ INGRESS_TLS_ENABLED="false" ++ shift ++ ;; ++ *) ++ echo "Unknown option: $1" ++ echo "Usage: $0 --tenant TENANT_NAME [--domain DOMAIN] [--no-tls]" ++ echo " --tenant: Specify tenant name (required)" ++ echo " --domain: Specify domain name (default: trustpolicy.ai)" ++ echo " --no-tls: Disable TLS/SSL (not recommended for production)" ++ exit 1 ++ ;; ++ esac ++done ++ ++if [[ -z "$TENANT" ]]; then ++ echo "Must provide tenant name with --tenant" 1>&2 ++ echo "Usage: $0 --tenant TENANT_NAME [--domain DOMAIN] [--no-tls]" ++ exit 1 ++fi ++# Configure domain settings for AKS; DNS records below are written to DNS_ZONE, so it must follow --domain ++DNS_ZONE="${DOMAIN_NAME}" ++SUBDOMAIN="${TENANT}.${DOMAIN_NAME}" ++APROXY_SUBDOMAIN="agentproxy.${TENANT}.${DOMAIN_NAME}" 
++KEYCLOAK_SUBDOMAIN="keycloak.${TENANT}.${DOMAIN_NAME}" ++RDPPROXY_SUBDOMAIN="rdpproxy.${TENANT}.${DOMAIN_NAME}" ++KEYCLOAK_HOSTNAME="${KEYCLOAK_SUBDOMAIN}" ++KEYCLOAK_DOMAIN="https://${KEYCLOAK_SUBDOMAIN}" ++KEYCLOAK_INTERNAL_DOMAIN="${KEYCLOAK_DOMAIN}" ++SENTRIUS_DOMAIN="https://${SUBDOMAIN}" ++APROXY_DOMAIN="https://${APROXY_SUBDOMAIN}" ++RDPPROXY_DOMAIN="https://${RDPPROXY_SUBDOMAIN}" ++STORAGE_CLASS_NAME="managed-premium" ++ ++# Check if namespace exists ++kubectl get namespace ${TENANT} >/dev/null 2>&1 ++if [[ $? -ne 0 ]]; then ++ echo "Namespace ${TENANT} does not exist. Creating..." ++ kubectl create namespace ${TENANT} || { echo "Failed to create namespace ${TENANT}"; exit 1; } ++fi ++ ++kubectl get namespace ${TENANT}-agents >/dev/null 2>&1 ++if [[ $? -ne 0 ]]; then ++ echo "Namespace ${TENANT}-agents does not exist. Creating..." ++ kubectl create namespace ${TENANT}-agents || { echo "Failed to create namespace ${TENANT}-agents"; exit 1; } ++fi ++ ++# Wait for admission webhooks to be ready (prevents validation failures during deployment) ++echo "🔍 Checking for admission webhooks..." ++ ++# Check for ingress controller webhook ++if kubectl get validatingwebhookconfigurations 2>/dev/null | grep -q "ingress"; then ++ echo "⏳ Waiting for ingress admission webhook to be ready..." ++ for i in {1..30}; do ++ if kubectl get validatingwebhookconfigurations 2>/dev/null | grep -q "ingress.*admission"; then ++ echo "✅ Ingress admission webhook is configured" ++ sleep 2 ++ break ++ fi ++ echo "Waiting for ingress webhook configuration... ($i/30)" ++ sleep 2 ++ done ++fi ++ ++# Check for cert-manager webhook (only if TLS is enabled) ++if [[ "$CERTIFICATES_ENABLED" == "true" ]]; then ++ if kubectl get validatingwebhookconfigurations cert-manager-webhook >/dev/null 2>&1; then ++ echo "⏳ Waiting for cert-manager webhook to be fully operational..." 
++ if kubectl get pods -n cert-manager -l app.kubernetes.io/name=webhook >/dev/null 2>&1; then ++ kubectl wait --for=condition=ready pod \ ++ -n cert-manager \ ++ -l app.kubernetes.io/name=webhook \ ++ --timeout=60s 2>/dev/null || \ ++ echo "⚠️ cert-manager webhook may not be fully ready" ++ fi ++ echo "✅ cert-manager webhook check complete" ++ sleep 2 ++ fi ++fi ++ ++# Generate Keycloak DB password if not set and secret doesn't exist ++if [[ -z "$KEYCLOAK_DB_PASSWORD" ]]; then ++ echo "🔎 Checking if keycloak secret already exists..." ++ if kubectl get secret "${TENANT}-keycloak-secrets" --namespace "${TENANT}" >/dev/null 2>&1; then ++ echo "✅ Found existing keycloak secret; extracting DB password..." ++ KEYCLOAK_DB_PASSWORD=$(kubectl get secret "${TENANT}-keycloak-secrets" --namespace "${TENANT}" -o jsonpath="{.data.db-password}" | base64 --decode) ++ if [[ -z "$KEYCLOAK_DB_PASSWORD" ]]; then ++ echo "❌ Secret exists but db-password is empty; exiting for safety" ++ exit 1 ++ fi ++ else ++ echo "⚠️ No existing secret found; generating new Keycloak DB password..." ++ KEYCLOAK_DB_PASSWORD=$(LC_ALL=C tr -dc 'A-Za-z0-9' </dev/urandom | head -c 24) ++ fi ++fi ++ ++# Generate Keycloak client secret if not already present (LC_ALL=C keeps tr byte-safe; streaming /dev/urandom guarantees the full length) ++if [[ -z "$KEYCLOAK_CLIENT_SECRET" ]]; then ++ echo "🔎 Checking if keycloak secret already exists..." ++ if kubectl get secret "${TENANT}-keycloak-secrets" --namespace "${TENANT}" >/dev/null 2>&1; then ++ echo "✅ Found existing keycloak secret; extracting client secret..." ++ KEYCLOAK_CLIENT_SECRET=$(kubectl get secret "${TENANT}-keycloak-secrets" --namespace "${TENANT}" -o jsonpath="{.data.client-secret}" | base64 --decode) ++ else ++ echo "⚠️ No existing secret found; generating new Keycloak client secret..." 
++ KEYCLOAK_CLIENT_SECRET=$(LC_ALL=C tr -dc 'A-Za-z0-9' </dev/urandom | head -c 32) ++ fi ++fi ++ ++# ========================================== ++# 🔍 Render Helm Output for Validation ++# ========================================== ++RENDER_PATH="${SCRIPT_DIR}/rendered-${TENANT}.yaml" ++ ++echo "📄 Rendering Helm chart (dry run) for validation..." ++helm template sentrius ./sentrius-chart \ ++ --namespace ${TENANT} \ ++ --set adminer.enabled=${DEPLOY_ADMINER} \ ++ --set tenant=${TENANT} \ ++ --set environment=${ENVIRONMENT} \ ++ --set ingress.class="azure/application-gateway" \ ++ --set subdomain="${SUBDOMAIN}" \ ++ --set metrics.enabled=true \ ++ --set healthCheck.backendConfig.enabled=false \ ++ --set config.storageClassName="${STORAGE_CLASS_NAME}" \ ++ --set agentproxySubdomain="${APROXY_SUBDOMAIN}" \ ++ --set rdpproxySubdomain="${RDPPROXY_SUBDOMAIN}" \ ++ --set keycloakSubdomain="${KEYCLOAK_SUBDOMAIN}" \ ++ --set keycloakHostname="${KEYCLOAK_HOSTNAME}" \ ++ --set keycloakDomain="${KEYCLOAK_DOMAIN}" \ ++ --set keycloakInternalDomain="${KEYCLOAK_INTERNAL_DOMAIN}" \ ++ --set sentriusDomain="${SENTRIUS_DOMAIN}" \ ++ --set agentproxyDomain="${APROXY_DOMAIN}" \ ++ --set rdpproxyDomain="${RDPPROXY_DOMAIN}" \ ++ --set certificates.enabled=${CERTIFICATES_ENABLED} \ ++ --set ingress.tlsEnabled=${INGRESS_TLS_ENABLED} \ ++ > "${RENDER_PATH}" ++ ++if [[ $? -ne 0 ]]; then ++ echo "❌ Helm rendering failed — check your templates!" ++ exit 1 ++fi ++ ++echo "✅ Rendered output saved to ${RENDER_PATH}" ++ ++# Validate YAML ++echo "🔍 Validating Kubernetes YAML with kubeval (if installed)..." ++if command -v kubeval >/dev/null 2>&1; then ++ kubeval --strict "${RENDER_PATH}" ++else ++ echo "⚠️ kubeval not installed — skipping schema validation." ++fi ++ ++echo "======================================" ++echo "🚀 Deploying Sentrius (Two-Stage Ingress)" ++echo "======================================" ++ ++echo "📦 Deploying Sentrius main chart to namespace ${TENANT}..." 
++helm upgrade --install sentrius ./sentrius-chart --namespace ${TENANT} \ ++ --set adminer.enabled=${DEPLOY_ADMINER} \ ++ --set tenant=${TENANT} \ ++ --set environment=${ENVIRONMENT} \ ++ --set ingress.class="azure/application-gateway" \ ++ --set subdomain="${SUBDOMAIN}" \ ++ --set metrics.enabled=true \ ++ --set healthCheck.backendConfig.enabled=false \ ++ --set config.storageClassName="${STORAGE_CLASS_NAME}" \ ++ --set agentproxySubdomain="${APROXY_SUBDOMAIN}" \ ++ --set rdpproxySubdomain="${RDPPROXY_SUBDOMAIN}" \ ++ --set keycloakSubdomain="${KEYCLOAK_SUBDOMAIN}" \ ++ --set keycloakHostname="${KEYCLOAK_HOSTNAME}" \ ++ --set keycloakDomain="${KEYCLOAK_DOMAIN}" \ ++ --set keycloakInternalDomain="${KEYCLOAK_INTERNAL_DOMAIN}" \ ++ --set sentriusDomain="${SENTRIUS_DOMAIN}" \ ++ --set secrets.db.password="${DB_PASSWORD}" \ ++ --set secrets.db.keystorePassword="${KEYSTORE_PASSWORD}" \ ++ --set agentproxyDomain="${APROXY_DOMAIN}" \ ++ --set rdpproxyDomain="${RDPPROXY_DOMAIN}" \ ++ --set certificates.enabled=${CERTIFICATES_ENABLED} \ ++ --set ingress.tlsEnabled=${INGRESS_TLS_ENABLED} \ ++ --set launcherFQDN=sentrius-agents-launcherservice.${TENANT}-agents.svc.cluster.local \ ++ --set integrationproxy.image.repository="${AZURE_REGISTRY}/sentrius-integration-proxy" \ ++ --set integrationproxy.image.pullPolicy="IfNotPresent" \ ++ --set integrationproxy.image.tag=${LLMPROXY_VERSION} \ ++ --set agentproxy.image.repository="${AZURE_REGISTRY}/sentrius-agent-proxy" \ ++ --set agentproxy.image.pullPolicy="IfNotPresent" \ ++ --set agentproxy.image.tag=${AGENTPROXY_VERSION} \ ++ --set sentrius.image.repository="${AZURE_REGISTRY}/sentrius" \ ++ --set sentrius.image.pullPolicy="IfNotPresent" \ ++ --set sentrius.image.tag=${SENTRIUS_VERSION} \ ++ --set keycloak.db.password="${KEYCLOAK_DB_PASSWORD}" \ ++ --set secrets.db.username="postgres" \ ++ --set keycloak.adminPassword="${KEYCLOAK_ADMIN_PASSWORD}" \ ++ --set keycloak.clientSecret="${KEYCLOAK_CLIENT_SECRET}" \ ++ --set 
keycloak.realm.clients.sentriusApi.client_secret="${SENTRIUS_API_CLIENT_SECRET}" \ ++ --set keycloak.realm.clients.sentriusLauncher.client_secret="${SENTRIUS_LAUNCHER_CLIENT_SECRET}" \ ++ --set keycloak.realm.clients.javaAgents.client_secret="${JAVA_AGENTS_CLIENT_SECRET}" \ ++ --set keycloak.realm.clients.aiAgentAssessor.client_secret="${MONITORING_AGENT_CLIENT_SECRET}" \ ++ --set keycloak.realm.clients.sshagent.client_secret="${SSH_AGENT_CLIENT_SECRET}" \ ++ --set keycloak.realm.clients.agentProxy.client_secret="${SENTRIUS_APROXY_CLIENT_SECRET}" \ ++ --set keycloak.realm.clients.promptAdvisor.client_secret="${PROMPT_ADVISOR_CLIENT_SECRET}" \ ++ --set keycloak.image.repository="${AZURE_REGISTRY}/sentrius-keycloak" \ ++ --set keycloak.image.pullPolicy="IfNotPresent" \ ++ --set keycloak.image.tag=${SENTRIUS_KEYCLOAK_VERSION} \ ++ --set ssh.image.repository="${AZURE_REGISTRY}/sentrius-ssh" \ ++ --set ssh.image.pullPolicy="IfNotPresent" \ ++ --set ssh.image.tag=${SENTRIUS_SSH_VERSION} \ ++ --set sentriusaiagent.image.repository="${AZURE_REGISTRY}/sentrius-ai-agent" \ ++ --set sentriusaiagent.image.pullPolicy="IfNotPresent" \ ++ --set sentriusaiagent.image.tag=${SENTRIUS_AI_AGENT_VERSION} \ ++ --set launcherservice.image.repository="${AZURE_REGISTRY}/sentrius-launcher-service" \ ++ --set launcherservice.image.pullPolicy="IfNotPresent" \ ++ --set launcherservice.image.tag=${LAUNCHER_VERSION} \ ++ --set sshproxy.image.repository="${AZURE_REGISTRY}/sentrius-ssh-proxy" \ ++ --set sshproxy.image.pullPolicy="IfNotPresent" \ ++ --set sshproxy.image.tag=${SSHPROXY_VERSION} \ ++ --set monitoringagent.image.tag=${MONITORING_AGENT_VERSION} \ ++ --set monitoringagent.image.repository="${AZURE_REGISTRY}/sentrius-monitoring-agent" \ ++ --set monitoringagent.image.pullPolicy="IfNotPresent" \ ++ --set sshagent.image.tag=${SSH_AGENT_VERSION} \ ++ --set sshagent.image.repository="${AZURE_REGISTRY}/sentrius-ssh-agent" \ ++ --set 
rdpproxy.image.repository="${AZURE_REGISTRY}/sentrius-rdp-proxy" \ ++ --set rdpproxy.image.pullPolicy="IfNotPresent" \ ++ --set rdpproxy.image.tag=${RDPPROXY_VERSION} \ ++ --set rdpTest.enabled=${ENABLE_RDP_CONTAINER} \ ++ --set neo4j.env.NEO4J_server_config_strict__validation__enabled="\"false\"" \ ++ --set sentriusagent.image.repository="${AZURE_REGISTRY}/sentrius-agent" \ ++ --set sentriusagent.image.pullPolicy="IfNotPresent" \ ++ --set sentriusagent.image.tag=${SENTRIUS_AGENT_VERSION} || { echo "Failed to deploy Sentrius with Helm"; exit 1; } ++ ++echo "" ++echo "======================================" ++echo "⏳ STAGE 1: Waiting for Keycloak Ingress" ++echo "======================================" ++ ++# Wait for Keycloak ingress to get an IP ++KEYCLOAK_INGRESS_TIMEOUT=600 ++ELAPSED=0 ++KEYCLOAK_INGRESS_IP="" ++ ++echo "Waiting for Keycloak ingress IP (timeout: ${KEYCLOAK_INGRESS_TIMEOUT}s)..." ++while [ $ELAPSED -lt $KEYCLOAK_INGRESS_TIMEOUT ]; do ++ KEYCLOAK_INGRESS_IP=$(kubectl get ingress "keycloak-ingress-${TENANT}" -n ${TENANT} -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || echo "") ++ ++ if [[ -n "$KEYCLOAK_INGRESS_IP" ]]; then ++ echo "✅ Keycloak ingress has IP: $KEYCLOAK_INGRESS_IP" ++ break ++ fi ++ ++ if [ $((ELAPSED % 30)) -eq 0 ]; then ++ echo " Still waiting for Keycloak ingress IP... ($ELAPSED seconds elapsed)" ++ fi ++ sleep 10 ++ ELAPSED=$((ELAPSED + 10)) ++done ++ ++if [[ -z "$KEYCLOAK_INGRESS_IP" ]]; then ++ echo "❌ ERROR: Keycloak ingress did not get an IP within ${KEYCLOAK_INGRESS_TIMEOUT} seconds" ++ echo "" ++ echo "Checking ingress status:" ++ kubectl describe ingress "keycloak-ingress-${TENANT}" -n ${TENANT} ++ exit 1 ++fi ++ ++# Create/Update DNS for Keycloak immediately ++echo "" ++echo "🌐 Configuring DNS for Keycloak..." 
++if az network dns record-set a show --resource-group ${RESOURCE_GROUP} --zone-name ${DNS_ZONE} --name keycloak.${TENANT} 2>/dev/null | grep -q "keycloak.${TENANT}"; then ++ echo " Updating existing DNS record for ${KEYCLOAK_SUBDOMAIN}..." ++ az network dns record-set a remove-record --resource-group ${RESOURCE_GROUP} --zone-name ${DNS_ZONE} --record-set-name keycloak.${TENANT} --ipv4-address $(az network dns record-set a show --resource-group ${RESOURCE_GROUP} --zone-name ${DNS_ZONE} --name keycloak.${TENANT} --query 'aRecords[0].ipv4Address' -o tsv) 2>/dev/null || true ++fi ++ ++az network dns record-set a add-record \ ++ --resource-group ${RESOURCE_GROUP} \ ++ --zone-name ${DNS_ZONE} \ ++ --record-set-name keycloak.${TENANT} \ ++ --ipv4-address $KEYCLOAK_INGRESS_IP || { ++ echo "⚠️ Failed to create DNS record, it may already exist" ++} ++ ++# Wait for Keycloak pod to be ready ++echo "" ++echo "⏳ Waiting for Keycloak pod to be ready..." ++kubectl wait --for=condition=ready pod \ ++ -l "app.kubernetes.io/name=keycloak" \ ++ -n ${TENANT} \ ++ --timeout=10m || { ++ echo "⚠️ Keycloak pod not ready yet, but continuing..." ++} ++ ++# Wait for Keycloak to respond ++echo "" ++echo "⏳ Waiting for Keycloak to be healthy..." ++echo " Checking: https://${KEYCLOAK_SUBDOMAIN}/" ++KEYCLOAK_HEALTH_TIMEOUT=300 ++ELAPSED=0 ++ ++while [ $ELAPSED -lt $KEYCLOAK_HEALTH_TIMEOUT ]; do ++ # Try HTTPS (with DNS), then HTTP with IP ++ if curl -sf -k --connect-timeout 5 "https://${KEYCLOAK_SUBDOMAIN}/" >/dev/null 2>&1; then ++ echo "✅ Keycloak is healthy via HTTPS" ++ break ++ elif curl -sf --connect-timeout 5 "http://${KEYCLOAK_INGRESS_IP}/" >/dev/null 2>&1; then ++ echo "✅ Keycloak is responding (certificate may still be provisioning)" ++ break ++ fi ++ ++ if [ $((ELAPSED % 30)) -eq 0 ]; then ++ echo " Waiting for Keycloak to respond... 
($ELAPSED seconds elapsed)" ++ fi ++ sleep 10 ++ ELAPSED=$((ELAPSED + 10)) ++done ++ ++if [ $ELAPSED -ge $KEYCLOAK_HEALTH_TIMEOUT ]; then ++ echo "⚠️ WARNING: Keycloak did not respond within ${KEYCLOAK_HEALTH_TIMEOUT} seconds" ++ echo " Continuing anyway - apps will retry connection..." ++fi ++ ++echo "" ++echo "======================================" ++echo "⏳ STAGE 2: Waiting for Apps Ingress" ++echo "======================================" ++ ++# Wait for apps ingress to get an IP ++APPS_INGRESS_TIMEOUT=600 ++ELAPSED=0 ++APPS_INGRESS_IP="" ++ ++echo "Waiting for apps ingress IP (timeout: ${APPS_INGRESS_TIMEOUT}s)..." ++while [ $ELAPSED -lt $APPS_INGRESS_TIMEOUT ]; do ++ APPS_INGRESS_IP=$(kubectl get ingress "apps-ingress-${TENANT}" -n ${TENANT} -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || echo "") ++ ++ if [[ -n "$APPS_INGRESS_IP" ]]; then ++ echo "✅ Apps ingress has IP: $APPS_INGRESS_IP" ++ break ++ fi ++ ++ if [ $((ELAPSED % 30)) -eq 0 ]; then ++ echo " Still waiting for apps ingress IP... ($ELAPSED seconds elapsed)" ++ fi ++ sleep 10 ++ ELAPSED=$((ELAPSED + 10)) ++done ++ ++if [[ -z "$APPS_INGRESS_IP" ]]; then ++ echo "⚠️ WARNING: Apps ingress did not get an IP within ${APPS_INGRESS_TIMEOUT} seconds" ++ echo " Application pods may still be starting up..." ++else ++ # Configure DNS for apps ++ echo "" ++ echo "🌐 Configuring DNS for application services..." ++ ++ # Check and create/update DNS records ++ for SUBDOMAIN_NAME in "${SUBDOMAIN}" "${APROXY_SUBDOMAIN}" "${RDPPROXY_SUBDOMAIN}"; do ++ RECORD_NAME=$(echo ${SUBDOMAIN_NAME} | sed "s/\.${DNS_ZONE}//") ++ if az network dns record-set a show --resource-group ${RESOURCE_GROUP} --zone-name ${DNS_ZONE} --name ${RECORD_NAME} 2>/dev/null | grep -q "${RECORD_NAME}"; then ++ echo " Updating ${SUBDOMAIN_NAME}..." 
++ az network dns record-set a remove-record --resource-group ${RESOURCE_GROUP} --zone-name ${DNS_ZONE} --record-set-name ${RECORD_NAME} --ipv4-address $(az network dns record-set a show --resource-group ${RESOURCE_GROUP} --zone-name ${DNS_ZONE} --name ${RECORD_NAME} --query 'aRecords[0].ipv4Address' -o tsv) 2>/dev/null || true ++ fi ++ ++ az network dns record-set a add-record \ ++ --resource-group ${RESOURCE_GROUP} \ ++ --zone-name ${DNS_ZONE} \ ++ --record-set-name ${RECORD_NAME} \ ++ --ipv4-address $APPS_INGRESS_IP || { ++ echo "⚠️ Failed to create DNS record for ${SUBDOMAIN_NAME}" ++ } ++ done ++fi ++ ++# Deploy launcher service ++echo "" ++echo "======================================" ++echo "📦 Deploying Launcher Service" ++echo "======================================" ++ ++echo "Deploying Sentrius launcher chart to namespace ${TENANT}-agents..." ++helm upgrade --install sentrius-agents ./sentrius-chart-launcher --namespace ${TENANT}-agents \ ++ --set tenant=${TENANT}-agents \ ++ --set baseRelease=sentrius \ ++ --set sentriusNamespace=${TENANT} \ ++ --set ingress.class="azure/application-gateway" \ ++ --set healthCheck.backendConfig.enabled=false \ ++ --set keycloakFQDN=sentrius-keycloak.${TENANT}.svc.cluster.local \ ++ --set sentriusFQDN=sentrius-sentrius.${TENANT}.svc.cluster.local \ ++ --set integrationproxyFQDN=sentrius-integrationproxy.${TENANT}.svc.cluster.local \ ++ --set agentproxyFQDN=sentrius-agentproxy.${TENANT}.svc.cluster.local \ ++ --set subdomain="${SUBDOMAIN}" \ ++ --set metrics.enabled=true \ ++ --set agentproxySubdomain="${APROXY_SUBDOMAIN}" \ ++ --set agentproxyDomain="${APROXY_DOMAIN}" \ ++ --set keycloakSubdomain="${KEYCLOAK_SUBDOMAIN}" \ ++ --set keycloakHostname="${KEYCLOAK_HOSTNAME}" \ ++ --set keycloakDomain="${KEYCLOAK_DOMAIN}" \ ++ --set keycloakInternalDomain="${KEYCLOAK_INTERNAL_DOMAIN}" \ ++ --set sentriusDomain="${SENTRIUS_DOMAIN}" \ ++ --set integrationproxy.image.repository="${AZURE_REGISTRY}/sentrius-integration-proxy" \ ++ 
--set integrationproxy.image.pullPolicy="IfNotPresent" \ ++ --set integrationproxy.image.tag=${LLMPROXY_VERSION} \ ++ --set secrets.db.password="${DB_PASSWORD}" \ ++ --set secrets.db.keystorePassword="${KEYSTORE_PASSWORD}" \ ++ --set launcherservice.oauth2.client_secret="${SENTRIUS_LAUNCHER_CLIENT_SECRET}" \ ++ --set sentrius.image.repository="${AZURE_REGISTRY}/sentrius" \ ++ --set sentrius.image.pullPolicy="IfNotPresent" \ ++ --set sentrius.image.tag=${SENTRIUS_VERSION} \ ++ --set keycloak.image.repository="${AZURE_REGISTRY}/sentrius-keycloak" \ ++ --set keycloak.image.pullPolicy="IfNotPresent" \ ++ --set keycloak.image.tag=${SENTRIUS_KEYCLOAK_VERSION} \ ++ --set ssh.image.repository="${AZURE_REGISTRY}/sentrius-ssh" \ ++ --set ssh.image.pullPolicy="IfNotPresent" \ ++ --set ssh.image.tag=${SENTRIUS_SSH_VERSION} \ ++ --set sentriusaiagent.image.repository="${AZURE_REGISTRY}/sentrius-ai-agent" \ ++ --set sentriusaiagent.image.pullPolicy="IfNotPresent" \ ++ --set sentriusaiagent.image.tag=${SENTRIUS_AI_AGENT_VERSION} \ ++ --set launcherservice.image.repository="${AZURE_REGISTRY}/sentrius-launcher-service" \ ++ --set launcherservice.image.pullPolicy="IfNotPresent" \ ++ --set launcherservice.image.tag=${LAUNCHER_VERSION} \ ++ --set neo4j.env.NEO4J_server_config_strict__validation__enabled="\"false\"" \ ++ --set sentriusagent.image.repository="${AZURE_REGISTRY}/sentrius-agent" \ ++ --set sentriusagent.image.pullPolicy="IfNotPresent" \ ++ --set sentriusagent.image.tag=${SENTRIUS_AGENT_VERSION} || { echo "Failed to deploy Sentrius launcher with Helm"; exit 1; } ++ ++# Wait for application pods ++echo "" ++echo "⏳ Waiting for application pods to be ready..." ++kubectl wait --for=condition=ready pod \ ++ -l "app.kubernetes.io/instance=sentrius" \ ++ -n ${TENANT} \ ++ --timeout=10m 2>&1 | grep -v "error: no matching resources found" || true ++ ++echo "" ++echo "======================================" ++echo "✅ Deployment Complete!" 
++echo "======================================" ++echo "" ++echo "Keycloak Ingress IP: ${KEYCLOAK_INGRESS_IP}" ++echo "Apps Ingress IP: ${APPS_INGRESS_IP:-}" ++echo "" ++echo "Services:" ++echo " Keycloak: ${KEYCLOAK_DOMAIN}" ++echo " Sentrius: ${SENTRIUS_DOMAIN}" ++echo " Agent Proxy: ${APROXY_DOMAIN}" ++echo " RDP Proxy: ${RDPPROXY_DOMAIN}" ++echo "" ++echo "Check status with:" ++echo " kubectl get ingress -n ${TENANT}" ++echo " kubectl get pods -n ${TENANT}" +diff --git a/ops-scripts/azure/destroy-tenant.sh b/ops-scripts/azure/destroy-tenant.sh +new file mode 100755 +index 00000000..14d975db +--- /dev/null ++++ b/ops-scripts/azure/destroy-tenant.sh +@@ -0,0 +1,62 @@ ++#!/bin/bash ++SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ++ ++source ${SCRIPT_DIR}/base.sh ++ ++TENANT=$1 ++ ++if [[ -z "$TENANT" ]]; then ++ echo "Usage: $0 " 1>&2 ++ exit 1 ++fi ++ ++echo "======================================" ++echo "🗑️ Destroying Tenant: ${TENANT}" ++echo "======================================" ++ ++# Uninstall Helm releases ++echo "📦 Uninstalling Helm releases..." ++helm uninstall sentrius -n ${TENANT} 2>/dev/null || echo " sentrius release not found" ++helm uninstall sentrius-agents -n ${TENANT}-agents 2>/dev/null || echo " sentrius-agents release not found" ++ ++# Delete ingresses to release load balancers ++echo "🌐 Deleting ingresses..." ++kubectl delete ingress --all -n ${TENANT} 2>/dev/null || true ++ ++# Wait for cleanup ++echo "⏳ Waiting for resources to be cleaned up..." ++sleep 10 ++ ++# Remove DNS records ++echo "🌐 Removing DNS records..." ++${SCRIPT_DIR}/remove-subdomain.sh ${TENANT} ++ ++# Delete namespaces ++echo "📦 Deleting namespaces..." ++kubectl delete namespace ${TENANT} --timeout=60s 2>/dev/null || true ++kubectl delete namespace ${TENANT}-agents --timeout=60s 2>/dev/null || true ++ ++# If namespaces are stuck ++echo "🔍 Checking for stuck namespaces..." 
++if kubectl get namespace ${TENANT} >/dev/null 2>&1; then ++ echo " Removing finalizers from ${TENANT}..." ++ kubectl get namespace ${TENANT} -o json | \ ++ jq '.spec.finalizers = []' | \ ++ kubectl replace --raw /api/v1/namespaces/${TENANT}/finalize -f - ++fi ++ ++if kubectl get namespace ${TENANT}-agents >/dev/null 2>&1; then ++ echo " Removing finalizers from ${TENANT}-agents..." ++ kubectl get namespace ${TENANT}-agents -o json | \ ++ jq '.spec.finalizers = []' | \ ++ kubectl replace --raw /api/v1/namespaces/${TENANT}-agents/finalize -f - ++fi ++ ++echo "" ++echo "======================================" ++echo "✅ Tenant Destroyed!" ++echo "======================================" ++echo "" ++echo "Verify cleanup:" ++echo " kubectl get namespaces | grep ${TENANT}" ++echo " az network dns record-set a list --resource-group ${RESOURCE_GROUP} --zone-name ${DNS_ZONE}" +diff --git a/ops-scripts/azure/remove-subdomain.sh b/ops-scripts/azure/remove-subdomain.sh +new file mode 100755 +index 00000000..106cce8d +--- /dev/null ++++ b/ops-scripts/azure/remove-subdomain.sh +@@ -0,0 +1,43 @@ ++#!/bin/bash ++SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ++ ++source ${SCRIPT_DIR}/base.sh ++ ++TENANT=$1 ++ ++if [[ -z "$TENANT" ]]; then ++ echo "Usage: $0 " 1>&2 ++ exit 1 ++fi ++ ++echo "Removing DNS records for tenant ${TENANT}..." 
++ ++# Remove main tenant domain ++az network dns record-set a delete \ ++ --resource-group ${RESOURCE_GROUP} \ ++ --zone-name ${DNS_ZONE} \ ++ --name ${TENANT} \ ++ --yes 2>/dev/null || echo " ${TENANT}.${DNS_ZONE} not found" ++ ++# Remove Keycloak subdomain ++az network dns record-set a delete \ ++ --resource-group ${RESOURCE_GROUP} \ ++ --zone-name ${DNS_ZONE} \ ++ --name keycloak.${TENANT} \ ++ --yes 2>/dev/null || echo " keycloak.${TENANT}.${DNS_ZONE} not found" ++ ++# Remove Agent Proxy subdomain ++az network dns record-set a delete \ ++ --resource-group ${RESOURCE_GROUP} \ ++ --zone-name ${DNS_ZONE} \ ++ --name agentproxy.${TENANT} \ ++ --yes 2>/dev/null || echo " agentproxy.${TENANT}.${DNS_ZONE} not found" ++ ++# Remove RDP Proxy subdomain ++az network dns record-set a delete \ ++ --resource-group ${RESOURCE_GROUP} \ ++ --zone-name ${DNS_ZONE} \ ++ --name rdpproxy.${TENANT} \ ++ --yes 2>/dev/null || echo " rdpproxy.${TENANT}.${DNS_ZONE} not found" ++ ++echo "✅ DNS records removed successfully!" +diff --git a/ops-scripts/azure/restart.sh b/ops-scripts/azure/restart.sh +new file mode 100755 +index 00000000..5f466f91 +--- /dev/null ++++ b/ops-scripts/azure/restart.sh +@@ -0,0 +1,47 @@ ++#!/bin/bash ++ ++SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ++ ++source ${SCRIPT_DIR}/base.sh ++source ${SCRIPT_DIR}/../../.azure.env ++ ++# Azure Container Registry ++AZURE_REGISTRY="${AZURE_REGISTRY:-sentriusacr.azurecr.io}" ++ ++TENANT="${1:-${NAMESPACE}}" ++ ++if [[ -z "$TENANT" ]]; then ++ echo "Usage: $0 " 1>&2 ++ echo "Example: $0 production" ++ exit 1 ++fi ++ ++echo "Restarting all deployments in namespace ${TENANT}..." ++kubectl scale deployment --all --replicas=1 -n ${TENANT} ++ ++echo "Upgrading Sentrius deployment with latest configuration..." 
++helm upgrade --install sentrius ./sentrius-chart --namespace ${TENANT} \ ++ --set tenant=${TENANT} \ ++ --set environment=aks \ ++ --set sentrius.image.repository=${AZURE_REGISTRY}/sentrius \ ++ --set sentrius.image.tag=${SENTRIUS_VERSION} \ ++ --set ssh.image.repository=${AZURE_REGISTRY}/sentrius-ssh \ ++ --set ssh.image.tag=${SENTRIUS_SSH_VERSION} \ ++ --set keycloak.image.repository=${AZURE_REGISTRY}/sentrius-keycloak \ ++ --set keycloak.image.tag=${SENTRIUS_KEYCLOAK_VERSION} \ ++ --set sentriusagent.image.repository=${AZURE_REGISTRY}/sentrius-agent \ ++ --set sentriusagent.image.tag=${SENTRIUS_AGENT_VERSION} \ ++ --set sentriusaiagent.image.repository=${AZURE_REGISTRY}/sentrius-ai-agent \ ++ --set sentriusaiagent.image.tag=${SENTRIUS_AI_AGENT_VERSION} \ ++ --set integrationproxy.image.repository=${AZURE_REGISTRY}/sentrius-integration-proxy \ ++ --set integrationproxy.image.tag=${LLMPROXY_VERSION} \ ++ --set agentproxy.image.repository=${AZURE_REGISTRY}/sentrius-agent-proxy \ ++ --set agentproxy.image.tag=${AGENTPROXY_VERSION:-1.0.0} \ ++ --set launcherservice.image.repository=${AZURE_REGISTRY}/sentrius-launcher-service \ ++ --set launcherservice.image.tag=${LAUNCHER_VERSION} \ ++ --set sshproxy.image.repository=${AZURE_REGISTRY}/sentrius-ssh-proxy \ ++ --set sshproxy.image.tag=${SSHPROXY_VERSION:-1.0.0} \ ++ --set rdpproxy.image.repository=${AZURE_REGISTRY}/sentrius-rdp-proxy \ ++ --set rdpproxy.image.tag=${RDPPROXY_VERSION:-1.0.0} || { echo "Failed to deploy Sentrius with Helm"; exit 1; } ++ ++echo "✅ Restart complete!" 
+diff --git a/ops-scripts/azure/shutdown.sh b/ops-scripts/azure/shutdown.sh +new file mode 100755 +index 00000000..d8853391 +--- /dev/null ++++ b/ops-scripts/azure/shutdown.sh +@@ -0,0 +1,87 @@ ++#!/bin/bash ++ ++SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ++ ++source ${SCRIPT_DIR}/base.sh ++ ++TENANT="${1:-${NAMESPACE}}" ++ ++while [[ $# -gt 0 ]]; do ++ case $1 in ++ --tenant) ++ TENANT="$2" ++ shift 2 ++ ;; ++ *) ++ echo "Unknown option: $1" ++ echo "Usage: $0 --tenant TENANT_NAME" ++ echo " --tenant: Specify tenant name (required)" ++ exit 1 ++ ;; ++ esac ++done ++ ++echo "======================================" ++echo "🗑️ Tearing Down Sentrius Deployment" ++echo "======================================" ++ ++# Delete Helm releases ++echo "📦 Uninstalling Helm releases..." ++helm uninstall sentrius -n ${TENANT} 2>/dev/null || echo " sentrius release not found" ++helm uninstall sentrius-agents -n ${TENANT}-agents 2>/dev/null || echo " sentrius-agents release not found" ++ ++# Delete ManagedCertificates explicitly (sometimes they linger) ++echo "🔐 Deleting managed certificates..." ++kubectl delete certificate --all -n ${TENANT} 2>/dev/null || true ++ ++# Delete Ingresses explicitly (to release load balancers) ++echo "🌐 Deleting ingresses..." ++kubectl delete ingress --all -n ${TENANT} 2>/dev/null || true ++ ++# Wait for load balancers to be removed ++echo "⏳ Waiting for load balancers to be cleaned up..." ++sleep 10 ++ ++# Delete DNS records ++echo "🌐 Deleting DNS records..." ++for SUBDOMAIN in "keycloak.${TENANT}" "${TENANT}" "agentproxy.${TENANT}" "rdpproxy.${TENANT}"; do ++ if az network dns record-set a show --resource-group ${RESOURCE_GROUP} --zone-name ${DNS_ZONE} --name ${SUBDOMAIN} 2>/dev/null | grep -q ${SUBDOMAIN}; then ++ echo " Deleting ${SUBDOMAIN}.${DNS_ZONE}..." 
++ az network dns record-set a delete \ ++ --resource-group ${RESOURCE_GROUP} \ ++ --zone-name ${DNS_ZONE} \ ++ --name ${SUBDOMAIN} \ ++ --yes 2>/dev/null || echo " Failed to delete ${SUBDOMAIN}" ++ fi ++done ++ ++# Delete namespaces (this removes all remaining resources) ++echo "📦 Deleting namespaces..." ++kubectl delete namespace ${TENANT} --timeout=60s 2>/dev/null || echo " Forcing namespace deletion..." ++kubectl delete namespace ${TENANT}-agents --timeout=60s 2>/dev/null || echo " Forcing namespace deletion..." ++ ++# If namespaces are stuck (sometimes happens with finalizers) ++echo "🔍 Checking for stuck namespaces..." ++if kubectl get namespace ${TENANT} >/dev/null 2>&1; then ++ echo " Namespace ${TENANT} is stuck, removing finalizers..." ++ kubectl get namespace ${TENANT} -o json | \ ++ jq '.spec.finalizers = []' | \ ++ kubectl replace --raw /api/v1/namespaces/${TENANT}/finalize -f - ++fi ++ ++if kubectl get namespace ${TENANT}-agents >/dev/null 2>&1; then ++ echo " Namespace ${TENANT}-agents is stuck, removing finalizers..." ++ kubectl get namespace ${TENANT}-agents -o json | \ ++ jq '.spec.finalizers = []' | \ ++ kubectl replace --raw /api/v1/namespaces/${TENANT}-agents/finalize -f - ++fi ++ ++echo "" ++echo "======================================" ++echo "✅ Teardown Complete!" 
++echo "======================================" ++echo "" ++echo "Verify cleanup with:" ++echo " kubectl get namespaces | grep ${TENANT}" ++echo " az network public-ip list --resource-group ${RESOURCE_GROUP}" ++echo " az network dns record-set a list --resource-group ${RESOURCE_GROUP} --zone-name ${DNS_ZONE}" +diff --git a/ops-scripts/azure/spindown.sh b/ops-scripts/azure/spindown.sh +new file mode 100755 +index 00000000..3f38711a +--- /dev/null ++++ b/ops-scripts/azure/spindown.sh +@@ -0,0 +1,50 @@ ++#!/bin/bash ++ ++SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ++ ++source ${SCRIPT_DIR}/base.sh ++ ++TENANT="${1:-${NAMESPACE}}" ++ ++while [[ $# -gt 0 ]]; do ++ case $1 in ++ --tenant) ++ TENANT="$2" ++ shift 2 ++ ;; ++ *) ++ echo "Unknown option: $1" ++ echo "Usage: $0 --tenant TENANT_NAME" ++ echo " --tenant: Specify tenant name (required)" ++ exit 1 ++ ;; ++ esac ++done ++ ++echo "======================================" ++echo "💤 Scaling Down Sentrius Deployment" ++echo "======================================" ++ ++# This keeps: ++# ✅ Configurations, secrets, ingresses ++# ✅ Load balancers and IPs (so DNS stays valid) ++# ✅ Certificates (already provisioned) ++# ❌ Stops: All pods/containers (reduces costs) ++ ++# To restart: ++# kubectl scale deployment --all --replicas=1 -n ${TENANT} ++# kubectl scale deployment --all --replicas=1 -n ${TENANT}-agents ++# kubectl scale statefulset --all --replicas=1 -n ${TENANT} ++ ++# Scale down all deployments to 0 replicas ++kubectl scale deployment --all --replicas=0 -n ${TENANT} ++kubectl scale deployment --all --replicas=0 -n ${TENANT}-agents ++kubectl scale statefulset --all --replicas=0 -n ${TENANT} ++ ++echo "" ++echo "✅ Spindown complete!" 
++echo "" ++echo "To restart:" ++echo " kubectl scale deployment --all --replicas=1 -n ${TENANT}" ++echo " kubectl scale deployment --all --replicas=1 -n ${TENANT}-agents" ++echo " kubectl scale statefulset --all --replicas=1 -n ${TENANT}" +diff --git a/ops-scripts/azure/spinup.sh b/ops-scripts/azure/spinup.sh +new file mode 100755 +index 00000000..a4c175e1 +--- /dev/null ++++ b/ops-scripts/azure/spinup.sh +@@ -0,0 +1,44 @@ ++#!/bin/bash ++ ++SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ++ ++source ${SCRIPT_DIR}/base.sh ++ ++TENANT="${1:-${NAMESPACE}}" ++ ++while [[ $# -gt 0 ]]; do ++ case $1 in ++ --tenant) ++ TENANT="$2" ++ shift 2 ++ ;; ++ --no-tls) ++ CERTIFICATES_ENABLED="false" ++ INGRESS_TLS_ENABLED="false" ++ shift ++ ;; ++ *) ++ echo "Unknown option: $1" ++ echo "Usage: $0 --tenant TENANT_NAME [--no-tls]" ++ echo " --tenant: Specify tenant name (required)" ++ echo " --no-tls: Disable TLS/SSL (not recommended for production)" ++ exit 1 ++ ;; ++ esac ++done ++ ++echo "======================================" ++echo "⚡ Starting Up Sentrius Deployment" ++echo "======================================" ++ ++# Scale up all deployments to 1 replica ++kubectl scale deployment --all --replicas=1 -n ${TENANT} ++kubectl scale deployment --all --replicas=1 -n ${TENANT}-agents ++kubectl scale statefulset --all --replicas=1 -n ${TENANT} ++ ++echo "" ++echo "✅ Startup complete!" 
++echo "" ++echo "Check status with:" ++echo " kubectl get pods -n ${TENANT}" ++echo " kubectl get pods -n ${TENANT}-agents" +diff --git a/ops-scripts/azure/test-helm.sh b/ops-scripts/azure/test-helm.sh +new file mode 100755 +index 00000000..a88b7f60 +--- /dev/null ++++ b/ops-scripts/azure/test-helm.sh +@@ -0,0 +1,117 @@ ++#!/bin/bash ++ ++SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ++ ++source ${SCRIPT_DIR}/base.sh ++source ${SCRIPT_DIR}/../../.azure.env ++ ++TENANT=${1:-test-tenant} ++DOMAIN_NAME="trustpolicy.ai" # Default domain for Azure ++ ++echo "======================================" ++echo "🧪 Testing Helm Chart Rendering" ++echo "======================================" ++echo "Tenant: ${TENANT}" ++echo "Domain: ${DOMAIN_NAME}" ++echo "" ++ ++# Azure Container Registry ++AZURE_REGISTRY="${AZURE_REGISTRY:-sentriusacr.azurecr.io}" ++ ++# Test render sentrius-chart ++echo "📦 Testing sentrius-chart..." ++helm template sentrius ./sentrius-chart \ ++ --namespace ${TENANT} \ ++ --set tenant=${TENANT} \ ++ --set environment=aks \ ++ --set ingress.class="azure/application-gateway" \ ++ --set subdomain="${TENANT}.${DOMAIN_NAME}" \ ++ --set agentproxySubdomain="agentproxy.${TENANT}.${DOMAIN_NAME}" \ ++ --set keycloakSubdomain="keycloak.${TENANT}.${DOMAIN_NAME}" \ ++ --set rdpproxySubdomain="rdpproxy.${TENANT}.${DOMAIN_NAME}" \ ++ --set keycloakHostname="keycloak.${TENANT}.${DOMAIN_NAME}" \ ++ --set keycloakDomain="https://keycloak.${TENANT}.${DOMAIN_NAME}" \ ++ --set keycloakInternalDomain="https://keycloak.${TENANT}.${DOMAIN_NAME}" \ ++ --set sentriusDomain="https://${TENANT}.${DOMAIN_NAME}" \ ++ --set agentproxyDomain="https://agentproxy.${TENANT}.${DOMAIN_NAME}" \ ++ --set rdpproxyDomain="https://rdpproxy.${TENANT}.${DOMAIN_NAME}" \ ++ --set certificates.enabled=true \ ++ --set ingress.tlsEnabled=true \ ++ --set sentrius.image.repository="${AZURE_REGISTRY}/sentrius" \ ++ --set sentrius.image.tag=${SENTRIUS_VERSION} \ ++ --set 
keycloak.image.repository="${AZURE_REGISTRY}/sentrius-keycloak" \ ++ --set keycloak.image.tag=${SENTRIUS_KEYCLOAK_VERSION} \ ++ --set ssh.image.repository="${AZURE_REGISTRY}/sentrius-ssh" \ ++ --set ssh.image.tag=${SENTRIUS_SSH_VERSION} \ ++ --set sentriusagent.image.repository="${AZURE_REGISTRY}/sentrius-agent" \ ++ --set sentriusagent.image.tag=${SENTRIUS_AGENT_VERSION} \ ++ --set sentriusaiagent.image.repository="${AZURE_REGISTRY}/sentrius-ai-agent" \ ++ --set sentriusaiagent.image.tag=${SENTRIUS_AI_AGENT_VERSION} \ ++ --set integrationproxy.image.repository="${AZURE_REGISTRY}/sentrius-integration-proxy" \ ++ --set integrationproxy.image.tag=${LLMPROXY_VERSION} \ ++ --set agentproxy.image.repository="${AZURE_REGISTRY}/sentrius-agent-proxy" \ ++ --set agentproxy.image.tag=${AGENTPROXY_VERSION} \ ++ --set launcherservice.image.repository="${AZURE_REGISTRY}/sentrius-launcher-service" \ ++ --set launcherservice.image.tag=${LAUNCHER_VERSION} \ ++ --set sshproxy.image.repository="${AZURE_REGISTRY}/sentrius-ssh-proxy" \ ++ --set sshproxy.image.tag=${SSHPROXY_VERSION} \ ++ --set rdpproxy.image.repository="${AZURE_REGISTRY}/sentrius-rdp-proxy" \ ++ --set rdpproxy.image.tag=${RDPPROXY_VERSION} \ ++ > /tmp/sentrius-chart-test.yaml ++ ++if [[ $? -eq 0 ]]; then ++ echo "✅ sentrius-chart rendered successfully" ++ echo " Output saved to /tmp/sentrius-chart-test.yaml" ++else ++ echo "❌ sentrius-chart rendering failed" ++ exit 1 ++fi ++ ++echo "" ++echo "📦 Testing sentrius-chart-launcher..." 
++helm template sentrius-agents ./sentrius-chart-launcher \ ++ --namespace ${TENANT}-agents \ ++ --set tenant=${TENANT}-agents \ ++ --set baseRelease=sentrius \ ++ --set sentriusNamespace=${TENANT} \ ++ --set ingress.class="azure/application-gateway" \ ++ --set keycloakFQDN=sentrius-keycloak.${TENANT}.svc.cluster.local \ ++ --set sentriusFQDN=sentrius-sentrius.${TENANT}.svc.cluster.local \ ++ --set integrationproxyFQDN=sentrius-integrationproxy.${TENANT}.svc.cluster.local \ ++ --set agentproxyFQDN=sentrius-agentproxy.${TENANT}.svc.cluster.local \ ++ --set subdomain="${TENANT}.${DOMAIN_NAME}" \ ++ --set agentproxySubdomain="agentproxy.${TENANT}.${DOMAIN_NAME}" \ ++ --set keycloakSubdomain="keycloak.${TENANT}.${DOMAIN_NAME}" \ ++ --set keycloakHostname="keycloak.${TENANT}.${DOMAIN_NAME}" \ ++ --set keycloakDomain="https://keycloak.${TENANT}.${DOMAIN_NAME}" \ ++ --set keycloakInternalDomain="https://keycloak.${TENANT}.${DOMAIN_NAME}" \ ++ --set sentriusDomain="https://${TENANT}.${DOMAIN_NAME}" \ ++ --set agentproxyDomain="https://agentproxy.${TENANT}.${DOMAIN_NAME}" \ ++ --set sentrius.image.repository="${AZURE_REGISTRY}/sentrius" \ ++ --set sentrius.image.tag=${SENTRIUS_VERSION} \ ++ --set keycloak.image.repository="${AZURE_REGISTRY}/sentrius-keycloak" \ ++ --set keycloak.image.tag=${SENTRIUS_KEYCLOAK_VERSION} \ ++ --set ssh.image.repository="${AZURE_REGISTRY}/sentrius-ssh" \ ++ --set ssh.image.tag=${SENTRIUS_SSH_VERSION} \ ++ --set sentriusagent.image.repository="${AZURE_REGISTRY}/sentrius-agent" \ ++ --set sentriusagent.image.tag=${SENTRIUS_AGENT_VERSION} \ ++ --set sentriusaiagent.image.repository="${AZURE_REGISTRY}/sentrius-ai-agent" \ ++ --set sentriusaiagent.image.tag=${SENTRIUS_AI_AGENT_VERSION} \ ++ --set integrationproxy.image.repository="${AZURE_REGISTRY}/sentrius-integration-proxy" \ ++ --set integrationproxy.image.tag=${LLMPROXY_VERSION} \ ++ --set launcherservice.image.repository="${AZURE_REGISTRY}/sentrius-launcher-service" \ ++ --set 
launcherservice.image.tag=${LAUNCHER_VERSION} \ ++ > /tmp/sentrius-chart-launcher-test.yaml ++ ++if [[ $? -eq 0 ]]; then ++ echo "✅ sentrius-chart-launcher rendered successfully" ++ echo " Output saved to /tmp/sentrius-chart-launcher-test.yaml" ++else ++ echo "❌ sentrius-chart-launcher rendering failed" ++ exit 1 ++fi ++ ++echo "" ++echo "======================================" ++echo "✅ All Tests Passed!" ++echo "======================================" +diff --git a/ops-scripts/base/build-images.sh b/ops-scripts/base/build-images.sh +index 7744d178..91738a67 100755 +--- a/ops-scripts/base/build-images.sh ++++ b/ops-scripts/base/build-images.sh +@@ -9,14 +9,14 @@ ENV_TARGET="local" # default mode + NO_CACHE=false + INCLUDE_DEV_CERTS=false + +-# --- Parse the environment target (local | gcp) --- +-if [[ "$1" == "local" || "$1" == "gcp" ]]; then ++# --- Parse the environment target (local | gcp | azure) --- ++if [[ "$1" == "local" || "$1" == "gcp" || "$1" == "azure" ]]; then + ENV_TARGET="$1" + shift + fi + +-# --- Load environment file only for GCP (versions needed for registry) --- +-if [[ "$ENV_TARGET" == "gcp" ]]; then ++# --- Load environment file for GCP or Azure (versions needed for registry) --- ++if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then + ENV_FILE=".$ENV_TARGET.env" + source "$ENV_FILE" + cp "$ENV_FILE" "$ENV_FILE.bak" +@@ -116,11 +116,16 @@ build_image() { + exit 1 + fi + + if [[ "$ENV_TARGET" == "gcp" ]]; then + REGISTRY="us-central1-docker.pkg.dev/sentrius-project/sentrius-repo" + docker tag "$name:$version" "$REGISTRY/$name:$version" + docker push "$REGISTRY/$name:$version" + echo "✅ Pushed $REGISTRY/$name:$version" ++ elif [[ "$ENV_TARGET" == "azure" ]]; then ++ REGISTRY="${AZURE_REGISTRY:-sentriusacr.azurecr.io}" ++ docker tag "$name:$version" "$REGISTRY/$name:$version" ++ docker push "$REGISTRY/$name:$version" ++ echo "✅ Pushed $REGISTRY/$name:$version" +
else + echo "✅ Built locally: $name:$version" + fi +@@ -182,11 +187,16 @@ build_keycloak_image() { + minikube image load "$name:$version" + fi + + if [[ "$ENV_TARGET" == "gcp" ]]; then + REGISTRY="us-central1-docker.pkg.dev/sentrius-project/sentrius-repo" + docker tag "$name:$version" "$REGISTRY/$name:$version" + docker push "$REGISTRY/$name:$version" + echo "✅ Pushed $REGISTRY/$name:$version" ++ elif [[ "$ENV_TARGET" == "azure" ]]; then ++ REGISTRY="${AZURE_REGISTRY:-sentriusacr.azurecr.io}" ++ docker tag "$name:$version" "$REGISTRY/$name:$version" ++ docker push "$REGISTRY/$name:$version" ++ echo "✅ Pushed $REGISTRY/$name:$version" + else + echo "✅ Built locally: $name:$version" + fi +@@ -233,16 +243,21 @@ while [[ "$#" -gt 0 ]]; do + shift + done + +-# --- Auth for GCP --- ++# --- Auth for GCP or Azure --- + if [[ "$ENV_TARGET" == "gcp" ]]; then + echo "Authenticating with Google Cloud..." + gcloud auth configure-docker us-central1-docker.pkg.dev || exit 1 ++elif [[ "$ENV_TARGET" == "azure" ]]; then ++ echo "Authenticating with Azure Container Registry..." ++ REGISTRY="${AZURE_REGISTRY:-sentriusacr.azurecr.io}" ++ REGISTRY_NAME=$(echo "$REGISTRY" | cut -d'.' 
-f1) ++ az acr login --name "$REGISTRY_NAME" || exit 1 + fi + + # --- Build Steps --- + if $update_sentrius; then + cp api/target/sentrius-api-*.jar docker/sentrius/sentrius.jar +- if [[ "$ENV_TARGET" == "gcp" ]]; then ++ if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then + SENTRIUS_VERSION=$(increment_patch_version $SENTRIUS_VERSION) + update_env_var "SENTRIUS_VERSION" "$SENTRIUS_VERSION" + else +@@ -253,7 +268,7 @@ if $update_sentrius; then + fi + + if $update_sentrius_ssh; then +- if [[ "$ENV_TARGET" == "gcp" ]]; then ++ if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then + SENTRIUS_SSH_VERSION=$(increment_patch_version $SENTRIUS_SSH_VERSION) + update_env_var "SENTRIUS_SSH_VERSION" "$SENTRIUS_SSH_VERSION" + else +@@ -263,7 +278,7 @@ if $update_sentrius_ssh; then + fi + + if $update_sentrius_keycloak; then +- if [[ "$ENV_TARGET" == "gcp" ]]; then ++ if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then + SENTRIUS_KEYCLOAK_VERSION=$(increment_patch_version $SENTRIUS_KEYCLOAK_VERSION) + update_env_var "SENTRIUS_KEYCLOAK_VERSION" "$SENTRIUS_KEYCLOAK_VERSION" + else +@@ -274,7 +289,7 @@ fi + + if $update_sentrius_agent; then + cp analytics/target/analytics-*.jar docker/sentrius-agent/agent.jar +- if [[ "$ENV_TARGET" == "gcp" ]]; then ++ if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then + SENTRIUS_AGENT_VERSION=$(increment_patch_version $SENTRIUS_AGENT_VERSION) + update_env_var "SENTRIUS_AGENT_VERSION" "$SENTRIUS_AGENT_VERSION" + else +@@ -286,7 +301,7 @@ fi + + if $update_sentrius_ai_agent; then + cp enterprise-agent/target/enterprise-agent-*.jar docker/sentrius-ai-agent/agent.jar +- if [[ "$ENV_TARGET" == "gcp" ]]; then ++ if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then + SENTRIUS_AI_AGENT_VERSION=$(increment_patch_version $SENTRIUS_AI_AGENT_VERSION) + update_env_var "SENTRIUS_AI_AGENT_VERSION" "$SENTRIUS_AI_AGENT_VERSION" + else +@@ -302,7 +317,7 @@ fi + + if $update_integrationproxy; then + 
cp integration-proxy/target/sentrius-integration-proxy-*.jar docker/integrationproxy/llmproxy.jar +- if [[ "$ENV_TARGET" == "gcp" ]]; then ++ if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then + LLMPROXY_VERSION=$(increment_patch_version $LLMPROXY_VERSION) + update_env_var "LLMPROXY_VERSION" "$LLMPROXY_VERSION" + else +@@ -314,7 +329,7 @@ fi + + if $update_launcher; then + cp agent-launcher/target/agent-launcher-*.jar docker/sentrius-launcher-service/launcher.jar +- if [[ "$ENV_TARGET" == "gcp" ]]; then ++ if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then + LAUNCHER_VERSION=$(increment_patch_version $LAUNCHER_VERSION) + update_env_var "LAUNCHER_VERSION" "$LAUNCHER_VERSION" + else +@@ -326,7 +341,7 @@ fi + + if $update_agent_proxy; then + cp agent-proxy/target/sentrius-agent-proxy-*.jar docker/agent-proxy/agentproxy.jar +- if [[ "$ENV_TARGET" == "gcp" ]]; then ++ if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then + AGENTPROXY_VERSION=$(increment_patch_version $AGENTPROXY_VERSION) + update_env_var "AGENTPROXY_VERSION" "$AGENTPROXY_VERSION" + else +@@ -338,7 +353,7 @@ fi + + if $update_ssh_proxy; then + cp ssh-proxy/target/ssh-proxy-*.jar docker/ssh-proxy/sshproxy.jar +- if [[ "$ENV_TARGET" == "gcp" ]]; then ++ if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then + SSHPROXY_VERSION=$(increment_patch_version $SSHPROXY_VERSION) + update_env_var "SSHPROXY_VERSION" "$SSHPROXY_VERSION" + else +@@ -350,7 +365,7 @@ fi + + if $update_rdp_proxy; then + cp rdp-proxy/target/rdp-proxy-*.jar docker/rdp-proxy/rdpproxy.jar +- if [[ "$ENV_TARGET" == "gcp" ]]; then ++ if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then + RDPPROXY_VERSION=$(increment_patch_version $RDPPROXY_VERSION) + update_env_var "RDPPROXY_VERSION" "$RDPPROXY_VERSION" + else +@@ -362,7 +377,7 @@ fi + + if $update_monitoring_agent; then + cp monitoring/target/monitoring-*.jar docker/monitoring/monitoring.jar +- if [[ "$ENV_TARGET" == "gcp" ]]; 
then ++ if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then + $MONITORING_AGENT_VERSION=$(increment_patch_version $MONITORING_AGENT_VERSION) + update_env_var "$MONITORING_AGENT_VERSION" "$MONITORING_AGENT_VERSION" + else +@@ -374,7 +389,7 @@ fi + + if $update_ssh_agent; then + cp \/target/ssh-agent-*.jar docker/ssh-agent/ssh-agent.jar +- if [[ "$ENV_TARGET" == "gcp" ]]; then ++ if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then + $SSH_AGENT_VERSION=$(increment_patch_version $SSH_AGENT_VERSION) + update_env_var "$SSH_AGENT_VERSION" "$SSH_AGENT_VERSION" + else +@@ -385,7 +400,7 @@ if $update_ssh_agent; then + fi + + if $update_github_mcp; then +- if [[ "$ENV_TARGET" == "gcp" ]]; then ++ if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then + GITHUB_MCP_VERSION=$(increment_patch_version $GITHUB_MCP_VERSION) + update_env_var "GITHUB_MCP_VERSION" "$GITHUB_MCP_VERSION" + else +@@ -395,7 +410,7 @@ if $update_github_mcp; then + fi + + if $update_prompt_advisor; then +- if [[ "$ENV_TARGET" == "gcp" ]]; then ++ if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then + PROMPT_ADVISOR_VERSION=$(increment_patch_version $PROMPT_ADVISOR_VERSION) + update_env_var "PROMPT_ADVISOR_VERSION" "$PROMPT_ADVISOR_VERSION" + else diff --git a/ops-scripts/azure/QUICKREF.md b/ops-scripts/azure/QUICKREF.md new file mode 100644 index 00000000..5b36277f --- /dev/null +++ b/ops-scripts/azure/QUICKREF.md @@ -0,0 +1,413 @@ +# Azure/AKS Deployment Quick Reference + +Quick reference guide for common Sentrius Azure/AKS deployment tasks. 
+ +## Initial Setup + +```bash +# Login to Azure +az login +az account set --subscription <subscription-id> + +# Configure kubectl for AKS +az aks get-credentials --resource-group sentrius-rg --name sentrius-aks-cluster + +# Login to Azure Container Registry +az acr login --name sentriusacr + +# Set Azure Container Registry environment variable +export AZURE_REGISTRY=sentriusacr.azurecr.io +``` + +## Common Commands + +### Deploy New Tenant + +```bash +# Deploy with TLS and default domain (trustpolicy.ai) +./ops-scripts/azure/deploy-helm.sh --tenant production + +# Deploy with custom domain +./ops-scripts/azure/deploy-helm.sh --tenant production --domain mycompany.com + +# Deploy without TLS (development only) +./ops-scripts/azure/deploy-helm.sh --tenant dev --no-tls +``` + +### Build and Push Images + +```bash +# Build all images for Azure +./ops-scripts/base/build-images.sh azure --all + +# Build specific image +./ops-scripts/base/build-images.sh azure --sentrius + +# Build with no cache +./ops-scripts/base/build-images.sh azure --all --no-cache +``` + +### Start/Stop Deployments + +```bash +# Stop all pods (saves costs, keeps config) +./ops-scripts/azure/spindown.sh --tenant production + +# Start pods again +./ops-scripts/azure/spinup.sh --tenant production + +# Restart with updated config +./ops-scripts/azure/restart.sh production +``` + +### Completely Remove Tenant + +```bash +# Remove everything (destructive!) 
+./ops-scripts/azure/shutdown.sh --tenant old-tenant +# OR +./ops-scripts/azure/destroy-tenant.sh old-tenant +``` + +### DNS Management + +```bash +# Get ingress IP +INGRESS_IP=$(kubectl get ingress apps-ingress-production -n production -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + +# Create DNS records manually +./ops-scripts/azure/create-subdomain.sh production $INGRESS_IP + +# Remove DNS records +./ops-scripts/azure/remove-subdomain.sh production +``` + +### Testing + +```bash +# Test Helm chart rendering +./ops-scripts/azure/test-helm.sh production + +# Lint Helm charts +helm lint sentrius-chart +helm lint sentrius-chart-launcher +``` + +## Monitoring and Debugging + +### Check Deployment Status + +```bash +# Check all resources +kubectl get all -n production + +# Check deployments +kubectl get deployments -n production +kubectl get deployments -n production-agents + +# Check pods +kubectl get pods -n production +kubectl get pods -n production-agents + +# Check ingress +kubectl get ingress -n production + +# Check services +kubectl get services -n production +``` + +### View Logs + +```bash +# Sentrius API logs +kubectl logs -n production deployment/sentrius-sentrius --tail=100 -f + +# Keycloak logs +kubectl logs -n production deployment/sentrius-keycloak --tail=100 -f + +# Agent logs +kubectl logs -n production-agents deployment/sentrius-agents-launcherservice --tail=100 -f + +# All logs from a pod +kubectl logs -n production <pod-name> --all-containers=true +``` + +### Describe Resources + +```bash +# Describe pod (shows events) +kubectl describe pod -n production <pod-name> + +# Describe ingress +kubectl describe ingress -n production apps-ingress-production + +# Describe deployment +kubectl describe deployment -n production sentrius-sentrius +``` + +### Execute Commands in Pods + +```bash +# Get shell in pod +kubectl exec -it -n production <pod-name> -- /bin/bash + +# Run single command +kubectl exec -n production <pod-name> -- ls -la /app +``` + +## Azure-Specific Commands + +### Check AKS 
Cluster + +```bash +# Get cluster info +az aks show --resource-group sentrius-rg --name sentrius-aks-cluster + +# List node pools +az aks nodepool list --resource-group sentrius-rg --cluster-name sentrius-aks-cluster + +# Scale node pool +az aks nodepool scale --resource-group sentrius-rg --cluster-name sentrius-aks-cluster --name default --node-count 3 +``` + +### Check Container Registry + +```bash +# List repositories +az acr repository list --name sentriusacr --output table + +# List tags for image +az acr repository show-tags --name sentriusacr --repository sentrius --output table + +# Delete old image +az acr repository delete --name sentriusacr --image sentrius:old-tag +``` + +### Check DNS Records + +```bash +# List all DNS records +az network dns record-set a list --resource-group sentrius-rg --zone-name sentrius.cloud --output table + +# Show specific record +az network dns record-set a show --resource-group sentrius-rg --zone-name sentrius.cloud --name production + +# Delete DNS record +az network dns record-set a delete --resource-group sentrius-rg --zone-name sentrius.cloud --name old-tenant --yes +``` + +### Check Load Balancers + +```bash +# List public IPs +az network public-ip list --resource-group sentrius-rg --output table + +# List load balancers +az network lb list --resource-group sentrius-rg --output table +``` + +## Troubleshooting + +### Pods Not Starting + +```bash +# Check pod status +kubectl get pods -n production + +# View pod events +kubectl describe pod -n production <pod-name> + +# Check logs +kubectl logs -n production <pod-name> --previous + +# Check resource limits +kubectl top nodes +kubectl top pods -n production +``` + +### Image Pull Errors + +```bash +# Check if ACR is attached to AKS +az aks show --resource-group sentrius-rg --name sentrius-aks-cluster --query "identity" + +# Attach ACR to AKS +az aks update -n sentrius-aks-cluster -g sentrius-rg --attach-acr sentriusacr + +# Verify image exists +az acr repository show --name sentriusacr 
--image sentrius:1.1.51 +``` + +### DNS Not Resolving + +```bash +# Check DNS record exists +az network dns record-set a show --resource-group sentrius-rg --zone-name sentrius.cloud --name production + +# Check ingress has IP +kubectl get ingress -n production + +# Test DNS resolution +nslookup production.sentrius.cloud +dig production.sentrius.cloud +``` + +### Certificate Issues + +```bash +# Check cert-manager +kubectl get pods -n cert-manager + +# Check certificates +kubectl get certificate -n production + +# Check certificate status +kubectl describe certificate -n production <certificate-name> + +# Check certificate secret +kubectl get secret -n production <certificate-secret-name> -o yaml +``` + +### Ingress Not Working + +```bash +# Check ingress controller +kubectl get pods -n kube-system | grep ingress + +# Check ingress resource +kubectl describe ingress -n production apps-ingress-production + +# Check Application Gateway +az network application-gateway show --resource-group sentrius-rg --name sentrius-appgw +``` + +## Version Management + +### Update Versions + +```bash +# Edit version file +vim .azure.env + +# Update version number +SENTRIUS_VERSION=1.1.52 +``` + +### Deploy New Version + +```bash +# Build and push new images +./ops-scripts/base/build-images.sh azure --all + +# Deploy updated version +./ops-scripts/azure/deploy-helm.sh --tenant production +``` + +### Rollback + +```bash +# View Helm history +helm history sentrius -n production + +# Rollback to previous version +helm rollback sentrius -n production + +# Rollback to specific revision +helm rollback sentrius 3 -n production +``` + +## Secrets Management + +### View Secrets + +```bash +# List secrets +kubectl get secrets -n production + +# View secret data (base64 encoded) +kubectl get secret production-keycloak-secrets -n production -o yaml + +# Decode secret value +kubectl get secret production-keycloak-secrets -n production -o jsonpath="{.data.db-password}" | base64 --decode +``` + +### Regenerate Secrets + 
+```bash +# Delete existing secret +kubectl delete secret production-keycloak-secrets -n production + +# Redeploy (will generate new secret) +./ops-scripts/azure/deploy-helm.sh --tenant production +``` + +## Backup and Restore + +### Backup Resources + +```bash +# Backup namespace resources +kubectl get all -n production -o yaml > production-backup.yaml + +# Backup secrets +kubectl get secrets -n production -o yaml > production-secrets-backup.yaml + +# Backup configmaps +kubectl get configmaps -n production -o yaml > production-configmaps-backup.yaml +``` + +### Export Helm Values + +```bash +# Get current Helm values +helm get values sentrius -n production > production-values.yaml + +# Get all values including defaults +helm get values sentrius -n production --all > production-all-values.yaml +``` + +## Performance Optimization + +### Scale Deployments + +```bash +# Scale specific deployment +kubectl scale deployment sentrius-sentrius -n production --replicas=3 + +# Scale all deployments +kubectl scale deployment --all -n production --replicas=2 +``` + +### Resource Monitoring + +```bash +# Check node resources +kubectl top nodes + +# Check pod resources +kubectl top pods -n production + +# Check pod resource limits +kubectl describe pod -n production <pod-name> | grep -A 5 "Limits:" +``` + +## Quick Links + +- **Main README**: [README.md](README.md) +- **Deployment Guide**: [../../DEPLOYMENT.md](../../DEPLOYMENT.md) +- **Helm Charts**: [../../sentrius-chart](../../sentrius-chart) +- **Azure Docs**: https://docs.microsoft.com/en-us/azure/aks/ + +## Environment Files + +- **`.azure.env`** - Version numbers for all services +- **`base.sh`** - Cluster and DNS configuration +- **`.generated.env`** - Auto-generated secrets (not in git) + +## Support + +For issues: +1. Check logs: `kubectl logs -n <namespace> <pod-name>` +2. Check events: `kubectl get events -n <namespace> --sort-by='.lastTimestamp'` +3. Check Helm status: `helm status sentrius -n <namespace>` +4. 
Run test: `./ops-scripts/azure/test-helm.sh <tenant>` diff --git a/ops-scripts/azure/README.md b/ops-scripts/azure/README.md new file mode 100644 index 00000000..358f02d9 --- /dev/null +++ b/ops-scripts/azure/README.md @@ -0,0 +1,495 @@ +# Sentrius Azure/AKS Deployment Scripts + +This directory contains scripts for deploying Sentrius to Azure Kubernetes Service (AKS). + +## Prerequisites + +1. **Azure CLI** installed and configured + ```bash + az login + az account set --subscription <subscription-id> + ``` + +2. **kubectl** configured to access your AKS cluster + ```bash + az aks get-credentials --resource-group sentrius-rg --name sentrius-aks-cluster + ``` + +3. **Helm 3.x** installed + +4. **Docker images** built and pushed to Azure Container Registry + ```bash + # From repository root, build and push all images + cd /path/to/Sentrius-private + ./ops-scripts/base/build-images.sh azure --all + ``` + +## Configuration + +The deployment is configured through the following files: + +- **`.azure.env`** - Contains version numbers for all services +- **`ops-scripts/azure/base.sh`** - Contains cluster, resource group, and DNS zone configuration + +### Environment Variables (.azure.env) + +```bash +SENTRIUS_VERSION=1.1.51 +SENTRIUS_SSH_VERSION=1.1.10 +SENTRIUS_KEYCLOAK_VERSION=1.1.13 +SENTRIUS_AGENT_VERSION=1.1.22 +SENTRIUS_AI_AGENT_VERSION=1.1.3 +LLMPROXY_VERSION=1.1.3 +LAUNCHER_VERSION=1.1.3 +AGENTPROXY_VERSION=1.1.3 +SSHPROXY_VERSION=1.1.3 +RDPPROXY_VERSION=1.1.3 +GITHUB_MCP_VERSION=1.1.3 +MONITORING_AGENT_VERSION=1.1.21 +SSH_AGENT_VERSION=1.1.3 +``` + +### Cluster Configuration (base.sh) + +```bash +NAMESPACE=august +CLUSTER=sentrius-aks-cluster +REGION=eastus +RESOURCE_GROUP=sentrius-rg +DNS_ZONE=trustpolicy.ai +``` + +### Azure Container Registry + +Set the `AZURE_REGISTRY` environment variable to your Azure Container Registry: + +```bash +export AZURE_REGISTRY=sentriusacr.azurecr.io +``` + +## Scripts + +### deploy-helm.sh + +Deploys Sentrius to AKS with all components. 
+ +**Usage:** +```bash +./ops-scripts/azure/deploy-helm.sh --tenant <tenant-name> [--domain <domain>] [--no-tls] +``` + +**Options:** +- `--tenant TENANT_NAME` - (Required) Name of the tenant to deploy +- `--domain DOMAIN` - (Optional) Domain name for services (default: trustpolicy.ai) +- `--no-tls` - (Optional) Disable TLS/SSL (not recommended for production) + +**Example:** +```bash +# Deploy production tenant with default domain (trustpolicy.ai) +./ops-scripts/azure/deploy-helm.sh --tenant production + +# Deploy with custom domain +./ops-scripts/azure/deploy-helm.sh --tenant production --domain sentrius.cloud + +# Deploy test tenant without TLS +./ops-scripts/azure/deploy-helm.sh --tenant test --no-tls +``` + +**What it does:** +1. Sources environment variables and secrets +2. Creates Kubernetes namespaces (`<tenant>` and `<tenant>-agents`) +3. Generates or retrieves secrets from Kubernetes +4. Deploys main Sentrius chart to `<tenant>` namespace +5. Deploys launcher chart to `<tenant>-agents` namespace +6. Waits for LoadBalancer IP to be assigned +7. Creates DNS records for: + - `<tenant>.<domain>` (main application, default: trustpolicy.ai) + - `keycloak.<tenant>.<domain>` (authentication) + - `agentproxy.<tenant>.<domain>` (agent proxy) + - `rdpproxy.<tenant>.<domain>` (RDP proxy) + +**Note:** The default domain is `trustpolicy.ai`. You can specify a custom domain with `--domain`. + +### spinup.sh + +Scales up all deployments to 1 replica (for resuming after spindown). + +**Usage:** +```bash +./ops-scripts/azure/spinup.sh --tenant <tenant-name> +``` + +**Example:** +```bash +./ops-scripts/azure/spinup.sh --tenant production +``` + +### spindown.sh + +Scales down all deployments to 0 replicas to save costs while preserving configuration. + +**Usage:** +```bash +./ops-scripts/azure/spindown.sh --tenant <tenant-name> +``` + +**Example:** +```bash +./ops-scripts/azure/spindown.sh --tenant test +``` + +**What it preserves:** +- Configurations and secrets +- Ingresses and load balancers +- DNS records +- Certificates + +### restart.sh + +Restarts all deployments in the default namespace and upgrades the Helm release. 
+ +**Usage:** +```bash +./ops-scripts/azure/restart.sh +``` + +### shutdown.sh + +Completely removes a tenant deployment including namespaces and DNS records. + +**Usage:** +```bash +./ops-scripts/azure/shutdown.sh --tenant <tenant-name> +``` + +**Example:** +```bash +./ops-scripts/azure/shutdown.sh --tenant test-tenant +``` + +**Warning:** This is destructive and cannot be undone. It will: +1. Uninstall Helm releases +2. Delete Kubernetes namespaces (`<tenant>` and `<tenant>-agents`) +3. Remove all DNS records + +### destroy-tenant.sh + +Alternative name for shutdown.sh - completely removes a tenant deployment. + +**Usage:** +```bash +./ops-scripts/azure/destroy-tenant.sh <tenant-name> +``` + +**Example:** +```bash +./ops-scripts/azure/destroy-tenant.sh old-tenant +``` + +### test-helm.sh + +Tests Helm chart rendering without deploying. + +**Usage:** +```bash +./ops-scripts/azure/test-helm.sh [tenant-name] +``` + +**Example:** +```bash +./ops-scripts/azure/test-helm.sh my-tenant +``` + +### create-subdomain.sh + +Manually creates DNS records for a tenant (useful if deploy-helm.sh DNS creation fails). + +**Usage:** +```bash +./ops-scripts/azure/create-subdomain.sh <tenant-name> <ingress-ip> +``` + +**Example:** +```bash +# Get the ingress IP first +INGRESS_IP=$(kubectl get ingress apps-ingress-production -n production -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + +# Create DNS records +./ops-scripts/azure/create-subdomain.sh production $INGRESS_IP +``` + +**What it creates** (in the `DNS_ZONE` set in `base.sh`, currently `trustpolicy.ai`): +- `<tenant>.trustpolicy.ai` → Ingress IP +- `keycloak.<tenant>.trustpolicy.ai` → Ingress IP +- `agentproxy.<tenant>.trustpolicy.ai` → Ingress IP +- `rdpproxy.<tenant>.trustpolicy.ai` → Ingress IP + +### remove-subdomain.sh + +Removes DNS records for a tenant. + +**Usage:** +```bash +./ops-scripts/azure/remove-subdomain.sh <tenant-name> +``` + +**Example:** +```bash +./ops-scripts/azure/remove-subdomain.sh old-tenant +``` + +**Note:** This is also called automatically by `destroy-tenant.sh` and `shutdown.sh`. 
+ +## Deployment Architecture + +### Namespaces + +Each tenant deployment creates two namespaces: + +1. **`<tenant>`** - Main application namespace containing: + - Sentrius API (`sentrius-sentrius`) + - Keycloak (`sentrius-keycloak`) + - PostgreSQL databases + - Integration Proxy (`sentrius-integrationproxy`) + - Agent Proxy (`sentrius-agentproxy`) + - SSH/RDP Proxies + - Neo4j, Kafka (optional) + +2. **`<tenant>-agents`** - Agent launcher namespace containing: + - Launcher Service (`sentrius-agents-launcherservice`) + - Dynamic agent deployments + +### Services Deployed + +| Service | Image | Purpose | +|---------|-------|---------| +| Sentrius API | `sentrius` | Main application and REST API | +| Keycloak | `sentrius-keycloak` | Authentication and authorization | +| Integration Proxy | `sentrius-integration-proxy` | LLM and external service integration | +| Agent Proxy | `sentrius-agent-proxy` | Agent communication proxy | +| Launcher Service | `sentrius-launcher-service` | Dynamic agent lifecycle management | +| SSH Proxy | `sentrius-ssh-proxy` | SSH session proxy | +| RDP Proxy | `sentrius-rdp-proxy` | RDP session proxy | +| Java Agent | `sentrius-agent` | Java-based monitoring agent | +| AI Agent | `sentrius-ai-agent` | AI-powered monitoring agent | +| Monitoring Agent | `sentrius-monitoring-agent` | System monitoring agent | +| SSH Agent | `sentrius-ssh-agent` | SSH monitoring agent | + +### DNS Configuration + +The deployment automatically creates DNS records in Azure DNS: + +- `<tenant>.<domain>` → Main application (default domain: trustpolicy.ai) +- `keycloak.<tenant>.<domain>` → Keycloak authentication +- `agentproxy.<tenant>.<domain>` → Agent proxy service +- `rdpproxy.<tenant>.<domain>` → RDP proxy service + +All records point to the AKS Ingress LoadBalancer IP. 
+ +**Custom Domains**: To use a different domain, specify it with the `--domain` parameter: +```bash +./ops-scripts/azure/deploy-helm.sh --tenant production --domain mycompany.com +``` + +## Secret Management + +Secrets are automatically generated and stored in Kubernetes secrets: + +- **`<tenant>-keycloak-secrets`** - Keycloak database and client secrets +- **`<tenant>-db-secret`** - Application database credentials +- **`<tenant>-oauth2-secrets`** - OAuth2 client secrets for services + +Secrets are persisted across deployments. On first deployment, new secrets are generated. On subsequent deployments, existing secrets are reused. + +## Building and Pushing Images + +To build and push all images to Azure Container Registry: + +```bash +# Login to Azure Container Registry +az acr login --name sentriusacr + +# Build all images for Azure +./ops-scripts/base/build-images.sh azure --all + +# Build specific images +./ops-scripts/base/build-images.sh azure --sentrius +./ops-scripts/base/build-images.sh azure --sentrius-keycloak +./ops-scripts/base/build-images.sh azure --sentrius-launcher-service + +# Build with no cache (clean build) +./ops-scripts/base/build-images.sh azure --all --no-cache +``` + +The build script automatically: +1. Increments patch version in `.azure.env` +2. Builds Docker images +3. Tags images with version number +4. 
Pushes to your Azure Container Registry + +## Monitoring Deployment + +After deployment, monitor the status: + +```bash +# Check deployment status +kubectl get deployments -n <tenant> +kubectl get deployments -n <tenant>-agents + +# Check pod status +kubectl get pods -n <tenant> +kubectl get pods -n <tenant>-agents + +# Check services +kubectl get services -n <tenant> + +# Check ingress +kubectl get ingress -n <tenant> + +# View logs +kubectl logs -n <tenant> deployment/sentrius-sentrius +kubectl logs -n <tenant> deployment/sentrius-keycloak +``` + +## Troubleshooting + +### DNS Records Not Created + +If DNS records are not created automatically: + +```bash +# Check if LoadBalancer IP is assigned +kubectl get ingress apps-ingress-<tenant> -n <tenant> + +# Manually create DNS records +./ops-scripts/azure/create-subdomain.sh <tenant> <ingress-ip> +``` + +### Secret Issues + +If secrets are corrupted or need to be regenerated: + +```bash +# Delete existing secrets +kubectl delete secret <tenant>-keycloak-secrets -n <tenant> +kubectl delete secret <tenant>-db-secret -n <tenant> + +# Redeploy (new secrets will be generated) +./ops-scripts/azure/deploy-helm.sh --tenant <tenant> +``` + +### Image Pull Issues + +Ensure images are pushed to Azure Container Registry: + +```bash +# List images in registry +az acr repository list --name sentriusacr --output table + +# Check specific image tags +az acr repository show-tags --name sentriusacr --repository sentrius --output table +``` + +Ensure AKS has permission to pull from ACR: + +```bash +# Grant AKS pull permissions to ACR +az aks update -n sentrius-aks-cluster -g sentrius-rg --attach-acr sentriusacr +``` + +### Ingress Controller Issues + +Ensure Application Gateway Ingress Controller (AGIC) is installed: + +```bash +# Check if AGIC is installed +kubectl get pods -n kube-system | grep ingress + +# Install AGIC using Helm +helm repo add application-gateway-kubernetes-ingress https://appgwingress.blob.core.windows.net/ingress-azure-helm-package/ +helm install ingress-azure application-gateway-kubernetes-ingress/ingress-azure +``` + +## Upgrading Deployments + +To upgrade an 
existing deployment: + +1. Update version numbers in `.azure.env` +2. Build and push new images +3. Redeploy using `deploy-helm.sh` + +```bash +# Edit .azure.env to update versions +vim .azure.env + +# Build and push updated images +./ops-scripts/base/build-images.sh azure --all + +# Upgrade deployment +./ops-scripts/azure/deploy-helm.sh --tenant <tenant> +``` + +## Cost Optimization + +To reduce costs when not in use: + +```bash +# Scale down to zero replicas (preserves configuration) +./ops-scripts/azure/spindown.sh --tenant <tenant> + +# Scale back up when needed +./ops-scripts/azure/spinup.sh --tenant <tenant> + +# Or delete specific tenant completely +./ops-scripts/azure/destroy-tenant.sh <tenant> +``` + +## Security Considerations + +1. **TLS/SSL**: Always use TLS in production (default behavior) +2. **Secrets**: Secrets are auto-generated and stored in Kubernetes +3. **DNS**: Uses Azure DNS for managed DNS records +4. **Network**: Services are exposed via AKS Ingress with LoadBalancer +5. **Authentication**: Keycloak provides OAuth2/OIDC authentication +6. **Container Registry**: Use Azure Container Registry with managed identities + +## Azure-Specific Configuration + +### Storage Classes + +AKS provides several storage classes: + +- `managed-premium` - Premium SSD (default for Sentrius) +- `managed-standard` - Standard HDD +- `azurefile` - Azure Files (for ReadWriteMany) + +Configure in Helm values: +```yaml +config: + storageClassName: "managed-premium" +``` + +### Ingress Classes + +For AKS, use Application Gateway Ingress Controller: + +```yaml +ingress: + class: "azure/application-gateway" +``` + +### Load Balancer Annotations + +Azure-specific annotations are automatically applied: + +```yaml +service.beta.kubernetes.io/azure-load-balancer-resource-group: sentrius-rg +``` + +## Support + +For issues or questions: +1. Check logs: `kubectl logs <pod-name> -n <tenant>` +2. Review Helm values: `helm get values sentrius -n <tenant>` +3. Test chart rendering: `./ops-scripts/azure/test-helm.sh <tenant>` +4. 
Check Azure resources: `az resource list --resource-group sentrius-rg` diff --git a/ops-scripts/azure/base.sh b/ops-scripts/azure/base.sh new file mode 100755 index 00000000..02fa87da --- /dev/null +++ b/ops-scripts/azure/base.sh @@ -0,0 +1,6 @@ +#!/bin/bash +NAMESPACE=august +CLUSTER=sentrius-aks-cluster +REGION=eastus +RESOURCE_GROUP=sentrius-rg +DNS_ZONE=trustpolicy.ai diff --git a/ops-scripts/azure/create-subdomain.sh b/ops-scripts/azure/create-subdomain.sh new file mode 100755 index 00000000..b34cb858 --- /dev/null +++ b/ops-scripts/azure/create-subdomain.sh @@ -0,0 +1,54 @@ +#!/bin/bash +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) + +source ${SCRIPT_DIR}/base.sh + +TENANT=$1 +INGRESS_IP=$2 + +if [[ -z "$TENANT" ]]; then + echo "Usage: $0 " 1>&2 + exit 1 +fi + +if [[ -z "$INGRESS_IP" ]]; then + echo "Usage: $0 " 1>&2 + echo "To get ingress IP: kubectl get ingress apps-ingress-${TENANT} -n ${TENANT} -o jsonpath='{.status.loadBalancer.ingress[0].ip}'" + exit 1 +fi + +echo "Creating DNS records for tenant ${TENANT} with IP ${INGRESS_IP}..." + +# Add main tenant domain +az network dns record-set a add-record \ + --resource-group ${RESOURCE_GROUP} \ + --zone-name ${DNS_ZONE} \ + --record-set-name ${TENANT} \ + --ipv4-address $INGRESS_IP + +# Add Keycloak subdomain +az network dns record-set a add-record \ + --resource-group ${RESOURCE_GROUP} \ + --zone-name ${DNS_ZONE} \ + --record-set-name keycloak.${TENANT} \ + --ipv4-address $INGRESS_IP + +# Add Agent Proxy subdomain +az network dns record-set a add-record \ + --resource-group ${RESOURCE_GROUP} \ + --zone-name ${DNS_ZONE} \ + --record-set-name agentproxy.${TENANT} \ + --ipv4-address $INGRESS_IP + +# Add RDP Proxy subdomain +az network dns record-set a add-record \ + --resource-group ${RESOURCE_GROUP} \ + --zone-name ${DNS_ZONE} \ + --record-set-name rdpproxy.${TENANT} \ + --ipv4-address $INGRESS_IP + +echo "✅ DNS records created successfully!" 
+echo " ${TENANT}.${DNS_ZONE} → ${INGRESS_IP}" +echo " keycloak.${TENANT}.${DNS_ZONE} → ${INGRESS_IP}" +echo " agentproxy.${TENANT}.${DNS_ZONE} → ${INGRESS_IP}" +echo " rdpproxy.${TENANT}.${DNS_ZONE} → ${INGRESS_IP}" diff --git a/ops-scripts/azure/deploy-helm.sh b/ops-scripts/azure/deploy-helm.sh new file mode 100755 index 00000000..58547733 --- /dev/null +++ b/ops-scripts/azure/deploy-helm.sh @@ -0,0 +1,504 @@ +#!/bin/bash + +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) + +source ${SCRIPT_DIR}/base.sh +source ${SCRIPT_DIR}/../base/base.sh +source ${SCRIPT_DIR}/../../.azure.env + +# For AKS deployments, use versioned tags from .azure.env +# Default to 'latest' if .azure.env is not sourced or variables are not set +SENTRIUS_VERSION="${SENTRIUS_VERSION:-latest}" +SENTRIUS_SSH_VERSION="${SENTRIUS_SSH_VERSION:-latest}" +SENTRIUS_KEYCLOAK_VERSION="${SENTRIUS_KEYCLOAK_VERSION:-latest}" +SENTRIUS_AGENT_VERSION="${SENTRIUS_AGENT_VERSION:-latest}" +SENTRIUS_AI_AGENT_VERSION="${SENTRIUS_AI_AGENT_VERSION:-latest}" +LLMPROXY_VERSION="${LLMPROXY_VERSION:-latest}" +LAUNCHER_VERSION="${LAUNCHER_VERSION:-latest}" +AGENTPROXY_VERSION="${AGENTPROXY_VERSION:-latest}" +SSHPROXY_VERSION="${SSHPROXY_VERSION:-latest}" +RDPPROXY_VERSION="${RDPPROXY_VERSION:-latest}" +GITHUB_MCP_VERSION="${GITHUB_MCP_VERSION:-latest}" +MONITORING_AGENT_VERSION="${MONITORING_AGENT_VERSION:-latest}" +SSH_AGENT_VERSION="${SSH_AGENT_VERSION:-latest}" + +TENANT="" +ENV_TARGET="aks" +CERTIFICATES_ENABLED="true" +INGRESS_TLS_ENABLED="true" +ENVIRONMENT="aks" +DEPLOY_ADMINER=${DEPLOY_ADMINER:-false} +ENABLE_RDP_CONTAINER=${ENABLE_RDP_CONTAINER:-true} + +# Azure Container Registry +AZURE_REGISTRY="${AZURE_REGISTRY:-sentriusacr.azurecr.io}" + +# Generate secrets using the shared script +(source ${SCRIPT_DIR}/../base/generate-secrets.sh) + +GENERATED_ENV_PATH="${SCRIPT_DIR}/../../.generated.env" +if [[ -f "$GENERATED_ENV_PATH" ]]; then + source "$GENERATED_ENV_PATH" +fi + +DOMAIN_NAME="trustpolicy.ai" # 
Default domain for Azure + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + --tenant) + TENANT="$2" + shift 2 + ;; + --domain) + DOMAIN_NAME="$2" + shift 2 + ;; + --no-tls) + CERTIFICATES_ENABLED="false" + INGRESS_TLS_ENABLED="false" + shift + ;; + *) + echo "Unknown option: $1" + echo "Usage: $0 --tenant TENANT_NAME [--domain DOMAIN] [--no-tls]" + echo " --tenant: Specify tenant name (required)" + echo " --domain: Specify domain name (default: trustpolicy.ai)" + echo " --no-tls: Disable TLS/SSL (not recommended for production)" + exit 1 + ;; + esac +done + +if [[ -z "$TENANT" ]]; then + echo "Must provide tenant name with --tenant" 1>&2 + echo "Usage: $0 --tenant TENANT_NAME [--domain DOMAIN] [--no-tls]" + exit 1 +fi + +# Configure domain settings for AKS +SUBDOMAIN="${TENANT}.${DOMAIN_NAME}" +APROXY_SUBDOMAIN="agentproxy.${TENANT}.${DOMAIN_NAME}" +KEYCLOAK_SUBDOMAIN="keycloak.${TENANT}.${DOMAIN_NAME}" +RDPPROXY_SUBDOMAIN="rdpproxy.${TENANT}.${DOMAIN_NAME}" +KEYCLOAK_HOSTNAME="${KEYCLOAK_SUBDOMAIN}" +KEYCLOAK_DOMAIN="https://${KEYCLOAK_SUBDOMAIN}" +KEYCLOAK_INTERNAL_DOMAIN="${KEYCLOAK_DOMAIN}" +SENTRIUS_DOMAIN="https://${SUBDOMAIN}" +APROXY_DOMAIN="https://${APROXY_SUBDOMAIN}" +RDPPROXY_DOMAIN="https://${RDPPROXY_SUBDOMAIN}" +STORAGE_CLASS_NAME="managed-premium" + +# Check if namespace exists +kubectl get namespace ${TENANT} >/dev/null 2>&1 +if [[ $? -ne 0 ]]; then + echo "Namespace ${TENANT} does not exist. Creating..." + kubectl create namespace ${TENANT} || { echo "Failed to create namespace ${TENANT}"; exit 1; } +fi + +kubectl get namespace ${TENANT}-agents >/dev/null 2>&1 +if [[ $? -ne 0 ]]; then + echo "Namespace ${TENANT}-agents does not exist. Creating..." + kubectl create namespace ${TENANT}-agents || { echo "Failed to create namespace ${TENANT}-agents"; exit 1; } +fi + +# Wait for admission webhooks to be ready (prevents validation failures during deployment) +echo "🔍 Checking for admission webhooks..." 
+ +# Check for ingress controller webhook +if kubectl get validatingwebhookconfigurations 2>/dev/null | grep -q "ingress"; then + echo "⏳ Waiting for ingress admission webhook to be ready..." + for i in {1..30}; do + if kubectl get validatingwebhookconfigurations 2>/dev/null | grep -q "ingress.*admission"; then + echo "✅ Ingress admission webhook is configured" + sleep 2 + break + fi + echo "Waiting for ingress webhook configuration... ($i/30)" + sleep 2 + done +fi + +# Check for cert-manager webhook (only if TLS is enabled) +if [[ "$CERTIFICATES_ENABLED" == "true" ]]; then + if kubectl get validatingwebhookconfigurations cert-manager-webhook >/dev/null 2>&1; then + echo "⏳ Waiting for cert-manager webhook to be fully operational..." + if kubectl get pods -n cert-manager -l app.kubernetes.io/name=webhook >/dev/null 2>&1; then + kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=webhook \ + -n cert-manager \ + -l app.kubernetes.io/name=webhook \ + --timeout=60s 2>/dev/null || \ + echo "⚠️ cert-manager webhook may not be fully ready" + fi + echo "✅ cert-manager webhook check complete" + sleep 2 + fi +fi + +# Generate Keycloak DB password if not set and secret doesn't exist +if [[ -z "$KEYCLOAK_DB_PASSWORD" ]]; then + echo "🔎 Checking if keycloak secret already exists..." + if kubectl get secret "${TENANT}-keycloak-secrets" --namespace "${TENANT}" >/dev/null 2>&1; then + echo "✅ Found existing keycloak secret; extracting DB password..." + KEYCLOAK_DB_PASSWORD=$(kubectl get secret "${TENANT}-keycloak-secrets" --namespace "${TENANT}" -o jsonpath="{.data.db-password}" | base64 --decode) + if [[ -z "$KEYCLOAK_DB_PASSWORD" ]]; then + echo "❌ Secret exists but db-password is empty; exiting for safety" + exit 1 + fi + else + echo "⚠️ No existing secret found; generating new Keycloak DB password..." 
+ KEYCLOAK_DB_PASSWORD=$(head /dev/urandom | tr -dc A-Za-z0-9 | head -c 24) + fi +fi + +# Generate Keycloak client secret if not already present +if [[ -z "$KEYCLOAK_CLIENT_SECRET" ]]; then + echo "🔎 Checking if keycloak secret already exists..." + if kubectl get secret "${TENANT}-keycloak-secrets" --namespace "${TENANT}" >/dev/null 2>&1; then + echo "✅ Found existing keycloak secret; extracting client secret..." + KEYCLOAK_CLIENT_SECRET=$(kubectl get secret "${TENANT}-keycloak-secrets" --namespace "${TENANT}" -o jsonpath="{.data.client-secret}" | base64 --decode) + else + echo "⚠️ No existing secret found; generating new Keycloak client secret..." + KEYCLOAK_CLIENT_SECRET=$(head /dev/urandom | tr -dc A-Za-z0-9 | head -c 32) + fi +fi + +# ========================================== +# 🔍 Render Helm Output for Validation +# ========================================== +RENDER_PATH="${SCRIPT_DIR}/rendered-${TENANT}.yaml" + +echo "📄 Rendering Helm chart (dry run) for validation..." +helm template sentrius ./sentrius-chart \ + --namespace ${TENANT} \ + --set adminer.enabled=${DEPLOY_ADMINER} \ + --set tenant=${TENANT} \ + --set environment=${ENVIRONMENT} \ + --set ingress.class="azure/application-gateway" \ + --set subdomain="${SUBDOMAIN}" \ + --set metrics.enabled=true \ + --set healthCheck.backendConfig.enabled=false \ + --set config.storageClassName="${STORAGE_CLASS_NAME}" \ + --set agentproxySubdomain="${APROXY_SUBDOMAIN}" \ + --set rdpproxySubdomain="${RDPPROXY_SUBDOMAIN}" \ + --set keycloakSubdomain="${KEYCLOAK_SUBDOMAIN}" \ + --set keycloakHostname="${KEYCLOAK_HOSTNAME}" \ + --set keycloakDomain="${KEYCLOAK_DOMAIN}" \ + --set keycloakInternalDomain="${KEYCLOAK_DOMAIN}" \ + --set sentriusDomain="${SENTRIUS_DOMAIN}" \ + --set agentproxyDomain="${APROXY_DOMAIN}" \ + --set rdpproxyDomain="${RDPPROXY_DOMAIN}" \ + --set certificates.enabled=${CERTIFICATES_ENABLED} \ + --set ingress.tlsEnabled=${INGRESS_TLS_ENABLED} \ + > "${RENDER_PATH}" + +if [[ $? 
-ne 0 ]]; then + echo "❌ Helm rendering failed — check your templates!" + exit 1 +fi + +echo "✅ Rendered output saved to ${RENDER_PATH}" + +# Validate YAML +echo "🔍 Validating Kubernetes YAML with kubeval (if installed)..." +if command -v kubeval >/dev/null 2>&1; then + kubeval --strict "${RENDER_PATH}" +else + echo "⚠️ kubeval not installed — skipping schema validation." +fi + +echo "======================================" +echo "🚀 Deploying Sentrius (Two-Stage Ingress)" +echo "======================================" + +echo "📦 Deploying Sentrius main chart to namespace ${TENANT}..." +helm upgrade --install sentrius ./sentrius-chart --namespace ${TENANT} \ + --set adminer.enabled=${DEPLOY_ADMINER} \ + --set tenant=${TENANT} \ + --set environment=${ENVIRONMENT} \ + --set ingress.class="azure/application-gateway" \ + --set subdomain="${SUBDOMAIN}" \ + --set metrics.enabled=true \ + --set healthCheck.backendConfig.enabled=false \ + --set config.storageClassName="${STORAGE_CLASS_NAME}" \ + --set agentproxySubdomain="${APROXY_SUBDOMAIN}" \ + --set rdpproxySubdomain="${RDPPROXY_SUBDOMAIN}" \ + --set keycloakSubdomain="${KEYCLOAK_SUBDOMAIN}" \ + --set keycloakHostname="${KEYCLOAK_HOSTNAME}" \ + --set keycloakDomain="${KEYCLOAK_DOMAIN}" \ + --set keycloakInternalDomain="${KEYCLOAK_INTERNAL_DOMAIN}" \ + --set sentriusDomain="${SENTRIUS_DOMAIN}" \ + --set secrets.db.password="${DB_PASSWORD}" \ + --set secrets.db.keystorePassword="${KEYSTORE_PASSWORD}" \ + --set agentproxyDomain="${APROXY_DOMAIN}" \ + --set rdpproxyDomain="${RDPPROXY_DOMAIN}" \ + --set certificates.enabled=${CERTIFICATES_ENABLED} \ + --set ingress.tlsEnabled=${INGRESS_TLS_ENABLED} \ + --set launcherFQDN=sentrius-agents-launcherservice.${TENANT}-agents.svc.cluster.local \ + --set integrationproxy.image.repository="${AZURE_REGISTRY}/sentrius-integration-proxy" \ + --set integrationproxy.image.pullPolicy="IfNotPresent" \ + --set integrationproxy.image.tag=${LLMPROXY_VERSION} \ + --set 
agentproxy.image.repository="${AZURE_REGISTRY}/sentrius-agent-proxy" \ + --set agentproxy.image.pullPolicy="IfNotPresent" \ + --set agentproxy.image.tag=${AGENTPROXY_VERSION} \ + --set sentrius.image.repository="${AZURE_REGISTRY}/sentrius" \ + --set sentrius.image.pullPolicy="IfNotPresent" \ + --set sentrius.image.tag=${SENTRIUS_VERSION} \ + --set keycloak.db.password="${KEYCLOAK_DB_PASSWORD}" \ + --set secrets.db.username="postgres" \ + --set keycloak.adminPassword="${KEYCLOAK_ADMIN_PASSWORD}" \ + --set keycloak.clientSecret="${KEYCLOAK_CLIENT_SECRET}" \ + --set keycloak.realm.clients.sentriusApi.client_secret="${SENTRIUS_API_CLIENT_SECRET}" \ + --set keycloak.realm.clients.sentriusLauncher.client_secret="${SENTRIUS_LAUNCHER_CLIENT_SECRET}" \ + --set keycloak.realm.clients.javaAgents.client_secret="${JAVA_AGENTS_CLIENT_SECRET}" \ + --set keycloak.realm.clients.aiAgentAssessor.client_secret="${MONITORING_AGENT_CLIENT_SECRET}" \ + --set keycloak.realm.clients.sshagent.client_secret="${SSH_AGENT_CLIENT_SECRET}" \ + --set keycloak.realm.clients.agentProxy.client_secret="${SENTRIUS_APROXY_CLIENT_SECRET}" \ + --set keycloak.realm.clients.promptAdvisor.client_secret="${PROMPT_ADVISOR_CLIENT_SECRET}" \ + --set keycloak.image.repository="${AZURE_REGISTRY}/sentrius-keycloak" \ + --set keycloak.image.pullPolicy="IfNotPresent" \ + --set keycloak.image.tag=${SENTRIUS_KEYCLOAK_VERSION} \ + --set ssh.image.repository="${AZURE_REGISTRY}/sentrius-ssh" \ + --set ssh.image.pullPolicy="IfNotPresent" \ + --set ssh.image.tag=${SENTRIUS_SSH_VERSION} \ + --set sentriusaiagent.image.repository="${AZURE_REGISTRY}/sentrius-ai-agent" \ + --set sentriusaiagent.image.pullPolicy="IfNotPresent" \ + --set sentriusaiagent.image.tag=${SENTRIUS_AI_AGENT_VERSION} \ + --set launcherservice.image.repository="${AZURE_REGISTRY}/sentrius-launcher-service" \ + --set launcherservice.image.pullPolicy="IfNotPresent" \ + --set launcherservice.image.tag=${LAUNCHER_VERSION} \ + --set 
sshproxy.image.repository="${AZURE_REGISTRY}/sentrius-ssh-proxy" \ + --set sshproxy.image.pullPolicy="IfNotPresent" \ + --set sshproxy.image.tag=${SSHPROXY_VERSION} \ + --set monitoringagent.image.tag=${MONITORING_AGENT_VERSION} \ + --set monitoringagent.image.repository="${AZURE_REGISTRY}/sentrius-monitoring-agent" \ + --set monitoringagent.image.pullPolicy="IfNotPresent" \ + --set sshagent.image.tag=${SSH_AGENT_VERSION} \ + --set sshagent.image.repository="${AZURE_REGISTRY}/sentrius-ssh-agent" \ + --set rdpproxy.image.repository="${AZURE_REGISTRY}/sentrius-rdp-proxy" \ + --set rdpproxy.image.pullPolicy="IfNotPresent" \ + --set rdpproxy.image.tag=${RDPPROXY_VERSION} \ + --set rdpTest.enabled=${ENABLE_RDP_CONTAINER} \ + --set neo4j.env.NEO4J_server_config_strict__validation__enabled="\"false\"" \ + --set sentriusagent.image.repository="${AZURE_REGISTRY}/sentrius-agent" \ + --set sentriusagent.image.pullPolicy="IfNotPresent" \ + --set sentriusagent.image.tag=${SENTRIUS_AGENT_VERSION} || { echo "Failed to deploy Sentrius with Helm"; exit 1; } + +echo "" +echo "======================================" +echo "⏳ STAGE 1: Waiting for Keycloak Ingress" +echo "======================================" + +# Wait for Keycloak ingress to get an IP +KEYCLOAK_INGRESS_TIMEOUT=600 +ELAPSED=0 +KEYCLOAK_INGRESS_IP="" + +echo "Waiting for Keycloak ingress IP (timeout: ${KEYCLOAK_INGRESS_TIMEOUT}s)..." +while [ $ELAPSED -lt $KEYCLOAK_INGRESS_TIMEOUT ]; do + KEYCLOAK_INGRESS_IP=$(kubectl get ingress "keycloak-ingress-${TENANT}" -n ${TENANT} -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || echo "") + + if [[ -n "$KEYCLOAK_INGRESS_IP" ]]; then + echo "✅ Keycloak ingress has IP: $KEYCLOAK_INGRESS_IP" + break + fi + + if [ $((ELAPSED % 30)) -eq 0 ]; then + echo " Still waiting for Keycloak ingress IP... 
($ELAPSED seconds elapsed)" + fi + sleep 10 + ELAPSED=$((ELAPSED + 10)) +done + +if [[ -z "$KEYCLOAK_INGRESS_IP" ]]; then + echo "❌ ERROR: Keycloak ingress did not get an IP within ${KEYCLOAK_INGRESS_TIMEOUT} seconds" + echo "" + echo "Checking ingress status:" + kubectl describe ingress "keycloak-ingress-${TENANT}" -n ${TENANT} + exit 1 +fi + +# Create/Update DNS for Keycloak immediately +echo "" +echo "🌐 Configuring DNS for Keycloak..." +if az network dns record-set a show --resource-group ${RESOURCE_GROUP} --zone-name ${DNS_ZONE} --name keycloak.${TENANT} 2>/dev/null | grep -q "keycloak.${TENANT}"; then + echo " Updating existing DNS record for ${KEYCLOAK_SUBDOMAIN}..." + az network dns record-set a remove-record --resource-group ${RESOURCE_GROUP} --zone-name ${DNS_ZONE} --record-set-name keycloak.${TENANT} --ipv4-address $(az network dns record-set a show --resource-group ${RESOURCE_GROUP} --zone-name ${DNS_ZONE} --name keycloak.${TENANT} --query 'aRecords[0].ipv4Address' -o tsv) 2>/dev/null || true +fi + +az network dns record-set a add-record \ + --resource-group ${RESOURCE_GROUP} \ + --zone-name ${DNS_ZONE} \ + --record-set-name keycloak.${TENANT} \ + --ipv4-address $KEYCLOAK_INGRESS_IP || { + echo "⚠️ Failed to create DNS record, it may already exist" +} + +# Wait for Keycloak pod to be ready +echo "" +echo "⏳ Waiting for Keycloak pod to be ready..." +kubectl wait --for=condition=ready pod \ + -l "app.kubernetes.io/name=keycloak" \ + -n ${TENANT} \ + --timeout=10m || { + echo "⚠️ Keycloak pod not ready yet, but continuing..." +} + +# Wait for Keycloak to respond +echo "" +echo "⏳ Waiting for Keycloak to be healthy..." 
+echo " Checking: https://${KEYCLOAK_SUBDOMAIN}/" +KEYCLOAK_HEALTH_TIMEOUT=300 +ELAPSED=0 + +while [ $ELAPSED -lt $KEYCLOAK_HEALTH_TIMEOUT ]; do + # Try HTTPS (with DNS), then HTTP with IP + if curl -sf -k --connect-timeout 5 "https://${KEYCLOAK_SUBDOMAIN}/" >/dev/null 2>&1; then + echo "✅ Keycloak is healthy via HTTPS" + break + elif curl -sf --connect-timeout 5 "http://${KEYCLOAK_INGRESS_IP}/" >/dev/null 2>&1; then + echo "✅ Keycloak is responding (certificate may still be provisioning)" + break + fi + + if [ $((ELAPSED % 30)) -eq 0 ]; then + echo " Waiting for Keycloak to respond... ($ELAPSED seconds elapsed)" + fi + sleep 10 + ELAPSED=$((ELAPSED + 10)) +done + +if [ $ELAPSED -ge $KEYCLOAK_HEALTH_TIMEOUT ]; then + echo "⚠️ WARNING: Keycloak did not respond within ${KEYCLOAK_HEALTH_TIMEOUT} seconds" + echo " Continuing anyway - apps will retry connection..." +fi + +echo "" +echo "======================================" +echo "⏳ STAGE 2: Waiting for Apps Ingress" +echo "======================================" + +# Wait for apps ingress to get an IP +APPS_INGRESS_TIMEOUT=600 +ELAPSED=0 +APPS_INGRESS_IP="" + +echo "Waiting for apps ingress IP (timeout: ${APPS_INGRESS_TIMEOUT}s)..." +while [ $ELAPSED -lt $APPS_INGRESS_TIMEOUT ]; do + APPS_INGRESS_IP=$(kubectl get ingress "apps-ingress-${TENANT}" -n ${TENANT} -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || echo "") + + if [[ -n "$APPS_INGRESS_IP" ]]; then + echo "✅ Apps ingress has IP: $APPS_INGRESS_IP" + break + fi + + if [ $((ELAPSED % 30)) -eq 0 ]; then + echo " Still waiting for apps ingress IP... ($ELAPSED seconds elapsed)" + fi + sleep 10 + ELAPSED=$((ELAPSED + 10)) +done + +if [[ -z "$APPS_INGRESS_IP" ]]; then + echo "⚠️ WARNING: Apps ingress did not get an IP within ${APPS_INGRESS_TIMEOUT} seconds" + echo " Application pods may still be starting up..." +else + # Configure DNS for apps + echo "" + echo "🌐 Configuring DNS for application services..." 
+ + # Check and create/update DNS records + for SUBDOMAIN_NAME in "${SUBDOMAIN}" "${APROXY_SUBDOMAIN}" "${RDPPROXY_SUBDOMAIN}"; do + RECORD_NAME=$(echo ${SUBDOMAIN_NAME} | sed "s/\.${DNS_ZONE}//") + if az network dns record-set a show --resource-group ${RESOURCE_GROUP} --zone-name ${DNS_ZONE} --name ${RECORD_NAME} 2>/dev/null | grep -q "${RECORD_NAME}"; then + echo " Updating ${SUBDOMAIN_NAME}..." + az network dns record-set a remove-record --resource-group ${RESOURCE_GROUP} --zone-name ${DNS_ZONE} --record-set-name ${RECORD_NAME} --ipv4-address $(az network dns record-set a show --resource-group ${RESOURCE_GROUP} --zone-name ${DNS_ZONE} --name ${RECORD_NAME} --query 'aRecords[0].ipv4Address' -o tsv) 2>/dev/null || true + fi + + az network dns record-set a add-record \ + --resource-group ${RESOURCE_GROUP} \ + --zone-name ${DNS_ZONE} \ + --record-set-name ${RECORD_NAME} \ + --ipv4-address $APPS_INGRESS_IP || { + echo "⚠️ Failed to create DNS record for ${SUBDOMAIN_NAME}" + } + done +fi + +# Deploy launcher service +echo "" +echo "======================================" +echo "📦 Deploying Launcher Service" +echo "======================================" + +echo "Deploying Sentrius launcher chart to namespace ${TENANT}-agents..." 
+helm upgrade --install sentrius-agents ./sentrius-chart-launcher --namespace ${TENANT}-agents \ + --set tenant=${TENANT}-agents \ + --set baseRelease=sentrius \ + --set sentriusNamespace=${TENANT} \ + --set ingress.class="azure/application-gateway" \ + --set healthCheck.backendConfig.enabled=false \ + --set keycloakFQDN=sentrius-keycloak.${TENANT}.svc.cluster.local \ + --set sentriusFQDN=sentrius-sentrius.${TENANT}.svc.cluster.local \ + --set integrationproxyFQDN=sentrius-integrationproxy.${TENANT}.svc.cluster.local \ + --set agentproxyFQDN=sentrius-agentproxy.${TENANT}.svc.cluster.local \ + --set subdomain="${SUBDOMAIN}" \ + --set metrics.enabled=true \ + --set agentproxySubdomain="${APROXY_SUBDOMAIN}" \ + --set agentproxyDomain="${APROXY_DOMAIN}" \ + --set keycloakSubdomain="${KEYCLOAK_SUBDOMAIN}" \ + --set keycloakHostname="${KEYCLOAK_HOSTNAME}" \ + --set keycloakDomain="${KEYCLOAK_DOMAIN}" \ + --set keycloakInternalDomain="${KEYCLOAK_INTERNAL_DOMAIN}" \ + --set sentriusDomain="${SENTRIUS_DOMAIN}" \ + --set integrationproxy.image.repository="${AZURE_REGISTRY}/sentrius-integration-proxy" \ + --set integrationproxy.image.pullPolicy="IfNotPresent" \ + --set integrationproxy.image.tag=${LLMPROXY_VERSION} \ + --set secrets.db.password="${DB_PASSWORD}" \ + --set secrets.db.keystorePassword="${KEYSTORE_PASSWORD}" \ + --set launcherservice.oauth2.client_secret="${SENTRIUS_LAUNCHER_CLIENT_SECRET}" \ + --set sentrius.image.repository="${AZURE_REGISTRY}/sentrius" \ + --set sentrius.image.pullPolicy="IfNotPresent" \ + --set sentrius.image.tag=${SENTRIUS_VERSION} \ + --set keycloak.image.repository="${AZURE_REGISTRY}/sentrius-keycloak" \ + --set keycloak.image.pullPolicy="IfNotPresent" \ + --set keycloak.image.tag=${SENTRIUS_KEYCLOAK_VERSION} \ + --set ssh.image.repository="${AZURE_REGISTRY}/sentrius-ssh" \ + --set ssh.image.pullPolicy="IfNotPresent" \ + --set ssh.image.tag=${SENTRIUS_SSH_VERSION} \ + --set 
sentriusaiagent.image.repository="${AZURE_REGISTRY}/sentrius-ai-agent" \ + --set sentriusaiagent.image.pullPolicy="IfNotPresent" \ + --set sentriusaiagent.image.tag=${SENTRIUS_AI_AGENT_VERSION} \ + --set launcherservice.image.repository="${AZURE_REGISTRY}/sentrius-launcher-service" \ + --set launcherservice.image.pullPolicy="IfNotPresent" \ + --set launcherservice.image.tag=${LAUNCHER_VERSION} \ + --set neo4j.env.NEO4J_server_config_strict__validation__enabled="\"false\"" \ + --set sentriusagent.image.repository="${AZURE_REGISTRY}/sentrius-agent" \ + --set sentriusagent.image.pullPolicy="IfNotPresent" \ + --set sentriusagent.image.tag=${SENTRIUS_AGENT_VERSION} || { echo "Failed to deploy Sentrius launcher with Helm"; exit 1; } + +# Wait for application pods +echo "" +echo "⏳ Waiting for application pods to be ready..." +kubectl wait --for=condition=ready pod \ + -l "app.kubernetes.io/instance=sentrius" \ + -n ${TENANT} \ + --timeout=10m 2>&1 | grep -v "error: no matching resources found" || true + +echo "" +echo "======================================" +echo "✅ Deployment Complete!" 
+echo "======================================"
+echo ""
+echo "Keycloak Ingress IP: ${KEYCLOAK_INGRESS_IP}"
+echo "Apps Ingress IP: ${APPS_INGRESS_IP:-}"
+echo ""
+echo "Services:"
+echo " Keycloak: ${KEYCLOAK_DOMAIN}"
+echo " Sentrius: ${SENTRIUS_DOMAIN}"
+echo " Agent Proxy: ${APROXY_DOMAIN}"
+echo " RDP Proxy: ${RDPPROXY_DOMAIN}"
+echo ""
+echo "Check status with:"
+echo " kubectl get ingress -n ${TENANT}"
+echo " kubectl get pods -n ${TENANT}"
diff --git a/ops-scripts/azure/destroy-tenant.sh b/ops-scripts/azure/destroy-tenant.sh
new file mode 100755
index 00000000..14d975db
--- /dev/null
+++ b/ops-scripts/azure/destroy-tenant.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# Destroy a tenant: uninstall its Helm releases, delete its ingresses and DNS
+# records, then delete the tenant namespaces (clearing finalizers if they remain).
+# Usage: destroy-tenant.sh <tenant>
+SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+source "${SCRIPT_DIR}/base.sh"
+
+TENANT=$1
+
+if [[ -z "$TENANT" ]]; then
+    echo "Usage: $0 <tenant>" 1>&2
+    exit 1
+fi
+
+echo "======================================"
+echo "🗑️ Destroying Tenant: ${TENANT}"
+echo "======================================"
+
+# Uninstall Helm releases
+echo "📦 Uninstalling Helm releases..."
+helm uninstall sentrius -n ${TENANT} 2>/dev/null || echo " sentrius release not found"
+helm uninstall sentrius-agents -n ${TENANT}-agents 2>/dev/null || echo " sentrius-agents release not found"
+
+# Delete ingresses to release load balancers
+echo "🌐 Deleting ingresses..."
+kubectl delete ingress --all -n ${TENANT} 2>/dev/null || true
+
+# Wait for cleanup
+echo "⏳ Waiting for resources to be cleaned up..."
+sleep 10
+
+# Remove DNS records
+echo "🌐 Removing DNS records..."
+"${SCRIPT_DIR}/remove-subdomain.sh" "${TENANT}"
+
+# Delete namespaces
+echo "📦 Deleting namespaces..."
+kubectl delete namespace ${TENANT} --timeout=60s 2>/dev/null || true
+kubectl delete namespace ${TENANT}-agents --timeout=60s 2>/dev/null || true
+
+# If namespaces are stuck
+echo "🔍 Checking for stuck namespaces..."
+if kubectl get namespace ${TENANT} >/dev/null 2>&1; then
+    echo " Removing finalizers from ${TENANT}..."
+    kubectl get namespace ${TENANT} -o json | \
+        jq '.spec.finalizers = []' | \
+        kubectl replace --raw /api/v1/namespaces/${TENANT}/finalize -f -
+fi
+
+if kubectl get namespace ${TENANT}-agents >/dev/null 2>&1; then
+    echo " Removing finalizers from ${TENANT}-agents..."
+    kubectl get namespace ${TENANT}-agents -o json | \
+        jq '.spec.finalizers = []' | \
+        kubectl replace --raw /api/v1/namespaces/${TENANT}-agents/finalize -f -
+fi
+
+echo ""
+echo "======================================"
+echo "✅ Tenant Destroyed!"
+echo "======================================"
+echo ""
+echo "Verify cleanup:"
+echo " kubectl get namespaces | grep ${TENANT}"
+echo " az network dns record-set a list --resource-group ${RESOURCE_GROUP} --zone-name ${DNS_ZONE}"
diff --git a/ops-scripts/azure/remove-subdomain.sh b/ops-scripts/azure/remove-subdomain.sh
new file mode 100755
index 00000000..106cce8d
--- /dev/null
+++ b/ops-scripts/azure/remove-subdomain.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# Delete the DNS A record sets of a tenant from ${DNS_ZONE}.
+# Usage: remove-subdomain.sh <tenant>
+SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+source "${SCRIPT_DIR}/base.sh"
+
+TENANT=$1
+
+if [[ -z "$TENANT" ]]; then
+    echo "Usage: $0 <tenant>" 1>&2
+    exit 1
+fi
+
+echo "Removing DNS records for tenant ${TENANT}..."
+
+# Remove main tenant domain
+az network dns record-set a delete \
+    --resource-group ${RESOURCE_GROUP} \
+    --zone-name ${DNS_ZONE} \
+    --name ${TENANT} \
+    --yes 2>/dev/null || echo " ${TENANT}.${DNS_ZONE} not found"
+
+# Remove Keycloak subdomain
+az network dns record-set a delete \
+    --resource-group ${RESOURCE_GROUP} \
+    --zone-name ${DNS_ZONE} \
+    --name keycloak.${TENANT} \
+    --yes 2>/dev/null || echo " keycloak.${TENANT}.${DNS_ZONE} not found"
+
+# Remove Agent Proxy subdomain
+az network dns record-set a delete \
+    --resource-group ${RESOURCE_GROUP} \
+    --zone-name ${DNS_ZONE} \
+    --name agentproxy.${TENANT} \
+    --yes 2>/dev/null || echo " agentproxy.${TENANT}.${DNS_ZONE} not found"
+
+# Remove RDP Proxy subdomain
+az network dns record-set a delete \
+    --resource-group ${RESOURCE_GROUP} \
+    --zone-name ${DNS_ZONE} \
+    --name rdpproxy.${TENANT} \
+    --yes 2>/dev/null || echo " rdpproxy.${TENANT}.${DNS_ZONE} not found"
+
+echo "✅ DNS records removed successfully!"
diff --git a/ops-scripts/azure/restart.sh b/ops-scripts/azure/restart.sh
new file mode 100755
index 00000000..5f466f91
--- /dev/null
+++ b/ops-scripts/azure/restart.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+# Scale a tenant's deployments to one replica and re-apply the sentrius chart
+# with the image versions read from .azure.env. Usage: restart.sh <tenant>
+
+SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+source "${SCRIPT_DIR}/base.sh"
+source "${SCRIPT_DIR}/../../.azure.env"
+
+# Azure Container Registry
+AZURE_REGISTRY="${AZURE_REGISTRY:-sentriusacr.azurecr.io}"
+
+TENANT="${1:-${NAMESPACE}}"
+
+if [[ -z "$TENANT" ]]; then
+    echo "Usage: $0 <tenant>" 1>&2
+    echo "Example: $0 production" 1>&2
+    exit 1
+fi
+
+echo "Restarting all deployments in namespace ${TENANT}..."
+# Only sets the replica count to 1; this is a scale, not a rollout restart.
+kubectl scale deployment --all --replicas=1 -n ${TENANT}
+
+echo "Upgrading Sentrius deployment with latest configuration..."
+helm upgrade --install sentrius ./sentrius-chart --namespace ${TENANT} \
+    --set tenant=${TENANT} \
+    --set environment=aks \
+    --set sentrius.image.repository=${AZURE_REGISTRY}/sentrius \
+    --set sentrius.image.tag=${SENTRIUS_VERSION} \
+    --set ssh.image.repository=${AZURE_REGISTRY}/sentrius-ssh \
+    --set ssh.image.tag=${SENTRIUS_SSH_VERSION} \
+    --set keycloak.image.repository=${AZURE_REGISTRY}/sentrius-keycloak \
+    --set keycloak.image.tag=${SENTRIUS_KEYCLOAK_VERSION} \
+    --set sentriusagent.image.repository=${AZURE_REGISTRY}/sentrius-agent \
+    --set sentriusagent.image.tag=${SENTRIUS_AGENT_VERSION} \
+    --set sentriusaiagent.image.repository=${AZURE_REGISTRY}/sentrius-ai-agent \
+    --set sentriusaiagent.image.tag=${SENTRIUS_AI_AGENT_VERSION} \
+    --set integrationproxy.image.repository=${AZURE_REGISTRY}/sentrius-integration-proxy \
+    --set integrationproxy.image.tag=${LLMPROXY_VERSION} \
+    --set agentproxy.image.repository=${AZURE_REGISTRY}/sentrius-agent-proxy \
+    --set agentproxy.image.tag=${AGENTPROXY_VERSION:-1.0.0} \
+    --set launcherservice.image.repository=${AZURE_REGISTRY}/sentrius-launcher-service \
+    --set launcherservice.image.tag=${LAUNCHER_VERSION} \
+    --set sshproxy.image.repository=${AZURE_REGISTRY}/sentrius-ssh-proxy \
+    --set sshproxy.image.tag=${SSHPROXY_VERSION:-1.0.0} \
+    --set rdpproxy.image.repository=${AZURE_REGISTRY}/sentrius-rdp-proxy \
+    --set rdpproxy.image.tag=${RDPPROXY_VERSION:-1.0.0} || { echo "Failed to deploy Sentrius with Helm"; exit 1; }
+
+echo "✅ Restart complete!"
diff --git a/ops-scripts/azure/shutdown.sh b/ops-scripts/azure/shutdown.sh
new file mode 100755
index 00000000..d8853391
--- /dev/null
+++ b/ops-scripts/azure/shutdown.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+# Tear down a tenant's Sentrius deployment: Helm releases, certificates, ingresses,
+# DNS records and finally the tenant namespaces.
+# Usage: shutdown.sh --tenant TENANT_NAME
+
+SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+source "${SCRIPT_DIR}/base.sh"
+
+# Default tenant. NAMESPACE is not assigned in this script (presumably base.sh or the environment sets it — TODO confirm).
+TENANT="${NAMESPACE:-}"
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --tenant)
+            # "shift 2" fails without shifting when the value is missing, which would loop forever.
+            if [[ $# -lt 2 || -z "$2" ]]; then
+                echo "Error: --tenant requires a value" 1>&2
+                exit 1
+            fi
+            TENANT="$2"
+            shift 2
+            ;;
+        *)
+            echo "Unknown option: $1"
+            echo "Usage: $0 --tenant TENANT_NAME"
+            echo " --tenant: Specify tenant name (required)"
+            exit 1
+            ;;
+    esac
+done
+
+# Everything below deletes resources named after TENANT; never run with an empty value.
+if [[ -z "$TENANT" ]]; then
+    echo "Error: no tenant specified (use --tenant TENANT_NAME)" 1>&2
+    exit 1
+fi
+
+echo "======================================"
+echo "🗑️ Tearing Down Sentrius Deployment"
+echo "======================================"
+
+# Delete Helm releases
+echo "📦 Uninstalling Helm releases..."
+helm uninstall sentrius -n ${TENANT} 2>/dev/null || echo " sentrius release not found"
+helm uninstall sentrius-agents -n ${TENANT}-agents 2>/dev/null || echo " sentrius-agents release not found"
+
+# Delete ManagedCertificates explicitly (sometimes they linger)
+echo "🔐 Deleting managed certificates..."
+kubectl delete certificate --all -n ${TENANT} 2>/dev/null || true
+
+# Delete Ingresses explicitly (to release load balancers)
+echo "🌐 Deleting ingresses..."
+kubectl delete ingress --all -n ${TENANT} 2>/dev/null || true
+
+# Wait for load balancers to be removed
+echo "⏳ Waiting for load balancers to be cleaned up..."
+sleep 10
+
+# Delete DNS records
+echo "🌐 Deleting DNS records..."
+for SUBDOMAIN in "keycloak.${TENANT}" "${TENANT}" "agentproxy.${TENANT}" "rdpproxy.${TENANT}"; do
+    if az network dns record-set a show --resource-group ${RESOURCE_GROUP} --zone-name ${DNS_ZONE} --name ${SUBDOMAIN} 2>/dev/null | grep -q ${SUBDOMAIN}; then
+        echo " Deleting ${SUBDOMAIN}.${DNS_ZONE}..."
+        az network dns record-set a delete \
+            --resource-group ${RESOURCE_GROUP} \
+            --zone-name ${DNS_ZONE} \
+            --name ${SUBDOMAIN} \
+            --yes 2>/dev/null || echo " Failed to delete ${SUBDOMAIN}"
+    fi
+done
+
+# Delete namespaces (this removes all remaining resources); each delete waits at most 60s and a failure only prints a notice
+echo "📦 Deleting namespaces..."
+kubectl delete namespace ${TENANT} --timeout=60s 2>/dev/null || echo " Forcing namespace deletion..."
+kubectl delete namespace ${TENANT}-agents --timeout=60s 2>/dev/null || echo " Forcing namespace deletion..."
+
+# If a namespace still exists after the delete above, empty its spec.finalizers with jq and submit the result to the namespace's /finalize endpoint
+echo "🔍 Checking for stuck namespaces..."
+if kubectl get namespace ${TENANT} >/dev/null 2>&1; then
+    echo " Namespace ${TENANT} is stuck, removing finalizers..."
+    kubectl get namespace ${TENANT} -o json | \
+        jq '.spec.finalizers = []' | \
+        kubectl replace --raw /api/v1/namespaces/${TENANT}/finalize -f -
+fi
+
+if kubectl get namespace ${TENANT}-agents >/dev/null 2>&1; then
+    echo " Namespace ${TENANT}-agents is stuck, removing finalizers..."
+    kubectl get namespace ${TENANT}-agents -o json | \
+        jq '.spec.finalizers = []' | \
+        kubectl replace --raw /api/v1/namespaces/${TENANT}-agents/finalize -f -
+fi
+
+echo ""
+echo "======================================"
+echo "✅ Teardown Complete!"
+echo "======================================"
+echo ""
+echo "Verify cleanup with:"
+echo " kubectl get namespaces | grep ${TENANT}"
+echo " az network public-ip list --resource-group ${RESOURCE_GROUP}"
+echo " az network dns record-set a list --resource-group ${RESOURCE_GROUP} --zone-name ${DNS_ZONE}"
diff --git a/ops-scripts/azure/spindown.sh b/ops-scripts/azure/spindown.sh
new file mode 100755
index 00000000..3f38711a
--- /dev/null
+++ b/ops-scripts/azure/spindown.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+# Scale every deployment and statefulset of a tenant down to zero replicas.
+# Usage: spindown.sh --tenant TENANT_NAME
+
+SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+source "${SCRIPT_DIR}/base.sh"
+
+# Default tenant. NAMESPACE is not assigned in this script (presumably base.sh or the environment sets it — TODO confirm).
+TENANT="${NAMESPACE:-}"
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --tenant)
+            # "shift 2" fails without shifting when the value is missing, which would loop forever.
+            if [[ $# -lt 2 || -z "$2" ]]; then
+                echo "Error: --tenant requires a value" 1>&2
+                exit 1
+            fi
+            TENANT="$2"
+            shift 2
+            ;;
+        *)
+            echo "Unknown option: $1"
+            echo "Usage: $0 --tenant TENANT_NAME"
+            echo " --tenant: Specify tenant name (required)"
+            exit 1
+            ;;
+    esac
+done
+
+# The kubectl calls below take "-n ${TENANT}"; an empty tenant would produce malformed commands.
+if [[ -z "$TENANT" ]]; then
+    echo "Error: no tenant specified (use --tenant TENANT_NAME)" 1>&2
+    exit 1
+fi
+
+echo "======================================"
+echo "💤 Scaling Down Sentrius Deployment"
+echo "======================================"
+
+# This keeps:
+# ✅ Configurations, secrets, ingresses
+# ✅ Load balancers and IPs (so DNS stays valid)
+# ✅ Certificates (already provisioned)
+# ❌ Stops: All pods/containers (reduces costs)
+
+# To restart:
+# kubectl scale deployment --all --replicas=1 -n ${TENANT}
+# kubectl scale deployment --all --replicas=1 -n ${TENANT}-agents
+# kubectl scale statefulset --all --replicas=1 -n ${TENANT}
+
+# Scale down all deployments to 0 replicas
+kubectl scale deployment --all --replicas=0 -n ${TENANT}
+kubectl scale deployment --all --replicas=0 -n ${TENANT}-agents
+kubectl scale statefulset --all --replicas=0 -n ${TENANT}
+
+echo ""
+echo "✅ Spindown complete!"
+echo ""
+echo "To restart:"
+echo " kubectl scale deployment --all --replicas=1 -n ${TENANT}"
+echo " kubectl scale deployment --all --replicas=1 -n ${TENANT}-agents"
+echo " kubectl scale statefulset --all --replicas=1 -n ${TENANT}"
diff --git a/ops-scripts/azure/spinup.sh b/ops-scripts/azure/spinup.sh
new file mode 100755
index 00000000..a4c175e1
--- /dev/null
+++ b/ops-scripts/azure/spinup.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+# Scale every deployment and statefulset of a tenant back up to one replica.
+# Usage: spinup.sh --tenant TENANT_NAME [--no-tls]
+
+SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+source "${SCRIPT_DIR}/base.sh"
+
+# Do not seed TENANT from $1: "spinup.sh --no-tls" would then use "--no-tls" as the tenant.
+# NAMESPACE is not assigned in this script (presumably base.sh or the environment sets it — TODO confirm).
+TENANT="${NAMESPACE:-}"
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --tenant)
+            # "shift 2" fails without shifting when the value is missing, which would loop forever.
+            if [[ $# -lt 2 || -z "$2" ]]; then
+                echo "Error: --tenant requires a value" 1>&2
+                exit 1
+            fi
+            TENANT="$2"
+            shift 2
+            ;;
+        --no-tls)
+            # NOTE(review): these two variables are not read anywhere else in this script.
+            CERTIFICATES_ENABLED="false"
+            INGRESS_TLS_ENABLED="false"
+            shift
+            ;;
+        *)
+            echo "Unknown option: $1"
+            echo "Usage: $0 --tenant TENANT_NAME [--no-tls]"
+            echo " --tenant: Specify tenant name (required)"
+            echo " --no-tls: Disable TLS/SSL (not recommended for production)"
+            exit 1
+            ;;
+    esac
+done
+
+# The kubectl calls below take "-n ${TENANT}"; an empty tenant would produce malformed commands.
+if [[ -z "$TENANT" ]]; then
+    echo "Error: no tenant specified (use --tenant TENANT_NAME)" 1>&2
+    exit 1
+fi
+
+echo "======================================"
+echo "⚡ Starting Up Sentrius Deployment"
+echo "======================================"
+
+# Scale up all deployments to 1 replica
+kubectl scale deployment --all --replicas=1 -n ${TENANT}
+kubectl scale deployment --all --replicas=1 -n ${TENANT}-agents
+kubectl scale statefulset --all --replicas=1 -n ${TENANT}
+
+echo ""
+echo "✅ Startup complete!"
+echo ""
+echo "Check status with:"
+echo " kubectl get pods -n ${TENANT}"
+echo " kubectl get pods -n ${TENANT}-agents"
diff --git a/ops-scripts/azure/test-helm.sh b/ops-scripts/azure/test-helm.sh
new file mode 100755
index 00000000..a88b7f60
--- /dev/null
+++ b/ops-scripts/azure/test-helm.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+
+SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+source ${SCRIPT_DIR}/base.sh
+source ${SCRIPT_DIR}/../../.azure.env
+
+TENANT=${1:-test-tenant}
+DOMAIN_NAME="trustpolicy.ai" # Default domain for Azure; hard-coded here and only used to build the hostnames passed to --set below
+
+echo "======================================"
+echo "🧪 Testing Helm Chart Rendering"
+echo "======================================"
+echo "Tenant: ${TENANT}"
+echo "Domain: ${DOMAIN_NAME}"
+echo ""
+
+# Azure Container Registry host; an AZURE_REGISTRY value that is already set wins over the default
+AZURE_REGISTRY="${AZURE_REGISTRY:-sentriusacr.azurecr.io}"
+
+# Test render sentrius-chart (chart path is relative to the current directory); output goes to /tmp/sentrius-chart-test.yaml
+echo "📦 Testing sentrius-chart..."
+helm template sentrius ./sentrius-chart \
+    --namespace ${TENANT} \
+    --set tenant=${TENANT} \
+    --set environment=aks \
+    --set ingress.class="azure/application-gateway" \
+    --set subdomain="${TENANT}.${DOMAIN_NAME}" \
+    --set agentproxySubdomain="agentproxy.${TENANT}.${DOMAIN_NAME}" \
+    --set keycloakSubdomain="keycloak.${TENANT}.${DOMAIN_NAME}" \
+    --set rdpproxySubdomain="rdpproxy.${TENANT}.${DOMAIN_NAME}" \
+    --set keycloakHostname="keycloak.${TENANT}.${DOMAIN_NAME}" \
+    --set keycloakDomain="https://keycloak.${TENANT}.${DOMAIN_NAME}" \
+    --set keycloakInternalDomain="https://keycloak.${TENANT}.${DOMAIN_NAME}" \
+    --set sentriusDomain="https://${TENANT}.${DOMAIN_NAME}" \
+    --set agentproxyDomain="https://agentproxy.${TENANT}.${DOMAIN_NAME}" \
+    --set rdpproxyDomain="https://rdpproxy.${TENANT}.${DOMAIN_NAME}" \
+    --set certificates.enabled=true \
+    --set ingress.tlsEnabled=true \
+    --set sentrius.image.repository="${AZURE_REGISTRY}/sentrius" \
+    --set sentrius.image.tag=${SENTRIUS_VERSION} \
+    --set keycloak.image.repository="${AZURE_REGISTRY}/sentrius-keycloak" \
+    --set keycloak.image.tag=${SENTRIUS_KEYCLOAK_VERSION} \
+    --set ssh.image.repository="${AZURE_REGISTRY}/sentrius-ssh" \
+    --set ssh.image.tag=${SENTRIUS_SSH_VERSION} \
+    --set sentriusagent.image.repository="${AZURE_REGISTRY}/sentrius-agent" \
+    --set sentriusagent.image.tag=${SENTRIUS_AGENT_VERSION} \
+    --set sentriusaiagent.image.repository="${AZURE_REGISTRY}/sentrius-ai-agent" \
+    --set sentriusaiagent.image.tag=${SENTRIUS_AI_AGENT_VERSION} \
+    --set integrationproxy.image.repository="${AZURE_REGISTRY}/sentrius-integration-proxy" \
+    --set integrationproxy.image.tag=${LLMPROXY_VERSION} \
+    --set agentproxy.image.repository="${AZURE_REGISTRY}/sentrius-agent-proxy" \
+    --set agentproxy.image.tag=${AGENTPROXY_VERSION} \
+    --set launcherservice.image.repository="${AZURE_REGISTRY}/sentrius-launcher-service" \
+    --set launcherservice.image.tag=${LAUNCHER_VERSION} \
+    --set sshproxy.image.repository="${AZURE_REGISTRY}/sentrius-ssh-proxy" \
+    --set sshproxy.image.tag=${SSHPROXY_VERSION} \
+    --set rdpproxy.image.repository="${AZURE_REGISTRY}/sentrius-rdp-proxy" \
+    --set rdpproxy.image.tag=${RDPPROXY_VERSION} \
+    > /tmp/sentrius-chart-test.yaml
+
+if [[ $? -eq 0 ]]; then # $? is the exit status of the helm template command directly above
+    echo "✅ sentrius-chart rendered successfully"
+    echo " Output saved to /tmp/sentrius-chart-test.yaml"
+else
+    echo "❌ sentrius-chart rendering failed"
+    exit 1
+fi
+
+echo ""
+echo "📦 Testing sentrius-chart-launcher..."
+helm template sentrius-agents ./sentrius-chart-launcher \
+    --namespace ${TENANT}-agents \
+    --set tenant=${TENANT}-agents \
+    --set baseRelease=sentrius \
+    --set sentriusNamespace=${TENANT} \
+    --set ingress.class="azure/application-gateway" \
+    --set keycloakFQDN=sentrius-keycloak.${TENANT}.svc.cluster.local \
+    --set sentriusFQDN=sentrius-sentrius.${TENANT}.svc.cluster.local \
+    --set integrationproxyFQDN=sentrius-integrationproxy.${TENANT}.svc.cluster.local \
+    --set agentproxyFQDN=sentrius-agentproxy.${TENANT}.svc.cluster.local \
+    --set subdomain="${TENANT}.${DOMAIN_NAME}" \
+    --set agentproxySubdomain="agentproxy.${TENANT}.${DOMAIN_NAME}" \
+    --set keycloakSubdomain="keycloak.${TENANT}.${DOMAIN_NAME}" \
+    --set keycloakHostname="keycloak.${TENANT}.${DOMAIN_NAME}" \
+    --set keycloakDomain="https://keycloak.${TENANT}.${DOMAIN_NAME}" \
+    --set keycloakInternalDomain="https://keycloak.${TENANT}.${DOMAIN_NAME}" \
+    --set sentriusDomain="https://${TENANT}.${DOMAIN_NAME}" \
+    --set agentproxyDomain="https://agentproxy.${TENANT}.${DOMAIN_NAME}" \
+    --set sentrius.image.repository="${AZURE_REGISTRY}/sentrius" \
+    --set sentrius.image.tag=${SENTRIUS_VERSION} \
+    --set keycloak.image.repository="${AZURE_REGISTRY}/sentrius-keycloak" \
+    --set keycloak.image.tag=${SENTRIUS_KEYCLOAK_VERSION} \
+    --set ssh.image.repository="${AZURE_REGISTRY}/sentrius-ssh" \
+    --set ssh.image.tag=${SENTRIUS_SSH_VERSION} \
+    --set sentriusagent.image.repository="${AZURE_REGISTRY}/sentrius-agent" \
+    --set sentriusagent.image.tag=${SENTRIUS_AGENT_VERSION} \
+    --set sentriusaiagent.image.repository="${AZURE_REGISTRY}/sentrius-ai-agent" \
+    --set sentriusaiagent.image.tag=${SENTRIUS_AI_AGENT_VERSION} \
+    --set integrationproxy.image.repository="${AZURE_REGISTRY}/sentrius-integration-proxy" \
+    --set integrationproxy.image.tag=${LLMPROXY_VERSION} \
+    --set launcherservice.image.repository="${AZURE_REGISTRY}/sentrius-launcher-service" \
+    --set launcherservice.image.tag=${LAUNCHER_VERSION} \
+
> /tmp/sentrius-chart-launcher-test.yaml
+
+if [[ $? -eq 0 ]]; then
+    echo "✅ sentrius-chart-launcher rendered successfully"
+    echo " Output saved to /tmp/sentrius-chart-launcher-test.yaml"
+else
+    echo "❌ sentrius-chart-launcher rendering failed"
+    exit 1
+fi
+
+echo ""
+echo "======================================"
+echo "✅ All Tests Passed!"
+echo "======================================"
diff --git a/ops-scripts/base/build-images.sh b/ops-scripts/base/build-images.sh
index 7744d178..91738a67 100755
--- a/ops-scripts/base/build-images.sh
+++ b/ops-scripts/base/build-images.sh
@@ -9,14 +9,14 @@ ENV_TARGET="local" # default mode
 NO_CACHE=false
 INCLUDE_DEV_CERTS=false
 
-# --- Parse the environment target (local | gcp) ---
-if [[ "$1" == "local" || "$1" == "gcp" ]]; then
+# --- Parse the environment target (local | gcp | azure) ---
+if [[ "$1" == "local" || "$1" == "gcp" || "$1" == "azure" ]]; then
     ENV_TARGET="$1"
     shift
 fi
 
-# --- Load environment file only for GCP (versions needed for registry) ---
-if [[ "$ENV_TARGET" == "gcp" ]]; then
+# --- Load environment file for GCP or Azure (versions needed for registry) ---
+if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then
     ENV_FILE=".$ENV_TARGET.env"
     source "$ENV_FILE"
     cp "$ENV_FILE" "$ENV_FILE.bak"
@@ -116,11 +116,16 @@ build_image() {
         exit 1
     fi
 
     if [[ "$ENV_TARGET" == "gcp" ]]; then
         REGISTRY="us-central1-docker.pkg.dev/sentrius-project/sentrius-repo"
         docker tag "$name:$version" "$REGISTRY/$name:$version"
         docker push "$REGISTRY/$name:$version"
         echo "✅ Pushed $REGISTRY/$name:$version"
+    elif [[ "$ENV_TARGET" == "azure" ]]; then # reachable because the gcp test above stays gcp-only
+        REGISTRY="${AZURE_REGISTRY:-sentriusacr.azurecr.io}"
+        docker tag "$name:$version" "$REGISTRY/$name:$version"
+        docker push "$REGISTRY/$name:$version"
+        echo "✅ Pushed $REGISTRY/$name:$version"
     else
         echo "✅ Built locally: $name:$version"
     fi
@@ -182,11 +187,16 @@ build_keycloak_image() {
         minikube image load "$name:$version"
     fi
 
     if [[ "$ENV_TARGET" == "gcp" ]]; then
         REGISTRY="us-central1-docker.pkg.dev/sentrius-project/sentrius-repo"
         docker tag "$name:$version" "$REGISTRY/$name:$version"
         docker push "$REGISTRY/$name:$version"
         echo "✅ Pushed $REGISTRY/$name:$version"
+    elif [[ "$ENV_TARGET" == "azure" ]]; then # reachable because the gcp test above stays gcp-only
+        REGISTRY="${AZURE_REGISTRY:-sentriusacr.azurecr.io}"
+        docker tag "$name:$version" "$REGISTRY/$name:$version"
+        docker push "$REGISTRY/$name:$version"
+        echo "✅ Pushed $REGISTRY/$name:$version"
     else
         echo "✅ Built locally: $name:$version"
     fi
@@ -233,16 +243,21 @@ while [[ "$#" -gt 0 ]]; do
     shift
 done
 
-# --- Auth for GCP ---
+# --- Auth for GCP (gcloud) or Azure (az acr login) ---
 if [[ "$ENV_TARGET" == "gcp" ]]; then
     echo "Authenticating with Google Cloud..."
     gcloud auth configure-docker us-central1-docker.pkg.dev || exit 1
+elif [[ "$ENV_TARGET" == "azure" ]]; then
+    echo "Authenticating with Azure Container Registry..."
+    REGISTRY="${AZURE_REGISTRY:-sentriusacr.azurecr.io}"
+    REGISTRY_NAME=$(echo "$REGISTRY" | cut -d'.'
-f1) + az acr login --name "$REGISTRY_NAME" || exit 1 fi # --- Build Steps --- if $update_sentrius; then cp api/target/sentrius-api-*.jar docker/sentrius/sentrius.jar - if [[ "$ENV_TARGET" == "gcp" ]]; then + if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then SENTRIUS_VERSION=$(increment_patch_version $SENTRIUS_VERSION) update_env_var "SENTRIUS_VERSION" "$SENTRIUS_VERSION" else @@ -253,7 +268,7 @@ if $update_sentrius; then fi if $update_sentrius_ssh; then - if [[ "$ENV_TARGET" == "gcp" ]]; then + if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then SENTRIUS_SSH_VERSION=$(increment_patch_version $SENTRIUS_SSH_VERSION) update_env_var "SENTRIUS_SSH_VERSION" "$SENTRIUS_SSH_VERSION" else @@ -263,7 +278,7 @@ if $update_sentrius_ssh; then fi if $update_sentrius_keycloak; then - if [[ "$ENV_TARGET" == "gcp" ]]; then + if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then SENTRIUS_KEYCLOAK_VERSION=$(increment_patch_version $SENTRIUS_KEYCLOAK_VERSION) update_env_var "SENTRIUS_KEYCLOAK_VERSION" "$SENTRIUS_KEYCLOAK_VERSION" else @@ -274,7 +289,7 @@ fi if $update_sentrius_agent; then cp analytics/target/analytics-*.jar docker/sentrius-agent/agent.jar - if [[ "$ENV_TARGET" == "gcp" ]]; then + if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then SENTRIUS_AGENT_VERSION=$(increment_patch_version $SENTRIUS_AGENT_VERSION) update_env_var "SENTRIUS_AGENT_VERSION" "$SENTRIUS_AGENT_VERSION" else @@ -286,7 +301,7 @@ fi if $update_sentrius_ai_agent; then cp enterprise-agent/target/enterprise-agent-*.jar docker/sentrius-ai-agent/agent.jar - if [[ "$ENV_TARGET" == "gcp" ]]; then + if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then SENTRIUS_AI_AGENT_VERSION=$(increment_patch_version $SENTRIUS_AI_AGENT_VERSION) update_env_var "SENTRIUS_AI_AGENT_VERSION" "$SENTRIUS_AI_AGENT_VERSION" else @@ -302,7 +317,7 @@ fi if $update_integrationproxy; then cp integration-proxy/target/sentrius-integration-proxy-*.jar 
docker/integrationproxy/llmproxy.jar - if [[ "$ENV_TARGET" == "gcp" ]]; then + if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then LLMPROXY_VERSION=$(increment_patch_version $LLMPROXY_VERSION) update_env_var "LLMPROXY_VERSION" "$LLMPROXY_VERSION" else @@ -314,7 +329,7 @@ fi if $update_launcher; then cp agent-launcher/target/agent-launcher-*.jar docker/sentrius-launcher-service/launcher.jar - if [[ "$ENV_TARGET" == "gcp" ]]; then + if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then LAUNCHER_VERSION=$(increment_patch_version $LAUNCHER_VERSION) update_env_var "LAUNCHER_VERSION" "$LAUNCHER_VERSION" else @@ -326,7 +341,7 @@ fi if $update_agent_proxy; then cp agent-proxy/target/sentrius-agent-proxy-*.jar docker/agent-proxy/agentproxy.jar - if [[ "$ENV_TARGET" == "gcp" ]]; then + if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then AGENTPROXY_VERSION=$(increment_patch_version $AGENTPROXY_VERSION) update_env_var "AGENTPROXY_VERSION" "$AGENTPROXY_VERSION" else @@ -338,7 +353,7 @@ fi if $update_ssh_proxy; then cp ssh-proxy/target/ssh-proxy-*.jar docker/ssh-proxy/sshproxy.jar - if [[ "$ENV_TARGET" == "gcp" ]]; then + if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then SSHPROXY_VERSION=$(increment_patch_version $SSHPROXY_VERSION) update_env_var "SSHPROXY_VERSION" "$SSHPROXY_VERSION" else @@ -350,7 +365,7 @@ fi if $update_rdp_proxy; then cp rdp-proxy/target/rdp-proxy-*.jar docker/rdp-proxy/rdpproxy.jar - if [[ "$ENV_TARGET" == "gcp" ]]; then + if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then RDPPROXY_VERSION=$(increment_patch_version $RDPPROXY_VERSION) update_env_var "RDPPROXY_VERSION" "$RDPPROXY_VERSION" else @@ -362,7 +377,7 @@ fi if $update_monitoring_agent; then cp monitoring/target/monitoring-*.jar docker/monitoring/monitoring.jar - if [[ "$ENV_TARGET" == "gcp" ]]; then + if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then $MONITORING_AGENT_VERSION=$(increment_patch_version 
$MONITORING_AGENT_VERSION) update_env_var "$MONITORING_AGENT_VERSION" "$MONITORING_AGENT_VERSION" else @@ -374,7 +389,7 @@ fi if $update_ssh_agent; then cp \/target/ssh-agent-*.jar docker/ssh-agent/ssh-agent.jar - if [[ "$ENV_TARGET" == "gcp" ]]; then + if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then $SSH_AGENT_VERSION=$(increment_patch_version $SSH_AGENT_VERSION) update_env_var "$SSH_AGENT_VERSION" "$SSH_AGENT_VERSION" else @@ -385,7 +400,7 @@ if $update_ssh_agent; then fi if $update_github_mcp; then - if [[ "$ENV_TARGET" == "gcp" ]]; then + if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then GITHUB_MCP_VERSION=$(increment_patch_version $GITHUB_MCP_VERSION) update_env_var "GITHUB_MCP_VERSION" "$GITHUB_MCP_VERSION" else @@ -395,7 +410,7 @@ if $update_github_mcp; then fi if $update_prompt_advisor; then - if [[ "$ENV_TARGET" == "gcp" ]]; then + if [[ "$ENV_TARGET" == "gcp" || "$ENV_TARGET" == "azure" ]]; then PROMPT_ADVISOR_VERSION=$(increment_patch_version $PROMPT_ADVISOR_VERSION) update_env_var "PROMPT_ADVISOR_VERSION" "$PROMPT_ADVISOR_VERSION" else