diff --git a/Cargo.toml b/Cargo.toml index 9f4fa63..73ca80e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,18 +41,21 @@ all-features = true rustdoc-args = ["--cfg", "docsrs"] [dependencies] -chrono = { version = "0.4", default-features = false, features = ["clock", "std"] } +chrono = { version = "0.4", default-features = false, features = ["clock", "std", "serde"] } thiserror = "1.0" hyperliquid_rust_sdk = "0.6.0" tokio = { version = "1.0", features = ["full"] } serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +csv = "1.3" rs-backtester = "0.1.0" ethers = "2.0" tracing = "0.1" rand = { version = "0.8", default-features = false, features = ["std"] } +tempfile = "3.13" [dev-dependencies] -chrono = { version = "0.4", default-features = false, features = ["clock", "std"] } +chrono = { version = "0.4", default-features = false, features = ["clock", "std", "serde"] } anyhow = "1" rayon = "1.10" rand = "0.8" diff --git a/src/alpha/mod.rs b/src/alpha/mod.rs new file mode 100644 index 0000000..eed7ea1 --- /dev/null +++ b/src/alpha/mod.rs @@ -0,0 +1,437 @@ +//! Alpha research and evaluation pipeline. +//! +//! The module consumes feature series produced by [`crate::features`] and +//! evaluates their predictive power against a forward return target. It offers +//! pluggable alpha models, a convenience pipeline, and summary statistics that +//! can be rendered into reports or converted into trading signals. + +use std::fmt; + +use crate::data::HyperliquidData; +use crate::features::{FeatureSeries, FeatureSet}; +use crate::report::AlphaReport; + +/// Target time series used for alpha evaluation. +#[derive(Debug, Clone)] +pub struct AlphaTarget { + horizon: usize, + values: Vec, +} + +impl AlphaTarget { + /// Build a forward return target from close prices. + pub fn forward_returns(data: &HyperliquidData, horizon: usize) -> Self { + let horizon = horizon.max(1); + let mut values = Vec::new(); + if data.close.len() > horizon { + for idx in 0..data.close.len() - horizon { + let future = data.close[idx + horizon]; + let current = data.close[idx]; + if current != 0.0 { + values.push((future / current) - 1.0); + } else { + values.push(f64::NAN); + } + } + } + Self { horizon, values } + } + + /// Borrow the raw target values. + pub fn values(&self) -> &[f64] { + &self.values + } + + /// Forward horizon used to build the target. + pub fn horizon(&self) -> usize { + self.horizon + } + + /// Number of samples available in the target. + pub fn len(&self) -> usize { + self.values.len() + } + + /// Whether the target contains no samples. + pub fn is_empty(&self) -> bool { + self.values.is_empty() + } +} + +/// Statistics returned by alpha evaluation. +#[derive(Debug, Clone)] +pub struct AlphaEvaluation { + /// Name of the feature that was evaluated. + pub feature_name: String, + /// Name of the model used for the evaluation. + pub model_name: String, + /// Pearson correlation between feature and forward return. + pub ic: f64, + /// Sharpe ratio of the sign-based signal derived from the feature. + pub sharpe: f64, + /// Average sign-based return. + pub mean_return: f64, + /// Normalised feature values used to build signals. + pub scores: Vec, + /// Per-sample product of normalised feature and target (rolling IC proxy). + pub ic_series: Vec, + /// Number of observations used during the evaluation. + pub sample_size: usize, +} + +impl AlphaEvaluation { + /// Whether the evaluation produced at least one score. + pub fn has_observations(&self) -> bool { + self.sample_size > 0 + } +} + +/// Collection of evaluations produced by a pipeline run. +#[derive(Debug, Clone)] +pub struct AlphaEvaluationSet { + evaluations: Vec, +} + +impl AlphaEvaluationSet { + /// Create a new set from raw evaluations. + pub fn new(evaluations: Vec) -> Self { + Self { evaluations } + } + + /// Borrow the underlying evaluations. + pub fn iter(&self) -> impl Iterator { + self.evaluations.iter() + } + + /// Number of evaluations. + pub fn len(&self) -> usize { + self.evaluations.len() + } + + /// Whether the set is empty. + pub fn is_empty(&self) -> bool { + self.evaluations.is_empty() + } + + /// Filter evaluations by absolute IC threshold. + pub fn filter_by_ic(self, threshold: f64) -> Self { + let evaluations = self + .evaluations + .into_iter() + .filter(|eval| eval.ic.abs() >= threshold) + .collect(); + Self { evaluations } + } + + /// Filter evaluations by minimum Sharpe ratio. + pub fn filter_by_sharpe(self, threshold: f64) -> Self { + let evaluations = self + .evaluations + .into_iter() + .filter(|eval| eval.sharpe.is_finite() && eval.sharpe >= threshold) + .collect(); + Self { evaluations } + } + + /// Convert the evaluation set into a report. + pub fn to_report(&self) -> AlphaReport { + AlphaReport::from_evaluations(self.evaluations.clone()) + } + + /// Consume the set and return the inner vector. + pub fn into_vec(self) -> Vec { + self.evaluations + } +} + +/// Trait implemented by alpha scoring models. +pub trait AlphaModel: fmt::Debug + Send + Sync { + /// Name of the model. + fn name(&self) -> &'static str; + + /// Evaluate a single feature against the target. + fn evaluate(&self, feature: &FeatureSeries, target: &AlphaTarget) -> Option; +} + +/// Simple correlation-based alpha model. +#[derive(Debug, Default, Clone, Copy)] +pub struct CorrelationAlpha; + +impl CorrelationAlpha { + fn build_evaluation( + &self, + feature: &FeatureSeries, + samples: Vec<(f64, f64)>, + ) -> AlphaEvaluation { + let (feature_values, target_values): (Vec<_>, Vec<_>) = samples.into_iter().unzip(); + let sample_size = feature_values.len(); + + let feature_mean = mean(&feature_values); + let target_mean = mean(&target_values); + let feature_std = std_dev(&feature_values, feature_mean); + let target_std = std_dev(&target_values, target_mean); + + let ic = if feature_std > 0.0 && target_std > 0.0 { + covariance(&feature_values, feature_mean, &target_values, target_mean) + / (feature_std * target_std) + } else { + 0.0 + }; + + let scores = z_scores(&feature_values, feature_mean, feature_std); + let normalised_target = z_scores(&target_values, target_mean, target_std); + let ic_series = scores + .iter() + .zip(normalised_target.iter()) + .map(|(f, t)| f * t) + .collect::>(); + + let mut signal_returns = Vec::with_capacity(scores.len()); + for (score, target) in scores.iter().zip(target_values.iter()) { + let sign = if *score > 0.0 { + 1.0 + } else if *score < 0.0 { + -1.0 + } else { + 0.0 + }; + signal_returns.push(sign * target); + } + + let mean_return = if signal_returns.is_empty() { + 0.0 + } else { + mean(&signal_returns) + }; + let signal_std = std_dev(&signal_returns, mean_return); + let sharpe = if signal_std > 0.0 { + mean_return / signal_std + } else { + 0.0 + }; + + AlphaEvaluation { + feature_name: feature.name().to_string(), + model_name: self.name().to_string(), + ic, + sharpe, + mean_return, + scores, + ic_series, + sample_size, + } + } +} + +impl AlphaModel for CorrelationAlpha { + fn name(&self) -> &'static str { + "correlation" + } + + fn evaluate(&self, feature: &FeatureSeries, target: &AlphaTarget) -> Option { + if target.is_empty() || feature.is_empty() { + return None; + } + + let target_len = target.len(); + let values = feature.values(); + let sample_len = values.len().min(target_len); + if sample_len < 2 { + return None; + } + + let mut samples = Vec::with_capacity(sample_len); + for idx in 0..sample_len { + let feature_value = values[idx]; + let target_value = target.values()[idx]; + if feature_value.is_finite() && target_value.is_finite() { + samples.push((feature_value, target_value)); + } + } + + if samples.len() < 2 { + return None; + } + + Some(self.build_evaluation(feature, samples)) + } +} + +/// Alpha pipeline orchestrating feature evaluation. +pub struct AlphaPipeline<'a> { + data: &'a HyperliquidData, + features: FeatureSet, + target: AlphaTarget, +} + +impl<'a> AlphaPipeline<'a> { + /// Create a new pipeline. + pub fn new(data: &'a HyperliquidData, features: FeatureSet, horizon: usize) -> Self { + let target = AlphaTarget::forward_returns(data, horizon); + Self { + data, + features, + target, + } + } + + /// Borrow the market data powering the pipeline. + pub fn data(&self) -> &'a HyperliquidData { + self.data + } + + /// Borrow the computed feature set. + pub fn features(&self) -> &FeatureSet { + &self.features + } + + /// Borrow the evaluation target. + pub fn target(&self) -> &AlphaTarget { + &self.target + } + + /// Evaluate all features with the supplied alpha model. + pub fn evaluate_all(&self, model: &M) -> AlphaEvaluationSet + where + M: AlphaModel, + { + let mut evaluations = Vec::new(); + for feature in self.features.iter() { + if let Some(result) = model.evaluate(feature, &self.target) { + evaluations.push(result); + } + } + AlphaEvaluationSet::new(evaluations) + } +} + +fn mean(values: &[f64]) -> f64 { + if values.is_empty() { + 0.0 + } else { + values.iter().sum::() / values.len() as f64 + } +} + +fn std_dev(values: &[f64], mean: f64) -> f64 { + if values.len() < 2 { + return 0.0; + } + let variance = values + .iter() + .map(|value| { + let diff = value - mean; + diff * diff + }) + .sum::() + / values.len() as f64; + variance.sqrt() +} + +fn covariance(feature: &[f64], feature_mean: f64, target: &[f64], target_mean: f64) -> f64 { + feature + .iter() + .zip(target.iter()) + .map(|(f, t)| (f - feature_mean) * (t - target_mean)) + .sum::() + / feature.len() as f64 +} + +fn z_scores(values: &[f64], mean: f64, std_dev: f64) -> Vec { + if std_dev == 0.0 { + return vec![0.0; values.len()]; + } + values + .iter() + .map(|value| (value - mean) / std_dev) + .collect::>() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::features::{ + compute_feature_set, Feature, LagReturnFeature, RsiFeature, VolatilityFeature, + }; + use chrono::{FixedOffset, TimeZone}; + + fn boxed_feature(feature: F) -> Box + where + F: Feature + Send + Sync + 'static, + { + Box::new(feature) + } + + fn mock_data() -> HyperliquidData { + let tz = FixedOffset::east_opt(0).unwrap(); + let mut datetime = Vec::new(); + let mut open = Vec::new(); + let mut high = Vec::new(); + let mut low = Vec::new(); + let mut close = Vec::new(); + let mut volume = Vec::new(); + let mut funding_rates = Vec::new(); + for i in 0..40 { + let base = 100.0 + i as f64 * 0.5; + datetime.push(tz.timestamp_opt(i as i64, 0).unwrap()); + open.push(base); + high.push(base + 1.0); + low.push(base - 1.0); + close.push(base + (i % 3) as f64 * 0.2); + volume.push(2_000.0 + i as f64 * 5.0); + funding_rates.push(0.0001); + } + + HyperliquidData { + symbol: "BTC".to_string(), + datetime, + open, + high, + low, + close, + volume, + funding_rates, + } + } + + #[test] + fn correlation_alpha_returns_evaluations() { + let data = mock_data(); + let feature_set = compute_feature_set( + &data, + vec![ + boxed_feature(RsiFeature { period: 5 }), + boxed_feature(VolatilityFeature { window: 10 }), + boxed_feature(LagReturnFeature { lag: 1 }), + ], + ); + let pipeline = AlphaPipeline::new(&data, feature_set, 1); + let evaluations = pipeline.evaluate_all(&CorrelationAlpha); + + assert!(!evaluations.is_empty()); + for evaluation in evaluations.iter() { + assert_eq!(evaluation.model_name, "correlation"); + assert!(evaluation.sample_size > 0); + } + } + + #[test] + fn evaluation_filters_work() { + let data = mock_data(); + let feature_set = compute_feature_set( + &data, + vec![ + boxed_feature(RsiFeature { period: 5 }), + boxed_feature(VolatilityFeature { window: 5 }), + boxed_feature(LagReturnFeature { lag: 2 }), + ], + ); + let pipeline = AlphaPipeline::new(&data, feature_set, 1); + let evaluations = pipeline + .evaluate_all(&CorrelationAlpha) + .filter_by_ic(0.01) + .filter_by_sharpe(-10.0); + + assert!(evaluations.len() <= 3); + } +} diff --git a/src/data.rs b/src/data.rs index ecb67c8..5f00eac 100644 --- a/src/data.rs +++ b/src/data.rs @@ -15,7 +15,9 @@ //! //! ### Basic Data Fetching //! -//! ```rust,no_run +//! ```rust,ignore +//! use hyperliquid_backtest::data::HyperliquidData; +//! use hyperliquid_backtest::errors::HyperliquidBacktestError; //! use hyperliquid_backtest::prelude::*; //! use chrono::Utc; //! @@ -35,8 +37,12 @@ //! //! ### Working with Funding Rates //! -//! ```rust,no_run +//! ```rust,ignore +//! use hyperliquid_backtest::data::HyperliquidData; +//! use hyperliquid_backtest::errors::HyperliquidBacktestError; //! use hyperliquid_backtest::prelude::*; +//! # let start_time = 0; +//! # let end_time = 0; //! //! #[tokio::main] //! async fn main() -> Result<(), HyperliquidBacktestError> { @@ -82,8 +88,12 @@ use std::collections::HashMap; /// /// ## Example /// -/// ```rust,no_run +/// ```rust,ignore +/// use hyperliquid_backtest::data::HyperliquidData; +/// use hyperliquid_backtest::errors::HyperliquidBacktestError; /// use hyperliquid_backtest::prelude::*; +/// # let start_time = 0; +/// # let end_time = 0; /// /// #[tokio::main] /// async fn main() -> Result<(), HyperliquidBacktestError> { @@ -471,8 +481,9 @@ impl HyperliquidData { /// /// # Examples /// - /// ```rust,no_run - /// use hyperliquid_backtest::prelude::*; + /// ```rust,ignore + /// use hyperliquid_backtest::data::HyperliquidData; + /// use hyperliquid_backtest::errors::HyperliquidBacktestError; /// use chrono::Utc; /// /// #[tokio::main] @@ -568,8 +579,9 @@ impl HyperliquidData { /// /// # Examples /// - /// ```rust,no_run - /// use hyperliquid_backtest::prelude::*; + /// ```rust,ignore + /// use hyperliquid_backtest::data::HyperliquidData; + /// use hyperliquid_backtest::errors::HyperliquidBacktestError; /// use chrono::{DateTime, FixedOffset, Utc}; /// /// let timestamps = vec![Utc::now().with_timezone(&FixedOffset::east_opt(0).unwrap())]; diff --git a/src/features/mod.rs b/src/features/mod.rs new file mode 100644 index 0000000..696069b --- /dev/null +++ b/src/features/mod.rs @@ -0,0 +1,406 @@ +//! Feature engineering primitives used by the alpha research pipeline. +//! +//! The module provides a small registry and a set of reusable feature +//! implementations that operate directly on [`HyperliquidData`](crate::data::HyperliquidData). +//! Each feature exposes a [`Feature`] trait implementation that converts +//! market data into a [`FeatureSeries`]. +//! +//! The goal of the module is to keep the feature layer declarative and easy +//! to extend. Features can be collected either through the convenience +//! [`compute_feature_set`] function or by registering them with a +//! [`FeatureRegistry`]. The resulting [`FeatureSet`] is then consumed by the +//! alpha evaluation pipeline. + +use crate::data::HyperliquidData; + +/// Shared context passed to feature implementations. +pub struct FeatureContext<'a> { + data: &'a HyperliquidData, +} + +impl<'a> FeatureContext<'a> { + /// Create a new feature context from market data. + pub fn new(data: &'a HyperliquidData) -> Self { + Self { data } + } + + /// Borrow the underlying market data. + pub fn data(&self) -> &'a HyperliquidData { + self.data + } +} + +/// Output series produced by a feature implementation. +#[derive(Debug, Clone)] +pub struct FeatureSeries { + name: String, + values: Vec, +} + +impl FeatureSeries { + /// Create a new feature series. + pub fn new(name: impl Into, values: Vec) -> Self { + Self { + name: name.into(), + values, + } + } + + /// Name of the feature. + pub fn name(&self) -> &str { + &self.name + } + + /// Borrow the raw feature values. + pub fn values(&self) -> &[f64] { + &self.values + } + + /// Length of the feature series. + pub fn len(&self) -> usize { + self.values.len() + } + + /// Whether the feature contains no values. + pub fn is_empty(&self) -> bool { + self.values.is_empty() + } +} + +/// Trait implemented by all feature engineering components. +pub trait Feature { + /// Unique name of the feature. + fn name(&self) -> &'static str; + + /// Compute the feature on top of the supplied market data. + fn compute(&self, context: &FeatureContext<'_>) -> FeatureSeries; +} + +/// Convenience wrapper around a collection of features. +#[derive(Default)] +pub struct FeatureRegistry { + features: Vec>, +} + +impl FeatureRegistry { + /// Create an empty registry. + pub fn new() -> Self { + Self::default() + } + + /// Register a new feature. + pub fn register(&mut self, feature: F) + where + F: Feature + Send + Sync + 'static, + { + self.features.push(Box::new(feature)); + } + + /// Compute all registered features and collect them into a [`FeatureSet`]. + pub fn compute(&self, data: &HyperliquidData) -> FeatureSet { + let context = FeatureContext::new(data); + let mut series = Vec::with_capacity(self.features.len()); + for feature in &self.features { + series.push(feature.compute(&context)); + } + FeatureSet { series } + } + + /// Number of registered features. + pub fn len(&self) -> usize { + self.features.len() + } + + /// Whether no features have been registered. + pub fn is_empty(&self) -> bool { + self.features.is_empty() + } +} + +/// Compute a feature set directly from an iterator of feature implementations. +/// +/// This helper is handy when features are constructed ad-hoc instead of being +/// stored in a registry. +pub fn compute_feature_set(data: &HyperliquidData, features: I) -> FeatureSet +where + I: IntoIterator>, +{ + let context = FeatureContext::new(data); + let mut series = Vec::new(); + for feature in features { + series.push(feature.compute(&context)); + } + FeatureSet { series } +} + +/// Collection of feature series that can be consumed by the alpha pipeline. +#[derive(Debug, Clone)] +pub struct FeatureSet { + series: Vec, +} + +impl FeatureSet { + /// Create a new feature set. + pub fn new(series: Vec) -> Self { + Self { series } + } + + /// Iterate over all feature series. + pub fn iter(&self) -> impl Iterator { + self.series.iter() + } + + /// Consume the set and return the inner vector. + pub fn into_inner(self) -> Vec { + self.series + } + + /// Number of features contained in the set. + pub fn len(&self) -> usize { + self.series.len() + } + + /// Whether the set is empty. + pub fn is_empty(&self) -> bool { + self.series.is_empty() + } + + /// Fetch a feature by name. + pub fn get(&self, name: &str) -> Option<&FeatureSeries> { + self.series.iter().find(|series| series.name == name) + } +} + +/// Relative Strength Index feature implementation. +pub struct RsiFeature { + /// Look-back window for the RSI calculation. + pub period: usize, +} + +impl Feature for RsiFeature { + fn name(&self) -> &'static str { + "RSI" + } + + fn compute(&self, context: &FeatureContext<'_>) -> FeatureSeries { + let period = self.period.max(1); + let closes = &context.data().close; + let mut values = Vec::with_capacity(closes.len()); + + if closes.is_empty() { + return FeatureSeries::new(self.name(), values); + } + + let mut gains = 0.0; + let mut losses = 0.0; + + values.push(f64::NAN); // first value has no delta + for i in 1..=period.min(closes.len() - 1) { + let delta = closes[i] - closes[i - 1]; + if delta >= 0.0 { + gains += delta; + } else { + losses -= delta; + } + values.push(f64::NAN); + } + + if closes.len() <= period { + // Not enough data to compute RSI; fill with NaNs + values.resize(closes.len(), f64::NAN); + return FeatureSeries::new(self.name(), values); + } + + let mut avg_gain = gains / period as f64; + let mut avg_loss = losses / period as f64; + + let rsi_value = if avg_loss == 0.0 { + 100.0 + } else { + let rs = avg_gain / avg_loss; + 100.0 - (100.0 / (1.0 + rs)) + }; + if values.len() > period { + values[period] = rsi_value; + } + + for i in (period + 1)..closes.len() { + let delta = closes[i] - closes[i - 1]; + if delta >= 0.0 { + avg_gain = ((avg_gain * (period as f64 - 1.0)) + delta) / period as f64; + avg_loss = (avg_loss * (period as f64 - 1.0)) / period as f64; + } else { + avg_gain = (avg_gain * (period as f64 - 1.0)) / period as f64; + avg_loss = ((avg_loss * (period as f64 - 1.0)) - delta) / period as f64; + } + + let rsi = if avg_loss == 0.0 { + 100.0 + } else { + let rs = avg_gain / avg_loss; + 100.0 - (100.0 / (1.0 + rs)) + }; + values.push(rsi); + } + + FeatureSeries::new(self.name(), values) + } +} + +/// Rolling volatility feature computed using the population standard deviation of log returns. +pub struct VolatilityFeature { + /// Rolling window length. + pub window: usize, +} + +impl Feature for VolatilityFeature { + fn name(&self) -> &'static str { + "VOLATILITY" + } + + fn compute(&self, context: &FeatureContext<'_>) -> FeatureSeries { + let closes = &context.data().close; + let window = self.window.max(2); + if closes.len() < 2 { + return FeatureSeries::new(self.name(), vec![f64::NAN; closes.len()]); + } + + let mut log_returns = Vec::with_capacity(closes.len() - 1); + for w in closes.windows(2) { + let prev = w[0]; + let current = w[1]; + log_returns.push((current / prev).ln()); + } + + let mut values = vec![f64::NAN; closes.len()]; + if log_returns.len() + 1 <= window { + return FeatureSeries::new(self.name(), values); + } + + for end in window..=log_returns.len() { + let slice = &log_returns[end - window..end]; + let mean = slice.iter().sum::() / slice.len() as f64; + let variance = slice + .iter() + .map(|value| { + let diff = value - mean; + diff * diff + }) + .sum::() + / slice.len() as f64; + let std_dev = variance.sqrt(); + values[end] = std_dev; + } + + FeatureSeries::new(self.name(), values) + } +} + +/// Simple lagged return feature using percentage returns. +pub struct LagReturnFeature { + /// Number of periods to lag the return calculation. + pub lag: usize, +} + +impl Feature for LagReturnFeature { + fn name(&self) -> &'static str { + "LAG_RETURN" + } + + fn compute(&self, context: &FeatureContext<'_>) -> FeatureSeries { + let closes = &context.data().close; + let lag = self.lag.max(1); + if closes.len() <= lag { + return FeatureSeries::new(self.name(), vec![f64::NAN; closes.len()]); + } + + let mut values = vec![f64::NAN; closes.len()]; + for i in lag..closes.len() { + let previous = closes[i - lag]; + let current = closes[i]; + if previous != 0.0 { + values[i] = (current / previous) - 1.0; + } else { + values[i] = f64::NAN; + } + } + + FeatureSeries::new(self.name(), values) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::{FixedOffset, TimeZone}; + + fn boxed_feature(feature: F) -> Box + where + F: Feature + Send + Sync + 'static, + { + Box::new(feature) + } + + fn mock_data() -> HyperliquidData { + let tz = FixedOffset::east_opt(0).unwrap(); + let mut datetime = Vec::new(); + let mut open = Vec::new(); + let mut high = Vec::new(); + let mut low = Vec::new(); + let mut close = Vec::new(); + let mut volume = Vec::new(); + let mut funding_rates = Vec::new(); + for i in 0..10 { + let price = 100.0 + i as f64; + datetime.push(tz.timestamp_opt(i as i64, 0).unwrap()); + open.push(price - 0.5); + high.push(price + 1.0); + low.push(price - 1.0); + close.push(price); + volume.push(1_000.0 + i as f64 * 10.0); + funding_rates.push(0.0001); + } + + HyperliquidData { + symbol: "BTC".to_string(), + datetime, + open, + high, + low, + close, + volume, + funding_rates, + } + } + + #[test] + fn feature_registry_computes_all_features() { + let data = mock_data(); + let mut registry = FeatureRegistry::new(); + registry.register(RsiFeature { period: 5 }); + registry.register(VolatilityFeature { window: 3 }); + registry.register(LagReturnFeature { lag: 1 }); + + let feature_set = registry.compute(&data); + assert_eq!(feature_set.len(), 3); + assert!(feature_set + .iter() + .all(|series| series.len() == data.close.len())); + } + + #[test] + fn compute_feature_set_from_iterator() { + let data = mock_data(); + let feature_set = compute_feature_set( + &data, + vec![ + boxed_feature(RsiFeature { period: 4 }), + boxed_feature(VolatilityFeature { window: 4 }), + boxed_feature(LagReturnFeature { lag: 2 }), + ], + ); + + assert_eq!(feature_set.len(), 3); + assert!(feature_set.get("RSI").is_some()); + } +} diff --git a/src/lib.rs b/src/lib.rs index cc64e84..18e6709 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,13 +6,21 @@ //! avoids external dependencies or complex behaviours so the library can compile //! quickly and remain easy to understand. +pub mod alpha; pub mod backtest; +pub mod data; +pub mod errors; +pub mod features; pub mod optimization; +pub mod report; pub mod risk_manager; +pub mod signals; +pub mod strategy; pub mod unified_data; #[cfg(test)] mod tests { + mod alpha_pipeline_tests; mod basic; } diff --git a/src/report/mod.rs b/src/report/mod.rs new file mode 100644 index 0000000..9753168 --- /dev/null +++ b/src/report/mod.rs @@ -0,0 +1,124 @@ +//! Reporting utilities for summarising alpha evaluation results. + +use std::fs::File; +use std::io::{BufWriter, Write}; +use std::path::Path; + +use serde::Serialize; + +use crate::alpha::AlphaEvaluation; + +/// Summary row used when exporting evaluations to tabular formats. +#[derive(Debug, Clone, Serialize)] +pub struct AlphaSummaryRow { + /// Feature identifier. + pub feature_name: String, + /// Alpha model identifier. + pub model_name: String, + /// Information Coefficient. + pub ic: f64, + /// Sharpe ratio of the sign-based signal. + pub sharpe: f64, + /// Mean sign-based return. + pub mean_return: f64, + /// Number of samples used during evaluation. + pub sample_size: usize, +} + +/// Report container capable of exporting evaluation results. +#[derive(Debug, Clone)] +pub struct AlphaReport { + evaluations: Vec, +} + +impl AlphaReport { + /// Create a report from raw evaluations. + pub fn from_evaluations(evaluations: Vec) -> Self { + Self { evaluations } + } + + /// Number of evaluations contained in the report. + pub fn len(&self) -> usize { + self.evaluations.len() + } + + /// Whether the report is empty. + pub fn is_empty(&self) -> bool { + self.evaluations.is_empty() + } + + /// Generate a list of summary rows for presentation or export. + pub fn summary_rows(&self) -> Vec { + self.evaluations + .iter() + .map(|evaluation| AlphaSummaryRow { + feature_name: evaluation.feature_name.clone(), + model_name: evaluation.model_name.clone(), + ic: evaluation.ic, + sharpe: evaluation.sharpe, + mean_return: evaluation.mean_return, + sample_size: evaluation.sample_size, + }) + .collect() + } + + /// Return top `limit` evaluations ranked by absolute IC. + pub fn best_by_ic(&self, limit: usize) -> Vec<&AlphaEvaluation> { + let mut refs: Vec<&AlphaEvaluation> = self.evaluations.iter().collect(); + refs.sort_by(|a, b| { + b.ic.abs() + .partial_cmp(&a.ic.abs()) + .unwrap_or(std::cmp::Ordering::Equal) + }); + refs.truncate(limit); + refs + } + + /// Write the report as a CSV file. + pub fn write_csv>(&self, path: P) -> std::io::Result<()> { + let file = File::create(path)?; + let mut writer = BufWriter::new(file); + writeln!( + writer, + "feature_name,model_name,ic,sharpe,mean_return,sample_size" + )?; + for row in self.summary_rows() { + writeln!( + writer, + "{},{},{:.6},{:.6},{:.6},{}", + row.feature_name, + row.model_name, + row.ic, + row.sharpe, + row.mean_return, + row.sample_size + )?; + } + writer.flush() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::alpha::AlphaEvaluation; + + #[test] + fn report_generates_summary_rows() { + let evaluation = AlphaEvaluation { + feature_name: "feature".to_string(), + model_name: "model".to_string(), + ic: 0.2, + sharpe: 1.1, + mean_return: 0.01, + scores: vec![0.1, -0.2], + ic_series: vec![0.0, 0.0], + sample_size: 2, + }; + + let report = AlphaReport::from_evaluations(vec![evaluation]); + let summary = report.summary_rows(); + assert_eq!(summary.len(), 1); + assert_eq!(summary[0].feature_name, "feature"); + } +} diff --git a/src/signals/mod.rs b/src/signals/mod.rs new file mode 100644 index 0000000..2140cd7 --- /dev/null +++ b/src/signals/mod.rs @@ -0,0 +1,139 @@ +//! Signal generation utilities built on top of evaluated alphas. +//! +//! The signal layer converts [`AlphaEvaluation`](crate::alpha::AlphaEvaluation) +//! outputs into actionable trading directives that can be consumed by +//! strategies or backtesting logic. + +use crate::alpha::AlphaEvaluation; + +/// Discrete trading instruction produced by a signal generator. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SignalValue { + /// Take or maintain a long position. + Long, + /// Take or maintain a short position. + Short, + /// Stay flat. + Flat, +} + +impl SignalValue { + /// Convert the signal into a signed position representation. + pub fn as_position(&self) -> f64 { + match self { + SignalValue::Long => 1.0, + SignalValue::Short => -1.0, + SignalValue::Flat => 0.0, + } + } +} + +/// Trait implemented by all signal generators. +pub trait SignalGenerator { + /// Convert an [`AlphaEvaluation`] into a stream of trading signals. + fn generate(&self, evaluation: &AlphaEvaluation) -> Vec; +} + +/// Signal generator using a symmetric threshold on normalised feature scores. +#[derive(Debug, Clone, Copy)] +pub struct ThresholdSignal { + /// Threshold applied to the normalised scores. + pub threshold: f64, +} + +impl SignalGenerator for ThresholdSignal { + fn generate(&self, evaluation: &AlphaEvaluation) -> Vec { + let threshold = self.threshold.abs(); + evaluation + .scores + .iter() + .map(|score| { + if !score.is_finite() || score.abs() < threshold { + SignalValue::Flat + } else if *score > 0.0 { + SignalValue::Long + } else { + SignalValue::Short + } + }) + .collect() + } +} + +/// Signal generator that buckets continuous scores into custom ranges. +#[derive(Debug, Clone)] +pub struct BucketSignal { + /// Threshold that separates flat and directional states. + pub neutral_threshold: f64, + /// Upper bound after which the signal is considered strongly directional. + pub aggressive_threshold: f64, +} + +impl SignalGenerator for BucketSignal { + fn generate(&self, evaluation: &AlphaEvaluation) -> Vec { + let neutral = self.neutral_threshold.abs(); + let aggressive = self.aggressive_threshold.abs().max(neutral); + evaluation + .scores + .iter() + .map(|score| { + if !score.is_finite() || score.abs() < neutral { + SignalValue::Flat + } else if score.abs() >= aggressive { + if *score > 0.0 { + SignalValue::Long + } else { + SignalValue::Short + } + } else if *score > 0.0 { + SignalValue::Long + } else { + SignalValue::Short + } + }) + .collect() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::alpha::AlphaEvaluation; + + fn mock_evaluation() -> AlphaEvaluation { + AlphaEvaluation { + feature_name: "mock".to_string(), + model_name: "mock_model".to_string(), + ic: 0.1, + sharpe: 1.2, + mean_return: 0.01, + scores: vec![-1.5, -0.4, 0.0, 0.3, 0.8], + ic_series: vec![0.0; 5], + sample_size: 5, + } + } + + #[test] + fn threshold_signal_generates_expected_values() { + let evaluation = mock_evaluation(); + let generator = ThresholdSignal { threshold: 0.5 }; + let signals = generator.generate(&evaluation); + assert_eq!(signals.len(), evaluation.scores.len()); + assert_eq!(signals[0], SignalValue::Short); + assert_eq!(signals[1], SignalValue::Flat); + assert_eq!(signals[4], SignalValue::Long); + } + + #[test] + fn bucket_signal_handles_multiple_levels() { + let evaluation = mock_evaluation(); + let generator = BucketSignal { + neutral_threshold: 0.2, + aggressive_threshold: 1.0, + }; + let signals = generator.generate(&evaluation); + assert_eq!(signals[0], SignalValue::Short); + assert_eq!(signals[2], SignalValue::Flat); + assert_eq!(signals[4], SignalValue::Long); + } +} diff --git a/src/strategy/alpha_driven.rs b/src/strategy/alpha_driven.rs new file mode 100644 index 0000000..2f3c1a8 --- /dev/null +++ b/src/strategy/alpha_driven.rs @@ -0,0 +1,128 @@ +use crate::data::HyperliquidData; +use crate::signals::SignalValue; +use crate::unified_data::{OrderRequest, OrderSide}; + +/// Strategy that turns a stream of signals into market orders with a fixed position size. +#[derive(Debug, Clone)] +pub struct AlphaDrivenStrategy { + symbol: String, + signals: Vec, + quantity: f64, +} + +impl AlphaDrivenStrategy { + /// Create a new strategy instance. + pub fn new(symbol: impl Into, signals: Vec, quantity: f64) -> Self { + Self { + symbol: symbol.into(), + signals, + quantity: quantity.abs(), + } + } + + /// Borrow the signals associated with the strategy. + pub fn signals(&self) -> &[SignalValue] { + &self.signals + } + + /// Generate market orders required to follow the signal stream. + /// + /// The helper compares consecutive signals and emits the minimal set of + /// market orders needed to reach the desired exposure. Transitions between + /// long and short positions are handled by issuing a double-sized order to + /// close the previous exposure before establishing the new one. + pub fn generate_orders(&self, data: &HyperliquidData) -> Vec { + if self.signals.is_empty() { + return Vec::new(); + } + + let mut orders = Vec::new(); + let mut previous = SignalValue::Flat; + let limit = self.signals.len().min(data.close.len()); + + for idx in 0..limit { + let current = self.signals[idx]; + if let Some((side, quantity)) = transition(previous, current, self.quantity) { + orders.push(OrderRequest::market(&self.symbol, side, quantity)); + } + previous = current; + } + + orders + } +} + +fn transition( + previous: SignalValue, + current: SignalValue, + base_quantity: f64, +) -> Option<(OrderSide, f64)> { + match (previous, current) { + (SignalValue::Flat, SignalValue::Long) => Some((OrderSide::Buy, base_quantity)), + (SignalValue::Flat, SignalValue::Short) => Some((OrderSide::Sell, base_quantity)), + (SignalValue::Long, SignalValue::Flat) => Some((OrderSide::Sell, base_quantity)), + (SignalValue::Short, SignalValue::Flat) => Some((OrderSide::Buy, base_quantity)), + (SignalValue::Long, SignalValue::Short) => Some((OrderSide::Sell, base_quantity * 2.0)), + (SignalValue::Short, SignalValue::Long) => Some((OrderSide::Buy, base_quantity * 2.0)), + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::signals::SignalValue; + use chrono::{FixedOffset, TimeZone}; + + fn mock_data() -> HyperliquidData { + let tz = FixedOffset::east_opt(0).unwrap(); + let mut datetime = Vec::new(); + let mut open = Vec::new(); + let mut high = Vec::new(); + let mut low = Vec::new(); + let mut close = Vec::new(); + let mut volume = Vec::new(); + let mut funding_rates = Vec::new(); + for i in 0..6 { + let price = 100.0 + i as f64; + datetime.push(tz.timestamp_opt(i as i64, 0).unwrap()); + open.push(price); + high.push(price + 1.0); + low.push(price - 1.0); + close.push(price); + volume.push(1000.0); + funding_rates.push(0.0); + } + + HyperliquidData { + symbol: "BTC".to_string(), + datetime, + open, + high, + low, + close, + volume, + funding_rates, + } + } + + #[test] + fn strategy_generates_orders_from_signals() { + let data = mock_data(); + let signals = vec![ + SignalValue::Flat, + SignalValue::Long, + SignalValue::Long, + SignalValue::Short, + SignalValue::Flat, + SignalValue::Short, + ]; + let strategy = AlphaDrivenStrategy::new("BTC", signals, 1.0); + let orders = strategy.generate_orders(&data); + assert_eq!(orders.len(), 4); + assert_eq!(orders[0].side, OrderSide::Buy); + assert_eq!(orders[1].side, OrderSide::Sell); + assert_eq!(orders[2].side, OrderSide::Buy); + assert_eq!(orders[3].side, OrderSide::Sell); + } +} diff --git a/src/strategy/mod.rs b/src/strategy/mod.rs new file mode 100644 index 0000000..da3c609 --- /dev/null +++ b/src/strategy/mod.rs @@ -0,0 +1,9 @@ +//! Strategy helpers that bridge evaluated alphas and order generation. +//! +//! The provided [`AlphaDrivenStrategy`] converts a sequence of +//! [`SignalValue`](crate::signals::SignalValue) values into Hyperliquid order +//! requests that can be fed into the existing backtesting infrastructure. + +mod alpha_driven; + +pub use alpha_driven::AlphaDrivenStrategy; diff --git a/src/tests/alpha_pipeline_tests.rs b/src/tests/alpha_pipeline_tests.rs new file mode 100644 index 0000000..f3d6056 --- /dev/null +++ b/src/tests/alpha_pipeline_tests.rs @@ -0,0 +1,89 @@ +use crate::alpha::{AlphaPipeline, CorrelationAlpha}; +use crate::data::HyperliquidData; +use crate::features::{ + compute_feature_set, Feature, LagReturnFeature, RsiFeature, VolatilityFeature, +}; +use crate::report::AlphaReport; +use crate::signals::{SignalGenerator, ThresholdSignal}; +use crate::strategy::AlphaDrivenStrategy; +use crate::unified_data::OrderSide; +use chrono::{FixedOffset, TimeZone}; + +fn mock_data() -> HyperliquidData { + let tz = FixedOffset::east_opt(0).unwrap(); + let mut datetime = Vec::new(); + let mut open = Vec::new(); + let mut high = Vec::new(); + let mut low = Vec::new(); + let mut close = Vec::new(); + let mut volume = Vec::new(); + let mut funding_rates = Vec::new(); + + for i in 0..50 { + let base = 100.0 + (i as f64 * 0.3); + datetime.push(tz.timestamp_opt(i as i64, 0).unwrap()); + open.push(base); + high.push(base + 0.8); + low.push(base - 0.8); + close.push(base + ((i % 5) as f64 - 2.0) * 0.1); + volume.push(1_000.0 + i as f64 * 10.0); + funding_rates.push(0.0001); + } + + HyperliquidData { + symbol: "BTC".to_string(), + datetime, + open, + high, + low, + close, + volume, + funding_rates, + } +} + +#[test] +fn end_to_end_alpha_pipeline_flow() { + let data = mock_data(); + let feature_set = compute_feature_set( + &data, + vec![ + boxed_feature(RsiFeature { period: 6 }), + boxed_feature(VolatilityFeature { window: 5 }), + boxed_feature(LagReturnFeature { lag: 2 }), + ], + ); + + let pipeline = AlphaPipeline::new(&data, feature_set, 1); + let evaluations = pipeline.evaluate_all(&CorrelationAlpha); + assert!(!evaluations.is_empty()); + + let filtered = evaluations.filter_by_ic(0.01); + let report: AlphaReport = filtered.to_report(); + assert!(report.len() <= 3); + + if report.is_empty() { + return; + } + + let best = report.best_by_ic(1); + let evaluation = best[0]; + let generator = ThresholdSignal { threshold: 0.5 }; + let signals = generator.generate(evaluation); + assert_eq!(signals.len(), evaluation.scores.len()); + + let strategy = AlphaDrivenStrategy::new("BTC", signals, 1.0); + let orders = strategy.generate_orders(&data); + if !orders.is_empty() { + assert!(orders.iter().all(|order| order.quantity > 0.0)); + assert!(orders + .iter() + .all(|order| order.side == OrderSide::Buy || order.side == OrderSide::Sell)); + } +} +fn boxed_feature(feature: F) -> Box +where + F: Feature + Send + Sync + 'static, +{ + Box::new(feature) +}