Python SDK
The Olytix Core Python SDK provides native access to your semantic layer for machine learning workflows, feature engineering, and data science applications. It ensures consistent metrics across training and inference, eliminating feature skew.
Installation
pip install olytix-core
# With ML extras (pandas, numpy, scikit-learn integration)
pip install olytix-core[ml]
# For Jupyter notebook support
pip install olytix-core[jupyter]
Quick Start
from olytix_core import OlytixCoreClient
# Initialize client
client = OlytixCoreClient(
url="http://localhost:8000",
api_key="YOUR_API_KEY"
)
# Query metrics
result = client.query(
metrics=["total_revenue", "order_count"],
dimensions=["region", "product_category"],
filters=[{"dimension": "order_date.year", "operator": "equals", "value": 2024}]
)
# Access as DataFrame
df = result.to_pandas()
print(df.head())
Configuration
Client Initialization
from olytix_core import OlytixCoreClient
# Basic configuration
client = OlytixCoreClient(
url="http://localhost:8000",
api_key="YOUR_API_KEY"
)
# Full configuration
client = OlytixCoreClient(
url="http://localhost:8000",
api_key="YOUR_API_KEY",
timeout=30, # Request timeout in seconds
max_retries=3, # Number of retry attempts
verify_ssl=True, # SSL certificate verification
headers={"X-Custom": "value"} # Additional headers
)
# From environment variables
import os
client = OlytixCoreClient(
url=os.environ["OLYTIX_URL"],
api_key=os.environ["OLYTIX_API_KEY"]
)
Configuration File
Create a ~/.olytix-core/config.yml:
default:
url: http://localhost:8000
api_key: ${OLYTIX_API_KEY}
production:
url: https://olytix-core.example.com
api_key: ${OLYTIX_PROD_API_KEY}
timeout: 60
Use profiles:
from olytix_core import OlytixCoreClient
# Use default profile
client = OlytixCoreClient.from_config()
# Use specific profile
client = OlytixCoreClient.from_config("production")
Querying Data
Basic Query
# Query with metrics and dimensions
result = client.query(
metrics=["total_revenue", "order_count"],
dimensions=["region"]
)
# Access data
print(result.data) # List of dictionaries
print(result.to_pandas()) # pandas DataFrame
print(result.to_arrow()) # PyArrow Table
Filtering
# Single filter
result = client.query(
metrics=["total_revenue"],
dimensions=["region"],
filters=[
{"dimension": "order_date.year", "operator": "equals", "value": 2024}
]
)
# Multiple filters
result = client.query(
metrics=["total_revenue"],
dimensions=["region", "product_category"],
filters=[
{"dimension": "order_date.year", "operator": "equals", "value": 2024},
{"dimension": "region", "operator": "in", "values": ["US-East", "US-West"]},
{"dimension": "order_amount", "operator": "gte", "value": 100}
]
)
Time Dimensions
# Query with time granularity
result = client.query(
metrics=["total_revenue"],
dimensions=["order_date.month", "region"],
time_dimensions=[
{
"dimension": "order_date",
"date_range": ["2024-01-01", "2024-12-31"],
"granularity": "month"
}
]
)
Ordering and Limiting
result = client.query(
metrics=["total_revenue"],
dimensions=["product_category"],
order_by=[
{"field": "total_revenue", "direction": "desc"}
],
limit=10
)
Data Formats
Pandas DataFrame
import pandas as pd
result = client.query(
metrics=["total_revenue", "order_count"],
dimensions=["region", "order_date.month"]
)
df = result.to_pandas()
# Type-aware conversion
df["order_date.month"] = pd.to_datetime(df["order_date.month"])
df["region"] = df["region"].astype("category")
PyArrow Table
import pyarrow as pa
result = client.query(
metrics=["total_revenue"],
dimensions=["region"]
)
table = result.to_arrow()
# Write to Parquet (pyarrow.parquet must be imported explicitly)
import pyarrow.parquet as pq
pq.write_table(table, "data.parquet")
NumPy Arrays
import numpy as np
result = client.query(
metrics=["total_revenue", "order_count"],
dimensions=["region"]
)
# Get as structured array
df = result.to_pandas()
X = df[["total_revenue", "order_count"]].values
Feature Engineering
Feature Store Integration
from olytix_core import OlytixCoreClient, FeatureSet
client = OlytixCoreClient(url="http://localhost:8000", api_key="YOUR_KEY")
# Define a feature set
feature_set = FeatureSet(
name="customer_features",
entity="customer_id",
features=[
{"metric": "total_revenue", "aggregation": "sum", "window": "90d"},
{"metric": "order_count", "aggregation": "count", "window": "90d"},
{"metric": "avg_order_value", "aggregation": "avg", "window": "90d"},
]
)
# Generate features for training
features = client.get_features(
feature_set=feature_set,
entity_ids=["cust_001", "cust_002", "cust_003"],
as_of_date="2024-01-01"
)
df = features.to_pandas()
Point-in-Time Features
Prevent data leakage with point-in-time correct features:
# Get features as they existed at a specific point in time
features = client.get_features(
metrics=["total_revenue", "order_count"],
dimensions=["customer_id"],
as_of_date="2024-01-01",
lookback_window="90d"
)
Rolling Window Features
# Rolling aggregations
result = client.query(
metrics=[
{"name": "revenue_7d", "expression": "rolling_sum(total_revenue, 7d)"},
{"name": "revenue_30d", "expression": "rolling_sum(total_revenue, 30d)"},
{"name": "orders_7d", "expression": "rolling_count(order_count, 7d)"}
],
dimensions=["customer_id", "order_date"]
)
Machine Learning Workflows
Training Data Preparation
from olytix_core import OlytixCoreClient
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
client = OlytixCoreClient(url="http://localhost:8000", api_key="YOUR_KEY")
# Get training data
result = client.query(
metrics=["total_revenue", "order_count", "avg_order_value", "return_rate"],
dimensions=["customer_id", "customer_segment", "tenure_months"],
filters=[
{"dimension": "order_date.year", "operator": "lte", "value": 2023}
]
)
df = result.to_pandas()
# Prepare features and target
X = df[["order_count", "avg_order_value", "tenure_months"]]
y = df["total_revenue"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# Train model
model = RandomForestRegressor(n_estimators=100)
model.fit(X_train, y_train)
Batch Inference
# Get current features for all customers
result = client.query(
metrics=["order_count", "avg_order_value", "tenure_months"],
dimensions=["customer_id"]
)
df = result.to_pandas()
X_inference = df[["order_count", "avg_order_value", "tenure_months"]]
# Generate predictions
predictions = model.predict(X_inference)
df["predicted_revenue"] = predictions
Model Monitoring
# Track prediction vs actual
from datetime import datetime, timedelta
def get_model_performance(client, model_id, days=7):
result = client.query(
metrics=["total_revenue"],
dimensions=["customer_id", "order_date"],
filters=[
{
"dimension": "order_date",
"operator": "gte",
"value": (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%d")
}
]
)
df = result.to_pandas()
# Join with predictions and calculate metrics
return df
Jupyter Integration
Magic Commands
# Load Olytix Core magic (load_ext takes the importable module name)
%load_ext olytix_core
# Configure connection
%olytix_core_connect http://localhost:8000 --api-key YOUR_KEY
# Query with magic
%%olytix_core_query
metrics: [total_revenue, order_count]
dimensions: [region]
filters:
- dimension: order_date.year
operator: equals
value: 2024
Interactive Exploration
from olytix_core import OlytixCoreClient
from olytix_core.jupyter import CubeExplorer
client = OlytixCoreClient(url="http://localhost:8000", api_key="YOUR_KEY")
# Launch interactive explorer
explorer = CubeExplorer(client, cube="orders")
explorer.show() # Displays interactive widget
Visualization
import matplotlib.pyplot as plt
result = client.query(
metrics=["total_revenue"],
dimensions=["order_date.month"],
order_by=[{"field": "order_date.month", "direction": "asc"}]
)
df = result.to_pandas()
plt.figure(figsize=(12, 6))
plt.plot(df["order_date.month"], df["total_revenue"])
plt.title("Monthly Revenue Trend")
plt.xlabel("Month")
plt.ylabel("Revenue ($)")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
Caching
Enable Caching
from olytix_core import OlytixCoreClient, DiskCache
# Use disk cache
client = OlytixCoreClient(
url="http://localhost:8000",
api_key="YOUR_KEY",
cache=DiskCache(path="~/.olytix-core/cache", ttl=3600)
)
# First query - hits API
result1 = client.query(metrics=["total_revenue"], dimensions=["region"])
# Second query - served from cache
result2 = client.query(metrics=["total_revenue"], dimensions=["region"])
Cache Control
# Force fresh data
result = client.query(
metrics=["total_revenue"],
dimensions=["region"],
use_cache=False
)
# Clear cache
client.cache.clear()
Async Support
import asyncio
from olytix_core import AsyncOlytixCoreClient
async def main():
client = AsyncOlytixCoreClient(
url="http://localhost:8000",
api_key="YOUR_KEY"
)
async with client:
# Parallel queries
results = await asyncio.gather(
client.query(metrics=["total_revenue"], dimensions=["region"]),
client.query(metrics=["order_count"], dimensions=["product_category"]),
client.query(metrics=["avg_order_value"], dimensions=["customer_segment"])
)
for result in results:
print(result.to_pandas())
asyncio.run(main())
Error Handling
from olytix_core import OlytixCoreClient
from olytix_core.exceptions import (
OlytixCoreError,
AuthenticationError,
RateLimitError,
QueryError
)
client = OlytixCoreClient(url="http://localhost:8000", api_key="YOUR_KEY")
try:
result = client.query(
metrics=["total_revenue"],
dimensions=["region"]
)
except AuthenticationError:
print("Invalid API key")
except RateLimitError as e:
print(f"Rate limited. Retry after {e.retry_after} seconds")
except QueryError as e:
print(f"Query failed: {e.message}")
except OlytixCoreError as e:
print(f"Olytix Core error: {e}")
Best Practices
1. Use Consistent Metrics
Always use Olytix Core metrics instead of calculating in Python:
# Good - uses Olytix Core metric definition
result = client.query(metrics=["avg_order_value"])
# Bad - may differ from official definition
df = client.query(metrics=["total_revenue", "order_count"]).to_pandas()
aov = df["total_revenue"] / df["order_count"]
2. Batch Queries
Combine related queries to reduce API calls:
# Good - single query
result = client.query(
metrics=["total_revenue", "order_count", "avg_order_value"],
dimensions=["region", "product_category"]
)
# Bad - multiple queries
rev = client.query(metrics=["total_revenue"], dimensions=["region"])
cnt = client.query(metrics=["order_count"], dimensions=["region"])
3. Filter at the Source
Apply filters in Olytix Core rather than pandas:
# Good - filter in Olytix Core
result = client.query(
metrics=["total_revenue"],
filters=[{"dimension": "region", "operator": "equals", "value": "US-East"}]
)
# Bad - fetch all, then filter
result = client.query(metrics=["total_revenue"], dimensions=["region"])
df = result.to_pandas()
df = df[df["region"] == "US-East"]
4. Point-in-Time Correctness
Always use as_of_date for training data:
# Good - prevents data leakage
features = client.get_features(
feature_set=customer_features,
as_of_date="2024-01-01"
)
# Bad - may include future data
features = client.query(metrics=["total_revenue"], dimensions=["customer_id"])