Skip to main content

Python SDK

For Data Analysts

The Olytix Core Python SDK provides native access to your semantic layer for machine learning workflows, feature engineering, and data science applications. It ensures consistent metrics across training and inference, eliminating feature skew.

Installation

pip install olytix-core

# With ML extras (pandas, numpy, scikit-learn integration)
pip install olytix-core[ml]

# For Jupyter notebook support
pip install olytix-core[jupyter]

Quick Start

from olytix_core import OlytixCoreClient

# Initialize client
client = OlytixCoreClient(
url="http://localhost:8000",
api_key="YOUR_API_KEY"
)

# Query metrics
result = client.query(
metrics=["total_revenue", "order_count"],
dimensions=["region", "product_category"],
filters=[{"dimension": "order_date.year", "operator": "equals", "value": 2024}]
)

# Access as DataFrame
df = result.to_pandas()
print(df.head())

Configuration

Client Initialization

from olytix_core import OlytixCoreClient

# Basic configuration
client = OlytixCoreClient(
url="http://localhost:8000",
api_key="YOUR_API_KEY"
)

# Full configuration
client = OlytixCoreClient(
url="http://localhost:8000",
api_key="YOUR_API_KEY",
timeout=30, # Request timeout in seconds
max_retries=3, # Number of retry attempts
verify_ssl=True, # SSL certificate verification
headers={"X-Custom": "value"} # Additional headers
)

# From environment variables
import os
client = OlytixCoreClient(
url=os.environ["OLYTIX_URL"],
api_key=os.environ["OLYTIX_API_KEY"]
)

Configuration File

Create a ~/.olytix-core/config.yml:

default:
url: http://localhost:8000
api_key: ${OLYTIX_API_KEY}

production:
url: https://olytix-core.example.com
api_key: ${OLYTIX_PROD_API_KEY}
timeout: 60

Use profiles:

from olytix_core import OlytixCoreClient

# Use default profile
client = OlytixCoreClient.from_config()

# Use specific profile
client = OlytixCoreClient.from_config("production")

Querying Data

Basic Query

# Query with metrics and dimensions
result = client.query(
metrics=["total_revenue", "order_count"],
dimensions=["region"]
)

# Access data
print(result.data) # List of dictionaries
print(result.to_pandas()) # pandas DataFrame
print(result.to_arrow()) # PyArrow Table

Filtering

# Single filter
result = client.query(
metrics=["total_revenue"],
dimensions=["region"],
filters=[
{"dimension": "order_date.year", "operator": "equals", "value": 2024}
]
)

# Multiple filters
result = client.query(
metrics=["total_revenue"],
dimensions=["region", "product_category"],
filters=[
{"dimension": "order_date.year", "operator": "equals", "value": 2024},
{"dimension": "region", "operator": "in", "values": ["US-East", "US-West"]},
{"dimension": "order_amount", "operator": "gte", "value": 100}
]
)

Time Dimensions

# Query with time granularity
result = client.query(
metrics=["total_revenue"],
dimensions=["order_date.month", "region"],
time_dimensions=[
{
"dimension": "order_date",
"date_range": ["2024-01-01", "2024-12-31"],
"granularity": "month"
}
]
)

Ordering and Limiting

result = client.query(
metrics=["total_revenue"],
dimensions=["product_category"],
order_by=[
{"field": "total_revenue", "direction": "desc"}
],
limit=10
)

Data Formats

Pandas DataFrame

import pandas as pd

result = client.query(
metrics=["total_revenue", "order_count"],
dimensions=["region", "order_date.month"]
)

df = result.to_pandas()

# Type-aware conversion
df["order_date.month"] = pd.to_datetime(df["order_date.month"])
df["region"] = df["region"].astype("category")

PyArrow Table

import pyarrow as pa
import pyarrow.parquet  # required: `pa.parquet` is not available until the submodule is imported

result = client.query(
metrics=["total_revenue"],
dimensions=["region"]
)

table = result.to_arrow()

# Write to Parquet
pa.parquet.write_table(table, "data.parquet")

NumPy Arrays

import numpy as np

result = client.query(
metrics=["total_revenue", "order_count"],
dimensions=["region"]
)

# Get as structured array
df = result.to_pandas()
X = df[["total_revenue", "order_count"]].values

Feature Engineering

Feature Store Integration

from olytix_core import OlytixCoreClient, FeatureSet

client = OlytixCoreClient(url="http://localhost:8000", api_key="YOUR_KEY")

# Define a feature set
feature_set = FeatureSet(
name="customer_features",
entity="customer_id",
features=[
{"metric": "total_revenue", "aggregation": "sum", "window": "90d"},
{"metric": "order_count", "aggregation": "count", "window": "90d"},
{"metric": "avg_order_value", "aggregation": "avg", "window": "90d"},
]
)

# Generate features for training
features = client.get_features(
feature_set=feature_set,
entity_ids=["cust_001", "cust_002", "cust_003"],
as_of_date="2024-01-01"
)

df = features.to_pandas()

Point-in-Time Features

Prevent data leakage with point-in-time correct features:

# Get features as they existed at a specific point in time
features = client.get_features(
metrics=["total_revenue", "order_count"],
dimensions=["customer_id"],
as_of_date="2024-01-01",
lookback_window="90d"
)

Rolling Window Features

# Rolling aggregations
result = client.query(
metrics=[
{"name": "revenue_7d", "expression": "rolling_sum(total_revenue, 7d)"},
{"name": "revenue_30d", "expression": "rolling_sum(total_revenue, 30d)"},
{"name": "orders_7d", "expression": "rolling_count(order_count, 7d)"}
],
dimensions=["customer_id", "order_date"]
)

Machine Learning Workflows

Training Data Preparation

from olytix_core import OlytixCoreClient
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

client = OlytixCoreClient(url="http://localhost:8000", api_key="YOUR_KEY")

# Get training data
result = client.query(
metrics=["total_revenue", "order_count", "avg_order_value", "return_rate"],
dimensions=["customer_id", "customer_segment", "tenure_months"],
filters=[
{"dimension": "order_date.year", "operator": "lte", "value": 2023}
]
)

df = result.to_pandas()

# Prepare features and target
X = df[["order_count", "avg_order_value", "tenure_months"]]
y = df["total_revenue"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train model
model = RandomForestRegressor(n_estimators=100)
model.fit(X_train, y_train)

Batch Inference

# Get current features for all customers
result = client.query(
metrics=["order_count", "avg_order_value", "tenure_months"],
dimensions=["customer_id"]
)

df = result.to_pandas()
X_inference = df[["order_count", "avg_order_value", "tenure_months"]]

# Generate predictions
predictions = model.predict(X_inference)
df["predicted_revenue"] = predictions

Model Monitoring

# Track prediction vs actual
from datetime import datetime, timedelta

def get_model_performance(client, model_id, days=7):
result = client.query(
metrics=["total_revenue"],
dimensions=["customer_id", "order_date"],
filters=[
{
"dimension": "order_date",
"operator": "gte",
"value": (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%d")
}
]
)

df = result.to_pandas()
# Join with predictions and calculate metrics
return df

Jupyter Integration

Magic Commands

# Load Olytix Core magic (extension names must be valid Python identifiers)
%load_ext olytix_core

# Configure connection
%olytix_core_connect http://localhost:8000 --api-key YOUR_KEY

# Query with magic
%%olytix_core_query
metrics: [total_revenue, order_count]
dimensions: [region]
filters:
- dimension: order_date.year
operator: equals
value: 2024

Interactive Exploration

from olytix_core import OlytixCoreClient
from olytix_core.jupyter import CubeExplorer

client = OlytixCoreClient(url="http://localhost:8000", api_key="YOUR_KEY")

# Launch interactive explorer
explorer = CubeExplorer(client, cube="orders")
explorer.show() # Displays interactive widget

Visualization

import matplotlib.pyplot as plt

result = client.query(
metrics=["total_revenue"],
dimensions=["order_date.month"],
order_by=[{"field": "order_date.month", "direction": "asc"}]
)

df = result.to_pandas()

plt.figure(figsize=(12, 6))
plt.plot(df["order_date.month"], df["total_revenue"])
plt.title("Monthly Revenue Trend")
plt.xlabel("Month")
plt.ylabel("Revenue ($)")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

Caching

Enable Caching

from olytix_core import OlytixCoreClient, DiskCache

# Use disk cache
client = OlytixCoreClient(
url="http://localhost:8000",
api_key="YOUR_KEY",
cache=DiskCache(path="~/.olytix-core/cache", ttl=3600)
)

# First query - hits API
result1 = client.query(metrics=["total_revenue"], dimensions=["region"])

# Second query - served from cache
result2 = client.query(metrics=["total_revenue"], dimensions=["region"])

Cache Control

# Force fresh data
result = client.query(
metrics=["total_revenue"],
dimensions=["region"],
use_cache=False
)

# Clear cache
client.cache.clear()

Async Support

import asyncio
from olytix_core import AsyncOlytixCoreClient

async def main():
client = AsyncOlytixCoreClient(
url="http://localhost:8000",
api_key="YOUR_KEY"
)

async with client:
# Parallel queries
results = await asyncio.gather(
client.query(metrics=["total_revenue"], dimensions=["region"]),
client.query(metrics=["order_count"], dimensions=["product_category"]),
client.query(metrics=["avg_order_value"], dimensions=["customer_segment"])
)

for result in results:
print(result.to_pandas())

asyncio.run(main())

Error Handling

from olytix_core import OlytixCoreClient
from olytix_core.exceptions import (
OlytixCoreError,
AuthenticationError,
RateLimitError,
QueryError
)

client = OlytixCoreClient(url="http://localhost:8000", api_key="YOUR_KEY")

try:
result = client.query(
metrics=["total_revenue"],
dimensions=["region"]
)
except AuthenticationError:
print("Invalid API key")
except RateLimitError as e:
print(f"Rate limited. Retry after {e.retry_after} seconds")
except QueryError as e:
print(f"Query failed: {e.message}")
except OlytixCoreError as e:
print(f"Olytix Core error: {e}")

Best Practices

1. Use Consistent Metrics

Always use Olytix Core metrics instead of calculating in Python:

# Good - uses Olytix Core metric definition
result = client.query(metrics=["avg_order_value"])

# Bad - may differ from official definition
df = client.query(metrics=["total_revenue", "order_count"]).to_pandas()
aov = df["total_revenue"] / df["order_count"]

2. Batch Queries

Combine related queries to reduce API calls:

# Good - single query
result = client.query(
metrics=["total_revenue", "order_count", "avg_order_value"],
dimensions=["region", "product_category"]
)

# Bad - multiple queries
rev = client.query(metrics=["total_revenue"], dimensions=["region"])
cnt = client.query(metrics=["order_count"], dimensions=["region"])

3. Filter at the Source

Apply filters in Olytix Core rather than pandas:

# Good - filter in Olytix Core
result = client.query(
metrics=["total_revenue"],
filters=[{"dimension": "region", "operator": "equals", "value": "US-East"}]
)

# Bad - fetch all, then filter
result = client.query(metrics=["total_revenue"], dimensions=["region"])
df = result.to_pandas()
df = df[df["region"] == "US-East"]

4. Point-in-Time Correctness

Always use as_of_date for training data:

# Good - prevents data leakage
features = client.get_features(
feature_set=customer_features,
as_of_date="2024-01-01"
)

# Bad - may include future data
features = client.query(metrics=["total_revenue"], dimensions=["customer_id"])

Next Steps