📊 SQLStream DuckDB Queries
Use these queries with SQLStream's DuckDB backend to analyze the data directly from GitHub URLs.
Setup: Define Table URLs
# GitHub raw URL base
BASE = 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html'
# Table references using SQLStream HTML syntax
customers = f'{BASE}#html:0'
orders = f'{BASE}#html:1'
Query 1: Join Customers with Orders
from sqlstream import query
result = query(customers).sql(f"""
SELECT
c.customer_id,
c.name,
c.email,
c.city,
o.order_id,
o.product,
o.amount,
o.order_date
FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:0' c
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:1' o ON c.customer_id = o.customer_id
ORDER BY c.customer_id, o.order_date
""", backend="duckdb")
for row in result:
print(row)
Query 2: Customer Order Summary
result = query(customers).sql(f"""
SELECT
c.customer_id,
c.name,
c.city,
COUNT(o.order_id) AS total_orders,
SUM(o.amount) AS total_spent,
AVG(o.amount) AS avg_order_value,
MIN(o.order_date) AS first_order,
MAX(o.order_date) AS last_order
FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:0' c
LEFT JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:1' o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.name, c.city
ORDER BY total_spent DESC
""", backend="duckdb")
Query 3: High-Value Customers (>$200 total)
result = query(customers).sql(f"""
SELECT
c.name,
c.email,
SUM(o.amount) AS total_spent
FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:0' c
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:1' o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.name, c.email
HAVING SUM(o.amount) > 200
ORDER BY total_spent DESC
""", backend="duckdb")
Query 4: Orders by City
result = query(orders).sql(f"""
SELECT
c.city,
COUNT(o.order_id) AS order_count,
SUM(o.amount) AS total_revenue,
AVG(o.amount) AS avg_order_value
FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:1' o
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:0' c ON o.customer_id = c.customer_id
GROUP BY c.city
ORDER BY total_revenue DESC
""", backend="duckdb")
Query 5: Customers with Multiple Orders
result = query(customers).sql(f"""
SELECT
c.customer_id,
c.name,
c.email,
COUNT(o.order_id) AS order_count,
STRING_AGG(o.product, ', ') AS products_ordered
FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:0' c
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:1' o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.name, c.email
HAVING COUNT(o.order_id) > 1
ORDER BY order_count DESC
""", backend="duckdb")
Query 6: Top Products by Revenue
result = query(orders).sql(f"""
SELECT
o.product,
COUNT(*) AS times_ordered,
SUM(o.amount) AS total_revenue,
AVG(o.amount) AS avg_price
FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:1' o
GROUP BY o.product
ORDER BY total_revenue DESC
""", backend="duckdb")
Query 7: Orders Above Average Amount
result = query(orders).sql(f"""
SELECT
o.order_id,
c.name AS customer_name,
o.product,
o.amount,
o.order_date
FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:1' o
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:0' c ON o.customer_id = c.customer_id
WHERE o.amount > (SELECT AVG(amount) FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:1')
ORDER BY o.amount DESC
""", backend="duckdb")
Query 8: Customer Order Timeline
result = query(orders).sql(f"""
SELECT
c.name,
o.product,
o.amount,
o.order_date,
ROW_NUMBER() OVER (PARTITION BY c.customer_id ORDER BY o.order_date) AS order_number
FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:1' o
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:0' c ON o.customer_id = c.customer_id
ORDER BY c.name, o.order_date
""", backend="duckdb")