📊 SQLStream DuckDB Queries
Use these queries with SQLStream's DuckDB backend to analyze the data. The queries use GitHub URLs with SQLStream's HTML table syntax.
from sqlstream import query
Setup: Define Table URLs
# GitHub raw URL base
BASE = 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html'
# Table references using SQLStream HTML syntax
customers = f'{BASE}#html:0'
categories = f'{BASE}#html:1'
products = f'{BASE}#html:2'
orders = f'{BASE}#html:3'
order_items = f'{BASE}#html:4'
reviews = f'{BASE}#html:5'
Query 1: Complete Order Details with Customer and Product Info
result = query(orders).sql(f"""
SELECT
o.order_id,
o.order_date,
o.status,
c.first_name || ' ' || c.last_name AS customer_name,
c.email,
c.loyalty_tier,
p.product_name,
oi.quantity,
oi.unit_price,
oi.discount_percent,
(oi.quantity * oi.unit_price * (1 - oi.discount_percent/100)) AS line_total,
cat.category_name
FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:3' o
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:0' c ON o.customer_id = c.customer_id
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:4' oi ON o.order_id = oi.order_id
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:2' p ON oi.product_id = p.product_id
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:1' cat ON p.category_id = cat.category_id
ORDER BY o.order_date DESC, o.order_id
""", backend="duckdb")
Query 2: Customer Purchase Summary
result = query(customers).sql(f"""
SELECT
c.customer_id,
c.first_name || ' ' || c.last_name AS customer_name,
c.loyalty_tier,
COUNT(DISTINCT o.order_id) AS total_orders,
SUM(oi.quantity * oi.unit_price * (1 - oi.discount_percent/100)) AS total_spent,
AVG(oi.quantity * oi.unit_price * (1 - oi.discount_percent/100)) AS avg_order_value
FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:0' c
LEFT JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:3' o ON c.customer_id = o.customer_id
LEFT JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:4' oi ON o.order_id = oi.order_id
GROUP BY c.customer_id, c.first_name, c.last_name, c.loyalty_tier
ORDER BY total_spent DESC
""", backend="duckdb")
Query 3: Product Performance with Ratings and Sales
result = query(products).sql(f"""
SELECT
p.product_id,
p.product_name,
cat.category_name,
p.price,
p.stock_quantity,
COUNT(DISTINCT oi.order_id) AS times_ordered,
SUM(oi.quantity) AS total_units_sold,
SUM(oi.quantity * oi.unit_price * (1 - oi.discount_percent/100)) AS total_revenue,
AVG(r.rating) AS avg_rating,
COUNT(r.review_id) AS review_count
FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:2' p
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:1' cat ON p.category_id = cat.category_id
LEFT JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:4' oi ON p.product_id = oi.product_id
LEFT JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:5' r ON p.product_id = r.product_id
GROUP BY p.product_id, p.product_name, cat.category_name, p.price, p.stock_quantity
ORDER BY total_revenue DESC
""", backend="duckdb")
Query 4: Category Performance Analysis
result = query(categories).sql(f"""
SELECT
cat.category_name,
COUNT(DISTINCT p.product_id) AS product_count,
COUNT(DISTINCT oi.order_id) AS order_count,
SUM(oi.quantity) AS total_units_sold,
SUM(oi.quantity * oi.unit_price * (1 - oi.discount_percent/100)) AS total_revenue,
AVG(oi.quantity * oi.unit_price * (1 - oi.discount_percent/100)) AS avg_order_value
FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:1' cat
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:2' p ON cat.category_id = p.category_id
LEFT JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:4' oi ON p.product_id = oi.product_id
GROUP BY cat.category_id, cat.category_name
ORDER BY total_revenue DESC
""", backend="duckdb")
Query 5: Customer Segmentation by Purchase Diversity
result = query(customers).sql(f"""
SELECT
c.customer_id,
c.first_name || ' ' || c.last_name AS customer_name,
c.loyalty_tier,
COUNT(DISTINCT cat.category_id) AS categories_purchased_from,
COUNT(DISTINCT p.product_id) AS unique_products_purchased,
SUM(oi.quantity * oi.unit_price * (1 - oi.discount_percent/100)) AS total_spent
FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:0' c
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:3' o ON c.customer_id = o.customer_id
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:4' oi ON o.order_id = oi.order_id
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:2' p ON oi.product_id = p.product_id
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:1' cat ON p.category_id = cat.category_id
GROUP BY c.customer_id, c.first_name, c.last_name, c.loyalty_tier
ORDER BY categories_purchased_from DESC, total_spent DESC
""", backend="duckdb")
Query 6: Monthly Sales Trend
result = query(orders).sql(f"""
SELECT
strftime(o.order_date, '%Y-%m') AS month,
COUNT(DISTINCT o.order_id) AS order_count,
COUNT(DISTINCT o.customer_id) AS unique_customers,
SUM(oi.quantity) AS total_items_sold,
SUM(oi.quantity * oi.unit_price * (1 - oi.discount_percent/100)) AS total_revenue
FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:3' o
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:4' oi ON o.order_id = oi.order_id
GROUP BY strftime(o.order_date, '%Y-%m')
ORDER BY month
""", backend="duckdb")
Query 7: Products Needing Restock
result = query(products).sql(f"""
SELECT
p.product_id,
p.product_name,
cat.category_name,
p.stock_quantity,
COUNT(DISTINCT oi.order_id) AS times_ordered,
SUM(oi.quantity) AS total_units_sold,
CASE
WHEN p.stock_quantity < 30 THEN 'Critical'
WHEN p.stock_quantity < 50 THEN 'Low'
ELSE 'Adequate'
END AS stock_status
FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:2' p
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:1' cat ON p.category_id = cat.category_id
LEFT JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:4' oi ON p.product_id = oi.product_id
GROUP BY p.product_id, p.product_name, cat.category_name, p.stock_quantity
HAVING p.stock_quantity < 50 AND COUNT(oi.order_id) > 0
ORDER BY p.stock_quantity ASC, total_units_sold DESC
""", backend="duckdb")
Query 8: Customer Lifetime Value
result = query(customers).sql(f"""
SELECT
c.customer_id,
c.first_name || ' ' || c.last_name AS customer_name,
c.loyalty_tier,
MIN(o.order_date) AS first_purchase,
MAX(o.order_date) AS last_purchase,
COUNT(DISTINCT o.order_id) AS total_orders,
SUM(oi.quantity * oi.unit_price * (1 - oi.discount_percent/100)) AS lifetime_value,
AVG(r.rating) AS avg_rating_given
FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:0' c
LEFT JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:3' o ON c.customer_id = o.customer_id
LEFT JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:4' oi ON o.order_id = oi.order_id
LEFT JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:5' r ON c.customer_id = r.customer_id
GROUP BY c.customer_id, c.first_name, c.last_name, c.loyalty_tier
ORDER BY lifetime_value DESC
""", backend="duckdb")
Query 9: Review Analysis (Verified vs Unverified)
result = query(products).sql(f"""
SELECT
p.product_name,
cat.category_name,
COUNT(CASE WHEN r.verified_purchase = 'Yes' THEN 1 END) AS verified_reviews,
COUNT(CASE WHEN r.verified_purchase = 'No' THEN 1 END) AS unverified_reviews,
AVG(CASE WHEN r.verified_purchase = 'Yes' THEN r.rating END) AS avg_verified_rating,
AVG(CASE WHEN r.verified_purchase = 'No' THEN r.rating END) AS avg_unverified_rating,
AVG(r.rating) AS overall_rating
FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:2' p
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:1' cat ON p.category_id = cat.category_id
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:5' r ON p.product_id = r.product_id
GROUP BY p.product_id, p.product_name, cat.category_name
HAVING COUNT(r.review_id) >= 2
ORDER BY overall_rating DESC
""", backend="duckdb")
Query 10: Order Status Funnel Analysis
result = query(orders).sql(f"""
SELECT
o.status,
COUNT(DISTINCT o.order_id) AS order_count,
COUNT(DISTINCT o.customer_id) AS unique_customers,
SUM(oi.quantity * oi.unit_price * (1 - oi.discount_percent/100)) AS total_value,
AVG(oi.quantity * oi.unit_price * (1 - oi.discount_percent/100)) AS avg_order_value
FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:3' o
JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/complex.html#html:4' oi ON o.order_id = oi.order_id
GROUP BY o.status
ORDER BY
CASE o.status
WHEN 'Processing' THEN 1
WHEN 'Shipped' THEN 2
WHEN 'Delivered' THEN 3
END
""", backend="duckdb")