Simple Multi Table Example for SQLStream

Table 1: Customers

customer_id name email city
101 Alice Johnson alice@email.com New York
102 Bob Smith bob@email.com Los Angeles
103 Carol Davis carol@email.com Chicago
104 David Wilson david@email.com Houston
105 Emma Brown emma@email.com Phoenix

Table 2: Orders

order_id customer_id product amount order_date
1001 101 Laptop 1200.00 2024-01-15
1002 102 Mouse 25.50 2024-01-16
1003 101 Keyboard 75.00 2024-01-17
1004 103 Monitor 350.00 2024-01-18
1005 104 Headphones 89.99 2024-01-19
1006 102 Webcam 120.00 2024-01-20
1007 105 Desk Lamp 45.00 2024-01-21

📊 SQLStream DuckDB Queries

Use these queries with SQLStream's DuckDB backend to analyze the data directly from GitHub URLs.

Setup: Define Table URLs
# GitHub raw URL base
BASE = 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html'

# Table references using SQLStream HTML syntax
customers = f'{BASE}#html:0'
orders = f'{BASE}#html:1'
Query 1: Join Customers with Orders
from sqlstream import query

result = query(customers).sql(f"""
    SELECT 
        c.customer_id,
        c.name,
        c.email,
        c.city,
        o.order_id,
        o.product,
        o.amount,
        o.order_date
    FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:0' c
    JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:1' o ON c.customer_id = o.customer_id
    ORDER BY c.customer_id, o.order_date
""", backend="duckdb")

for row in result:
    print(row)
Query 2: Customer Order Summary
result = query(customers).sql(f"""
    SELECT 
        c.customer_id,
        c.name,
        c.city,
        COUNT(o.order_id) AS total_orders,
        SUM(o.amount) AS total_spent,
        AVG(o.amount) AS avg_order_value,
        MIN(o.order_date) AS first_order,
        MAX(o.order_date) AS last_order
    FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:0' c
    LEFT JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:1' o ON c.customer_id = o.customer_id
    GROUP BY c.customer_id, c.name, c.city
    ORDER BY total_spent DESC
""", backend="duckdb")
Query 3: High-Value Customers (>$200 total)
result = query(customers).sql(f"""
    SELECT 
        c.name,
        c.email,
        SUM(o.amount) AS total_spent
    FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:0' c
    JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:1' o ON c.customer_id = o.customer_id
    GROUP BY c.customer_id, c.name, c.email
    HAVING SUM(o.amount) > 200
    ORDER BY total_spent DESC
""", backend="duckdb")
Query 4: Orders by City
result = query(orders).sql(f"""
    SELECT 
        c.city,
        COUNT(o.order_id) AS order_count,
        SUM(o.amount) AS total_revenue,
        AVG(o.amount) AS avg_order_value
    FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:1' o
    JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:0' c ON o.customer_id = c.customer_id
    GROUP BY c.city
    ORDER BY total_revenue DESC
""", backend="duckdb")
Query 5: Customers with Multiple Orders
result = query(customers).sql(f"""
    SELECT 
        c.customer_id,
        c.name,
        c.email,
        COUNT(o.order_id) AS order_count,
        STRING_AGG(o.product, ', ') AS products_ordered
    FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:0' c
    JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:1' o ON c.customer_id = o.customer_id
    GROUP BY c.customer_id, c.name, c.email
    HAVING COUNT(o.order_id) > 1
    ORDER BY order_count DESC
""", backend="duckdb")
Query 6: Top Products by Revenue
result = query(orders).sql(f"""
    SELECT 
        o.product,
        COUNT(*) AS times_ordered,
        SUM(o.amount) AS total_revenue,
        AVG(o.amount) AS avg_price
    FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:1' o
    GROUP BY o.product
    ORDER BY total_revenue DESC
""", backend="duckdb")
Query 7: Orders Above Average Amount
result = query(orders).sql(f"""
    SELECT 
        o.order_id,
        c.name AS customer_name,
        o.product,
        o.amount,
        o.order_date
    FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:1' o
    JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:0' c ON o.customer_id = c.customer_id
    WHERE o.amount > (SELECT AVG(amount) FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:1')
    ORDER BY o.amount DESC
""", backend="duckdb")
Query 8: Customer Order Timeline
result = query(orders).sql(f"""
    SELECT 
        c.name,
        o.product,
        o.amount,
        o.order_date,
        ROW_NUMBER() OVER (PARTITION BY c.customer_id ORDER BY o.order_date) AS order_number
    FROM 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:1' o
    JOIN 'https://github.com/subhayu99/sqlstream/raw/refs/heads/main/examples/simple.html#html:0' c ON o.customer_id = c.customer_id
    ORDER BY c.name, o.order_date
""", backend="duckdb")