============================= test session starts ==============================
platform linux -- Python 3.12.3, pytest-9.0.2, pluggy-1.6.0 -- /home/ralbright/projects/hyperstreamdb/venv/bin/python
cachedir: .pytest_cache
hypothesis profile 'default'
rootdir: /home/ralbright/projects/hyperstreamdb
configfile: pyproject.toml
plugins: asyncio-1.3.0, hypothesis-6.151.11, anyio-4.12.1
asyncio: mode=Mode.STRICT, debug=False, asyncio_default_fixture_loop_scope=None, asyncio_default_test_loop_scope=function
collecting ... collected 155 items

tests/benchmarks/hybrid/test_scalar_plus_vector.py::TestHybridQueryBenchmarks::test_filtered_vector_search ERROR [  0%]
tests/benchmarks/hybrid/test_scalar_plus_vector.py::TestHybridQueryBenchmarks::test_multi_filter_vector ERROR [  1%]
tests/benchmarks/hybrid/test_scalar_plus_vector.py::TestHybridQueryBenchmarks::test_comparison_post_vs_pre_filter ERROR [  1%]
tests/benchmarks/index/test_index_build.py::TestIndexBuildBenchmarks::test_build_100k_vectors_latency ERROR [  2%]
tests/benchmarks/table_format/test_vs_iceberg.py::TestTableFormatBenchmarks::test_point_lookup ERROR [  3%]
tests/benchmarks/table_format/test_vs_iceberg.py::TestTableFormatBenchmarks::test_high_selectivity_filter ERROR [  3%]
tests/benchmarks/table_format/test_vs_iceberg.py::TestTableFormatBenchmarks::test_range_query ERROR [  4%]
tests/benchmarks/table_format/test_vs_iceberg.py::TestTableFormatBenchmarks::test_full_scan_baseline ERROR [  5%]
tests/benchmarks/vector_search/test_cold_read_latency.py::TestColdReadLatency::test_cold_search_latency ERROR [  5%]
tests/benchmarks/vector_search/test_parallel_search.py::test_parallel_search_vs_sequential ERROR [  6%]
tests/benchmarks/vector_search/test_parallel_search.py::test_parallel_search_with_filters ERROR [  7%]
tests/benchmarks/vector_search/test_qdrant_direct_comparison.py::TestQdrantComparison::test_ingestion_comparison_small ERROR [  7%]
tests/benchmarks/vector_search/test_qdrant_direct_comparison.py::TestQdrantComparison::test_query_comparison ERROR [  8%]
tests/benchmarks/vector_search/test_qdrant_direct_comparison.py::TestQdrantComparison::test_filtered_search_comparison ERROR [  9%]
tests/benchmarks/vector_search/test_vector_index_verification.py::test_automatic_schema_detection ERROR [  9%]
tests/benchmarks/vector_search/test_vector_index_verification.py::test_explicit_index_configuration_still_works ERROR [ 10%]
tests/benchmarks/vector_search/test_vs_qdrant.py::TestVectorSearchBenchmarks::test_ingest_1m_vectors ERROR [ 10%]
tests/benchmarks/vector_search/test_vs_qdrant.py::TestVectorSearchBenchmarks::test_search_unfiltered_small ERROR [ 11%]
tests/benchmarks/vector_search/test_vs_qdrant.py::TestVectorSearchBenchmarks::test_search_filtered_high_selectivity ERROR [ 12%]
tests/benchmarks/vector_search/test_vs_qdrant.py::TestVectorSearchBenchmarks::test_concurrent_queries ERROR [ 12%]
tests/integration/test_boolean_index.py::test_boolean_indexing PASSED    [ 13%]
tests/integration/test_catalog.py::test_catalog_flow PASSED              [ 14%]
tests/integration/test_concurrent_access.py::test_concurrent_writers PASSED [ 14%]
tests/integration/test_concurrent_access.py::test_concurrent_readers PASSED [ 15%]
tests/integration/test_concurrent_access.py::test_read_write_concurrency PASSED [ 16%]
tests/integration/test_concurrent_access.py::test_concurrent_compaction PASSED [ 16%]
tests/integration/test_concurrent_access.py::test_lock_contention PASSED [ 17%]
tests/integration/test_error_handling.py::test_invalid_uri PASSED        [ 18%]
tests/integration/test_error_handling.py::test_nonexistent_table_read PASSED [ 18%]
tests/integration/test_error_handling.py::test_schema_mismatch_write PASSED [ 19%]
tests/integration/test_error_handling.py::test_empty_batch_write PASSED  [ 20%]
tests/integration/test_error_handling.py::test_null_values_handling PASSED [ 20%]
tests/integration/test_error_handling.py::test_large_string_values PASSED [ 21%]
tests/integration/test_error_handling.py::test_invalid_filter_expression PASSED [ 21%]
tests/integration/test_error_handling.py::test_nonexistent_column_filter PASSED [ 22%]
tests/integration/test_error_handling.py::test_corrupted_data_handling PASSED [ 23%]
tests/integration/test_error_handling.py::test_concurrent_schema_changes PASSED [ 23%]
tests/integration/test_error_handling.py::test_resource_limits PASSED    [ 24%]
tests/integration/test_error_handling.py::test_special_characters_in_data PASSED [ 25%]
tests/integration/test_glue_catalog.py::test_glue_catalog_basic SKIPPED  [ 25%]
tests/integration/test_glue_catalog.py::test_glue_catalog_with_account_id SKIPPED [ 26%]
tests/integration/test_hnsw_ivf.py::test_hnsw_ivf_integration PASSED     [ 27%]
tests/integration/test_merge.py::test_merge_pruning FAILED               [ 27%]
tests/integration/test_nyc_taxi.py::test_nyc_taxi_ingest PASSED          [ 28%]
tests/integration/test_nyc_taxi.py::test_nyc_taxi_query PASSED           [ 29%]
tests/integration/test_nyc_taxi.py::test_nyc_taxi_compaction PASSED      [ 29%]
tests/integration/test_profiling_micro.py::test_profiling PASSED         [ 30%]
tests/integration/test_query_planning.py::test_query_planning FAILED     [ 30%]
tests/integration/test_rest_catalog.py::test_rest_catalog_basic SKIPPED  [ 31%]
tests/integration/test_rest_catalog.py::test_rest_catalog_with_prefix PASSED [ 32%]
tests/integration/test_schema_evolution.py::TestSchemaEvolution::test_add_column PASSED [ 32%]
tests/integration/test_schema_evolution.py::TestSchemaEvolution::test_add_multiple_columns PASSED [ 33%]
tests/integration/test_schema_evolution.py::TestSchemaEvolution::test_drop_column_projection PASSED [ 34%]
tests/integration/test_schema_evolution.py::TestSchemaEvolution::test_type_widening PASSED [ 34%]
tests/integration/test_schema_evolution.py::TestSchemaEvolution::test_type_narrowing_error PASSED [ 35%]
tests/integration/test_schema_evolution.py::TestSchemaEvolution::test_nullable_to_required PASSED [ 36%]
tests/integration/test_schema_evolution.py::TestSchemaEvolution::test_required_to_nullable FAILED [ 36%]
tests/integration/test_schema_evolution.py::TestSchemaEvolution::test_schema_merge_on_read PASSED [ 37%]
tests/integration/test_selective_indexing.py::test_selective_indexing FAILED [ 38%]
tests/integration/test_sql.py::test_sql_basic_query PASSED               [ 38%]
tests/integration/test_vector_search.py::test_vector_search_flow PASSED  [ 39%]
tests/integration/test_views.py::test_view_creation PASSED               [ 40%]
tests/integration/test_wal_compaction.py::test_wal_compaction FAILED     [ 40%]
tests/integration/test_wal_durability.py::test_wal_durability FAILED     [ 41%]
tests/integration/test_wikipedia.py::test_wikipedia_hybrid_queries PASSED [ 41%]
tests/integration/test_write_buffer.py::test_write_buffer_and_index PASSED [ 42%]
tests/performance/test_benchmarks.py::TestIngestThroughput::test_ingest_throughput_in_memory PASSED [ 43%]
tests/performance/test_benchmarks.py::TestIngestThroughput::test_ingest_throughput_out_of_memory PASSED [ 43%]
tests/performance/test_benchmarks.py::TestQueryLatency::test_query_latency_indexed_in_memory PASSED [ 44%]
tests/performance/test_benchmarks.py::TestQueryLatency::test_query_latency_full_scan_out_of_memory PASSED [ 45%]
tests/performance/test_benchmarks.py::TestCompactionSpeed::test_compaction_speed_in_memory PASSED [ 45%]
tests/performance/test_benchmarks.py::TestCompactionSpeed::test_compaction_speed_out_of_memory PASSED [ 46%]
tests/performance/test_benchmarks.py::TestVectorSearchLatency::test_vector_search_latency_in_memory PASSED [ 47%]
tests/performance/test_benchmarks.py::TestVectorSearchLatency::test_vector_search_latency_out_of_memory PASSED [ 47%]
tests/performance/test_benchmarks.py::TestMemoryUsage::test_memory_usage_write_in_memory PASSED [ 48%]
tests/performance/test_benchmarks.py::TestMemoryUsage::test_memory_usage_read_out_of_memory PASSED [ 49%]
tests/performance/test_gpu_batch_benchmarks.py::TestGPUBatchPerformance::test_l2_batch_100k_vectors FAILED [ 49%]
tests/performance/test_gpu_batch_benchmarks.py::TestGPUBatchPerformance::test_cosine_batch_100k_vectors FAILED [ 50%]
tests/performance/test_gpu_batch_benchmarks.py::TestGPUBatchPerformance::test_inner_product_batch_100k_vectors FAILED [ 50%]
tests/performance/test_gpu_batch_benchmarks.py::TestGPUBatchPerformance::test_varying_vector_sizes FAILED [ 51%]
tests/performance/test_gpu_batch_benchmarks.py::TestGPUBatchPerformance::test_varying_dimensions FAILED [ 52%]
tests/test_binary_vector_properties.py::test_binary_hamming_distance_correctness PASSED [ 52%]
tests/test_binary_vector_properties.py::test_binary_hamming_distance_identical_vectors PASSED [ 53%]
tests/test_binary_vector_properties.py::test_binary_hamming_distance_symmetry PASSED [ 54%]
tests/test_binary_vector_properties.py::test_binary_vector_auto_packing_hamming PASSED [ 54%]
tests/test_binary_vector_properties.py::test_binary_vector_auto_packing_jaccard PASSED [ 55%]
tests/test_binary_vector_properties.py::test_binary_vector_auto_packing_rejects_non_binary PASSED [ 56%]
tests/test_binary_vector_properties.py::test_binary_jaccard_distance_correctness PASSED [ 56%]
tests/test_binary_vector_properties.py::test_binary_jaccard_distance_identical PASSED [ 57%]
tests/test_binary_vector_properties.py::test_binary_jaccard_distance_all_zeros PASSED [ 58%]
tests/test_binary_vector_properties.py::test_binary_vector_dimension_mismatch PASSED [ 58%]
tests/test_catalog_factory.py::test_create_catalog_direct PASSED         [ 59%]
tests/test_catalog_factory.py::test_create_catalog_from_config PASSED    [ 60%]
tests/test_connector_apis.py::test_connector_apis FAILED                 [ 60%]
tests/test_connector_integration.py::test_connector_apis PASSED          [ 61%]
tests/test_context_backend_property.py::test_context_backend_property FAILED [ 61%]
tests/test_context_backend_property.py::test_auto_detect_backend_property PASSED [ 62%]
tests/test_context_backend_property.py::test_backend_case_insensitive_property PASSED [ 63%]
tests/test_cross_engine_compat.py::TestSparkCompatibility::test_spark_read_basic SKIPPED [ 63%]
tests/test_cross_engine_compat.py::TestSparkCompatibility::test_spark_read_v3_metadata SKIPPED [ 64%]
tests/test_cross_engine_compat.py::TestTrinoCompatibility::test_trino_read_basic SKIPPED [ 65%]
tests/test_cross_engine_compat.py::TestV2Features::test_sort_order_metadata FAILED [ 65%]
tests/test_cross_engine_compat.py::TestV2Features::test_partition_evolution_metadata PASSED [ 66%]
tests/test_cross_engine_compat.py::TestV2Features::test_ndv_statistics PASSED [ 67%]
tests/test_cross_engine_compat.py::TestV3Features::test_row_lineage_columns PASSED [ 67%]
tests/test_cross_engine_compat.py::TestV3Features::test_default_values_schema PASSED [ 68%]
tests/test_default_config.py::test_load_default_catalog PASSED           [ 69%]
tests/test_distance_properties.py::test_l2_distance_correctness PASSED   [ 69%]
tests/test_distance_properties.py::test_cosine_distance_correctness PASSED [ 70%]
tests/test_distance_properties.py::test_inner_product_correctness PASSED [ 70%]
tests/test_distance_properties.py::test_l1_distance_correctness PASSED   [ 71%]
tests/test_distance_properties.py::test_hamming_distance_correctness PASSED [ 72%]
tests/test_distance_properties.py::test_jaccard_distance_correctness PASSED [ 72%]
tests/test_distance_properties.py::test_dimension_mismatch_raises_error PASSED [ 73%]
tests/test_distance_properties.py::test_nan_values_raise_error PASSED    [ 74%]
tests/test_distance_properties.py::test_inf_values_raise_error PASSED    [ 74%]
tests/test_distance_properties.py::test_accepts_numpy_arrays PASSED      [ 75%]
tests/test_distance_properties.py::test_batch_operation_shape_correctness PASSED [ 76%]
tests/test_distance_properties.py::test_batch_operation_all_metrics PASSED [ 76%]
tests/test_distance_properties.py::test_batch_matches_single_pair PASSED [ 77%]
tests/test_distance_properties.py::test_sparse_l2_equivalence PASSED     [ 78%]
tests/test_distance_properties.py::test_sparse_cosine_equivalence PASSED [ 78%]
tests/test_distance_properties.py::test_sparse_inner_product_equivalence PASSED [ 79%]
tests/test_distance_properties.py::test_sparse_vector_unsorted_indices_error PASSED [ 80%]
tests/test_distance_properties.py::test_sparse_vector_out_of_bounds_error PASSED [ 80%]
tests/test_distance_properties.py::test_sparse_vector_length_mismatch_error PASSED [ 81%]
tests/test_distance_properties.py::test_sparse_vector_nan_values_error PASSED [ 81%]
tests/test_distance_properties.py::test_sparse_vector_dimension_mismatch_error PASSED [ 82%]
tests/test_exports.py::test_imports PASSED                               [ 83%]
tests/test_hyperstream_cuda.py::test_cuda_search PASSED                  [ 83%]
tests/test_hyperstream_mps.py::test_mps_search PASSED                    [ 84%]
tests/test_ingestion_optimizations.py::test_ingestion_optimizations PASSED [ 85%]
tests/test_mps_gpu.py::test_mps_gpu FAILED                               [ 85%]
tests/test_per_column_acceleration.py::test_cascading_device_assignment PASSED [ 86%]
tests/test_pgvector_python_sql.py::TestPgVectorCompatibility::test_large_vector_string PASSED [ 87%]
tests/test_pgvector_python_sql.py::TestPgVectorCompatibility::test_mixed_aggregations PASSED [ 87%]
tests/test_pgvector_python_sql.py::TestPgVectorCompatibility::test_numpy_scalar_interpolation PASSED [ 88%]
tests/test_pgvector_python_sql.py::TestPgVectorCompatibility::test_pgvector_cast_syntax PASSED [ 89%]
tests/test_pgvector_python_sql.py::TestPgVectorCompatibility::test_pgvector_operator_l2 PASSED [ 89%]
tests/test_pgvector_python_sql.py::TestPgVectorCompatibility::test_vector_avg_centroid PASSED [ 90%]
tests/test_pyarrow_schema_regression.py::test_pyarrow_schema_consistency PASSED [ 90%]
tests/test_python_gpu_context.py::test_auto_detect PASSED                [ 91%]
tests/test_python_gpu_context.py::test_cpu_backend_creation FAILED       [ 92%]
tests/test_python_gpu_context.py::test_cpu_backend_with_device_id FAILED [ 92%]
tests/test_python_gpu_context.py::test_backend_property PASSED           [ 93%]
tests/test_python_gpu_context.py::test_device_id_property FAILED         [ 94%]
tests/test_python_gpu_context.py::test_list_available_backends PASSED    [ 94%]
tests/test_python_gpu_context.py::test_unavailable_backend_error FAILED  [ 95%]
tests/test_python_gpu_context.py::test_invalid_backend_error FAILED      [ 96%]
tests/test_python_gpu_context.py::test_get_stats PASSED                  [ 96%]
tests/test_python_gpu_context.py::test_reset_stats PASSED                [ 97%]
tests/test_python_gpu_context.py::test_repr FAILED                       [ 98%]
tests/test_python_gpu_context.py::test_case_insensitive_backend PASSED   [ 98%]
tests/test_qdrant_install.py::test_qdrant_install_and_query PASSED       [ 99%]
tests/test_splits.py::test_read_split_with_projection FAILED             [100%]

==================================== ERRORS ====================================
___ ERROR at setup of TestHybridQueryBenchmarks.test_filtered_vector_search ____

    @pytest.fixture(scope="module")
    def minio_manager():
        """Setup MinIO for all tests in this module."""
>       minio = setup_minio_for_benchmarks(bucket_name="hybrid-benchmarks")
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

tests/benchmarks/hybrid/test_scalar_plus_vector.py:25: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
tests/benchmarks/common/minio_setup.py:162: in setup_minio_for_benchmarks
    minio.create_bucket(bucket_name)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <minio_setup.MinIOManager object at 0x73029b4bd640>
bucket_name = 'hybrid-benchmarks'

    def create_bucket(self, bucket_name: str):
        """Create a bucket in MinIO."""
>       import boto3
E       ModuleNotFoundError: No module named 'boto3'

tests/benchmarks/common/minio_setup.py:135: ModuleNotFoundError
---------------------------- Captured stdout setup -----------------------------
Setting up MinIO for benchmarks...
Started MinIO container: hyperstreamdb-benchmark-minio
✓ MinIO started at http://localhost:9010
  Access key: minioadmin
  Secret key: minioadmin
[19 further setup errors elided: every remaining ERROR in the collection list above fails the same way, in a module-scoped MinIO fixture at tests/benchmarks/common/minio_setup.py:135 with "ModuleNotFoundError: No module named 'boto3'". Only the bucket name (index-benchmarks, table-benchmarks, hyperstreamdb-benchmarks, vector-benchmarks, qdrant-comparison, index-verification) and the MinIOManager object address differ per module; each captured stdout shows the MinIO container itself starting cleanly at http://localhost:9010.]
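Every one of these errors traces to the same unguarded import inside MinIOManager.create_bucket; installing the dependency (pip install boto3) would clear all 20 at once. A minimal sketch of a softer guard, assuming the suite would rather skip the benchmarks than error out when boto3 is absent (the endpoint and credentials come from the captured setup stdout; the rest is hypothetical):

    import pytest

    def create_bucket(self, bucket_name: str):
        """Create a bucket in MinIO, skipping dependent tests when boto3 is missing."""
        boto3 = pytest.importorskip("boto3", reason="boto3 is required for MinIO benchmarks")
        client = boto3.client(
            "s3",
            endpoint_url="http://localhost:9010",   # from the captured MinIO stdout
            aws_access_key_id="minioadmin",
            aws_secret_access_key="minioadmin",
        )
        client.create_bucket(Bucket=bucket_name)

Because importorskip raises a Skipped exception, a module-scoped fixture using this would report the 20 tests as SKIPPED rather than ERROR.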
=================================== FAILURES ===================================
______________________________ test_merge_pruning ______________________________

    def test_merge_pruning():
        base_dir = "/tmp/test_merge"
        if os.path.exists(base_dir):
            shutil.rmtree(base_dir)
        os.makedirs(base_dir)
    
        uri = f"file://{base_dir}"
        table = hdb.Table(uri)
        table.add_index_columns(["id", "val"])
    
        # 1. Write Segment A (IDs 0-9)
        df_a = pd.DataFrame({
            "id": range(0, 10),
            "val": range(100, 110)
        })
        # We must explicitly cast to int32 because our Rust writer expects Int32 for the index implementation
        df_a["id"] = df_a["id"].astype("int32")
        table.write_pandas(df_a)
        print("Written Segment A")
    
        # 2. Write Segment B (IDs 10-19)
        df_b = pd.DataFrame({
            "id": range(10, 20),
            "val": range(200, 210)
        })
        df_b["id"] = df_b["id"].astype("int32")
        table.write_pandas(df_b)
        print("Written Segment B")
    
        # 3. Write Segment C (IDs 20-29)
        df_c = pd.DataFrame({
            "id": range(20, 30),
            "val": range(300, 310)
        })
        df_c["id"] = df_c["id"].astype("int32")
        table.write_pandas(df_c)
        print("Written Segment C")
    
        # Verify files exist
        files = glob.glob(f"{base_dir}/*.inv.parquet")
        print(f"Index files found: {len(files)}")
>       assert len(files) >= 3, "Should have at least 3 inverted index files"
E       AssertionError: Should have at least 3 inverted index files
E       assert 0 >= 3
E        +  where 0 = len([])

tests/integration/test_merge.py:50: AssertionError
----------------------------- Captured stdout call -----------------------------
Written data to /tmp/test_merge/b3194d6a-b608-41f3-ab6a-ddef3f322c18.parquet (10 rows)
Written Segment A
Written data to /tmp/test_merge/cbbeac35-61e8-46cd-b6cd-63d2a151ae4e.parquet (10 rows)
Written Segment B
Written data to /tmp/test_merge/420d6701-8cdd-4bc5-a977-9c9471783f2c.parquet (10 rows)
Written Segment C
Index files found: 0
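Nothing in this log proves why the *.inv.parquet sidecars are missing, but one difference from the neighboring test stands out: test_merge_pruning never calls table.commit() after its three writes, while test_query_planning below does. A diagnostic sketch under that assumption (the commit() call is the hypothesis being tested, not a documented requirement):

    import glob

    # ...after the three table.write_pandas(...) calls in test_merge_pruning...
    table.commit()  # hypothesis: an explicit flush may be what materializes the sidecars
    files = glob.glob(f"{base_dir}/*.inv.parquet")
    print(f"Index files found after commit: {len(files)}")
    assert len(files) >= 3, "Should have at least 3 inverted index files"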
_____________________________ test_query_planning ______________________________

    def test_query_planning():
        base_dir = "/tmp/test_query_planning"
        if os.path.exists(base_dir):
            shutil.rmtree(base_dir)
        os.makedirs(base_dir)
    
        uri = f"file://{base_dir}"
        table = hdb.Table(uri)
        table.add_index_columns(["id", "val"])
    
        # Create multiple segments to test planning across segments.
        import pyarrow as pa
        schema = pa.schema([
            pa.field("id", pa.int64()),
            pa.field("val", pa.float64()),
        ])
    
        batch1 = pa.RecordBatch.from_arrays([
            pa.array([1, 2, 3], type=pa.int64()),
            pa.array([1.1, 2.2, 3.3], type=pa.float64()),
        ], schema=schema)
        batch2 = pa.RecordBatch.from_arrays([
            pa.array([4, 5, 6], type=pa.int64()),
            pa.array([4.4, 5.5, 6.6], type=pa.float64()),
        ], schema=schema)
        batch3 = pa.RecordBatch.from_arrays([
            pa.array([7, 8, 9], type=pa.int64()),
            pa.array([7.7, 8.8, 9.9], type=pa.float64()),
        ], schema=schema)
    
        table.write_arrow(batch1)
        table.write_arrow(batch2)
        table.write_arrow(batch3)
        table.commit()
    
        # Check for segment files - only count main parquet segments, not inverted indexes.
        files = [f for f in os.listdir(base_dir) if f.startswith("seg_") and f.endswith(".parquet") and ".inv." not in f]
        print(f"Created {len(files)} initial segments.")
>       assert len(files) == 3
E       assert 0 == 3
E        +  where 0 = len([])

tests/integration/test_query_planning.py:43: AssertionError
----------------------------- Captured stdout call -----------------------------
Written data to /tmp/test_query_planning/add634e8-1499-4864-a3d2-c15a9e7b9bda.parquet (3 rows)
Written data to /tmp/test_query_planning/536ade8e-f9b1-4596-b621-86f1536b1b32.parquet (3 rows)
Written data to /tmp/test_query_planning/504e656a-eb02-4a8d-bae8-6c9c1b87f2aa.parquet (3 rows)
Created 0 initial segments.
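The captured stdout shows the writer emitting UUID-named files (add634e8-....parquet), so the startswith("seg_") filter can never match whatever commit() produced. A sketch of a prefix-agnostic count, assuming data segments and .inv. sidecars are the only parquet files in base_dir:

    import os

    files = [
        f for f in os.listdir(base_dir)
        if f.endswith(".parquet") and ".inv." not in f  # count segments by suffix, not prefix
    ]
    print(f"Created {len(files)} initial segments.")
    assert len(files) == 3  # matches the three UUID-named files in the captured stdout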
________________ TestSchemaEvolution.test_required_to_nullable _________________

self = <test_schema_evolution.TestSchemaEvolution object at 0x73029b240f50>
temp_dir = '/tmp/tmpvhph21hh'

    def test_required_to_nullable(self, temp_dir):
        """Test changing a required column to nullable (should work seamlessly)."""
        table = Table(f"file://{temp_dir}")
    
        # Write data with required column
        schema_v1 = pa.schema([
            pa.field("id", pa.int64()),
            pa.field("value", pa.int32(), nullable=False),  # Required
        ])
    
        batch_v1 = pa.RecordBatch.from_arrays([
            pa.array([1, 2, 3], type=pa.int64()),
            pa.array([100, 200, 300], type=pa.int32()),
        ], schema=schema_v1)
    
        table.write([batch_v1])
        table.commit()
    
        # Write new data with nullable column
        schema_v2 = pa.schema([
            pa.field("id", pa.int64()),
            pa.field("value", pa.int32(), nullable=True),  # Now nullable
        ])
    
        batch_v2 = pa.RecordBatch.from_arrays([
            pa.array([4, 5, 6], type=pa.int64()),
            pa.array([400, None, 600], type=pa.int32()),  # Contains null
        ], schema=schema_v2)
    
        table.write([batch_v2])
        table.commit()
    
        # Read all data - should work seamlessly
>       result = table.read()
                 ^^^^^^^^^^^^

tests/integration/test_schema_evolution.py:336: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
python/hyperstreamdb/__init__.py:419: in read
    return self.to_arrow(filter, vector_filter, columns, device=device, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = HyperStreamTable(uri=file:///tmp/tmpvhph21hh), filter = None
vector_filter = None, columns = None, device = None, kwargs = {}, vf = None

    def to_arrow(self, filter: Optional[str] = None, vector_filter: Optional[Union[Dict[str, Any], List[float]]] = None, columns: Optional[List[str]] = None, device: Optional[Any] = None, **kwargs):
        """
        Read table to Arrow Table with auto-vectorization of search queries and flexible parameters.
    
        Parameters:
            filter: Optional scalar WHERE clause (e.g., "category = 'news'")
            vector_filter: Dict with vector search params:
                - column: str (required) - vector column name
                - query: list (required) - query vector
                - k: int (required) - number of results
                - metric: str (optional) - 'l2'|'cosine'|'innerproduct'|'l1'|'hamming'|'jaccard' (default: l2)
                - ef_search: int (optional) - HNSW ef parameter for tuning
                - probes: int (optional) - IVF probes parameter for tuning
            columns: Optional list of column names to select
            device: Optional compute device (GPU/CPU)
            **kwargs: Extra params (merged into vector_filter if present)
        """
        if "filter" in kwargs and filter is None:
            filter = kwargs.pop("filter")
    
        vf = self._prepare_vector_filter(vector_filter, **kwargs)
        # to_arrow in Rust doesn't currently take **kwargs
>       return self._inner.to_arrow(filter, vf, columns, device=device)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E       RuntimeError: Failed to create batch with evolved schema: Invalid argument error: Column 'value' is declared as non-nullable but contains null values

python/hyperstreamdb/__init__.py:409: RuntimeError
----------------------------- Captured stdout call -----------------------------
Written data to /tmp/tmpvhph21hh/03f6392b-54e1-41aa-bf41-e8b7494ca570.parquet (3 rows)
Written data to /tmp/tmpvhph21hh/d42b4a6e-4de9-4d93-9b4a-3516081e42d7.parquet (3 rows)
DEBUG: table_schema fields: 2
----------------------------- Captured stderr call -----------------------------
DEBUG: Total entries to prune: 2
DEBUG: Entry d42b4a6e-4de9-4d93-9b4a-3516081e42d7.parquet: partitions={}, index_files=[]
DEBUG: Entry 03f6392b-54e1-41aa-bf41-e8b7494ca570.parquet: partitions={}, index_files=[]
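The RuntimeError says the merged read schema still declares value as non-nullable even though the v2 batch legitimately holds a null. The usual rule is nullability widening: a column is nullable in the merged schema if it is nullable in any participating version. A hypothetical pyarrow illustration of that rule (not the actual Rust merge code):

    import pyarrow as pa

    def merge_fields(old: pa.Field, new: pa.Field) -> pa.Field:
        """Nullability widening: nullable in either version means nullable in the merge."""
        assert old.name == new.name and old.type == new.type
        return pa.field(old.name, old.type, nullable=old.nullable or new.nullable)

    v1 = pa.field("value", pa.int32(), nullable=False)
    v2 = pa.field("value", pa.int32(), nullable=True)
    assert merge_fields(v1, v2).nullable  # the merged column must accept the v2 nulls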
___________________________ test_selective_indexing ____________________________

    def test_selective_indexing():
        print("1. Creating Table and Ingesting Data (Default: NO INDEX)")
        table = hdb.Table(TABLE_URI)
        table.set_index_all(False) # Ensure we test selective indexing only
>       table.autocommit = False # Prevent background flushes during investigation
        ^^^^^^^^^^^^^^^^

tests/integration/test_selective_indexing.py:27: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = HyperStreamTable(uri=file:///tmp/hyperstream_test_selective_indexing)
value = False

    @autocommit.setter
    def autocommit(self, value: bool):
>       self._inner.autocommit = value
        ^^^^^^^^^^^^^^^^^^^^^^
E       AttributeError: attribute 'autocommit' of 'builtins.Table' objects is not writable

python/hyperstreamdb/__init__.py:226: AttributeError
----------------------------- Captured stdout call -----------------------------
1. Creating Table and Ingesting Data (Default: NO INDEX)
_____________________________ test_wal_compaction ______________________________

    def test_wal_compaction():
        print("="*60)
        print("Testing WAL Compaction")
        print("="*60)
    
        # Setup
        ts = int(time.time())
        uri = f"file:///tmp/test_wal_compact_{ts}"
        base_path = uri.replace("file://", "")
        if os.path.exists(base_path):
            shutil.rmtree(base_path)
    
        # 1. Write many small batches to trigger compaction
        print("\nPhase 1: Writing 200 small batches...")
        table = hdb.Table(uri)
>       table.autocommit = False
        ^^^^^^^^^^^^^^^^

tests/integration/test_wal_compaction.py:23: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = HyperStreamTable(uri=file:///tmp/test_wal_compact_1775699760)
value = False

    @autocommit.setter
    def autocommit(self, value: bool):
>       self._inner.autocommit = value
        ^^^^^^^^^^^^^^^^^^^^^^
E       AttributeError: attribute 'autocommit' of 'builtins.Table' objects is not writable

python/hyperstreamdb/__init__.py:226: AttributeError
----------------------------- Captured stdout call -----------------------------
============================================================
Testing WAL Compaction
============================================================

Phase 1: Writing 200 small batches...
_____________________________ test_wal_durability ______________________________

    def test_wal_durability():
        print("="*60)
        print("Testing Write-Ahead Log (WAL) Durability")
        print("="*60)
    
        # Setup
        ts = int(time.time())
        uri = f"file:///tmp/test_wal_{ts}"
        base_path = f"/tmp/test_wal_{ts}"
        if os.path.exists(base_path):
            shutil.rmtree(base_path)
    
        # 1. Write Data (Unflushed)
        print("\nPhase 1: Writing Data (Buffered)...")
        table = hdb.Table(uri)
>       table.autocommit = False
        ^^^^^^^^^^^^^^^^

tests/integration/test_wal_durability.py:25: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = HyperStreamTable(uri=file:///tmp/test_wal_1775699760), value = False

    @autocommit.setter
    def autocommit(self, value: bool):
>       self._inner.autocommit = value
        ^^^^^^^^^^^^^^^^^^^^^^
E       AttributeError: attribute 'autocommit' of 'builtins.Table' objects is not writable

python/hyperstreamdb/__init__.py:226: AttributeError
----------------------------- Captured stdout call -----------------------------
============================================================
Testing Write-Ahead Log (WAL) Durability
============================================================

Phase 1: Writing Data (Buffered)...
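These three failures (test_selective_indexing, test_wal_compaction, test_wal_durability) share one binding gap: the pyo3 Table class exposes autocommit as a read-only attribute, so the Python-side setter at python/hyperstreamdb/__init__.py:226 can only raise. A sketch of a defensive wrapper (the set_autocommit fallback name is an assumption; the durable fix would be making the pyo3 attribute writable on the Rust side):

    class HyperStreamTable:
        """Sketch of the Python wrapper around the Rust-backed self._inner."""

        def __init__(self, inner):
            self._inner = inner

        @property
        def autocommit(self) -> bool:
            return self._inner.autocommit

        @autocommit.setter
        def autocommit(self, value: bool) -> None:
            try:
                self._inner.autocommit = value  # fails while the pyo3 attribute is read-only
            except AttributeError:
                self._inner.set_autocommit(value)  # assumed explicit setter, not confirmed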
______________ TestGPUBatchPerformance.test_l2_batch_100k_vectors ______________

self = <test_gpu_batch_benchmarks.TestGPUBatchPerformance object at 0x73029b26e360>

    def test_l2_batch_100k_vectors(self):
        """
        Benchmark L2 distance batch operations with 100K vectors.
    
        Validates Requirement 3.2: >10x speedup for 100K+ vectors on GPU vs CPU.
        """
        print("\n" + "=" * 80)
        print("Benchmark: L2 Distance Batch Operations (100K vectors)")
        print("=" * 80)
    
        # Setup: 100K vectors, 128 dimensions
        dim = 128
        n_vectors = 100_000
    
        query = np.random.rand(dim).astype(np.float32)
        database = np.random.rand(n_vectors, dim).astype(np.float32)
    
        print(f"Query dimension: {dim}")
        print(f"Database size: {n_vectors:,} vectors")
        print()
    
        # Check available backends
        backends = hdb.ComputeContext.list_available_backends()
        print(f"Available backends: {backends}")
        print()
    
        # Test CPU performance
        cpu_ctx = hdb.ComputeContext('cpu')
        print(f"Testing CPU backend...")
>       cpu_result = measure_batch_performance(query, database, cpu_ctx, metric_name="l2")
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

tests/performance/test_gpu_batch_benchmarks.py:95: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

query = array([0.7149059 , 0.48231366, 0.7797398 , 0.03748933, 0.26798365,
       0.79087806, 0.66457313, 0.31558052, 0.857510... 0.68138844, 0.03728044, 0.6782774 , 0.04903762, 0.6154126 ,
       0.6360721 , 0.45859593, 0.7391426 ], dtype=float32)
database = array([[0.6052205 , 0.8627832 , 0.13217288, ..., 0.5527315 , 0.23110206,
        0.59919274],
       [0.32227668, 0.68...3068844, 0.87687933, 0.17740564, ..., 0.96412724, 0.37939724,
        0.62584585]], shape=(100000, 128), dtype=float32)
context = Device(type='cpu', index=-1), metric_name = 'l2', warmup = True

    def measure_batch_performance(query, database, context, metric_name="l2", warmup=True):
        """
        Measure batch distance computation performance.
    
        Args:
            query: Query vector (1D array)
            database: Database vectors (2D array, shape [n_vectors, dim])
            context: ComputeContext for computation
            metric_name: Distance metric to use ("l2", "cosine", "inner_product", "l1", "hamming", "jaccard")
            warmup: Whether to perform a warmup run
    
        Returns:
            dict with timing and throughput metrics
        """
        # Get the appropriate batch function
        batch_functions = {
            "l2": hdb.l2_batch,
            "cosine": hdb.cosine_batch,
            "inner_product": hdb.inner_product_batch,
            "l1": hdb.l1_batch,
            "hamming": hdb.hamming_batch,
            "jaccard": hdb.jaccard_batch,
        }
    
        batch_fn = batch_functions[metric_name]
    
        # Warmup run to ensure GPU is initialized
        if warmup:
>           _ = batch_fn(query, database[:100], context=context)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E           TypeError: l2_batch() got an unexpected keyword argument 'context'

tests/performance/test_gpu_batch_benchmarks.py:44: TypeError
----------------------------- Captured stdout call -----------------------------

================================================================================
Benchmark: L2 Distance Batch Operations (100K vectors)
================================================================================
Query dimension: 128
Database size: 100,000 vectors

Available backends: ['cpu']

Testing CPU backend...
____________ TestGPUBatchPerformance.test_cosine_batch_100k_vectors ____________

self = <test_gpu_batch_benchmarks.TestGPUBatchPerformance object at 0x73029b26e660>

    def test_cosine_batch_100k_vectors(self):
        """
        Benchmark Cosine distance batch operations with 100K vectors.
        """
        print("\n" + "=" * 80)
        print("Benchmark: Cosine Distance Batch Operations (100K vectors)")
        print("=" * 80)
    
        # Setup: 100K vectors, 128 dimensions
        dim = 128
        n_vectors = 100_000
    
        query = np.random.rand(dim).astype(np.float32)
        database = np.random.rand(n_vectors, dim).astype(np.float32)
    
        print(f"Query dimension: {dim}")
        print(f"Database size: {n_vectors:,} vectors")
        print()
    
        # Check available backends
        backends = hdb.ComputeContext.list_available_backends()
    
        # Test CPU performance
        cpu_ctx = hdb.ComputeContext('cpu')
        print(f"Testing CPU backend...")
>       cpu_result = measure_batch_performance(query, database, cpu_ctx, metric_name="cosine")
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

tests/performance/test_gpu_batch_benchmarks.py:168: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

query = array([0.8872817 , 0.7858607 , 0.7293044 , 0.36449793, 0.01204487,
       0.29139125, 0.30727798, 0.74574196, 0.918336... 0.69189   , 0.5872218 , 0.47737014, 0.568376  , 0.21363   ,
       0.7928401 , 0.84168386, 0.03283874], dtype=float32)
database = array([[0.4930413 , 0.28678852, 0.7028018 , ..., 0.42925644, 0.42051306,
        0.5987516 ],
       [0.25023288, 0.51...6507937, 0.6231798 , 0.7519356 , ..., 0.6987578 , 0.79223186,
        0.11411265]], shape=(100000, 128), dtype=float32)
context = Device(type='cpu', index=-1), metric_name = 'cosine', warmup = True

    def measure_batch_performance(query, database, context, metric_name="l2", warmup=True):
        [body elided; identical to the dump in test_l2_batch_100k_vectors above]
>           _ = batch_fn(query, database[:100], context=context)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E           TypeError: cosine_batch() got an unexpected keyword argument 'context'

tests/performance/test_gpu_batch_benchmarks.py:44: TypeError
----------------------------- Captured stdout call -----------------------------

================================================================================
Benchmark: Cosine Distance Batch Operations (100K vectors)
================================================================================
Query dimension: 128
Database size: 100,000 vectors

Testing CPU backend...
________ TestGPUBatchPerformance.test_inner_product_batch_100k_vectors _________

self = <test_gpu_batch_benchmarks.TestGPUBatchPerformance object at 0x73029b26e960>

    def test_inner_product_batch_100k_vectors(self):
        """
        Benchmark Inner Product batch operations with 100K vectors.
        """
        print("\n" + "=" * 80)
        print("Benchmark: Inner Product Batch Operations (100K vectors)")
        print("=" * 80)
    
        # Setup: 100K vectors, 128 dimensions
        dim = 128
        n_vectors = 100_000
    
        query = np.random.rand(dim).astype(np.float32)
        database = np.random.rand(n_vectors, dim).astype(np.float32)
    
        print(f"Query dimension: {dim}")
        print(f"Database size: {n_vectors:,} vectors")
        print()
    
        # Check available backends
        backends = hdb.ComputeContext.list_available_backends()
    
        # Test CPU performance
        cpu_ctx = hdb.ComputeContext('cpu')
        print(f"Testing CPU backend...")
>       cpu_result = measure_batch_performance(query, database, cpu_ctx, metric_name="inner_product")
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

tests/performance/test_gpu_batch_benchmarks.py:227: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

query = array([0.44779238, 0.8342972 , 0.7770294 , 0.3724244 , 0.53757685,
       0.89591193, 0.30564195, 0.2832857 , 0.231103... 0.05465851, 0.38498178, 0.33101985, 0.54280776, 0.29049498,
       0.25580314, 0.46852008, 0.60152197], dtype=float32)
database = array([[0.51232046, 0.1687608 , 0.5244236 , ..., 0.10994744, 0.3847918 ,
        0.03080553],
       [0.2823431 , 0.25...4940475, 0.20449524, 0.37980527, ..., 0.7617651 , 0.34386608,
        0.21659186]], shape=(100000, 128), dtype=float32)
context = Device(type='cpu', index=-1), metric_name = 'inner_product'
warmup = True

    def measure_batch_performance(query, database, context, metric_name="l2", warmup=True):
        """
        Measure batch distance computation performance.
    
        Args:
            query: Query vector (1D array)
            database: Database vectors (2D array, shape [n_vectors, dim])
            context: ComputeContext for computation
            metric_name: Distance metric to use ("l2", "cosine", "inner_product", "l1", "hamming", "jaccard")
            warmup: Whether to perform a warmup run
    
        Returns:
            dict with timing and throughput metrics
        """
        # Get the appropriate batch function
        batch_functions = {
            "l2": hdb.l2_batch,
            "cosine": hdb.cosine_batch,
            "inner_product": hdb.inner_product_batch,
            "l1": hdb.l1_batch,
            "hamming": hdb.hamming_batch,
            "jaccard": hdb.jaccard_batch,
        }
    
        batch_fn = batch_functions[metric_name]
    
        # Warmup run to ensure GPU is initialized
        if warmup:
>           _ = batch_fn(query, database[:100], context=context)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E           TypeError: inner_product_batch() got an unexpected keyword argument 'context'

tests/performance/test_gpu_batch_benchmarks.py:44: TypeError
----------------------------- Captured stdout call -----------------------------

================================================================================
Benchmark: Inner Product Batch Operations (100K vectors)
================================================================================
Query dimension: 128
Database size: 100,000 vectors

Testing CPU backend...
______________ TestGPUBatchPerformance.test_varying_vector_sizes _______________

self = <test_gpu_batch_benchmarks.TestGPUBatchPerformance object at 0x73029b241af0>

    def test_varying_vector_sizes(self):
        """
        Benchmark performance across different database sizes.
    
        Tests: 10K, 50K, 100K, 200K, 500K vectors
        """
        print("\n" + "=" * 80)
        print("Benchmark: Performance Scaling with Database Size")
        print("=" * 80)
    
        dim = 128
        sizes = [10_000, 50_000, 100_000, 200_000, 500_000]
    
        # Check available backends
        backends = hdb.ComputeContext.list_available_backends()
        gpu_available = any(backend in backends for backend in ['cuda', 'rocm', 'mps', 'intel'])
    
        cpu_ctx = hdb.ComputeContext('cpu')
        if gpu_available:
            gpu_ctx = hdb.ComputeContext.auto_detect()
            print(f"GPU backend: {gpu_ctx.backend}")
        else:
            print("⚠ No GPU backend available - CPU baseline only")
        print()
    
        print(f"{'Size':>10} | {'CPU Time':>12} | {'CPU Throughput':>18} | {'GPU Time':>12} | {'GPU Throughput':>18} | {'Speedup':>10}")
        print("-" * 110)
    
        for n_vectors in sizes:
            query = np.random.rand(dim).astype(np.float32)
            database = np.random.rand(n_vectors, dim).astype(np.float32)
    
            # CPU benchmark
>           cpu_result = measure_batch_performance(query, database, cpu_ctx, metric_name="l2", warmup=False)
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

tests/performance/test_gpu_batch_benchmarks.py:294: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

query = array([0.13802472, 0.69798565, 0.5540291 , 0.82227755, 0.8029997 ,
       0.5306728 , 0.3334154 , 0.14587238, 0.019131... 0.8503226 , 0.9650007 , 0.5938947 , 0.1587369 , 0.06519979,
       0.4126034 , 0.00641142, 0.08628648], dtype=float32)
database = array([[0.04215395, 0.5698025 , 0.35554177, ..., 0.7550619 , 0.20829318,
        0.3134492 ],
       [0.54613847, 0.14...1370571 , 0.33884934, 0.93131477, ..., 0.6781244 , 0.00574026,
        0.9418732 ]], shape=(10000, 128), dtype=float32)
context = Device(type='cpu', index=-1), metric_name = 'l2', warmup = False

    def measure_batch_performance(query, database, context, metric_name="l2", warmup=True):
        """
        Measure batch distance computation performance.
    
        Args:
            query: Query vector (1D array)
            database: Database vectors (2D array, shape [n_vectors, dim])
            context: ComputeContext for computation
            metric_name: Distance metric to use ("l2", "cosine", "inner_product", "l1", "hamming", "jaccard")
            warmup: Whether to perform a warmup run
    
        Returns:
            dict with timing and throughput metrics
        """
        # Get the appropriate batch function
        batch_functions = {
            "l2": hdb.l2_batch,
            "cosine": hdb.cosine_batch,
            "inner_product": hdb.inner_product_batch,
            "l1": hdb.l1_batch,
            "hamming": hdb.hamming_batch,
            "jaccard": hdb.jaccard_batch,
        }
    
        batch_fn = batch_functions[metric_name]
    
        # Warmup run to ensure GPU is initialized
        if warmup:
            _ = batch_fn(query, database[:100], context=context)
    
        # Benchmark run
        start = time.time()
>       distances = batch_fn(query, database, context=context)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E       TypeError: l2_batch() got an unexpected keyword argument 'context'

tests/performance/test_gpu_batch_benchmarks.py:48: TypeError
----------------------------- Captured stdout call -----------------------------

================================================================================
Benchmark: Performance Scaling with Database Size
================================================================================
⚠ No GPU backend available - CPU baseline only

      Size |     CPU Time |     CPU Throughput |     GPU Time |     GPU Throughput |    Speedup
--------------------------------------------------------------------------------------------------------------
_______________ TestGPUBatchPerformance.test_varying_dimensions ________________

self = <test_gpu_batch_benchmarks.TestGPUBatchPerformance object at 0x73029b243920>

    def test_varying_dimensions(self):
        """
        Benchmark performance across different vector dimensions.
    
        Tests: 64D, 128D, 256D, 512D, 1024D
        """
        print("\n" + "=" * 80)
        print("Benchmark: Performance Scaling with Vector Dimension")
        print("=" * 80)
    
        n_vectors = 100_000
        dimensions = [64, 128, 256, 512, 1024]
    
        # Check available backends
        backends = hdb.ComputeContext.list_available_backends()
        gpu_available = any(backend in backends for backend in ['cuda', 'rocm', 'mps', 'intel'])
    
        cpu_ctx = hdb.ComputeContext('cpu')
        if gpu_available:
            gpu_ctx = hdb.ComputeContext.auto_detect()
            print(f"GPU backend: {gpu_ctx.backend}")
        else:
            print("⚠ No GPU backend available - CPU baseline only")
        print()
    
        print(f"{'Dimension':>10} | {'CPU Time':>12} | {'CPU Throughput':>18} | {'GPU Time':>12} | {'GPU Throughput':>18} | {'Speedup':>10}")
        print("-" * 110)
    
        for dim in dimensions:
            query = np.random.rand(dim).astype(np.float32)
            database = np.random.rand(n_vectors, dim).astype(np.float32)
    
            # CPU benchmark
>           cpu_result = measure_batch_performance(query, database, cpu_ctx, metric_name="l2", warmup=False)
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

tests/performance/test_gpu_batch_benchmarks.py:342: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

query = array([0.52453476, 0.01010123, 0.33578962, 0.9150242 , 0.2310591 ,
       0.74495065, 0.20733173, 0.08943768, 0.687599... 0.76613927, 0.4204134 , 0.9313128 , 0.9983727 ,
       0.7972642 , 0.36332512, 0.16858453, 0.42034507], dtype=float32)
database = array([[0.53472495, 0.91730875, 0.8840364 , ..., 0.30761224, 0.38792297,
        0.7642325 ],
       [0.9992816 , 0.59...4266389 , 0.20754735, 0.98774445, ..., 0.6750898 , 0.06496236,
        0.36299247]], shape=(100000, 64), dtype=float32)
context = Device(type='cpu', index=-1), metric_name = 'l2', warmup = False

    def measure_batch_performance(query, database, context, metric_name="l2", warmup=True):
        """
        Measure batch distance computation performance.
    
        Args:
            query: Query vector (1D array)
            database: Database vectors (2D array, shape [n_vectors, dim])
            context: ComputeContext for computation
            metric_name: Distance metric to use ("l2", "cosine", "inner_product", "l1", "hamming", "jaccard")
            warmup: Whether to perform a warmup run
    
        Returns:
            dict with timing and throughput metrics
        """
        # Get the appropriate batch function
        batch_functions = {
            "l2": hdb.l2_batch,
            "cosine": hdb.cosine_batch,
            "inner_product": hdb.inner_product_batch,
            "l1": hdb.l1_batch,
            "hamming": hdb.hamming_batch,
            "jaccard": hdb.jaccard_batch,
        }
    
        batch_fn = batch_functions[metric_name]
    
        # Warmup run to ensure GPU is initialized
        if warmup:
            _ = batch_fn(query, database[:100], context=context)
    
        # Benchmark run
        start = time.time()
>       distances = batch_fn(query, database, context=context)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E       TypeError: l2_batch() got an unexpected keyword argument 'context'

tests/performance/test_gpu_batch_benchmarks.py:48: TypeError
----------------------------- Captured stdout call -----------------------------

================================================================================
Benchmark: Performance Scaling with Vector Dimension
================================================================================
⚠ No GPU backend available - CPU baseline only

 Dimension |     CPU Time |     CPU Throughput |     GPU Time |     GPU Throughput |    Speedup
--------------------------------------------------------------------------------------------------------------
_____________________________ test_connector_apis ______________________________

    def test_connector_apis():
        print("Testing Connector APIs...")
    
        # Setup
        uri = "file:///tmp/test_connector_api_table"
        if os.path.exists("/tmp/test_connector_api_table"):
            shutil.rmtree("/tmp/test_connector_api_table")
    
        table = hdb.Table(uri)
    
        # Create test data
        df = pd.DataFrame({
            "id": range(1000),
            "name": [f"user_{i}" for i in range(1000)],
            "age": [i % 50 for i in range(1000)],
            "category": ["A" if i % 2 == 0 else "B" for i in range(1000)]
        })
    
        # Index 'age' and 'category'
        table.add_index_columns(["age", "category"])
    
        print("- Writing data...")
        table.write_pandas(df)
        table.wait_for_indexes()
    
        print("- Compacting and Backfilling Indexes...")
        table.compact(min_file_size_bytes=1024)
        table.index_all_columns()
        table.wait_for_indexes()
    
        # 1. Test list_data_files
        print("\n1. Testing list_data_files()...")
        files = table.list_data_files()
        assert len(files) > 0
        print(f"  Found {len(files)} files")
    
        file_info = files[0]
        print(f"  File: {file_info.file_path}")
        print(f"  Rows: {file_info.row_count}")
        print(f"  Size: {file_info.file_size_bytes} bytes")
        print(f"  Scalar Indexes: {file_info.has_scalar_indexes}")
        print(f"  Indexed Columns: {file_info.indexed_columns}")
    
        assert file_info.row_count == 1000
>       assert "age" in file_info.indexed_columns
E       AssertionError: assert 'age' in []
E        +  where [] = <builtins.PyDataFileInfo object at 0x73029b49f330>.indexed_columns

tests/test_connector_apis.py:51: AssertionError
----------------------------- Captured stdout call -----------------------------
Testing Connector APIs...
- Writing data...
Written data to /tmp/test_connector_api_table/3e2b7f83-1a64-4d24-a8a8-fdf90a7ca66e.parquet (1000 rows)
- Compacting and Backfilling Indexes...

1. Testing list_data_files()...
  Found 1 files
  File: 3e2b7f83-1a64-4d24-a8a8-fdf90a7ca66e.parquet
  Rows: 1000
  Size: 7306 bytes
  Scalar Indexes: False
  Indexed Columns: []
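
The connector-API failure above is a state problem rather than a signature
mismatch: two wait_for_indexes() calls return, yet the file entry still
reports has_scalar_indexes=False and indexed_columns=[]. A polling helper
(a diagnostic sketch only; wait_for_indexes() returning early is the
suspicion, not an established fact) separates "indexing is slow" from
"index metadata is never attached":

    import time

    def wait_for_indexed_columns(table, expected, timeout_s=30.0):
        # Poll list_data_files() until every file reports the expected
        # indexed columns, or give up at the deadline.
        deadline = time.monotonic() + timeout_s
        while time.monotonic() < deadline:
            files = table.list_data_files()
            if files and all(set(expected) <= set(f.indexed_columns)
                             for f in files):
                return True
            time.sleep(0.5)
        return False

If this times out, the index metadata is genuinely never written; if it
eventually succeeds, wait_for_indexes() is returning too early.
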
________________________ test_context_backend_property _________________________

    @given(backend=valid_backends, device_id=device_ids)
>   def test_context_backend_property(backend, device_id):
                   ^^^

tests/test_context_backend_property.py:25: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

backend = 'cpu', device_id = 0

    @given(backend=valid_backends, device_id=device_ids)
    def test_context_backend_property(backend, device_id):
        """
        Property: For any created GPU context, querying its backend property
        should return the backend name that was used to create it.
    
        This property verifies that the backend property correctly reflects
        the backend used during context creation, regardless of whether the
        backend is available on the current system.
        """
        # Get list of available backends
        available_backends = hdb.ComputeContext.list_available_backends()
    
        if backend in available_backends:
            # Backend is available - should succeed
>           ctx = hdb.ComputeContext(backend, device_id=device_id)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E           TypeError: Device.__new__() got an unexpected keyword argument 'device_id'
E           Falsifying example: test_context_backend_property(
E               backend='cpu',
E               device_id=0,
E           )

tests/test_context_backend_property.py:39: TypeError
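
This falsifying example, together with test_cpu_backend_with_device_id,
test_device_id_property, and test_repr below, points at the run's second
API drift: the ComputeContext constructor (now a Device) rejects the
`device_id=` keyword, while its repr, Device(type='cpu', index=-1),
suggests the parameter was renamed to `index`. A constructor shim under
exactly that assumption (both keyword names tried here are guesses):

    import hyperstreamdb as hdb

    def make_context(backend, device_id=0):
        # Try the historical keyword, then the repr-suggested one, then
        # the backend alone.
        for kwargs in ({"device_id": device_id}, {"index": device_id}, {}):
            try:
                return hdb.ComputeContext(backend, **kwargs)
            except TypeError:
                continue
        raise TypeError(f"no known ComputeContext signature for {backend!r}")
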
___________________ TestV2Features.test_sort_order_metadata ____________________

self = <test_cross_engine_compat.TestV2Features object at 0x73029b2b04a0>

    def test_sort_order_metadata(self):
        """Verify sort order is written to metadata"""
        table_path = str(TEST_DIR / "sort_order_test")
        table = hdb.Table(table_path)
    
        df = pd.DataFrame({"a": [3, 1, 2], "b": [6, 4, 5]})
>       table.set_sort_order(["a"], ascending=[True])

tests/test_cross_engine_compat.py:112: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = HyperStreamTable(uri=/tmp/hyperstream_compat_tests/sort_order_test)
columns = ['a'], ascending = [True]

    def set_sort_order(self, columns: List[str], ascending: List[bool]):
        """Set the table's default sort order for future data writes."""
>       return self._inner.replace_sort_order(columns, ascending)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E       ValueError: Column 'a' not found

python/hyperstreamdb/__init__.py:535: ValueError
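
set_sort_order() fails here because it runs before any data, and hence any
schema, reaches the table: the DataFrame defining column 'a' is built but
never written first. A reordering sketch, assuming sort order is meant to
be declared against an existing schema (whether it should also work on an
empty table is exactly what this failure leaves open):

    import pandas as pd
    import hyperstreamdb as hdb

    table = hdb.Table("/tmp/hyperstream_compat_tests/sort_order_test")
    df = pd.DataFrame({"a": [3, 1, 2], "b": [6, 4, 5]})

    table.write_pandas(df)                         # schema now contains 'a'
    table.set_sort_order(["a"], ascending=[True])  # no longer "not found"

If an empty-table sort order is supposed to be legal, the fix belongs in
the binding's replace_sort_order() instead of the test.
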
_________________________________ test_mps_gpu _________________________________

    def test_mps_gpu():
        print("="*50)
        print("HYPERSTREAMDB MPS GPU VALIDATION")
        print("="*50)
    
        # 1. Detect GPU Context
        print("\n[1] Detecting GPU Context...")
        try:
            ctx = hdb.ComputeContext.auto_detect()
            print(f"Detected backend: {ctx.backend}")
            print(f"Device ID: {ctx.device_id}")
    
            if ctx.backend != "mps":
                print(f"WARNING: Expected 'mps' backend on macOS, but got '{ctx.backend}'")
        except Exception as e:
            print(f"Error detecting GPU context: {e}")
            return
    
        # 2. Prepare Test Data
        print("\n[2] Preparing Test Data...")
        n_vectors = 100_000
        dim = 768
        print(f"Generating {n_vectors:,} vectors of {dim} dimensions...")
    
        query = np.random.randn(dim).astype(np.float32)
        vectors = np.random.randn(n_vectors, dim).astype(np.float32)
    
        # 3. Compute on CPU (Baseline)
        print("\n[3] Computing distances on CPU...")
        cpu_ctx = hdb.ComputeContext("cpu")
        start_time = time.time()
>       cpu_distances = hdb.l2_batch(query, vectors, context=cpu_ctx)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E       TypeError: l2_batch() got an unexpected keyword argument 'context'

tests/test_mps_gpu.py:36: TypeError
----------------------------- Captured stdout call -----------------------------
==================================================
HYPERSTREAMDB MPS GPU VALIDATION
==================================================

[1] Detecting GPU Context...
Detected backend: cpu
Device ID: -1
WARNING: Expected 'mps' backend on macOS, but got 'cpu'

[2] Preparing Test Data...
Generating 100,000 vectors of 768 dimensions...

[3] Computing distances on CPU...
__________________________ test_cpu_backend_creation ___________________________

    def test_cpu_backend_creation():
        """Test creating a CPU backend context"""
        ctx = hdb.ComputeContext('cpu')
        assert ctx.backend == 'cpu'
>       assert ctx.device_id == 0  # Default device_id
        ^^^^^^^^^^^^^^^^^^^^^^^^^
E       AssertionError: assert -1 == 0
E        +  where -1 = Device(type='cpu', index=-1).device_id

tests/test_python_gpu_context.py:24: AssertionError
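
Here the test and the binding merely disagree on the CPU default: the
device reports index -1 (plausibly a "no specific device" sentinel) where
the test expects 0. If current behavior is the intended one, the assertion
becomes:

    ctx = hdb.ComputeContext('cpu')
    assert ctx.device_id == -1  # -1 observed above; intent unconfirmed
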
_______________________ test_cpu_backend_with_device_id ________________________

    def test_cpu_backend_with_device_id():
        """Test creating a CPU backend with custom device_id"""
>       ctx = hdb.ComputeContext('cpu', device_id=-1)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E       TypeError: Device.__new__() got an unexpected keyword argument 'device_id'

tests/test_python_gpu_context.py:29: TypeError
___________________________ test_device_id_property ____________________________

    def test_device_id_property():
        """Test that device_id property returns the correct device ID"""
>       ctx = hdb.ComputeContext('cpu', device_id=5)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E       TypeError: Device.__new__() got an unexpected keyword argument 'device_id'

tests/test_python_gpu_context.py:47: TypeError
________________________ test_unavailable_backend_error ________________________

    def test_unavailable_backend_error():
        """Test that requesting an unavailable backend raises RuntimeError"""
        backends = hdb.ComputeContext.list_available_backends()
    
        # Try to create a context with a backend that's not available
        # We'll try all possible backends and expect errors for unavailable ones
        all_backends = ['cuda', 'rocm', 'mps', 'intel']
        unavailable = [b for b in all_backends if b not in backends]
    
        for backend in unavailable:
            with pytest.raises(RuntimeError) as exc_info:
                hdb.ComputeContext(backend)
    
            # Check that error message mentions available backends
            error_msg = str(exc_info.value)
            assert 'not available' in error_msg.lower()
>           assert 'available backends' in error_msg.lower()
E           AssertionError: assert 'available backends' in 'cuda backend not available. install from source with: pip install hyperstreamdb[cuda] --no-binary :all:'
E            +  where 'cuda backend not available. install from source with: pip install hyperstreamdb[cuda] --no-binary :all:' = <built-in method lower of str object at 0x73029b05b000>()
E            +    where <built-in method lower of str object at 0x73029b05b000> = 'CUDA backend not available. Install from source with: pip install hyperstreamdb[cuda] --no-binary :all:'.lower

tests/test_python_gpu_context.py:82: AssertionError
__________________________ test_invalid_backend_error __________________________

    def test_invalid_backend_error():
        """Test that requesting an invalid backend raises ValueError"""
        with pytest.raises(ValueError) as exc_info:
            hdb.ComputeContext('invalid_backend')
    
        error_msg = str(exc_info.value)
>       assert 'unknown backend' in error_msg.lower()
E       assert 'unknown backend' in "unknown device type 'invalid_backend'. valid types: 'cpu', 'cuda', 'mps', 'intel', 'rocm', 'xpu'"
E        +  where "unknown device type 'invalid_backend'. valid types: 'cpu', 'cuda', 'mps', 'intel', 'rocm', 'xpu'" = <built-in method lower of str object at 0x73029b05aaf0>()
E        +    where <built-in method lower of str object at 0x73029b05aaf0> = "Unknown device type 'invalid_backend'. Valid types: 'cpu', 'cuda', 'mps', 'intel', 'rocm', 'xpu'".lower

tests/test_python_gpu_context.py:92: AssertionError
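
Both message assertions (test_unavailable_backend_error and
test_invalid_backend_error) fail on exact substrings of error text that has
since been reworded. Matching only the stable fragments keeps them robust,
assuming the rewording rather than the old text is intentional:

    import pytest
    import hyperstreamdb as hdb

    def test_invalid_backend_error():
        with pytest.raises(ValueError) as exc_info:
            hdb.ComputeContext('invalid_backend')
        msg = str(exc_info.value).lower()
        # Message observed above: "Unknown device type 'invalid_backend'.
        # Valid types: ...". Match the pieces unlikely to change again.
        assert 'unknown' in msg
        assert 'invalid_backend' in msg
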
__________________________________ test_repr ___________________________________

    def test_repr():
        """Test that __repr__ returns a useful string representation"""
>       ctx = hdb.ComputeContext('cpu', device_id=0)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E       TypeError: Device.__new__() got an unexpected keyword argument 'device_id'

tests/test_python_gpu_context.py:146: TypeError
_______________________ test_read_split_with_projection ________________________

cleanup = None

    def test_read_split_with_projection(cleanup):
        os.makedirs("test_splits_data", exist_ok=True)
        table_uri = f"file://{os.getcwd()}/test_splits_data"
    
        # 1. Create a Parquet file with 3 row groups of 10 rows each
        schema = pa.schema([
            ('id', pa.int32()),
            ('val', pa.string()),
            ('heavy', pa.string()) # Large column we want to skip
        ])
    
        file_path = f"{table_uri}/segment_abc.parquet"
        writer = pq.ParquetWriter(file_path.replace("file://", ""), schema, version='2.6')
    
        for i in range(3):
            ids = pa.array(range(i*10, (i+1)*10), type=pa.int32())
            vals = pa.array([f"val_{x}" for x in range(i*10, (i+1)*10)])
            heavy = pa.array(["X" * 1000 for _ in range(10)])
            batch = pa.Table.from_arrays([ids, vals, heavy], schema=schema)
            writer.write_table(batch, row_group_size=10)
    
        writer.close()
    
        # 2. Initialize the table. The file is ~30 KB (30 rows plus the
        # heavy string column), so rather than steering get_splits()
        # with a max_split_size, exercise read_split() directly against
        # known row-group IDs.
    
        catalog = hdb.create_catalog("nessie", {"url": "http://localhost:19120"}) # Type doesn't matter for local file read if we use direct path
        # read_split() is exposed on PyTable, which the catalog APIs
        # normally return from load_table(); for a local directory,
        # open_table() below yields one directly.
    
        # 3. Open the table at the directory level: open_table() treats
        # the URI as the table root, and read_split() resolves files via
        # split.file_path.
        table = hdb.open_table(table_uri)
    
        # 4. Construct a split manually via the exposed PySplit wrapper,
        # targeting row group 1 (rows 10-19). Use the absolute file path,
        # matching the path passed to the writer.
        split = hdb.PySplit(
            file_path,
            0, 100, # offset/length ignored by current impl
            [1],    # row_group_ids
            None,   # index_file_path
            False   # can_use_indexes
        )
    
        # 5. Read Split with Projection (SKIP 'heavy')
        columns = ["id", "val"]
        arrow_table = table.read_split(split, columns)
    
        # 6. Verify
>       assert arrow_table.num_columns == 2
E       assert 0 == 2
E        +  where 0 = pyarrow.Table\n\n----.num_columns

tests/test_splits.py:96: AssertionError
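
An empty result (0 columns) could mean read_split() dropped the projection
or never located the row group at all. A pyarrow cross-check isolates the
fixture from the reader; if this passes, the bug sits in read_split()'s
path resolution or projection pushdown rather than in the file:

    import pyarrow.parquet as pq

    # Read the same row group with the same projection, bypassing
    # HyperStreamDB entirely.
    pf = pq.ParquetFile("test_splits_data/segment_abc.parquet")
    rg = pf.read_row_group(1, columns=["id", "val"])
    assert rg.num_rows == 10
    assert rg.num_columns == 2
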
=============================== warnings summary ===============================
tests/test_exports.py::test_imports
  /home/ralbright/projects/hyperstreamdb/venv/lib/python3.12/site-packages/_pytest/python.py:170: PytestReturnNotNoneWarning: Test functions should return None, but tests/test_exports.py::test_imports returned <class 'bool'>.
  Did you mean to use `assert` instead of `return`?
  See https://docs.pytest.org/en/stable/how-to/assert.html#return-not-none for more information.
    warnings.warn(

-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
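
The lone warning deserves a fix before it hides a regression: pytest
ignores a test's return value, so a `return False` from test_imports would
pass silently. The shape pytest expects (the real body of test_imports is
not shown in this log, so the checks below are placeholders):

    def test_imports():
        import hyperstreamdb as hdb
        assert hasattr(hdb, "Table")           # assert, don't return
        assert hasattr(hdb, "ComputeContext")
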
=========================== short test summary info ============================
FAILED tests/integration/test_merge.py::test_merge_pruning - AssertionError: ...
FAILED tests/integration/test_query_planning.py::test_query_planning - assert...
FAILED tests/integration/test_schema_evolution.py::TestSchemaEvolution::test_required_to_nullable
FAILED tests/integration/test_selective_indexing.py::test_selective_indexing
FAILED tests/integration/test_wal_compaction.py::test_wal_compaction - Attrib...
FAILED tests/integration/test_wal_durability.py::test_wal_durability - Attrib...
FAILED tests/performance/test_gpu_batch_benchmarks.py::TestGPUBatchPerformance::test_l2_batch_100k_vectors
FAILED tests/performance/test_gpu_batch_benchmarks.py::TestGPUBatchPerformance::test_cosine_batch_100k_vectors
FAILED tests/performance/test_gpu_batch_benchmarks.py::TestGPUBatchPerformance::test_inner_product_batch_100k_vectors
FAILED tests/performance/test_gpu_batch_benchmarks.py::TestGPUBatchPerformance::test_varying_vector_sizes
FAILED tests/performance/test_gpu_batch_benchmarks.py::TestGPUBatchPerformance::test_varying_dimensions
FAILED tests/test_connector_apis.py::test_connector_apis - AssertionError: as...
FAILED tests/test_context_backend_property.py::test_context_backend_property
FAILED tests/test_cross_engine_compat.py::TestV2Features::test_sort_order_metadata
FAILED tests/test_mps_gpu.py::test_mps_gpu - TypeError: l2_batch() got an une...
FAILED tests/test_python_gpu_context.py::test_cpu_backend_creation - Assertio...
FAILED tests/test_python_gpu_context.py::test_cpu_backend_with_device_id - Ty...
FAILED tests/test_python_gpu_context.py::test_device_id_property - TypeError:...
FAILED tests/test_python_gpu_context.py::test_unavailable_backend_error - Ass...
FAILED tests/test_python_gpu_context.py::test_invalid_backend_error - assert ...
FAILED tests/test_python_gpu_context.py::test_repr - TypeError: Device.__new_...
FAILED tests/test_splits.py::test_read_split_with_projection - assert 0 == 2
ERROR tests/benchmarks/hybrid/test_scalar_plus_vector.py::TestHybridQueryBenchmarks::test_filtered_vector_search
ERROR tests/benchmarks/hybrid/test_scalar_plus_vector.py::TestHybridQueryBenchmarks::test_multi_filter_vector
ERROR tests/benchmarks/hybrid/test_scalar_plus_vector.py::TestHybridQueryBenchmarks::test_comparison_post_vs_pre_filter
ERROR tests/benchmarks/index/test_index_build.py::TestIndexBuildBenchmarks::test_build_100k_vectors_latency
ERROR tests/benchmarks/table_format/test_vs_iceberg.py::TestTableFormatBenchmarks::test_point_lookup
ERROR tests/benchmarks/table_format/test_vs_iceberg.py::TestTableFormatBenchmarks::test_high_selectivity_filter
ERROR tests/benchmarks/table_format/test_vs_iceberg.py::TestTableFormatBenchmarks::test_range_query
ERROR tests/benchmarks/table_format/test_vs_iceberg.py::TestTableFormatBenchmarks::test_full_scan_baseline
ERROR tests/benchmarks/vector_search/test_cold_read_latency.py::TestColdReadLatency::test_cold_search_latency
ERROR tests/benchmarks/vector_search/test_parallel_search.py::test_parallel_search_vs_sequential
ERROR tests/benchmarks/vector_search/test_parallel_search.py::test_parallel_search_with_filters
ERROR tests/benchmarks/vector_search/test_qdrant_direct_comparison.py::TestQdrantComparison::test_ingestion_comparison_small
ERROR tests/benchmarks/vector_search/test_qdrant_direct_comparison.py::TestQdrantComparison::test_query_comparison
ERROR tests/benchmarks/vector_search/test_qdrant_direct_comparison.py::TestQdrantComparison::test_filtered_search_comparison
ERROR tests/benchmarks/vector_search/test_vector_index_verification.py::test_automatic_schema_detection
ERROR tests/benchmarks/vector_search/test_vector_index_verification.py::test_explicit_index_configuration_still_works
ERROR tests/benchmarks/vector_search/test_vs_qdrant.py::TestVectorSearchBenchmarks::test_ingest_1m_vectors
ERROR tests/benchmarks/vector_search/test_vs_qdrant.py::TestVectorSearchBenchmarks::test_search_unfiltered_small
ERROR tests/benchmarks/vector_search/test_vs_qdrant.py::TestVectorSearchBenchmarks::test_search_filtered_high_selectivity
ERROR tests/benchmarks/vector_search/test_vs_qdrant.py::TestVectorSearchBenchmarks::test_concurrent_queries
== 22 failed, 107 passed, 6 skipped, 1 warning, 20 errors in 86.81s (0:01:26) ==
