#!/usr/bin/env python3
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from dataclasses import dataclass, field
import json
from pathlib import Path
import sys
from typing import Any, List, Union


@dataclass
class MergeReport:
    """Tracks changes during merge."""

    added: list[str] = field(default_factory=list)
    removed: list[str] = field(default_factory=list)

    def print_report(self):
        if self.added:
            print("\nAdded (from new config):", file=sys.stderr)
            for path in sorted(self.added):
                print(f"  + {path}", file=sys.stderr)

        if self.removed:
            print("\nRemoved (not in new config):", file=sys.stderr)
            for path in sorted(self.removed):
                print(f"  - {path}", file=sys.stderr)

        if not (self.added or self.removed):
            print("\nNo differences found.", file=sys.stderr)


def should_remove_key(key_path: str, remove_keys: list[str] | None) -> bool:
    """
    Check if a key should be removed based on the remove_keys list.

    If remove_keys is None, don't remove anything.
    If remove_keys is empty list, remove all old keys.
    If remove_keys has values, only remove keys that match or are children of those paths.
    """
    if remove_keys is None:
        return False
    if not remove_keys:
        # Empty list means remove all
        return True
    # Check if key_path matches or is a child of any specified path
    for remove_path in remove_keys:
        if key_path == remove_path or key_path.startswith(remove_path + "."):
            return True
    return False


def merge_json(
    old: Any, new: Any, report: MergeReport, remove_keys: list[str] | None, path: str = ""
) -> Any:
    """
    Recursively merge two JSON values.

    Existing values in 'old' are preserved.
    Keys only in 'old' are kept unless they should be removed based on remove_keys:
    - None: keep all old keys
    - []: remove all old keys not in new
    - ['path.to.key', ...]: remove only specified keys (and their children)
    """
    # If new is not a dict, keep old value (preserve existing)
    if not isinstance(new, dict):
        return old

    # If old is not a dict but new is, use new (can't merge)
    if not isinstance(old, dict):
        return new

    # Both are dicts - merge recursively, preserving old key order
    result = {}

    # First, process keys from old (preserves original order)
    for key in old:
        key_path = f"{path}.{key}" if path else key
        if key in new:
            # Key exists in both - recursively merge
            result[key] = merge_json(old[key], new[key], report, remove_keys, key_path)
        elif should_remove_key(key_path, remove_keys):
            # Key only in old and should be removed - drop it
            report.removed.append(key_path)
        else:
            # Key only in old - keep it
            result[key] = old[key]

    # Then, add new keys that don't exist in old (at the end)
    for key in new:
        if key not in old:
            key_path = f"{path}.{key}" if path else key
            result[key] = new[key]
            report.added.append(key_path)

    return result


def strtobool(value):
    lower = str(value).lower()
    if lower in {"y", "yes", "t", "true", "on", "1"}:
        return True
    elif lower in {"n", "no", "f", "false", "off", "0"}:
        return False
    else:
        raise ValueError('"{}" is not a valid bool value'.format(value))


class Optional:
    def __init__(self, t=None):
        self.type = t


# global default configuration
config = {
    "logging": {
        "type": "std_out",
        "color": True,
        "file_name": "path_to_some_file.log",
        "max_file_size": Optional(int),
        "max_archived_files": Optional(int),
    },
    "mjolnir": {
        "max_cache_size": 1000000000,
        "id_table_size": 1300000000,
        "use_lru_mem_cache": False,
        "lru_mem_cache_hard_control": False,
        "use_simple_mem_cache": False,
        "user_agent": Optional(str),
        "tile_url": Optional(str),
        "tile_url_gz": Optional(bool),
        "tile_url_user_pw": Optional(str),
        "concurrency": Optional(int),
        "data_quality_dir": Optional(str),
        "tile_dir": "/data/valhalla",
        "tile_extract": "/data/valhalla/tiles.tar",
        "traffic_extract": "/data/valhalla/traffic.tar",
        "incident_dir": Optional(str),
        "incident_log": Optional(str),
        "shortcut_caching": Optional(bool),
        "graph_lua_name": Optional(str),
        "admin": "/data/valhalla/admin.sqlite",
        "landmarks": "/data/valhalla/landmarks.sqlite",
        "timezone": "/data/valhalla/tz_world.sqlite",
        "transit_dir": "/data/valhalla/transit",
        "transit_feeds_dir": "/data/valhalla/transit_feeds",
        "transit_bounding_box": Optional(str),
        "transit_pbf_limit": 20000,
        "hierarchy": True,
        "shortcuts": True,
        "keep_all_osm_node_ids": False,
        "keep_osm_node_ids": False,
        "include_platforms": False,
        "include_driveways": True,
        "include_construction": False,
        "include_bicycle": True,
        "include_pedestrian": True,
        "include_driving": True,
        "import_bike_share_stations": False,
        "global_synchronized_cache": False,
        "max_concurrent_reader_users": 1,
        "reclassify_links": True,
        "default_speeds_config": Optional(str),
        "data_processing": {
            "infer_internal_intersections": True,
            "infer_turn_channels": True,
            "apply_country_overrides": True,
            "grid_divisions_within_tile": 32,
            "use_admin_db": True,
            "use_direction_on_ways": False,
            "allow_alt_name": False,
            "use_urban_tag": False,
            "use_rest_area": False,
            "scan_tar": False,
        },
    },
    "additional_data": {
        "elevation": "/data/valhalla/elevation/",
        "elevation_url": Optional(str),
        "elevation_url_user_pw": Optional(str),
    },
    "loki": {
        "actions": [
            "locate",
            "route",
            "height",
            "sources_to_targets",
            "optimized_route",
            "isochrone",
            "trace_route",
            "trace_attributes",
            "transit_available",
            "expansion",
            "centroid",
            "status",
            "tile",
        ],
        "use_connectivity": True,
        "service_defaults": {
            "radius": 0,
            "minimum_reachability": 50,
            "search_cutoff": 35000,
            "node_snap_tolerance": 5,
            "street_side_tolerance": 5,
            "street_side_max_distance": 1000,
            "heading_tolerance": 60,
            "mvt_min_zoom_road_class": [7, 7, 8, 11, 11, 12, 13, 14],
            "mvt_cache_dir": Optional(str),
            "mvt_cache_min_zoom": 11,
            "mvt_max_age": "1800",
        },
        "service": {"proxy": "ipc:///tmp/loki"},
    },
    "thor": {
        "source_to_target_algorithm": "select_optimal",
        "service": {"proxy": "ipc:///tmp/thor"},
        "max_reserved_labels_count_astar": 2000000,
        "max_reserved_labels_count_bidir_astar": 1000000,
        "max_reserved_labels_count_dijkstras": 4000000,
        "max_reserved_labels_count_bidir_dijkstras": 2000000,
        "clear_reserved_memory": False,
        "extended_search": False,
        "costmatrix": {
            "check_reverse_connection": True,
            "allow_second_pass": False,
            "max_reserved_locations": 25,
            "max_iterations": 2800,
            "min_iterations": 100,
            "hierarchy_limits": {
                "max_up_transitions": {
                    "1": 400,
                    "2": 100,
                },
                "expand_within_distance": {"0": 1e8, "1": 20000, "2": 5000},
            },
        },
        "bidirectional_astar": {
            "threshold_delta": 420.0,
            "alternative_cost_extend": 1.2,
            "alternative_iterations_delta": 100000,
            "hierarchy_limits": {
                "max_up_transitions": {
                    "1": 400,
                    "2": 100,
                },
                "expand_within_distance": {"0": 1e8, "1": 20000, "2": 5000},
            },
        },
        "unidirectional_astar": {
            "hierarchy_limits": {
                "max_up_transitions": {
                    "1": 400,
                    "2": 100,
                },
                "expand_within_distance": {"0": 1e8, "1": 100000, "2": 5000},
            }
        },
    },
    "odin": {
        "service": {"proxy": "ipc:///tmp/odin"},
        "markup_formatter": {
            "markup_enabled": False,
            "phoneme_format": "<TEXTUAL_STRING> (<span class=<QUOTES>phoneme<QUOTES>>/<VERBAL_STRING>/</span>)",
        },
    },
    "meili": {
        "mode": "auto",
        "customizable": [
            "mode",
            "search_radius",
            "turn_penalty_factor",
            "gps_accuracy",
            "interpolation_distance",
            "sigma_z",
            "beta",
            "max_route_distance_factor",
            "max_route_time_factor",
        ],
        "verbose": False,
        "default": {
            "sigma_z": 4.07,
            "gps_accuracy": 5.0,
            "beta": 3,
            "max_route_distance_factor": 5,
            "max_route_time_factor": 5,
            "max_search_radius": 100,
            "breakage_distance": 2000,
            "interpolation_distance": 10,
            "search_radius": 50,
            "geometry": False,
            "route": True,
            "turn_penalty_factor": 0,
        },
        "auto": {"turn_penalty_factor": 200, "search_radius": 50},
        "pedestrian": {"turn_penalty_factor": 100, "search_radius": 50},
        "bicycle": {"turn_penalty_factor": 140},
        "multimodal": {"turn_penalty_factor": 70},
        "service": {"proxy": "ipc:///tmp/meili"},
        "grid": {"size": 500, "cache_size": 100240},
    },
    "httpd": {
        "service": {
            "listen": "tcp://*:8002",
            "loopback": "ipc:///tmp/loopback",
            "interrupt": "ipc:///tmp/interrupt",
            "drain_seconds": 28,
            "shutdown_seconds": 1,
            "timeout_seconds": -1,
        }
    },
    "service_limits": {
        "auto": {
            "max_distance": 5000000.0,
            "max_locations": 20,
            "max_matrix_distance": 400000.0,
            "max_matrix_location_pairs": 2500,
        },
        "bus": {
            "max_distance": 5000000.0,
            "max_locations": 50,
            "max_matrix_distance": 400000.0,
            "max_matrix_location_pairs": 2500,
        },
        "taxi": {
            "max_distance": 5000000.0,
            "max_locations": 20,
            "max_matrix_distance": 400000.0,
            "max_matrix_location_pairs": 2500,
        },
        "pedestrian": {
            "max_distance": 250000.0,
            "max_locations": 50,
            "max_matrix_distance": 200000.0,
            "max_matrix_location_pairs": 2500,
            "min_multimodal_walking_distance": 1,
            "max_multimodal_walking_distance": 10000,
        },
        "motor_scooter": {
            "max_distance": 500000.0,
            "max_locations": 50,
            "max_matrix_distance": 200000.0,
            "max_matrix_location_pairs": 2500,
        },
        "motorcycle": {
            "max_distance": 500000.0,
            "max_locations": 50,
            "max_matrix_distance": 200000.0,
            "max_matrix_location_pairs": 2500,
        },
        "bicycle": {
            "max_distance": 500000.0,
            "max_locations": 50,
            "max_matrix_distance": 200000.0,
            "max_matrix_location_pairs": 2500,
        },
        "multimodal": {
            "max_distance": 500000.0,
            "max_locations": 50,
            "max_matrix_distance": 0.0,
            "max_matrix_location_pairs": 0,
        },
        "status": {"allow_verbose": False},
        "transit": {
            "max_distance": 500000.0,
            "max_locations": 50,
            "max_matrix_distance": 200000.0,
            "max_matrix_location_pairs": 2500,
        },
        "truck": {
            "max_distance": 5000000.0,
            "max_locations": 20,
            "max_matrix_distance": 400000.0,
            "max_matrix_location_pairs": 2500,
        },
        "skadi": {"max_shape": 750000, "min_resample": 10.0},
        "isochrone": {
            "max_contours": 4,
            "max_time_contour": 120,
            "max_distance": 25000.0,
            "max_locations": 1,
            "max_distance_contour": 200,
        },
        "trace": {
            "max_distance": 200000.0,
            "max_gps_accuracy": 100.0,
            "max_search_radius": 100.0,
            "max_shape": 16000,
            "max_alternates": 3,
            "max_alternates_shape": 100,
        },
        "bikeshare": {
            "max_distance": 500000.0,
            "max_locations": 50,
            "max_matrix_distance": 200000.0,
            "max_matrix_location_pairs": 2500,
        },
        "auto_pedestrian": {
            "max_distance": 5000000.0,
            "max_matrix_distance": 400000.0,
            "max_matrix_location_pairs": 2500,
        },
        "centroid": {"max_distance": 200000.0, "max_locations": 5},
        "max_exclude_locations": 50,
        "max_reachability": 100,
        "max_radius": 200,
        "max_timedep_distance": 500000,
        "max_timedep_distance_matrix": 0,
        "max_alternates": 2,
        "max_exclude_polygons_length": 10000,
        "min_linear_cost_factor": 1,
        "max_linear_cost_edges": 50000,
        "max_distance_disable_hierarchy_culling": 0,
        "hierarchy_limits": {
            "allow_modification": False,
            "costmatrix": {
                "max_allowed_up_transitions": {
                    "1": 400,
                    "2": 100,
                },
                "max_expand_within_distance": {"0": 1e8, "1": 100000, "2": 5000},
            },
            "unidirectional_astar": {
                "max_allowed_up_transitions": {
                    "1": 400,
                    "2": 100,
                },
                "max_expand_within_distance": {"0": 1e8, "1": 100000, "2": 5000},
            },
            "bidirectional_astar": {
                "max_allowed_up_transitions": {
                    "1": 400,
                    "2": 100,
                },
                "max_expand_within_distance": {"0": 1e8, "1": 20000, "2": 5000},
            },
        },
        "allow_hard_exclusions": False,
    },
    "statsd": {
        "host": Optional(str),
        "port": 8125,
        "prefix": "valhalla",
        "batch_size": 500,
        "tags": Optional(list),
    },
}

help_text = {
    "logging": {
        "__note__": (
            "Process-wide logging configuration. The file logger is not suitable for "
            "multi-processing mode. Log rolling in particular may cause race conditions and crashes."
        ),
        "type": "Type of logger either std_out or file",
        "color": "Use colored log level in std_out logger",
        "file_name": "Output log file for the file logger",
        "max_file_size": "Maximum file size in bytes before rolling to a new file (0 = disabled)",
        "max_archived_files": "Maximum number of archived log files to keep (0 = disabled)",
    },
    "mjolnir": {
        "max_cache_size": "Number of bytes per thread used to store tile data in memory",
        "id_table_size": "Value controls the initial size of the Id table",
        "use_lru_mem_cache": "Use memory cache with LRU eviction policy",
        "lru_mem_cache_hard_control": "Use hard memory limit control for LRU memory cache (i.e. on every put) - never allow overcommit",
        "use_simple_mem_cache": "Use memory cache within a simple hash map the clears all tiles when overcommitted",
        "user_agent": "User-Agent http header to request single tiles",
        "tile_url": "Http location to read tiles from if they are not found in the tile_dir, e.g.: http://your_valhalla_tile_server_host:8000/some/Optional/path/{tilePath}?some=Optional&query=params. Valhalla will look for the {tilePath} portion of the url and fill this out with a given tile path when it make a request for that tile",
        "tile_url_gz": "Whether or not to request for compressed tiles",
        "tile_url_user_pw": 'User & password for HTTP basic auth in the form of "user:password"',
        "concurrency": "How many threads to use in the concurrent parts of tile building",
        "data_quality_dir": "The directory where we output files regarding data quality issues, e.g. duplicateways.txt",
        "tile_dir": "Location to read/write tiles to/from",
        "tile_extract": "Location to read tiles from tar",
        "traffic_extract": "Location to read traffic from tar",
        "incident_dir": "Location to read incident tiles from",
        "incident_log": "Location to read change events of incident tiles",
        "shortcut_caching": "Precaches the superseded edges of all shortcuts in the graph. Defaults to false",
        "graph_lua_name": "Location of the lua file to use for graph customization during tile building instead of default one",
        "admin": "Location of sqlite file holding admin polygons created with valhalla_build_admins",
        "landmarks": "Location of sqlite file holding landmark POI created with valhalla_build_landmarks",
        "timezone": "Location of sqlite file holding timezone information created with valhalla_build_timezones",
        "transit_dir": "Location of intermediate transit tiles created with valhalla_build_transit",
        "transit_feeds_dir": "Location of all GTFS transit feeds, needs to contain one subdirectory per feed",
        "transit_bounding_box": "Add comma separated bounding box values to only download transit data inside the given bounding box",
        "transit_pbf_limit": "Limit individual PBF files to this many trips (needed for PBF's stupid size limit)",
        "hierarchy": "bool indicating whether road hierarchy is to be built - default to True",
        "shortcuts": "bool indicating whether shortcuts are to be built - default to True",
        "keep_all_osm_node_ids": "bool indicating whether to store all OSM node ids in the graph data - defaults to False",
        "keep_osm_node_ids": "bool indicating whether to store OSM node ids (at topological/graph nodes) in the graph data - defaults to False",
        "include_platforms": "bool indicating whether to include highway=platform - default to False",
        "include_driveways": "bool indicating whether private driveways are included - default to True",
        "include_construction": "bool indicating where roads under construction are included - default to False",
        "include_bicycle": "bool indicating whether cycling only ways are included - default to True",
        "include_pedestrian": "bool indicating whether pedestrian only ways are included - default to True",
        "include_driving": "bool indicating whether driving only ways are included - default to True",
        "import_bike_share_stations": "bool indicating whether importing bike share stations(BSS). Set to True when using multimodal - default to False",
        "global_synchronized_cache": "bool indicating whether global_synchronized_cache is used - default to False",
        "max_concurrent_reader_users": "number of threads in the threadpool which can be used to fetch tiles over the network via curl",
        "reclassify_links": "bool indicating whether or not to reclassify links - reclassifies ramps based on the lowest class connecting road",
        "default_speeds_config": "a path indicating the json config file which graph enhancer will use to set the speeds of edges in the graph based on their geographic location (state/country), density (urban/rural), road class, road use (form of way)",
        "data_processing": {
            "infer_internal_intersections": "bool indicating whether or not to infer internal intersections during the graph enhancer phase or use the internal_intersection key from the pbf",
            "infer_turn_channels": "bool indicating whether or not to infer turn channels during the graph enhancer phase or use the turn_channel key from the pbf",
            "apply_country_overrides": "bool indicating whether or not to apply country overrides during the graph enhancer phase",
            "grid_divisions_within_tile": "number of grid subdivisions within a tile. Used for spatial sorting of nodes within a tile. Set to 0 to disable spatial sorting of nodes",
            "use_admin_db": "bool indicating whether or not to use the administrative database during the graph enhancer phase or use the admin keys from the pbf that are set on the node",
            "use_direction_on_ways": "bool indicating whether or not to process the direction key on the ways or utilize the guidance relation tags during the parsing phase",
            "allow_alt_name": "bool indicating whether or not to process the alt_name key on the ways during the parsing phase",
            "use_urban_tag": "bool indicating whether or not to use the urban area tag on the ways or to utilize the getDensity function within the graph enhancer phase",
            "use_rest_area": "bool indicating whether or not to use the rest/service area tag on the ways",
            "scan_tar": "bool indicating whether or not to pre-scan the tar ball(s) when loading an extract with an index file, to warm up the OS page cache.",
        },
    },
    "additional_data": {
        "elevation": "Location of elevation tiles",
        "elevation_url": "Http location to read elevations from. this address is used if elevation tiles were not found in the elevation directory. Ex.: http://<your_valhalla_tile_server_host>:<your_valhalla_tile_server_port>/some/Optional/path/{tilePath}?some=Optional&query=params. Valhalla will look for the {tilePath} portion of the url and fill this out with an elevation path when it makes a request for that particular elevation",
        "elevation_url_user_pw": 'User & password for HTTP basic auth in the form of "user:password"',
    },
    "loki": {
        "actions": "Comma separated list of allowable actions for the service, one or more of: locate, route, height, optimized_route, isochrone, trace_route, trace_attributes, transit_available, expansion, centroid, status, tile",
        "use_connectivity": "a boolean value to know whether or not to construct the connectivity maps",
        "service_defaults": {
            "radius": "Default radius to apply to incoming locations should one not be supplied",
            "minimum_reachability": "Default minimum reachability to apply to incoming locations should one not be supplied",
            "search_cutoff": "The cutoff at which we will assume the input is too far away from civilisation to be worth correlating to the nearest graph elements",
            "node_snap_tolerance": "During edge correlation this is the tolerance used to determine whether or not to snap to the intersection rather than along the street, if the snap location is within this distance from the intersection the intersection is used instead",
            "street_side_tolerance": "If your input coordinate is less than this tolerance away from the edge centerline then we set your side of street to none otherwise your side of street will be left or right depending on direction of travel",
            "street_side_max_distance": "The max distance in meters that the input coordinates or display ll can be from the edge centerline for them to be used for determining the side of street. Beyond this distance the side of street is set to none",
            "heading_tolerance": "When a heading is supplied, this is the tolerance around that heading with which we determine whether an edges heading is similar enough to match the supplied heading",
            "mvt_min_zoom_road_class": "Minimum zoom level for each road class (8 values: Motorway, Trunk, Primary, Secondary, Tertiary, Unclassified, Residential, Service/Other). Roads will only be rendered at or above their minimum zoom level.",
            "mvt_cache_dir": "The cache directory for MVT tiles. If empty/omitted, we disable MVT caching",
            "mvt_cache_min_zoom": "The minimum zoom level which will be cached, the maximum will be determined by mvt_min_zoom_road_class",
            "mvt_max_age": "The value used for 'max-age' in the Cache-Control response header for the MVT end point",
        },
        "service": {"proxy": "IPC linux domain socket file location"},
    },
    "thor": {
        "source_to_target_algorithm": 'Which matrix algorithm should be used, one of "timedistancematrix" or "costmatrix". If blank, the optimal will be selected.',
        "service": {"proxy": "IPC linux domain socket file location"},
        "max_reserved_labels_count_astar": "Maximum capacity allowed to keep reserved for unidirectional A*.",
        "max_reserved_labels_count_bidir_astar": "Maximum capacity allowed to keep reserved for bidirectional A*.",
        "max_reserved_labels_count_dijkstras": "Maximum capacity allowed to keep reserved for unidirectional Dijkstras.",
        "max_reserved_labels_count_bidir_dijkstras": "Maximum capacity allowed to keep reserved for bidirectional Dijkstras.",
        "max_reserved_locations_costmatrix": "Maximum amount of locations allowed to to keep reserved between requests for CostMatrix",
        "clear_reserved_memory": "If True clean reserved memory in path algorithms",
        "extended_search": "If True and 1 side of the bidirectional search is exhausted, causes the other side to continue if the starting location of that side began on a not_thru or closed edge",
        "costmatrix": {
            "check_reverse_connection": "Whether to check for expansion connections on the reverse tree, which has an adverse effect on performance",
            "allow_second_pass": 'Whether to allow a second pass for unfound CostMatrix connections, where we turn off destination-only, relax hierarchies and expand into "semi-islands"',
            "max_reserved_locations": "Maximum amount of locations allowed to to keep reserved between requests for CostMatrix",
            "max_iterations": "Upper bound on the number of iterations per expansion once a path has been found. Must be a positive integer",
            "min_iterations": "Lower bound on the number of iterations per expansion once a path has been found. Must be a positive integer",
            "hierarchy_limits": {
                "max_up_transitions": {
                    "1": "The default maximum up transitions for level 1 in CostMatrix",
                    "2": "The default maximum up transitions for level 2 in CostMatrix",
                },
                "expand_within_distance": {
                    "0": "The default distance within which expansion is allowed from the origin/destination on level 0 in costmatrix",
                    "1": "The default distance within which expansion is allowed from the origin/destination on level 1 in costmatrix",
                    "2": "The default distance within which expansion is allowed from the origin/destination on level 2 in costmatrix",
                },
            },
        },
        "bidirectional_astar": {
            "threshold_delta": "Time (seconds) to extend search once the first connection has been found",
            "alternative_cost_extend": "Relative cost extension to find alternative routes",
            "alternative_iterations_delta": "Number of extra iterations to allow when searching for alternative paths. Higher values will find more alternatives but will be slower",
            "hierarchy_limits": {
                "max_up_transitions": {
                    "1": "The default maximum up transitions for level 1 in CostMatrix",
                    "2": "The default maximum up transitions for level 2 in CostMatrix",
                },
                "expand_within_distance": {
                    "0": "The default distance within which expansion is allowed from the origin/destination on level 0 in bidirectional astar",
                    "1": "The default distance within which expansion is allowed from the origin/destination on level 1 in bidirectional astar",
                    "2": "The default distance within which expansion is allowed from the origin/destination on level 2 in bidirectional astar",
                },
            },
        },
        "unidirectional_astar": {
            "hierarchy_limits": {
                "max_up_transitions": {
                    "1": "The default maximum up transitions for level 1 in CostMatrix",
                    "2": "The default maximum up transitions for level 2 in CostMatrix",
                },
                "expand_within_distance": {
                    "0": "The default distance within which expansion is allowed from the origin/destination on level 0 in bidirectional astar",
                    "1": "The default distance within which expansion is allowed from the origin/destination on level 1 in bidirectional astar",
                    "2": "The default distance within which expansion is allowed from the origin/destination on level 2 in bidirectional astar",
                },
            }
        },
    },
    "odin": {
        "service": {"proxy": "IPC linux domain socket file location"},
        "markup_formatter": {
            "markup_enabled": "Boolean flag to use markup formatting",
            "phoneme_format": "The phoneme format string that will be used by street names and signs",
        },
    },
    "meili": {
        "mode": "Specify the default transport mode",
        "customizable": "Specify which parameters are allowed to be customized by URL query parameters",
        "verbose": "Control verbose output for debugging",
        "default": {
            "sigma_z": "A non-negative value to specify the GPS accuracy (the variance of the normal distribution) of an incoming GPS sequence. It is also used to weight emission costs of measurements",
            "gps_accuracy": "TODO: ",
            "beta": "A non-negative emprical value to weight the transition cost of two successive candidates",
            "max_route_distance_factor": "A non-negative value used to limit the routing search range which is the distance to next measurement multiplied by this factor",
            "max_route_time_factor": "A non-negative value used to limit the routing search range which is the time to the next measurement multiplied by this factor",
            "breakage_distance": "A non-negative value. If two successive measurements are far than this distance, then connectivity in between will not be considered",
            "max_search_radius": "A non-negative value specifying the maximum radius in meters about a given point to search for candidate edges for routing",
            "interpolation_distance": "If two successive measurements are closer than this distance, then the later one will be interpolated into the matched route",
            "search_radius": "A non-negative value to specify the search radius (in meters) within which to search road candidates for each measurement",
            "geometry": "TODO: ",
            "route": "TODO: ",
            "turn_penalty_factor": "A non-negative value to penalize turns from one road segment to next",
        },
        "auto": {
            "turn_penalty_factor": "A non-negative value to penalize turns from one road segment to next",
            "search_radius": "A non-negative value to specify the search radius (in meters) within which to search road candidates for each measurement",
        },
        "pedestrian": {
            "turn_penalty_factor": "A non-negative value to penalize turns from one road segment to next",
            "search_radius": "A non-negative value to specify the search radius (in meters) within which to search road candidates for each measurement",
        },
        "bicycle": {
            "turn_penalty_factor": "A non-negative value to penalize turns from one road segment to next"
        },
        "multimodal": {
            "turn_penalty_factor": "A non-negative value to penalize turns from one road segment to next"
        },
        "service": {"proxy": "IPC linux domain socket file location"},
        "grid": {
            "size": "TODO: Resolution of the grid used in finding match candidates",
            "cache_size": "TODO: number of grids to keep in cache",
        },
    },
    "httpd": {
        "service": {
            "listen": "The protocol, host location and port your service will bind to",
            "loopback": "IPC linux domain socket file location used to communicate results back to the client",
            "interrupt": "IPC linux domain socket file location used to cancel work in progress",
            "drain_seconds": "How long to wait for currently running threads to finish before signaling them to shutdown",
            "shutdown_seconds": "How long to wait for currently running threads to quit before exiting the process",
            "timeout_seconds": "How long to wait for a single request to finish before timing it out (defaults to infinite)",
        }
    },
    "service_limits": {
        "auto": {
            "max_distance": "Maximum b-line distance between all locations in meters",
            "max_locations": "Maximum number of input locations",
            "max_matrix_distance": "Maximum b-line distance between 2 most distant locations in meters for a matrix",
            "max_matrix_location_pairs": "Maximum number of routes computed with the matrix, e.g. 2500 = 50:50 or 1:2500",
        },
        "bus": {
            "max_distance": "Maximum b-line distance between all locations in meters",
            "max_locations": "Maximum number of input locations",
            "max_matrix_distance": "Maximum b-line distance between 2 most distant locations in meters for a matrix",
            "max_matrix_location_pairs": "Maximum number of routes computed with the matrix, e.g. 2500 = 50:50 or 1:2500",
        },
        "taxi": {
            "max_distance": "Maximum b-line distance between all locations in meters",
            "max_locations": "Maximum number of input locations",
            "max_matrix_distance": "Maximum b-line distance between 2 most distant locations in meters for a matrix",
            "max_matrix_location_pairs": "Maximum number of routes computed with the matrix, e.g. 2500 = 50:50 or 1:2500",
        },
        "pedestrian": {
            "max_distance": "Maximum b-line distance between all locations in meters",
            "max_locations": "Maximum number of input locations",
            "max_matrix_distance": "Maximum b-line distance between 2 most distant locations in meters for a matrix",
            "max_matrix_location_pairs": "Maximum number of routes computed with the matrix, e.g. 2500 = 50:50 or 1:2500",
            "min_multimodal_walking_distance": "Minimum distance you must walk in a multimodal route that uses pedestrian costing",
            "max_multimodal_walking_distance": "Maximum distance allowed for walking in a multimodal route that uses pedestrian costing",
        },
        "motor_scooter": {
            "max_distance": "Maximum b-line distance between all locations in meters",
            "max_locations": "Maximum number of input locations",
            "max_matrix_distance": "Maximum b-line distance between 2 most distant locations in meters for a matrix",
            "max_matrix_location_pairs": "Maximum number of routes computed with the matrix, e.g. 2500 = 50:50 or 1:2500",
        },
        "motorcycle": {
            "max_distance": "Maximum b-line distance between all locations in meters",
            "max_locations": "Maximum number of input locations",
            "max_matrix_distance": "Maximum b-line distance between 2 most distant locations in meters for a matrix",
            "max_matrix_location_pairs": "Maximum number of routes computed with the matrix, e.g. 2500 = 50:50 or 1:2500",
        },
        "bicycle": {
            "max_distance": "Maximum b-line distance between all locations in meters",
            "max_locations": "Maximum number of input locations",
            "max_matrix_distance": "Maximum b-line distance between 2 most distant locations in meters for a matrix",
            "max_matrix_location_pairs": "Maximum number of routes computed with the matrix, e.g. 2500 = 50:50 or 1:2500",
        },
        "multimodal": {
            "max_distance": "Maximum b-line distance between all locations in meters",
            "max_locations": "Maximum number of input locations",
            "max_matrix_distance": "Maximum b-line distance between 2 most distant locations in meters for a matrix",
            "max_matrix_location_pairs": "Maximum number of routes computed with the matrix, e.g. 2500 = 50:50 or 1:2500",
        },
        "status": {
            "allow_verbose": "Allow verbose output for the /status endpoint, which can be computationally expensive"
        },
        "transit": {
            "max_distance": "Maximum b-line distance between all locations in meters",
            "max_locations": "Maximum number of input locations",
            "max_matrix_distance": "Maximum b-line distance between 2 most distant locations in meters for a matrix",
            "max_matrix_location_pairs": "Maximum number of routes computed with the matrix, e.g. 2500 = 50:50 or 1:2500",
        },
        "truck": {
            "max_distance": "Maximum b-line distance between all locations in meters",
            "max_locations": "Maximum number of input locations",
            "max_matrix_distance": "Maximum b-line distance between 2 most distant locations in meters for a matrix",
            "max_matrix_location_pairs": "Maximum number of routes computed with the matrix, e.g. 2500 = 50:50 or 1:2500",
        },
        "skadi": {
            "max_shape": "Maximum number of input shapes",
            "min_resample": "Smalled resampling distance to allow in meters",
        },
        "isochrone": {
            "max_contours": "Maximum number of input contours to allow",
            "max_time_contour": "Maximum time value for any one contour in minutes",
            "max_distance": "Maximum b-line distance between all locations in meters",
            "max_locations": "Maximum number of input locations",
            "max_distance_contour": "Maximum distance value for any one contour in kilometers",
        },
        "trace": {
            "max_distance": "Maximum input shape distance in meters",
            "max_gps_accuracy": "Maximum input gps accuracy in meters",
            "max_search_radius": "Maximum upper bounds of the search radius in meters",
            "max_shape": "Maximum number of input shape points",
            "max_alternates": "Maximum number of alternate map matching",
            "max_alternates_shape": "Maximum number of input shape points when requesting multiple paths",
        },
        "bikeshare": {
            "max_distance": "Maximum b-line distance between all locations in meters",
            "max_locations": "Maximum number of input locations",
            "max_matrix_distance": "Maximum b-line distance between 2 most distant locations in meters for a matrix",
            "max_matrix_location_pairs": "Maximum number of routes computed with the matrix, e.g. 2500 = 50:50 or 1:2500",
        },
        "auto_pedestrian": {
            "max_distance": "Maximum b-line distance between all locations in meters",
            "max_matrix_distance": "Maximum b-line distance between 2 most distant locations in meters for a matrix",
            "max_matrix_location_pairs": "Maximum number of routes computed with the matrix, e.g. 2500 = 50:50 or 1:2500",
        },
        "centroid": {
            "max_distance": "Maximum b-line distance between any pair of locations in meters",
            "max_locations": "Maximum number of input locations, 127 is a hard limit and cannot be increased in config",
        },
        "max_exclude_locations": "Maximum number of avoid locations to allow in a request",
        "max_reachability": "Maximum reachability (number of nodes reachable) allowed on any one location",
        "max_radius": "Maximum radius in meters allowed on any one location",
        "max_timedep_distance": "Maximum b-line distance between locations to allow a time-dependent route",
        "max_timedep_distance_matrix": "Maximum b-line distance between 2 most distant locations in meters to allow a time-dependent matrix",
        "max_alternates": "Maximum number of alternate routes to allow in a request",
        "max_exclude_polygons_length": "Maximum total perimeter of all exclude_polygons in meters",
        "min_linear_cost_factor": "Minimum allowed factor admissible for linear feature cost factors. Beware: low values approaching zero will render the A* heuristic unusable",
        "max_linear_cost_edges": "Maximum total number of linear cost edges",
        "max_distance_disable_hierarchy_culling": "Maximum search distance allowed with hierarchy culling disabled",
        "hierarchy_limits": {
            "allow_modification": "Whether hierarchy limits can be modified via the request",
            "costmatrix": {
                "max_allowed_up_transitions": {
                    "1": 'The maximum up transitions for level 1. Values set via request parameters that exceed this value are clamped. Ignored if "allow_modifications" is set to false.',
                    "2": 'The maximum up transitions for level 2. Values set via request parameters that exceed this value are clamped. Ignored if "allow_modifications" is set to false.',
                },
                "max_expand_within_distance": {
                    "0": 'The maximum distance in meters from which the origin/destination will be expanded on level 0. Values set via request parameters that exceed this value are clamped. Ignored if "allow_modifications" is set to false.',
                    "1": 'The maximum distance in meters from which the origin/destination will be expanded on level 1. Values set via request parameters that exceed this value are clamped. Ignored if "allow_modifications" is set to false.',
                    "2": 'The maximum distance in meters from which the origin/destination will be expanded on level 2. Values set via request parameters that exceed this value are clamped. Ignored if "allow_modifications" is set to false.',
                },
            },
            "bidirectional_astar": {
                "max_allowed_up_transitions": {
                    "1": 'The maximum up transitions for level 1. Values set via request parameters that exceed this value are clamped. Ignored if "allow_modifications" is set to false.',
                    "2": 'The maximum up transitions for level 2. Values set via request parameters that exceed this value are clamped. Ignored if "allow_modifications" is set to false.',
                },
                "max_expand_within_distance": {
                    "0": 'The maximum distance in meters from which the origin/destination will be expanded on level 0. Values set via request parameters that exceed this value are clamped. Ignored if "allow_modifications" is set to false.',
                    "1": 'The maximum distance in meters from which the origin/destination will be expanded on level 1. Values set via request parameters that exceed this value are clamped. Ignored if "allow_modifications" is set to false.',
                    "2": 'The maximum distance in meters from which the origin/destination will be expanded on level 2. Values set via request parameters that exceed this value are clamped. Ignored if "allow_modifications" is set to false.',
                },
            },
            "unidirectional_astar": {
                "max_allowed_up_transitions": {
                    "1": 'The maximum up transitions for level 1. Values set via request parameters that exceed this value are clamped. Ignored if "allow_modifications" is set to false.',
                    "2": 'The maximum up transitions for level 2. Values set via request parameters that exceed this value are clamped. Ignored if "allow_modifications" is set to false.',
                },
                "max_expand_within_distance": {
                    "0": 'The maximum distance in meters from which the origin/destination will be expanded on level 0. Values set via request parameters that exceed this value are clamped. Ignored if "allow_modifications" is set to false.',
                    "1": 'The maximum distance in meters from which the origin/destination will be expanded on level 1. Values set via request parameters that exceed this value are clamped. Ignored if "allow_modifications" is set to false.',
                    "2": 'The maximum distance in meters from which the origin/destination will be expanded on level 2. Values set via request parameters that exceed this value are clamped. Ignored if "allow_modifications" is set to false.',
                },
            },
        },
        "allow_hard_exclusions": "Whether hard exclusions, such as exclude_bridges, exclude_tunnels, exclude_tolls, exclude_highways or exclude_ferries are allowed on this server",
    },
    "statsd": {
        "host": "The statsd host address",
        "port": "The statsd port",
        "prefix": "The statsd prefix to use for each metric",
        "batch_size": "Approximate maximum size in bytes of each batch of stats to send to statsd",
        "tags": "List of tags to include with each metric",
    },
}


def add_leaf_args(
    path: str,
    tree: Union[dict, bool, str, list, Optional],
    leaves_: List[str],
    parser_: ArgumentParser,
    help: dict,
):
    """
    returns a list of leaves of the tree, `\0` separated, stops at non dicts
    while doing so it also adds arguments to the parser
    """
    # if we are at a dict go deeper
    if isinstance(tree, dict):
        for k in tree:
            v = tree[k]
            add_leaf_args("\0".join([path, k]) if len(path) else k, v, leaves_, parser_, help)
    # we've reached a leaf
    else:
        keys = path.split("\0")
        h = help
        for k in keys:
            h = h[k]

        # its either required and is the right type or optional and has a type to use
        # lists are supported as comma separated and bools support a bunch of conventions
        # the rest of the primitives (strings and numbers) parse automatically
        if isinstance(tree, Optional):
            t = tree.type
        elif isinstance(tree, list):
            t = lambda arg: arg.split(",")  # noqa: E731
        elif isinstance(tree, bool):
            t = lambda arg: bool(strtobool(arg))  # noqa: E731
        else:
            t = type(tree)

        arg = "--" + path.replace("_", "-").replace("\0", "-")
        if t is list:
            parser_.add_argument(arg, nargs="+", help=h, default=tree)
        else:
            parser_.add_argument(arg, type=t, help=h, default=tree)
        leaves_.append(path)


def override_config(args_: dict, leaves_: list, config_: dict):
    """override the defaults given what was passed"""
    for leaf in leaves_:
        keys = leaf.split("\0")
        v = config_
        for k in keys[:-1]:
            v = v[k]
        v[keys[-1]] = args_.get(leaf.replace("\0", "_"))
        if isinstance(v[keys[-1]], type(Optional())):
            del v[keys[-1]]


# set up available program options
leaves = []
parser = ArgumentParser(
    description="Generate valhalla configuration",
    formatter_class=ArgumentDefaultsHelpFormatter,
)
add_leaf_args("", config, leaves, parser, help_text)

# merge-related arguments
parser.add_argument(
    "-m",
    "--merge-config",
    type=Path,
    help="Path to existing config file to merge with. Existing values are preserved, new keys are added.",
)
parser.add_argument(
    "-o",
    "--output",
    type=Path,
    help="Output file path (default: stdout)",
)
parser.add_argument(
    "-r",
    "--remove-old",
    nargs="*",
    metavar="KEY",
    help="When merging, remove old keys. Without arguments, removes all keys not in new config. "
    "With arguments (JSON dot notation, e.g. 'mjolnir.logging'), removes only those keys.",
)
parser.add_argument(
    "-p",
    "--report",
    action="store_true",
    help="When merging, print the merge report.",
)


# entry point to program
def main():
    # TODO: add argument to set base path and use in all other path based values
    args = parser.parse_args()

    # encapsulate to test easier
    override_config(args.__dict__, leaves, config)

    # if merging with existing config
    output_config = config
    report = None
    if args.merge_config:
        try:
            with args.merge_config.open("r") as f:
                existing_config = json.load(f)
        except FileNotFoundError:
            print(f"Error: Config file not found: {args.merge_config}", file=sys.stderr)
            sys.exit(1)
        except json.JSONDecodeError as e:
            print(f"Error: Invalid JSON in config: {e}", file=sys.stderr)
            sys.exit(1)

        report = MergeReport()
        # args.remove_old is None if not specified, [] if specified without args, or a list of keys
        output_config = merge_json(existing_config, config, report, args.remove_old)

    if args.report:
        if report:
            report.print_report()
        elif not args.merge_config:
            print("Warn: --report requires --merge-config", file=sys.stderr)

    # format output
    output_json = json.dumps(output_config, indent=2)

    # write output
    if args.output:
        with args.output.open("w") as f:
            f.write(output_json)
            f.write("\n")
        print(f"Config written to: {args.output}", file=sys.stderr)
    else:
        print(output_json)


if __name__ == "__main__":
    main()
