Python Library Documentation: module multiazurestoragefreighter

NAME
    multiazurestoragefreighter

CLASSES
    AgeFreighter(builtins.object)
        MultiAzureStorageFreighter
    builtins.object
        AzureExtensions
        BlobUploader
        GraphLoader
        StorageAccount
        StorageLoader
        TempTables
    
    class AzureExtensions(builtins.object)
     |  AzureExtensions(subscription_id: str = '', resource_group_name: str = '', pg_server_name: str = '', pool: Optional[psycopg_pool.pool_async.AsyncConnectionPool] = None, extensions: List[str] = [])
     |  
     |  Methods defined here:
     |  
     |  __init__(self, subscription_id: str = '', resource_group_name: str = '', pg_server_name: str = '', pool: Optional[psycopg_pool.pool_async.AsyncConnectionPool] = None, extensions: List[str] = [])
     |      Initialize self.  See help(type(self)) for accurate signature.
     |  
     |  async create(self) -> None
     |      Create the Azure Extensions for PostgreSQL Flex Server.
     |      
     |      Returns:
     |          None
     |  
     |  enable(self) -> None
     |      Enable the Azure Storage Extension for PostgreSQL Flex Server.
     |      
     |      Returns:
     |          None
     |  
     |  ----------------------------------------------------------------------
     |  Data descriptors defined here:
     |  
     |  __dict__
     |      dictionary for instance variables (if defined)
     |  
     |  __weakref__
     |      list of weak references to the object (if defined)
    
    class BlobUploader(builtins.object)
     |  BlobUploader(storage_account_name: str = '', access_key: str = '', blob_container_name: str = '', file_paths_dict: Dict[str, List[str]] = {}, lines_per_chunk: int = 10000)
     |  
     |  Methods defined here:
     |  
     |  __init__(self, storage_account_name: str = '', access_key: str = '', blob_container_name: str = '', file_paths_dict: Dict[str, List[str]] = {}, lines_per_chunk: int = 10000)
     |      Initialize self.  See help(type(self)) for accurate signature.
     |  
     |  createTempFile(self, temp_file_name: str = '', count: int = 0, header_line: str = '', add_header: bool = True) -> _io.BufferedRandom
     |      Create a temporary file.
     |      
     |      Args:
     |          temp_file_name (str): The name of the temporary file.
     |          count (int): The count of the temporary file.
     |          header_line (str): The header line.
     |          add_header (bool): Whether to add the header.
     |      
     |      Returns:
     |          temp_file: The temporary file.
     |  
     |  getColumnsInCSV(self, csv_path: str = '') -> tuple
     |      Get the columns in the CSV file.
     |      
     |      Args:
     |          csv_path (str): The path to the CSV file.
     |      
     |      Returns:
     |          tuple: The columns in the CSV file and the newline character.
     |  
     |  splitFile(self) -> None
     |      Split the file into chunks.
     |      
     |      Returns:
     |          None
     |  
     |  async upload(self) -> None
     |      Upload CSV files to the Blob Container.
     |      
     |      Returns:
     |          None
     |  
     |  async uploadBlob(self, file_path: str, container_client) -> None
     |      Upload a file to the Blob Container.
     |      
     |      Args:
     |          file_path (str): The path to the file.
     |          container_client: The container client.
     |      
     |      Returns:
     |          None
     |  
     |  ----------------------------------------------------------------------
     |  Data descriptors defined here:
     |  
     |  __dict__
     |      dictionary for instance variables (if defined)
     |  
     |  __weakref__
     |      list of weak references to the object (if defined)
    
    class GraphLoader(builtins.object)
     |  GraphLoader(tbls_from_storage: Dict[str, str] = {}, total_lines: Dict[str, int] = {}, vertex_args: List[Any] = [], edge_args: List[Any] = [], columns_in_csvs: Dict[str, List[str]] = {}, id_map_tbls: Dict[str, str] = {}, graph_name: str = '', records_per_thread: int = 0, pool: Optional[psycopg_pool.pool_async.AsyncConnectionPool] = None)
     |  
     |  Methods defined here:
     |  
     |  __init__(self, tbls_from_storage: Dict[str, str] = {}, total_lines: Dict[str, int] = {}, vertex_args: List[Any] = [], edge_args: List[Any] = [], columns_in_csvs: Dict[str, List[str]] = {}, id_map_tbls: Dict[str, str] = {}, graph_name: str = '', records_per_thread: int = 0, pool: Optional[psycopg_pool.pool_async.AsyncConnectionPool] = None)
     |      Initialize self.  See help(type(self)) for accurate signature.
     |  
     |  async executeQuery(self, pool: psycopg_pool.pool_async.AsyncConnectionPool, query: str) -> None
     |      Execute a query.
     |      
     |      Args:
     |          pool (AsyncConnectionPool): The connection pool.
     |          query (str): The query to execute.
     |      
     |      Returns:
     |          None
     |  
     |  async getFirstId(self, graph_name: str = '', label_type: str = '') -> int
     |      Get the first id for a vertex or edge.
     |      
     |      Args:
     |          graph_name (str): The name of the graph.
     |          label_type (str): The type or the label.
     |      
     |      Returns:
     |          int: The first id.
     |  
     |  async load(self) -> None
     |      Load the graph data.
     |      
     |      Returns:
     |          None
     |  
     |  ----------------------------------------------------------------------
     |  Static methods defined here:
     |  
     |  quotedGraphName(graph_name: str = '') -> str
     |      Quote the graph name.
     |      
     |      Args:
     |          graph_name (str): The name of the graph.
     |      
     |      Returns:
     |          str: The quoted graph name
     |  
     |  ----------------------------------------------------------------------
     |  Data descriptors defined here:
     |  
     |  __dict__
     |      dictionary for instance variables (if defined)
     |  
     |  __weakref__
     |      list of weak references to the object (if defined)
    
    class MultiAzureStorageFreighter(AgeFreighter)
     |  Method resolution order:
     |      MultiAzureStorageFreighter
     |      AgeFreighter
     |      builtins.object
     |  
     |  Methods defined here:
     |  
     |  async __aenter__(self)
     |      Enter the context manager.
     |  
     |  async __aexit__(self, exc_type, exc, tb)
     |      Exit the context manager.
     |  
     |  __init__(self)
     |      Initialize the MultiAzureStorageFreighter object.
     |  
     |  findAzureSubscriptionID(self) -> bool
     |      Fetch the Azure Subscription ID.
     |      
     |      Returns:
     |          bool: True if the Azure Subscription ID is fetched successfully, False otherwise.
     |  
     |  async load(self, vertex_args: List[Any] = [], edge_args: List[Any] = [], graph_name: str = '', chunk_size: int = 128, create_graph: bool = True, **kwargs: Any) -> None
     |      Load graph data into PostgreSQL Flex Server using Azure Storage.
     |      
     |      Args:
     |          vertex_args (List[Dict[str, str]]): The arguments for the vertices.
     |          edge_args (List[Dict[str, str]]): The arguments for the edges.
     |          graph_name (str): The name of the graph.
     |          chunk_size (int): The size of the chunks to create.
     |          create_graph (bool): Whether to create the graph.
     |          **kwargs: Additional keyword arguments.
     |      
     |      Returns:
     |          None
     |  
     |  async setParameters(self) -> bool
     |      Set the parameters for the Azure Storage Freighter.
     |      
     |      Returns:
     |          bool: True if the parameters are set successfully, False otherwise.
     |  
     |  ----------------------------------------------------------------------
     |  Static methods defined here:
     |  
     |  checkColumns(columns_in_csvs: Dict[str, List[str]], vertex_args: List[Any], edge_args: List[Any]) -> None
     |      Check if the columns contain required arguments.
     |      
     |      Args:
     |          columns_in_csvs (Dict[str, List[str]]): The columns in the CSV files.
     |          vertex_args (List[Dict[str, str]]): The arguments for the vertices.
     |          edge_args (List[Dict[str, str]]): The arguments for the edges.
     |      
     |      Returns:
     |          None
     |  
     |  isValidAzureSubscriptionID(subscriptionID: str = '') -> bool
     |      Check if the Azure Subscription ID is valid.
     |      
     |      Args:
     |          subscriptionID (str): The Azure Subscription ID.
     |      
     |      Returns:
     |          bool: True if the Azure Subscription ID is valid, False otherwise.
     |  
     |  ----------------------------------------------------------------------
     |  Methods inherited from AgeFreighter:
     |  
     |  async close(self) -> None
     |      Close the connection pool.
     |      
     |      Returns:
     |          None
     |  
     |  async connect(self, dsn: str = '', max_connections: int = 64, min_connections: int = 4, log_level=None, **kwargs) -> None
     |      Open a connection pool.
     |      
     |      Args:
     |          dsn (str): The data source name.
     |          max_connections (int): The maximum number of connections.
     |          min_connections (int): The minimum number of connections.
     |          log_level: The log level.
     |          **kwargs: Additional keyword arguments.
     |      
     |      Returns:
     |          None
     |  
     |  async copyChunk(self, chunk: pandas.core.frame.DataFrame, first_id: int = 0, graph_name: str = '', label: str = '', id_maps: dict = {}, is_edge: bool = False) -> None
     |      Copy a chunk of data to the PostgreSQL database.
     |      
     |      Args:
     |          chunk (pd.DataFrame): The chunk of data to copy.
     |          first_id (int): The first ID.
     |          graph_name (str): The name of the graph.
     |          label (str): The label of the data.
     |          id_maps (dict): The ID maps.
     |          is_edge (bool): Whether the data is an edge.
     |      
     |      Returns:
     |          None
     |  
     |  async copyEdges(self, edges: pandas.core.frame.DataFrame, edge_type: str = '', edge_props: list = [], chunk_size: int = 0) -> None
     |      Create edges in the PostgreSQL database via the COPY protocol.
     |      
     |      Args:
     |          edges (pd.DataFrame): The edges to create.
     |          edge_type (str): The type of the edges.
     |          edge_props (list): The properties of the edges.
     |          chunk_size (int): The size of the chunks to create.
     |      
     |      Returns:
     |          None
     |  
     |  async copyVertices(self, vertices: pandas.core.frame.DataFrame, vertex_label: str = '', chunk_size: int = 0) -> None
     |      Create vertices in the PostgreSQL database via the COPY protocol.
     |      
     |      Args:
     |          vertices (pd.DataFrame): The vertices to create.
     |          vertex_label (str): The label of the vertices.
     |          chunk_size (int): The size of the chunks to create.
     |      
     |      Returns:
     |          None
     |  
     |  createEdgeCypher(self, row: pandas.core.series.Series, edge_type: str = '', edge_props: list = []) -> str
     |      Create a Cypher query to create an edge.
     |      
     |      Args:
     |          row (pd.core.series.Series): The row to create the edge from.
     |          edge_type (str): The type of the edge.
     |          edge_props (list): The properties of the edge.
     |      
     |      Returns:
     |          str: The Cypher query to create the edge.
     |  
     |  async createEdges(self, edges: pandas.core.frame.DataFrame, edge_type: str = '', edge_props: list = [], chunk_size: int = 3, direct_loading: bool = False, use_copy: bool = False) -> None
     |      Create edges in the PostgreSQL database.
     |      
     |      Args:
     |          edges (pd.DataFrame): The edges to create.
     |          edge_type (str): The type of the edges.
     |          edge_props (list): The properties of the edges.
     |          chunk_size (int): The size of the chunks to create.
     |          direct_loading (bool): Whether to load the edges directly.
     |          use_copy (bool): Whether to use the COPY protocol to load the edges.
     |      
     |      Returns:
     |          None
     |  
     |  async createEdgesCypher(self, edges: pandas.core.frame.DataFrame, edge_type: str = '', edge_props: list = [], chunk_size: int = 0) -> None
     |      Create edges in the PostgreSQL database via Cypher.
     |      
     |      Args:
     |          edges (pd.DataFrame): The edges to create.
     |          edge_type (str): The type of the edges.
     |          edge_props (list): The properties of the edges.
     |          chunk_size (int): The size of the chunks to create.
     |      
     |      Returns:
     |          None
     |  
     |  async createEdgesDirectly(self, edges: pandas.core.frame.DataFrame, edge_type: str = '', edge_props: list = [], chunk_size: int = 0) -> None
     |      Create edges in the PostgreSQL database directly.
     |      
     |      Args:
     |          edges (pd.DataFrame): The edges to create.
     |          edge_type (str): The type of the edges.
     |          edge_props (list): The properties of the edges.
     |          chunk_size (int): The size of the chunks to create.
     |      
     |      Returns:
     |          None
     |  
     |  async createGraphFromDataFrame(self, graph_name: str, src: pandas.core.frame.DataFrame, existing_node_ids: list = [], first_chunk: bool = True, start_v_label: str = '', start_id: str = '', start_props: list[str] = [], edge_type: str = '', edge_props: list[str] = [], end_v_label: str = '', end_id: str = '', end_props: list[str] = [], chunk_size: int = 3, direct_loading: bool = False, create_graph: bool = False, use_copy: bool = False) -> None
     |      Create a graph from a DataFrame.
     |      
     |      Args:
     |          graph_name (str): The name of the graph to load the data into.
     |          src (pd.DataFrame): The DataFrame to load the data from.
     |          existing_node_ids (list): The existing node IDs.
     |          first_chunk (bool): Whether it is the first chunk.
     |          start_v_label (str): The label of the start vertex.
     |          start_id (str): The ID of the start vertex.
     |          start_props (list): The properties of the start vertex.
     |          edge_type (str): The type of the edge.
     |          edge_props (list): The properties of the edge.
     |          end_v_label (str): The label of the end vertex.
     |          end_id (str): The ID of the end vertex.
     |          end_props (list): The properties of the end vertex.
     |          chunk_size (int): The size of the chunks to create.
     |          direct_loading (bool): Whether to load the data directly.
     |          create_graph (bool): Whether to create the graph.
     |          use_copy (bool): Whether to use the COPY protocol to load the data.
     |      
     |      Returns:
     |          None
     |  
     |  async createLabelType(self, label_type: str = '', value: str = '') -> None
     |      Create a label type in the PostgreSQL database.
     |      
     |      Args:
     |          label_type (str): The type of the label to create. It can be either "vertex" or "edge".
     |          value (str): The value of the label to create.
     |      
     |      Returns:
     |          None
     |  
     |  createValuesDirectly(self, row: pandas.core.series.Series, edge_props: list = [], id_maps: dict = {}) -> str
     |      Create values directly.
     |      
     |      Args:
     |          row (pd.core.series.Series): The row to create the values from.
     |          edge_props (list): The properties of the edges.
     |          id_maps (dict): The ID maps.
     |      
     |      Returns:
     |          str: The values.
     |  
     |  async createVertices(self, vertices: pandas.core.frame.DataFrame, vertex_label: str = '', chunk_size: int = 3, direct_loading: bool = False, use_copy: bool = False) -> None
     |      Create vertices in the PostgreSQL database.
     |      
     |      Args:
     |          vertices (pd.DataFrame): The vertices to create.
     |          vertex_label (str): The label of the vertices.
     |          chunk_size (int): The size of the chunks to create.
     |          direct_loading (bool): Whether to load the vertices directly.
     |          use_copy (bool): Whether to use the COPY protocol to load the vertices.
     |      
     |      Returns:
     |          None
     |  
     |  async createVerticesCypher(self, vertices: pandas.core.frame.DataFrame, label: str = '', chunk_size: int = 0) -> None
     |      Create vertices in the PostgreSQL database via Cypher.
     |      
     |      Args:
     |          vertices (pd.DataFrame): The vertices to create.
     |          label (str): The label of the vertices.
     |          chunk_size (int): The size of the chunks to create.
     |      
     |      Returns:
     |          None
     |      
     |      Note:
     |          psycopg cannot handle Cypher's UNWIND as of 9 Dec 2024.
     |  
     |  async createVerticesDirectly(self, vertices: pandas.core.frame.DataFrame, label: str = '', chunk_size: int = 0) -> None
     |      Create vertices in the PostgreSQL database directly.
     |      
     |      Args:
     |          vertices (pd.DataFrame): The vertices to create.
     |          label (str): The label of the vertices.
     |          chunk_size (int): The size of the chunks to create.
     |      
     |      Returns:
     |          None
     |  
     |  async executeQuery(self, query: str = '') -> None
     |      Execute a query with an async connection pool.
     |      
     |      Args:
     |          query (str): The query to execute.
     |      
     |      Returns:
     |          None
     |  
     |  async executeWithTasks(self, target: Callable, args: list = []) -> None
     |      Execute queries with tasks.
     |      
     |      Args:
     |          target (Callable): The target function to execute.
     |          args (list): The arguments to pass to the target function.
     |      
     |      Returns:
     |          None
     |  
     |  async getFirstId(self, graph_name: str = '', label_type: str = '') -> int
     |      Get the first id for a vertex or edge.
     |      
     |      Args:
     |          graph_name (str): The name of the graph to get the first id for.
     |          label_type (str): The type or the label to get the first id for.
     |      
     |      Returns:
     |          int: The first id.
     |  
     |  async getIdMaps(self, edges: pandas.core.frame.DataFrame) -> dict
     |      Get the ID maps between entry_id and id (graphid).
     |      
     |      Args:
     |          edges (pd.DataFrame): The edges to create.
     |      
     |      Returns:
     |          dict: The ID maps.
     |  
     |  processChunkDirectly(self, chunk: pandas.core.frame.DataFrame, query: str = '', edge_props: list = [], id_maps: dict = {}) -> list
     |      Process a chunk of data directly.
     |      
     |      Args:
     |          chunk (pd.DataFrame): The chunk of data to process.
     |          query (str): The query to execute.
     |          edge_props (list): The properties of the edges.
     |          id_maps (dict): The ID maps.
     |      
     |      Returns:
     |          list: The processed chunk of data.
     |  
     |  async setUpGraph(self, graph_name: str = '', create_graph: bool = False) -> None
     |      Set up the graph in the PostgreSQL database.
     |      
     |      Args:
     |          graph_name (str): The name of the graph to set up.
     |          create_graph (bool): Whether to create the graph.
     |      
     |      Returns:
     |          None
     |  
     |  showProgress(self, type: str = '', i: int = 0, total: int = 0) -> None
     |      Show the progress of the loading.
     |      
     |      Args:
     |          type (str): The type of the loading.
     |          i (int): The current index.
     |          total (int): The total number of items.
     |      
     |      Returns:
     |          None
     |  
     |  ----------------------------------------------------------------------
     |  Static methods inherited from AgeFreighter:
     |  
     |  checkKeys(keys: list = [], elements: list = [], error_msg: str = '') -> None
     |      Check if the keys of the CSV file match the elements.
     |      
     |      Args:
     |          keys (list): The keys of the CSV file.
     |          elements (list): The elements to check.
     |          error_msg (str): The error message to display.
     |      
     |      Returns:
     |          None
     |  
     |  quotedGraphName(graph_name: str = '') -> str
     |      Quote the graph name.
     |      
     |      Args:
     |          graph_name (str): The name of the graph.
     |      
     |      Returns:
     |          str: The quoted graph name
     |  
     |  ----------------------------------------------------------------------
     |  Data descriptors inherited from AgeFreighter:
     |  
     |  __dict__
     |      dictionary for instance variables (if defined)
     |  
     |  __weakref__
     |      list of weak references to the object (if defined)
    
    class StorageAccount(builtins.object)
     |  StorageAccount(subscription_id: str = '', resource_group_name: str = '', location: str = '', pool: Optional[psycopg_pool.pool_async.AsyncConnectionPool] = None)
     |  
     |  Methods defined here:
     |  
     |  __del__(self)
     |  
     |  __init__(self, subscription_id: str = '', resource_group_name: str = '', location: str = '', pool: Optional[psycopg_pool.pool_async.AsyncConnectionPool] = None)
     |      Initialize self.  See help(type(self)) for accurate signature.
     |  
     |  async attach(self) -> None
     |  
     |  create(self) -> None
     |      Create a Storage Account and a Blob Container.
     |      
     |      Returns:
     |          None
     |  
     |  ----------------------------------------------------------------------
     |  Data descriptors defined here:
     |  
     |  __dict__
     |      dictionary for instance variables (if defined)
     |  
     |  __weakref__
     |      list of weak references to the object (if defined)
    
    class StorageLoader(builtins.object)
     |  StorageLoader(tmp_file_lists: Dict[str, List[str]] = {}, total_lines: Dict[str, int] = {}, storage_account_name: str = '', blob_container_name: str = '', tbls_from_storage: Dict[str, str] = {}, columns_in_tbls_from_storage: Dict[str, str] = {}, columns_in_csvs: Dict[str, List[str]] = {}, pool: Optional[psycopg_pool.pool_async.AsyncConnectionPool] = None)
     |  
     |  Methods defined here:
     |  
     |  __init__(self, tmp_file_lists: Dict[str, List[str]] = {}, total_lines: Dict[str, int] = {}, storage_account_name: str = '', blob_container_name: str = '', tbls_from_storage: Dict[str, str] = {}, columns_in_tbls_from_storage: Dict[str, str] = {}, columns_in_csvs: Dict[str, List[str]] = {}, pool: Optional[psycopg_pool.pool_async.AsyncConnectionPool] = None)
     |      Initialize self.  See help(type(self)) for accurate signature.
     |  
     |  async executeQuery(self, pool: psycopg_pool.pool_async.AsyncConnectionPool, query: str) -> None
     |      Execute a query.
     |      
     |      Args:
     |          pool (AsyncConnectionPool): The connection pool.
     |          query (str): The query to execute.
     |      
     |      Returns:
     |          None
     |  
     |  async load(self) -> None
     |      Load files into temporary tables.
     |      
     |      Returns:
     |          None
     |  
     |  ----------------------------------------------------------------------
     |  Data descriptors defined here:
     |  
     |  __dict__
     |      dictionary for instance variables (if defined)
     |  
     |  __weakref__
     |      list of weak references to the object (if defined)
    
    class TempTables(builtins.object)
     |  TempTables(columns_in_csvs: Dict[str, List[str]] = {}, file_paths_dict: Dict[str, List[str]] = {}, pool: Optional[psycopg_pool.pool_async.AsyncConnectionPool] = None)
     |  
     |  Methods defined here:
     |  
     |  __init__(self, columns_in_csvs: Dict[str, List[str]] = {}, file_paths_dict: Dict[str, List[str]] = {}, pool: Optional[psycopg_pool.pool_async.AsyncConnectionPool] = None)
     |      Initialize self.  See help(type(self)) for accurate signature.
     |  
     |  async create(self) -> None
     |      Create temporary tables.
     |      
     |      Returns:
     |          None
     |  
     |  async delete(self) -> None
     |      Delete temporary tables.
     |      
     |      Returns:
     |          None
     |  
     |  ----------------------------------------------------------------------
     |  Data descriptors defined here:
     |  
     |  __dict__
     |      dictionary for instance variables (if defined)
     |  
     |  __weakref__
     |      list of weak references to the object (if defined)

DATA
    Any = typing.Any
        Special type indicating an unconstrained type.
        
        - Any is compatible with every type.
        - Any assumed to have all methods.
        - All values assumed to be instances of Any.
        
        Note that all the above statements are true from the point of view of
        static type checkers. At runtime, Any should not be used with instance
        or class checks.
    
    Dict = typing.Dict
        A generic version of dict.
    
    List = typing.List
        A generic version of list.
    
    Optional = typing.Optional
        Optional type.
        
        Optional[X] is equivalent to Union[X, None].
    
    log = <Logger multiazurestoragefreighter (WARNING)>

FILE
    multiazurestoragefreighter.py

