Module pipettin-piper.piper.datatools.datautils

Functions

def get_colrow(index: int, ncols: int, nrow: int = None)
Expand source code
def get_colrow(index: int, ncols: int, nrow: int=None):
    """Calculate the column and row of an item from its index and the grid's dimensions.

    The grid is traversed row by row: the column is the remainder of dividing the index
    by the number of columns, and the row is the integer quotient plus one.

    Args:
        index (int): Index of the item (index 0 maps to column 0, row 1).
        ncols (int): Number of columns in the grid.
        nrow (int, optional): Number of rows in the grid. Defaults to None, in which case
            the calculated row is not checked against any limit.

    Raises:
        ValueError: When a row count is provided, and the calculated row exceeds it, this exception is raised.

    Returns:
        tuple: The zero-based column index and the one-based row number, in that order.
    """
    col = index % ncols          # "%": remainder.
    row = (index // ncols) + 1   # "//": integer division; "+ 1" makes the row one-based.
    # Test for None first: comparing an int against None raises TypeError.
    if nrow is not None and row > nrow:
        raise ValueError("get_colrow: the resulting row number exceeds the number of available rows.")
    return col, row

Calculate the row and column number from an index, and the grid's dimensions.

Args

index : int
Index of the item.
ncols : int
Number of columns in the grid.
nrow : int, optional
Number of rows in the grid. Defaults to None, in which case the calculated row is not checked.

Raises

ValueError
When a row count is provided, and the calculated row exceeds it, this exception is raised.

Returns

tuple
The zero-based column index and the one-based row number, in that order.
def get_db_from_config(config) ‑> dict
Expand source code
def get_db_from_config(config) -> tuple:
    """Instantiate a data tools object from config and dump its contents to a python dict.

    Args:
        config (dict): Configuration mapping. Its "datatools" entry names the submodule of
            `piper.datatools` to import, and its "database" entry holds the keyword
            arguments passed to that module's `load_from_config` function.

    Returns:
        tuple: The data (dict) returned by the data tools object's `get_db()`, followed by
            its database name (str).
    """
    logging.info(f"Loading datatools module '{config['datatools']}'.")
    # The backend is selected at runtime by name, hence the dynamic import.
    module = importlib.import_module("piper.datatools." + config["datatools"])
    datatools = module.load_from_config(**config["database"])
    return datatools.get_db(), datatools.database_name

Instantiate a data tools object from config and dump its contents to a python dict.

Returns a tuple with the data (dict) and the database name (str).

def get_index(col_id: int, row_id: str, ncols: int, nrows: int) ‑> int
Expand source code
def get_index(col_id: int, row_id: str, ncols: int, nrows: int) -> int:
    """Calculate the index of an element in a matrix based on column id, row id, and matrix dimensions.

    Elements are numbered row by row, so the index is `row_index * ncols + col_id`,
    where `row_index` is the zero-based index obtained from `row_to_index(row_id)`.

    Args:
        col_id (int): The column index (zero-based).
        row_id (str): The row id (letters).
        ncols (int): Number of columns in the matrix.
        nrows (int): Number of rows in the matrix.

    Returns:
        int: The index of the element in the matrix.

    Raises:
        ValueError: If the row or column falls outside the matrix, on either side
            (negative, or not smaller than the corresponding dimension).

    Example:
      # Test the function with an example
      get_index(2, 'B', 5, 10)  # Column 3 and row B: returns 1 * 5 + 2 = 7.
    """
    row_index = row_to_index(row_id)
    # Check the lower bounds as well: a negative row or column would otherwise
    # yield a wrong (possibly negative) index instead of an error.
    row_in_range = 0 <= row_index < nrows
    col_in_range = 0 <= col_id < ncols
    if not (row_in_range and col_in_range):
        raise ValueError("get_index: Row or column id exceeds matrix dimensions.")
    return row_index * ncols + col_id

Calculate the index of an element in a matrix based on column id, row id, and matrix dimensions.

Args

col_id : int
The column index (zero-based).
row_id : str
The row id (letters).
ncols : int
Number of columns in the matrix.
nrows : int
Number of rows in the matrix.

Returns

int
The index of the element in the matrix.

Raises

ValueError
If the calculated index is out of bounds.

Example

Test the function with an example

get_index(2, 'B', 5, 10) # This should correspond to column 3 and row B.

def get_objects_from_db(db_objects,
db_name: str,
workspace_name: str = None,
workspace: dict = None,
platforms: list = None,
containers: list = None,
tools: list = None,
subset_platforms: bool = True)
Expand source code
def get_objects_from_db(db_objects, db_name: str, workspace_name: str = None,
                        workspace: dict = None, platforms: list = None,
                        containers: list = None, tools: list = None,
                        subset_platforms: bool = True):
    """
    Collect a workspace and its platforms, containers, and tools from one database.

    Any of the four objects passed in by the caller is returned as given; only the
    missing ones are looked up in the database. The "thumbnail" key is deleted from the
    workspace in place when present, which also applies to a caller-provided workspace.

    Args:
        db_objects (dict): A dictionary of databases, each one a full database as a dictionary,
                           containing objects organized by type (i.e. collections). It can also be
                           a URL to this object.
        db_name (str): Key of the database to select from `db_objects`.
        workspace_name (str, optional): Name of the workspace to look up in the database.
                                        Only used when `workspace` is None.
        workspace (dict, optional): Workspace data to use as is. Defaults to None, in which
                                    case it is looked up by `workspace_name`.
        platforms (list, optional): Platform data to use as is. Defaults to None, in which
                                    case it is taken from the database.
        containers (list, optional): Container data to use as is. Defaults to None, in which
                                     case it is taken from the database.
        tools (list, optional): Tool data to use as is. Defaults to None, in which case it is
                                taken from the database.
        subset_platforms (bool, optional): If True, platforms taken from the database are
                                           limited to those whose name is referenced by an
                                           item of the workspace. Defaults to True.

    Raises:
        ValueError: If neither `workspace` nor `workspace_name` is provided.
        ValueError: If no workspace in the database is named `workspace_name`.

    Returns:
        tuple: In this order:
            - workspace (dict): The retrieved or provided workspace object.
            - platforms (list): The retrieved or provided list of platform objects.
            - containers (list): The retrieved or provided list of container objects.
            - tools (list): The retrieved or provided list of tool objects.

    """
    # Resolve the source (dict or URL) and select the requested database.
    database = load_objects(db_objects)[db_name]

    # Workspace: look it up by name unless the caller supplied one.
    if workspace is None:
        known_names = [entry["name"] for entry in database["workspaces"]]
        if workspace_name is None:
            raise ValueError("workspace_name must be set if no workspace is provided.")
        if workspace_name not in known_names:
            raise ValueError(f"Workspace with name '{workspace_name}' not found in workspaces: {known_names}")
        workspace = next(entry for entry in database["workspaces"]
                         if entry["name"] == workspace_name)

    # Drop the thumbnail, if there is one.
    workspace.pop("thumbnail", None)

    # Platforms: optionally restricted to the ones the workspace items refer to.
    if platforms is None and subset_platforms:
        referenced = [item["platform"] for item in workspace.get("items", [])]
        platforms = [platform for platform in database["platforms"]
                     if platform["name"] in referenced]
    elif platforms is None:
        platforms = database["platforms"]

    # Containers and tools: taken whole from the database when not supplied.
    containers = database["containers"] if containers is None else containers
    tools = database["tools"] if tools is None else tools

    return workspace, platforms, containers, tools

Retrieve workspace, platforms, containers, and tools from a specified database object.

Thumbnails are removed from workspaces if found.

Args

db_objects : dict
A dictionary of databases, each one a full database as a dictionary, containing objects organized by type (i.e. collections). It can also be a URL to this object.
db_name : str
Name of the database to load from db_objects.
workspace_name : str, optional
Name of the workspace to retrieve from the database.
workspace : dict, optional
Existing workspace data. Defaults to None, in which case the workspace is retrieved from the database by name.
platforms : list, optional
Existing list of platform data. Defaults to None, in which case the platforms are retrieved from the database.
containers : list, optional
Existing list of container data. Defaults to None, in which case the containers are retrieved from the database.
tools : list, optional
Existing list of tool data. Defaults to None, in which case the tools are retrieved from the database.
subset_platforms : bool, optional
If True, only platforms referenced in the workspace will be retrieved. Defaults to True.

Raises

ValueError
If workspace_name is None and workspace is not provided.
ValueError
If workspace_name does not correspond to any workspace in the database.

Returns

tuple
A tuple containing, in this order:
- workspace (dict): The retrieved or provided workspace object.
- platforms (list): The retrieved or provided list of platform objects.
- containers (list): The retrieved or provided list of container objects.
- tools (list): The retrieved or provided list of tool objects.
def load_default_objects()
Expand source code
def load_default_objects():
    """Load the objects found at the module-level `db_location`, using `load_objects`.

    `db_location` is defined elsewhere in the module, outside this function.

    Returns:
        The value returned by `load_objects` for `db_location`.
    """
    default_objects = load_objects(db_location)
    return default_objects
def load_defaults()
Expand source code
def load_defaults():
    """Load the default objects exported in the pipettin-gui repository (example data).

    Each collection is fetched with `load_objects` from its JSON file on GitLab. Only the
    first entry of the databases and workspaces files is kept; platforms, containers and
    tools are returned whole.

    Returns:
        tuple: workspace, platforms, containers, tools, databases (in that order).
    """
    databases_url = "https://gitlab.com/pipettin-bot/pipettin-gui/-/raw/develop/api/src/db/defaults/databases.json"
    workspaces_url = "https://gitlab.com/pipettin-bot/pipettin-gui/-/raw/develop/api/src/db/defaults/workspaces.json"
    platforms_url = "https://gitlab.com/pipettin-bot/pipettin-gui/-/raw/develop/api/src/db/defaults/platforms.json"
    containers_url = "https://gitlab.com/pipettin-bot/pipettin-gui/-/raw/develop/api/src/db/defaults/containers.json"
    tools_url = "https://gitlab.com/pipettin-bot/pipettin-gui/-/raw/develop/api/src/db/defaults/tools.json"

    # Fetch in the same order as the URLs are listed above.
    first_database = load_objects(databases_url)[0]
    first_workspace = load_objects(workspaces_url)[0]
    all_platforms = load_objects(platforms_url)
    all_containers = load_objects(containers_url)
    all_tools = load_objects(tools_url)

    return first_workspace, all_platforms, all_containers, all_tools, first_database
def load_objects(definitions: str | dict) ‑> dict
Expand source code
def load_objects(definitions: Union[str, dict, list]) -> Union[dict, list]:
    """
    Load and return objects from various input types, including URLs, file paths, or Python dictionaries.

    Args:
        definitions (Union[str, dict, list]): The source from which to load the objects.
            - If a string with an "http" or "https" scheme is provided, it is loaded as a URL.
            - If any other string is provided, it is loaded as a file path, as long as it
              points to an existing file.
            - If a dictionary or list is provided, it is deep-copied and returned.

    Returns:
        Union[dict, list]: The loaded objects, or a deep copy of the provided ones.

    Raises:
        ValueError: If `definitions` is a string that does not correspond to a valid URL or file path,
                    or if the input type is unsupported.
    """
    if isinstance(definitions, str):
        url_scheme: str = urllib.parse.urlparse(definitions).scheme
        if url_scheme in ("http", "https"):
            return load_objects_from_url(definitions)
        # Test the path itself, not the scheme. The scheme is not required to be empty
        # here, because a path with a drive-letter prefix parses with a one-letter scheme.
        if os.path.isfile(definitions):
            return load_objects_from_file(definitions)
    elif isinstance(definitions, (dict, list)):
        # Copy, so that callers can modify the result without altering the input.
        return deepcopy(definitions)

    # Handle unmatched condition.
    raise ValueError(f"Could not load tool data from the provided definition: '{definitions}'. Unhandled condition.")

Load and return objects from various input types, including URLs, file paths, or Python dictionaries.

Args

definitions : Union[str, dict]
The source from which to load the objects. - If a string is provided, it is treated as either a URL or a file path. - If a dictionary or list is provided, it is deep-copied and returned.

Returns

dict
A dictionary containing the loaded objects.

Raises

ValueError
If definitions is a string that does not correspond to a valid URL or file path, or if the input type is unsupported.
def load_objects_from_file(file_path)
Expand source code
def load_objects_from_file(file_path):
    """Parse the JSON document stored at `file_path` (read as UTF-8) and return the result."""
    with open(file_path, mode="r", encoding="utf-8") as json_file:
        return json.load(json_file)
def load_objects_from_url(target_url)
Expand source code
def load_objects_from_url(target_url):
    """Open `target_url`, parse the response body as JSON, and return the result.

    Args:
        target_url (str): URL of the JSON document.

    Returns:
        The parsed JSON content.
    """
    # The context manager closes the response even when parsing fails.
    with urllib.request.urlopen(target_url) as response:
        return json.load(response)
def row_to_index(row_id: str) ‑> int
Expand source code
def row_to_index(row_id: str) -> int:
    """Convert a row id consisting of letters (e.g., A, B, Z, AA) to an index.

    Letters are read as digits of a base-26 number without a zero digit (A=1 ... Z=26),
    so "A" -> 0, "Z" -> 25, "AA" -> 26, "AB" -> 27. Lowercase letters are accepted.

    Args:
        row_id (str): The row id (letters).

    Returns:
        int: The zero-based row index.

    Raises:
        ValueError: If `row_id` is empty or contains anything other than the letters A-Z.
    """
    letters = row_id.upper()
    # Reject empty ids (which would give -1) and characters outside A-Z
    # (which would give a meaningless index).
    if not letters or any(not "A" <= char <= "Z" for char in letters):
        raise ValueError(f"row_to_index: invalid row id '{row_id}', expected one or more letters (A-Z).")
    index = 0
    for char in letters:
        index = index * 26 + (ord(char) - ord("A") + 1)
    return index - 1  # Zero-based index

Convert a row id consisting of letters (e.g., A, B, Z, AA) to an index.

Args

row_id : str
The row id (letters).

Returns

int
The zero-based row index.

Classes

class DataError (*args, **kwargs)
Expand source code
class DataError(Exception):
    """Exception subclass defined by this module; it adds no state or behavior of its own."""

Common base class for all non-exit exceptions.

Ancestors

  • builtins.Exception
  • builtins.BaseException