Module `pipettin-piper.piper.utils`

Functions

def decode_csv(data) ‑> dict

Expand source code

def decode_csv(data) -> dict:
    """Parse CSV content into an ordered mapping of column name to values.

    Args:
        data: CSV content given as text, as UTF-8 encoded bytes, or as a
            string that ``is_base64_encoded`` recognizes as Base64.

    Returns:
        An ``OrderedDict`` keyed by column name, in order of first
        appearance. Each value is the list of that column's cells, one per
        data row. Input without data rows yields an empty mapping, because
        keys are only created while iterating rows.
    """
    # Unwrap the Base64 layer first, when the helper detects one.
    payload = base64.b64decode(data) if is_base64_encoded(data) else data

    # Bytes (passed in directly, or produced by the Base64 step) become text.
    if isinstance(payload, bytes):
        payload = payload.decode("utf-8")

    # StringIO lets the csv module read the text like a file.
    rows = csv.DictReader(StringIO(payload))

    # Transpose the row dictionaries into one list per column.
    columns = OrderedDict()
    for record in rows:
        for name, cell in record.items():
            columns.setdefault(name, []).append(cell)

    return columns

Decode CSV data and put it into an ordered dict of lists.

def default_config(**data) ‑> pipettin-piper.piper.config.config_helper.TrackedDict

Expand source code

def default_config(**data) -> TrackedDict:
    """Load the default configuration, updated with any additional keyword arguments.

    Args:
        **data: Keyword arguments handed to ``TrackedDict`` as its ``data``
            argument.

    Returns:
        TrackedDict: Built from the path returned by ``get_config_path()``,
        the given keyword arguments, and ``allow_edits=True``.
    """
    return TrackedDict(
        # Path of the configuration file included with the package.
        file_path=get_config_path(),
        # Caller-supplied overrides.
        data=data,
        allow_edits=True,
    )

Load default configuration, and update it with any additional keyword arguments.

def dump_json(path: str, data: dict, ensure_ascii=False)

Expand source code

def dump_json(path:str, data:dict, ensure_ascii=False):
    """Write ``data`` to ``path`` as UTF-8 JSON indented by 4 spaces.

    Values are serialized with ``BytesEncoder``. A confirmation line is
    printed once the dump call has returned.

    Args:
        path (str): Destination file; opened in write mode.
        data (dict): Object to serialize.
        ensure_ascii (bool, optional): Forwarded to ``json.dump``.
            Defaults to False.

    Raises:
        Exception: Any error raised while opening or writing the file is
            logged with ``logging.error`` and then re-raised.
    """
    try:
        with open(path, mode='w', encoding='utf-8') as handle:
            json.dump(
                data,
                handle,
                cls=BytesEncoder,
                indent=4,
                ensure_ascii=ensure_ascii,
            )
            print(f"Wrote to JSON file at: '{path}'")
    except Exception as err:
        logging.error(f"Failed to write to the file at '{path}': {err}")
        raise err

def get_config_path()

Expand source code

def get_config_path():
    """Return the location of the 'config.yml' file included in the `config` package.
    See: https://stackoverflow.com/a/20885799/11524079

    NOTE(review): in the fallback branch the path is returned after its
    context manager has exited; if the resource had to be extracted to a
    temporary file, that file may no longer exist. Confirm against the
    supported install layouts.
    """
    try:
        return impresources.files(config) / 'config.yml'
    except AttributeError:
        # Python < PY3.9, fall back to method deprecated in PY3.11.
        with impresources.path(config, 'config.yml') as resolved:
            return resolved

Get the included configuration file's path. See: https://stackoverflow.com/a/20885799/11524079

def is_base64_encoded(data)

Expand source code

def is_base64_encoded(data):
    """Tell whether ``data`` is a string holding strictly valid Base64.

    Args:
        data: Value to inspect. Anything that is not a ``str`` (including
            ``bytes``) is reported as not Base64.

    Returns:
        bool: True when ``data`` is a ``str`` whose length is a multiple of
        4 and which decodes under strict validation. The empty string
        satisfies both conditions and therefore yields True. False
        otherwise.
    """
    # Ensure the value is a string whose length is a multiple of 4.
    if not isinstance(data, str) or len(data) % 4 != 0:
        return False

    try:
        base64.b64decode(data, validate=True)
    except ValueError:
        # binascii.Error (bad alphabet or padding) is a ValueError subclass.
        # A str with non-ASCII characters raises a plain ValueError instead,
        # which previously escaped this function.
        return False
    return True

def scrub(obj: dict, bad_key: str = '_remove_this_key')

Expand source code

def scrub(obj:dict, bad_key:str="_remove_this_key"):
    """Delete every occurrence of a key from a dictionary, in place.

    The search recurses into values that are themselves dictionaries.
    Values of any other type (lists included) are left untouched.

    Args:
        obj (dict): A dictionary to scrub. Non-dict values are ignored.
        bad_key (str, optional): Key to remove. Defaults to "_remove_this_key".
    """
    # Anything that is not a dict has no keys to remove.
    if not isinstance(obj, dict):
        return

    # Iterate over a snapshot of the keys, since entries are deleted on the way.
    for name in tuple(obj.keys()):
        if name != bad_key:
            scrub(obj[name], bad_key)
            continue
        del obj[name]

Remove keys from a dictionary by name.

Args

obj : dict: A dictionary to scrub.
bad_key : str, optional: Key to remove. Defaults to "_remove_this_key".

Classes

class BytesEncoder (*, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, default=None)

Expand source code

class BytesEncoder(json.JSONEncoder):
    """Handle 'bytes' type values in json.dump.

    Bytes are written as Base64 text; every other type is left to the
    base encoder.
    By Mark at https://stackoverflow.com/a/77322684
    """
    def default(self, o):
        """Return the Base64 text for ``bytes``; defer to the base class otherwise."""
        if not isinstance(o, bytes):
            # The base implementation raises TypeError for unsupported types.
            return super().default(o)
        encoded = base64.b64encode(o)
        return encoded.decode("ascii")

Handle 'bytes' type values in json.dump. By Mark at https://stackoverflow.com/a/77322684

Constructor for JSONEncoder, with sensible defaults.

If skipkeys is false, then it is a TypeError to attempt encoding of keys that are not str, int, float or None. If skipkeys is True, such items are simply skipped.

If ensure_ascii is true, the output is guaranteed to be str objects with all incoming non-ASCII characters escaped. If ensure_ascii is false, the output can contain non-ASCII characters.

If check_circular is true, then lists, dicts, and custom encoded objects will be checked for circular references during encoding to prevent an infinite recursion (which would cause a RecursionError). Otherwise, no such check takes place.

If allow_nan is true, then NaN, Infinity, and -Infinity will be encoded as such. This behavior is not JSON specification compliant, but is consistent with most JavaScript based encoders and decoders. Otherwise, it will be a ValueError to encode such floats.

If sort_keys is true, then the output of dictionaries will be sorted by key; this is useful for regression tests to ensure that JSON serializations can be compared on a day-to-day basis.

If indent is a non-negative integer, then JSON array elements and object members will be pretty-printed with that indent level. An indent level of 0 will only insert newlines. None is the most compact representation.

If specified, separators should be an (item_separator, key_separator) tuple. The default is (', ', ': ') if indent is None and (',', ': ') otherwise. To get the most compact JSON representation, you should specify (',', ':') to eliminate whitespace.

If specified, default is a function that gets called for objects that can't otherwise be serialized. It should return a JSON encodable version of the object or raise a TypeError.

Ancestors

json.encoder.JSONEncoder

Methods

def default(self, o)

Expand source code

def default(self, o):
    """Return the Base64 text for ``bytes``; defer to the base class otherwise."""
    if not isinstance(o, bytes):
        return super().default(o)
    encoded = base64.b64encode(o)
    return encoded.decode("ascii")

Implement this method in a subclass such that it returns a serializable object for o, or calls the base implementation (to raise a TypeError).

For example, to support arbitrary iterators, you could implement default like this::

def default(self, o):
    """Example override: serialize any iterable object as a list."""
    try:
        # iter() raises TypeError when o is not iterable.
        iterable = iter(o)
    except TypeError:
        # Not iterable: fall through to the base-class call below.
        pass
    else:
        # Only reached when iter() succeeded; materialize the items.
        return list(iterable)
    # Let the base class default method raise the TypeError
    return JSONEncoder.default(self, o)