
myco.utils

Helper functions used by multiple modules or that don't have a clear home.

AttributeDictionary

Bases: dict

Extends a dictionary to index keys as attributes.

Source code in myco/utils.py
class AttributeDictionary(dict):
    """Extends a dictionary to index keys as attributes."""

    def __init__(self, *args, **kwargs):
        super(AttributeDictionary, self).__init__(*args, **kwargs)
        self.__dict__ = self
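
Example: a minimal usage sketch; the `params` variable and its keys are hypothetical. Keys set at construction (or assigned later) are readable as attributes because the instance's __dict__ is the dict itself.

    >>> params = AttributeDictionary({"epochs": 10, "lr": 0.001})
    >>> params.epochs
    10
    >>> params["lr"] == params.lr
    True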

augment_feature_patch(feature)

Apply a fixed set of feature augmentations (flips and rotations) and stack them with the original patch.

Source code in myco/utils.py
def augment_feature_patch(feature: np.ndarray) -> np.ndarray:
    """Apply random feature augmentations (flipping and rotating)"""
    flip_lr = np.flip(feature, 2)
    flip_ud = np.flip(feature, 1)
    rot90 = np.rot90(feature, axes=(1, 2))
    rot180 = np.rot90(feature, k=2, axes=(1, 2))
    rot270 = np.rot90(feature, k=3, axes=(1, 2))
    aug_patches = np.vstack([feature, flip_lr, flip_ud, rot90, rot180, rot270])

    return aug_patches
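
Example: an illustrative sketch of the shape contract. Input is a batch of square patches shaped (n_samples, height, width, bands); the output stacks the original and five augmentations along the first axis. Patches must be square for the 90/270 degree rotations to stack with the flips.

    >>> patch = np.random.rand(1, 64, 64, 3)  # one hypothetical 64x64, 3-band patch
    >>> augment_feature_patch(patch).shape
    (6, 64, 64, 3)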

balance_samples(samples, class_weight)

Over- and undersamples classification data.

Parameters:

    samples (np.array, required): array of shape (n_samples, height, width,
        n_classes) to get samples from
    class_weight (dict, required): the proportional distribution of each class

Returns:

    list: a list of response data sample indices to include in model training

Source code in myco/utils.py
def balance_samples(samples: np.array, class_weight: dict) -> list:
    """Over and undersamples classification data.

    Args:
        samples: array of shape (height, width, n_classes) to get samples from
        class_weight: the proportional distribution of each class
        add_samples: ???

    Returns:
        a list of response data sample indices to include in model training
    """
    num_samples = samples.shape[0]
    samples_per_class = np.sum(samples, (1, 2))
    classes, weights = np.array(list(class_weight.keys())), np.array(
        list(class_weight.values())
    )

    # the indices in each class with at least one positive
    indices = [np.where(samples_per_class[:, i] > 0) for i in classes]

    # the labels that should be oversampled
    over_sample_classes = np.where(weights >= 1)[0]
    ind = {}
    for cl in over_sample_classes:
        ind[cl] = np.repeat(indices[cl], int(np.abs(weights[cl] + 1)))
        np.random.shuffle(ind[cl])

    # the labels that should be undersampled
    under_sample_classes = np.where(weights < 1)[0]
    for cl in under_sample_classes:
        np.random.shuffle(indices[cl][0])
        stop_ind = np.floor(indices[cl][0].shape[0] * np.abs(weights[cl]))
        ind[cl] = indices[cl][0][: int(stop_ind)]

    # stack the under- and oversampled indices and shuffle them
    sample_inxs = np.hstack(list(ind.values()))
    np.random.shuffle(sample_inxs)

    return sample_inxs[:num_samples]
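
Example: a hypothetical sketch. Classes with a weight >= 1 are oversampled (integer weights here, since the repeat count must be a whole number) and classes with a weight < 1 are undersampled by that fraction; the returned indices select patches for training.

    labels = np.random.randint(0, 3, (100, 32, 32))
    samples = np.eye(3)[labels]                      # one-hot, (100, 32, 32, 3)
    idx = balance_samples(samples, {0: 1, 1: 2, 2: 0.5})
    balanced = samples[idx]                          # rebalanced training patches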

compute_class_weights(response)

Computes weights per class.

Parameters:

    response (np.ndarray, required): one-hot encoded tensor of response data

Returns:

    dict: a dict mapping each class label to its weight

Source code in myco/utils.py
def compute_class_weights(response: np.ndarray) -> dict:
    """Computes weights per class.

    Args:
        response: one-hot encoded tensor of response data

    Returns:
        a dict mapping each class label to its weight
    """

    data_vals = np.argmax(response, axis=3).flatten()
    class_labels = np.unique(data_vals)
    weights = class_weight.compute_class_weight(
        "balanced", classes=class_labels, y=data_vals
    )
    return dict(zip(class_labels, weights))
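
Example: an illustrative sketch, assuming sklearn's class_weight module is imported in myco/utils.py (from sklearn.utils import class_weight). Rare classes receive proportionally larger weights.

    response = np.eye(3)[np.random.randint(0, 3, (10, 32, 32))]  # hypothetical one-hot tensor
    weights = compute_class_weights(response)                    # e.g. {0: 1.02, 1: 0.98, 2: 1.00}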

compute_sample_weights(class_weights, response)

Computes weights per sample.

Parameters:

    class_weights (np.ndarray, required): weights of each class
    response (np.ndarray, required): one-hot encoded tensor of response data

Returns:

    np.ndarray: 2D array of sample weights

Source code in myco/utils.py
def compute_sample_weights(
    class_weights: np.ndarray, response: np.ndarray
) -> np.ndarray:
    """Computes weights per sample.

    Args:
        class_weights: weights of each class
        response: one-hot encoded tensor of response data

    Returns:
        2D array of sample weights
    """
    assert len(response.shape) == 4, "Input data should have 4 dimensions"
    labels = np.argmax(response, axis=3)
    shape = labels.shape
    sample_weights = np.take(
        class_weights, labels.reshape(shape[0], shape[1] * shape[2])
    )

    return sample_weights
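
Example: a hypothetical sketch pairing this with compute_class_weights(). Because np.take indexes positionally, the class-weight dict is first converted to an array ordered by class label (assuming every class is present in the response).

    response = np.eye(3)[np.random.randint(0, 3, (10, 32, 32))]
    cw = compute_class_weights(response)                 # dict of label -> weight
    cw_arr = np.array([cw[c] for c in sorted(cw)])       # positional array for np.take
    compute_sample_weights(cw_arr, response).shape       # -> (10, 1024)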

create_classes(pred_score)

Reads the scores of one-hot encoded data and finds the class value.

Parameters:

    pred_score (np.ndarray, required): matrix of scores

Returns:

    np.ndarray: class-labeled array; the last axis is reduced from n_classes to 1

Source code in myco/utils.py
def create_classes(pred_score: np.ndarray) -> np.ndarray:
    """Reads the scores of one hot encoded data and finds the class value.

    Args:
        pred_score: matrix of scores

    Returns:
        class-labeled array; the last axis is reduced from n_classes to 1
    """
    y_pred = tf.math.argmax(pred_score, axis=-1)
    return np.expand_dims(y_pred, axis=-1)
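
Example: a minimal sketch (assumes TensorFlow eager execution); argmax over the last axis picks the winning class, and the output keeps a trailing singleton axis.

    scores = np.array([[[0.1, 0.7, 0.2]]])  # shape (1, 1, 3)
    create_classes(scores)                  # -> array([[[1]]]), shape (1, 1, 1)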

find_yaml_files(filepath, yaml_ext)

Search for yaml files affiliated with a numpy archive

Source code in myco/utils.py
def find_yaml_files(filepath: str, yaml_ext: str):
    """Search for yaml files affiliated with a numpy archive"""
    base, ext = os.path.splitext(filepath.rstrip(os.path.sep))
    yaml_files = glob(f"{base}*{yaml_ext}")
    yaml_files.sort()
    n_files = len(yaml_files)

    if n_files < 1:
        raise FileNotFoundError(
            f"No associated *.{yaml_ext} files found for {filepath}"
        )
    elif n_files > 1:
        nearest_ext = f"{base}.{yaml_ext}"
        next_nearest_ext = f"{filepath}.{yaml_ext}"
        if nearest_ext in yaml_files:
            yaml_file = nearest_ext
        elif next_nearest_ext in yaml_files:
            yaml_file = next_nearest_ext
        else:
            yaml_file = yaml_files[-1]
            warnings.warn(
                f"Multiple *.{yaml_ext} files found for {filepath}. Reading "
                f"{yaml_file}."
            )
    else:
        yaml_file = yaml_files[0]

    return yaml_file
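
Example: a hypothetical sketch. Given train_features.npy with a header file written alongside it, the nearest-named yaml file is returned.

    find_yaml_files("train_features.npy", "yml")   # -> 'train_features.yml'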

load_object(path, compressed=True)

Reads a python object that's been saved to disk or to GCS

Parameters:

    path (str, required): file path of the object to load
    compressed (bool, default True): whether the file was compressed prior to saving

Returns:

    obj (Any): the original python object (e.g., a fit NetworkScaler() object)

Source code in myco/utils.py
def load_object(path: str, compressed: bool = True) -> Any:
    """Reads a python object that's been saved to disk or to GCS

    Args:
        path: file path of the object to load
        compressed: whether the file was compressed prior to saving

    Returns:
        obj: the original python object (e.g., a fit NetworkScaler() object)
    """
    with tf.io.gfile.GFile(path, "rb") as f:
        obj = f.read()

    if compressed:
        obj = gzip.decompress(obj)

    return pickle.loads(obj)
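
Example: a hypothetical round trip with save_object() from this module; the path is illustrative and could equally be a gs:// URI. The compressed flag must match the compress flag used when saving.

    scaler = {"mean": 0.5, "std": 0.1}            # any picklable object
    save_object(scaler, "/tmp/scaler.pickle")     # hypothetical path, gzipped by default
    restored = load_object("/tmp/scaler.pickle")  # compressed=True matches the default
    assert restored == scaler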

n_digits(number)

Counts the number of significant integer digits of a number.

Parameters:

    number (Union[int, float], required): the number to evaluate.

Returns:

    order (int): the number of digits required to represent the number

Source code in myco/utils.py
def n_digits(number: Union[int, float]) -> int:
    """Counts the number of significant integer digits of a number.

    Args:
        number: the number to evaluate.

    Returns:
        order: number of digits required to represent a number
    """
    if number == 0:
        order = 1
    else:
        order = np.floor(np.log10(number)).astype(int) + 1

    return order
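
Example: a few worked values. Zero is special-cased to one digit; otherwise the count is floor(log10(number)) + 1, so values below 1 can yield zero or negative counts.

    n_digits(0)      # -> 1
    n_digits(42)     # -> 2
    n_digits(1000)   # -> 4
    n_digits(0.5)    # -> 0  (floor(log10(0.5)) + 1)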

read_archive_data(filepath, mode='r', ext='yml')

Reads a numpy memmap archive based on an associated header yaml file.

Parameters:

    filepath (str, required): path to the numpy archive to read. automatically
        searches for associated .yml files.
    mode (str, default 'r'): the mode for opening the memmap (one of 'r', 'r+',
        'w+'; see np.memmap).
    ext (str, default 'yml'): the filename extension to search for based on the
        input filepath.

Returns:

    np.ndarray: the numpy array as a memmap object.

Source code in myco/utils.py
def read_archive_data(filepath: str, mode: str = "r", ext: str = "yml") -> np.ndarray:
    """Reads a numpy memmap archive based on an associated header yaml file.

    Args:
        filepath: path to the numpy archive to read. automatically searches
            for associated .yml files.
        mode: the mode for opening the memmap (one of 'r', 'r+', 'w+'; see
            np.memmap).
        ext: the filename extension to search for based on the input filepath.

    Returns:
        The numpy array as a memmap object.
    """
    yaml_file = find_yaml_files(filepath, ext)
    config = read_yaml(yaml_file)
    memmap = np.memmap(
        filepath, dtype=config["dtype"], shape=tuple(config["shape"]), mode=mode
    )

    return memmap
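
Example: a hypothetical local round trip with write_archive_header(). The array is written as raw bytes (np.memmap expects a headerless binary file), and the shape/dtype keywords are passed explicitly here as plain yaml-serializable values.

    arr = np.zeros((4, 32, 32, 3), dtype="float32")
    arr.tofile("features.npy")                            # raw bytes, no .npy header
    write_archive_header("features.yml", shape=list(arr.shape), dtype=str(arr.dtype))
    data = read_archive_data("features.npy")              # finds features.yml automatically
    data.shape                                            # -> (4, 32, 32, 3)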

read_archive_header(filepath, ext='yml')

Reads archive metadata information from a yaml file.

Source code in myco/utils.py
def read_archive_header(filepath: str, ext: str = "yml") -> dict:
    """Reads archive metadata information from a yaml file."""
    yaml_file = find_yaml_files(filepath, ext)
    return read_yaml(yaml_file)

read_yaml(filepath)

Read a yaml file into memory as a dictionary.

Parameters:

    filepath (str, required): path to a local .yml/.yaml file.

Returns:

    dict: a parsed dictionary with the yaml file's contents.

Source code in myco/utils.py
def read_yaml(filepath: str) -> dict:
    """Read a yaml file into memory as a dictionary.

    Args:
        filepath: path to a local .yml/.yaml file.

    Returns:
        A parsed dictionary with the yaml file's contents.
    """
    yaml = YAML(typ="safe", pure=True)
    with tf.io.gfile.GFile(filepath, "r") as handler:
        configuration = yaml.load(handler)

    return configuration

revert_augmentation(pred)

Inverse transformation of the augment_feature_patch() function.

Source code in myco/utils.py
def revert_augmentation(pred: np.ndarray) -> np.ndarray:
    """Inverse transformation of the augment_feature_path() function."""
    pred[1] = np.flip(pred[1], 1)
    pred[2] = np.flip(pred[2], 0)
    pred[3] = np.rot90(pred[3], k=3, axes=(0, 1))
    pred[4] = np.rot90(pred[4], k=2, axes=(0, 1))
    pred[5] = np.rot90(pred[5], axes=(0, 1))

    return pred
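
Example: an illustrative round trip with augment_feature_patch(). Note the function mutates its argument in place as well as returning it, hence the copy.

    patch = np.random.rand(1, 64, 64, 3)
    aug = augment_feature_patch(patch)                  # (6, 64, 64, 3)
    reverted = revert_augmentation(aug.copy())          # copy: reverts happen in place
    np.allclose(reverted, np.repeat(patch, 6, axis=0))  # -> True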

save_object(obj, path, compress=True)

Writes a python object to disk or GCS for later access

Parameters:

    obj (object, required): a python object to be saved (e.g. a variable or a class)
    path (str, required): the output file path
    compress (bool, default True): flag to specify whether the file should be compressed

Source code in myco/utils.py
def save_object(obj: object, path: str, compress: bool = True) -> None:
    """Writes a python object to disk or GCS for later access

    Args:
        obj: a python object to be saved (e.g. a variable or a class)
        path: the output file path
        compress: flag to specify whether the file should be compressed
    """
    obj = pickle.dumps(obj)

    if compress:
        obj = gzip.compress(obj)

    with tf.io.gfile.GFile(path, "wb") as f:
        f.write(obj)

write_archive_header(filepath, archive=None, shape=None, dtype=None, nodata=None, tfrecords=[])

Write a numpy memmap archive header yaml file.

You can provide the shape/dtype info either by passing the array as the archive keyword or by passing shape and dtype explicitly.

Parameters:

    filepath (str, required): path to an output .yml file.
    archive (np.ndarray, default None): a numpy array to write the header for.
    shape (tuple, default None): dimensions of the numpy array
    dtype (str, default None): the numpy datatype of the array
    nodata (float, default None): the nodata value to ignore
    tfrecords (list, default []): list of tfrecord filepaths

Returns:

    None. Writes a yaml file to filepath.

Source code in myco/utils.py
def write_archive_header(
    filepath: str,
    archive: np.ndarray = None,
    shape: tuple = None,
    dtype: str = None,
    nodata: float = None,
    tfrecords: list = [],
) -> None:
    """Write a numpy memmap archive header yaml file.

    You can provide the shape/dtype info either by passing the array as the
        `archive` keyword or by passing `shape` and `dtype` as keywords.

    Args:
        filepath: path to an output .yml file.
        archive: a numpy array to write the header for.
        shape: dimensions of the numpy array
        dtype: the numpy datatype of the array
        nodata: the nodata value to ignore
        tfrecords: list of tfrecord filepaths

    Returns:
        None. Writes a yaml file to `filepath`.
    """

    # get required data from the archive or from keywords
    shape = archive.shape if archive is not None else shape
    dtype = str(archive.dtype) if archive is not None else dtype
    assert shape is not None, "archive or shape parameter must be set"
    assert dtype is not None, "archive or dtype parameter must be set"

    # avoid overwriting the archive if that filename is passed
    base, ext = os.path.splitext(filepath.rstrip(os.path.sep))
    if "npy" in ext.lower():
        filepath = f"{base}.yml"

    config = {
        "shape": shape,
        "dtype": dtype,
        "nodata": nodata,
        "tfrecords": tfrecords,
    }
    write_yaml(config, filepath)

write_yaml(data, filepath)

Write a dictionary to a yaml file.

Parameters:

    data (dict, required): dictionary of values to write.
    filepath (str, required): path to an output .yml/.yaml file.

Returns:

    None. Writes to disk.

Source code in myco/utils.py
def write_yaml(data: dict, filepath: str) -> None:
    """Write a dictionary to a yaml file.

    Args:
        data: dictionary of values to write.
        filepath: path to an output .yml/.yaml file.

    Returns:
        None. Writes to disk.
    """
    yaml = YAML()
    with tf.io.gfile.GFile(filepath, "w") as handler:
        yaml.dump(data, handler)
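
Example: a hypothetical round trip with read_yaml(); the path and contents are illustrative.

    config = {"shape": [4, 32, 32, 3], "dtype": "float32"}
    write_yaml(config, "/tmp/header.yml")      # hypothetical path
    read_yaml("/tmp/header.yml") == config     # -> True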