import itertools
from datetime import datetime, timezone
from .taxonomy import Taxonomy
from .acoustic import AcousticTaxonomy
from .label import LabelManager, AcousticLabelManager
class TaxonomyManager:
    """Class for managing `evolving` taxonomies.

    Holds a draft taxonomy, the list of released taxonomy versions, and the
    label manager that is updated every time a new version is released.

    Args:
        tax: korus.taxonomy.Taxonomy
            The initial draft. Can be empty.
        labels: korus.label.LabelManager
            The label manager. It is updated with the labels of each release.

    Attrs:
        draft: korus.taxonomy.Taxonomy
            The draft taxonomy version
        releases: list[korus.taxonomy.Taxonomy]
            The released taxonomy versions
        labels: korus.label.LabelManager
            The label manager
    """

    def __init__(self, tax: Taxonomy, labels: LabelManager):
        super().__init__()
        self.draft = tax
        self.releases = []
        self.labels = labels

    @property
    def version(self) -> int:
        """The current version number (equal to the number of releases)"""
        return len(self.releases)

    @property
    def current(self) -> Taxonomy | None:
        """The current version (latest release) of the taxonomy.

        None if no version has been released yet.
        """
        return self.releases[-1] if self.releases else None

    def get_taxonomy(self, version: int | None = None) -> Taxonomy | None:
        """Get a given version of the taxonomy

        Args:
            version: int
                Taxonomy version (1 is the first release). If None, the current
                release is returned, which is None if nothing has been released.

        Returns:
            : korus.taxonomy.Taxonomy
                The requested taxonomy version
        """
        if version is None:
            return self.current
        # versions are numbered from 1, the list of releases from 0
        return self.releases[version - 1]

    def release(self, comment: str | None = None):
        """Release a new version of the taxonomy.

        Increments the version number by +1.

        A deep copy of the draft is stored as the new release and stamped with
        the current UTC time. The label manager is then updated with the labels
        of the new release, and the draft's history is cleared.

        Args:
            comment: str (optional)
                An explanatory note
        """
        snapshot = self.draft.deepcopy()
        snapshot.version = self.version + 1
        snapshot.comment = comment
        snapshot.timestamp = datetime.now(timezone.utc)
        self.releases.append(snapshot)
        self.labels.update(snapshot.version, snapshot.all_labels)
        self.draft.clear_history()

    def get_label(
        self,
        id: int | list[int],
        always_list: bool = False,
    ):
        """Look up the label(s) associated with one or several label IDs.

        Thin wrapper around the label manager's `get_label` method, called
        with `return_version=False`.

        Args:
            id: int | list[int]
                Label ID(s)
            always_list: bool
                Forwarded to the label manager. Default is False.

        Returns:
            The label(s), in whatever form the label manager returns them.
        """
        return self.labels.get_label(id, always_list=always_list, return_version=False)

    def get_label_id(
        self,
        label: str | tuple | list = None,
        version: int = None,
        ascend: bool = False,
        descend: bool = False,
        always_list: bool = False,
        label_id: int | list[int] = None,
    ) -> int | list[int]:
        """Get the ID(s) of one or several labels.

        Args:
            label: str | tuple | list
                Sound source and sound type label(s). The character '*' can be used as wildcard.
                For example, use ('SRKW','*') to retrieve all label IDs associated with the sound
                source 'SRKW', irrespective of sound type. Multiple source-type pairs can be
                specified as a list of tuples. Ignored if `label_id` is provided.
            version: int
                Taxonomy version. If None, the current release is used.
            ascend: bool
                Also return the labels of ancestral nodes.
            descend: bool
                Also return the labels of descendant nodes.
            always_list: bool
                Whether to always return a list. Default is False.
            label_id: int | list[int]
                Label IDs. If provided, the `label` and `version` arguments are ignored.

        Returns:
            ids: int | list[int]
                Label identifier(s)

        Raises:
            ValueError: if the label does not exist in the taxonomy
        """
        # if `label_id` was specified, use it in place of `label`:
        # resolve every ID to its (version, label) pair and look each one up
        if label_id is not None:
            ids = []
            for src_version, src_label in self.labels.get_label(label_id, always_list=True):
                ids += self.get_label_id(
                    src_label, src_version, ascend, descend, always_list=True
                )
            if isinstance(label_id, int) and len(ids) == 1 and not always_list:
                ids = ids[0]
            return ids

        # delegate to the module-level helper of the same name
        return get_label_id(
            label,
            self.get_taxonomy(version),
            self.labels,
            ascend,
            descend,
            always_list,
        )

    def crosswalk(
        self,
        label_id: int | list[int],
        dst_version: int = None,
        ascend: bool = False,
        descend: bool = False,
        always_list: bool = False,
        equivalent_only: bool = False,
    ) -> int | list[int]:
        """Map a list of label IDs to another taxonomy version.

        Args:
            label_id: int | list[int]
                Label ID(s)
            dst_version: int
                Destination taxonomy version. If None, crosswalk to all versions, including the source version.
            ascend: bool
                Also return the labels of ancestral nodes of the mapped node(s).
            descend: bool
                Also return the labels of descendant nodes of the mapped node(s).
            always_list: bool
                Whether to always return a list. Default is False.
            equivalent_only: bool
                If True, only return the mapped label IDs that are 1-to-1

        Returns:
            ids: int | list[int]
                The mapped label ID(s), without duplicates.
        """
        if dst_version is None:
            targets = list(range(1, len(self.releases) + 1))
        else:
            targets = [dst_version]

        # convert label IDs to (version, node identifier) tuples
        sources = self.labels.get_label(label_id, return_nid=True, always_list=True)

        ids = []
        for src_version, nid in sources:
            for target in targets:
                # get destination taxonomy
                dst_taxonomy = self.releases[target - 1]

                # get closest relatives in destination taxonomy; search backwards
                # in time if the source is newer than the destination
                mode = "b" if src_version > target else "f"
                relatives, equiv = self.get_closest_relative(nid, target, mode)

                # skip non-equivalent mappings before doing any label lookups
                if equivalent_only and not equiv:
                    continue

                # convert node IDs to tags
                tags = self.labels.get_label(
                    self.labels.get_label_id(target, nid=relatives),
                    return_version=False,
                )

                # get label IDs
                ids += get_label_id(
                    tags,
                    dst_taxonomy,
                    self.labels,
                    ascend,
                    descend,
                    always_list=True,
                )

        # drop duplicates
        ids = list(set(ids))

        if isinstance(label_id, int) and len(ids) == 1 and not always_list:
            ids = ids[0]

        return ids

    def get_precursor_nodes(self, nid: str):
        """Get precursor node(s)

        Searches the releases, oldest first, for the one that lists the node
        among its created nodes.

        Args:
            nid: str
                The source node ID

        Returns:
            : list[str]
                The IDs of the precursor node(s). Empty if no release lists the
                node as created.
            : bool
                Whether the source node and the precursor node(s) may be considered equivalent.
        """
        for tax in self.releases:
            if nid in tax.created_nodes:
                return tax.created_nodes[nid]
        return [], False

    def get_inheritor_nodes(self, nid: str):
        """Get inheritor node(s)

        Searches the releases, oldest first, for the one that lists the node
        among its removed nodes.

        Args:
            nid: str
                The source node ID

        Returns:
            : list[str]
                The IDs of the inheritor node(s). Empty if no release lists the
                node as removed.
            : bool
                Whether the source node and the inheritor node(s) may be considered equivalent.
        """
        for tax in self.releases:
            if nid in tax.removed_nodes:
                return tax.removed_nodes[nid]
        return [], False

    def get_closest_relative(
        self, nid: str | tuple[str], version: int = None, mode: str = "backward"
    ):
        """Trace node history.

        If node is present in the taxonomy, the node's ID is returned.
        If node is missing, the ID's of its closest relatives are returned (precursor nodes if
        mode=backward and inheritor nodes if mode=forward.)

        Args:
            nid: str | tuple[str]
                Node ID or set of node IDs
            version: int
                Taxonomy version. If None, the current version number is used.
            mode: str
                * backward/b: trace node history backwards in time (default)
                * forward/f: trace node history forward in time

        Returns:
            relatives: list[str] | list[tuple[str]]
                IDs of the closest relatives
            is_equivalent: bool
                Whether the source node and the relative nodes may be considered equivalent.

        Raises:
            ValueError: if `mode` is not one of the supported search modes
        """
        is_equivalent = True

        if version is None:
            version = self.version

        search_mode = mode.lower()
        if search_mode in ["b", "backward"]:
            mapper = self.get_precursor_nodes
        elif search_mode in ["f", "forward"]:
            mapper = self.get_inheritor_nodes
        else:
            raise ValueError(f"Invalid closest-relative search mode: {mode}")

        # recast a single node ID as a 1-tuple
        is_tuple = not isinstance(nid, str)
        nid_tuple = nid if is_tuple else (nid,)

        # find closest relative(s) for each member of the tuple
        relatives = [[] for _ in nid_tuple]
        for found, start_nid in zip(relatives, nid_tuple):
            pending = [start_nid]
            while len(pending) > 0:
                # nodes that exist in the requested version need no further tracing
                missing = []
                for candidate in pending:
                    if self.labels.has_nid(candidate, version):
                        found.append(candidate)
                    else:
                        missing.append(candidate)

                # for missing nodes, use mapper to obtain precursor/inheritor nodes
                pending = []
                for lost in missing:
                    mapped_nids, equiv = mapper(lost)
                    pending += mapped_nids
                    if not equiv:
                        is_equivalent = False

        # make all possible combinations
        combos = list(itertools.product(*relatives))

        # retain only valid combinations
        combos = [c for c in combos if self.labels.has_nid(c, version)]

        # recast output
        if not is_tuple:
            combos = [c[0] for c in combos]

        return combos, is_equivalent
class AcousticTaxonomyManager(TaxonomyManager):
    """Taxonomy manager for acoustic taxonomies.

    Pairs a default-constructed `AcousticTaxonomy` draft with a
    default-constructed `AcousticLabelManager`.
    """

    def __init__(self):
        draft = AcousticTaxonomy()
        label_manager = AcousticLabelManager()
        super().__init__(draft, label_manager)
def get_label_id(
    label: str | tuple | list,
    taxonomy: Taxonomy,
    label_manager: LabelManager,
    ascend: bool = False,
    descend: bool = False,
    always_list: bool = False,
) -> int | list[int]:
    """Look up the ID(s) of one or several labels.

    With @ascend enabled, the label IDs of all ancestral nodes in the taxonomy
    tree are included as well. For example, for the sound source SRKW the result
    covers not only SRKW, but also KW, Toothed, Cetacean, Mammal, Bio, and Unknown.

    With @descend enabled, the label IDs of all descendant nodes in the taxonomy
    tree are included as well. For example, for the sound source SRKW the result
    covers not only SRKW, but also J, K, and L pod.

    Args:
        label: str | tuple | list
            Sound source and sound type label(s). The character '*' can be used as wildcard.
            For example, use ('SRKW','*') to retrieve all label IDs associated with the sound
            source 'SRKW', irrespective of sound type. Multiple source-type pairs can be
            specified as a list of tuples. If None, None is returned.
        taxonomy: Taxonomy
            The taxonomy
        label_manager: LabelManager
            The label manager
        ascend: bool
            Also return the labels of ancestral nodes.
        descend: bool
            Also return the labels of descendant nodes.
        always_list: bool
            Whether to always return a list. Default is False.

    Returns:
        id: int | list[int]
            Label identifier(s). A bare int is returned only when a single,
            non-list label was given, exactly one ID was found, and
            `always_list` is False.

    Raises:
        ValueError: if the (sound-source, sound-type) label does not exist in the taxonomy
    """
    if label is None:
        return None

    # normalise the query to a list of tuples
    single_query = not isinstance(label, list)
    queries = [label] if single_query else label
    queries = [q if isinstance(q, tuple) else (q,) for q in queries]

    tax_version = taxonomy.version

    def lookup(tag):
        # IDs registered for `tag` in this taxonomy version, always as a list
        return label_manager.get_label_id(tax_version, tag, always_list=True)

    found = []
    for query in queries:
        found.extend(lookup(query))
        if ascend:
            for ancestor in taxonomy.ascend(*query, include_start_node=False):
                found.extend(lookup(ancestor))
        if descend:
            for descendant in taxonomy.descend(*query, include_start_node=False):
                found.extend(lookup(descendant))

    # unwrap a lone ID unless a list was requested or supplied
    if single_query and len(found) == 1 and not always_list:
        return found[0]
    return found