Source code for mevis._internal.filtering

from collections.abc import Callable as _Callable

from opencog.atomspace import Atom as _Atom
from opencog.type_constructors import AtomSpace as _AtomSpace

from .args import check_arg as _check_arg


FILTER_CONTEXTS = ['atom', 'in', 'out', 'both', 'in-tree', 'out-tree']


[docs]def filter(data, target, context='atom', mode='include'): """Apply a filter to an Atomspace or list of Atoms and return a list of selected Atoms. Parameters ---------- data : Atomspace, list of Atoms The given Atomspace or list of Atoms that is filtered and thereby reduced to a shorter list of Atoms. target : str, int, Atom, list, Callable The targets that are selected by this filtering function. Possible types and their meaning: - ``str``: A string that is matched against ``.name`` and ``.type_name`` of each Atom. Capitalization is ignored. Examples: - ``target="andlink"`` will include all Atoms of type ``AndLink``. - ``target="$1"`` will include all Atoms with name ``"$1"``. - ``int``: An OpenCog Atom type that is matched against ``.type`` of each Atom. Examples: - ``target=opencog.atomspace.types.OrLink`` selects all Atoms of type ``OrLink``. - ``Atom``: An Atom that is used as it is. Examples: - ``target=list(atomspace)[4]`` selects the fifth Atom in the AtomSpace. - ``list``: A list of str, int and/or Atom. The types can be mixed freely. Examples: - ``target=["andlink", "OrLink"]`` selects all Atoms of type ``AndLink`` and ``OrLink``. - ``target=["notlink", "$1", opencog.atomspace.types.OrLink, list(atomspace)[4]]`` selects all Atoms of type ``NotLink``, all Atoms with name ``"$1"``, all Atoms of type ``OrLink``, and the fifth Atom in the AtomSpace. - ``Callable``: A function that gets an Atom as input and needs to return ``True`` or ``False`` to indicate whether the Atom is selected or not. Examples: - ``target=lambda atom: atom.is_link()`` selects all Atoms that are Links - ``target=lambda atom: atom.name.startswith("$")`` selects all Atoms that have a name starting with ``$``. context : str, tuple The context of the selection of Atoms to which it shall be expanded. Possible values: - ``atom``: Only the Atoms specified by ``target`` are selected. - ``in``: The Atoms specified by ``target`` and all their incoming neighbors are selected. - ``out``: The Atoms specified by ``target`` and all their outgoing neighbors are selected. - ``both``: The Atoms specified by ``target`` and all their incoming and outgoing neighbors are selected. This is also known as `neighborhood <https://en.wikipedia.org/wiki/Neighbourhood_(graph_theory)>`__ in graph theory or `egocentric network <https://research.library.gsu.edu/c.php?g=916490&p=6612505>`__ in social network analysis. - ``in_tree``: The Atoms specified by ``target`` and all their incoming neighbors are selected, which is repeated until nothing can be added anymore. This is also known as `in-tree or anti-arborescence <https://en.wikipedia.org/wiki/Arborescence_(graph_theory)>`__ in graph theory. - ``out_tree``: The Atoms specified by ``target`` and all their outgoing Atoms are selected, which is repeated until nothing can be added anymore. This is also known as `out-tree or arborescence <https://en.wikipedia.org/wiki/Arborescence_(graph_theory)>`__ in graph theory. - ``(context, size)``: In the case of ``in``, ``out`` and ``both`` the context can come with a size, which means how often the selected Atoms are expanded in the chosen way. Examples: - ``("out", 2)`` means that the selected Atoms are expanded twice by their outgoing Atoms, instead of just once when using ``out``. - ``("in", 3)`` means that the selected Atoms are expanded thrice by their incoming Atoms, instead of just once when using ``in``. - ``("both", 2)`` means that the selected Atoms are expanded twice by their incoming and outgoing Atoms, instead of just once when using ``both``. Note that the result can be and usually is different than the combined results from ``("in", 2)`` and ``("out", 2)``, because adding an ingoing neighbor in step 1 and then its outgoing neighbors in step 2 (or vice versa) captures more Atoms. mode : str The selection of Atoms can be the result of the filtering, but it is also possible to exclude those Atoms and instead return all other ones. Possible values: - ``include``: The selection is included in the result. - ``exclude``: The selection is excluded from the result. Everything else is included. Returns ------- atoms : list of Atoms Note ---- Chaining is possible, which means that the output of a filter application can be used as input for another one. This allows to combine different targets, contexts and modes by performing a sequence of filtering steps. Example:: import mevis as mv atomspace = mv.load('moses.scm') atoms = mv.filter(atomspace, target="AndLink", context="out-tree", mode="include") atoms = mv.filter(atoms, target="PredicateNode", context="atom", mode="exclude") mv.plot(atoms, 'vis', 'dot') The first line includes Atoms of type ``AndLink`` and their maximally expanded outgoing neighborhood. The second line excludes Atoms of type ``PredicateNode`` from the previous result. """ # Argument processing _check_arg(data, 'data', (list, _AtomSpace)) _check_arg(target, 'target', (str, int, list, _Callable, _Atom)) _check_arg(mode, 'mode', str, ['include', 'exclude']) if isinstance(context, tuple): context, size = context try: _check_arg(context, 'context', str, ['in', 'out', 'both']) _check_arg(size, 'size', int) if size < 0: raise ValueError('Context size needs to be equal to or greater than zero.') except Exception as excp: message = 'Argument "context" got an invalid tuple as value.' raise ValueError(message) from excp else: _check_arg(context, 'context', str, FILTER_CONTEXTS) size = 1 given_atoms = data # Filter: Select all Atoms as specified by target if isinstance(target, _Atom): selected_atoms = [target] else: func = _prepare_filter_func(target) selected_atoms = [atom for atom in given_atoms if func(atom)] # Expand by context: if desired, include some neighboring atoms selected_atoms = _expand(selected_atoms, context, size) # Select by mode: if desired, invert the selected set of atoms (=exclude instead of include) atoms = _include_or_exclude(selected_atoms, given_atoms, mode) return atoms
def _prepare_filter_func(target): """Create a filter function depending on the type of the given target.""" if isinstance(target, str): target = target.lower() def func(atom): # match: name, type name return atom.name.lower() == target \ or atom.type_name.lower() == target elif isinstance(target, int): def func(atom): # match: type return int(atom.type) == target elif isinstance(target, list): target = set(x.lower() if isinstance(x, str) else x for x in target) def func(atom): # match: name, type name, type, atom return atom.type_name.lower() in target or \ atom.name.lower() in target or \ int(atom.type) in target or \ atom in target elif isinstance(target, _Callable): func = target return func def _expand(atoms, context, context_size): """Expand a list of atoms depending on the type of context that shall be included.""" if context == 'in': # Add all Atoms in the incoming neighborhood of an Atom, repeat it if context size > 1 for _ in range(context_size): expansion = set() for atom in atoms: expansion.add(atom) expansion.update(atom.incoming) atoms = list(expansion) elif context == 'out': # Add all Atoms in the outgoing neighborhood of an Atom, repeat it if context size > 1 for _ in range(context_size): expansion = set() for atom in atoms: expansion.add(atom) expansion.update(atom.out) atoms = list(expansion) elif context == 'both': # Add all Atoms in the neighborhood of an Atom, repeat it if context size > 1 for _ in range(context_size): expansion = set() for atom in atoms: expansion.add(atom) expansion.update(atom.incoming) expansion.update(atom.out) atoms = list(expansion) elif context == 'in-tree': # Add all Atoms in the incoming neighborhood of an Atom, repeat it until all is reached expansion = set() for atom in atoms: expansion = expansion.union(_dfs_in(atom)) atoms = list(expansion) elif context == 'out-tree': # Add all Atoms in the outgoing neighborhood of an Atom, repeat it until all is reached expansion = set() for atom in atoms: expansion = expansion.union(_dfs_out(atom)) atoms = list(expansion) return atoms def _include_or_exclude(selected, given, mode): """Include or exclude the selected Atoms from the initially given Atoms.""" if mode == 'exclude': selected = [atom for atom in given if atom not in selected] return selected def _dfs_out(atom): """Traverse an Atom's outgoing neighborhood iteratively with a depth-first search.""" atoms = [atom] stack = list(atom.out) while stack: atom = stack.pop(0) atoms.append(atom) stack[0:0] = atom.out return atoms def _dfs_in(atom): """Traverse an Atom's incoming neighborhood iteratively with a depth-first search.""" atoms = [atom] stack = list(atom.incoming) while stack: atom = stack.pop(0) atoms.append(atom) stack[0:0] = atom.incoming return atoms