"""Neighborhood functions to generate nearby genotypes for CFG-GP."""
from ... import _grammar
from ... import exceptions as _exceptions
from ..._utilities.parametrization import get_given_or_default as _get_given_or_default
from .. import _shared
from . import default_parameters as _default_parameters
from . import representation as _representation
# Shortcuts for brevity and minor speedup
_NT = _grammar.data_structures.NonterminalSymbol
_T = _grammar.data_structures.TerminalSymbol
[docs]def subtree_replacement(grammar, genotype, parameters=None):
"""Systematically change a chosen number of nodes.
Parameters
----------
grammar : `~alogos.Grammar`
genotype : `~.representation.Genotype`
parameters : `dict` or `~alogos._utilities.parametrization.ParameterCollection`, optional
Following keyword-value pairs are considered by this function:
- ``neighborhood_distance`` (`int`) : The distance from the
original genotype to a new genotype in terms of replaced
subtrees.
- ``neighborhood_max_size`` (`int`) : Maximum number of
neighbor genotypes to generate.
- ``neighborhood_only_terminals`` (`bool`) : If `True`, only
replace nodes with terminals in them.
Returns
-------
neighbors : `list` of `~.representation.Genotype` objects
"""
# Parameter extraction
distance = _get_given_or_default(
"neighborhood_distance", parameters, _default_parameters
)
max_size = _get_given_or_default(
"neighborhood_max_size", parameters, _default_parameters
)
only_t = _get_given_or_default(
"neighborhood_only_terminals", parameters, _default_parameters
)
# Argument processing
if not isinstance(genotype, _representation.Genotype):
genotype = _representation.Genotype(genotype)
# Get alternative choices per position by going through the productions in the tree
dt = genotype.data
nodes, choices = _get_choices_per_position(grammar, dt, only_t)
num_choices_per_pos = [len(x) for x in choices]
# Generate combinations of choices
combinations = _shared.neighborhood.generate_combinations(
num_choices_per_pos, distance, max_size
)
# Neighborhood construction
if distance == 1:
nbrs = _generate_nbrs_fast_for_d1(grammar, dt, nodes, choices, combinations)
else:
nbrs = _generate_nbrs_slow(grammar, dt, nodes, choices, combinations)
return nbrs
def _generate_nbrs_fast_for_d1(grammar, dt, nodes, choices, combinations):
"""Quickly generate a neighborhood with distance 1."""
nbrs = []
for comb in combinations:
# Semantics of comb: 0 means no change, >0 points to a certain alternative choice
for idx, val in enumerate(comb):
if val > 0:
node = nodes[idx]
rhs_idx = choices[idx][val - 1]
rhs = grammar.production_rules[node.symbol][rhs_idx]
# Create a new subtree
node_new = _grow_deterministic_subtree(grammar, node, rhs)
# Swap old node with the new one
node.children, node_new.children = node_new.children, node.children
# Copy the new tree
dt_new = dt.copy()
# Restore the original tree
node.children, node_new.children = node_new.children, node.children
break
nbrs.append(_representation.Genotype(dt_new))
return nbrs
def _generate_nbrs_slow(grammar, dt, nodes, choices, combinations):
"""Slowly generate a neighborhood with distance >1."""
nbrs = set()
for comb in combinations:
# Semantics of comb: 0 means no change, >0 points to a certain alternative choice
swaps = []
for idx, val in enumerate(comb):
if val > 0:
node = nodes[idx]
rhs_idx = choices[idx][val - 1]
rhs = grammar.production_rules[node.symbol][rhs_idx]
# Create a new subtree
node_new = _grow_deterministic_subtree(grammar, node, rhs)
swaps.append((node, node_new))
# Swap all old nodes with the new ones
for old, new in swaps:
old.children, new.children = new.children, old.children
# Copy the new tree
dt_new = dt.copy()
# Restore the original tree
for old, new in swaps:
old.children, new.children = new.children, old.children
nbrs.add(_representation.Genotype(dt_new))
return list(nbrs)
def _get_choices_per_position(grammar, dt, only_terminals):
"""Get all productions found when traversing the tree in leftmost order."""
nodes = []
choices = []
root = dt.root_node
stack = [root]
while stack:
# 1) Choose nonterminal -> Leftmost
chosen_nt_node = stack.pop(0)
# 2) Choose rule -> Deduce it from tree
try:
rules = grammar.production_rules[chosen_nt_node.symbol]
num_rules = len(rules)
except Exception:
_exceptions.raise_missing_nt_error(chosen_nt_node)
try:
chosen_rule = [node.symbol for node in chosen_nt_node.children]
chosen_rule_idx = rules.index(chosen_rule)
if only_terminals:
other_rule_indices = [
idx
for idx, rule in enumerate(rules)
if idx != chosen_rule_idx
and any(isinstance(sym, _T) for sym in rule)
]
else:
other_rule_indices = [
idx for idx in range(num_rules) if idx != chosen_rule_idx
]
except ValueError:
_exceptions.raise_missing_rhs_error(chosen_nt_node, chosen_rule)
# 3) Expand the chosen nonterminal with rhs of the chosen rule -> Follow the expansion
new_nt_nodes = [
node for node in chosen_nt_node.children if isinstance(node.symbol, _NT)
]
stack = new_nt_nodes + stack
# Store the observed decisions
nodes.append(chosen_nt_node)
choices.append(other_rule_indices)
return nodes, choices
def _grow_deterministic_subtree(grammar, node, rhs):
# Cache lookup or one-time calculation
min_depths = grammar._lookup_or_calc(
"shared",
"min_depths",
_shared._cached_calculations.min_depths_to_terminals,
grammar,
)
# Create a new node to grow the subtree from
node = node.copy()
# First expansion: Use the chosen rhs
node.children = [_grammar.data_structures.Node(sym) for sym in rhs]
# Follow-up expansions: Grow by applying the first rule with shortest path to terminals
def grow(node):
nt = node.symbol
rules = grammar.production_rules[nt]
depths_per_rule = min_depths[nt]
first_min_pos = depths_per_rule.index(min(depths_per_rule))
chosen_rule = rules[first_min_pos] # first rule with min depth
new_nodes = [_grammar.data_structures.Node(sym) for sym in chosen_rule]
node.children = new_nodes
for nd in new_nodes:
if nd.contains_nonterminal():
grow(nd)
for nd in node.children:
if nd.contains_nonterminal():
grow(nd)
return node