"""Forward and reverse mapping functions for piGE."""
import random as _random
from ... import _grammar
from ... import exceptions as _exceptions
from ..._utilities import argument_processing as _ap
from ..._utilities.parametrization import get_given_or_default as _get_given_or_default
from .. import _shared
from . import default_parameters as _dp
from . import representation as _representation
# Shortcuts for brevity and minor speedup
_GT = _representation.Genotype
_DT = _grammar.data_structures.DerivationTree
_NT = _grammar.data_structures.NonterminalSymbol
def _fe(me, re):
if re:
_exceptions.raise_max_expansion_error(me)
def _fw(mw, re):
if re:
_exceptions.raise_max_wraps_error(mw)
# Forward mapping
[docs]def forward(
grammar,
genotype,
parameters=None,
raise_errors=True,
return_derivation_tree=False,
verbose=False,
):
"""Map a piGE genotype to a string phenotype.
Beginning with the start symbol, each nonterminal symbol is expanded
by applying a production rule. These rule applications are done
until the sequence of symbols contains only terminals. Then the
symbols form a sentence of the language defined by the grammar and
hence are a valid phenotype.
Choices to be made in the mapping process:
1. Which nonterminal is expanded if several are present in the
current sequence of symbols?
While standard GE always uses the leftmost nonterminal, here the
next nonterminal is chosen by the next codon in the data
(aka "order codon").
2. Which production rule is used to expand a selected nonterminal?
In case there is more than one production rule available for the
nonterminal, both GE and piGE use the next codon in the data to
select one (aka "content codon").
The formula used is: ``rule id = codon value % number of rules for the NT``.
The data is read from left to right. If the end is reached, a
so-called wrap is done to restart again from the left, i.e.
codons can be used more than once. If a maximum number of wraps
is reached, the mapping process is stopped and the individual
has no valid phenotype and should receive the worst possible
fitness during fitness evaluation.
Parameters
----------
grammar : `~alogos.Grammar`
genotype : `~.representation.Genotype` or data that can be converted to it
parameters : `dict` or `~alogos._utilities.parametrization.ParameterCollection`, optional
- ``max_expansions`` (`int`): Maximum number of nonterminal
expansions allowed in the derivation created by the mapping
process.
- ``max_wraps`` (`int`): Maximum number of times the genotype
is allowed to be wrapped in order to have more codons
available for making decisions in the mapping process.
- ``stack_mode`` (`str`): Mode by which newly discovered symbols
are added to the stack of all currently present symbols in
a derivation.
Possible values:
- ``"start"``: Insert new nodes at the start of the stack.
- ``"end"``: Insert new nodes at the end of the stack.
- ``"inplace"``: Insert new nodes at the same place as the
expanded nonterminal in the stack.
raise_errors : `bool`, optional
Possible values:
- `True`: A mapping error will be raised if a derivation is
not finished within a limit provided in the parameters.
- `False`: A partial derivation is allowed. In this case, the
returned string will contain unexpanded nonterminal symbols.
Therefore it is not a valid phenotype, i.e. not a string of
the grammar's language but a so-called sentential form.
return_derivation_tree : `bool`, optional
If `True`, not only the phenotype is returned but additionally
also the derivation tree.
verbose : `bool`, optional
If `True`, output about steps of the mapping process is printed.
Returns
-------
phenotype : `str`
If ``return_derivation_tree`` is `False`, which is the default.
(phenotype, derivation_tree) : `tuple` with two elements of type `str` and `~alogos._grammar.data_structures.DerivationTree`
If ``return_derivation_tree`` is `True`.
Raises
------
MappingError
If ``raise_errors`` is `True` and the mapping process can not
generate a full derivation before reaching a limit provided in
the parameters.
"""
# Parameter extraction
me = _get_given_or_default("max_expansions", parameters, _dp)
mw = _get_given_or_default("max_wraps", parameters, _dp)
sm = _get_given_or_default("stack_mode", parameters, _dp)
if me is None and mw is None:
mw = 0
# Argument processing
if not isinstance(genotype, _GT):
genotype = _GT(genotype)
# Mapping
if verbose or sm != "inplace":
dt = _forward_slow(grammar, genotype.data, me, mw, sm, raise_errors, verbose)
else:
dt = _forward_fast(grammar, genotype.data, me, mw, sm, raise_errors)
phe = dt.string()
# Conditional return
if return_derivation_tree:
return phe, dt
return phe
def _forward_fast(gr, gt, me, mw, sm, re):
"""Calculate the genotype-to-phenotype map of piGE in a fast way."""
d = _DT(gr)
x = d._expand
s = [d.root_node]
p = s.pop
w = gr.production_rules
g = len(gt)
c = 0
e = 0
while s:
if me is not None and e >= me:
_fe(me, re)
break
if mw is not None and (c // g) > mw:
_fw(mw, re)
break
o = gt[c % g]
c += 1
k = gt[c % g]
c += 1
i = o % len(s)
f = p(i)
r = w[f.symbol]
s[i:i] = [n for n in x(f, r[k % len(r)]) if isinstance(n.symbol, _NT)]
e += 1
return d
def _forward_slow(
grammar, genotype, max_expansions, max_wraps, stack_mode, raise_errors, verbose
):
"""Calculate the genotype-to-phenotype map of piGE in a slow way.
This is a readable implementation of the mapping process, which
also allows to print output about the steps it involves.
It served as basis for the faster, minified implementation in this
module and may be helpful in understanding, replicating or modifying
the algorithm.
"""
# Argument processing
_ap.str_arg("stack_mode", stack_mode, vals=["start", "end", "inplace"])
# Create derivation tree
derivation_tree = _grammar.data_structures.DerivationTree(grammar)
stack = [derivation_tree.root_node]
num_codons = len(genotype)
codon_counter = 0
expansion_counter = 0
if verbose:
header = "Start of the derivation"
print(header)
print("=" * len(header))
print(
"- Get start symbol of the grammar: <{}>".format(grammar.start_symbol.text)
)
print()
print("Sentential form: {}".format(derivation_tree.string()))
print()
while stack:
# Check stop conditions
if max_wraps is not None and (codon_counter // num_codons) > max_wraps:
if verbose:
header = "Stop condition fulfilled"
print(header)
print("=" * len(header))
print(
"- The maximum number of wraps and end of genotype are "
"reached: {}".format(max_wraps)
)
print()
if raise_errors:
_exceptions.raise_max_wraps_error(max_wraps)
break
if max_expansions is not None and expansion_counter >= max_expansions:
if verbose:
header = "Stop condition fulfilled"
print(header)
print("=" * len(header))
print(
"- The maximum number of expansions is reached: {}".format(
max_expansions
)
)
print()
if raise_errors:
_exceptions.raise_max_expansion_error(max_expansions)
break
# 1) Choose nonterminal: piGE uses an "order codon" to select one from all unexpanded nt
order_codon = genotype[codon_counter % num_codons]
codon_counter += 1
num_nonterminals = len(stack)
chosen_nt_idx = order_codon % num_nonterminals
chosen_nt_node = stack.pop(chosen_nt_idx)
if verbose:
header = "Expansion {}".format(expansion_counter + 1)
print(header)
print("=" * len(header))
print("- Choice of nonterminal to expand")
print(
" Index: {} by order codon value {} % {} "
"nonterminals".format(chosen_nt_idx, order_codon, num_nonterminals)
)
print(" Symbol: <{}>".format(chosen_nt_node.symbol.text))
# 2) Choose rule: piGE uses a "content codon" to select a rule from 1 to n avaiable ones
if verbose:
print("- Choice of rule to apply")
content_codon = genotype[codon_counter % num_codons]
codon_counter += 1
rules = grammar.production_rules[chosen_nt_node.symbol]
num_rules = len(rules)
chosen_rule_idx = content_codon % num_rules
chosen_rule = rules[chosen_rule_idx]
if verbose:
print(
" Index: {} by content codon value {} % {} "
"rules".format(chosen_rule_idx, content_codon, num_rules)
)
# 3) Expand the chosen nonterminal with the rhs of the chosen rule
if verbose:
print("- Application of rule to nonterminal")
new_nodes = derivation_tree._expand(chosen_nt_node, chosen_rule)
expansion_counter += 1
if verbose:
rhs = "".join(
sym.text
if isinstance(sym, _grammar.data_structures.TerminalSymbol)
else "<{}>".format(sym.text)
for sym in chosen_rule
)
print(
" Substitution: <{}> -> {}".format(chosen_nt_node.symbol.text, rhs)
)
# 4) Add new nodes that contain a nonterminal to the stack
new_nt_nodes = [node for node in new_nodes if isinstance(node.symbol, _NT)]
if stack_mode == "start":
# Alternative 1: Insert new nodes at start, suggested by partial example in 2004 paper
stack = new_nt_nodes + stack
elif stack_mode == "end":
# Alternative 2: Insert new nodes at end, suggested by full example in 2010 paper
# shown explicitly in Fig. 3
stack = stack + new_nt_nodes
elif stack_mode == "inplace":
# Alternative 3: Insert new nodes at position of the expanded nt, suggested by
# sentential forms during a derivation and by handbook in 2018
stack[chosen_nt_idx:chosen_nt_idx] = new_nt_nodes
if verbose:
print()
print("Sentential form: {}".format(derivation_tree.string()))
print()
if verbose:
header = "End of the derivation"
print(header)
print("=" * len(header))
print()
name = (
"String" if derivation_tree.is_completely_expanded() else "Sentential form"
)
print("{}: {}".format(name, derivation_tree.string()))
return derivation_tree
# Reverse mapping
[docs]def reverse(
grammar, phenotype_or_derivation_tree, parameters=None, return_derivation_tree=False
):
"""Map a string phenotype (or derivation tree) to a piGE genotype.
This is a reversal of the mapping procedure of
position-independent Grammatical Evolution (piGE).
Note that many different piGE genotypes can encode the same
derivation tree and phenotype. It is possible to return a
deterministic piGE genotype that uses the lowest possible integer
value for each choice of expansion, or a random piGE genotype that
uses a random integer value within the codon size limit.
Parameters
----------
grammar : `~alogos.Grammar`
phenotype_or_derivation_tree : `str` or `~alogos._grammar.data_structures.DerivationTree`
parameters : `dict` or `~alogos._utilities.parametrization.ParameterCollection`, optional
Following keyword-value pairs are considered by this function:
- ``codon_size`` (`int`): Codon size in bits. The number of
different integer values a codon can assume is therefore
determined by ``2**codon_size``.
- ``codon_randomization`` (`bool`): If `True`, the reverse
mapping will use the so-called "unmod" operation and therefore
generate random integers that encode the required rule choice.
If `False`, the lowest possible integer will be used and the
reverse mapping becomes deterministic.
- ``derivation_order`` (`str`): Choice of next nonterminal to
expand, which in piGE is chosen by a codon in the genotype.
Possible values:
- ``"leftmost"``: Always choose the leftmost unexpanded
nonterminal in the partial derivation.
- ``"rightmost"``: Always choose the rightmost unexpanded
nonterminal in the partial derivation.
- ``"random"``: Always choose a random unexpanded
nonterminal in the partial derivation. This makes the
reverse mapping stochastic.
- ``stack_mode`` (`str`): Mode by which newly discovered symbols
are added to the stack of all currently present symbols in
a derivation.
Possible values:
- ``"start"``: Insert new nodes at the start of the stack.
- ``"end"``: Insert new nodes at the end of the stack.
- ``"inplace"``: Insert new nodes at the same place as the
expanded nonterminal in the stack.
return_derivation_tree : `bool`, optional
If `True`, not only the genotype is returned but additionally
also the derivation tree.
Returns
-------
genotype : `~.representation.Genotype`
If ``return_derivation_tree`` is `False`, which is the default.
(genotype, derivation_tree) : `tuple` with two elements of type `~.representation.Genotype` and `~alogos._grammar.data_structures.DerivationTree`
If ``return_derivation_tree`` is `True`.
Raises
------
MappingError
If the reverse mapping fails because the string does not belong
to the grammar's language or the derivation tree does not
represent a valid derivation.
"""
# Parameter extraction
codon_size = _get_given_or_default("codon_size", parameters, _dp)
codon_randomization = _get_given_or_default("codon_randomization", parameters, _dp)
derivation_order = _get_given_or_default("derivation_order", parameters, _dp)
stack_mode = _get_given_or_default("stack_mode", parameters, _dp)
# Argument processing
_ap.str_arg(
"derivation_order", derivation_order, vals=("leftmost", "rightmost", "random")
)
_ap.str_arg("stack_mode", stack_mode, vals=("start", "end", "inplace"))
max_int = 2**codon_size - 1
# Preparation of data structures
dt = _shared.mapping.get_derivation_tree(grammar, phenotype_or_derivation_tree)
gt = []
# Trace all decisions contained in the given derivation tree
root = dt.root_node
stack = [root] # stack to ensure stack_mode
stack_tlo = [
root
] # stack to ensure derivation_order (tlo = nodes in tree leaf order)
while stack:
# 1) Choose nonterminal: piGE decides via next "order codon" -> Choose it arbitrarily
if derivation_order == "leftmost":
chosen_nt_idx_tlo = 0
elif derivation_order == "rightmost":
chosen_nt_idx_tlo = len(stack_tlo) - 1
elif derivation_order == "random":
chosen_nt_idx_tlo = _random.randint(0, len(stack_tlo) - 1)
chosen_nt_node = stack_tlo.pop(chosen_nt_idx_tlo)
chosen_nt_idx = stack.index(chosen_nt_node)
chosen_nt_idx_stored = chosen_nt_idx
chosen_nt_node = stack.pop(chosen_nt_idx)
if chosen_nt_idx > max_int:
_exceptions.raise_limited_codon_size_error(chosen_nt_idx, max_int)
if codon_randomization:
options = range(chosen_nt_idx, max_int + 1, len(stack) + 1)
chosen_nt_idx_stored = _random.choice(options)
# 2) Choose rule: piGE decides via next "content codon" -> Deduce it from tree
try:
rules = grammar.production_rules[chosen_nt_node.symbol]
except Exception:
_exceptions.raise_missing_nt_error(chosen_nt_node)
try:
chosen_rule = [node.symbol for node in chosen_nt_node.children]
chosen_rule_idx = rules.index(chosen_rule)
except ValueError:
_exceptions.raise_missing_rhs_error(chosen_nt_node, chosen_rule)
if chosen_rule_idx > max_int:
_exceptions.raise_limited_codon_size_error(chosen_rule_idx, max_int)
if codon_randomization:
options = range(chosen_rule_idx, max_int + 1, len(rules))
chosen_rule_idx = _random.choice(options)
# 3) Expand the chosen nonterminal with the rhs of the chosen rule -> Follow the expansion
new_nt_nodes = [
node for node in chosen_nt_node.children if isinstance(node.symbol, _NT)
]
if stack_mode == "start":
stack = new_nt_nodes + stack
elif stack_mode == "end":
stack = stack + new_nt_nodes
elif stack_mode == "inplace":
stack[chosen_nt_idx:chosen_nt_idx] = new_nt_nodes
stack_tlo[chosen_nt_idx_tlo:chosen_nt_idx_tlo] = new_nt_nodes
# Store the observed decisions, even if there was only one option
gt.append(chosen_nt_idx_stored)
gt.append(chosen_rule_idx)
# Finalization of data structures
gt = _GT(gt)
# Conditional return
if return_derivation_tree:
return gt, dt
return gt