Source code for alogos.systems._shared.init_population

"""Shared initialization functions for several systems."""

from ... import exceptions as _exceptions
from ..._utilities.parametrization import get_given_or_default as _get_given_or_default


def given_genotypes(grammar, parameters, dp, _representation, _init_individual):
    """Create a population from given genotypes."""
    # Look up the genotype list before any defaulting of ``parameters``.
    genotypes = _get_given_or_default("init_pop_given_genotypes", parameters, dp)

    # Guarantee a mutable dict for passing per-individual arguments below.
    if parameters is None:
        parameters = dict()

    # Build one individual per genotype via the single-individual initializer,
    # which reads its input from the shared parameters dict.
    try:
        if not genotypes:
            _exceptions.raise_no_genotypes_error()
        members = []
        for genotype in genotypes:
            parameters["init_ind_given_genotype"] = genotype
            members.append(_init_individual.given_genotype(grammar, parameters))
        population = _representation.Population(members)
    except Exception:
        _exceptions.raise_init_pop_from_gt_error()
    return population
def given_derivation_trees(grammar, parameters, dp, _representation, _init_individual):
    """Create a population from given derivation trees."""
    # Look up the derivation-tree list before any defaulting of ``parameters``.
    trees = _get_given_or_default("init_pop_given_derivation_trees", parameters, dp)

    # Guarantee a mutable dict for passing per-individual arguments below.
    if parameters is None:
        parameters = dict()

    # Build one individual per derivation tree via the single-individual
    # initializer, which reads its input from the shared parameters dict.
    try:
        if not trees:
            _exceptions.raise_no_derivation_trees_error()
        members = []
        for tree in trees:
            parameters["init_ind_given_derivation_tree"] = tree
            members.append(_init_individual.given_derivation_tree(grammar, parameters))
        population = _representation.Population(members)
    except Exception:
        _exceptions.raise_init_pop_from_dt_error()
    return population
def given_phenotypes(grammar, parameters, dp, _representation, _init_individual):
    """Create a population from given phenotypes."""
    # Look up the phenotype list before any defaulting of ``parameters``.
    phenotypes = _get_given_or_default("init_pop_given_phenotypes", parameters, dp)

    # Guarantee a mutable dict for passing per-individual arguments below.
    if parameters is None:
        parameters = dict()

    # Build one individual per phenotype via the single-individual initializer,
    # which reads its input from the shared parameters dict.
    try:
        if not phenotypes:
            _exceptions.raise_no_phenotypes_error()
        members = []
        for phenotype in phenotypes:
            parameters["init_ind_given_phenotype"] = phenotype
            members.append(_init_individual.given_phenotype(grammar, parameters))
        population = _representation.Population(members)
    except Exception:
        _exceptions.raise_init_pop_from_phe_error()
    return population
def random_genotypes(grammar, parameters, dp, _representation, _init_individual):
    """Create a population from random genotypes.

    References
    ----------
    - 2017, Nicolau: `Understanding grammatical evolution: initialisation
      <https://doi.org/10.1007/s10710-017-9309-9>`__

      - 3.1 Random initialisation (RND): "The original implementations of GE
        used a random (RND) initialisation procedure. This simple procedure
        consists of generating random genotype arrays (either binary or
        integer) of a specified length, in a manner similar to random
        initialisation in (fixed-length) genetic algorithms."

    - 2018, Ryan, O'Neill, Collins: `Handbook of Grammatical Evolution
      <https://doi.org/10.1007/978-3-319-78717-6>`__

      - p. 11: "Originally, we used random initialisation for the GE
        population. However, as noted in [18, 26, 86], random initialisation
        can lead to very heavily biased initial populations. [...] What is
        crucial though, is to put some effort into ensuring good variation in
        that initial population, and to avoid simple random initialisation."

    """
    # Parameter extraction
    pop_size = _get_given_or_default("init_pop_size", parameters, dp)
    max_tries = _get_given_or_default("init_pop_unique_max_tries", parameters, dp)
    make_unique = _get_ensure_uniqueness_func(parameters, dp)

    try:
        # Factory for a single random individual; the uniqueness wrapper may
        # call it repeatedly until a new genotype/phenotype is found.
        def new_individual():
            return _init_individual.random_genotype(grammar, parameters)

        members = []
        tries = 0
        seen = set()
        for _ in range(pop_size):
            ind, tries = make_unique(new_individual, seen, pop_size, tries, max_tries)
            members.append(ind)
        population = _representation.Population(members)
    except Exception:
        _exceptions.raise_init_pop_rand_gt_error()
    return population
def gp_rhh(grammar, parameters, dp, _representation, _init_individual):
    """Create a population with GP RHH.

    Notes
    -----
    This approach was originally developed in genetic programming (GP), where
    it was termed "ramped half and half" (RHH). In grammatical evolution (GE),
    the adapted but conceptually identical approach is known as "sensible
    initialization". Two main variants of it can be found in GE literature:

    - Variant 1: It does not ensure that there are no duplicate trees.
    - Variant 2: It removes duplicate trees by comparing their linear
      genotypes. First the linear encoding uses the smallest possible integers
      to represent the rules for each step. To increase variability within the
      generated genotypes, an "unmod" operator is applied to each codon, which
      uses a random integer from the whole possible range, which still
      represents the same rule as the original smallest possible integer.

    References
    ----------
    - 2003, Ryan et al.: `Sensible Initialisation in Chorus
      <https://doi.org/10.1007/3-540-36599-0_37>`__
    - 2016, Fagan et al.: `Exploring position independent initialisation in
      grammatical evolution <https://doi.org/10.1109/CEC.2016.7748331>`__

    """
    # Parameter extraction
    pop_size = _get_given_or_default("init_pop_size", parameters, dp)
    max_tries = _get_given_or_default("init_pop_unique_max_tries", parameters, dp)
    lo_depth = _get_given_or_default("init_pop_gp_rhh_start_depth", parameters, dp)
    hi_depth = _get_given_or_default("init_pop_gp_rhh_end_depth", parameters, dp)
    make_unique = _get_ensure_uniqueness_func(parameters, dp)
    if parameters is None:
        parameters = dict()
    try:
        # Ramp of depths: runs from the maximum down to the minimum, which
        # gives a slight preference to larger depths when the population size
        # is not a multiple of the ramp length.
        depths = _create_ramp(lo_depth, hi_depth)
        n = len(depths)

        # Factories for single individuals; the closures late-bind
        # ``parameters``, so the depth set just before each call is used.
        def grow_ind():
            return _init_individual.gp_grow_tree(grammar, parameters)

        def full_ind():
            return _init_individual.gp_full_tree(grammar, parameters)

        members = []
        tries = 0
        seen = set()
        idx = 0
        for idx in range(pop_size // 2):
            # One "grow" tree ...
            parameters["init_ind_gp_grow_max_depth"] = depths[idx % n]
            ind, tries = make_unique(grow_ind, seen, pop_size, tries, max_tries)
            members.append(ind)
            # ... paired with one "full" tree at the same ramped depth.
            parameters["init_ind_gp_full_max_depth"] = depths[idx % n]
            ind, tries = make_unique(full_ind, seen, pop_size, tries, max_tries)
            members.append(ind)
        if pop_size % 2 == 1:
            # Odd population size: one extra "grow" tree at the last depth.
            parameters["init_ind_gp_grow_max_depth"] = depths[idx % n]
            ind, tries = make_unique(grow_ind, seen, pop_size, tries, max_tries)
            members.append(ind)
        population = _representation.Population(members)
    except Exception:
        _exceptions.raise_init_pop_gp_rhh_error()
    return population
def pi_rhh(grammar, parameters, dp, _representation, _init_individual):
    """Create a population with PI RHH (position-independent ramped half and half).

    Half of the individuals are generated with a position-independent "grow"
    tree initializer over a ramp of maximum depths, the other half with a
    "full" tree initializer at the same ramped depths. If the population size
    is odd, one extra "grow" individual is added at the end.
    """
    # Parameter extraction
    population_size = _get_given_or_default("init_pop_size", parameters, dp)
    max_tries = _get_given_or_default("init_pop_unique_max_tries", parameters, dp)
    start_depth = _get_given_or_default("init_pop_pi_rhh_start_depth", parameters, dp)
    end_depth = _get_given_or_default("init_pop_pi_rhh_end_depth", parameters, dp)
    ensure_uniqueness_func = _get_ensure_uniqueness_func(parameters, dp)
    if parameters is None:
        parameters = dict()
    try:
        # Create the ramp of depths: slight preference for larger values by
        # starting from the maximum.
        ramped_depths = _create_ramp(start_depth, end_depth)
        n = len(ramped_depths)

        # Fix: the grow half previously called gp_grow_tree, which reads
        # "init_ind_gp_grow_max_depth" and therefore silently ignored the
        # "init_ind_pi_grow_max_depth" value set below. PI RHH uses the
        # position-independent grow initializer, matching the parameter name.
        def create_grow_ind():
            return _init_individual.pi_grow_tree(grammar, parameters)

        def create_full_ind():
            return _init_individual.gp_full_tree(grammar, parameters)

        # Create individuals (and optionally ensure their uniqueness)
        individuals = []
        num_tries = 0
        known = set()
        i = 0
        for i in range(population_size // 2):
            # grow
            parameters["init_ind_pi_grow_max_depth"] = ramped_depths[i % n]
            ind, num_tries = ensure_uniqueness_func(
                create_grow_ind, known, population_size, num_tries, max_tries
            )
            individuals.append(ind)
            # full, at the same ramped depth
            parameters["init_ind_gp_full_max_depth"] = ramped_depths[i % n]
            ind, num_tries = ensure_uniqueness_func(
                create_full_ind, known, population_size, num_tries, max_tries
            )
            individuals.append(ind)
        if population_size % 2 == 1:
            # grow: the last individual if population size is odd
            parameters["init_ind_pi_grow_max_depth"] = ramped_depths[i % n]
            ind, num_tries = ensure_uniqueness_func(
                create_grow_ind, known, population_size, num_tries, max_tries
            )
            individuals.append(ind)
        # Create population
        population = _representation.Population(individuals)
    except Exception:
        _exceptions.raise_init_pop_pi_rhh_error()
    return population
def ptc2(grammar, parameters, dp, _representation, _init_individual):
    """Create a population with Nicolau's PTC2 method.

    In 2000, Luke created a new population initialization approach called
    "Probabilistic tree-creation 2" (PTC2). In 2010, Harper adapted it to
    grammatical evolution. In 2017, Nicolau created a variant of Harper's
    PTC2 method, which is implemented here.

    References
    ----------
    - 2000, Luke: `Two Fast Tree-Creation Algorithms for Genetic Programming
      <https://doi.org/10.1109/4235.873237>`__
    - 2010, Harper: `GE, explosive grammars and the lasting legacy of bad
      initialisation <https://doi.org/10.1109/CEC.2010.5586336>`__
    - 2017, Nicolau: `Understanding grammatical evolution: initialisation
      <https://doi.org/10.1007/s10710-017-9309-9>`__

      - 3.3 Probabilistic tree-creation 2 (PTC2)
      - 3.6 Probabilistic tree-creation 2 with depth limit (PTC2D)

    - 2018, Ryan, O'Neill, Collins: `Handbook of Grammatical Evolution
      <https://doi.org/10.1007/978-3-319-78717-6>`__

      - p. 13: "More recent work on initialisation includes that of Nicolau,
        who demonstrated that across the problems examined in their study, a
        variant of Harper's PTC2 consistently outperforms other
        initialisations"

    """
    # Parameter extraction
    pop_size = _get_given_or_default("init_pop_size", parameters, dp)
    max_tries = _get_given_or_default("init_pop_unique_max_tries", parameters, dp)
    exp_lo = _get_given_or_default("init_pop_ptc2_start_expansions", parameters, dp)
    exp_hi = _get_given_or_default("init_pop_ptc2_end_expansions", parameters, dp)
    make_unique = _get_ensure_uniqueness_func(parameters, dp)
    if parameters is None:
        parameters = dict()
    try:
        # Ramp of expansion counts: runs from the maximum down to the minimum,
        # giving a slight preference to larger values.
        expansions = _create_ramp(exp_lo, exp_hi)
        n = len(expansions)

        # Factory for a single PTC2 individual; the closure late-binds
        # ``parameters``, so the count set just before each call is used.
        def new_individual():
            return _init_individual.ptc2_tree(grammar, parameters)

        members = []
        tries = 0
        seen = set()
        for idx in range(pop_size):
            parameters["init_ind_ptc2_max_expansions"] = expansions[idx % n]
            ind, tries = make_unique(new_individual, seen, pop_size, tries, max_tries)
            members.append(ind)
        population = _representation.Population(members)
    except Exception:
        _exceptions.raise_init_pop_ptc2_error()
    return population
# Helper functions


def _create_ramp(start, end):
    """Create a ramp of values from ``end`` down to ``start``.

    Downstream it can lead to a slight preference for larger values because it
    starts with the maximum and goes towards the minimum. The number of
    individuals does not need to be divisible by the length of the list of
    values.

    Raises
    ------
    ValueError
        If ``start`` is greater than ``end``.
    """
    if start > end:
        raise ValueError(
            "For creating a ramp, the chosen start value of {} is bigger than the chosen end value of {}.".format(
                start, end
            )
        )
    # Descending, inclusive range: [end, end-1, ..., start].
    return list(range(end, start - 1, -1))


def _get_ensure_uniqueness_func(parameters, dp):
    """Return a function that ensures some aspect of a new individual is unique."""
    # Parameter extraction
    unique_phe = _get_given_or_default("init_pop_unique_phenotypes", parameters, dp)
    unique_gen = _get_given_or_default("init_pop_unique_genotypes", parameters, dp)
    # Phenotype uniqueness takes precedence when both flags are set.
    if unique_phe:
        ensure_uniqueness_func = _ind_with_unique_phenotype
    elif unique_gen:
        ensure_uniqueness_func = _ind_with_unique_genotype
    else:
        ensure_uniqueness_func = _ind_without_uniqueness
    return ensure_uniqueness_func


def _ind_without_uniqueness(
    create_ind_func, known, population_size, num_tries, max_tries
):
    """Generate a new individual without ensuring anything is unique.

    ``known``, ``population_size`` and ``max_tries`` are accepted only so the
    signature matches the uniqueness-enforcing variants.
    """
    ind = create_ind_func()
    new_num_tries = num_tries + 1
    return ind, new_num_tries


def _ind_with_unique_genotype(
    create_ind_func, known_genotypes, population_size, num_tries, max_tries
):
    """Generate a new individual and ensure its genotype is unique.

    Tries repeatedly (up to the inclusive bound ``max_tries``) until an
    individual with a genotype not yet in ``known_genotypes`` is produced;
    otherwise an error is raised via the exceptions module.
    """
    # Robustness fix: pre-bind new_num_tries so the error branch below cannot
    # hit a NameError when num_tries already exceeds max_tries and the loop
    # body never runs.
    new_num_tries = num_tries
    for new_num_tries in range(num_tries, max_tries + 1):  # noqa: B007
        ind = create_ind_func()
        gen = ind.genotype
        if gen not in known_genotypes:
            known_genotypes.add(gen)
            break
    else:
        _exceptions.raise_init_pop_unique_gen_error(
            len(known_genotypes), population_size, new_num_tries
        )
    return ind, new_num_tries


def _ind_with_unique_phenotype(
    create_ind_func, known_phenotypes, population_size, num_tries, max_tries
):
    """Generate a new individual and ensure its phenotype is unique.

    Consistency fix: use the same inclusive upper bound (``max_tries + 1``)
    as the genotype variant; previously this variant allowed one fewer try.
    """
    # Robustness fix: pre-bind new_num_tries so the error branch below cannot
    # hit a NameError when the loop body never runs.
    new_num_tries = num_tries
    for new_num_tries in range(num_tries, max_tries + 1):  # noqa: B007
        ind = create_ind_func()
        phe = ind.phenotype
        if phe not in known_phenotypes:
            known_phenotypes.add(phe)
            break
    else:
        _exceptions.raise_init_pop_unique_phe_error(
            len(known_phenotypes), population_size, new_num_tries
        )
    return ind, new_num_tries