Source code for alogos._grammar.generation.language

from copy import deepcopy as _deepcopy
from itertools import chain as _chain
from itertools import product as _cartesian_product

from ... import warnings as _warnings
from ..._utilities import argument_processing as _ap
from .. import data_structures as _data_structures


def generate_language(
    grammar, max_steps=None, sort_order=None, verbose=None, return_details=None
):
    """Generate the formal language defined by the grammar."""
    # Argument processing
    warn_if_stopped = max_steps is None
    max_steps = _ap.num_arg("max_steps", max_steps, default=20_000)
    sort_order = _ap.str_arg(
        "sort_order", sort_order, vals=["discovered", "lex", "shortlex"]
    )

    # Preparation of data structures
    # 1) Rules with a) only terminal symbols or b) at least one nonterminal on their rhs
    terminal_rules = []
    nonterminal_rules = []
    for lhs, multiple_rhs in grammar.production_rules.items():
        for rhs in multiple_rhs:
            nonterminals = []
            nonterminal_positions = []
            for idx, symbol in enumerate(rhs):
                if isinstance(symbol, _data_structures.NonterminalSymbol):
                    nonterminals.append(symbol.text)
                    nonterminal_positions.append(idx)
            if nonterminals:
                rule = (lhs.text, _concat(rhs), nonterminals, nonterminal_positions)
                nonterminal_rules.append(rule)
            else:
                rule = (lhs.text, _concat(rhs))
                terminal_rules.append(rule)
    strings_dict_template = {
        nonterminal.text: dict() for nonterminal in grammar.nonterminal_symbols
    }
    strings_all = _deepcopy(strings_dict_template)
    strings_recent = _deepcopy(strings_dict_template)

    # Basis part: Use terminal rules to generate the first strings of some corresponding lhs
    for lhs, rhs in terminal_rules:
        string = _terminal_symbol_seq_to_str(rhs)
        strings_recent[lhs][string] = None  # add the key, value is irrelevant
    if verbose:

        def symbol_seq_repr(symbol_seq):
            seq_repr = []
            for sym in symbol_seq:
                if isinstance(sym, _data_structures.NonterminalSymbol):
                    seq_repr.append("<{}>".format(sym))
                else:
                    seq_repr.append(str(sym))
            return "".join(seq_repr)

        text = "I. Basis"
        print(text)
        print("=" * len(text))
        print()
        print("Production rules with only terminals in the right-hand side:")
        for lhs, rhs in terminal_rules:
            rhs_repr = symbol_seq_repr(rhs)
            if rhs_repr == "":
                rhs_repr = "EPSILON"
            print(" <{}> -> {}".format(lhs, rhs_repr))
        _report_step(1, strings_recent)
        if max_steps > 1:
            text = "II. Recursion"
            print()
            print(text)
            print("=" * len(text))
            print()
            print(
                "Production rules with at least one nonterminal in the right-hand side:"
            )
            for lhs, rhs, _, nonterminal_pos in nonterminal_rules:
                rhs_repr = "".join(
                    "<{}>".format(sym) if i in nonterminal_pos else sym
                    for i, sym in enumerate(rhs)
                )
                print(" <{}> -> {}".format(lhs, rhs_repr))

    # Recursive part: Use nonterminal rules to generate further strings of all lhs
    for num_expansion in range(2, max_steps + 1):
        found_new_strings = False
        new_strings = _deepcopy(strings_dict_template)
        for lhs, rhs, nonterminals, nonterminal_positions in nonterminal_rules:
            # Prepare all combinations of fresh and old strings, except purely old ones
            one_or_another = [(0, 1) for _ in nonterminals]
            all_possible_selections = list(_cartesian_product(*one_or_another))
            wanted_selections = all_possible_selections[:-1]
            for nonterminal_choices_list in wanted_selections:
                string_lists_per_nt = []
                for choice, nonterminal in zip(nonterminal_choices_list, nonterminals):
                    if choice == 0:
                        string_lists_per_nt.append(strings_recent[nonterminal])
                    else:
                        string_lists_per_nt.append(strings_all[nonterminal])
                # Generate all wanted combinations of strings and put them at the places of the
                # corresponding nonterminals to form a new string
                for comb_of_known_strings in _cartesian_product(*string_lists_per_nt):
                    new_string = rhs[:]
                    for pos, known_string in zip(
                        nonterminal_positions, comb_of_known_strings
                    ):
                        new_string[pos] = known_string
                    new_string = _terminal_symbol_seq_to_str(new_string)
                    new_strings[lhs][
                        new_string
                    ] = None  # add the key, value is irrelevant
        # Add fresh strings to old list, and new strings to fresh list
        for nonterminal in grammar.nonterminal_symbols:
            nt_str = nonterminal.text
            strings_all[nt_str].update(strings_recent[nt_str])
            strings_recent[nt_str] = {
                string: None
                for string in new_strings[nt_str]
                if string not in strings_all[nt_str]
            }
            if strings_recent[nt_str]:
                found_new_strings = True
        if not found_new_strings:
            if verbose:
                _report_step(num_expansion, None)
                print()
                print("Finished.")
            break
        if verbose:
            _report_step(num_expansion, strings_recent)
    else:
        if warn_if_stopped:
            _warnings._warn_language_gen_stopped(max_steps)
    for nonterminal in grammar.nonterminal_symbols:
        nt_str = nonterminal.text
        strings_all[nt_str].update(strings_recent[nt_str])

    # Optional sorting
    if sort_order == "discovered":

        def sort_func(given_set):
            unsorted_list = list(given_set)
            return unsorted_list

    elif sort_order == "lex":

        def sort_func(given_set):
            sorted_list = list(given_set)
            sorted_list.sort()
            return sorted_list

    elif sort_order == "shortlex":

        def sort_func(given_set):
            sorted_list = list(given_set)
            sorted_list.sort(key=lambda string: (len(string), string))
            return sorted_list

    # Conditional return
    if return_details:
        # Detailed: A dict with nonterminals as keys and their languages as values
        return {
            nonterminal: sort_func(strings)
            for nonterminal, strings in strings_all.items()
        }
    else:
        # Only the language for the start symbol
        return sort_func(strings_all[grammar.start_symbol.text])
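# Worked example for the function above: with the rules S -> "a" and
# S -> "a" S, the basis step covers the terminal-only rule and yields the
# string "a" for <S>. Each recursion step n then substitutes the most recently
# found strings into S -> "a" S, so step 2 adds "aa", step 3 adds "aaa", and in
# general step n adds "a" repeated n times; since this language is infinite,
# the loop only ends when max_steps is exhausted, and the for-else branch warns
# if the default limit was used. With the rules S -> "a" A, A -> "b" | "c", the
# basis yields "b" and "c" for <A>, step 2 yields "ab" and "ac" for <S>, and
# step 3 finds nothing new, so the loop stops early via break.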
# Helper functions

def _concat(symbol_sequence):
    """Collect the text of each symbol in a sequence into a list."""
    return [sym.text for sym in symbol_sequence]


def _terminal_symbol_seq_to_str(sequence):
    """Convert a sequence of terminal symbols to a string."""
    return "".join(_chain.from_iterable(sequence))


def _report_step(num_step, strings_recent):
    """Print an intermediate result after a language generation step."""
    print()
    text = "Step {}".format(num_step)
    print(text)
    print("-" * len(text))
    print()
    if strings_recent:
        print("Newly found strings per nonterminal:")
        for nonterminal, strings in sorted(strings_recent.items()):
            if strings:
                print(" <{}>".format(nonterminal))
                for string in strings:
                    print(' "{}"'.format(string))
    else:
        print("No newly found strings.")
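# A minimal, self-contained sketch of the same bottom-up enumeration idea,
# assuming a toy dict-based grammar format (nonterminals written as "<...>",
# terminals as plain strings) instead of this package's Grammar and symbol
# classes. The names `enumerate_language` and `toy_grammar` are illustrative
# only, and, unlike generate_language above, the sketch does not split known
# strings into recent and older ones to avoid recomputing old combinations.
# It reuses the itertools import from the top of this module.

def enumerate_language(rules, start, max_steps=10):
    """Enumerate the strings of a toy grammar by repeated substitution."""

    def is_nt(sym):
        return sym.startswith("<") and sym.endswith(">")

    # Basis: strings derivable from terminal-only right-hand sides
    known = {nt: set() for nt in rules}
    for nt, alternatives in rules.items():
        for rhs in alternatives:
            if not any(is_nt(sym) for sym in rhs):
                known[nt].add("".join(rhs))
    # Recursion: substitute already known strings into the remaining rules
    for _ in range(max_steps):
        new = {nt: set() for nt in rules}
        for nt, alternatives in rules.items():
            for rhs in alternatives:
                if not any(is_nt(sym) for sym in rhs):
                    continue
                # One candidate list per symbol: known strings for nonterminals,
                # the symbol itself for terminals
                candidates = [
                    sorted(known[sym]) if is_nt(sym) else [sym] for sym in rhs
                ]
                for combination in _cartesian_product(*candidates):
                    string = "".join(combination)
                    if string not in known[nt]:
                        new[nt].add(string)
        if not any(new.values()):
            break  # fixed point reached: no further step can add strings
        for nt in rules:
            known[nt] |= new[nt]
    return sorted(known[start], key=lambda s: (len(s), s))  # shortlex order

# Example usage of the sketch:
#   toy_grammar = {"<S>": [["a", "<A>"]], "<A>": [["b"], ["c"]]}
#   enumerate_language(toy_grammar, "<S>")  # returns ['ab', 'ac']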