Source code for evolution

'''
Handles how to perform all of the actual evolution.
'''
import random
import sys
from copy import copy
from util import diff_count, bitcount
import itertools
from collections import defaultdict
import problems


[docs]class Individual(object): ''' An individual object used to combine gene fitness with genomes, as well methods for manipulating those genomes. ''' def __init__(self, graph_length, input_length, output_length, max_arity, function_list, **_): ''' Create a new individual instance. Parameters: - ``graph_length``: The number of nodes in the CGP encoded graph. - ``input_length``: The number of input variables. - ``output_length``: The number of output variables. - ``max_arity``: The maximum arity used by any function. - ``function_list``: The list of functions a node can use. ''' self.node_step = max_arity + 1 self.input_length = input_length self.graph_length = graph_length self.function_list = function_list self.output_length = output_length self.genes = None self.genes = [self.random_gene(index) for index in range(graph_length * self.node_step + output_length)] self.determine_active_nodes() # Block of memory used when evaluating an individual self.scratch = [None] * (graph_length + self.input_length) # Records the output for each node. NOTE: semantics are only # updated when a node is active self.semantics = [0] * (graph_length + self.input_length) # Records with indices have ever been active self.never_active = [True] * graph_length self.input_counter = itertools.count(0) self.input_order = {} self.fitness = -sys.maxint
[docs] def random_gene(self, index, invalid=None): ''' Determines a random gene value given a gene index. If optional ``invalid`` option is used, the returned value will only be ``invalid`` if the gene has no other valid values. Parameters: - ``index``: The gene index who's value is being set. - ``invalid``: Value to avoid returning if possible ''' node_number = index // self.node_step gene_number = index % self.node_step # If the gene is used to specify output locations if node_number >= self.graph_length: node_number = self.graph_length gene_number = -1 # If the gene controls the function of a node if gene_number == 0: if len(self.function_list) == 1: return self.function_list[0] while True: choice = random.choice(self.function_list) if choice != invalid: return choice # If the gene controls a connection / output location else: if node_number + self.input_length == 1: return -1 while True: choice = random.randrange(-self.input_length, node_number) if choice != invalid: return choice
[docs] def dag_random_gene(self, index, invalid=None): ''' Determines a random gene value given a gene index of a full DAG. If optional ``invalid`` option is used, the returned value will only be ``invalid`` if the gene has no other valid values. Parameters: - ``index``: The gene index who's value is being set. - ``invalid``: Value to avoid returning if possible ''' node_number = index // self.node_step gene_number = index % self.node_step if node_number >= self.graph_length: node_number = self.graph_length gene_number = -1 # If it is a function gene if gene_number == 0: if len(self.function_list) == 1: return self.function_list[0] while True: choice = random.choice(self.function_list) if choice != invalid: return choice # If you are dealing with output locations or individual initialization elif gene_number < 0 or not self.genes: if node_number + self.input_length == 1: return -1 while True: choice = random.randrange(-self.input_length, node_number) if choice != invalid: return choice # If you are resetting a connection link on an existing individual else: return self.valid_reconnect(node_number, invalid)
[docs] def valid_reconnect(self, node_index, invalid=None): ''' When using a DAG individual, find a random connection location that does not depend on the current node. Parameters: - ``node_index``: The index of the node who's connection is being reset - ``invalid``: Value to avoid returning if possible ''' # Nodes always depend on themselves and inputs never depend on nodes dependent = {node_index: True, invalid: False} # Current inputs are not dependent on the mutating node for conn in self.connections(node_index): dependent[conn] = False for index in range(-self.input_length, 0): dependent[index] = False def is_dependent(current): ''' Internal recursive function to determine if a node index is dependent on ``node_index``. Also updates the dependency dictionary. Parameters: - ``current``: The current working node index to be checked for dependency. ''' if current in dependent: return dependent[current] for conn in self.connections(current): if is_dependent(conn): dependent[current] = True return True dependent[current] = False return False # Create the list of all possible connections options = range(-self.input_length, self.graph_length) for index in range(len(options)): # Choose a random untried option and swap it to the next index swapdown = random.randrange(index, len(options)) options[index], options[swapdown] = (options[swapdown], options[index]) option = options[index] # Test this option if option != invalid and not is_dependent(option): return option return invalid
[docs] def new(self, modification_method, *args, **kwargs): ''' Return a copy of the individual. Note that individuals are shallow copied except for their list of genes. ''' # WARNING individuals are shallow copied except for things added here new = copy(self) new.genes = list(self.genes) new.semantics = list(self.semantics) new.never_active = list(self.never_active) modification_method(new, *args, **kwargs) new.determine_active_nodes() return new
[docs] def connections(self, node_index): ''' Return the list of connections that a specified node has. Parameters - ``node_index``: The index of the node information is being requested for. Note this is different from gene index. ''' node_start = self.node_step * node_index return self.genes[node_start + 1: node_start + self.node_step]
[docs] def determine_active_nodes(self): ''' Determines which nodes are currently active and sets self.active to the sorted list of active genes. Automatically called by gene manipulating member functions. ''' self.active = set(self.genes[-self.output_length:]) for node_index in reversed(range(self.graph_length)): if node_index in self.active: # add all of the connection genes for this node self.active.update(self.connections(node_index)) self.active = sorted([acting for acting in self.active if acting >= 0])
[docs] def dag_determine_active_nodes(self): ''' Determines which nodes are currently active and sets self.active to the sorted list of active genes in DAG individuals. Automatically called by gene manipulating member functions. ''' depends_on = defaultdict(set) feeds_to = defaultdict(set) # The output locations start as 'connected' connected = self.genes[-self.output_length:] added = set(connected) # Build a bi-directional dependency tree while connected: working = connected.pop() # Ignore input locations if working < 0: continue for conn in self.connections(working): # Record that 'working' takes input from 'conn' depends_on[working].add(conn) # Record that 'conn' sends its output to 'working' feeds_to[conn].add(working) if conn not in added: connected.append(conn) added.add(conn) # find the order in which to evaluate them self.active = [] # All input locations start out addable addable = [x for x in range(-self.input_length, 0)] while addable: working = addable.pop() # Find everything that depends on 'working' for input for conn in feeds_to[working]: # Record that 'conn' is no longer waiting on 'working' depends_on[conn].remove(working) if len(depends_on[conn]) == 0: addable.append(conn) self.active.append(conn)
[docs] def all_active(self): ''' Function that always makes all nodes in the genome active. Useful when the fitness function analyzes nodes directly when combined with Single mutation. ''' self.active = range(self.graph_length)
[docs] def evaluate(self, inputs): ''' Given a list of inputs, return a list of outputs from executing this individual. Parameters: - ``inputs``: The list of input values for the individual to process. ''' # Start by loading the input values into scratch # NOTE: Input locations are given as negative values self.scratch[-len(inputs):] = inputs[::-1] try: input_number = self.input_order[inputs] except KeyError: input_number = next(self.input_counter) self.input_order[inputs] = input_number # Bit mask used to turn on specific bits in the semantics on = 1 << input_number for index in range(-len(inputs), 0): if self.scratch[index]: self.semantics[index] |= on on = 1 << input_number # Loop through the active genes in order for node_index in self.active: function = self.genes[node_index * self.node_step] args = [self.scratch[con] for con in self.connections(node_index)] # Apply the function to the inputs from scratch, saving results # back to the scratch result = function(*args) self.scratch[node_index] = result # Records what the output was for this node on this input if result: self.semantics[node_index] |= on else: self.semantics[node_index] &= ~on self.never_active[node_index] = False # Extract outputs from the scratch space return [self.scratch[output] for output in self.genes[-self.output_length:]]
[docs] def mutate(self, mutation_rate): ''' Mutates the calling individual's genes using the give mutation rate. Parameters: - ``mutation_rate``: The probability that a specific gene will mutate. ''' for index in range(len(self.genes)): if random.random() < mutation_rate: self.genes[index] = self.random_gene(index, self.genes[index])
[docs] def one_active_mutation(self, _): ''' Mutates the calling individual using the ``Single`` mutation method. ''' while True: # Choose an index at random index = random.randrange(len(self.genes)) # Get a new value for that gene newval = self.random_gene(index, self.genes[index]) # If that value is different than the current value if newval != self.genes[index]: self.genes[index] = newval # Determine if that gene was part of an active node node_number = index // self.node_step if (node_number >= self.graph_length or node_number in self.active): break
[docs] def reorder(self): ''' Reorder individual's genes randomly without changing any of the actual connection information. ''' # Build a list of dependencies depends_on = defaultdict(set) feeds_to = defaultdict(set) for node_index in range(self.graph_length): for conn in self.connections(node_index): # Record that 'node_index' takes input from 'conn' depends_on[node_index].add(conn) # Record that 'conn' sends its output to 'node_index' feeds_to[conn].add(node_index) # Create a dictionary storing how to translate location information new_order = {i: i for i in range(-self.input_length, 0)} # Input locations start as addable addable = new_order.keys() counter = 0 while addable: # Choose a node at random who's dependencies have already been met working = random.choice(addable) addable.remove(working) # If 'working' is not an input location if working >= 0: # Assign this node to the next available index new_order[working] = counter counter += 1 # Update all dependencies now that this node has been added for to_add in feeds_to[working]: # Mark 'to_add' as having its requirement on 'working' complete depends_on[to_add].remove(working) if len(depends_on[to_add]) == 0: addable.append(to_add) # Create the new individual using the new ordering old_genes = copy(self.genes) old_semantics = copy(self.semantics) old_n_a = copy(self.never_active) for node_index in range(self.graph_length): # Find the new starting location in the self for this node new_start = new_order[node_index] * self.node_step old_start = node_index * self.node_step new_end = new_start + self.node_step old_end = old_start + self.node_step # Move over the function gene self.genes[new_start] = old_genes[old_start] # Translate connection genes to have new order information connections = [new_order[conn] for conn in old_genes[old_start + 1:old_end]] # Move over the connection genes self.genes[new_start + 1:new_end] = connections self.semantics[new_order[node_index]] = old_semantics[node_index] self.never_active[new_order[node_index]] = old_n_a[node_index] length = len(self.genes) # Update the output locations for index in range(length - self.output_length, length): self.genes[index] = new_order[old_genes[index]] self.determine_active_nodes()
[docs] def simplify(self): ''' Using the semantic information recorded for each node, this will modify the individual to ensure only 1 node produces each unique semantic. ''' lookup = {} # Keeps track of 1 index for each unique semantic, favoring # nodes near the start of the active list for node_index in reversed(self.active): lookup[self.semantics[node_index]] = node_index # All input locations take priority for producing their semantics. for i in range(-self.input_length, 0): lookup[self.semantics[i]] = i # Replace all connections to use saved index to produce the required semantic. for index in range(len(self.genes)): if isinstance(self.genes[index], int): try: semantic = self.semantics[self.genes[index]] self.genes[index] = lookup[semantic] except KeyError: pass self.determine_active_nodes()
[docs] def asym_phenotypic_difference(self, other): ''' Determine how many genes would have to be mutated in order to make the ``other`` individual phenotypically identical to ``self``. Parameters: - ``other``: The individual to compare with. ''' # Count the differences in the output locations count = diff_count(self.genes[-self.output_length:], other.genes[-self.output_length:]) # For each active node for node_index in self.active: index = node_index * self.node_step # Count the number of different connection genes count += diff_count(self.connections(node_index), other.connections(node_index)) # Include differences in the function gene count += (self.genes[index] != other.genes[index]) return count
[docs] def show_active(self): ''' Prints the active portions of the individual in a somewhat readable way. ''' for node_index in self.active: node_start = self.node_step * node_index print node_index, self.genes[node_start], print self.connections(node_index), self.semantics[node_index] print self.genes[-self.output_length:]
def __lt__(self, other): ''' Returns the result of self.fitness < other.fitness. ''' return self.fitness < other.fitness def __le__(self, other): ''' Returns the result of self.fitness <= other.fitness. ''' return self.fitness <= other.fitness
[docs] def dump_genes(self): ''' Returns a file read/writable representation of the individuals genes. ''' return [g if isinstance(g, int) else g.__name__ for g in self.genes]
[docs] def dump(self): ''' Returns a json ready dictionary representation of the entire individual ''' genes = self.dump_genes() never_active = ''.join(str(int(x)) for x in self.never_active) return {'genes': genes, 'fitness': self.fitness, 'never_active': never_active, 'graph_length': self.graph_length, 'max_arity': self.node_step - 1, 'output_length': self.output_length, 'input_length': self.input_length}
[docs] def load(self, data): ''' Recovers a "dump"ed individual from the dictionary, overwriting the calling individual. ''' self.__dict__.update(data) self.never_active = [x == '1' for x in data['never_active']] self.genes = [g if isinstance(g, int) else problems.__dict__[g] for g in self.genes]
@staticmethod
[docs] def reconstruct_individual(data, test_inputs): ''' Recovers an individual's connections and never active node listing. Used when post processing individuals from previous runs. ''' copy = dict(data) copy['function_list'] = [None] individual = Individual(**copy) individual.load(data) # Ensure it is serial for node_index in range(individual.graph_length): for conn in individual.connections(node_index): if conn > node_index: individual.reorder() break # Store a copy of the never active genes after reorder never_active = list(individual.never_active) # Set semantics values individual.active = range(individual.graph_length) for inputs in test_inputs: individual.evaluate(tuple(inputs)) individual.determine_active_nodes() individual.never_active = never_active return individual
[docs]def generate(config, output, frequencies): ''' An ``Individual`` generator that will yield a never ending supply of ``Individual`` objects that need to have their fitness set before the next ``Individual`` can be yielded. Parameters: - ``config``: A dictionary containing all configuration information required to generate initial individuals. Should include values for: - All configuration information required to initialize an Individual. - ``dag``: If DAG based individuals should be used. - ``reorder``: If the parent should be reordered before making offspring. - ``mutation_rate``: The probably to use for mutation. - ``off_size``: The number of offspring to produce per generation. - ``output_length``: The number of output variables. - ``max_arity``: The maximum arity used by any function. - ``duplicate``: String specifying the way to handle duplicate individual creation, either ``normal'', ``skip'', ``accumulate``, or ``single``. - ``problem``: The problem these individuals are solving. Used on in the case where problems require unusual individual modification. - ``output``: Dictionary used to return information about evolution, will send out: - ``skipped``: The number of evaluations skipped by ``Skip``. - ``estimated``: The estimated number of evaluations that are skippable. - ``inactive_bits_changed``: Keeps track of nodes that were active, became inactive, and have become active again, looking at how many bits in their semantics have changed. Is a dictionary of frequencies mapping change -> count. - ``reactiveated_nodes``: Counts the number of nodes that were active, became inactive, and have become active again. - ``active_nodes_changed``: Counts how many nodes that were active before and after mutation had at least one bit in their semantics change. Is a dictionary mapping number changed in a single mutation -> count. - ``active_bits_changed``: Counts how many bits were changed when an active node was changed by mutation but remained active. - ``child_replaced_parent``: Counts how often the parent is replaced by the offspring. - ``parent_not_replaced``: Counts how often no offspring in a generation replaced the parent. - ``frequencies``: Dictionary used to return information about how often individuals of different lengths are evolved. ''' output['skipped'] = 0 output['estimated'] = 0 output['inactive_bits_changed'] = defaultdict(int) output['reactivated_nodes'] = defaultdict(int) output['active_nodes_changed'] = defaultdict(int) output['active_bits_changed'] = defaultdict(int) output['child_replaced_parent'] = 0 output['parent_not_replaced'] = 0 if config['ordering'] == 'dag': # Override base functions with dag versions Individual.determine_active_nodes = \ Individual.dag_determine_active_nodes Individual.random_gene = \ Individual.dag_random_gene if config['duplicate'] == 'single': # Override normal mutation with Single Individual.mutate = Individual.one_active_mutation if config['problem'] == 'Flat': # Override normal method for determining active genes Individual.determine_active_nodes = Individual.all_active parent = Individual(**config) # Evaluate initial individual yield parent while True: if config['ordering'] == 'reorder': # Reorder the parent parent.reorder() # Create mutant offspring mutants = [parent.new(Individual.mutate, config['mutation_rate']) for _ in range(config['off_size'])] # Determine how many active genes the parent has active = config['output_length'] + (len(parent.active) * (config['max_arity'] + 1)) for index, mutant in enumerate(mutants): # Estimates the probability none of the active genes were changed. output['estimated'] += (1 - config['mutation_rate']) ** active prev = mutant if config['duplicate'] not in ['normal', 'single']: change = parent.asym_phenotypic_difference(mutant) if change == 0: output['skipped'] += 1 if config['duplicate'] == 'skip': continue if config['duplicate'] == 'accumulate': while change == 0: # As long as there have been no changes, # keep mutating prev = mutant mutant = prev.new(Individual.mutate, config['mutation_rate']) change = parent.asym_phenotypic_difference(mutant) if 'frequency_results' in config: # Records the length of the generated individual frequencies[len(mutant.active)] += 1 # Send the offspring out to be evaluated yield mutant if config['duplicate'] == 'accumulate': # If the mutant is strickly worse, use the last equivalent mutants[index] = prev if mutant < parent else mutant best_child = max(mutants) if parent <= best_child: parent_active = set(parent.active) re_active = 0 active_changed = 0 for newly_active in best_child.active: if parent.never_active[newly_active]: # You can't get change information if its never # been evaluated before. continue # Determine how the semantics have changed prev = parent.semantics[newly_active] now = best_child.semantics[newly_active] change = bitcount(now ^ prev) if newly_active in parent_active: if change > 0: active_changed += 1 output['active_bits_changed'][change] += 1 else: # Was inactive in parent re_active += 1 output['inactive_bits_changed'][change] += 1 output['reactivated_nodes'][re_active] += 1 output['active_nodes_changed'][active_changed] += 1 output['child_replaced_parent'] += 1 # Replace the parent with the child parent = best_child else: output['parent_not_replaced'] += 1
[docs]def multi_indepenedent(config, output, frequencies): ''' Allows for multiple parallel independent populations to be evolved at the same time. Will generate one individual from each population before repeating a population. Not used in the paper ``Analysis of Cartesian Genetic Programming's Evolutionary Mechanisms.`` Parameters: - ``config``: A dictionary containing all of the configuration information required to generate multiple populations worth of individuals. Should include values for: - All configuration information required by ``generate``. - ``pop_size``: The number of parallel populations to use. - ``output``: Used to return information about evolution. Shared by all parallel populations. Will contain all information output by ``generate``. - ``frequencies``: Dictionary used to return information about how often individuals of different lengths are evolved. Shared by all parallel populations. Will contain all information output by ``generate``. ''' collective = itertools.izip(*[generate(config, output, frequencies) for _ in range(config['pop_size'])]) for next_iterations in collective: for next_iteration in next_iterations: yield next_iteration