Source code for evolution

'''
Handles how to perform all of the actual evolution.
'''
import random
import sys
from copy import copy
from util import diff_count, bitcount
import itertools
from collections import defaultdict
import problems


[docs]class Individual(object):
    '''
    An individual object used to combine gene fitness with genomes, as
    well methods for manipulating those genomes.
    '''

    def __init__(self, graph_length, input_length, output_length,
                  max_arity, function_list, **_):
        '''
        Create a new individual instance.

        Parameters:

        - ``graph_length``: The number of nodes in the CGP encoded graph.
        - ``input_length``: The number of input variables.
        - ``output_length``: The number of output variables.
        - ``max_arity``: The maximum arity used by any function.
        - ``function_list``: The list of functions a node can use.
        '''
        self.node_step = max_arity + 1
        self.input_length = input_length
        self.graph_length = graph_length
        self.function_list = function_list
        self.output_length = output_length
        self.genes = None
        self.genes = [self.random_gene(index) for index in
                      range(graph_length * self.node_step + output_length)]
        self.determine_active_nodes()
        # Block of memory used when evaluating an individual
        self.scratch = [None] * (graph_length + self.input_length)
        # Records the output for each node.  NOTE: semantics are only
        # updated when a node is active
        self.semantics = [0] * (graph_length + self.input_length)
        # Records with indices have ever been active
        self.never_active = [True] * graph_length
        self.input_counter = itertools.count(0)
        self.input_order = {}
        self.fitness = -sys.maxint

[docs]    def random_gene(self, index, invalid=None):
        '''
        Determines a random gene value given a gene index.  If optional
        ``invalid`` option is used, the returned value will only be ``invalid``
        if the gene has no other valid values.

        Parameters:

        - ``index``: The gene index who's value is being set.
        - ``invalid``: Value to avoid returning if possible
        '''
        node_number = index // self.node_step
        gene_number = index % self.node_step
        # If the gene is used to specify output locations
        if node_number >= self.graph_length:
            node_number = self.graph_length
            gene_number = -1
        # If the gene controls the function of a node
        if gene_number == 0:
            if len(self.function_list) == 1:
                return self.function_list[0]
            while True:
                choice = random.choice(self.function_list)
                if choice != invalid:
                    return choice
        # If the gene controls a connection / output location
        else:
            if node_number + self.input_length == 1:
                return -1
            while True:
                choice = random.randrange(-self.input_length, node_number)
                if choice != invalid:
                    return choice

[docs]    def dag_random_gene(self, index, invalid=None):
        '''
        Determines a random gene value given a gene index of a full DAG.
        If optional ``invalid`` option is used, the returned value
        will only be ``invalid`` if the gene has no other valid values.

        Parameters:

        - ``index``: The gene index who's value is being set.
        - ``invalid``: Value to avoid returning if possible
        '''
        node_number = index // self.node_step
        gene_number = index % self.node_step
        if node_number >= self.graph_length:
            node_number = self.graph_length
            gene_number = -1

        # If it is a function gene
        if gene_number == 0:
            if len(self.function_list) == 1:
                return self.function_list[0]
            while True:
                choice = random.choice(self.function_list)
                if choice != invalid:
                    return choice
        # If you are dealing with output locations or individual initialization
        elif gene_number < 0 or not self.genes:
            if node_number + self.input_length == 1:
                return -1
            while True:
                choice = random.randrange(-self.input_length, node_number)
                if choice != invalid:
                    return choice
        # If you are resetting a connection link on an existing individual
        else:
            return self.valid_reconnect(node_number, invalid)

[docs]    def valid_reconnect(self, node_index, invalid=None):
        '''
        When using a DAG individual, find a random connection location that
        does not depend on the current node.

        Parameters:

        - ``node_index``: The index of the node who's connection is being reset
        - ``invalid``: Value to avoid returning if possible
        '''
        # Nodes always depend on themselves and inputs never depend on nodes
        dependent = {node_index: True, invalid: False}
        # Current inputs are not dependent on the mutating node
        for conn in self.connections(node_index):
            dependent[conn] = False
        for index in range(-self.input_length, 0):
            dependent[index] = False

        def is_dependent(current):
            '''
            Internal recursive function to determine if a node index
            is dependent on ``node_index``.  Also updates the dependency
            dictionary.

            Parameters:

            - ``current``: The current working node index to be checked for
              dependency.
            '''
            if current in dependent:
                return dependent[current]
            for conn in self.connections(current):
                if is_dependent(conn):
                    dependent[current] = True
                    return True
            dependent[current] = False
            return False
        # Create the list of all possible connections
        options = range(-self.input_length, self.graph_length)
        for index in range(len(options)):
            # Choose a random untried option and swap it to the next index
            swapdown = random.randrange(index, len(options))
            options[index], options[swapdown] = (options[swapdown],
                                                 options[index])
            option = options[index]
            # Test this option
            if option != invalid and not is_dependent(option):
                return option
        return invalid

[docs]    def new(self, modification_method, *args, **kwargs):
        '''
        Return a copy of the individual.  Note that individuals are shallow
        copied except for their list of genes.
        '''
        # WARNING individuals are shallow copied except for things added here
        new = copy(self)
        new.genes = list(self.genes)
        new.semantics = list(self.semantics)
        new.never_active = list(self.never_active)
        modification_method(new, *args, **kwargs)
        new.determine_active_nodes()
        return new

[docs]    def connections(self, node_index):
        '''
        Return the list of connections that a specified node has.

        Parameters

        - ``node_index``: The index of the node information is being requested
          for.  Note this is different from gene index.
        '''
        node_start = self.node_step * node_index
        return self.genes[node_start + 1: node_start + self.node_step]

[docs]    def determine_active_nodes(self):
        '''
        Determines which nodes are currently active and sets self.active
        to the sorted list of active genes.  Automatically called by gene
        manipulating member functions.
        '''
        self.active = set(self.genes[-self.output_length:])

        for node_index in reversed(range(self.graph_length)):
            if node_index in self.active:
                # add all of the connection genes for this node
                self.active.update(self.connections(node_index))
        self.active = sorted([acting for acting in self.active if acting >= 0])

[docs]    def dag_determine_active_nodes(self):
        '''
        Determines which nodes are currently active and sets self.active
        to the sorted list of active genes in DAG individuals.
        Automatically called by gene manipulating member functions.
        '''
        depends_on = defaultdict(set)
        feeds_to = defaultdict(set)
        # The output locations start as 'connected'
        connected = self.genes[-self.output_length:]
        added = set(connected)
        # Build a bi-directional dependency tree
        while connected:
            working = connected.pop()
            # Ignore input locations
            if working < 0:
                continue
            for conn in self.connections(working):
                # Record that 'working' takes input from 'conn'
                depends_on[working].add(conn)
                # Record that 'conn' sends its output to 'working'
                feeds_to[conn].add(working)
                if conn not in added:
                    connected.append(conn)
                added.add(conn)
        # find the order in which to evaluate them
        self.active = []
        # All input locations start out addable
        addable = [x for x in range(-self.input_length, 0)]

        while addable:
            working = addable.pop()
            # Find everything that depends on 'working' for input
            for conn in feeds_to[working]:
                # Record that 'conn' is no longer waiting on 'working'
                depends_on[conn].remove(working)
                if len(depends_on[conn]) == 0:
                    addable.append(conn)
                    self.active.append(conn)

[docs]    def all_active(self):
        '''
        Function that always makes all nodes in the genome active.  Useful
        when the fitness function analyzes nodes directly when combined with
        Single mutation.
        '''
        self.active = range(self.graph_length)

[docs]    def evaluate(self, inputs):
        '''
        Given a list of inputs, return a list of outputs from executing
        this individual.

        Parameters:

        - ``inputs``: The list of input values for the individual to process.
        '''
        # Start by loading the input values into scratch
        # NOTE: Input locations are given as negative values
        self.scratch[-len(inputs):] = inputs[::-1]
        try:
            input_number = self.input_order[inputs]
        except KeyError:
            input_number = next(self.input_counter)
            self.input_order[inputs] = input_number
            # Bit mask used to turn on specific bits in the semantics
            on = 1 << input_number
            for index in range(-len(inputs), 0):
                if self.scratch[index]:
                    self.semantics[index] |= on
        on = 1 << input_number
        # Loop through the active genes in order
        for node_index in self.active:
            function = self.genes[node_index * self.node_step]
            args = [self.scratch[con] for con in self.connections(node_index)]
            # Apply the function to the inputs from scratch, saving results
            # back to the scratch
            result = function(*args)
            self.scratch[node_index] = result
            # Records what the output was for this node on this input
            if result:
                self.semantics[node_index] |= on
            else:
                self.semantics[node_index] &= ~on
            self.never_active[node_index] = False
        # Extract outputs from the scratch space
        return [self.scratch[output]
                for output in self.genes[-self.output_length:]]

[docs]    def mutate(self, mutation_rate):
        '''
        Mutates the calling individual's genes using the give mutation rate.

        Parameters:

        - ``mutation_rate``: The probability that a specific gene will mutate.
        '''
        for index in range(len(self.genes)):
            if random.random() < mutation_rate:
                self.genes[index] = self.random_gene(index, self.genes[index])

[docs]    def one_active_mutation(self, _):
        '''
        Mutates the calling individual using the ``Single`` mutation method.
        '''
        while True:
            # Choose an index at random
            index = random.randrange(len(self.genes))
            # Get a new value for that gene
            newval = self.random_gene(index, self.genes[index])
            # If that value is different than the current value
            if newval != self.genes[index]:
                self.genes[index] = newval
                # Determine if that gene was part of an active node
                node_number = index // self.node_step
                if (node_number >= self.graph_length or
                    node_number in self.active):
                    break

[docs]    def reorder(self):
        '''
        Reorder individual's genes randomly without
        changing any of the actual connection information.
        '''
        # Build a list of dependencies
        depends_on = defaultdict(set)
        feeds_to = defaultdict(set)
        for node_index in range(self.graph_length):
            for conn in self.connections(node_index):
                # Record that 'node_index' takes input from 'conn'
                depends_on[node_index].add(conn)
                # Record that 'conn' sends its output to 'node_index'
                feeds_to[conn].add(node_index)
        # Create a dictionary storing how to translate location information
        new_order = {i: i for i in range(-self.input_length, 0)}
        # Input locations start as addable
        addable = new_order.keys()
        counter = 0
        while addable:
            # Choose a node at random who's dependencies have already been met
            working = random.choice(addable)
            addable.remove(working)
            # If 'working' is not an input location
            if working >= 0:
                # Assign this node to the next available index
                new_order[working] = counter
                counter += 1
            # Update all dependencies now that this node has been added
            for to_add in feeds_to[working]:
                # Mark 'to_add' as having its requirement on 'working' complete
                depends_on[to_add].remove(working)
                if len(depends_on[to_add]) == 0:
                    addable.append(to_add)

        # Create the new individual using the new ordering
        old_genes = copy(self.genes)
        old_semantics = copy(self.semantics)
        old_n_a = copy(self.never_active)
        for node_index in range(self.graph_length):
            # Find the new starting location in the self for this node
            new_start = new_order[node_index] * self.node_step
            old_start = node_index * self.node_step
            new_end = new_start + self.node_step
            old_end = old_start + self.node_step
            # Move over the function gene
            self.genes[new_start] = old_genes[old_start]
            # Translate connection genes to have new order information
            connections = [new_order[conn]
                           for conn in old_genes[old_start + 1:old_end]]
            # Move over the connection genes
            self.genes[new_start + 1:new_end] = connections
            self.semantics[new_order[node_index]] = old_semantics[node_index]
            self.never_active[new_order[node_index]] = old_n_a[node_index]
        length = len(self.genes)
        # Update the output locations
        for index in range(length - self.output_length, length):
            self.genes[index] = new_order[old_genes[index]]
        self.determine_active_nodes()

[docs]    def simplify(self):
        '''
        Using the semantic information recorded for each node, this
        will modify the individual to ensure only 1 node produces each
        unique semantic.
        '''
        lookup = {}
        # Keeps track of 1 index for each unique semantic, favoring
        # nodes near the start of the active list
        for node_index in reversed(self.active):
            lookup[self.semantics[node_index]] = node_index
        # All input locations take priority for producing their semantics.
        for i in range(-self.input_length, 0):
            lookup[self.semantics[i]] = i
        # Replace all connections to use saved index to produce the required semantic.
        for index in range(len(self.genes)):
            if isinstance(self.genes[index], int):
                try:
                    semantic = self.semantics[self.genes[index]]
                    self.genes[index] = lookup[semantic]
                except KeyError:
                    pass
        self.determine_active_nodes()

[docs]    def asym_phenotypic_difference(self, other):
        '''
        Determine how many genes would have to be mutated in order to make
        the ``other`` individual phenotypically identical to ``self``.

        Parameters:

        - ``other``: The individual to compare with.
        '''
        # Count the differences in the output locations
        count = diff_count(self.genes[-self.output_length:],
                           other.genes[-self.output_length:])
        # For each active node
        for node_index in self.active:
            index = node_index * self.node_step
            # Count the number of different connection genes
            count += diff_count(self.connections(node_index),
                                other.connections(node_index))
            # Include differences in the function gene
            count += (self.genes[index] !=
                      other.genes[index])
        return count

[docs]    def show_active(self):
        '''
        Prints the active portions of the individual in a somewhat readable
        way.
        '''
        for node_index in self.active:
            node_start = self.node_step * node_index
            print node_index, self.genes[node_start],
            print self.connections(node_index), self.semantics[node_index]
        print self.genes[-self.output_length:]

    def __lt__(self, other):
        '''
        Returns the result of self.fitness < other.fitness.
        '''
        return self.fitness < other.fitness

    def __le__(self, other):
        '''
        Returns the result of self.fitness <= other.fitness.
        '''
        return self.fitness <= other.fitness

[docs]    def dump_genes(self):
        '''
        Returns a file read/writable representation of the individuals genes.
        '''
        return [g if isinstance(g, int) else g.__name__
                for g in self.genes]

[docs]    def dump(self):
        '''
        Returns a json ready dictionary representation of the entire individual
        '''
        genes = self.dump_genes()
        never_active = ''.join(str(int(x)) for x in self.never_active)
        return {'genes': genes,
                'fitness': self.fitness,
                'never_active': never_active,
                'graph_length': self.graph_length,
                'max_arity': self.node_step - 1,
                'output_length': self.output_length,
                'input_length': self.input_length}

[docs]    def load(self, data):
        '''
        Recovers a "dump"ed individual from the dictionary,
        overwriting the calling individual.
        '''
        self.__dict__.update(data)
        self.never_active = [x == '1' for x in data['never_active']]
        self.genes = [g if isinstance(g, int) else problems.__dict__[g]
                      for g in self.genes]

    @staticmethod
[docs]    def reconstruct_individual(data, test_inputs):
        '''
        Recovers an individual's connections and never active node listing.
        Used when post processing individuals from previous runs.
        '''
        copy = dict(data)
        copy['function_list'] = [None]
        individual = Individual(**copy)
        individual.load(data)
        # Ensure it is serial
        for node_index in range(individual.graph_length):
            for conn in individual.connections(node_index):
                if conn > node_index:
                    individual.reorder()
                    break
        # Store a copy of the never active genes after reorder
        never_active = list(individual.never_active)
        # Set semantics values
        individual.active = range(individual.graph_length)
        for inputs in test_inputs:
            individual.evaluate(tuple(inputs))
        individual.determine_active_nodes()
        individual.never_active = never_active

        return individual


[docs]def generate(config, output, frequencies):
    '''
    An ``Individual`` generator that will yield a never ending supply of
    ``Individual`` objects that need to have their fitness set before the
    next ``Individual`` can be yielded.

    Parameters:

    - ``config``: A dictionary containing all configuration information
      required to generate initial individuals.  Should include values
      for:

      - All configuration information required to initialize an Individual.
      - ``dag``: If DAG based individuals should be used.
      - ``reorder``: If the parent should be reordered before making offspring.
      - ``mutation_rate``: The probably to use for mutation.
      - ``off_size``: The number of offspring to produce per generation.
      - ``output_length``: The number of output variables.
      - ``max_arity``: The maximum arity used by any function.
      - ``duplicate``: String specifying the way to handle duplicate
        individual creation, either ``normal'', ``skip'', ``accumulate``, or
        ``single``.
      - ``problem``: The problem these individuals are solving.  Used on in
        the case where problems require unusual individual modification.
    - ``output``: Dictionary used to return information about evolution, will
      send out:

      - ``skipped``: The number of evaluations skipped by ``Skip``.
      - ``estimated``: The estimated number of evaluations that are skippable.
      - ``inactive_bits_changed``: Keeps track of nodes that were active, became
        inactive, and have become active again, looking at how many bits in their
        semantics have changed.  Is a dictionary of frequencies mapping change -> count.
      - ``reactiveated_nodes``: Counts the number of nodes that were active,
        became inactive, and have become active again.
      - ``active_nodes_changed``: Counts how many nodes that were active before and
        after mutation had at least one bit in their semantics change.  Is a dictionary
        mapping number changed in a single mutation -> count.
      - ``active_bits_changed``: Counts how many bits were changed when an active
        node was changed by mutation but remained active.
      - ``child_replaced_parent``: Counts how often the parent is replaced by the offspring.
      - ``parent_not_replaced``: Counts how often no offspring in a generation replaced the parent.
    - ``frequencies``:  Dictionary used to return information about how often
      individuals of different lengths are evolved.
    '''
    output['skipped'] = 0
    output['estimated'] = 0
    output['inactive_bits_changed'] = defaultdict(int)
    output['reactivated_nodes'] = defaultdict(int)
    output['active_nodes_changed'] = defaultdict(int)
    output['active_bits_changed'] = defaultdict(int)
    output['child_replaced_parent'] = 0
    output['parent_not_replaced'] = 0
    if config['ordering'] == 'dag':
        # Override base functions with dag versions
        Individual.determine_active_nodes = \
        Individual.dag_determine_active_nodes
        Individual.random_gene = \
        Individual.dag_random_gene
    if config['duplicate'] == 'single':
        # Override normal mutation with Single
        Individual.mutate = Individual.one_active_mutation
    if config['problem'] == 'Flat':
        # Override normal method for determining active genes
        Individual.determine_active_nodes = Individual.all_active
    parent = Individual(**config)
    # Evaluate initial individual
    yield parent
    while True:
        if config['ordering'] == 'reorder':
            # Reorder the parent
            parent.reorder()
        # Create mutant offspring
        mutants = [parent.new(Individual.mutate, config['mutation_rate'])
                   for _ in range(config['off_size'])]
        # Determine how many active genes the parent has
        active = config['output_length'] + (len(parent.active) *
                                            (config['max_arity'] + 1))
        for index, mutant in enumerate(mutants):
            # Estimates the probability none of the active genes were changed.
            output['estimated'] += (1 - config['mutation_rate']) ** active
            prev = mutant
            if config['duplicate'] not in ['normal', 'single']:
                change = parent.asym_phenotypic_difference(mutant)
                if change == 0:
                    output['skipped'] += 1
                    if config['duplicate'] == 'skip':
                        continue
                    if config['duplicate'] == 'accumulate':
                        while change == 0:
                            # As long as there have been no changes,
                            # keep mutating
                            prev = mutant
                            mutant = prev.new(Individual.mutate,
                                              config['mutation_rate'])
                            change = parent.asym_phenotypic_difference(mutant)
            if 'frequency_results' in config:
                # Records the length of the generated individual
                frequencies[len(mutant.active)] += 1
            # Send the offspring out to be evaluated
            yield mutant
            if config['duplicate'] == 'accumulate':
                # If the mutant is strickly worse, use the last equivalent
                mutants[index] = prev if mutant < parent else mutant
        best_child = max(mutants)
        if parent <= best_child:
            parent_active = set(parent.active)
            re_active = 0
            active_changed = 0
            for newly_active in best_child.active:
                if parent.never_active[newly_active]:
                    # You can't get change information if its never
                    # been evaluated before.
                    continue
                # Determine how the semantics have changed
                prev = parent.semantics[newly_active]
                now = best_child.semantics[newly_active]
                change = bitcount(now ^ prev)
                if newly_active in parent_active:
                    if change > 0:
                        active_changed += 1
                        output['active_bits_changed'][change] += 1
                else:
                    # Was inactive in parent
                    re_active += 1
                    output['inactive_bits_changed'][change] += 1
            output['reactivated_nodes'][re_active] += 1
            output['active_nodes_changed'][active_changed] += 1
            output['child_replaced_parent'] += 1
            # Replace the parent with the child
            parent = best_child
        else:
            output['parent_not_replaced'] += 1


[docs]def multi_indepenedent(config, output, frequencies):
    '''
    Allows for multiple parallel independent populations to be evolved
    at the same time.  Will generate one individual from each population
    before repeating a population.  Not used in the paper ``Analysis of
    Cartesian Genetic Programming's Evolutionary Mechanisms.``

    Parameters:

    - ``config``: A dictionary containing all of the configuration information
      required to generate multiple populations worth of individuals.  Should
      include values for:

      - All configuration information required by ``generate``.
      - ``pop_size``: The number of parallel populations to use.
    - ``output``:  Used to return information about evolution.  Shared by all
      parallel populations.  Will contain all information output by
      ``generate``.
    - ``frequencies``:  Dictionary used to return information about how often
      individuals of different lengths are evolved.  Shared by all parallel
      populations.  Will contain all information output by ``generate``.
    '''
    collective = itertools.izip(*[generate(config, output, frequencies)
                                  for _ in range(config['pop_size'])])
    for next_iterations in collective:
        for next_iteration in next_iterations:
            yield next_iteration
Navigation

Source code for evolution

Quick search

Navigation