diff --git a/evolve_text.py b/evolve_text.py index e0202d2..eceb40a 100644 --- a/evolve_text.py +++ b/evolve_text.py @@ -9,6 +9,7 @@ Full instructions are at: https://sites.google.com/site/sd15spring/home/project-toolbox/evolutionary-algorithms +All non-starter code by Rebecca Gettys except where otherwise noted """ import random @@ -85,15 +86,32 @@ def __repr__(self): def get_text(self): """Return Message as string (rather than actual list of characters)""" - return "".join(self) + # return "".join(self) + return ''.join([str(x) for x in self]) # list comprehensionhttp://stackoverflow.com/questions/497765/python-string-joinlist-on-object-array-rather-than-string-array + #----------------------------------------------------------------------------- # Genetic operators #----------------------------------------------------------------------------- +memo = {} +def levenshtein_distance(a,b): + """ from: https://programmingpraxis.com/2014/09/12/levenshtein-distance/ + I had memoized fibonacci instead - my memoized levenshtein doesn't work, not sure why + """ + if a == b: + return 0 + if a == "": + return len(b) + if b == "": + return len(a) + if (a, b) not in memo: + l1 = levenshtein_distance(a[1:], b) + 1 + l2 = levenshtein_distance(a, b[1:]) + 1 + l3 = levenshtein_distance(a[1:], b[1:]) + (a[0] != b[0]) + memo[(a,b)] = min(l1, l2, l3) + return memo[(a,b)] -# TODO: Implement levenshtein_distance function (see Day 9 in-class exercises) -# HINT: Now would be a great time to implement memoization if you haven't def evaluate_text(message, goal_text, verbose=VERBOSE): """ @@ -119,17 +137,45 @@ def mutate_text(message, prob_ins=0.05, prob_del=0.05, prob_sub=0.05): Substitution: Replace one character of the Message with a random (legal) character """ + loc=random.randint(0, len(message)) + new_char = random.choice(VALID_CHARS) + new_message = [] if random.random() < prob_ins: - # TODO: Implement insertion-type mutation - pass + # done insertion-type mutation + 
message.insert(loc,new_char) + elif random.random() < prob_del: + #message = message[:loc] + message[loc+1:] + new_message.extend(message[:loc]) + new_message.extend(message[loc+1:]) + message = new_message + elif random.random() < prob_sub: + #message = message[:loc] + new_char + message[loc+1:] + new_message.extend(message[:loc]) + new_message.extend(new_char) + new_message.extend(message[loc+1:]) + message = new_message - # TODO: Also implement deletion and substitution mutations # HINT: Message objects inherit from list, so they also inherit # useful list methods # HINT: You probably want to use the VALID_CHARS global variable - return (message, ) # Length 1 tuple, required by DEAP + return (Message(message), ) # Length 1 tuple, required by DEAP + +def my_crossover_func(parent1, parent2): + "replaces the original 2 point crossover function" + point1 = random.randint (0,len(parent1)) + point2 = random.randint(0,len(parent1)) + if point1>point2: #if the sizes don't work do it over, crossing over with p1 after p2 makes for gross non-genetic + #behaviors, although technically correct + return my_crossover_func(parent1,parent2) + # see Jane run! See Becca recursively code! + else: + new_p1 = parent1[:point1] + parent2[point1:point2] + parent1[point2:] + new_p2 = parent2[:point1] + parent1[point1:point2] + parent2[point2:] + return (Message(new_p1),Message( new_p2)) + + #----------------------------------------------------------------------------- @@ -149,7 +195,8 @@ def get_toolbox(text): # Genetic operators toolbox.register("evaluate", evaluate_text, goal_text=text) - toolbox.register("mate", tools.cxTwoPoint) + #toolbox.register("mate", tools.cxTwoPoint) + toolbox.register("mate", my_crossover_func) toolbox.register("mutate", mutate_text) toolbox.register("select", tools.selTournament, tournsize=3) @@ -185,7 +232,7 @@ def evolve_string(text): toolbox, cxpb=0.5, # Prob. of crossover (mating) mutpb=0.2, # Probability of mutation - ngen=500, # Num. 
of generations to run + ngen=1000, # Num. of generations to run stats=stats) return pop, log diff --git a/results.txt b/results.txt new file mode 100644 index 0000000..5ce6576 --- /dev/null +++ b/results.txt @@ -0,0 +1,15 @@ +I compared my mating method with the one built into DEAP. I was expecting it to take more generations to arrive at the +correct string with distance = 0. After about 440ish generations using my crossover method, I arrived at the correct +string (THIS IS MY CRAZY STRING). In contrast, when I re-enabled the built-in DEAP mating function, at 440 generations +the distance was consistently around 4. After 700 generations, the distance was around 2; I next tried 1000 generations +and discovered that, finally, it had reached a distance of one. At that point, I stopped testing. In short, to my great +surprise, my two-point crossover method resulted in a much faster result! I don't really have a good hypothesis as +to why this is; I looked through a great deal of the output without finding a consistent pattern to explain it. Anyways, +a neat discovery! I didn't include the data that this spit out because it isn't too interesting to read, it is +*extremely* long, and you can produce it yourself. + +I wrote everything for this project myself (obviously, not the starter code) except for the memoized Levenshtein + distance, which I pulled from the net - there was something wrong with my memoized Levenshtein distance originally, I + did a memoized fibonacci instead, and I didn't think I would get a lot of learning out of re-doing it/debugging it + compared to working on other toolboxes. The memoized Levenshtein distance I used can be found here. Thanks! + https://programmingpraxis.com/2014/09/12/levenshtein-distance/