Sorted Numbers
我们需要产生一定长度的有序数列,利用GA来求解。
1. engine
其中 genetic.py 的完整代码为:
import random
import statistics
import sys
import time
def _generate_parent(length, geneSet, get_fitness):
genes = []
while len(genes) < length:
sampleSize = min(length - len(genes), len(geneSet))
genes.extend(random.sample(geneSet, sampleSize))
fitness = get_fitness(genes)
return Chromosome(genes, fitness)
def _mutate(parent, geneSet, get_fitness):
childGenes = parent.Genes[:]
index = random.randrange(0, len(parent.Genes))
newGene, alternate = random.sample(geneSet, 2)
childGenes[index] = alternate if newGene == childGenes[index] else newGene
fitness = get_fitness(childGenes)
return Chromosome(childGenes, fitness)
def get_best(get_fitness, targetLen, optimalFitness, geneSet, display):
random.seed()
bestParent = _generate_parent(targetLen, geneSet, get_fitness)
display(bestParent)
if bestParent.Fitness >= optimalFitness:
return bestParent
while True:
child = _mutate(bestParent, geneSet, get_fitness)
if bestParent.Fitness >= child.Fitness:
continue
display(child)
if child.Fitness >= optimalFitness:
return child
bestParent = child
class Chromosome:
def __init__(self, genes, fitness):
self.Genes = genes
self.Fitness = fitness
class Benchmark:
@staticmethod
def run(function):
timings = []
stdout = sys.stdout
for i in range(100):
sys.stdout = None
startTime = time.time()
function()
seconds = time.time() - startTime
sys.stdout = stdout
timings.append(seconds)
mean = statistics.mean(timings)
if i < 10 or i % 10 == 9:
print("{} {:3.2f} {:3.2f}".format(
1 + i, mean,
statistics.stdev(timings, mean) if i > 1 else 0))
下面对 genetic.py 中的各个部分进行叙述:
def _generate_parent(length, geneSet, get_fitness):
genes = []
while len(genes) < length:
sampleSize = min(length - len(genes), len(geneSet))
genes.extend(random.sample(geneSet, sampleSize))
fitness = get_fitness(genes)
return Chromosome(genes, fitness)
函数 _generate_parent 的功能是产生父代染色体,这里用于在给定范围geneSet内产生长度为length的数列,数列中的数是随机选取的。
def _mutate(parent, geneSet, get_fitness):
childGenes = parent.Genes[:]
index = random.randrange(0, len(parent.Genes))
newGene, alternate = random.sample(geneSet, 2)
childGenes[index] = alternate if newGene == childGenes[index] else newGene
fitness = get_fitness(childGenes)
return Chromosome(childGenes, fitness)
函数 _mutate 的功能是对给定的parent染色体进行突变,并计算新的适应值,以产生新的染色体。
def get_best(get_fitness, targetLen, optimalFitness, geneSet, display):
random.seed()
bestParent = _generate_parent(targetLen, geneSet, get_fitness)
display(bestParent)
if bestParent.Fitness >= optimalFitness:
return bestParent
while True:
child = _mutate(bestParent, geneSet, get_fitness)
if bestParent.Fitness >= child.Fitness:
continue
display(child)
if child.Fitness >= optimalFitness:
return child
bestParent = child
函数 get_best 的功能是调用函数 _generate_parent 产生初代染色体,若初代染色体为最优则返回初代染色体;否则进入一个循环,循环内包括:对父代染色体突变,去适应值更优的,在重复循环直到产生最优解。
class Benchmark:
@staticmethod
def run(function):
timings = []
stdout = sys.stdout
for i in range(100):
sys.stdout = None
startTime = time.time()
function()
seconds = time.time() - startTime
sys.stdout = stdout
timings.append(seconds)
mean = statistics.mean(timings)
if i < 10 or i % 10 == 9:
print("{} {:3.2f} {:3.2f}".format(
1 + i, mean,
statistics.stdev(timings, mean) if i > 1 else 0))
类 Benchmark 主要负责对函数function执行100次,输出到目前为止的单次执行的均值和方差。为了减少输出,这里只输出了前十次和后面的每个整十次。
2. sortedNumbersTests.py
首先 sortedNumbersTests.py 的完整代码如下:
import unittest
import datetime
import genetic
class Fitness:
def __init__(self, numbersInSequenceCount, totalGap):
self.NumbersInSequenceCount = numbersInSequenceCount
self.TotalGap = totalGap
def __gt__(self, other):
if self.NumbersInSequenceCount != other.NumbersInSequenceCount:
return self.NumbersInSequenceCount > other.NumbersInSequenceCount
return self.TotalGap < other.TotalGap
def __str__(self):
return "{} Sequential, {} Total Gap".format(
self.NumbersInSequenceCount,
self.TotalGap)
def get_fitness(genes):
fitness = 1
gap = 0
for i in range(1, len(genes)):
if genes[i] > genes[i - 1]:
fitness += 1
else:
gap += genes[i-1] -genes[i]
return Fitness(fitness, gap)
def display(candidate, startTime):
timeDiff = datetime.datetime.now() - startTime
print("{}\t=> {}\t{}".format(
', '.join(map(str, candidate.Genes)),
candidate.Fitness,
timeDiff
))
class SortedNumbersTests(unittest.TestCase):
def test_sort_10_numbers(self):
self.sort_numbers(10)
def test_benchmark(self):
genetic.Benchmark.run(lambda :self.sort_numbers(40))
def sort_numbers(self, totalNumbers):
geneset = [i for i in range(100)]
startTime = datetime.datetime.now()
def fnDisplay(candidate):
display(candidate, startTime)
def fnGetFitness(genes):
return get_fitness(genes)
optimalFitness = Fitness(totalNumbers, 0)
best = genetic.get_best(fnGetFitness, totalNumbers,
optimalFitness, geneset, fnDisplay)
self.assertTrue(not optimalFitness > best.Fitness)
现在对上面代码的各个部分进行讲解。
class Fitness:
def __init__(self, numbersInSequenceCount, totalGap):
self.NumbersInSequenceCount = numbersInSequenceCount
self.TotalGap = totalGap
def __gt__(self, other):
if self.NumbersInSequenceCount != other.NumbersInSequenceCount:
return self.NumbersInSequenceCount > other.NumbersInSequenceCount
return self.TotalGap < other.TotalGap
def __str__(self):
return "{} Sequential, {} Total Gap".format(
self.NumbersInSequenceCount,
self.TotalGap)
首先,声明一个适应值的类,类中包含升序数,和逆序差两个变量。为什么设置这两个变量?因为本次计算的目标是生成一个升序的数列,那么数列中升序的个数越多越好。
举个栗子:
1,2,3,6,5,4 升序数为3
1,2,3,4,5,6 升序数为6
因此后者更好
那么相同升序数的两个数列是不是真的一样好呢?当然不是。因此就引入了逆序差这样的变量。
举个例子:
1,2,10,5,15,16 逆序差为5
1,2,7,5,15,16 逆序差为2
因此后者更好
__gt__是一个内置的大于比较器的查找的函数名称,他表示当一个染色体A的升序数大于另一个染色体B或者当两个染色体A、B升序值相等时,A的逆序差小于B的逆序差时,染色体A的适应值就大于B。
def get_fitness(genes):
fitness = 1
gap = 0
for i in range(1, len(genes)):
if genes[i] > genes[i - 1]:
fitness += 1
else:
gap += genes[i-1] -genes[i]
return Fitness(fitness, gap)
函数get_fitness计算染色体的基因也就是数列的适应值,即升序数和逆序差。
class SortedNumbersTests(unittest.TestCase):
def test_sort_10_numbers(self):
self.sort_numbers(10)
def test_benchmark(self):
genetic.Benchmark.run(lambda :self.sort_numbers(40))
def sort_numbers(self, totalNumbers):
geneset = [i for i in range(100)]
startTime = datetime.datetime.now()
def fnDisplay(candidate):
display(candidate, startTime)
def fnGetFitness(genes):
return get_fitness(genes)
optimalFitness = Fitness(totalNumbers, 0)
best = genetic.get_best(fnGetFitness, totalNumbers,
optimalFitness, geneset, fnDisplay)
self.assertTrue(not optimalFitness > best.Fitness)
由于这里使用了 unittest.TestCase ,因此会执行其中使用test开头的函数。其中,test_sort_10_numbers函数主要是产生一个长度为10的升序数列。而test_benchmark表示的是产生长度为40的升序数列,共执行100次。
sort_numbers调用遗传算法。