人工智能之 KellyCoinflip

2024-01-06 09:12:10
import gym
# Create the environment; the id must be a plain ASCII-quoted string literal.
env = gym.make('KellyCoinflip-v0')

from scipy.stats import binom
import numpy as np
from repoze.lru import lru_cache

def V(w, b, m=250):
    """Evaluate the closed-form value expression for a (wealth, bets-left) state.

    Presumably this approximates the expected final wealth of the capped
    coin-flip betting game -- confirm against the derivation it was taken from.

    Args:
        w: current wealth.
        b: number of bets remaining.
        m: wealth cap (defaults to 250).

    Returns:
        ``m`` when ``w >= m``, ``0`` when ``w <= 0``, ``w`` when ``b == 0``,
        otherwise the value of the binomial-based expression below.  If the
        binomial quantile cannot be converted to an integer (it is NaN, e.g.
        for a negative ``b``), the arguments are printed and ``None`` is
        returned.
    """
    # Compare against the cap parameter rather than a hard-coded 250 so that
    # a non-default ``m`` is honoured.
    if w >= m:
        return m
    if w <= 0:
        return 0
    if b == 0:
        return w

    j = binom.ppf(float(w) / float(m), b, 0.5)
    try:
        # int() raises ValueError when the quantile is NaN.
        top = int(j + 1)
    except ValueError:
        print("Error:", (w, b, m))
        return None

    # Sum over k = 0..j of cdf(k - 1) * 1.5 ** (j - k).
    cdf_terms = binom.cdf([k - 1 for k in range(top)], b, 0.5)
    weights = [1.5 ** k for k in reversed(range(top))]
    tail = sum(np.multiply(cdf_terms, weights))
    return 1.2 ** b * 1.5 ** -j * (w + m / 2 * tail)

@lru_cache(None)
def VPplan(w, b):
    """Pick the bet size that maximises the one-step lookahead of ``V``.

    Candidate bets run from 0 up to ``int(w * 100 + 1) - 1`` hundredths of a
    wealth unit.  Each is scored as ``0.6 * V(w + bet, b - 1) +
    0.4 * V(w - bet, b - 1)``, and the first best-scoring bet is returned.

    Returns 0 when ``w`` is non-positive or at least 250, and ``w`` itself
    when ``b`` is 0.
    """
    # Short-circuit: nothing to plan outside the open interval (0, 250).
    if w >= 250 or w <= 0:
        return 0
    if b == 0:
        return w

    stakes = [float(cents) / 100.0 for cents in range(int((w * 100) + 1))]
    payoffs = [
        0.6 * V(w + stake, b - 1) + 0.4 * V(w - stake, b - 1)
        for stake in stakes
    ]
    # Index of the first maximal payoff equals the bet expressed in hundredths.
    best_index = payoffs.index(max(payoffs))
    return float(best_index) / 100.0

# Run 500 episodes with the VPplan policy and report the average total reward.
rewards = []
for n in range(500):
    reward = 0
    while True:
        # Read current wealth and remaining rounds from the environment.
        w = env._get_obs()[0][0]
        b = env._get_obs()[1]
        bet = VPplan(w, b)
        # VPplan's result is multiplied by 100 before being passed to step().
        results = env.step(bet * 100)
        print(n, w, b, bet, "results:", results)
        reward += results[1]
        if results[2]:
            break
    rewards.append(reward)
    env.reset()

print(sum(rewards) / len(rewards))
人工智能之 KellyCoinflip
码农公寓

相关文章