
Commit 47d5322

Morvan Zhou authored and committed
update
1 parent 39ec58c commit 47d5322

2 files changed: +126 −2 lines changed

tutorial-contents/DQN.py

Lines changed: 124 additions & 0 deletions
@@ -0,0 +1,124 @@
"""
Know more, visit 莫烦Python: https://morvanzhou.github.io/tutorials/
My Youtube Channel: https://www.youtube.com/user/MorvanZhou
More about Reinforcement learning: https://morvanzhou.github.io/tutorials/machine-learning/reinforcement-learning/

Dependencies:
tensorflow: 1.1.0
matplotlib
numpy
"""
import tensorflow as tf
import numpy as np
import gym

tf.set_random_seed(1)
np.random.seed(1)

# Hyper Parameters
BATCH_SIZE = 32
LR = 0.01                   # learning rate
EPSILON = 0.9               # greedy policy
GAMMA = 0.9                 # reward discount
TARGET_REPLACE_ITER = 100   # target update frequency
MEMORY_CAPACITY = 2000
MEMORY_COUNTER = 0          # for storing experience
LEARNING_STEP_COUNTER = 0   # for target updating

env = gym.make('CartPole-v0')
env = env.unwrapped
N_ACTIONS = env.action_space.n
N_STATES = env.observation_space.shape[0]
MEMORY = np.zeros((MEMORY_CAPACITY, N_STATES * 2 + 2))     # initialize memory

# tf placeholders
tf_s = tf.placeholder(tf.float32, [None, N_STATES])
tf_a = tf.placeholder(tf.int32, [None, ])
tf_r = tf.placeholder(tf.float32, [None, ])
tf_s_ = tf.placeholder(tf.float32, [None, N_STATES])

with tf.variable_scope('q'):        # evaluation network
    l_eval = tf.layers.dense(tf_s, 10, tf.nn.relu, kernel_initializer=tf.random_normal_initializer(0, 0.1))
    q = tf.layers.dense(l_eval, N_ACTIONS, kernel_initializer=tf.random_normal_initializer(0, 0.1))

with tf.variable_scope('q_next'):   # target network
    l_target = tf.layers.dense(tf_s_, 10, tf.nn.relu)
    q_next = tf.layers.dense(l_target, N_ACTIONS)

q_target = tf.stop_gradient(tf_r + GAMMA * tf.reduce_max(q_next, axis=1))   # shape=(None, ), no gradient needed
a_one_hot = tf.one_hot(tf_a, depth=N_ACTIONS, dtype=tf.float32)
q_wrt_a = tf.reduce_sum(q * a_one_hot, axis=1)                              # shape=(None, ), q for the chosen action

loss = tf.reduce_mean(tf.squared_difference(q_target, q_wrt_a))
train_op = tf.train.AdamOptimizer(LR).minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())


def choose_action(s):
    s = s[np.newaxis, :]
    if np.random.uniform() < EPSILON:
        # forward feed the observation and get the q value for every action
        actions_value = sess.run(q, feed_dict={tf_s: s})
        action = np.argmax(actions_value)
    else:
        action = np.random.randint(0, N_ACTIONS)
    return action


def store_transition(s, a, r, s_):
    global MEMORY_COUNTER
    transition = np.hstack((s, [a, r], s_))
    # replace the old memory with new memory
    index = MEMORY_COUNTER % MEMORY_CAPACITY
    MEMORY[index, :] = transition
    MEMORY_COUNTER += 1


def learn():
    # update target net
    global LEARNING_STEP_COUNTER
    if LEARNING_STEP_COUNTER % TARGET_REPLACE_ITER == 0:
        t_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='q_next')
        e_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='q')
        sess.run([tf.assign(t, e) for t, e in zip(t_params, e_params)])
    LEARNING_STEP_COUNTER += 1

    # learning
    sample_index = np.random.choice(MEMORY_CAPACITY, BATCH_SIZE)
    b_memory = MEMORY[sample_index, :]
    b_s = b_memory[:, :N_STATES]
    b_a = b_memory[:, N_STATES].astype(int)
    b_r = b_memory[:, N_STATES+1]
    b_s_ = b_memory[:, -N_STATES:]
    sess.run(train_op, {tf_s: b_s, tf_a: b_a, tf_r: b_r, tf_s_: b_s_})


print('\nCollecting experience...')
for i_episode in range(400):
    s = env.reset()
    ep_r = 0
    while True:
        env.render()
        a = choose_action(s)

        # take action
        s_, r, done, info = env.step(a)

        # modify the reward
        x, x_dot, theta, theta_dot = s_
        r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8
        r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5
        r = r1 + r2

        store_transition(s, a, r, s_)

        ep_r += r
        if MEMORY_COUNTER > MEMORY_CAPACITY:
            learn()
        if done:
            print('Ep: ', i_episode,
                  '| Ep_r: ', round(ep_r, 2))

        if done:
            break
        s = s_
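For reference, the graph above builds the Q-learning target as q_target = r + GAMMA * max_a' Q_next(s', a') with the gradient stopped, and trains the eval net on the squared difference to q_wrt_a. Below is a minimal NumPy sketch of that target and the resulting TD error, not part of DQN.py; the batch of two rewards and Q-values is made up purely for illustration.

import numpy as np

GAMMA = 0.9                                      # same reward discount as in DQN.py

# made-up batch: rewards and target-network Q-values for the next states
b_r    = np.array([1.0, 0.5])                    # shape (2,)
q_next = np.array([[0.2, 0.7],                   # shape (2, N_ACTIONS)
                   [0.1, 0.4]])
q_eval = np.array([0.6, 0.3])                    # Q(s, a) of the taken actions from the eval net

# q_target = r + GAMMA * max_a' Q_target(s', a'); no gradient flows through it
q_target = b_r + GAMMA * q_next.max(axis=1)      # -> [1.63, 0.86]
td_error = q_target - q_eval                     # what the squared loss penalizes
print(q_target, td_error)

With these numbers the target is [1.63, 0.86], so the eval net is pulled toward the bootstrapped return of the greedy next action.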

tutorial-contents/GAN.py

Lines changed: 2 additions & 2 deletions
@@ -43,10 +43,10 @@ def artist_works():     # painting from the famous artist (real target)
with tf.variable_scope('Discriminator'):
    real_art = tf.placeholder(tf.float32, [None, ART_COMPONENTS], name='real_in')    # receive art work from the famous artist
    D_l0 = tf.layers.dense(real_art, 128, tf.nn.relu, name='l')
-   prob_artist0 = tf.layers.dense(D_l0, 1, tf.nn.sigmoid, name='out')               # tell the probability that the art work is made by artist
+   prob_artist0 = tf.layers.dense(D_l0, 1, tf.nn.sigmoid, name='out')               # probability that the art work is made by artist
    # reuse layers for generator
    D_l1 = tf.layers.dense(G_out, 128, tf.nn.relu, name='l', reuse=True)             # receive art work from a newbie like G
-   prob_artist1 = tf.layers.dense(D_l1, 1, tf.nn.sigmoid, name='out', reuse=True)   # tell the probability that the art work is made by artist
+   prob_artist1 = tf.layers.dense(D_l1, 1, tf.nn.sigmoid, name='out', reuse=True)   # probability that the art work is made by artist

D_loss = -tf.reduce_mean(tf.log(prob_artist0) + tf.log(1-prob_artist1))
G_loss = tf.reduce_mean(tf.log(1-prob_artist1))
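For context, the unchanged loss lines at the bottom of this hunk implement the minimax GAN objective. Below is a minimal NumPy sketch of those two losses, not part of GAN.py; the discriminator probabilities are made up purely for illustration.

import numpy as np

# made-up discriminator outputs for one batch
prob_artist0 = np.array([0.9, 0.8])   # D's probability that the real art is real
prob_artist1 = np.array([0.2, 0.3])   # D's probability that G's art is real

# same objectives as the GAN.py lines above
D_loss = -np.mean(np.log(prob_artist0) + np.log(1 - prob_artist1))
G_loss = np.mean(np.log(1 - prob_artist1))
print(D_loss, G_loss)

Pushing prob_artist1 up makes G_loss more negative, which is what the generator's optimizer minimizes, while the discriminator minimizes D_loss by keeping prob_artist0 high and prob_artist1 low.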
