
modified example, add getting started

master · lhenry15 · 4 years ago · commit dcf8c251a1
2 changed files with 101 additions and 590 deletions:
  1. docs/source/getting_started.rst (+96, -581)
  2. examples/build_IsolationForest_pipline.py (+5, -9)

+96 -581  docs/source/getting_started.rst

@@ -3,593 +3,108 @@ Getting Started

In this document, we provide some toy examples for getting started. All
the examples in this document and even more examples are available in
`examples/ <https://github.com/datamllab/rlcard/tree/master/examples>`__.
`examples <https://github.com/datamllab/tods/tree/master/examples>`__.

Constructing Point-wise Detection on NAB Dataset
Outlier Detection with Autoencoder on NAB Dataset
------------------------------------------------

We have set up a random agent that can play randomly on each
environment. An example of applying a random agent on Blackjack is as
follows:
To perform point-wise outlier detection on the NAB dataset, we provide an example of constructing
such a pipeline description:

.. code:: python

import rlcard
from rlcard.agents import RandomAgent
from rlcard.utils import set_global_seed

# Make environment
env = rlcard.make('blackjack', config={'seed': 0})
episode_num = 2

# Set a global seed
set_global_seed(0)

# Set up agents
agent_0 = RandomAgent(action_num=env.action_num)
env.set_agents([agent_0])

for episode in range(episode_num):

    # Generate data from the environment
    trajectories, _ = env.run(is_training=False)

    # Print out the trajectories
    print('\nEpisode {}'.format(episode))
    for ts in trajectories[0]:
        print('State: {}, Action: {}, Reward: {}, Next State: {}, Done: {}'.format(ts[0], ts[1], ts[2], ts[3], ts[4]))

The expected output should look something like the following:

from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: processing
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: algorithm
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: Predictions
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_6.add_output('produce')
pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')

# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)

Note that, to call a primitive during pipeline construction, you can look up its index (python_path) in
`entry_points.ini <https://github.com/datamllab/tods/tree/master/tods/entry_points.ini>`__.
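
If the d3m core package is installed, the registered primitives can also be listed programmatically. The following is a minimal sketch rather than part of the original tutorial; it assumes ``d3m.index.search()``, which enumerates the python_path of every primitive registered through entry points:

.. code:: python

from d3m import index

# List every primitive python_path known to the d3m index and keep the TODS
# ones -- a quick alternative to reading entry_points.ini by hand.
for python_path in index.search():
    if python_path.startswith('d3m.primitives.tods.'):
        print(python_path)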

The output description file (example_pipeline.json) should look something like the following:
::

Episode 0
State: {'obs': array([20, 3]), 'legal_actions': [0, 1]}, Action: 0, Reward: 0, Next State: {'obs': array([15, 3]), 'legal_actions': [0, 1]}, Done: False
State: {'obs': array([15, 3]), 'legal_actions': [0, 1]}, Action: 1, Reward: -1, Next State: {'obs': array([15, 20]), 'legal_actions': [0, 1]}, Done: True

Episode 1
State: {'obs': array([15, 5]), 'legal_actions': [0, 1]}, Action: 1, Reward: 1, Next State: {'obs': array([15, 23]), 'legal_actions': [0, 1]}, Done: True

Note that the states and actions are wrapped by ``env`` in Blackjack. In
this example, the ``[20, 3]`` suggests the current player obtains score
20 while the card that faces up in the dealer’s hand has score 3. Action
0 means “hit” while action 1 means “stand”. Reward 1 suggests the player
wins while reward -1 suggests the dealer wins. Reward 0 suggests a tie.
The above data can be directly fed into a RL algorithm for training.

Deep-Q Learning on Blackjack
----------------------------

The second example is to use Deep-Q learning to train an agent on
Blackjack. We aim to use this example to show how reinforcement learning
algorithms can be developed and applied in our toolkit. We design a
``run`` function which plays one complete game and provides the data for
training RL agents. The example is shown below:
{
"id": "e39bf406-06cf-4c76-88f0-8c8b4447e311",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2020-09-15T07:26:48.365447Z",
"inputs": [{"name": "inputs"}],
"outputs": [{"data": "steps.6.produce", "name": "output predictions"}],
"steps": [
{"type": "PRIMITIVE", "primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common", "name": "Extract a DataFrame from a Dataset", "digest": "a7f5a8f8b276f474c3b40b025d157541de898e4e02555cd8ef76fdeecfbed256"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]},
{"type": "PRIMITIVE", "primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7", "version": "0.6.0", "python_path": "d3m.primitives.data_transformation.column_parser.Common", "name": "Parses strings into their types", "digest": "eccfd70ed359901a625dbde6de40d6bbb4e69d9796ee0ca3a302fd95195451ed"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]},
{"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "9f0303c354df6cec4df7bda0ebb46fb4f101c36ad9a4d1143b9b9c88004629aa"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}},
{"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "9f0303c354df6cec4df7bda0ebb46fb4f101c36ad9a4d1143b9b9c88004629aa"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}},
{"type": "PRIMITIVE", "primitive": {"id": "642de2e7-5590-3cab-9266-2a53c326c461", "version": "0.0.1", "python_path": "d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler", "name": "Axis_wise_scale"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}]},
{"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_ae", "name": "TODS.anomaly_detection_primitives.AutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]},
{"type": "PRIMITIVE", "primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.construct_predictions.Common", "name": "Construct pipeline predictions output", "digest": "6de56912a3f84bbbcc0d1f7ffe646044209120e45bbb21a137236d00fed948e9"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.5.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}],
"digest": "8c6a37e7ac9ef1b302810e56dffa43c3415826ab756ef6917d76dd8ee63d38fc"
}
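
The saved description can also be loaded back into a ``Pipeline`` object for inspection or reuse. This is a minimal sketch assuming only the d3m core package (``Pipeline.from_json``); it is not part of the original example:

.. code:: python

from d3m.metadata.pipeline import Pipeline

# Reload the pipeline description written above and inspect it.
with open('example_pipeline.json', 'r') as f:
    pipeline = Pipeline.from_json(f.read())

print(pipeline.id)
print('number of steps:', len(pipeline.steps))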

With the pre-built pipeline description file, we can then feed the NAB data (Twitter_volume_IBM) to the pipeline and specify the desired evaluation metric with
`run_pipeline.py <https://github.com/datamllab/tods/tree/master/examples/run_pipeline.py>`__:
::

python examples/run_pipeline.py --pipeline_path example_pipeline.json --table_path datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv --metric F1_MACRO --target_index 2

.. code:: python

import tensorflow as tf
import os

import rlcard
from rlcard.agents import DQNAgent
from rlcard.utils import set_global_seed, tournament
from rlcard.utils import Logger

# Make environment
env = rlcard.make('blackjack', config={'seed': 0})
eval_env = rlcard.make('blackjack', config={'seed': 0})

# Set the iterations numbers and how frequently we evaluate/save plot
evaluate_every = 100
evaluate_num = 10000
episode_num = 100000

# The initial memory size
memory_init_size = 100

# Train the agent every X steps
train_every = 1

# The paths for saving the logs and learning curves
log_dir = './experiments/blackjack_dqn_result/'

# Set a global seed
set_global_seed(0)

with tf.Session() as sess:

    # Initialize a global step
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Set up the agents
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=env.action_num,
                     replay_memory_init_size=memory_init_size,
                     train_every=train_every,
                     state_shape=env.state_shape,
                     mlp_layers=[10,10])
    env.set_agents([agent])
    eval_env.set_agents([agent])

    # Initialize global variables
    sess.run(tf.global_variables_initializer())

    # Init a Logger to plot the learning curve
    logger = Logger(log_dir)

    for episode in range(episode_num):

        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)

        # Feed transitions into agent memory, and train the agent
        for ts in trajectories[0]:
            agent.feed(ts)

        # Evaluate the performance. Play with random agents.
        if episode % evaluate_every == 0:
            logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])

    # Close files in the logger
    logger.close_files()

    # Plot the learning curve
    logger.plot('DQN')
    # Save model
    save_dir = 'models/blackjack_dqn'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    saver = tf.train.Saver()
    saver.save(sess, os.path.join(save_dir, 'model'))

The expected output is something like the following:

::

----------------------------------------
timestep | 1
reward | -0.7342
----------------------------------------
INFO - Agent dqn, step 100, rl-loss: 1.0042707920074463
INFO - Copied model parameters to target network.
INFO - Agent dqn, step 136, rl-loss: 0.7888197302818298
----------------------------------------
timestep | 136
reward | -0.1406
----------------------------------------
INFO - Agent dqn, step 278, rl-loss: 0.6946825981140137
----------------------------------------
timestep | 278
reward | -0.1523
----------------------------------------
INFO - Agent dqn, step 412, rl-loss: 0.62268990278244025
----------------------------------------
timestep | 412
reward | -0.088
----------------------------------------
INFO - Agent dqn, step 544, rl-loss: 0.69050502777099616
----------------------------------------
timestep | 544
reward | -0.08
----------------------------------------
INFO - Agent dqn, step 681, rl-loss: 0.61789089441299444
----------------------------------------
timestep | 681
reward | -0.0793
----------------------------------------

In Blackjack, the player will get a payoff at the end of the game: 1 if
the player wins, -1 if the player loses, and 0 if it is a tie. The
performance is measured by the average payoff the player obtains by
playing 10000 episodes. The above example shows that the agent achieves
better and better performance during training. The logs and learning
curves are saved in ``./experiments/blackjack_dqn_result/``.

Running Multiple Processes
--------------------------

The environments can be run with multiple processes to accelerate the
training. Below is an example to train DQN on Blackjack with multiple
processes.

.. code:: python

''' An example of learning a Deep-Q Agent on Blackjack with multiple processes
Note that we must use if __name__ == '__main__' for multiprocessing
'''

import tensorflow as tf
import os

import rlcard
from rlcard.agents import DQNAgent
from rlcard.utils import set_global_seed, tournament
from rlcard.utils import Logger

def main():
    # Make environment
    env = rlcard.make('blackjack', config={'seed': 0, 'env_num': 4})
    eval_env = rlcard.make('blackjack', config={'seed': 0, 'env_num': 4})

    # Set the iterations numbers and how frequently we evaluate performance
    evaluate_every = 100
    evaluate_num = 10000
    iteration_num = 100000

    # The initial memory size
    memory_init_size = 100

    # Train the agent every X steps
    train_every = 1

    # The paths for saving the logs and learning curves
    log_dir = './experiments/blackjack_dqn_result/'

    # Set a global seed
    set_global_seed(0)

    with tf.Session() as sess:

        # Initialize a global step
        global_step = tf.Variable(0, name='global_step', trainable=False)

        # Set up the agents
        agent = DQNAgent(sess,
                         scope='dqn',
                         action_num=env.action_num,
                         replay_memory_init_size=memory_init_size,
                         train_every=train_every,
                         state_shape=env.state_shape,
                         mlp_layers=[10,10])
        env.set_agents([agent])
        eval_env.set_agents([agent])

        # Initialize global variables
        sess.run(tf.global_variables_initializer())

        # Initialize a Logger to plot the learning curve
        logger = Logger(log_dir)

        for iteration in range(iteration_num):

            # Generate data from the environment
            trajectories, _ = env.run(is_training=True)

            # Feed transitions into agent memory, and train the agent
            for ts in trajectories[0]:
                agent.feed(ts)

            # Evaluate the performance. Play with random agents.
            if iteration % evaluate_every == 0:
                logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])

        # Close files in the logger
        logger.close_files()

        # Plot the learning curve
        logger.plot('DQN')
        # Save model
        save_dir = 'models/blackjack_dqn'
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        saver = tf.train.Saver()
        saver.save(sess, os.path.join(save_dir, 'model'))

if __name__ == '__main__':
    main()

Example output is as follows:

::

----------------------------------------
timestep | 17
reward | -0.7378
----------------------------------------

INFO - Copied model parameters to target network.
INFO - Agent dqn, step 1100, rl-loss: 0.40940183401107797
INFO - Copied model parameters to target network.
INFO - Agent dqn, step 2100, rl-loss: 0.44971221685409546
INFO - Copied model parameters to target network.
INFO - Agent dqn, step 2225, rl-loss: 0.65466868877410897
----------------------------------------
timestep | 2225
reward | -0.0658
----------------------------------------
INFO - Agent dqn, step 3100, rl-loss: 0.48663979768753053
INFO - Copied model parameters to target network.
INFO - Agent dqn, step 4100, rl-loss: 0.71293979883193974
INFO - Copied model parameters to target network.
INFO - Agent dqn, step 4440, rl-loss: 0.55871248245239263
----------------------------------------
timestep | 4440
reward | -0.0736
----------------------------------------

Training CFR on Leduc Hold’em
-----------------------------

To show how we can use ``step`` and ``step_back`` to traverse the game
tree, we provide an example of solving Leduc Hold’em with CFR:

.. code:: python

import numpy as np

import rlcard
from rlcard.agents import CFRAgent
from rlcard import models
from rlcard.utils import set_global_seed, tournament
from rlcard.utils import Logger

# Make environment and enable step_back
env = rlcard.make('leduc-holdem', config={'seed': 0, 'allow_step_back':True})
eval_env = rlcard.make('leduc-holdem', config={'seed': 0})

# Set the iterations numbers and how frequently we evaluate/save plot
evaluate_every = 100
save_plot_every = 1000
evaluate_num = 10000
episode_num = 10000

# The paths for saving the logs and learning curves
log_dir = './experiments/leduc_holdem_cfr_result/'

# Set a global seed
set_global_seed(0)

# Initialize CFR Agent
agent = CFRAgent(env)
agent.load() # If we have saved model, we first load the model

# Evaluate CFR against pre-trained NFSP
eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    agent.train()
    print('\rIteration {}'.format(episode), end='')
    # Evaluate the performance. Play with NFSP agents.
    if episode % evaluate_every == 0:
        agent.save() # Save model
        logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])

# Close files in the logger
logger.close_files()

# Plot the learning curve
logger.plot('CFR')

In the above example, the performance is measured by playing against a
pre-trained NFSP model. The expected output is as follows:

::

Iteration 0
----------------------------------------
timestep | 192
reward | -1.3662
----------------------------------------
Iteration 100
----------------------------------------
timestep | 19392
reward | 0.9462
----------------------------------------
Iteration 200
----------------------------------------
timestep | 38592
reward | 0.8591
----------------------------------------
Iteration 300
----------------------------------------
timestep | 57792
reward | 0.7861
----------------------------------------
Iteration 400
----------------------------------------
timestep | 76992
reward | 0.7752
----------------------------------------
Iteration 500
----------------------------------------
timestep | 96192
reward | 0.7215
----------------------------------------

We observe that CFR achieves better performance than NFSP. However, CFR
requires traversal of the game tree, which is infeasible in large
environments.

Having Fun with Pretrained Leduc Model
--------------------------------------

We have designed simple human interfaces for playing against the pretrained
model. Leduc Hold’em is a simplified version of Texas Hold’em; the rules can
be found `here <games.md#leduc-holdem>`__. An example of playing against the
Leduc Hold’em CFR model is shown below:

.. code:: python

import rlcard
from rlcard import models
from rlcard.agents import LeducholdemHumanAgent as HumanAgent
from rlcard.utils import print_card

# Make environment
# Set 'record_action' to True because we need it to print results
env = rlcard.make('leduc-holdem', config={'record_action': True})
human_agent = HumanAgent(env.action_num)
cfr_agent = models.load('leduc-holdem-cfr').agents[0]
env.set_agents([human_agent, cfr_agent])

print(">> Leduc Hold'em pre-trained model")

while (True):
print(">> Start a new game")

trajectories, payoffs = env.run(is_training=False)
# If the human does not take the final action, we need to
# print other players action
final_state = trajectories[0][-1][-2]
action_record = final_state['action_record']
state = final_state['raw_obs']
_action_list = []
for i in range(1, len(action_record)+1):
if action_record[-i][0] == state['current_player']:
break
_action_list.insert(0, action_record[-i])
for pair in _action_list:
print('>> Player', pair[0], 'chooses', pair[1])

# Let's take a look at what the agent card is
print('=============== CFR Agent ===============')
print_card(env.get_perfect_information()['hand_cards'][1])

print('=============== Result ===============')
if payoffs[0] > 0:
print('You win {} chips!'.format(payoffs[0]))
elif payoffs[0] == 0:
print('It is a tie.')
else:
print('You lose {} chips!'.format(-payoffs[0]))
print('')

input("Press any key to continue...")

Example output is as follows:

::

>> Leduc Hold'em pre-trained model

>> Start a new game!
>> Agent 1 chooses raise

=============== Community Card ===============
┌─────────┐
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
└─────────┘
=============== Your Hand ===============
┌─────────┐
│J │
│ │
│ │
│ ♥ │
│ │
│ │
│ J│
└─────────┘
=============== Chips ===============
Yours: +
Agent 1: +++
=========== Actions You Can Choose ===========
0: call, 1: raise, 2: fold

>> You choose action (integer):

We also provide a running demo of a rule-based agent for UNO. Try it by
running ``examples/uno_human.py``.

Leduc Hold’em as Single-Agent Environment
-----------------------------------------

We have wrapped the environment as a single-agent environment by assuming
that the other players play with pre-trained models. The interfaces are
exactly the same as OpenAI Gym. Thus, any single-agent algorithm can be
connected to the environment. An example with Leduc Hold’em is shown below:

.. code:: python

import tensorflow as tf
import os
import numpy as np

import rlcard
from rlcard.agents import DQNAgent
from rlcard.agents import RandomAgent
from rlcard.utils import set_global_seed, tournament
from rlcard.utils import Logger

# Make environment
env = rlcard.make('leduc-holdem', config={'seed': 0, 'single_agent_mode':True})
eval_env = rlcard.make('leduc-holdem', config={'seed': 0, 'single_agent_mode':True})

# Set the iterations numbers and how frequently we evaluate/save plot
evaluate_every = 1000
evaluate_num = 10000
timesteps = 100000

# The initial memory size
memory_init_size = 1000

# Train the agent every X steps
train_every = 1

# The paths for saving the logs and learning curves
log_dir = './experiments/leduc_holdem_single_dqn_result/'

# Set a global seed
set_global_seed(0)

with tf.Session() as sess:

    # Initialize a global step
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Set up the agents
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=env.action_num,
                     replay_memory_init_size=memory_init_size,
                     train_every=train_every,
                     state_shape=env.state_shape,
                     mlp_layers=[128,128])
    # Initialize global variables
    sess.run(tf.global_variables_initializer())

    # Init a Logger to plot the learning curve
    logger = Logger(log_dir)

    state = env.reset()

    for timestep in range(timesteps):
        action = agent.step(state)
        next_state, reward, done = env.step(action)
        ts = (state, action, reward, next_state, done)
        agent.feed(ts)

        if timestep % evaluate_every == 0:
            rewards = []
            state = eval_env.reset()
            for _ in range(evaluate_num):
                action, _ = agent.eval_step(state)
                _, reward, done = env.step(action)
                if done:
                    rewards.append(reward)
            logger.log_performance(env.timestep, np.mean(rewards))

    # Close files in the logger
    logger.close_files()

    # Plot the learning curve
    logger.plot('DQN')
    # Save model
    save_dir = 'models/leduc_holdem_single_dqn'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    saver = tf.train.Saver()
    saver.save(sess, os.path.join(save_dir, 'model'))

+5 -9  examples/build_IsolationForest_pipline.py

@@ -2,10 +2,6 @@ from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
@@ -43,7 +39,7 @@ pipeline_description.add_step(step_3)
attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: test primitive
# Step 4: Power transformation
primitive_4 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.power_transformer')
step_4 = PrimitiveStep(primitive=primitive_4)
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
@@ -51,7 +47,7 @@ step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 4: test primitive
# Step 5: Axiswise scaling
primitive_5 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')
step_5 = PrimitiveStep(primitive=primitive_5)
step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
@@ -59,7 +55,7 @@ step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 4: test primitive
# Step 6: Standardization
primitive_6 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_6 = PrimitiveStep(primitive=primitive_6)
step_6.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
@@ -67,7 +63,7 @@ step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_6.add_output('produce')
pipeline_description.add_step(step_6)

# Step 4: test primitive
# Step 7: Quantile transformation
primitive_7 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.quantile_transformer')
step_7 = PrimitiveStep(primitive=primitive_7)
step_7.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
@@ -75,7 +71,7 @@ step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_7.add_output('produce')
pipeline_description.add_step(step_7)

# Step 4: test primitive
# Step 8: Isolation Forest
primitive_8 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_iforest')
step_8 = PrimitiveStep(primitive=primitive_8)
step_8.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)

