
modified example, add getting started

master · lhenry15 · 4 years ago · commit dcf8c251a1
2 changed files with 101 additions and 590 deletions:
  1. docs/source/getting_started.rst (+96, -581)
  2. examples/build_IsolationForest_pipline.py (+5, -9)

+96 -581  docs/source/getting_started.rst

@@ -3,593 +3,108 @@ Getting Started

In this document, we provide some toy examples for getting started. All
the examples in this document and even more examples are available in
`examples/ <https://github.com/datamllab/rlcard/tree/master/examples>`__.
`examples <https://github.com/datamllab/tods/tree/master/examples>`__.

Constructing Point-wise Detection on NAB Dataset
Outlier Detection with Autoencoder on NAB Dataset
------------------------------------------------

We have set up a random agent that can play randomly on each
environment. An example of applying a random agent on Blackjack is as
follows:
To perform point-wise outlier detection on the NAB dataset, we provide an example of constructing
such a pipeline description:

.. code:: python

import rlcard
from rlcard.agents import RandomAgent
from rlcard.utils import set_global_seed

# Make environment
env = rlcard.make('blackjack', config={'seed': 0})
episode_num = 2

# Set a global seed
set_global_seed(0)

# Set up agents
agent_0 = RandomAgent(action_num=env.action_num)
env.set_agents([agent_0])

for episode in range(episode_num):

    # Generate data from the environment
    trajectories, _ = env.run(is_training=False)

    # Print out the trajectories
    print('\nEpisode {}'.format(episode))
    for ts in trajectories[0]:
        print('State: {}, Action: {}, Reward: {}, Next State: {}, Done: {}'.format(ts[0], ts[1], ts[2], ts[3], ts[4]))

The expected output should look something like the following:

from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: processing
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: algorithm
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: Predictions
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_6.add_output('produce')
pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')

# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)

Note that, to call a primitive during pipeline construction, you can look up its index (python_path) in
`entry_points.ini <https://github.com/datamllab/tods/tree/master/tods/entry_points.ini>`__.
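
If the d3m core package is installed, the registered primitives can also be listed programmatically. The following is a minimal sketch rather than part of the original tutorial; it assumes ``d3m.index.search()``, which enumerates the python_path of every primitive registered through entry points:

.. code:: python

from d3m import index

# List every primitive python_path known to the d3m index and keep the TODS
# ones -- a quick alternative to reading entry_points.ini by hand.
for python_path in index.search():
    if python_path.startswith('d3m.primitives.tods.'):
        print(python_path)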

The output description file (example_pipeline.json) should look something like the following:
::

Episode 0
State: {'obs': array([20, 3]), 'legal_actions': [0, 1]}, Action: 0, Reward: 0, Next State: {'obs': array([15, 3]), 'legal_actions': [0, 1]}, Done: False
State: {'obs': array([15, 3]), 'legal_actions': [0, 1]}, Action: 1, Reward: -1, Next State: {'obs': array([15, 20]), 'legal_actions': [0, 1]}, Done: True

Episode 1
State: {'obs': array([15, 5]), 'legal_actions': [0, 1]}, Action: 1, Reward: 1, Next State: {'obs': array([15, 23]), 'legal_actions': [0, 1]}, Done: True

Note that the states and actions are wrapped by ``env`` in Blackjack. In
this example, the ``[20, 3]`` suggests the current player obtains score
20 while the card that faces up in the dealer’s hand has score 3. Action
0 means “hit” while action 1 means “stand”. Reward 1 suggests the player
wins while reward -1 suggests the dealer wins. Reward 0 suggests a tie.
The above data can be directly fed into a RL algorithm for training.

Deep-Q Learning on Blackjack
----------------------------

The second example is to use Deep-Q learning to train an agent on
Blackjack. We aim to use this example to show how reinforcement learning
algorithms can be developed and applied in our toolkit. We design a
``run`` function which plays one complete game and provides the data for
training RL agents. The example is shown below:
{
"id": "e39bf406-06cf-4c76-88f0-8c8b4447e311",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2020-09-15T07:26:48.365447Z",
"inputs": [{"name": "inputs"}],
"outputs": [{"data": "steps.6.produce", "name": "output predictions"}],
"steps": [
{"type": "PRIMITIVE", "primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common", "name": "Extract a DataFrame from a Dataset", "digest": "a7f5a8f8b276f474c3b40b025d157541de898e4e02555cd8ef76fdeecfbed256"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]},
{"type": "PRIMITIVE", "primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7", "version": "0.6.0", "python_path": "d3m.primitives.data_transformation.column_parser.Common", "name": "Parses strings into their types", "digest": "eccfd70ed359901a625dbde6de40d6bbb4e69d9796ee0ca3a302fd95195451ed"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]},
{"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "9f0303c354df6cec4df7bda0ebb46fb4f101c36ad9a4d1143b9b9c88004629aa"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}},
{"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "9f0303c354df6cec4df7bda0ebb46fb4f101c36ad9a4d1143b9b9c88004629aa"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}},
{"type": "PRIMITIVE", "primitive": {"id": "642de2e7-5590-3cab-9266-2a53c326c461", "version": "0.0.1", "python_path": "d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler", "name": "Axis_wise_scale"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}]},
{"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_ae", "name": "TODS.anomaly_detection_primitives.AutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]},
{"type": "PRIMITIVE", "primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.construct_predictions.Common", "name": "Construct pipeline predictions output", "digest": "6de56912a3f84bbbcc0d1f7ffe646044209120e45bbb21a137236d00fed948e9"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.5.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}],
"digest": "8c6a37e7ac9ef1b302810e56dffa43c3415826ab756ef6917d76dd8ee63d38fc"
}
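
The saved description can also be loaded back into a ``Pipeline`` object for inspection or reuse. This is a minimal sketch assuming only the d3m core package (``Pipeline.from_json``); it is not part of the original example:

.. code:: python

from d3m.metadata.pipeline import Pipeline

# Reload the pipeline description written above and inspect it.
with open('example_pipeline.json', 'r') as f:
    pipeline = Pipeline.from_json(f.read())

print(pipeline.id)
print('number of steps:', len(pipeline.steps))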

With the pre-built pipeline description file, we can then feed the NAB data (Twitter_volume_IBM) to the pipeline and specify the desired evaluation metric with
`run_pipeline.py <https://github.com/datamllab/tods/tree/master/examples/run_pipeline.py>`__:
::

python examples/run_pipeline.py --pipeline_path example_pipeline.json --table_path datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv --metric F1_MACRO --target_index 2

.. code:: python

import tensorflow as tf
import os

import rlcard
from rlcard.agents import DQNAgent
from rlcard.utils import set_global_seed, tournament
from rlcard.utils import Logger

# Make environment
env = rlcard.make('blackjack', config={'seed': 0})
eval_env = rlcard.make('blackjack', config={'seed': 0})

# Set the iterations numbers and how frequently we evaluate/save plot
evaluate_every = 100
evaluate_num = 10000
episode_num = 100000

# The initial memory size
memory_init_size = 100

# Train the agent every X steps
train_every = 1

# The paths for saving the logs and learning curves
log_dir = './experiments/blackjack_dqn_result/'

# Set a global seed
set_global_seed(0)

with tf.Session() as sess:

    # Initialize a global step
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Set up the agents
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=env.action_num,
                     replay_memory_init_size=memory_init_size,
                     train_every=train_every,
                     state_shape=env.state_shape,
                     mlp_layers=[10,10])
    env.set_agents([agent])
    eval_env.set_agents([agent])

    # Initialize global variables
    sess.run(tf.global_variables_initializer())

    # Init a Logger to plot the learning curve
    logger = Logger(log_dir)

    for episode in range(episode_num):

        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)

        # Feed transitions into agent memory, and train the agent
        for ts in trajectories[0]:
            agent.feed(ts)

        # Evaluate the performance. Play with random agents.
        if episode % evaluate_every == 0:
            logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])

    # Close files in the logger
    logger.close_files()

    # Plot the learning curve
    logger.plot('DQN')
    # Save model
    save_dir = 'models/blackjack_dqn'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    saver = tf.train.Saver()
    saver.save(sess, os.path.join(save_dir, 'model'))

The expected output is something like the following:

::

----------------------------------------
timestep | 1
reward | -0.7342
----------------------------------------
INFO - Agent dqn, step 100, rl-loss: 1.0042707920074463
INFO - Copied model parameters to target network.
INFO - Agent dqn, step 136, rl-loss: 0.7888197302818298
----------------------------------------
timestep | 136
reward | -0.1406
----------------------------------------
INFO - Agent dqn, step 278, rl-loss: 0.6946825981140137
----------------------------------------
timestep | 278
reward | -0.1523
----------------------------------------
INFO - Agent dqn, step 412, rl-loss: 0.62268990278244025
----------------------------------------
timestep | 412
reward | -0.088
----------------------------------------
INFO - Agent dqn, step 544, rl-loss: 0.69050502777099616
----------------------------------------
timestep | 544
reward | -0.08
----------------------------------------
INFO - Agent dqn, step 681, rl-loss: 0.61789089441299444
----------------------------------------
timestep | 681
reward | -0.0793
----------------------------------------

In Blackjack, the player will get a payoff at the end of the game: 1 if
the player wins, -1 if the player loses, and 0 if it is a tie. The
performance is measured by the average payoff the player obtains by
playing 10000 episodes. The above example shows that the agent achieves
better and better performance during training. The logs and learning
curves are saved in ``./experiments/blackjack_dqn_result/``.

Running Multiple Processes
--------------------------

The environments can be run with multiple processes to accelerate the
training. Below is an example to train DQN on Blackjack with multiple
processes.

.. code:: python

''' An example of learning a Deep-Q Agent on Blackjack with multiple processes
Note that we must use if __name__ == '__main__' for multiprocessing
'''

import tensorflow as tf
import os

import rlcard
from rlcard.agents import DQNAgent
from rlcard.utils import set_global_seed, tournament
from rlcard.utils import Logger

def main():
    # Make environment
    env = rlcard.make('blackjack', config={'seed': 0, 'env_num': 4})
    eval_env = rlcard.make('blackjack', config={'seed': 0, 'env_num': 4})

    # Set the iterations numbers and how frequently we evaluate performance
    evaluate_every = 100
    evaluate_num = 10000
    iteration_num = 100000

    # The initial memory size
    memory_init_size = 100

    # Train the agent every X steps
    train_every = 1

    # The paths for saving the logs and learning curves
    log_dir = './experiments/blackjack_dqn_result/'

    # Set a global seed
    set_global_seed(0)

    with tf.Session() as sess:

        # Initialize a global step
        global_step = tf.Variable(0, name='global_step', trainable=False)

        # Set up the agents
        agent = DQNAgent(sess,
                         scope='dqn',
                         action_num=env.action_num,
                         replay_memory_init_size=memory_init_size,
                         train_every=train_every,
                         state_shape=env.state_shape,
                         mlp_layers=[10,10])
        env.set_agents([agent])
        eval_env.set_agents([agent])

        # Initialize global variables
        sess.run(tf.global_variables_initializer())

        # Initialize a Logger to plot the learning curve
        logger = Logger(log_dir)

        for iteration in range(iteration_num):

            # Generate data from the environment
            trajectories, _ = env.run(is_training=True)

            # Feed transitions into agent memory, and train the agent
            for ts in trajectories[0]:
                agent.feed(ts)

            # Evaluate the performance. Play with random agents.
            if iteration % evaluate_every == 0:
                logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])

        # Close files in the logger
        logger.close_files()

        # Plot the learning curve
        logger.plot('DQN')
        # Save model
        save_dir = 'models/blackjack_dqn'
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        saver = tf.train.Saver()
        saver.save(sess, os.path.join(save_dir, 'model'))

if __name__ == '__main__':
    main()

Example output is as follows:

::

----------------------------------------
timestep | 17
reward | -0.7378
----------------------------------------

INFO - Copied model parameters to target network.
INFO - Agent dqn, step 1100, rl-loss: 0.40940183401107797
INFO - Copied model parameters to target network.
INFO - Agent dqn, step 2100, rl-loss: 0.44971221685409546
INFO - Copied model parameters to target network.
INFO - Agent dqn, step 2225, rl-loss: 0.65466868877410897
----------------------------------------
timestep | 2225
reward | -0.0658
----------------------------------------
INFO - Agent dqn, step 3100, rl-loss: 0.48663979768753053
INFO - Copied model parameters to target network.
INFO - Agent dqn, step 4100, rl-loss: 0.71293979883193974
INFO - Copied model parameters to target network.
INFO - Agent dqn, step 4440, rl-loss: 0.55871248245239263
----------------------------------------
timestep | 4440
reward | -0.0736
----------------------------------------

Training CFR on Leduc Hold’em
-----------------------------

To show how we can use ``step`` and ``step_back`` to traverse the game
tree, we provide an example of solving Leduc Hold’em with CFR:

.. code:: python

import numpy as np

import rlcard
from rlcard.agents import CFRAgent
from rlcard import models
from rlcard.utils import set_global_seed, tournament
from rlcard.utils import Logger

# Make environment and enable step_back
env = rlcard.make('leduc-holdem', config={'seed': 0, 'allow_step_back':True})
eval_env = rlcard.make('leduc-holdem', config={'seed': 0})

# Set the iterations numbers and how frequently we evaluate/save plot
evaluate_every = 100
save_plot_every = 1000
evaluate_num = 10000
episode_num = 10000

# The paths for saving the logs and learning curves
log_dir = './experiments/leduc_holdem_cfr_result/'

# Set a global seed
set_global_seed(0)

# Initialize CFR Agent
agent = CFRAgent(env)
agent.load() # If we have saved model, we first load the model

# Evaluate CFR against pre-trained NFSP
eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    agent.train()
    print('\rIteration {}'.format(episode), end='')
    # Evaluate the performance. Play with NFSP agents.
    if episode % evaluate_every == 0:
        agent.save() # Save model
        logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])

# Close files in the logger
logger.close_files()

# Plot the learning curve
logger.plot('CFR')

In the above example, the performance is measured by playing against a
pre-trained NFSP model. The expected output is as follows:

::

Iteration 0
----------------------------------------
timestep | 192
reward | -1.3662
----------------------------------------
Iteration 100
----------------------------------------
timestep | 19392
reward | 0.9462
----------------------------------------
Iteration 200
----------------------------------------
timestep | 38592
reward | 0.8591
----------------------------------------
Iteration 300
----------------------------------------
timestep | 57792
reward | 0.7861
----------------------------------------
Iteration 400
----------------------------------------
timestep | 76992
reward | 0.7752
----------------------------------------
Iteration 500
----------------------------------------
timestep | 96192
reward | 0.7215
----------------------------------------

We observe that CFR achieves better performance than NFSP. However, CFR
requires traversal of the game tree, which is infeasible in large
environments.

Having Fun with Pretrained Leduc Model
--------------------------------------

We have designed simple human interfaces for playing against the pretrained
model. Leduc Hold’em is a simplified version of Texas Hold’em; the rules can
be found `here <games.md#leduc-holdem>`__. An example of playing against the
Leduc Hold’em CFR model is shown below:

.. code:: python

import rlcard
from rlcard import models
from rlcard.agents import LeducholdemHumanAgent as HumanAgent
from rlcard.utils import print_card

# Make environment
# Set 'record_action' to True because we need it to print results
env = rlcard.make('leduc-holdem', config={'record_action': True})
human_agent = HumanAgent(env.action_num)
cfr_agent = models.load('leduc-holdem-cfr').agents[0]
env.set_agents([human_agent, cfr_agent])

print(">> Leduc Hold'em pre-trained model")

while (True):
print(">> Start a new game")

trajectories, payoffs = env.run(is_training=False)
# If the human does not take the final action, we need to
# print other players action
final_state = trajectories[0][-1][-2]
action_record = final_state['action_record']
state = final_state['raw_obs']
_action_list = []
for i in range(1, len(action_record)+1):
if action_record[-i][0] == state['current_player']:
break
_action_list.insert(0, action_record[-i])
for pair in _action_list:
print('>> Player', pair[0], 'chooses', pair[1])

# Let's take a look at what the agent card is
print('=============== CFR Agent ===============')
print_card(env.get_perfect_information()['hand_cards'][1])

print('=============== Result ===============')
if payoffs[0] > 0:
print('You win {} chips!'.format(payoffs[0]))
elif payoffs[0] == 0:
print('It is a tie.')
else:
print('You lose {} chips!'.format(-payoffs[0]))
print('')

input("Press any key to continue...")

Example output is as follows:

::

>> Leduc Hold'em pre-trained model

>> Start a new game!
>> Agent 1 chooses raise

=============== Community Card ===============
┌─────────┐
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
└─────────┘
=============== Your Hand ===============
┌─────────┐
│J │
│ │
│ │
│ ♥ │
│ │
│ │
│ J│
└─────────┘
=============== Chips ===============
Yours: +
Agent 1: +++
=========== Actions You Can Choose ===========
0: call, 1: raise, 2: fold

>> You choose action (integer):

We also provide a running demo of a rule-based agent for UNO. Try it by
running ``examples/uno_human.py``.

Leduc Hold’em as Single-Agent Environment
-----------------------------------------

We have wrapped the environment as a single-agent environment by assuming
that the other players play with pre-trained models. The interfaces are
exactly the same as OpenAI Gym. Thus, any single-agent algorithm can be
connected to the environment. An example with Leduc Hold’em is shown below:

.. code:: python

import tensorflow as tf
import os
import numpy as np

import rlcard
from rlcard.agents import DQNAgent
from rlcard.agents import RandomAgent
from rlcard.utils import set_global_seed, tournament
from rlcard.utils import Logger

# Make environment
env = rlcard.make('leduc-holdem', config={'seed': 0, 'single_agent_mode':True})
eval_env = rlcard.make('leduc-holdem', config={'seed': 0, 'single_agent_mode':True})

# Set the iterations numbers and how frequently we evaluate/save plot
evaluate_every = 1000
evaluate_num = 10000
timesteps = 100000

# The initial memory size
memory_init_size = 1000

# Train the agent every X steps
train_every = 1

# The paths for saving the logs and learning curves
log_dir = './experiments/leduc_holdem_single_dqn_result/'

# Set a global seed
set_global_seed(0)

with tf.Session() as sess:

    # Initialize a global step
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Set up the agents
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=env.action_num,
                     replay_memory_init_size=memory_init_size,
                     train_every=train_every,
                     state_shape=env.state_shape,
                     mlp_layers=[128,128])
    # Initialize global variables
    sess.run(tf.global_variables_initializer())

    # Init a Logger to plot the learning curve
    logger = Logger(log_dir)

    state = env.reset()

    for timestep in range(timesteps):
        action = agent.step(state)
        next_state, reward, done = env.step(action)
        ts = (state, action, reward, next_state, done)
        agent.feed(ts)

        if timestep % evaluate_every == 0:
            rewards = []
            state = eval_env.reset()
            for _ in range(evaluate_num):
                action, _ = agent.eval_step(state)
                _, reward, done = env.step(action)
                if done:
                    rewards.append(reward)
            logger.log_performance(env.timestep, np.mean(rewards))

    # Close files in the logger
    logger.close_files()

    # Plot the learning curve
    logger.plot('DQN')
    # Save model
    save_dir = 'models/leduc_holdem_single_dqn'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    saver = tf.train.Saver()
    saver.save(sess, os.path.join(save_dir, 'model'))

+5 -9  examples/build_IsolationForest_pipline.py

@@ -2,10 +2,6 @@ from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
@@ -43,7 +39,7 @@ pipeline_description.add_step(step_3)
attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: test primitive
# Step 4: Power transformation
primitive_4 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.power_transformer')
step_4 = PrimitiveStep(primitive=primitive_4)
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
@@ -51,7 +47,7 @@ step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 4: test primitive
# Step 5: Axiswise scaling
primitive_5 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')
step_5 = PrimitiveStep(primitive=primitive_5)
step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
@@ -59,7 +55,7 @@ step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 4: test primitive
# Step 6: Standardization
primitive_6 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_6 = PrimitiveStep(primitive=primitive_6)
step_6.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
@@ -67,7 +63,7 @@ step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_6.add_output('produce')
pipeline_description.add_step(step_6)

# Step 4: test primitive
# Step 7: Quantile transformation
primitive_7 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.quantile_transformer')
step_7 = PrimitiveStep(primitive=primitive_7)
step_7.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
@@ -75,7 +71,7 @@ step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_7.add_output('produce')
pipeline_description.add_step(step_7)

# Step 4: test primitive
# Step 8: Isolation Forest
primitive_8 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_iforest')
step_8 = PrimitiveStep(primitive=primitive_8)
step_8.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)

