diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst
index 4c46f7d..6ff156a 100644
--- a/docs/source/getting_started.rst
+++ b/docs/source/getting_started.rst
@@ -3,593 +3,108 @@ Getting Started
 
 In this document, we provide some toy examples for getting started. All
 the examples in this document and even more examples are available in
-`examples/ `__.
+`examples `__.
 
-Constructing Point-wise Detection on NAB Dataset
+Outlier Detection with Autoencoder on NAB Dataset
 ------------------------------------------------
-
-We have set up a random agent that can play randomly on each
-environment. An example of applying a random agent on Blackjack is as
-follow:
+To perform point-wise outlier detection on the NAB dataset, we provide an example that constructs
+the following pipeline description:
 
 .. code:: python
 
-    import rlcard
-    from rlcard.agents import RandomAgent
-    from rlcard.utils import set_global_seed
-
-    # Make environment
-    env = rlcard.make('blackjack', config={'seed': 0})
-    episode_num = 2
-
-    # Set a global seed
-    set_global_seed(0)
-
-    # Set up agents
-    agent_0 = RandomAgent(action_num=env.action_num)
-    env.set_agents([agent_0])
-
-    for episode in range(episode_num):
-
-        # Generate data from the environment
-        trajectories, _ = env.run(is_training=False)
-
-        # Print out the trajectories
-        print('\nEpisode {}'.format(episode))
-        for ts in trajectories[0]:
-            print('State: {}, Action: {}, Reward: {}, Next State: {}, Done: {}'.format(ts[0], ts[1], ts[2], ts[3], ts[4]))
-
-The expected output should look like something as follows:
-
+    from d3m import index
+    from d3m.metadata.base import ArgumentType
+    from d3m.metadata.pipeline import Pipeline, PrimitiveStep
+
+    # Creating pipeline
+    pipeline_description = Pipeline()
+    pipeline_description.add_input(name='inputs')
+
+    # Step 0: dataset_to_dataframe
+    step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
+    step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
+    step_0.add_output('produce')
+    pipeline_description.add_step(step_0)
+
+    # Step 1: column_parser
+    step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
+    step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
+    step_1.add_output('produce')
+    pipeline_description.add_step(step_1)
+
+    # Step 2: extract_columns_by_semantic_types(attributes)
+    step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
+    step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
+    step_2.add_output('produce')
+    step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
+                              data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
+    pipeline_description.add_step(step_2)
+
+    # Step 3: extract_columns_by_semantic_types(targets)
+    step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
+    step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
+    step_3.add_output('produce')
+    step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
+                              data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
+    pipeline_description.add_step(step_3)
+
+    attributes = 'steps.2.produce'
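+    # A step's output is addressed by the data reference 'steps.<step_index>.produce':
+    # 'steps.2.produce' above carries the extracted attribute columns and
+    # 'steps.3.produce' below carries the true target column, so later steps
+    # (e.g. the axis-wise scaler in step 4) can consume them by reference.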
+ targets = 'steps.3.produce' + + # Step 4: processing + step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')) + step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) + step_4.add_output('produce') + pipeline_description.add_step(step_4) + + # Step 5: algorithm + step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae')) + step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce') + step_5.add_output('produce') + pipeline_description.add_step(step_5) + + # Step 6: Predictions + step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common')) + step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce') + step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') + step_6.add_output('produce') + pipeline_description.add_step(step_6) + + # Final Output + pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce') + + # Output to json + data = pipeline_description.to_json() + with open('example_pipeline.json', 'w') as f: + f.write(data) + print(data) + +Note that, in order to call each primitive during pipeline construction, one may find the index (python_path) of primitives available in +`entry_points.ini `__. + +The output description json file (example_pipeline.json) should look like something as follows: :: - Episode 0 - State: {'obs': array([20, 3]), 'legal_actions': [0, 1]}, Action: 0, Reward: 0, Next State: {'obs': array([15, 3]), 'legal_actions': [0, 1]}, Done: False - State: {'obs': array([15, 3]), 'legal_actions': [0, 1]}, Action: 1, Reward: -1, Next State: {'obs': array([15, 20]), 'legal_actions': [0, 1]}, Done: True - - Episode 1 - State: {'obs': array([15, 5]), 'legal_actions': [0, 1]}, Action: 1, Reward: 1, Next State: {'obs': array([15, 23]), 'legal_actions': [0, 1]}, Done: True - -Note that the states and actions are wrapped by ``env`` in Blackjack. In -this example, the ``[20, 3]`` suggests the current player obtains score -20 while the card that faces up in the dealer’s hand has score 3. Action -0 means “hit” while action 1 means “stand”. Reward 1 suggests the player -wins while reward -1 suggests the dealer wins. Reward 0 suggests a tie. -The above data can be directly fed into a RL algorithm for training. - -Deep-Q Learning on Blackjack ----------------------------- - -The second example is to use Deep-Q learning to train an agent on -Blackjack. We aim to use this example to show how reinforcement learning -algorithms can be developed and applied in our toolkit. We design a -``run`` function which plays one complete game and provides the data for -training RL agents. 
The example is shown below: + { + "id": "e39bf406-06cf-4c76-88f0-8c8b4447e311", + "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", + "created": "2020-09-15T07:26:48.365447Z", + "inputs": [{"name": "inputs"}], + "outputs": [{"data": "steps.6.produce", "name": "output predictions"}], + "steps": [ + {"type": "PRIMITIVE", "primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common", "name": "Extract a DataFrame from a Dataset", "digest": "a7f5a8f8b276f474c3b40b025d157541de898e4e02555cd8ef76fdeecfbed256"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, + {"type": "PRIMITIVE", "primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7", "version": "0.6.0", "python_path": "d3m.primitives.data_transformation.column_parser.Common", "name": "Parses strings into their types", "digest": "eccfd70ed359901a625dbde6de40d6bbb4e69d9796ee0ca3a302fd95195451ed"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, + {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "9f0303c354df6cec4df7bda0ebb46fb4f101c36ad9a4d1143b9b9c88004629aa"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, + {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "9f0303c354df6cec4df7bda0ebb46fb4f101c36ad9a4d1143b9b9c88004629aa"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, + {"type": "PRIMITIVE", "primitive": {"id": "642de2e7-5590-3cab-9266-2a53c326c461", "version": "0.0.1", "python_path": "d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler", "name": "Axis_wise_scale"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}]}, + {"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_ae", "name": "TODS.anomaly_detection_primitives.AutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, + {"type": "PRIMITIVE", "primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.construct_predictions.Common", "name": "Construct pipeline predictions output", "digest": "6de56912a3f84bbbcc0d1f7ffe646044209120e45bbb21a137236d00fed948e9"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.5.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], + "digest": "8c6a37e7ac9ef1b302810e56dffa43c3415826ab756ef6917d76dd8ee63d38fc" + } + +With the pre-built pipeline description file, we can then 
feed the NAB data (twitter_IBM) and specify the desired evaluation metric with the path of pipeline description file with +`run_pipeline.py `__. +:: + python examples/run_pipeline.py --pipeline_path example_pipeline.json --table_path datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv --metric F1_MACRO --target_index 2 .. code:: python - - import tensorflow as tf - import os - - import rlcard - from rlcard.agents import DQNAgent - from rlcard.utils import set_global_seed, tournament - from rlcard.utils import Logger - - # Make environment - env = rlcard.make('blackjack', config={'seed': 0}) - eval_env = rlcard.make('blackjack', config={'seed': 0}) - - # Set the iterations numbers and how frequently we evaluate/save plot - evaluate_every = 100 - evaluate_num = 10000 - episode_num = 100000 - - # The intial memory size - memory_init_size = 100 - - # Train the agent every X steps - train_every = 1 - - # The paths for saving the logs and learning curves - log_dir = './experiments/blackjack_dqn_result/' - - # Set a global seed - set_global_seed(0) - - with tf.Session() as sess: - - # Initialize a global step - global_step = tf.Variable(0, name='global_step', trainable=False) - - # Set up the agents - agent = DQNAgent(sess, - scope='dqn', - action_num=env.action_num, - replay_memory_init_size=memory_init_size, - train_every=train_every, - state_shape=env.state_shape, - mlp_layers=[10,10]) - env.set_agents([agent]) - eval_env.set_agents([agent]) - - # Initialize global variables - sess.run(tf.global_variables_initializer()) - - # Init a Logger to plot the learning curve - logger = Logger(log_dir) - - for episode in range(episode_num): - - # Generate data from the environment - trajectories, _ = env.run(is_training=True) - - # Feed transitions into agent memory, and train the agent - for ts in trajectories[0]: - agent.feed(ts) - - # Evaluate the performance. Play with random agents. - if episode % evaluate_every == 0: - logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0]) - - # Close files in the logger - logger.close_files() - - # Plot the learning curve - logger.plot('DQN') - - # Save model - save_dir = 'models/blackjack_dqn' - if not os.path.exists(save_dir): - os.makedirs(save_dir) - saver = tf.train.Saver() - saver.save(sess, os.path.join(save_dir, 'model')) - -The expected output is something like below: - -:: - - ---------------------------------------- - timestep | 1 - reward | -0.7342 - ---------------------------------------- - INFO - Agent dqn, step 100, rl-loss: 1.0042707920074463 - INFO - Copied model parameters to target network. 
- INFO - Agent dqn, step 136, rl-loss: 0.7888197302818298 - ---------------------------------------- - timestep | 136 - reward | -0.1406 - ---------------------------------------- - INFO - Agent dqn, step 278, rl-loss: 0.6946825981140137 - ---------------------------------------- - timestep | 278 - reward | -0.1523 - ---------------------------------------- - INFO - Agent dqn, step 412, rl-loss: 0.62268990278244025 - ---------------------------------------- - timestep | 412 - reward | -0.088 - ---------------------------------------- - INFO - Agent dqn, step 544, rl-loss: 0.69050502777099616 - ---------------------------------------- - timestep | 544 - reward | -0.08 - ---------------------------------------- - INFO - Agent dqn, step 681, rl-loss: 0.61789089441299444 - ---------------------------------------- - timestep | 681 - reward | -0.0793 - ---------------------------------------- - -In Blackjack, the player will get a payoff at the end of the game: 1 if -the player wins, -1 if the player loses, and 0 if it is a tie. The -performance is measured by the average payoff the player obtains by -playing 10000 episodes. The above example shows that the agent achieves -better and better performance during training. The logs and learning -curves are saved in ``./experiments/blackjack_dqn_result/``. - -Running Multiple Processes --------------------------- - -The environments can be run with multiple processes to accelerate the -training. Below is an example to train DQN on Blackjack with multiple -processes. - -.. code:: python - - ''' An example of learning a Deep-Q Agent on Blackjack with multiple processes - Note that we must use if __name__ == '__main__' for multiprocessing - ''' - - import tensorflow as tf - import os - - import rlcard - from rlcard.agents import DQNAgent - from rlcard.utils import set_global_seed, tournament - from rlcard.utils import Logger - - def main(): - # Make environment - env = rlcard.make('blackjack', config={'seed': 0, 'env_num': 4}) - eval_env = rlcard.make('blackjack', config={'seed': 0, 'env_num': 4}) - - # Set the iterations numbers and how frequently we evaluate performance - evaluate_every = 100 - evaluate_num = 10000 - iteration_num = 100000 - - # The intial memory size - memory_init_size = 100 - - # Train the agent every X steps - train_every = 1 - - # The paths for saving the logs and learning curves - log_dir = './experiments/blackjack_dqn_result/' - - # Set a global seed - set_global_seed(0) - - with tf.Session() as sess: - - # Initialize a global step - global_step = tf.Variable(0, name='global_step', trainable=False) - - # Set up the agents - agent = DQNAgent(sess, - scope='dqn', - action_num=env.action_num, - replay_memory_init_size=memory_init_size, - train_every=train_every, - state_shape=env.state_shape, - mlp_layers=[10,10]) - env.set_agents([agent]) - eval_env.set_agents([agent]) - - # Initialize global variables - sess.run(tf.global_variables_initializer()) - - # Initialize a Logger to plot the learning curve - logger = Logger(log_dir) - - for iteration in range(iteration_num): - - # Generate data from the environment - trajectories, _ = env.run(is_training=True) - - # Feed transitions into agent memory, and train the agent - for ts in trajectories[0]: - agent.feed(ts) - - # Evaluate the performance. Play with random agents. 
- if iteration % evaluate_every == 0: - logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0]) - - # Close files in the logger - logger.close_files() - - # Plot the learning curve - logger.plot('DQN') - - # Save model - save_dir = 'models/blackjack_dqn' - if not os.path.exists(save_dir): - os.makedirs(save_dir) - saver = tf.train.Saver() - saver.save(sess, os.path.join(save_dir, 'model')) - - if __name__ == '__main__': - main() - -Example output is as follow: - -:: - - ---------------------------------------- - timestep | 17 - reward | -0.7378 - ---------------------------------------- - - INFO - Copied model parameters to target network. - INFO - Agent dqn, step 1100, rl-loss: 0.40940183401107797 - INFO - Copied model parameters to target network. - INFO - Agent dqn, step 2100, rl-loss: 0.44971221685409546 - INFO - Copied model parameters to target network. - INFO - Agent dqn, step 2225, rl-loss: 0.65466868877410897 - ---------------------------------------- - timestep | 2225 - reward | -0.0658 - ---------------------------------------- - INFO - Agent dqn, step 3100, rl-loss: 0.48663979768753053 - INFO - Copied model parameters to target network. - INFO - Agent dqn, step 4100, rl-loss: 0.71293979883193974 - INFO - Copied model parameters to target network. - INFO - Agent dqn, step 4440, rl-loss: 0.55871248245239263 - ---------------------------------------- - timestep | 4440 - reward | -0.0736 - ---------------------------------------- - -Training CFR on Leduc Hold’em ------------------------------ - -To show how we can use ``step`` and ``step_back`` to traverse the game -tree, we provide an example of solving Leduc Hold’em with CFR: - -.. code:: python - - import numpy as np - - import rlcard - from rlcard.agents import CFRAgent - from rlcard import models - from rlcard.utils import set_global_seed, tournament - from rlcard.utils import Logger - - # Make environment and enable human mode - env = rlcard.make('leduc-holdem', config={'seed': 0, 'allow_step_back':True}) - eval_env = rlcard.make('leduc-holdem', config={'seed': 0}) - - # Set the iterations numbers and how frequently we evaluate/save plot - evaluate_every = 100 - save_plot_every = 1000 - evaluate_num = 10000 - episode_num = 10000 - - # The paths for saving the logs and learning curves - log_dir = './experiments/leduc_holdem_cfr_result/' - - # Set a global seed - set_global_seed(0) - - # Initilize CFR Agent - agent = CFRAgent(env) - agent.load() # If we have saved model, we first load the model - - # Evaluate CFR against pre-trained NFSP - eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]]) - - # Init a Logger to plot the learning curve - logger = Logger(log_dir) - - for episode in range(episode_num): - agent.train() - print('\rIteration {}'.format(episode), end='') - # Evaluate the performance. Play with NFSP agents. - if episode % evaluate_every == 0: - agent.save() # Save model - logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0]) - - # Close files in the logger - logger.close_files() - - # Plot the learning curve - logger.plot('CFR') - -In the above example, the performance is measured by playing against a -pre-trained NFSP model. 
The expected output is as below: - -:: - - Iteration 0 - ---------------------------------------- - timestep | 192 - reward | -1.3662 - ---------------------------------------- - Iteration 100 - ---------------------------------------- - timestep | 19392 - reward | 0.9462 - ---------------------------------------- - Iteration 200 - ---------------------------------------- - timestep | 38592 - reward | 0.8591 - ---------------------------------------- - Iteration 300 - ---------------------------------------- - timestep | 57792 - reward | 0.7861 - ---------------------------------------- - Iteration 400 - ---------------------------------------- - timestep | 76992 - reward | 0.7752 - ---------------------------------------- - Iteration 500 - ---------------------------------------- - timestep | 96192 - reward | 0.7215 - ---------------------------------------- - -We observe that CFR achieves better performance as NFSP. However, CFR -requires traversal of the game tree, which is infeasible in large -environments. - -Having Fun with Pretrained Leduc Model --------------------------------------- - -We have designed simple human interfaces to play against the pretrained -model. Leduc Hold’em is a simplified version of Texas Hold’em. Rules can -be found `here `__. Example of playing against -Leduc Hold’em CFR model is as below: - -.. code:: python - - import rlcard - from rlcard import models - from rlcard.agents import LeducholdemHumanAgent as HumanAgent - from rlcard.utils import print_card - - # Make environment - # Set 'record_action' to True because we need it to print results - env = rlcard.make('leduc-holdem', config={'record_action': True}) - human_agent = HumanAgent(env.action_num) - cfr_agent = models.load('leduc-holdem-cfr').agents[0] - env.set_agents([human_agent, cfr_agent]) - - print(">> Leduc Hold'em pre-trained model") - - while (True): - print(">> Start a new game") - - trajectories, payoffs = env.run(is_training=False) - # If the human does not take the final action, we need to - # print other players action - final_state = trajectories[0][-1][-2] - action_record = final_state['action_record'] - state = final_state['raw_obs'] - _action_list = [] - for i in range(1, len(action_record)+1): - if action_record[-i][0] == state['current_player']: - break - _action_list.insert(0, action_record[-i]) - for pair in _action_list: - print('>> Player', pair[0], 'chooses', pair[1]) - - # Let's take a look at what the agent card is - print('=============== CFR Agent ===============') - print_card(env.get_perfect_information()['hand_cards'][1]) - - print('=============== Result ===============') - if payoffs[0] > 0: - print('You win {} chips!'.format(payoffs[0])) - elif payoffs[0] == 0: - print('It is a tie.') - else: - print('You lose {} chips!'.format(-payoffs[0])) - print('') - - input("Press any key to continue...") - -Example output is as follow: - -:: - - >> Leduc Hold'em pre-trained model - - >> Start a new game! - >> Agent 1 chooses raise - - =============== Community Card =============== - ┌─────────┐ - │░░░░░░░░░│ - │░░░░░░░░░│ - │░░░░░░░░░│ - │░░░░░░░░░│ - │░░░░░░░░░│ - │░░░░░░░░░│ - │░░░░░░░░░│ - └─────────┘ - =============== Your Hand =============== - ┌─────────┐ - │J │ - │ │ - │ │ - │ ♥ │ - │ │ - │ │ - │ J│ - └─────────┘ - =============== Chips =============== - Yours: + - Agent 1: +++ - =========== Actions You Can Choose =========== - 0: call, 1: raise, 2: fold - - >> You choose action (integer): - -We also provide a running demo of a rule-based agent for UNO. 
Try it by -running ``examples/uno_human.py``. - -Leduc Hold’em as Single-Agent Environment ------------------------------------------ - -We have wrraped the environment as single agent environment by assuming -that other players play with pre-trained models. The interfaces are -exactly the same to OpenAI Gym. Thus, any single-agent algorithm can be -connected to the environment. An example of Leduc Hold’em is as below: - -.. code:: python - - import tensorflow as tf - import os - import numpy as np - - import rlcard - from rlcard.agents import DQNAgent - from rlcard.agents import RandomAgent - from rlcard.utils import set_global_seed, tournament - from rlcard.utils import Logger - - # Make environment - env = rlcard.make('leduc-holdem', config={'seed': 0, 'single_agent_mode':True}) - eval_env = rlcard.make('leduc-holdem', config={'seed': 0, 'single_agent_mode':True}) - - # Set the iterations numbers and how frequently we evaluate/save plot - evaluate_every = 1000 - evaluate_num = 10000 - timesteps = 100000 - - # The intial memory size - memory_init_size = 1000 - - # Train the agent every X steps - train_every = 1 - - # The paths for saving the logs and learning curves - log_dir = './experiments/leduc_holdem_single_dqn_result/' - - # Set a global seed - set_global_seed(0) - - with tf.Session() as sess: - - # Initialize a global step - global_step = tf.Variable(0, name='global_step', trainable=False) - - # Set up the agents - agent = DQNAgent(sess, - scope='dqn', - action_num=env.action_num, - replay_memory_init_size=memory_init_size, - train_every=train_every, - state_shape=env.state_shape, - mlp_layers=[128,128]) - # Initialize global variables - sess.run(tf.global_variables_initializer()) - - # Init a Logger to plot the learning curve - logger = Logger(log_dir) - - state = env.reset() - - for timestep in range(timesteps): - action = agent.step(state) - next_state, reward, done = env.step(action) - ts = (state, action, reward, next_state, done) - agent.feed(ts) - - if timestep % evaluate_every == 0: - rewards = [] - state = eval_env.reset() - for _ in range(evaluate_num): - action, _ = agent.eval_step(state) - _, reward, done = env.step(action) - if done: - rewards.append(reward) - logger.log_performance(env.timestep, np.mean(rewards)) - - # Close files in the logger - logger.close_files() - - # Plot the learning curve - logger.plot('DQN') - - # Save model - save_dir = 'models/leduc_holdem_single_dqn' - if not os.path.exists(save_dir): - os.makedirs(save_dir) - saver = tf.train.Saver() - saver.save(sess, os.path.join(save_dir, 'model')) diff --git a/examples/build_IsolationForest_pipline.py b/examples/build_IsolationForest_pipline.py index febd5b7..8ff1d38 100644 --- a/examples/build_IsolationForest_pipline.py +++ b/examples/build_IsolationForest_pipline.py @@ -2,10 +2,6 @@ from d3m import index from d3m.metadata.base import ArgumentType from d3m.metadata.pipeline import Pipeline, PrimitiveStep from d3m.metadata import hyperparams -import copy - -# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest -# extract_columns_by_semantic_types(targets) -> ^ # Creating pipeline pipeline_description = Pipeline() @@ -43,7 +39,7 @@ pipeline_description.add_step(step_3) attributes = 'steps.2.produce' targets = 'steps.3.produce' -# Step 4: test primitive +# Step 4: Power transformation primitive_4 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.power_transformer') step_4 = 
PrimitiveStep(primitive=primitive_4)
 step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
@@ -51,7 +47,7 @@ step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
 step_4.add_output('produce')
 pipeline_description.add_step(step_4)
 
-# Step 4: test primitive
+# Step 5: Axiswise scaling
 primitive_5 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')
 step_5 = PrimitiveStep(primitive=primitive_5)
 step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
@@ -59,7 +55,7 @@ step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
 step_5.add_output('produce')
 pipeline_description.add_step(step_5)
 
-# Step 4: test primitive
+# Step 6: Standardization
 primitive_6 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
 step_6 = PrimitiveStep(primitive=primitive_6)
 step_6.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
@@ -67,7 +63,7 @@ step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
 step_6.add_output('produce')
 pipeline_description.add_step(step_6)
 
-# Step 4: test primitive
+# Step 7: Quantile transformation
 primitive_7 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.quantile_transformer')
 step_7 = PrimitiveStep(primitive=primitive_7)
 step_7.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
@@ -75,7 +71,7 @@ step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
 step_7.add_output('produce')
 pipeline_description.add_step(step_7)
 
-# Step 4: test primitive
+# Step 8: Isolation Forest
 primitive_8 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_iforest')
 step_8 = PrimitiveStep(primitive=primitive_8)
 step_8.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
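A pipeline assembled this way is normally finished off just like the autoencoder example earlier on this page: the description is serialized to JSON and handed to ``run_pipeline.py``. The snippet below is a minimal sketch of that tail, assuming the ``pipeline_description`` object from the script above has had its prediction and output steps added as in the autoencoder example; the file name ``example_iforest_pipeline.json`` is only an illustrative choice.

.. code:: python

    # Serialize the pipeline description so the runner script can load it.
    data = pipeline_description.to_json()
    with open('example_iforest_pipeline.json', 'w') as f:
        f.write(data)

The saved description can then be evaluated on the same NAB table as before:

::

   python examples/run_pipeline.py --pipeline_path example_iforest_pipeline.json --table_path datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv --metric F1_MACRO --target_index 2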