From dcf8c251a1e8d867c12ae590025883840b3e1114 Mon Sep 17 00:00:00 2001
From: lhenry15
Date: Tue, 15 Sep 2020 02:48:56 -0500
Subject: [PATCH] modified example, add getting started

Former-commit-id: 9f30948f742094e12021e3921c3f44a38fa655c2
---
 docs/source/getting_started.rst           | 677 +++++-------------------
 examples/build_IsolationForest_pipline.py |  14 +-
 2 files changed, 101 insertions(+), 590 deletions(-)

diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst
index 4c46f7d..6ff156a 100644
--- a/docs/source/getting_started.rst
+++ b/docs/source/getting_started.rst
@@ -3,593 +3,108 @@ Getting Started
 
 In this document, we provide some toy examples for getting started. All
 the examples in this document and even more examples are available in
-`examples/ `__.
+`examples `__.
 
-Constructing Point-wise Detection on NAB Dataset
+Outlier Detection with Autoencoder on NAB Dataset
 ------------------------------------------------
-
-We have set up a random agent that can play randomly on each
-environment. An example of applying a random agent on Blackjack is as
-follow:
+To perform point-wise outlier detection on the NAB dataset, we provide an example of constructing
+such a pipeline description:
 
 .. code:: python
 
-    import rlcard
-    from rlcard.agents import RandomAgent
-    from rlcard.utils import set_global_seed
-
-    # Make environment
-    env = rlcard.make('blackjack', config={'seed': 0})
-    episode_num = 2
-
-    # Set a global seed
-    set_global_seed(0)
-
-    # Set up agents
-    agent_0 = RandomAgent(action_num=env.action_num)
-    env.set_agents([agent_0])
-
-    for episode in range(episode_num):
-
-        # Generate data from the environment
-        trajectories, _ = env.run(is_training=False)
-
-        # Print out the trajectories
-        print('\nEpisode {}'.format(episode))
-        for ts in trajectories[0]:
-            print('State: {}, Action: {}, Reward: {}, Next State: {}, Done: {}'.format(ts[0], ts[1], ts[2], ts[3], ts[4]))
-The expected output should look like something as follows:
-
+    from d3m import index
+    from d3m.metadata.base import ArgumentType
+    from d3m.metadata.pipeline import Pipeline, PrimitiveStep
+
+    # Creating pipeline
+    pipeline_description = Pipeline()
+    pipeline_description.add_input(name='inputs')
+
+    # Step 0: dataset_to_dataframe
+    step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
+    step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
+    step_0.add_output('produce')
+    pipeline_description.add_step(step_0)
+
+    # Step 1: column_parser
+    step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
+    step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
+    step_1.add_output('produce')
+    pipeline_description.add_step(step_1)
+
+    # Step 2: extract_columns_by_semantic_types(attributes)
+    step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
+    step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
+    step_2.add_output('produce')
+    step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
+                              data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
+    pipeline_description.add_step(step_2)
+
+    # Step 3: extract_columns_by_semantic_types(targets)
+    step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
+    step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
+    step_3.add_output('produce')
+    step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
+                              data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
+    pipeline_description.add_step(step_3)
+
+    attributes = 'steps.2.produce'
+    targets = 'steps.3.produce'
+
+    # Step 4: processing
+    step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler'))
+    step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
+    step_4.add_output('produce')
+    pipeline_description.add_step(step_4)
+
+    # Step 5: algorithm
+    step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
+    step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
+    step_5.add_output('produce')
+    pipeline_description.add_step(step_5)
+
+    # Step 6: Predictions
+    step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
+    step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
+    step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
+    step_6.add_output('produce')
+    pipeline_description.add_step(step_6)
+
+    # Final Output
+    pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')
+
+    # Output to json
+    data = pipeline_description.to_json()
+    with open('example_pipeline.json', 'w') as f:
+        f.write(data)
+    print(data)
+
+Note that, in order to call each primitive during pipeline construction, you can find its index (python_path) in
+`entry_points.ini `__.
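+
+If you are not sure which primitives are available in your environment, you can also list their
+python_path values programmatically. The snippet below is a minimal sketch (it assumes the d3m
+runtime and the TODS primitives are installed) that prints every registered primitive under the
+``tods`` namespace:
+
+.. code:: python
+
+    from d3m import index
+
+    # d3m discovers primitives through entry points; index.search() returns
+    # the python_path of every primitive it can find in the environment.
+    for python_path in index.search():
+        if '.tods.' in python_path:
+            print(python_path)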
+
+The output pipeline description JSON file (example_pipeline.json) should look something like the following:
 ::
 
-    Episode 0
-    State: {'obs': array([20, 3]), 'legal_actions': [0, 1]}, Action: 0, Reward: 0, Next State: {'obs': array([15, 3]), 'legal_actions': [0, 1]}, Done: False
-    State: {'obs': array([15, 3]), 'legal_actions': [0, 1]}, Action: 1, Reward: -1, Next State: {'obs': array([15, 20]), 'legal_actions': [0, 1]}, Done: True
-
-    Episode 1
-    State: {'obs': array([15, 5]), 'legal_actions': [0, 1]}, Action: 1, Reward: 1, Next State: {'obs': array([15, 23]), 'legal_actions': [0, 1]}, Done: True
-
-Note that the states and actions are wrapped by ``env`` in Blackjack. In
-this example, the ``[20, 3]`` suggests the current player obtains score
-20 while the card that faces up in the dealer’s hand has score 3. Action
-0 means “hit” while action 1 means “stand”. Reward 1 suggests the player
-wins while reward -1 suggests the dealer wins. Reward 0 suggests a tie.
-The above data can be directly fed into a RL algorithm for training.
-
-Deep-Q Learning on Blackjack
-----------------------------
-
-The second example is to use Deep-Q learning to train an agent on
-Blackjack. We aim to use this example to show how reinforcement learning
-algorithms can be developed and applied in our toolkit. We design a
-``run`` function which plays one complete game and provides the data for
-training RL agents. 
The example is shown below: + { + "id": "e39bf406-06cf-4c76-88f0-8c8b4447e311", + "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", + "created": "2020-09-15T07:26:48.365447Z", + "inputs": [{"name": "inputs"}], + "outputs": [{"data": "steps.6.produce", "name": "output predictions"}], + "steps": [ + {"type": "PRIMITIVE", "primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common", "name": "Extract a DataFrame from a Dataset", "digest": "a7f5a8f8b276f474c3b40b025d157541de898e4e02555cd8ef76fdeecfbed256"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, + {"type": "PRIMITIVE", "primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7", "version": "0.6.0", "python_path": "d3m.primitives.data_transformation.column_parser.Common", "name": "Parses strings into their types", "digest": "eccfd70ed359901a625dbde6de40d6bbb4e69d9796ee0ca3a302fd95195451ed"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, + {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "9f0303c354df6cec4df7bda0ebb46fb4f101c36ad9a4d1143b9b9c88004629aa"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, + {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "9f0303c354df6cec4df7bda0ebb46fb4f101c36ad9a4d1143b9b9c88004629aa"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, + {"type": "PRIMITIVE", "primitive": {"id": "642de2e7-5590-3cab-9266-2a53c326c461", "version": "0.0.1", "python_path": "d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler", "name": "Axis_wise_scale"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}]}, + {"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_ae", "name": "TODS.anomaly_detection_primitives.AutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, + {"type": "PRIMITIVE", "primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.construct_predictions.Common", "name": "Construct pipeline predictions output", "digest": "6de56912a3f84bbbcc0d1f7ffe646044209120e45bbb21a137236d00fed948e9"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.5.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], + "digest": "8c6a37e7ac9ef1b302810e56dffa43c3415826ab756ef6917d76dd8ee63d38fc" + } + +With the pre-built pipeline description file, we can then 
feed the NAB data (twitter_IBM) and specify the desired evaluation metric with the path of pipeline description file with +`run_pipeline.py `__. +:: + python examples/run_pipeline.py --pipeline_path example_pipeline.json --table_path datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv --metric F1_MACRO --target_index 2 .. code:: python - - import tensorflow as tf - import os - - import rlcard - from rlcard.agents import DQNAgent - from rlcard.utils import set_global_seed, tournament - from rlcard.utils import Logger - - # Make environment - env = rlcard.make('blackjack', config={'seed': 0}) - eval_env = rlcard.make('blackjack', config={'seed': 0}) - - # Set the iterations numbers and how frequently we evaluate/save plot - evaluate_every = 100 - evaluate_num = 10000 - episode_num = 100000 - - # The intial memory size - memory_init_size = 100 - - # Train the agent every X steps - train_every = 1 - - # The paths for saving the logs and learning curves - log_dir = './experiments/blackjack_dqn_result/' - - # Set a global seed - set_global_seed(0) - - with tf.Session() as sess: - - # Initialize a global step - global_step = tf.Variable(0, name='global_step', trainable=False) - - # Set up the agents - agent = DQNAgent(sess, - scope='dqn', - action_num=env.action_num, - replay_memory_init_size=memory_init_size, - train_every=train_every, - state_shape=env.state_shape, - mlp_layers=[10,10]) - env.set_agents([agent]) - eval_env.set_agents([agent]) - - # Initialize global variables - sess.run(tf.global_variables_initializer()) - - # Init a Logger to plot the learning curve - logger = Logger(log_dir) - - for episode in range(episode_num): - - # Generate data from the environment - trajectories, _ = env.run(is_training=True) - - # Feed transitions into agent memory, and train the agent - for ts in trajectories[0]: - agent.feed(ts) - - # Evaluate the performance. Play with random agents. - if episode % evaluate_every == 0: - logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0]) - - # Close files in the logger - logger.close_files() - - # Plot the learning curve - logger.plot('DQN') - - # Save model - save_dir = 'models/blackjack_dqn' - if not os.path.exists(save_dir): - os.makedirs(save_dir) - saver = tf.train.Saver() - saver.save(sess, os.path.join(save_dir, 'model')) - -The expected output is something like below: - -:: - - ---------------------------------------- - timestep | 1 - reward | -0.7342 - ---------------------------------------- - INFO - Agent dqn, step 100, rl-loss: 1.0042707920074463 - INFO - Copied model parameters to target network. 
- INFO - Agent dqn, step 136, rl-loss: 0.7888197302818298 - ---------------------------------------- - timestep | 136 - reward | -0.1406 - ---------------------------------------- - INFO - Agent dqn, step 278, rl-loss: 0.6946825981140137 - ---------------------------------------- - timestep | 278 - reward | -0.1523 - ---------------------------------------- - INFO - Agent dqn, step 412, rl-loss: 0.62268990278244025 - ---------------------------------------- - timestep | 412 - reward | -0.088 - ---------------------------------------- - INFO - Agent dqn, step 544, rl-loss: 0.69050502777099616 - ---------------------------------------- - timestep | 544 - reward | -0.08 - ---------------------------------------- - INFO - Agent dqn, step 681, rl-loss: 0.61789089441299444 - ---------------------------------------- - timestep | 681 - reward | -0.0793 - ---------------------------------------- - -In Blackjack, the player will get a payoff at the end of the game: 1 if -the player wins, -1 if the player loses, and 0 if it is a tie. The -performance is measured by the average payoff the player obtains by -playing 10000 episodes. The above example shows that the agent achieves -better and better performance during training. The logs and learning -curves are saved in ``./experiments/blackjack_dqn_result/``. - -Running Multiple Processes --------------------------- - -The environments can be run with multiple processes to accelerate the -training. Below is an example to train DQN on Blackjack with multiple -processes. - -.. code:: python - - ''' An example of learning a Deep-Q Agent on Blackjack with multiple processes - Note that we must use if __name__ == '__main__' for multiprocessing - ''' - - import tensorflow as tf - import os - - import rlcard - from rlcard.agents import DQNAgent - from rlcard.utils import set_global_seed, tournament - from rlcard.utils import Logger - - def main(): - # Make environment - env = rlcard.make('blackjack', config={'seed': 0, 'env_num': 4}) - eval_env = rlcard.make('blackjack', config={'seed': 0, 'env_num': 4}) - - # Set the iterations numbers and how frequently we evaluate performance - evaluate_every = 100 - evaluate_num = 10000 - iteration_num = 100000 - - # The intial memory size - memory_init_size = 100 - - # Train the agent every X steps - train_every = 1 - - # The paths for saving the logs and learning curves - log_dir = './experiments/blackjack_dqn_result/' - - # Set a global seed - set_global_seed(0) - - with tf.Session() as sess: - - # Initialize a global step - global_step = tf.Variable(0, name='global_step', trainable=False) - - # Set up the agents - agent = DQNAgent(sess, - scope='dqn', - action_num=env.action_num, - replay_memory_init_size=memory_init_size, - train_every=train_every, - state_shape=env.state_shape, - mlp_layers=[10,10]) - env.set_agents([agent]) - eval_env.set_agents([agent]) - - # Initialize global variables - sess.run(tf.global_variables_initializer()) - - # Initialize a Logger to plot the learning curve - logger = Logger(log_dir) - - for iteration in range(iteration_num): - - # Generate data from the environment - trajectories, _ = env.run(is_training=True) - - # Feed transitions into agent memory, and train the agent - for ts in trajectories[0]: - agent.feed(ts) - - # Evaluate the performance. Play with random agents. 
- if iteration % evaluate_every == 0: - logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0]) - - # Close files in the logger - logger.close_files() - - # Plot the learning curve - logger.plot('DQN') - - # Save model - save_dir = 'models/blackjack_dqn' - if not os.path.exists(save_dir): - os.makedirs(save_dir) - saver = tf.train.Saver() - saver.save(sess, os.path.join(save_dir, 'model')) - - if __name__ == '__main__': - main() - -Example output is as follow: - -:: - - ---------------------------------------- - timestep | 17 - reward | -0.7378 - ---------------------------------------- - - INFO - Copied model parameters to target network. - INFO - Agent dqn, step 1100, rl-loss: 0.40940183401107797 - INFO - Copied model parameters to target network. - INFO - Agent dqn, step 2100, rl-loss: 0.44971221685409546 - INFO - Copied model parameters to target network. - INFO - Agent dqn, step 2225, rl-loss: 0.65466868877410897 - ---------------------------------------- - timestep | 2225 - reward | -0.0658 - ---------------------------------------- - INFO - Agent dqn, step 3100, rl-loss: 0.48663979768753053 - INFO - Copied model parameters to target network. - INFO - Agent dqn, step 4100, rl-loss: 0.71293979883193974 - INFO - Copied model parameters to target network. - INFO - Agent dqn, step 4440, rl-loss: 0.55871248245239263 - ---------------------------------------- - timestep | 4440 - reward | -0.0736 - ---------------------------------------- - -Training CFR on Leduc Hold’em ------------------------------ - -To show how we can use ``step`` and ``step_back`` to traverse the game -tree, we provide an example of solving Leduc Hold’em with CFR: - -.. code:: python - - import numpy as np - - import rlcard - from rlcard.agents import CFRAgent - from rlcard import models - from rlcard.utils import set_global_seed, tournament - from rlcard.utils import Logger - - # Make environment and enable human mode - env = rlcard.make('leduc-holdem', config={'seed': 0, 'allow_step_back':True}) - eval_env = rlcard.make('leduc-holdem', config={'seed': 0}) - - # Set the iterations numbers and how frequently we evaluate/save plot - evaluate_every = 100 - save_plot_every = 1000 - evaluate_num = 10000 - episode_num = 10000 - - # The paths for saving the logs and learning curves - log_dir = './experiments/leduc_holdem_cfr_result/' - - # Set a global seed - set_global_seed(0) - - # Initilize CFR Agent - agent = CFRAgent(env) - agent.load() # If we have saved model, we first load the model - - # Evaluate CFR against pre-trained NFSP - eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]]) - - # Init a Logger to plot the learning curve - logger = Logger(log_dir) - - for episode in range(episode_num): - agent.train() - print('\rIteration {}'.format(episode), end='') - # Evaluate the performance. Play with NFSP agents. - if episode % evaluate_every == 0: - agent.save() # Save model - logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0]) - - # Close files in the logger - logger.close_files() - - # Plot the learning curve - logger.plot('CFR') - -In the above example, the performance is measured by playing against a -pre-trained NFSP model. 
The expected output is as below: - -:: - - Iteration 0 - ---------------------------------------- - timestep | 192 - reward | -1.3662 - ---------------------------------------- - Iteration 100 - ---------------------------------------- - timestep | 19392 - reward | 0.9462 - ---------------------------------------- - Iteration 200 - ---------------------------------------- - timestep | 38592 - reward | 0.8591 - ---------------------------------------- - Iteration 300 - ---------------------------------------- - timestep | 57792 - reward | 0.7861 - ---------------------------------------- - Iteration 400 - ---------------------------------------- - timestep | 76992 - reward | 0.7752 - ---------------------------------------- - Iteration 500 - ---------------------------------------- - timestep | 96192 - reward | 0.7215 - ---------------------------------------- - -We observe that CFR achieves better performance as NFSP. However, CFR -requires traversal of the game tree, which is infeasible in large -environments. - -Having Fun with Pretrained Leduc Model --------------------------------------- - -We have designed simple human interfaces to play against the pretrained -model. Leduc Hold’em is a simplified version of Texas Hold’em. Rules can -be found `here `__. Example of playing against -Leduc Hold’em CFR model is as below: - -.. code:: python - - import rlcard - from rlcard import models - from rlcard.agents import LeducholdemHumanAgent as HumanAgent - from rlcard.utils import print_card - - # Make environment - # Set 'record_action' to True because we need it to print results - env = rlcard.make('leduc-holdem', config={'record_action': True}) - human_agent = HumanAgent(env.action_num) - cfr_agent = models.load('leduc-holdem-cfr').agents[0] - env.set_agents([human_agent, cfr_agent]) - - print(">> Leduc Hold'em pre-trained model") - - while (True): - print(">> Start a new game") - - trajectories, payoffs = env.run(is_training=False) - # If the human does not take the final action, we need to - # print other players action - final_state = trajectories[0][-1][-2] - action_record = final_state['action_record'] - state = final_state['raw_obs'] - _action_list = [] - for i in range(1, len(action_record)+1): - if action_record[-i][0] == state['current_player']: - break - _action_list.insert(0, action_record[-i]) - for pair in _action_list: - print('>> Player', pair[0], 'chooses', pair[1]) - - # Let's take a look at what the agent card is - print('=============== CFR Agent ===============') - print_card(env.get_perfect_information()['hand_cards'][1]) - - print('=============== Result ===============') - if payoffs[0] > 0: - print('You win {} chips!'.format(payoffs[0])) - elif payoffs[0] == 0: - print('It is a tie.') - else: - print('You lose {} chips!'.format(-payoffs[0])) - print('') - - input("Press any key to continue...") - -Example output is as follow: - -:: - - >> Leduc Hold'em pre-trained model - - >> Start a new game! - >> Agent 1 chooses raise - - =============== Community Card =============== - ┌─────────┐ - │░░░░░░░░░│ - │░░░░░░░░░│ - │░░░░░░░░░│ - │░░░░░░░░░│ - │░░░░░░░░░│ - │░░░░░░░░░│ - │░░░░░░░░░│ - └─────────┘ - =============== Your Hand =============== - ┌─────────┐ - │J │ - │ │ - │ │ - │ ♥ │ - │ │ - │ │ - │ J│ - └─────────┘ - =============== Chips =============== - Yours: + - Agent 1: +++ - =========== Actions You Can Choose =========== - 0: call, 1: raise, 2: fold - - >> You choose action (integer): - -We also provide a running demo of a rule-based agent for UNO. 
Try it by -running ``examples/uno_human.py``. - -Leduc Hold’em as Single-Agent Environment ------------------------------------------ - -We have wrraped the environment as single agent environment by assuming -that other players play with pre-trained models. The interfaces are -exactly the same to OpenAI Gym. Thus, any single-agent algorithm can be -connected to the environment. An example of Leduc Hold’em is as below: - -.. code:: python - - import tensorflow as tf - import os - import numpy as np - - import rlcard - from rlcard.agents import DQNAgent - from rlcard.agents import RandomAgent - from rlcard.utils import set_global_seed, tournament - from rlcard.utils import Logger - - # Make environment - env = rlcard.make('leduc-holdem', config={'seed': 0, 'single_agent_mode':True}) - eval_env = rlcard.make('leduc-holdem', config={'seed': 0, 'single_agent_mode':True}) - - # Set the iterations numbers and how frequently we evaluate/save plot - evaluate_every = 1000 - evaluate_num = 10000 - timesteps = 100000 - - # The intial memory size - memory_init_size = 1000 - - # Train the agent every X steps - train_every = 1 - - # The paths for saving the logs and learning curves - log_dir = './experiments/leduc_holdem_single_dqn_result/' - - # Set a global seed - set_global_seed(0) - - with tf.Session() as sess: - - # Initialize a global step - global_step = tf.Variable(0, name='global_step', trainable=False) - - # Set up the agents - agent = DQNAgent(sess, - scope='dqn', - action_num=env.action_num, - replay_memory_init_size=memory_init_size, - train_every=train_every, - state_shape=env.state_shape, - mlp_layers=[128,128]) - # Initialize global variables - sess.run(tf.global_variables_initializer()) - - # Init a Logger to plot the learning curve - logger = Logger(log_dir) - - state = env.reset() - - for timestep in range(timesteps): - action = agent.step(state) - next_state, reward, done = env.step(action) - ts = (state, action, reward, next_state, done) - agent.feed(ts) - - if timestep % evaluate_every == 0: - rewards = [] - state = eval_env.reset() - for _ in range(evaluate_num): - action, _ = agent.eval_step(state) - _, reward, done = env.step(action) - if done: - rewards.append(reward) - logger.log_performance(env.timestep, np.mean(rewards)) - - # Close files in the logger - logger.close_files() - - # Plot the learning curve - logger.plot('DQN') - - # Save model - save_dir = 'models/leduc_holdem_single_dqn' - if not os.path.exists(save_dir): - os.makedirs(save_dir) - saver = tf.train.Saver() - saver.save(sess, os.path.join(save_dir, 'model')) diff --git a/examples/build_IsolationForest_pipline.py b/examples/build_IsolationForest_pipline.py index febd5b7..8ff1d38 100644 --- a/examples/build_IsolationForest_pipline.py +++ b/examples/build_IsolationForest_pipline.py @@ -2,10 +2,6 @@ from d3m import index from d3m.metadata.base import ArgumentType from d3m.metadata.pipeline import Pipeline, PrimitiveStep from d3m.metadata import hyperparams -import copy - -# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest -# extract_columns_by_semantic_types(targets) -> ^ # Creating pipeline pipeline_description = Pipeline() @@ -43,7 +39,7 @@ pipeline_description.add_step(step_3) attributes = 'steps.2.produce' targets = 'steps.3.produce' -# Step 4: test primitive +# Step 4: Power transformation primitive_4 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.power_transformer') step_4 = 
PrimitiveStep(primitive=primitive_4)
 step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
@@ -51,7 +47,7 @@ step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
 step_4.add_output('produce')
 pipeline_description.add_step(step_4)
 
-# Step 4: test primitive
+# Step 5: Axiswise scaling
 primitive_5 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')
 step_5 = PrimitiveStep(primitive=primitive_5)
 step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
@@ -59,7 +55,7 @@ step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
 step_5.add_output('produce')
 pipeline_description.add_step(step_5)
 
-# Step 4: test primitive
+# Step 6: Standardization
 primitive_6 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
 step_6 = PrimitiveStep(primitive=primitive_6)
 step_6.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
@@ -67,7 +63,7 @@ step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
 step_6.add_output('produce')
 pipeline_description.add_step(step_6)
 
-# Step 4: test primitive
+# Step 7: Quantile transformation
 primitive_7 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.quantile_transformer')
 step_7 = PrimitiveStep(primitive=primitive_7)
 step_7.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
@@ -75,7 +71,7 @@ step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
 step_7.add_output('produce')
 pipeline_description.add_step(step_7)
 
-# Step 4: test primitive
+# Step 8: Isolation Forest
 primitive_8 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_iforest')
 step_8 = PrimitiveStep(primitive=primitive_8)
 step_8.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
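As with the autoencoder example above, the Isolation Forest pipeline description can be serialized to
JSON and then evaluated with ``run_pipeline.py`` on a NAB CSV. The snippet below is a minimal sketch
that reuses only calls already shown in this patch; it assumes the remaining prediction-construction
and output steps have been added to ``pipeline_description`` as in the getting-started example, and
that ``contamination=0.1`` (the assumed proportion of outliers for the PyOD detector) is kept as set
above:

.. code:: python

    # Sketch: serialize the Isolation Forest pipeline, mirroring the autoencoder
    # example above (assumes construct_predictions and the pipeline output were
    # added to pipeline_description as in that example).
    data = pipeline_description.to_json()
    with open('example_pipeline.json', 'w') as f:
        f.write(data)
    print(data)

The resulting JSON file can then be passed to ``run_pipeline.py`` together with a NAB table and an
evaluation metric, exactly as shown in the getting-started example above.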