diff --git a/datasets/anomaly/system_wise/sample/train.csv b/datasets/anomaly/system_wise/sample/train.csv index 7fa959b..51878bb 100644 --- a/datasets/anomaly/system_wise/sample/train.csv +++ b/datasets/anomaly/system_wise/sample/train.csv @@ -33,72 +33,72 @@ d3mIndex,system,label 31,31.csv,1 32,32.csv,1 33,33.csv,1 -34,34.csv,2 -35,35.csv,2 -36,36.csv,2 -37,37.csv,2 -38,38.csv,2 -39,39.csv,2 -40,40.csv,2 -41,41.csv,2 -42,42.csv,2 -43,43.csv,2 -44,44.csv,2 -45,45.csv,2 -46,46.csv,2 -47,47.csv,2 -48,48.csv,2 -49,49.csv,2 -50,50.csv,2 -51,51.csv,2 -52,52.csv,2 -53,53.csv,2 -54,54.csv,2 -55,55.csv,2 -56,56.csv,2 -57,57.csv,2 -58,58.csv,2 -59,59.csv,2 -60,60.csv,2 -61,61.csv,2 -62,62.csv,2 -63,63.csv,2 -64,64.csv,2 -65,65.csv,2 -66,66.csv,2 -67,67.csv,2 -68,68.csv,2 -69,69.csv,2 -70,70.csv,2 -71,71.csv,2 -72,72.csv,2 -73,73.csv,2 -74,74.csv,2 -75,75.csv,2 -76,76.csv,2 -77,77.csv,2 -78,78.csv,2 -79,79.csv,2 -80,80.csv,2 -81,81.csv,2 -82,82.csv,2 -83,83.csv,2 -84,84.csv,2 -85,85.csv,2 -86,86.csv,2 -87,87.csv,2 -88,88.csv,2 -89,89.csv,2 -90,90.csv,2 -91,91.csv,2 -92,92.csv,2 -93,93.csv,2 -94,94.csv,2 -95,95.csv,2 -96,96.csv,2 -97,97.csv,2 -98,98.csv,2 -99,99.csv,2 +34,34.csv,0 +35,35.csv,0 +36,36.csv,0 +37,37.csv,0 +38,38.csv,0 +39,39.csv,0 +40,40.csv,0 +41,41.csv,0 +42,42.csv,0 +43,43.csv,0 +44,44.csv,0 +45,45.csv,0 +46,46.csv,0 +47,47.csv,0 +48,48.csv,0 +49,49.csv,0 +50,50.csv,0 +51,51.csv,0 +52,52.csv,0 +53,53.csv,0 +54,54.csv,0 +55,55.csv,0 +56,56.csv,0 +57,57.csv,0 +58,58.csv,0 +59,59.csv,0 +60,60.csv,0 +61,61.csv,0 +62,62.csv,0 +63,63.csv,0 +64,64.csv,0 +65,65.csv,0 +66,66.csv,0 +67,67.csv,0 +68,68.csv,0 +69,69.csv,0 +70,70.csv,0 +71,71.csv,0 +72,72.csv,0 +73,73.csv,0 +74,74.csv,0 +75,75.csv,0 +76,76.csv,0 +77,77.csv,0 +78,78.csv,0 +79,79.csv,0 +80,80.csv,0 +81,81.csv,0 +82,82.csv,0 +83,83.csv,0 +84,84.csv,0 +85,85.csv,0 +86,86.csv,0 +87,87.csv,0 +88,88.csv,0 +89,89.csv,0 +90,90.csv,0 +91,91.csv,0 +92,92.csv,0 +93,93.csv,0 +94,94.csv,0 +95,95.csv,0 +96,96.csv,0 +97,97.csv,0 +98,98.csv,0 +99,99.csv,0 100,100.csv,1 101,101.csv,1 102,102.csv,1 @@ -132,70 +132,70 @@ d3mIndex,system,label 130,130.csv,1 131,131.csv,1 132,132.csv,1 -133,133.csv,2 -134,134.csv,2 -135,135.csv,2 -136,136.csv,2 -137,137.csv,2 -138,138.csv,2 -139,139.csv,2 -140,140.csv,2 -141,141.csv,2 -142,142.csv,2 -143,143.csv,2 -144,144.csv,2 -145,145.csv,2 -146,146.csv,2 -147,147.csv,2 -148,148.csv,2 -149,149.csv,2 -150,150.csv,2 -151,151.csv,2 -152,152.csv,2 -153,153.csv,2 -154,154.csv,2 -155,155.csv,2 -156,156.csv,2 -157,157.csv,2 -158,158.csv,2 -159,159.csv,2 -160,160.csv,2 -161,161.csv,2 -162,162.csv,2 -163,163.csv,2 -164,164.csv,2 -165,165.csv,2 -166,166.csv,2 -167,167.csv,2 -168,168.csv,2 -169,169.csv,2 -170,170.csv,2 -171,171.csv,2 -172,172.csv,2 -173,173.csv,2 -174,174.csv,2 -175,175.csv,2 -176,176.csv,2 -177,177.csv,2 -178,178.csv,2 -179,179.csv,2 -180,180.csv,2 -181,181.csv,2 -182,182.csv,2 -183,183.csv,2 -184,184.csv,2 -185,185.csv,2 -186,186.csv,2 -187,187.csv,2 -188,188.csv,2 -189,189.csv,2 -190,190.csv,2 -191,191.csv,2 -192,192.csv,2 -193,193.csv,2 -194,194.csv,2 -195,195.csv,2 -196,196.csv,2 -197,197.csv,2 -198,198.csv,2 -199,199.csv,2 +133,133.csv,0 +134,134.csv,0 +135,135.csv,0 +136,136.csv,0 +137,137.csv,0 +138,138.csv,0 +139,139.csv,0 +140,140.csv,0 +141,141.csv,0 +142,142.csv,0 +143,143.csv,0 +144,144.csv,0 +145,145.csv,0 +146,146.csv,0 +147,147.csv,0 +148,148.csv,0 +149,149.csv,0 +150,150.csv,0 +151,151.csv,0 +152,152.csv,0 +153,153.csv,0 +154,154.csv,0 +155,155.csv,0 +156,156.csv,0 +157,157.csv,0 
+158,158.csv,0 +159,159.csv,0 +160,160.csv,0 +161,161.csv,0 +162,162.csv,0 +163,163.csv,0 +164,164.csv,0 +165,165.csv,0 +166,166.csv,0 +167,167.csv,0 +168,168.csv,0 +169,169.csv,0 +170,170.csv,0 +171,171.csv,0 +172,172.csv,0 +173,173.csv,0 +174,174.csv,0 +175,175.csv,0 +176,176.csv,0 +177,177.csv,0 +178,178.csv,0 +179,179.csv,0 +180,180.csv,0 +181,181.csv,0 +182,182.csv,0 +183,183.csv,0 +184,184.csv,0 +185,185.csv,0 +186,186.csv,0 +187,187.csv,0 +188,188.csv,0 +189,189.csv,0 +190,190.csv,0 +191,191.csv,0 +192,192.csv,0 +193,193.csv,0 +194,194.csv,0 +195,195.csv,0 +196,196.csv,0 +197,197.csv,0 +198,198.csv,0 +199,199.csv,0 diff --git a/examples/axolotl_interface/example_pipelines/autoencoder_pipeline.json b/examples/axolotl_interface/example_pipelines/autoencoder_pipeline.json index 0e876c0..bf47eaa 100644 --- a/examples/axolotl_interface/example_pipelines/autoencoder_pipeline.json +++ b/examples/axolotl_interface/example_pipelines/autoencoder_pipeline.json @@ -1 +1 @@ -{"id": "bfd8aedf-36be-4dad-af8a-c324a03db5f9", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2021-02-13T17:02:35.500457Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.6.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "c78138d9-9377-31dc-aee8-83d9df049c60", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.dataset_to_dataframe", "name": "Extract a DataFrame from a Dataset"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "81235c29-aeb9-3828-911a-1b25319b6998", "version": "0.6.0", "python_path": "d3m.primitives.tods.data_processing.column_parser", "name": "Parses strings into their types"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "642de2e7-5590-3cab-9266-2a53c326c461", "version": "0.0.1", "python_path": "d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler", "name": "Axis_wise_scale"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_ae", "name": "TODS.anomaly_detection_primitives.AutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": 
"2530840a-07d4-3874-b7d8-9eb5e4ae2bf3", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.construct_predictions", "name": "Construct pipeline predictions output"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.5.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "01ad8ccf817150186ca15157a4f02ee1f738582137321a8a5a4a3252832ce555"} \ No newline at end of file +{"id": "924e9a77-da5f-4bcc-b9a0-ed65bbaf87fa", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2021-03-11T23:41:13.884494Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.6.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "c78138d9-9377-31dc-aee8-83d9df049c60", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.dataset_to_dataframe", "name": "Extract a DataFrame from a Dataset"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "81235c29-aeb9-3828-911a-1b25319b6998", "version": "0.6.0", "python_path": "d3m.primitives.tods.data_processing.column_parser", "name": "Parses strings into their types"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "f07ce875-bbc7-36c5-9cc1-ba4bfb7cf48e", "version": "0.1.0", "python_path": "d3m.primitives.tods.feature_analysis.statistical_maximum", "name": "Time Series Decompostional"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_ae", "name": "TODS.anomaly_detection_primitives.AutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "2530840a-07d4-3874-b7d8-9eb5e4ae2bf3", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.construct_predictions", "name": "Construct pipeline predictions output"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.5.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "bb1cb5328299d8d65cabc152092da553db267494fb12e6320c66110b2c48a265"} \ No newline at end of file diff --git 
a/examples/axolotl_interface/example_pipelines/script/build_AutoEncoder_pipeline.py b/examples/axolotl_interface/example_pipelines/script/build_AutoEncoder_pipeline.py index 79cc54f..cd9d23d 100644 --- a/examples/axolotl_interface/example_pipelines/script/build_AutoEncoder_pipeline.py +++ b/examples/axolotl_interface/example_pipelines/script/build_AutoEncoder_pipeline.py @@ -41,7 +41,9 @@ attributes = 'steps.2.produce' targets = 'steps.3.produce' # Step 4: processing -step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')) +#step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')) +step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_maximum')) +#step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_minimum')) step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) step_4.add_output('produce') pipeline_description.add_step(step_4) @@ -64,7 +66,7 @@ pipeline_description.add_output(name='output predictions', data_reference='steps # Output to json data = pipeline_description.to_json() -with open('example_pipeline.json', 'w') as f: +with open('autoencoder_pipeline.json', 'w') as f: f.write(data) print(data) diff --git a/examples/axolotl_interface/example_pipelines/script/build_system_pipeline.py b/examples/axolotl_interface/example_pipelines/script/build_system_pipeline.py index ba00146..f5c148f 100644 --- a/examples/axolotl_interface/example_pipelines/script/build_system_pipeline.py +++ b/examples/axolotl_interface/example_pipelines/script/build_system_pipeline.py @@ -57,29 +57,37 @@ attributes = 'steps.4.produce' targets = 'steps.5.produce' # Step 6: processing -step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')) +step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_maximum')) step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) step_6.add_output('produce') pipeline_description.add_step(step_6) # Step 7: algorithm -step_7 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae')) +#step_7 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae')) +step_7 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ocsvm')) step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce') -step_7.add_output('produce') +step_7.add_output('produce_score') pipeline_description.add_step(step_7) # Step 8: Predictions -step_8 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions')) -step_8.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.7.produce') -step_8.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +#step_8 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions')) +step_8 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.system_wise_detection')) +step_8.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.7.produce_score') 
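+# Note: system_wise_detection consumes the raw scores from step 7's
+# 'produce_score' output and emits one label per system; the 'reference'
+# argument now belongs to the construct_predictions step added below as step 9.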
+#step_8.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') step_8.add_output('produce') pipeline_description.add_step(step_8) +step_9 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions')) +step_9.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.8.produce') +step_9.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_9.add_output('produce') +pipeline_description.add_step(step_9) + # Final Output -pipeline_description.add_output(name='output predictions', data_reference='steps.8.produce') +pipeline_description.add_output(name='output predictions', data_reference='steps.9.produce') # Output to json data = pipeline_description.to_json() -with open('example_pipeline.json', 'w') as f: +with open('system_pipeline.json', 'w') as f: f.write(data) print(data) diff --git a/examples/axolotl_interface/example_pipelines/system_pipeline.json b/examples/axolotl_interface/example_pipelines/system_pipeline.json index 4693ec1..e86b433 100644 --- a/examples/axolotl_interface/example_pipelines/system_pipeline.json +++ b/examples/axolotl_interface/example_pipelines/system_pipeline.json @@ -1 +1 @@ -{"id": "fe8ceeee-a513-45d8-9e28-b46e11f9c635", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2021-02-11T21:28:54.508699Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.8.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e", "version": "0.2.0", "python_path": "d3m.primitives.tods.common.denormalize", "name": "Denormalize datasets"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.dataset_to_dataframe", "name": "Extract a DataFrame from a Dataset", "digest": "ba00092121d8971b0aa8c1f4b99e97151ca39b44f549eecc03fc61a286567a36"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "989562ac-b50f-4462-99cb-abef80d765b2", "version": "0.1.0", "python_path": "d3m.primitives.tods.common.csv_reader", "name": "Columns CSV reader"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"use_columns": {"type": "VALUE", "data": [0, 1]}, "return_result": {"type": "VALUE", "data": "replace"}}}, {"type": "PRIMITIVE", "primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7", "version": "0.6.0", "python_path": "d3m.primitives.tods.data_processing.column_parser", "name": "Parses strings into their types", "digest": "ef87bfbd3b35a2d78337c5d3aba9847dfdf56c05c5289e50fe0db766ef8126e0"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"parse_semantic_types": {"type": "VALUE", "data": ["http://schema.org/Boolean", "http://schema.org/Integer", "http://schema.org/Float", "https://metadata.datadrivendiscovery.org/types/FloatVector"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type", 
"digest": "35ab3368a69e46da89e4dbb70dab762d4c020c43a9424db622e8ac2ae5c57c06"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type", "digest": "35ab3368a69e46da89e4dbb70dab762d4c020c43a9424db622e8ac2ae5c57c06"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "642de2e7-5590-3cab-9266-2a53c326c461", "version": "0.0.1", "python_path": "d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler", "name": "Axis_wise_scale"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_ae", "name": "TODS.anomaly_detection_primitives.AutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.6.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.construct_predictions", "name": "Construct pipeline predictions output", "digest": "d981f367776ef05d7311b85b86af717a599c7fd363b04db7531bd21ab30a8844"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.7.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "7033f0a107adae468d509f5706a6a79dfcb965d4d5a8d3aef4b79017d33956ed"} \ No newline at end of file +{"id": "f9f918f3-4cd9-4d3c-9a84-8a95b18d3d7c", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2021-04-02T20:35:56.617972Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.9.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e", "version": "0.2.0", "python_path": "d3m.primitives.tods.common.denormalize", "name": "Denormalize datasets"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "c78138d9-9377-31dc-aee8-83d9df049c60", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.dataset_to_dataframe", "name": "Extract a DataFrame from a Dataset"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "989562ac-b50f-4462-99cb-abef80d765b2", "version": "0.1.0", "python_path": "d3m.primitives.tods.common.csv_reader", "name": "Columns CSV reader"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"use_columns": {"type": "VALUE", "data": [0, 1]}, "return_result": {"type": "VALUE", "data": "replace"}}}, {"type": "PRIMITIVE", "primitive": {"id": "81235c29-aeb9-3828-911a-1b25319b6998", "version": "0.6.0", "python_path": 
"d3m.primitives.tods.data_processing.column_parser", "name": "Parses strings into their types"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"parse_semantic_types": {"type": "VALUE", "data": ["http://schema.org/Boolean", "http://schema.org/Integer", "http://schema.org/Float", "https://metadata.datadrivendiscovery.org/types/FloatVector"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "f07ce875-bbc7-36c5-9cc1-ba4bfb7cf48e", "version": "0.1.0", "python_path": "d3m.primitives.tods.feature_analysis.statistical_maximum", "name": "Time Series Decompostional"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "b454adf7-5820-3e6f-8383-619f13fb1cb6", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_ocsvm", "name": "TODS.anomaly_detection_primitives.OCSVMPrimitive"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.6.produce"}}, "outputs": [{"id": "produce_score"}]}, {"type": "PRIMITIVE", "primitive": {"id": "01d36760-235c-3cdd-95dd-3c682c634c49", "version": "0.1.0", "python_path": "d3m.primitives.tods.detection_algorithm.system_wise_detection", "name": "Sytem_Wise_Anomaly_Detection_Primitive"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.7.produce_score"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "2530840a-07d4-3874-b7d8-9eb5e4ae2bf3", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.construct_predictions", "name": "Construct pipeline predictions output"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.8.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "c91336de994b3e7089bc3de1728dde5b458c3b9d4ecae7a9c94a26da1219d3f3"} \ No newline at end of file diff --git a/examples/axolotl_interface/run_pipeline.py b/examples/axolotl_interface/run_pipeline.py index cd0ef11..04c45ff 100644 --- a/examples/axolotl_interface/run_pipeline.py +++ b/examples/axolotl_interface/run_pipeline.py @@ -6,19 +6,18 @@ import pandas as pd from tods import generate_dataset, load_pipeline, evaluate_pipeline this_path = os.path.dirname(os.path.abspath(__file__)) -#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset +default_data_path = os.path.join(this_path, '../../datasets/anomaly/raw_data/yahoo_sub_5.csv') parser = argparse.ArgumentParser(description='Arguments for running predefined 
pipeline.')
-parser.add_argument('--table_path', type=str, default=os.path.join(this_path, '../../datasets/anomaly/raw_data/yahoo_sub_5.csv'),
+parser.add_argument('--table_path', type=str, default=default_data_path,
                    help='Input the path of the input data table')
parser.add_argument('--target_index', type=int, default=6,
                    help='Index of the ground truth (for evaluation)')
parser.add_argument('--metric', type=str, default='F1_MACRO',
                    help='Evaluation Metric (F1, F1_MACRO)')
-parser.add_argument('--pipeline_path', default=os.path.join(this_path, './example_pipelines/autoencoder_pipeline.json'),
+parser.add_argument('--pipeline_path',
+                    default=os.path.join(this_path, './example_pipelines/autoencoder_pipeline.json'),
                    help='Input the path of the pre-built pipeline description')
-# parser.add_argument('--pipeline_path', default=os.path.join(this_path, '../tods/resources/default_pipeline.json'),
-#                     help='Input the path of the pre-built pipeline description')

args = parser.parse_args()
@@ -37,4 +36,5 @@
pipeline = load_pipeline(pipeline_path)

# Run the pipeline
pipeline_result = evaluate_pipeline(dataset, pipeline, metric)
print(pipeline_result)
+#raise pipeline_result.error[0]
diff --git a/tods/common/CSVReader.py b/tods/common/CSVReader.py
index 8c1fad6..158b938 100644
--- a/tods/common/CSVReader.py
+++ b/tods/common/CSVReader.py
@@ -68,7 +68,7 @@ class CSVReaderPrimitive(primitives.FileReaderPrimitiveBase):  # pragma: no co
            # This should be done by primitives later on.
            dtype=str,
-           # We always expect one row header.
-           header=0,
+           # Read every row as data; do not consume a header row.
+           header=None,
            # We want empty strings and not NaNs.
            na_filter=False,
            encoding='utf8',
@@ -92,7 +92,7 @@ class CSVReaderPrimitive(primitives.FileReaderPrimitiveBase):  # pragma: no co
        data = container.DataFrame(data, {
            'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
            'structural_type': container.DataFrame,
-       }, generate_metadata=False)
+       }, generate_metadata=True)

        assert column_names is not None
diff --git a/tods/common/TODSBasePrimitives.py b/tods/common/TODSBasePrimitives.py
new file mode 100644
index 0000000..05db69d
--- /dev/null
+++ b/tods/common/TODSBasePrimitives.py
@@ -0,0 +1,200 @@
+import typing
+from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple
+import logging
+import abc
+
+from d3m.primitive_interfaces import generator, transformer
+from d3m.primitive_interfaces.base import *
+from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase
+
+from d3m.metadata import base as metadata_base, hyperparams, params
+from d3m import container
+from d3m import utils
+
+__all__ = ('TODSTransformerPrimitiveBase',)
+
+class TODSTransformerPrimitiveBase(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+    """
+    A base class for primitives which are not fitted at all and can
+    simply produce (useful) outputs from inputs directly. As such they
+    also do not have any state (params).
+
+    This class is parameterized using only three type variables: ``Inputs``,
+    ``Outputs``, and ``Hyperparams``.
+    """
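A note on the dispatch these new base classes implement: a cell of a system-wise DataFrame holds a nested per-system DataFrame (non-empty shape), while a point-wise cell is a numpy scalar whose shape is (). A minimal, self-contained sketch of that shape check (toy data only, not TODS code):

import pandas as pd

pointwise = pd.DataFrame({'value': [0.1, 0.2]})   # cells are numpy scalars, shape ()
systemwise = pd.DataFrame({'system': [pd.DataFrame({'v': [1.0, 2.0]}),
                                      pd.DataFrame({'v': [3.0]})]})

print(len(pointwise.iloc[0, 0].shape))    # 0 -> point-wise: _produce is called once
print(len(systemwise.iloc[0, 0].shape))   # 2 -> system-wise: _forward loops per system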
+ """ + + def __init__(self, *, hyperparams: Hyperparams) -> None: + super().__init__(hyperparams=hyperparams) + + def produce(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]: + + is_system = len(inputs.iloc[0, 0].shape) != 0 # check the shape of first row first column, if not a single data entry(,) then it is system-wise data (row, col) + if is_system: + outputs = self._forward(inputs, '_produce') + else: + outputs = self._produce(inputs=inputs) + outputs = outputs.value + + return CallResult(outputs) + + @abc.abstractmethod + def _produce(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]: + """ + make the predictions + """ + #return CallResult(container.DataFrame) + + def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: + """ + A noop. + """ + return CallResult(None) + + def get_params(self) -> None: + """ + A noop. + """ + + return None + + def set_params(self, *, params: None) -> None: + """ + A noop. + """ + + return + + def _forward(self, data, method): + """ + General Forward Function to feed system data one-by-one to the primitive + """ + col_name = list(data.columns)[0] + for i, _ in data.iterrows(): + sys_data = data.iloc[i][col_name] + produce_func = getattr(self, method, None) + out = produce_func(inputs=sys_data) + data.iloc[i][col_name] = out.value + return data + +class TODSUnsupervisedLearnerPrimitiveBase(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): + + def __init__(self, *, hyperparams: Hyperparams, + random_seed: int=0, + docker_containers: Dict[str, DockerContainer] = None) -> None: + super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) + + def produce(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]: + + is_system = len(inputs.iloc[0, 0].shape) != 0 # check the shape of first row first column, if not a single data entry(,) then it is system-wise data (row, col) + if is_system: + outputs = self._forward(inputs, '_produce') + else: + outputs = self._produce(inputs=inputs) + outputs = outputs.value + + return CallResult(outputs) + + def produce_score(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]: + is_system = len(inputs.iloc[0, 0].shape) != 0 # check the shape of first row first column, if not a single data entry(,) then it is system-wise data (row, col) + if is_system: + outputs = self._forward(inputs, '_produce_score') + else: + outputs = self._produce(inputs=inputs) + outputs = outputs.value + + return CallResult(outputs) + + def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: + """ + A noop. 
+ """ + is_system = len(self._inputs.iloc[0, 0].shape) != 0 # check the shape of first row first column, if not a single data entry(,) then it is system-wise data (row, col) + if is_system: + data = inputs + col_name = list(data.columns)[0] + for i, _ in data.iterrows(): + sys_data = data.iloc[i][col_name] + self.set_training_data(inputs=sys_data) + self._fit() + else: + outputs = self._fit() + outputs = outputs.value + + return CallResult(None) + + def fit_multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, timeout: float = None, iterations: int = None) -> MultiCallResult: + is_system = len(inputs.iloc[0, 0].shape) != 0 # check the shape of first row first column, if not a single data entry(,) then it is system-wise data (row, col) + if is_system: + data = inputs + produce_method = produce_methods[0] + col_name = list(data.columns)[0] + results = [] + for i, _ in data.iterrows(): + sys_data = data.iloc[i][col_name] + self.set_training_data(inputs=sys_data) + fit_result = self._fit() + if produce_method == "produce": + out = self._produce(inputs=sys_data, timeout=timeout) + else: + out = self._produce_score(inputs=sys_data, timeout=timeout) + data.iloc[i][col_name] = out.value + results.append(out) + iterations_done = None + for result in results: + if result.iterations_done is not None: + if iterations_done is None: + iterations_done = result.iterations_done + else: + iterations_done = max(iterations_done, result.iterations_done) + return MultiCallResult( + values={produce_method: data}, + has_finished=all(result.has_finished for result in results), + iterations_done=iterations_done, + ) + else: + return self._fit_multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs) + + @abc.abstractmethod + def _produce(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]: + """ + abstract class + """ + + @abc.abstractmethod + def _produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: + """ + abstract class + """ + + @abc.abstractmethod + def _fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: + """ + abstract class + """ + + + def get_params(self) -> None: + """ + A noop. + """ + + return None + + def set_params(self, *, params: None) -> None: + """ + A noop. 
+ """ + + return + + def _forward(self, data, method): + """ + General Forward Function to feed system data one-by-one to the primitive + """ + col_name = list(data.columns)[0] + for i, _ in data.iterrows(): + sys_data = data.iloc[i][col_name] + produce_func = getattr(self, method, None) + out = produce_func(inputs=sys_data) + data.iloc[i][col_name] = out.value + return data diff --git a/tods/detection_algorithm/PyodAE.py b/tods/detection_algorithm/PyodAE.py index 784a371..cc0180c 100644 --- a/tods/detection_algorithm/PyodAE.py +++ b/tods/detection_algorithm/PyodAE.py @@ -95,7 +95,7 @@ class Hyperparams(Hyperparams_ODBase): ) epochs = hyperparams.Hyperparameter[int]( - default=100, + default=1, description='Number of epochs to train the model.', semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] ) @@ -335,7 +335,7 @@ class AutoEncoderPrimitive(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Para Returns: None """ - return super().fit() + return super()._fit() def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: """ @@ -347,7 +347,7 @@ class AutoEncoderPrimitive(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Para Container DataFrame 1 marks Outliers, 0 marks normal. """ - return super().produce(inputs=inputs, timeout=timeout, iterations=iterations) + return super()._produce(inputs=inputs, timeout=timeout, iterations=iterations) def get_params(self) -> Params: """ diff --git a/tods/detection_algorithm/SystemWiseDetection.py b/tods/detection_algorithm/SystemWiseDetection.py index d19b20a..3a68a14 100644 --- a/tods/detection_algorithm/SystemWiseDetection.py +++ b/tods/detection_algorithm/SystemWiseDetection.py @@ -142,7 +142,6 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, self.logger.info('System wise Detection Input Primitive called') - # Get cols to fit. 
self._fitted = False self._training_inputs, self._training_indices = self._get_columns_to_fit(inputs, self.hyperparams) @@ -316,12 +315,8 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, def _write(self, inputs: Inputs): inputs.to_csv(str(time.time()) + '.csv') - - - def _system_wise_detection(self,X,method_type,window_size,contamination): - systemIds = X.system_id.unique() - groupedX = X.groupby(X.system_id) + systemIds = [int(idx) for idx in X.index] transformed_X = [] if(method_type=="max"): @@ -330,17 +325,17 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, """ maxOutlierScorePerSystemList = [] for systemId in systemIds: - systemDf = groupedX.get_group(systemId) - maxOutlierScorePerSystemList.append(np.max(np.abs(systemDf["value_0"].values))) + systemDf = X.iloc[systemId]['system'] + maxOutlierScorePerSystemList.append(np.max(np.abs(systemDf.iloc[:,0].values))) ranking = np.sort(maxOutlierScorePerSystemList) threshold = ranking[int((1 - contamination) * len(ranking))] self.threshold = threshold - mask = (maxOutlierScorePerSystemList >= threshold) + mask = (maxOutlierScorePerSystemList > threshold) ranking[mask] = 1 ranking[np.logical_not(mask)] = 0 for iter in range(len(systemIds)): - transformed_X.append([systemIds[iter],ranking[iter]]) + transformed_X.append(ranking[iter]) if (method_type == "avg"): """ @@ -348,60 +343,72 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, """ avgOutlierScorePerSystemList = [] for systemId in systemIds: - systemDf = groupedX.get_group(systemId) - avgOutlierScorePerSystemList.append(np.mean(np.abs(systemDf["value_0"].values))) + systemDf = X.iloc[systemId]['system'] + avgOutlierScorePerSystemList.append(np.mean(np.abs(systemDf.iloc[:,0].values))) ranking = np.sort(avgOutlierScorePerSystemList) threshold = ranking[int((1 - contamination) * len(ranking))] self.threshold = threshold - mask = (avgOutlierScorePerSystemList >= threshold) + mask = (avgOutlierScorePerSystemList > threshold) ranking[mask] = 1 ranking[np.logical_not(mask)] = 0 for iter in range(len(systemIds)): - transformed_X.append([systemIds[iter], ranking[iter]]) + transformed_X.append( ranking[iter]) if (method_type == "sliding_window_sum"): """ - Sytems are sorted based on max of max of reconstruction errors in each window" + Sytems are sorted based on max of sum of reconstruction errors in each window" """ - OutlierScorePerSystemList = [] + maxOutlierScorePerSystemList = [] for systemId in systemIds: - systemDf = groupedX.get_group(systemId) - column_value = systemDf["value_0"].values - column_score = np.zeros(len(column_value)) + systemDf = X.iloc[systemId]['system'] + column_value = systemDf.iloc[:,0].values + column_score = [] for iter in range(window_size - 1, len(column_value)): sequence = column_value[iter - window_size + 1:iter + 1] - column_score[iter] = np.sum(np.abs(sequence)) - column_score[:window_size - 1] = column_score[window_size - 1] - OutlierScorePerSystemList.append(column_score.tolist()) - OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) + column_score.append(np.sum(np.abs(sequence))) + #column_score[:window_size - 1] = column_score[window_size - 1] + + maxOutlierScorePerSystemList.append(np.max(column_score)) + #OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) - maxOutlierScorePerSystemList = OutlierScorePerSystemList.max(axis=1).tolist() + #maxOutlierScorePerSystemList = OutlierScorePerSystemList.max(axis=1).tolist() ranking = 
np.sort(maxOutlierScorePerSystemList) threshold = ranking[int((1 - contamination) * len(ranking))] self.threshold = threshold - mask = (maxOutlierScorePerSystemList >= threshold) + mask = (maxOutlierScorePerSystemList > threshold) ranking[mask] = 1 ranking[np.logical_not(mask)] = 0 for iter in range(len(systemIds)): - transformed_X.append([systemIds[iter], ranking[iter]]) + transformed_X.append( ranking[iter]) + if (method_type == "majority_voting_sliding_window_sum"): """ Sytem with most vote based on max of sum of reconstruction errors in each window """ OutlierScorePerSystemList = [] + max_time_points = 0 + for systemId in systemIds: + systemDf = X.iloc[systemId]['system'] + max_time_points = max(max_time_points,systemDf.shape[0]) + for systemId in systemIds: - systemDf = groupedX.get_group(systemId) - column_value = systemDf["value_0"].values - column_score = np.zeros(len(column_value)) + column_value = np.zeros(max_time_points) + systemDf = X.iloc[systemId]['system'] + column_value_actual = systemDf.iloc[:, 0].values + column_value[0:len(column_value_actual)] = column_value_actual + column_value[len(column_value_actual):]= column_value_actual[-1] + column_score = [] for iter in range(window_size - 1, len(column_value)): sequence = column_value[iter - window_size + 1:iter + 1] - column_score[iter] = np.sum(np.abs(sequence)) - column_score[:window_size - 1] = column_score[window_size - 1] - OutlierScorePerSystemList.append(column_score.tolist()) + column_score.append(np.sum(np.abs(sequence))) + + OutlierScorePerSystemList.append(column_score) + OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) + OutlierScorePerSystemList = ( OutlierScorePerSystemList == OutlierScorePerSystemList.max(axis=0)[None, :]).astype(int) @@ -409,28 +416,39 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, ranking = np.sort(maxOutlierScorePerSystemList) threshold = ranking[int((1 - contamination) * len(ranking))] + self.threshold = threshold - mask = (maxOutlierScorePerSystemList >= threshold) + mask = (maxOutlierScorePerSystemList > threshold) ranking[mask] = 1 ranking[np.logical_not(mask)] = 0 for iter in range(len(systemIds)): - transformed_X.append([systemIds[iter], ranking[iter]]) + transformed_X.append( ranking[iter]) + if (method_type == "majority_voting_sliding_window_max"): """ Sytem with most vote based on max of max of reconstruction errors in each window """ OutlierScorePerSystemList = [] + max_time_points = 0 for systemId in systemIds: - systemDf = groupedX.get_group(systemId) - column_value = systemDf["value_0"].values - column_score = np.zeros(len(column_value)) + systemDf = X.iloc[systemId]['system'] + max_time_points = max(max_time_points, systemDf.shape[0]) + + for systemId in systemIds: + column_value = np.zeros(max_time_points) + systemDf = X.iloc[systemId]['system'] + column_value_actual = systemDf.iloc[:, 0].values + column_value[0:len(column_value_actual)] = column_value_actual + column_value[len(column_value_actual):] = column_value_actual[-1] + column_score = [] for iter in range(window_size - 1, len(column_value)): sequence = column_value[iter - window_size + 1:iter + 1] - column_score[iter] = np.max(np.abs(sequence)) - column_score[:window_size - 1] = column_score[window_size - 1] - OutlierScorePerSystemList.append(column_score.tolist()) + column_score.append(np.max(np.abs(sequence))) + + OutlierScorePerSystemList.append(column_score) OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) + OutlierScorePerSystemList = ( 
OutlierScorePerSystemList == OutlierScorePerSystemList.max(axis=0)[None, :]).astype(int) @@ -439,11 +457,11 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, ranking = np.sort(maxOutlierScorePerSystemList) threshold = ranking[int((1 - contamination) * len(ranking))] self.threshold = threshold - mask = (maxOutlierScorePerSystemList >= threshold) + mask = (maxOutlierScorePerSystemList > threshold) ranking[mask] = 1 ranking[np.logical_not(mask)] = 0 for iter in range(len(systemIds)): - transformed_X.append([systemIds[iter], ranking[iter]]) + transformed_X.append(ranking[iter]) return transformed_X diff --git a/tods/detection_algorithm/SystemWiseDetection_bkup.py b/tods/detection_algorithm/SystemWiseDetection_bkup.py new file mode 100644 index 0000000..d675e2c --- /dev/null +++ b/tods/detection_algorithm/SystemWiseDetection_bkup.py @@ -0,0 +1,455 @@ +import os +from typing import Any,Optional,List +import statsmodels.api as sm +import numpy as np +from d3m import container, utils as d3m_utils +from d3m import utils + +from numpy import ndarray +from collections import OrderedDict +from scipy import sparse +import os + +import numpy +import typing +import time + +from d3m import container +from d3m.primitive_interfaces import base, transformer + +from d3m.container import DataFrame as d3m_dataframe +from d3m.metadata import hyperparams, params, base as metadata_base + +from d3m.base import utils as base_utils +import uuid +from d3m.exceptions import PrimitiveNotFittedError + +__all__ = ('SystemWiseDetectionPrimitive',) + +Inputs = container.DataFrame +Outputs = container.DataFrame + +class Params(params.Params): + #to-do : how to make params dynamic + use_column_names: Optional[Any] + + + +class Hyperparams(hyperparams.Hyperparams): + + #Tuning Parameter + #default -1 considers entire time series is considered + window_size = hyperparams.Hyperparameter(default=10, semantic_types=[ + 'https://metadata.datadrivendiscovery.org/types/TuningParameter', + ], description="Window Size for decomposition") + + method_type = hyperparams.Enumeration( + values=['max', 'avg', 'sliding_window_sum','majority_voting_sliding_window_sum','majority_voting_sliding_window_max'], + default='majority_voting_sliding_window_max', + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="The type of method used to find anomalous system", + ) + contamination = hyperparams.Uniform( + lower=0., + upper=0.5, + default=0.1, + description='The amount of contamination of the data set, i.e. the proportion of outliers in the data set. ', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] + ) + + #control parameter + use_columns = hyperparams.Set( + elements=hyperparams.Hyperparameter[int](-1), + default=(), + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", + ) + exclude_columns = hyperparams.Set( + elements=hyperparams.Hyperparameter[int](-1), + default=(), + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="A set of column indices to not operate on. 
Applicable only if \"use_columns\" is not provided.", + ) + return_result = hyperparams.Enumeration( + values=['append', 'replace', 'new'], + default='new', + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", + ) + use_semantic_types = hyperparams.UniformBool( + default=False, + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" + ) + add_index_columns = hyperparams.UniformBool( + default=False, + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", + ) + error_on_no_input = hyperparams.UniformBool( + default=True, + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", + ) + + return_semantic_type = hyperparams.Enumeration[str]( + values=['https://metadata.datadrivendiscovery.org/types/Attribute', + 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], + default='https://metadata.datadrivendiscovery.org/types/Attribute', + description='Decides what semantic type to attach to generated attributes', + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] + ) + + + +class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + Primitive to find abs_energy of time series + """ + + metadata = metadata_base.PrimitiveMetadata({ + "__author__": "DATA Lab at Texas A&M University", + 'name': 'Sytem_Wise_Anomaly_Detection_Primitive', + 'python_path': 'd3m.primitives.tods.detection_algorithm.system_wise_detection', + 'source': { + 'name': 'DATA Lab at Texas A&M University', + 'contact': 'mailto:khlai037@tamu.edu' + }, + "hyperparams_to_tune": ['window_size','method_type','contamination'], + 'version': '0.1.0', + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.TODS_PRIMITIVE, + ], + 'primitive_family': metadata_base.PrimitiveFamily.ANOMALY_DETECTION, + 'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'Sytem_Wise_Anomaly_Detection_Primitive')), + }) + + def __init__(self, *, hyperparams: Hyperparams) -> None: + super().__init__(hyperparams=hyperparams) + self.primitiveNo = 0 + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + """ + + Args: + inputs: Container DataFrame + timeout: Default + iterations: Default + + Returns: + Container DataFrame containing abs_energy of time series + """ + + self.logger.info('System wise Detection Input Primitive called') + + # Get cols to fit. 
+ self._fitted = False + self._training_inputs, self._training_indices = self._get_columns_to_fit(inputs, self.hyperparams) + self._input_column_names = self._training_inputs.columns + + if len(self._training_indices) > 0: + # self._clf.fit(self._training_inputs) + self._fitted = True + else: + if self.hyperparams['error_on_no_input']: + raise RuntimeError("No input columns were selected") + self.logger.warn("No input columns were selected") + + if not self._fitted: + raise PrimitiveNotFittedError("Primitive not fitted.") + system_wise_detection_input = inputs + if self.hyperparams['use_semantic_types']: + system_wise_detection_input = inputs.iloc[:, self._training_indices] + output_columns = [] + if len(self._training_indices) > 0: + system_wise_detection_output = self._system_wise_detection(system_wise_detection_input,self.hyperparams["method_type"],self.hyperparams["window_size"],self.hyperparams["contamination"]) + outputs = system_wise_detection_output + + + if sparse.issparse(system_wise_detection_output): + system_wise_detection_output = system_wise_detection_output.toarray() + outputs = self._wrap_predictions(inputs, system_wise_detection_output) + + #if len(outputs.columns) == len(self._input_column_names): + # outputs.columns = self._input_column_names + + output_columns = [outputs] + + + else: + if self.hyperparams['error_on_no_input']: + raise RuntimeError("No input columns were selected") + self.logger.warn("No input columns were selected") + + + self.logger.info('System wise Detection Primitive returned') + outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], + add_index_columns=self.hyperparams['add_index_columns'], + inputs=inputs, column_indices=self._training_indices, + columns_list=output_columns) + return base.CallResult(outputs) + + @classmethod + def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): + """ + Select columns to fit. + Args: + inputs: Container DataFrame + hyperparams: d3m.metadata.hyperparams.Hyperparams + + Returns: + list + """ + if not hyperparams['use_semantic_types']: + return inputs, list(range(len(inputs.columns))) + + inputs_metadata = inputs.metadata + + def can_produce_column(column_index: int) -> bool: + return cls._can_produce_column(inputs_metadata, column_index, hyperparams) + + use_columns = hyperparams['use_columns'] + exclude_columns = hyperparams['exclude_columns'] + + columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, + use_columns=use_columns, + exclude_columns=exclude_columns, + can_use_column=can_produce_column) + return inputs.iloc[:, columns_to_produce], columns_to_produce + # return columns_to_produce + + @classmethod + def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, + hyperparams: Hyperparams) -> bool: + """ + Output whether a column can be processed. 
+        Args:
+            inputs_metadata: d3m.metadata.base.DataMetadata
+            column_index: int
+
+        Returns:
+            bool
+        """
+        column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index))
+
+        accepted_structural_types = (int, float, numpy.integer, numpy.float64)
+        accepted_semantic_types = set()
+        accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute")
+        if not issubclass(column_metadata['structural_type'], accepted_structural_types):
+            return False
+
+        semantic_types = set(column_metadata.get('semantic_types', []))
+        # NOTE: the early return below bypasses the semantic-type checks that follow.
+        return True
+        if len(semantic_types) == 0:
+            cls.logger.warning("No semantic types found in column metadata")
+            return False
+
+        # Making sure all accepted_semantic_types are available in semantic_types
+        if len(accepted_semantic_types - semantic_types) == 0:
+            return True
+
+        return False
+
+    @classmethod
+    def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
+                                     target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata:
+        """
+        Update metadata for selected columns.
+        Args:
+            inputs_metadata: metadata_base.DataMetadata
+            outputs: Container Dataframe
+            target_columns_metadata: list
+
+        Returns:
+            d3m.metadata.base.DataMetadata
+        """
+        outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)
+
+        for column_index, column_metadata in enumerate(target_columns_metadata):
+            column_metadata.pop("structural_type", None)
+            outputs_metadata = outputs_metadata.update_column(column_index, column_metadata)
+
+        return outputs_metadata
+
+    def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
+        """
+        Wrap predictions into dataframe
+        Args:
+            inputs: Container Dataframe
+            predictions: array-like data (n_samples, n_features)
+
+        Returns:
+            Dataframe
+        """
+        outputs = d3m_dataframe(predictions, generate_metadata=True)
+        target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams, self.primitiveNo)
+        outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata)
+
+        return outputs
+
+    @classmethod
+    def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams, primitiveNo):
+        """
+        Add target columns metadata
+        Args:
+            outputs_metadata: metadata.base.DataMetadata
+            hyperparams: d3m.metadata.hyperparams.Hyperparams
+
+        Returns:
+            List[OrderedDict]
+        """
+        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
+        target_columns_metadata: List[OrderedDict] = []
+        for column_index in range(outputs_length):
+            column_name = "{0}{1}_{2}".format(cls.metadata.query()['name'], primitiveNo, column_index)
+            column_metadata = OrderedDict()
+            semantic_types = set()
+            semantic_types.add(hyperparams["return_semantic_type"])
+            column_metadata['semantic_types'] = list(semantic_types)
+
+            column_metadata["name"] = str(column_name)
+            target_columns_metadata.append(column_metadata)
+
+        return target_columns_metadata
+
+    def _write(self, inputs: Inputs):
+        inputs.to_csv(str(time.time()) + '.csv')
+
+    def _system_wise_detection(self, X, method_type, window_size, contamination):
+        # Index-based variant used by the new implementation:
+        #systemIds = [int(idx) for idx in X.index]
+        systemIds = X.system_id.unique()
+        groupedX = X.groupby(X.system_id)
+
+        transformed_X = []
+        if(method_type=="max"):
+            """
+            Systems are sorted by the maximum of their reconstruction errors.
+            """
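+            # Shared pattern across all branches below: score each system, sort
+            # the scores, cut at the (1 - contamination) quantile, and binarize.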
maxOutlierScorePerSystemList = [] + for systemId in systemIds: + systemDf = groupedX.get_group(systemId) + #systemDf = X[systemId]['system'] + maxOutlierScorePerSystemList.append(np.max(np.abs(systemDf["value_0"].values))) + + ranking = np.sort(maxOutlierScorePerSystemList) + threshold = ranking[int((1 - contamination) * len(ranking))] + self.threshold = threshold + mask = (maxOutlierScorePerSystemList >= threshold) + ranking[mask] = 1 + ranking[np.logical_not(mask)] = 0 + for iter in range(len(systemIds)): + transformed_X.append([systemIds[iter],ranking[iter]]) + + if (method_type == "avg"): + """ + Sytems are sorted based on average of reconstruction errors" + """ + avgOutlierScorePerSystemList = [] + for systemId in systemIds: + systemDf = groupedX.get_group(systemId) + avgOutlierScorePerSystemList.append(np.mean(np.abs(systemDf["value_0"].values))) + + ranking = np.sort(avgOutlierScorePerSystemList) + threshold = ranking[int((1 - contamination) * len(ranking))] + self.threshold = threshold + mask = (avgOutlierScorePerSystemList >= threshold) + ranking[mask] = 1 + ranking[np.logical_not(mask)] = 0 + for iter in range(len(systemIds)): + transformed_X.append([systemIds[iter], ranking[iter]]) + + if (method_type == "sliding_window_sum"): + """ + Sytems are sorted based on max of max of reconstruction errors in each window" + """ + OutlierScorePerSystemList = [] + for systemId in systemIds: + systemDf = groupedX.get_group(systemId) + column_value = systemDf["value_0"].values + column_score = np.zeros(len(column_value)) + for iter in range(window_size - 1, len(column_value)): + sequence = column_value[iter - window_size + 1:iter + 1] + column_score[iter] = np.sum(np.abs(sequence)) + column_score[:window_size - 1] = column_score[window_size - 1] + OutlierScorePerSystemList.append(column_score.tolist()) + OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) + + maxOutlierScorePerSystemList = OutlierScorePerSystemList.max(axis=1).tolist() + + ranking = np.sort(maxOutlierScorePerSystemList) + threshold = ranking[int((1 - contamination) * len(ranking))] + self.threshold = threshold + mask = (maxOutlierScorePerSystemList >= threshold) + ranking[mask] = 1 + ranking[np.logical_not(mask)] = 0 + for iter in range(len(systemIds)): + transformed_X.append([systemIds[iter], ranking[iter]]) + + if (method_type == "majority_voting_sliding_window_sum"): + """ + Sytem with most vote based on max of sum of reconstruction errors in each window + """ + OutlierScorePerSystemList = [] + for systemId in systemIds: + systemDf = groupedX.get_group(systemId) + column_value = systemDf["value_0"].values + column_score = np.zeros(len(column_value)) + for iter in range(window_size - 1, len(column_value)): + sequence = column_value[iter - window_size + 1:iter + 1] + column_score[iter] = np.sum(np.abs(sequence)) + column_score[:window_size - 1] = column_score[window_size - 1] + OutlierScorePerSystemList.append(column_score.tolist()) + OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) + OutlierScorePerSystemList = ( + OutlierScorePerSystemList == OutlierScorePerSystemList.max(axis=0)[None, :]).astype(int) + + maxOutlierScorePerSystemList = OutlierScorePerSystemList.sum(axis=1).tolist() + + ranking = np.sort(maxOutlierScorePerSystemList) + threshold = ranking[int((1 - contamination) * len(ranking))] + self.threshold = threshold + mask = (maxOutlierScorePerSystemList >= threshold) + ranking[mask] = 1 + ranking[np.logical_not(mask)] = 0 + for iter in range(len(systemIds)): + 
+        if method_type == "majority_voting_sliding_window_max":
+            """
+            Each time step votes for the system with the largest maximum window error;
+            systems are then ranked by their vote counts.
+            """
+            OutlierScorePerSystemList = []
+            for systemId in systemIds:
+                systemDf = groupedX.get_group(systemId)
+                column_value = systemDf["value_0"].values
+                column_score = np.zeros(len(column_value))
+                for i in range(window_size - 1, len(column_value)):
+                    sequence = column_value[i - window_size + 1:i + 1]
+                    column_score[i] = np.max(np.abs(sequence))
+                column_score[:window_size - 1] = column_score[window_size - 1]
+                OutlierScorePerSystemList.append(column_score.tolist())
+            OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList)
+            OutlierScorePerSystemList = (
+                OutlierScorePerSystemList == OutlierScorePerSystemList.max(axis=0)[None, :]).astype(int)
+
+            voteCountPerSystemList = OutlierScorePerSystemList.sum(axis=1)
+            ranking = np.sort(voteCountPerSystemList)
+            threshold = ranking[int((1 - contamination) * len(ranking))]
+            self.threshold = threshold
+            labels = (voteCountPerSystemList >= threshold).astype(int)
+            for i in range(len(systemIds)):
+                transformed_X.append([systemIds[i], labels[i]])
+
+        return transformed_X
diff --git a/tods/detection_algorithm/UODBasePrimitive.py b/tods/detection_algorithm/UODBasePrimitive.py
index 96076f7..6a7cd6c 100755
--- a/tods/detection_algorithm/UODBasePrimitive.py
+++ b/tods/detection_algorithm/UODBasePrimitive.py
@@ -30,6 +30,7 @@ from d3m.primitive_interfaces.base import CallResult, DockerContainer, Primitive
 #
 # from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase
 from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase
+from ..common.TODSBasePrimitives import TODSUnsupervisedLearnerPrimitiveBase
 from d3m.primitive_interfaces.transformer import TransformerPrimitiveBase
 from d3m.primitive_interfaces.base import *
@@ -141,7 +142,10 @@ class Hyperparams_ODBase(hyperparams.Hyperparams):
         semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
     )

-class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
+
+# OutlierDetectorBase.__doc__ = OutlierDetectorBase.__doc__
+
+class UnsupervisedOutlierDetectorBase(TODSUnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
     """
     Parameters
     ----------
@@ -234,7 +238,7 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O
         self.right_inds_[self.right_inds_ > len(self._inputs)] = len(self._inputs)
         # print(self.left_inds_, self.right_inds_)

-    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
+    def _fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
         """
         Fit model with training data.
         Args:
@@ -248,6 +252,7 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O
         if self._fitted: # pragma: no cover
             return CallResult(None)

+
         self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams)
         self._input_column_names = self._training_inputs.columns
@@ -271,7 +276,7 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O

         return CallResult(None)

-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
         """
         Process the testing data.
         Args:
@@ -336,7 +341,7 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O

         return CallResult(outputs)

-    def produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
+    def _produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
         """
         Process the testing data.
         Args:
@@ -688,3 +693,553 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O

 # OutlierDetectorBase.__doc__ = OutlierDetectorBase.__doc__

+
+class UnsupervisedOutlierDetectorBase2(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
+    """
+    Parameters
+    ----------
+    contamination : float in (0., 0.5), optional (default=0.1)
+        The amount of contamination of the data set, i.e.
+        the proportion of outliers in the data set. When fitting, this is used
+        to define the threshold on the decision function.
+
+    Attributes
+    ----------
+    clf_.decision_scores_ : numpy array of shape (n_samples,)
+        The outlier scores of the training data.
+        The higher, the more abnormal. Outliers tend to have higher
+        scores. This value is available once the detector is
+        fitted.
+
+    clf_.threshold_ : float within (0, 1)
+        For outliers, ``decision_scores_`` is greater than ``threshold_``;
+        for inliers, it is less.
+
+    clf_.labels_ : int, either 0 or 1
+        The binary labels of the training data. 0 stands for inliers
+        and 1 for outliers/anomalies. It is generated by applying
+        ``threshold_`` on ``decision_scores_``.
+
+    left_inds_ : ndarray
+        One of the mappings from decision_score to data.
+        For point outlier detection, left_inds_ exactly equals the index of each data point.
+        For collective outlier detection, left_inds_ equals the start index of each subsequence.
+
+    right_inds_ : ndarray
+        One of the mappings from decision_score to data.
+        For point outlier detection, right_inds_ exactly equals the index of each data point plus 1.
+        For collective outlier detection, right_inds_ equals the ending index of each subsequence.
+    """
+    # probability_score:
+    # window_size: int
+    #     The moving window size.
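+    # A hedged sketch of why the public entry points above were renamed to
+    # _fit/_produce: the imported TODS base classes presumably wrap them
+    # template-method style (the actual TODSBasePrimitives code is not part
+    # of this diff, so the signatures below are assumptions):
+    #
+    #   class TODSUnsupervisedLearnerPrimitiveBase(UnsupervisedLearnerPrimitiveBase):
+    #       def fit(self, *, timeout=None, iterations=None):
+    #           # shared bookkeeping (timing, logging, ...) could live here
+    #           return self._fit(timeout=timeout, iterations=iterations)
+    #
+    #       def produce(self, *, inputs, timeout=None, iterations=None):
+    #           return self._produce(inputs=inputs, timeout=timeout, iterations=iterations)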
+
+    __author__ = "DATALAB @Texas A&M University"
+    metadata: metadata_base.PrimitiveMetadata = None
+
+    def __init__(self, *,
+                 hyperparams: Hyperparams,
+                 random_seed: int = 0,
+                 docker_containers: Dict[str, DockerContainer] = None) -> None:
+        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
+
+        self._clf = None
+        self._clf_fit_parameter = {}
+        self.primitiveNo = 0
+
+        self.window_size = hyperparams['window_size']
+        self.step_size = hyperparams['step_size']
+        self.left_inds_ = None
+        self.right_inds_ = None
+
+        self._inputs = None
+        self._outputs = None
+        self._training_inputs = None
+        self._training_outputs = None
+        self._target_names = None
+        self._training_indices = None
+        self._target_column_indices = None
+        self._target_columns_metadata: List[OrderedDict] = None
+        self._input_column_names = None
+        self._fitted = False
+
+    @abc.abstractmethod
+    def set_training_data(self, *, inputs: Inputs) -> None:
+        """
+        Set training data for outlier detection.
+        Args:
+            inputs: Container DataFrame
+
+        Returns:
+            None
+        """
+        self._inputs = inputs
+        self._fitted = False
+
+    def _set_subseq_inds(self):
+
+        self.left_inds_ = getattr(self._clf, 'left_inds_', None)
+        self.right_inds_ = getattr(self._clf, 'right_inds_', None)
+
+        if self.left_inds_ is None or self.right_inds_ is None:
+            self.left_inds_ = numpy.arange(0, len(self._inputs), self.step_size)
+            self.right_inds_ = self.left_inds_ + self.window_size
+            self.right_inds_[self.right_inds_ > len(self._inputs)] = len(self._inputs)
+
+    def _fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
+        """
+        Fit model with training data.
+        Args:
+            *: Container DataFrame. Time series data up to fit.
+
+        Returns:
+            None
+        """
+        if self._fitted: # pragma: no cover
+            return CallResult(None)
+
+        self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams)
+        self._input_column_names = self._training_inputs.columns
+
+        if self._training_inputs is None: # pragma: no cover
+            return CallResult(None)
+
+        if len(self._training_indices) > 0:
+            self._clf.fit(X=self._training_inputs.values, **self._clf_fit_parameter)
+            self._fitted = True
+            self._set_subseq_inds()
+
+        else: # pragma: no cover
+            if self.hyperparams['error_on_no_input']:
+                raise RuntimeError("No input columns were selected")
+            self.logger.warn("No input columns were selected")
+
+        return CallResult(None)
+
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
+        """
+        Process the testing data.
+        Args:
+            inputs: Container DataFrame. Time series data up to outlier detection.
+
+        Returns:
+            Container DataFrame
+            1 marks Outliers, 0 marks normal.
+ """ + + if not self._fitted: # pragma: no cover + raise PrimitiveNotFittedError("Primitive not fitted.") + sk_inputs = inputs + if self.hyperparams['use_semantic_types']: + sk_inputs = inputs.iloc[:, self._training_indices] + output_columns = [] + #print("skinputs ", sk_inputs.values) + if len(self._training_indices) > 0: + + if self.hyperparams['return_subseq_inds']: + + if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD + pred_label = self._clf.predict(sk_inputs.values) + left_inds_ = numpy.arange(0, len(pred_label), self.step_size) + right_inds_ = left_inds_ + self.window_size + right_inds_[right_inds_ > len(pred_label)] = len(pred_label) + else: + pred_label, left_inds_, right_inds_ = self._clf.predict(sk_inputs.values) + + # print(pred_label.shape, left_inds_.shape, right_inds_.shape) + # print(pred_label, left_inds_, right_inds_) + + sk_output = numpy.concatenate((numpy.expand_dims(pred_label, axis=1), + numpy.expand_dims(left_inds_, axis=1), + numpy.expand_dims(right_inds_, axis=1)), axis=1) + + + else: + if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD + sk_output = self._clf.predict(sk_inputs.values) + + else: + sk_output, _, _ = self._clf.predict(sk_inputs.values) + + #print("sk output ", sk_output) + if sparse.issparse(sk_output): # pragma: no cover + sk_output = sk_output.toarray() + + outputs = self._wrap_predictions(inputs, sk_output) + if len(outputs.columns) == len(self._input_column_names): + outputs.columns = self._input_column_names + output_columns = [outputs] + else: # pragma: no cover + if self.hyperparams['error_on_no_input']: + raise RuntimeError("No input columns were selected") + self.logger.warn("No input columns were selected") + + outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], + add_index_columns=self.hyperparams['add_index_columns'], + inputs=inputs, column_indices=self._training_indices, + columns_list=output_columns) + + return CallResult(outputs) + + def _produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: + """ + Process the testing data. + Args: + inputs: Container DataFrame. Time series data up to outlier detection. + + Returns: + Container DataFrame + 1 marks Outliers, 0 marks normal. 
+ """ + + if not self._fitted: # pragma: no cover + raise PrimitiveNotFittedError("Primitive not fitted.") + sk_inputs = inputs + if self.hyperparams['use_semantic_types']: + sk_inputs = inputs.iloc[:, self._training_indices] + output_columns = [] + if len(self._training_indices) > 0: + + if self.hyperparams['return_subseq_inds']: + + if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD + pred_score = self._clf.decision_function(sk_inputs.values).ravel() + left_inds_ = numpy.arange(0, len(pred_score), self.step_size) + right_inds_ = left_inds_ + self.window_size + right_inds_[right_inds_ > len(pred_score)] = len(pred_score) + + else: + pred_score, left_inds_, right_inds_ = self._clf.decision_function(sk_inputs.values) + + # print(pred_score.shape, left_inds_.shape, right_inds_.shape) + + sk_output = numpy.concatenate((numpy.expand_dims(pred_score, axis=1), + numpy.expand_dims(left_inds_, axis=1), + numpy.expand_dims(right_inds_, axis=1)), axis=1) + + else: + if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD + sk_output = self._clf.decision_function(sk_inputs.values) + + else: + sk_output, _, _ = self._clf.decision_function(sk_inputs.values) + + if sparse.issparse(sk_output): # pragma: no cover + sk_output = sk_output.toarray() + outputs = self._wrap_predictions(inputs, sk_output) + if len(outputs.columns) == len(self._input_column_names): + outputs.columns = self._input_column_names + output_columns = [outputs] + else: # pragma: no cover + if self.hyperparams['error_on_no_input']: + raise RuntimeError("No input columns were selected") + self.logger.warn("No input columns were selected") + + outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], + add_index_columns=self.hyperparams['add_index_columns'], + inputs=inputs, column_indices=self._training_indices, + columns_list=output_columns) + return CallResult(outputs) + + + def get_params(self) -> Params_ODBase: + """ + Return parameters. + Args: + None + + Returns: + class Params_ODBase + """ + + if not self._fitted: + return Params_ODBase( + # decision_scores_=None, + # threshold_=None, + # labels_=None, + left_inds_=None, + right_inds_=None, + clf_=copy.copy(self._clf), + + # Keep previous + input_column_names=self._input_column_names, + training_indices_=self._training_indices, + target_names_=self._target_names, + target_column_indices_=self._target_column_indices, + target_columns_metadata_=self._target_columns_metadata + ) + + return Params_ODBase( + # decision_scores_=getattr(self._clf, 'decision_scores_', None), + # threshold_=getattr(self._clf, 'threshold_', None), + # labels_=getattr(self._clf, 'labels_', None), + left_inds_=self.left_inds_, # numpy.array(self.left_inds_) + right_inds_=self.right_inds_, # numpy.array(self.right_inds_) + clf_=copy.copy(self._clf), + + # Keep previous + input_column_names=self._input_column_names, + training_indices_=self._training_indices, + target_names_=self._target_names, + target_column_indices_=self._target_column_indices, + target_columns_metadata_=self._target_columns_metadata + ) + # pass + + + def set_params(self, *, params: Params_ODBase) -> None: + """ + Set parameters for outlier detection. 
+ Args: + params: class Params_ODBase + + Returns: + None + """ + + # self._clf.decision_scores_ = params['decision_scores_'] + # self._clf.threshold_ = params['threshold_'] + # self._clf.labels_ = params['labels_'] + self.left_inds_ = params['left_inds_'] + self.right_inds_ = params['right_inds_'] + self._clf = copy.copy(params['clf_']) + + # Keep previous + self._input_column_names = params['input_column_names'] + self._training_indices = params['training_indices_'] + self._target_names = params['target_names_'] + self._target_column_indices = params['target_column_indices_'] + self._target_columns_metadata = params['target_columns_metadata_'] + + + # if params['decision_scores_'] is not None: + # self._fitted = True + # if params['threshold_'] is not None: + # self._fitted = True + # if params['labels_'] is not None: + # self._fitted = True + if params['left_inds_'] is not None: + self._fitted = True + if params['right_inds_'] is not None: + self._fitted = True + + @classmethod + def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): # pragma: no cover + """ + Select columns to fit. + Args: + inputs: Container DataFrame + hyperparams: d3m.metadata.hyperparams.Hyperparams + + Returns: + list + """ + #print("*******************get columns to fit***********") + if not hyperparams['use_semantic_types']: + return inputs, list(range(len(inputs.columns))) + + inputs_metadata = inputs.metadata + #print("inputs_metadata ", inputs_metadata) + + def can_produce_column(column_index: int) -> bool: + return cls._can_produce_column(inputs_metadata, column_index, hyperparams) + + columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, + use_columns=hyperparams['use_columns'], + exclude_columns=hyperparams['exclude_columns'], + can_use_column=can_produce_column) + #print("columns_to_produce ", columns_to_produce) + return inputs.iloc[:, columns_to_produce], columns_to_produce + # return columns_to_produce + + + @classmethod + def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, + hyperparams: Hyperparams) -> bool: # pragma: no cover + """ + Output whether a column can be processed. + Args: + inputs_metadata: d3m.metadata.base.DataMetadata + column_index: int + + Returns: + bool + """ + + column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) + #print("column metadasta ", ) + accepted_structural_types = (int, float, numpy.integer, numpy.float64) + accepted_semantic_types = set() + accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") + + if not issubclass(column_metadata['structural_type'], accepted_structural_types): + return False + + semantic_types = set(column_metadata.get('semantic_types', [])) + #print("semantic_types ", column_metadata.get('semantic_types')) + if len(semantic_types) == 0: + cls.logger.warning("No semantic types found in column metadata") + return False + + # Making sure all accepted_semantic_types are available in semantic_types + if len(accepted_semantic_types - semantic_types) == 0: + return True + + return False + + + @classmethod + def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: # pragma: no cover + """ + Output metadata of selected columns. 
+        Args:
+            outputs_metadata: metadata_base.DataMetadata
+            hyperparams: d3m.metadata.hyperparams.Hyperparams
+
+        Returns:
+            List[OrderedDict]
+        """
+        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
+
+        target_columns_metadata: List[OrderedDict] = []
+        for column_index in range(outputs_length):
+            column_metadata = OrderedDict(outputs_metadata.query_column(column_index))
+
+            # Update semantic types and prepare it for predicted targets.
+            semantic_types = set(column_metadata.get('semantic_types', []))
+            semantic_types_to_remove = set([])
+            add_semantic_types = set()
+            add_semantic_types.add(hyperparams["return_semantic_type"])
+            semantic_types = semantic_types - semantic_types_to_remove
+            semantic_types = semantic_types.union(add_semantic_types)
+            column_metadata['semantic_types'] = list(semantic_types)
+
+            target_columns_metadata.append(column_metadata)
+
+        return target_columns_metadata
+
+
+    @classmethod
+    def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
+                                     target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: # pragma: no cover
+        """
+        Update metadata for selected columns.
+        Args:
+            inputs_metadata: metadata_base.DataMetadata
+            outputs: Container Dataframe
+            target_columns_metadata: list
+
+        Returns:
+            d3m.metadata.base.DataMetadata
+        """
+        outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)
+
+        for column_index, column_metadata in enumerate(target_columns_metadata):
+            column_metadata.pop("structural_type", None)
+            outputs_metadata = outputs_metadata.update_column(column_index, column_metadata)
+
+        return outputs_metadata
+
+
+    def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: # pragma: no cover
+        """
+        Wrap predictions into a dataframe.
+        Args:
+            inputs: Container Dataframe
+            predictions: array-like data (n_samples, n_features)
+
+        Returns:
+            Dataframe
+        """
+        outputs = d3m_dataframe(predictions, generate_metadata=True)
+        # target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata,
+        #                                                      self.hyperparams)
+        target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams, self.primitiveNo)
+        outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata)
+
+        return outputs
+
+    @classmethod
+    def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams, primitiveNo): # pragma: no cover
+        """
+        Add target columns metadata.
+        Args:
+            outputs_metadata: metadata.base.DataMetadata
+            hyperparams: d3m.metadata.hyperparams.Hyperparams
+
+        Returns:
+            List[OrderedDict]
+        """
+        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
+        target_columns_metadata: List[OrderedDict] = []
+        for column_index in range(outputs_length):
+            column_name = "{0}{1}_{2}".format(cls.metadata.query()['name'], primitiveNo, column_index)
+            column_metadata = OrderedDict()
+            semantic_types = set()
+            semantic_types.add(hyperparams["return_semantic_type"])
+            column_metadata['semantic_types'] = list(semantic_types)
+
+            column_metadata["name"] = str(column_name)
+            target_columns_metadata.append(column_metadata)
+
+        return target_columns_metadata
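+    # Worked example of the naming scheme above (values taken from the updated
+    # tests later in this diff): with primitive metadata name "Telemanom",
+    # primitiveNo 0 and a single output column, the generated column name is
+    # "Telemanom0_0".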
+    @classmethod
+    def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int],
+                              outputs_metadata: metadata_base.DataMetadata, hyperparams): # pragma: no cover
+        """
+        Copy metadata from the selected input columns.
+        Args:
+            inputs_metadata: metadata.base.DataMetadata
+            input_indices: list
+            outputs_metadata: metadata.base.DataMetadata
+            hyperparams: d3m.metadata.hyperparams.Hyperparams
+
+        Returns:
+            List[OrderedDict]
+        """
+        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
+        target_columns_metadata: List[OrderedDict] = []
+        for column_index in input_indices:
+            column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name")
+            if column_name is None:
+                column_name = "output_{}".format(column_index)
+
+            column_metadata = OrderedDict(inputs_metadata.query_column(column_index))
+            semantic_types = set(column_metadata.get('semantic_types', []))
+            semantic_types_to_remove = set([])
+            add_semantic_types = set()
+            add_semantic_types.add(hyperparams["return_semantic_type"])
+            semantic_types = semantic_types - semantic_types_to_remove
+            semantic_types = semantic_types.union(add_semantic_types)
+            column_metadata['semantic_types'] = list(semantic_types)
+
+            column_metadata["name"] = str(column_name)
+            target_columns_metadata.append(column_metadata)
+
+        # If outputs has more columns than the index, add the Attribute type to all remaining columns
+        if outputs_length > len(input_indices):
+            for column_index in range(len(input_indices), outputs_length):
+                column_metadata = OrderedDict()
+                semantic_types = set()
+                semantic_types.add(hyperparams["return_semantic_type"])
+                column_name = "output_{}".format(column_index)
+                column_metadata["semantic_types"] = list(semantic_types)
+                column_metadata["name"] = str(column_name)
+                target_columns_metadata.append(column_metadata)
+
+        return target_columns_metadata
diff --git a/tods/feature_analysis/AutoCorrelation.py b/tods/feature_analysis/AutoCorrelation.py
index e3049c6..7740133 100644
--- a/tods/feature_analysis/AutoCorrelation.py
+++ b/tods/feature_analysis/AutoCorrelation.py
@@ -25,7 +25,7 @@ from d3m.primitive_interfaces import base, transformer
 from d3m.metadata import base as metadata_base, hyperparams
 from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.primitive_interfaces.base import CallResult, DockerContainer
-from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

 from statsmodels.tsa.stattools import acf
@@ -186,7 +186,7 @@ class ACF:

-class AutoCorrelationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class AutoCorrelationPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     A primitive that performs autocorrelation on a DataFrame
     acf() function documentation: https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.acf.html
@@ -233,26 +233,8 @@ class AutoCorrelationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outp
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'AutocorrelationPrimitive')),
     })

-    def __init__(self, *,
-                 hyperparams: Hyperparams, #
-                 random_seed: int = 0,
-                 docker_containers: Dict[str, DockerContainer] = None) -> None:
-        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
-
-        self._clf = ACF(unbiased = hyperparams['unbiased'],
-                        nlags = hyperparams['nlags'],
-                        qstat = hyperparams['qstat'],
-                        fft = hyperparams['fft'],
-                        alpha = hyperparams['alpha'],
-                        missing = hyperparams['missing']
-                        )
-
-        self.primitiveNo = 
PrimitiveCount.primitive_no - PrimitiveCount.primitive_no+=1 - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: """ Process the testing data. Args: @@ -261,6 +243,16 @@ class AutoCorrelationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outp Returns: Container DataFrame after AutoCorrelation. """ + self._clf = ACF(unbiased = self.hyperparams['unbiased'], + nlags = self.hyperparams['nlags'], + qstat = self.hyperparams['qstat'], + fft = self.hyperparams['fft'], + alpha = self.hyperparams['alpha'], + missing = self.hyperparams['missing'] + ) + + self.primitiveNo = PrimitiveCount.primitive_no + PrimitiveCount.primitive_no+=1 # Get cols to fit. self._fitted = False diff --git a/tods/feature_analysis/BKFilter.py b/tods/feature_analysis/BKFilter.py index cb0523b..a7d8b6d 100644 --- a/tods/feature_analysis/BKFilter.py +++ b/tods/feature_analysis/BKFilter.py @@ -20,6 +20,7 @@ from d3m import utils from d3m.base import utils as base_utils from d3m.exceptions import PrimitiveNotFittedError from d3m.primitive_interfaces.base import CallResult, DockerContainer +from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase import os.path @@ -118,7 +119,7 @@ class Hyperparams(hyperparams.Hyperparams): ) -class BKFilterPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class BKFilterPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Filter a time series using the Baxter-King bandpass filter. @@ -173,7 +174,7 @@ class BKFilterPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hy }) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: + def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: """ Process the testing data. Args: diff --git a/tods/feature_analysis/DiscreteCosineTransform.py b/tods/feature_analysis/DiscreteCosineTransform.py index 6151c88..6f61f3a 100644 --- a/tods/feature_analysis/DiscreteCosineTransform.py +++ b/tods/feature_analysis/DiscreteCosineTransform.py @@ -14,6 +14,7 @@ import math from scipy.fft import dct from collections import OrderedDict from typing import cast, Dict, List, Union, Sequence, Optional, Tuple +from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase from scipy import sparse @@ -160,7 +161,7 @@ class DCT: -class DiscreteCosineTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class DiscreteCosineTransformPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Compute the 1-D discrete Cosine Transform. Return the Discrete Cosine Transform of arbitrary type sequence x. 
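For orientation, a minimal standalone sketch of the call this primitive wraps (scipy.fft.dct, matching the import above; the toy window values are assumed):

    import numpy as np
    from scipy.fft import dct

    window = np.array([1.0, 2.0, 3.0, 4.0])  # hypothetical univariate window
    coeffs = dct(window, type=2)             # 1-D DCT-II, scipy's default type
    print(coeffs.shape)                      # (4,): one coefficient per sample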
@@ -242,7 +243,7 @@ class DiscreteCosineTransformPrimitive(transformer.TransformerPrimitiveBase[Inpu workers = self.hyperparams['workers'] ) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: """ Args: diff --git a/tods/feature_analysis/FastFourierTransform.py b/tods/feature_analysis/FastFourierTransform.py index 1f96846..249bbee 100644 --- a/tods/feature_analysis/FastFourierTransform.py +++ b/tods/feature_analysis/FastFourierTransform.py @@ -17,6 +17,7 @@ from typing import cast, Dict, List, Union, Sequence, Optional, Tuple from scipy import sparse from numpy import ndarray +from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase __all__ = ('FastFourierTransformPrimitive',) @@ -157,7 +158,7 @@ class FFT: -class FastFourierTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class FastFourierTransformPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Compute the 1-D discrete Fourier Transform. This function computes the 1-D n-point discrete Fourier Transform (DFT) with the efficient Fast Fourier Transform (FFT) algorithm @@ -232,7 +233,7 @@ class FastFourierTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, workers = self.hyperparams['workers'] ) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: """ Args: diff --git a/tods/feature_analysis/HPFilter.py b/tods/feature_analysis/HPFilter.py index 47e2679..70d7f03 100644 --- a/tods/feature_analysis/HPFilter.py +++ b/tods/feature_analysis/HPFilter.py @@ -21,6 +21,7 @@ from d3m.base import utils as base_utils from d3m.exceptions import PrimitiveNotFittedError from d3m.primitive_interfaces.base import CallResult, DockerContainer +from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase import statsmodels.api as sm @@ -101,7 +102,7 @@ class Hyperparams(hyperparams.Hyperparams): ) -class HPFilterPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class HPFilterPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Filter a time series using the Hodrick-Prescott filter. @@ -150,7 +151,7 @@ class HPFilterPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hy }) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: + def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: """ Process the testing data. 
Args: diff --git a/tods/feature_analysis/NonNegativeMatrixFactorization.py b/tods/feature_analysis/NonNegativeMatrixFactorization.py index 10fcf49..81ce397 100644 --- a/tods/feature_analysis/NonNegativeMatrixFactorization.py +++ b/tods/feature_analysis/NonNegativeMatrixFactorization.py @@ -15,6 +15,7 @@ import numpy from numpy import ndarray import warnings +from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase __all__ = ('NonNegativeMatrixFactorizationPrimitive',) @@ -211,7 +212,7 @@ class NMF: return result -class NonNegativeMatrixFactorizationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class NonNegativeMatrixFactorizationPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Calculates Latent factors of a given matrix of timeseries data @@ -299,7 +300,7 @@ class NonNegativeMatrixFactorizationPrimitive(transformer.TransformerPrimitiveBa learning_rate = self.hyperparams['learning_rate'], ) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: assert isinstance(inputs, container.DataFrame), type(dataframe) diff --git a/tods/feature_analysis/SpectralResidualTransform.py b/tods/feature_analysis/SpectralResidualTransform.py index f52a125..4917582 100644 --- a/tods/feature_analysis/SpectralResidualTransform.py +++ b/tods/feature_analysis/SpectralResidualTransform.py @@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base from d3m.base import utils as base_utils from d3m.exceptions import PrimitiveNotFittedError +from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase __all__ = ('SpectralResidualTransformPrimitive',) @@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): -class SpectralResidualTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class SpectralResidualTransformPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Primitive to find Spectral Residual Transform of time series """ @@ -110,7 +111,7 @@ class SpectralResidualTransformPrimitive(transformer.TransformerPrimitiveBase[In 'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'SpectralResidualTransformPrimitive')), }) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: """ Args: diff --git a/tods/feature_analysis/StatisticalAbsEnergy.py b/tods/feature_analysis/StatisticalAbsEnergy.py index fcbcf85..261233a 100644 --- a/tods/feature_analysis/StatisticalAbsEnergy.py +++ b/tods/feature_analysis/StatisticalAbsEnergy.py @@ -20,6 +20,7 @@ from d3m.primitive_interfaces import base, transformer from d3m.container import DataFrame as d3m_dataframe from d3m.metadata import hyperparams, params, base as metadata_base +from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase from d3m.base import utils as base_utils from d3m.exceptions import PrimitiveNotFittedError @@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): -class StatisticalAbsEnergyPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class StatisticalAbsEnergyPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Primitive to find abs_energy of time series """ @@ -112,7 +113,7 @@ class 
StatisticalAbsEnergyPrimitive(transformer.TransformerPrimitiveBase[Inputs, }) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: """ Args: diff --git a/tods/feature_analysis/StatisticalAbsSum.py b/tods/feature_analysis/StatisticalAbsSum.py index 34c56c7..409817f 100644 --- a/tods/feature_analysis/StatisticalAbsSum.py +++ b/tods/feature_analysis/StatisticalAbsSum.py @@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base from d3m.base import utils as base_utils from d3m.exceptions import PrimitiveNotFittedError +from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase __all__ = ('StatisticalAbsSumPrimitive',) @@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): -class StatisticalAbsSumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class StatisticalAbsSumPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Primitive to find abs_sum of time series """ @@ -109,7 +110,7 @@ class StatisticalAbsSumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Ou 'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalAbsSumPrimitive')), }) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: """ Args: diff --git a/tods/feature_analysis/StatisticalGmean.py b/tods/feature_analysis/StatisticalGmean.py index 93a7c1e..19ca550 100644 --- a/tods/feature_analysis/StatisticalGmean.py +++ b/tods/feature_analysis/StatisticalGmean.py @@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base from d3m.base import utils as base_utils from d3m.exceptions import PrimitiveNotFittedError from d3m.exceptions import UnexpectedValueError +from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase __all__ = ('StatisticalGmeanPrimitive',) @@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams): -class StatisticalGmeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class StatisticalGmeanPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Primitive to find gmean of time series . Will only take positive values as inputs . 
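A quick illustration of the positivity requirement noted in the gmean docstring above (scipy.stats.gmean is assumed as the backend, matching the primitive's name; the values are toy data):

    import numpy as np
    from scipy.stats import gmean

    print(gmean(np.array([1.0, 4.0, 16.0])))  # 4.0, the cube root of 1*4*16
    # A zero or negative entry would make the log-based geometric mean undefined.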
@@ -111,7 +112,7 @@ class StatisticalGmeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Out 'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalGmeanPrimitive')), }) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: """ Args: diff --git a/tods/feature_analysis/StatisticalHmean.py b/tods/feature_analysis/StatisticalHmean.py index 9bb951c..ce2b51f 100644 --- a/tods/feature_analysis/StatisticalHmean.py +++ b/tods/feature_analysis/StatisticalHmean.py @@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base from d3m.base import utils as base_utils from d3m.exceptions import PrimitiveNotFittedError +from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase __all__ = ('StatisticalHmeanPrimitive',) @@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams): -class StatisticalHmeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class StatisticalHmeanPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Primitive to find Harmonic mean of time series Harmonic mean only defined if all elements greater than or equal to zero @@ -113,7 +114,7 @@ class StatisticalHmeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Out 'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalHmeanPrimitive')), }) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: """ Args: diff --git a/tods/feature_analysis/StatisticalKurtosis.py b/tods/feature_analysis/StatisticalKurtosis.py index 9806131..13778a1 100644 --- a/tods/feature_analysis/StatisticalKurtosis.py +++ b/tods/feature_analysis/StatisticalKurtosis.py @@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base from d3m.base import utils as base_utils from d3m.exceptions import PrimitiveNotFittedError +from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase __all__ = ('StatisticalKurtosisPrimitive',) @@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams): -class StatisticalKurtosisPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class StatisticalKurtosisPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Primitive to find kurtosis of time series """ @@ -110,7 +111,7 @@ class StatisticalKurtosisPrimitive(transformer.TransformerPrimitiveBase[Inputs, 'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalKurtosisPrimitive')), }) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: """ Args: diff --git a/tods/feature_analysis/StatisticalMaximum.py b/tods/feature_analysis/StatisticalMaximum.py index 7e23ad8..4ba871d 100644 --- a/tods/feature_analysis/StatisticalMaximum.py +++ b/tods/feature_analysis/StatisticalMaximum.py @@ -9,11 +9,11 @@ from numpy import ndarray from collections import OrderedDict from scipy import sparse import os +import uuid import numpy import typing import time -import uuid from d3m import container from d3m.primitive_interfaces import base, transformer @@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base 
as metadata_base from d3m.base import utils as base_utils from d3m.exceptions import PrimitiveNotFittedError +from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase __all__ = ('StatisticalMaximumPrimitive',) @@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): -class StatisticalMaximumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class StatisticalMaximumPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Primitive to find maximum of time series """ @@ -110,7 +111,7 @@ class StatisticalMaximumPrimitive(transformer.TransformerPrimitiveBase[Inputs, O 'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMaximumPrimitive')), }) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: """ Args: @@ -159,11 +160,11 @@ class StatisticalMaximumPrimitive(transformer.TransformerPrimitiveBase[Inputs, O if self.hyperparams['error_on_no_input']: raise RuntimeError("No input columns were selected") self.logger.warn("No input columns were selected") + outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], add_index_columns=self.hyperparams['add_index_columns'], inputs=inputs, column_indices=self._training_indices, columns_list=output_columns) - self.logger.info('Statistical Maximum Primitive returned') return base.CallResult(outputs) @@ -314,6 +315,6 @@ class StatisticalMaximumPrimitive(transformer.TransformerPrimitiveBase[Inputs, O sequence = column_value[iter-window_size+1:iter+1] column_maximum[iter] = np.max(sequence) column_maximum[:window_size-1] = column_maximum[window_size-1] - transformed_X[column + "_maximum"] = column_maximum + transformed_X[str(column) + "_maximum"] = column_maximum return transformed_X diff --git a/tods/feature_analysis/StatisticalMean.py b/tods/feature_analysis/StatisticalMean.py index 4aac410..2ce57ec 100644 --- a/tods/feature_analysis/StatisticalMean.py +++ b/tods/feature_analysis/StatisticalMean.py @@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base from d3m.base import utils as base_utils from d3m.exceptions import PrimitiveNotFittedError +from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase __all__ = ('StatisticalMeanPrimitive',) @@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): -class StatisticalMeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class StatisticalMeanPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Primitive to find mean of time series """ @@ -110,7 +111,7 @@ class StatisticalMeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outp 'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMeanPrimitive')), }) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: """ Args: diff --git a/tods/feature_analysis/StatisticalMeanAbs.py b/tods/feature_analysis/StatisticalMeanAbs.py index 332f0e7..6ecab42 100644 --- a/tods/feature_analysis/StatisticalMeanAbs.py +++ b/tods/feature_analysis/StatisticalMeanAbs.py @@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base from d3m.base import utils as base_utils from d3m.exceptions import PrimitiveNotFittedError +from 
..common.TODSBasePrimitives import TODSTransformerPrimitiveBase __all__ = ('StatisticalMeanAbsPrimitive',) @@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): -class StatisticalMeanAbsPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class StatisticalMeanAbsPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Primitive to find mean_abs of time series """ @@ -109,7 +110,7 @@ class StatisticalMeanAbsPrimitive(transformer.TransformerPrimitiveBase[Inputs, O 'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMeanAbsPrimitive')), }) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: """ Args: diff --git a/tods/feature_analysis/StatisticalMeanAbsTemporalDerivative.py b/tods/feature_analysis/StatisticalMeanAbsTemporalDerivative.py index 257f82b..3eb6c56 100644 --- a/tods/feature_analysis/StatisticalMeanAbsTemporalDerivative.py +++ b/tods/feature_analysis/StatisticalMeanAbsTemporalDerivative.py @@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base from d3m.base import utils as base_utils from d3m.exceptions import PrimitiveNotFittedError +from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase __all__ = ('StatisticalMeanAbsTemporalDerivativePrimitive',) @@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): -class StatisticalMeanAbsTemporalDerivativePrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class StatisticalMeanAbsTemporalDerivativePrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Primitive to find mean_abs_temporal_derivative of time series """ @@ -110,7 +111,7 @@ class StatisticalMeanAbsTemporalDerivativePrimitive(transformer.TransformerPrimi 'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMeanAbsTemporalDerivativePrimitive')), }) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: """ Args: diff --git a/tods/feature_analysis/StatisticalMeanTemporalDerivative.py b/tods/feature_analysis/StatisticalMeanTemporalDerivative.py index d1902f2..09fd6f2 100644 --- a/tods/feature_analysis/StatisticalMeanTemporalDerivative.py +++ b/tods/feature_analysis/StatisticalMeanTemporalDerivative.py @@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base from d3m.base import utils as base_utils from d3m.exceptions import PrimitiveNotFittedError +from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase __all__ = ('StatisticalMeanTemporalDerivativePrimitive',) @@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): -class StatisticalMeanTemporalDerivativePrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class StatisticalMeanTemporalDerivativePrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Primitive to find mean_temporal_derivative of time series """ @@ -110,7 +111,7 @@ class StatisticalMeanTemporalDerivativePrimitive(transformer.TransformerPrimitiv 'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMeanTemporalDerivativePrimitive')), }) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + def _produce(self, *, inputs: 
Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: """ Args: diff --git a/tods/feature_analysis/StatisticalMedian.py b/tods/feature_analysis/StatisticalMedian.py index 29264cd..8cb7889 100644 --- a/tods/feature_analysis/StatisticalMedian.py +++ b/tods/feature_analysis/StatisticalMedian.py @@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base from d3m.base import utils as base_utils from d3m.exceptions import PrimitiveNotFittedError +from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase __all__ = ('StatisticalMedianPrimitive',) @@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): -class StatisticalMedianPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class StatisticalMedianPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Primitive to find median of time series """ @@ -110,7 +111,7 @@ class StatisticalMedianPrimitive(transformer.TransformerPrimitiveBase[Inputs, Ou }) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: """ Args: diff --git a/tods/feature_analysis/StatisticalMedianAbsoluteDeviation.py b/tods/feature_analysis/StatisticalMedianAbsoluteDeviation.py index 2b0cc66..afae604 100644 --- a/tods/feature_analysis/StatisticalMedianAbsoluteDeviation.py +++ b/tods/feature_analysis/StatisticalMedianAbsoluteDeviation.py @@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base from d3m.base import utils as base_utils from d3m.exceptions import PrimitiveNotFittedError +from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase __all__ = ('StatisticalMedianAbsoluteDeviationPrimitive',) @@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams): -class StatisticalMedianAbsoluteDeviationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class StatisticalMedianAbsoluteDeviationPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Primitive to find median_absolute_deviation of time series """ @@ -111,7 +112,7 @@ class StatisticalMedianAbsoluteDeviationPrimitive(transformer.TransformerPrimiti 'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMedianAbsoluteDeviationPrimitive')), }) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: """ Args: diff --git a/tods/feature_analysis/StatisticalMinimum.py b/tods/feature_analysis/StatisticalMinimum.py index ea865fa..8202944 100644 --- a/tods/feature_analysis/StatisticalMinimum.py +++ b/tods/feature_analysis/StatisticalMinimum.py @@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base from d3m.base import utils as base_utils from d3m.exceptions import PrimitiveNotFittedError +from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase __all__ = ('StatisticalMinimumPrimitive',) @@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): -class StatisticalMinimumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): +class StatisticalMinimumPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): """ Primitive to find minimum of time series """ @@ -110,7 +111,7 @@ class 
StatisticalMinimumPrimitive(transformer.TransformerPrimitiveBase[Inputs, O
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMinimumPrimitive')),
     })
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
diff --git a/tods/feature_analysis/StatisticalSkew.py b/tods/feature_analysis/StatisticalSkew.py
index 97f79fa..7891608 100644
--- a/tods/feature_analysis/StatisticalSkew.py
+++ b/tods/feature_analysis/StatisticalSkew.py
@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
 __all__ = ('StatisticalSkewPrimitive',)
@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalSkewPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalSkewPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find skew of time series
     """
@@ -111,7 +112,7 @@ class StatisticalSkewPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outp
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalSkewPrimitive')),
     })
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
diff --git a/tods/feature_analysis/StatisticalStd.py b/tods/feature_analysis/StatisticalStd.py
index bd41999..903e401 100644
--- a/tods/feature_analysis/StatisticalStd.py
+++ b/tods/feature_analysis/StatisticalStd.py
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
 __all__ = ('StatisticalStdPrimitive',)
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalStdPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalStdPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find std of time series
     """
@@ -110,7 +111,7 @@ class StatisticalStdPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outpu
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalStdPrimitive')),
     })
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
diff --git a/tods/feature_analysis/StatisticalVar.py b/tods/feature_analysis/StatisticalVar.py
index 65aee21..a7e594b 100644
--- a/tods/feature_analysis/StatisticalVar.py
+++ b/tods/feature_analysis/StatisticalVar.py
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
 __all__ = ('StatisticalVarPrimitive',)
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalVarPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalVarPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find var of time series
     """
@@ -109,7 +110,7 @@ class StatisticalVarPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outpu
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalVarPrimitive')),
     })
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
diff --git a/tods/feature_analysis/StatisticalVariation.py b/tods/feature_analysis/StatisticalVariation.py
index 6ed78aa..73fe1d1 100644
--- a/tods/feature_analysis/StatisticalVariation.py
+++ b/tods/feature_analysis/StatisticalVariation.py
@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
 __all__ = ('StatisticalVariationPrimitive',)
@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalVariationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalVariationPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find variation of time series
     """
@@ -112,7 +113,7 @@ class StatisticalVariationPrimitive(transformer.TransformerPrimitiveBase[Inputs,
     })
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
diff --git a/tods/feature_analysis/StatisticalVecSum.py b/tods/feature_analysis/StatisticalVecSum.py
index fe0b8e6..a649702 100644
--- a/tods/feature_analysis/StatisticalVecSum.py
+++ b/tods/feature_analysis/StatisticalVecSum.py
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
 __all__ = ('StatisticalVecSumPrimitive',)
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalVecSumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalVecSumPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find vec_sum of time series
     """
@@ -110,7 +111,7 @@ class StatisticalVecSumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Ou
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalVecSumPrimitive')),
     })
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
diff --git a/tods/feature_analysis/StatisticalWillisonAmplitude.py b/tods/feature_analysis/StatisticalWillisonAmplitude.py
index 87bd9e4..73c295d 100644
--- a/tods/feature_analysis/StatisticalWillisonAmplitude.py
+++ b/tods/feature_analysis/StatisticalWillisonAmplitude.py
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
 __all__ = ('StatisticalWillisonAmplitudePrimitive',)
@@ -91,7 +92,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalWillisonAmplitudePrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalWillisonAmplitudePrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find willison amplitude of time series
     """
@@ -114,7 +115,7 @@ class StatisticalWillisonAmplitudePrimitive(transformer.TransformerPrimitiveBase
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalWillisonAmplitudePrimitive')),
     })
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
diff --git a/tods/feature_analysis/StatisticalZeroCrossing.py b/tods/feature_analysis/StatisticalZeroCrossing.py
index b8cbe8f..83b3a2a 100644
--- a/tods/feature_analysis/StatisticalZeroCrossing.py
+++ b/tods/feature_analysis/StatisticalZeroCrossing.py
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
 __all__ = ('StatisticalZeroCrossingPrimitive',)
@@ -83,7 +84,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalZeroCrossingPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalZeroCrossingPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find zero_crossing of time series. A column indicating zero crossing on ith row . 1 indicates crossing 0 is for normal
     """
@@ -105,7 +106,7 @@ class StatisticalZeroCrossingPrimitive(transformer.TransformerPrimitiveBase[Inpu
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalZeroCrossingPrimitive')),
     })
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
diff --git a/tods/feature_analysis/TRMF.py b/tods/feature_analysis/TRMF.py
index 064fb18..a4a3fd9 100644
--- a/tods/feature_analysis/TRMF.py
+++ b/tods/feature_analysis/TRMF.py
@@ -22,6 +22,7 @@ from d3m.exceptions import PrimitiveNotFittedError
 from d3m.primitive_interfaces.base import CallResult, DockerContainer
 from d3m.primitive_interfaces import base, transformer
 # from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
 Inputs = d3m_dataframe
@@ -161,7 +162,7 @@ class Hyperparams(hyperparams.Hyperparams):
         semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
     )
-class TRMFPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class TRMFPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """Temporal Regularized Matrix Factorization.
     Parameters
@@ -241,7 +242,7 @@ class TRMFPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperp
     })
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
         """
         Process the testing data.
         Args:
diff --git a/tods/feature_analysis/WaveletTransform.py b/tods/feature_analysis/WaveletTransform.py
index 6725b00..68827f3 100644
--- a/tods/feature_analysis/WaveletTransform.py
+++ b/tods/feature_analysis/WaveletTransform.py
@@ -19,6 +19,7 @@ from collections import OrderedDict
 from scipy import sparse
 import logging
 import uuid
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
 __all__ = ('WaveletTransformPrimitive',)
@@ -148,7 +149,7 @@ class Hyperparams(hyperparams.Hyperparams):
 )
-class WaveletTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class WaveletTransformPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     A primitive of Multilevel 1D Discrete Wavelet Transform of data.
     See `PyWavelet documentation `_ for details.
@@ -203,7 +204,7 @@ class WaveletTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Out
     )
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Process the testing data.
         Args:
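Every feature_analysis hunk above makes the same two changes: it imports TODSTransformerPrimitiveBase from tods/common/TODSBasePrimitives.py and swaps it in as the base class, and it renames produce to _produce. That common module is not part of this diff, so the following minimal sketch of the presumed wrapper pattern is an illustration only, not the actual TODS implementation:

    # Hypothetical sketch; the real class lives in tods/common/TODSBasePrimitives.py
    # and is not shown in this diff.
    class TODSTransformerPrimitiveBase:
        def produce(self, *, inputs, timeout=None, iterations=None):
            # Public d3m entry point: any shared handling (for example, fanning a
            # system-wise batch of time series out to per-series calls) happens
            # once here, on behalf of every subclass.
            return self._produce(inputs=inputs, timeout=timeout, iterations=iterations)

        def _produce(self, *, inputs, timeout=None, iterations=None):
            # Each primitive (StatisticalSkewPrimitive, TRMFPrimitive, ...) overrides
            # this with what used to be the body of its produce().
            raise NotImplementedError

Under this reading, the rename lets the shared base class own the public produce() contract while each primitive keeps only its core transformation.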
diff --git a/tods/tests/detection_algorithm/test_PyodABOD.py b/tods/tests/detection_algorithm/test_PyodABOD.py
index 0efc789..7afc588 100644
--- a/tods/tests/detection_algorithm/test_PyodABOD.py
+++ b/tods/tests/detection_algorithm/test_PyodABOD.py
@@ -126,7 +126,7 @@ class ABODTest(unittest.TestCase):
             'selector': ['__ALL_ELEMENTS__', 0],
             'metadata': {
                 'name': 'Angle-base Outlier Detection Primitive0_0',
-                'structural_type': 'numpy.float64',
+                'structural_type': 'numpy.int64',
                 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute']
             },
         }])
diff --git a/tods/tests/detection_algorithm/test_PyodHBOS.py b/tods/tests/detection_algorithm/test_PyodHBOS.py
index f6a25f4..c541ddd 100644
--- a/tods/tests/detection_algorithm/test_PyodHBOS.py
+++ b/tods/tests/detection_algorithm/test_PyodHBOS.py
@@ -63,8 +63,6 @@ class HBOSTest(unittest.TestCase):
         primitive.fit()
         new_main = primitive.produce(inputs=main).value
         new_main_score = primitive.produce_score(inputs=main).value
-        print(new_main)
-        print(new_main_score)
         self.assertEqual(utils.to_json_structure(new_main.metadata.to_internal_simple_structure()), [{
@@ -125,7 +123,7 @@ class HBOSTest(unittest.TestCase):
             'selector': ['__ALL_ELEMENTS__', 0],
             'metadata': {
                 'name': 'HBOS0_0',
-                'structural_type': 'numpy.float64',
+                'structural_type': 'numpy.int64',
                 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute']
             },
         }])
diff --git a/tods/tests/detection_algorithm/test_Telemanom.py b/tods/tests/detection_algorithm/test_Telemanom.py
index 20070e0..21829dd 100644
--- a/tods/tests/detection_algorithm/test_Telemanom.py
+++ b/tods/tests/detection_algorithm/test_Telemanom.py
@@ -5,7 +5,7 @@ from d3m.metadata import base as metadata_base
 from tods.detection_algorithm.Telemanom import TelemanomPrimitive
-class SODTest(unittest.TestCase):
+class TelemanomTest(unittest.TestCase):
     def test_basic(self):
         self.maxDiff = None
         main = container.DataFrame({'a': [1., 2., 3., 4.,5,6,7,8,9], 'b': [2., 3., 4., 5.,6,7,8,9,10], 'c': [3., 4., 5., 6.,7,8,9,10,11]},
@@ -91,21 +91,21 @@ class SODTest(unittest.TestCase):
             'selector': ['__ALL_ELEMENTS__', 0],
             'metadata': {
                 'name': 'Telemanom0_0',
-                'structural_type': 'numpy.float64',
+                'structural_type': 'numpy.int64',
                 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
             },
         }, {
             'selector': ['__ALL_ELEMENTS__', 1],
             'metadata': {
-                'structural_type': 'numpy.float64',
                 'name': 'Telemanom0_1',
+                'structural_type': 'numpy.int64',
                 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
             },
         }, {
             'selector': ['__ALL_ELEMENTS__', 2],
             'metadata': {
-                'structural_type': 'numpy.float64',
                 'name': 'Telemanom0_2',
+                'structural_type': 'numpy.int64',
                 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
             }
         }])
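The three detection_algorithm test hunks above make the same metadata fix: the expected structural_type of the columns returned by produce() changes from numpy.float64 to numpy.int64. Consistent with the HBOS test exercising both produce() and produce_score(), the apparent intent (an inference from the assertions, not stated in the diff) is that produce() now yields integer anomaly labels while produce_score() keeps the floating-point outlier scores:

    import numpy as np

    # Illustration of the assumed split between the two methods; the dtypes
    # mirror the structural_type values asserted in the updated tests.
    labels = np.array([0, 1, 0], dtype=np.int64)  # produce(): 0 = inlier, 1 = outlier
    scores = np.array([0.12, 0.97, 0.30])         # produce_score(): raw float scores
    assert labels.dtype == np.int64
    assert scores.dtype == np.float64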
diff --git a/tods/tests/feature_analysis/test_Autocorrelation.py b/tods/tests/feature_analysis/test_Autocorrelation.py
index a976125..dca11d4 100644
--- a/tods/tests/feature_analysis/test_Autocorrelation.py
+++ b/tods/tests/feature_analysis/test_Autocorrelation.py
@@ -66,7 +66,7 @@ class AutoCorrelationTestCase(unittest.TestCase):
         hyperparams_class = AutoCorrelation.AutoCorrelationPrimitive.metadata.get_hyperparams().defaults()
         hyperparams_class = hyperparams_class.replace({'nlags': 2})
         primitive = AutoCorrelation.AutoCorrelationPrimitive(hyperparams=hyperparams_class)
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         print(new_main)
         # new_main_drop = new_main['value_acf']
diff --git a/tods/tests/feature_analysis/test_BKFilter.py b/tods/tests/feature_analysis/test_BKFilter.py
index ba7219f..5ef4d5b 100755
--- a/tods/tests/feature_analysis/test_BKFilter.py
+++ b/tods/tests/feature_analysis/test_BKFilter.py
@@ -54,7 +54,7 @@ class BKFilterTest(unittest.TestCase):
         hyperparams_class = BKFilter.BKFilterPrimitive.metadata.get_hyperparams()
         primitive = BKFilter.BKFilterPrimitive(hyperparams=hyperparams_class.defaults())
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         print(new_main)
diff --git a/tods/tests/feature_analysis/test_DiscreteCosineTransform.py b/tods/tests/feature_analysis/test_DiscreteCosineTransform.py
index 1d32b01..6a8cfcc 100644
--- a/tods/tests/feature_analysis/test_DiscreteCosineTransform.py
+++ b/tods/tests/feature_analysis/test_DiscreteCosineTransform.py
@@ -66,7 +66,7 @@ class DctTestCase(unittest.TestCase):
             'return_result':'append',
         })
         primitive = DiscreteCosineTransform.DiscreteCosineTransformPrimitive(hyperparams=hp)
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         c = pd.DataFrame({"A":[1,2,3], "B":['a','b','c'],'A_dct_coeff':[1.200000e+01,-3.464102e+00,-4.440892e-16]})
diff --git a/tods/tests/feature_analysis/test_FastFourierTransform.py b/tods/tests/feature_analysis/test_FastFourierTransform.py
index e4f7ac4..b462c38 100644
--- a/tods/tests/feature_analysis/test_FastFourierTransform.py
+++ b/tods/tests/feature_analysis/test_FastFourierTransform.py
@@ -67,7 +67,7 @@ class FftTestCase(unittest.TestCase):
             'return_result':'append',
         })
         primitive = FastFourierTransform.FastFourierTransformPrimitive(hyperparams=hp)
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         c = pd.DataFrame({"A":[1,2,3], "B":['a','b','c'],'A_fft_abs':[6.000000,1.732051,1.732051],'A_fft_phse':[-0.000000,2.617994,-2.617994]})
diff --git a/tods/tests/feature_analysis/test_HPFilter.py b/tods/tests/feature_analysis/test_HPFilter.py
index 8bb227f..98fda51 100644
--- a/tods/tests/feature_analysis/test_HPFilter.py
+++ b/tods/tests/feature_analysis/test_HPFilter.py
@@ -54,7 +54,7 @@ class HPFilterTest(unittest.TestCase):
         hyperparams_class = HPFilter.HPFilterPrimitive.metadata.get_hyperparams()
         primitive = HPFilter.HPFilterPrimitive(hyperparams=hyperparams_class.defaults())
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         print(new_main)
diff --git a/tods/tests/feature_analysis/test_NonNegativeMatrixFactorization.py b/tods/tests/feature_analysis/test_NonNegativeMatrixFactorization.py
index a1a99a1..9c074fb 100644
--- a/tods/tests/feature_analysis/test_NonNegativeMatrixFactorization.py
+++ b/tods/tests/feature_analysis/test_NonNegativeMatrixFactorization.py
@@ -74,7 +74,7 @@ class NmfTestCase(unittest.TestCase):
             'H': b,
         })
         primitive = NonNegativeMatrixFactorization.NonNegativeMatrixFactorizationPrimitive(hyperparams=hp)
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         print("new_main",new_main)
         c = pd.DataFrame({"A":[1,2,3,np.nan,np.nan],
                           "B":[4,5,6,np.nan,np.nan],
diff --git a/tods/tests/feature_analysis/test_SpectralResidualTransform.py b/tods/tests/feature_analysis/test_SpectralResidualTransform.py
index 8b1cfde..773f5e3 100644
--- a/tods/tests/feature_analysis/test_SpectralResidualTransform.py
+++ b/tods/tests/feature_analysis/test_SpectralResidualTransform.py
@@ -55,7 +55,7 @@ class SpectralResidualTransformTestCase(unittest.TestCase):
         primitive = SpectralResidualTransform.SpectralResidualTransformPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
diff --git a/tods/tests/feature_analysis/test_StastiticalStd.py b/tods/tests/feature_analysis/test_StastiticalStd.py
index 138ba0d..e0e968b 100644
--- a/tods/tests/feature_analysis/test_StastiticalStd.py
+++ b/tods/tests/feature_analysis/test_StastiticalStd.py
@@ -56,7 +56,7 @@ class StatisticalStdTestCase(unittest.TestCase):
         primitive = StatisticalStd.StatisticalStdPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
diff --git a/tods/tests/feature_analysis/test_StatisticalAbsEnergy.py b/tods/tests/feature_analysis/test_StatisticalAbsEnergy.py
index 77fa942..8d02100 100644
--- a/tods/tests/feature_analysis/test_StatisticalAbsEnergy.py
+++ b/tods/tests/feature_analysis/test_StatisticalAbsEnergy.py
@@ -56,7 +56,7 @@ class StatisticalAbsEnergyTestCase(unittest.TestCase):
         primitive = StatisticalAbsEnergy.StatisticalAbsEnergyPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
diff --git a/tods/tests/feature_analysis/test_StatisticalAbsSum.py b/tods/tests/feature_analysis/test_StatisticalAbsSum.py
index 47d58ae..b3431b2 100644
--- a/tods/tests/feature_analysis/test_StatisticalAbsSum.py
+++ b/tods/tests/feature_analysis/test_StatisticalAbsSum.py
@@ -56,7 +56,7 @@ class StatisticalAbsSumTestCase(unittest.TestCase):
         primitive = StatisticalAbsSum.StatisticalAbsSumPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, -5.0, 6.0],
diff --git a/tods/tests/feature_analysis/test_StatisticalGmean.py b/tods/tests/feature_analysis/test_StatisticalGmean.py
index 5f3b373..5705dd1 100644
--- a/tods/tests/feature_analysis/test_StatisticalGmean.py
+++ b/tods/tests/feature_analysis/test_StatisticalGmean.py
@@ -56,7 +56,7 @@ class StatisticalGmeanTestCase(unittest.TestCase):
         primitive = StatisticalGmean.StatisticalGmeanPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_gmean', 'b_gmean']])
         expected_output = container.DataFrame(
diff --git a/tods/tests/feature_analysis/test_StatisticalHmean.py b/tods/tests/feature_analysis/test_StatisticalHmean.py
index 2710d61..5e0d47b 100644
--- a/tods/tests/feature_analysis/test_StatisticalHmean.py
+++ b/tods/tests/feature_analysis/test_StatisticalHmean.py
@@ -56,7 +56,7 @@ class StatisticalHmeanTestCase(unittest.TestCase):
         primitive = StatisticalHmean.StatisticalHmeanPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         #print(output_main[['values_hmean', 'b_hmean']])
         expected_output = container.DataFrame(
diff --git a/tods/tests/feature_analysis/test_StatisticalKurtosis.py b/tods/tests/feature_analysis/test_StatisticalKurtosis.py
index 7ed5807..05d2cba 100644
--- a/tods/tests/feature_analysis/test_StatisticalKurtosis.py
+++ b/tods/tests/feature_analysis/test_StatisticalKurtosis.py
@@ -56,7 +56,7 @@ class StatisticalKurtosisTestCase(unittest.TestCase):
         primitive = StatisticalKurtosis.StatisticalKurtosisPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_kurtosis', 'b_kurtosis']])
         expected_output = container.DataFrame(
diff --git a/tods/tests/feature_analysis/test_StatisticalMaximum.py b/tods/tests/feature_analysis/test_StatisticalMaximum.py
index 48c189f..9454c75 100644
--- a/tods/tests/feature_analysis/test_StatisticalMaximum.py
+++ b/tods/tests/feature_analysis/test_StatisticalMaximum.py
@@ -56,7 +56,7 @@ class StatisticalMaximumTestCase(unittest.TestCase):
         primitive = StatisticalMaximum.StatisticalMaximumPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
diff --git a/tods/tests/feature_analysis/test_StatisticalMean.py b/tods/tests/feature_analysis/test_StatisticalMean.py
index 138a035..6111e14 100644
--- a/tods/tests/feature_analysis/test_StatisticalMean.py
+++ b/tods/tests/feature_analysis/test_StatisticalMean.py
@@ -56,7 +56,7 @@ class StatisticalMeanTestCase(unittest.TestCase):
         primitive = StatisticalMean.StatisticalMeanPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
diff --git a/tods/tests/feature_analysis/test_StatisticalMeanAbs.py b/tods/tests/feature_analysis/test_StatisticalMeanAbs.py
index 5f71cf3..d540edd 100644
--- a/tods/tests/feature_analysis/test_StatisticalMeanAbs.py
+++ b/tods/tests/feature_analysis/test_StatisticalMeanAbs.py
@@ -56,7 +56,7 @@ class StatisticalMeanAbsTestCase(unittest.TestCase):
         primitive = StatisticalMeanAbs.StatisticalMeanAbsPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
diff --git a/tods/tests/feature_analysis/test_StatisticalMeanAbsTemporalDerivative.py b/tods/tests/feature_analysis/test_StatisticalMeanAbsTemporalDerivative.py
index 8cccb93..59c37ac 100644
--- a/tods/tests/feature_analysis/test_StatisticalMeanAbsTemporalDerivative.py
+++ b/tods/tests/feature_analysis/test_StatisticalMeanAbsTemporalDerivative.py
@@ -56,7 +56,7 @@ class StatisticalMeanAbsTemporalDerivativeTestCase(unittest.TestCase):
         primitive = StatisticalMeanAbsTemporalDerivative.StatisticalMeanAbsTemporalDerivativePrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_mean_abs_temporal_derivative', 'b_mean_abs_temporal_derivative']])
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
diff --git a/tods/tests/feature_analysis/test_StatisticalMeanTemporalDerivative.py b/tods/tests/feature_analysis/test_StatisticalMeanTemporalDerivative.py
index 942bfba..e508c82 100644
--- a/tods/tests/feature_analysis/test_StatisticalMeanTemporalDerivative.py
+++ b/tods/tests/feature_analysis/test_StatisticalMeanTemporalDerivative.py
@@ -56,7 +56,7 @@ class StatisticalMeanTemporalDerivativeTestCase(unittest.TestCase):
         primitive = StatisticalMeanTemporalDerivative.StatisticalMeanTemporalDerivativePrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_mean_temporal_derivative', 'b_mean_temporal_derivative']])
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
diff --git a/tods/tests/feature_analysis/test_StatisticalMedian.py b/tods/tests/feature_analysis/test_StatisticalMedian.py
index 2c921c6..e3dd206 100644
--- a/tods/tests/feature_analysis/test_StatisticalMedian.py
+++ b/tods/tests/feature_analysis/test_StatisticalMedian.py
@@ -56,7 +56,7 @@ class StatisticalMedianTestCase(unittest.TestCase):
         primitive = StatisticalMedian.StatisticalMedianPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
diff --git a/tods/tests/feature_analysis/test_StatisticalMedianAbsoluteDeviation.py b/tods/tests/feature_analysis/test_StatisticalMedianAbsoluteDeviation.py
index 1c232ce..689bc4f 100644
--- a/tods/tests/feature_analysis/test_StatisticalMedianAbsoluteDeviation.py
+++ b/tods/tests/feature_analysis/test_StatisticalMedianAbsoluteDeviation.py
@@ -56,7 +56,7 @@ class StatisticalMedianAbsoluteDeviationTestCase(unittest.TestCase):
         primitive = StatisticalMedianAbsoluteDeviation.StatisticalMedianAbsoluteDeviationPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_median_absolute_deviation', 'b_median_absolute_deviation']])
         expected_output = container.DataFrame(
diff --git a/tods/tests/feature_analysis/test_StatisticalMinimum.py b/tods/tests/feature_analysis/test_StatisticalMinimum.py
index cd1f444..34c106b 100644
--- a/tods/tests/feature_analysis/test_StatisticalMinimum.py
+++ b/tods/tests/feature_analysis/test_StatisticalMinimum.py
@@ -56,7 +56,7 @@ class StatisticalMinimumTestCase(unittest.TestCase):
         primitive = StatisticalMinimum.StatisticalMinimumPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
diff --git a/tods/tests/feature_analysis/test_StatisticalSkew.py b/tods/tests/feature_analysis/test_StatisticalSkew.py
index 92719ca..b2f4627 100644
--- a/tods/tests/feature_analysis/test_StatisticalSkew.py
+++ b/tods/tests/feature_analysis/test_StatisticalSkew.py
@@ -56,7 +56,7 @@ class StatisticalSkewTestCase(unittest.TestCase):
         primitive = StatisticalSkew.StatisticalSkewPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_skew', 'b_skew']])
         expected_output = container.DataFrame(
diff --git a/tods/tests/feature_analysis/test_StatisticalVar.py b/tods/tests/feature_analysis/test_StatisticalVar.py
index 9ff58d1..0d3b2ae 100644
--- a/tods/tests/feature_analysis/test_StatisticalVar.py
+++ b/tods/tests/feature_analysis/test_StatisticalVar.py
@@ -56,7 +56,7 @@ class StatisticalVarTestCase(unittest.TestCase):
         primitive = StatisticalVar.StatisticalVarPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
diff --git a/tods/tests/feature_analysis/test_StatisticalVariation.py b/tods/tests/feature_analysis/test_StatisticalVariation.py
index 5fa5ad1..9340e95 100644
--- a/tods/tests/feature_analysis/test_StatisticalVariation.py
+++ b/tods/tests/feature_analysis/test_StatisticalVariation.py
@@ -56,7 +56,7 @@ class StatisticalVariationTestCase(unittest.TestCase):
         primitive = StatisticalVariation.StatisticalVariationPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_variation', 'b_variation']])
         expected_output = container.DataFrame(
diff --git a/tods/tests/feature_analysis/test_StatisticalVecSum.py b/tods/tests/feature_analysis/test_StatisticalVecSum.py
index 75358df..640c749 100644
--- a/tods/tests/feature_analysis/test_StatisticalVecSum.py
+++ b/tods/tests/feature_analysis/test_StatisticalVecSum.py
@@ -56,7 +56,7 @@ class StatisticalVecSumTestCase(unittest.TestCase):
         primitive = StatisticalVecSum.StatisticalVecSumPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, -5.0, 6.0],
diff --git a/tods/tests/feature_analysis/test_StatisticalWillisonAmplitude.py b/tods/tests/feature_analysis/test_StatisticalWillisonAmplitude.py
index f3a326f..043be8b 100644
--- a/tods/tests/feature_analysis/test_StatisticalWillisonAmplitude.py
+++ b/tods/tests/feature_analysis/test_StatisticalWillisonAmplitude.py
@@ -57,7 +57,7 @@ class StatisticalWillisonAmplitudeTestCase(unittest.TestCase):
         primitive = StatisticalWillisonAmplitude.StatisticalWillisonAmplitudePrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_willison_amplitude', 'b_willison_amplitude']])
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
diff --git a/tods/tests/feature_analysis/test_StatisticalZeroCrossing.py b/tods/tests/feature_analysis/test_StatisticalZeroCrossing.py
index 893b444..c723bad 100644
--- a/tods/tests/feature_analysis/test_StatisticalZeroCrossing.py
+++ b/tods/tests/feature_analysis/test_StatisticalZeroCrossing.py
@@ -55,7 +55,7 @@ class StatisticalZeroCrossingTestCase(unittest.TestCase):
         primitive = StatisticalZeroCrossing.StatisticalZeroCrossingPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, -2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
diff --git a/tods/tests/feature_analysis/test_TRMF.py b/tods/tests/feature_analysis/test_TRMF.py
index 5593af5..378fa28 100644
--- a/tods/tests/feature_analysis/test_TRMF.py
+++ b/tods/tests/feature_analysis/test_TRMF.py
@@ -56,7 +56,7 @@ class TRMFTest(unittest.TestCase):
         primitive = TRMF.TRMFPrimitive(hyperparams=hyperparams_class.defaults())
         # primitive.set_training_data(inputs=main)
         # primitive.fit()
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         print(new_main)
diff --git a/tods/tests/feature_analysis/test_WaveletTransformer.py b/tods/tests/feature_analysis/test_WaveletTransformer.py
index a81143a..59bd3f3 100644
--- a/tods/tests/feature_analysis/test_WaveletTransformer.py
+++ b/tods/tests/feature_analysis/test_WaveletTransformer.py
@@ -28,7 +28,7 @@ class WaveletTransformerTestCase(unittest.TestCase):
             'return_result': 'new'})
         primitive = WaveletTransformPrimitive(hyperparams=hyperparams)
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         # print(new_main)
         # print(mean_mse, std_mse)
@@ -89,7 +89,7 @@ class WaveletTransformerTestCase(unittest.TestCase):
         hyperparams = hyperparams_default.replace({'inverse': 1})
         primitive = WaveletTransformPrimitive(hyperparams=hyperparams)
-        main_recover = primitive.produce(inputs=main).value
+        main_recover = primitive._produce(inputs=main).value
         self.assertAlmostEqual(main_recover.values.tolist(), main.values.tolist(), delta=1e-6)
         # print(main.metadata.to_internal_simple_structure())
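All the feature_analysis test hunks above replace primitive.produce(...) with primitive._produce(...), so the unit tests now exercise each primitive's own logic directly instead of going through the shared base-class produce(). A hedged usage sketch, reusing the get_hyperparams()/defaults() pattern visible in the diff (the DataFrame contents and the choice of StatisticalMean are illustrative only):

    from d3m import container
    from tods.feature_analysis import StatisticalMean

    main = container.DataFrame({'timestamp': [1, 3, 2, 5],
                                'values': [1.0, 2.0, 3.0, 4.0]},
                               generate_metadata=True)
    hp = StatisticalMean.StatisticalMeanPrimitive.metadata.get_hyperparams().defaults()
    primitive = StatisticalMean.StatisticalMeanPrimitive(hyperparams=hp)

    new_main = primitive.produce(inputs=main).value   # public path, via the base class
    raw_main = primitive._produce(inputs=main).value  # what these tests now call

Calling a leading-underscore method from tests trades encapsulation for isolation from whatever extra handling the shared base class layers on top.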