@@ -33,72 +33,72 @@ d3mIndex,system,label | |||
31,31.csv,1 | |||
32,32.csv,1 | |||
33,33.csv,1 | |||
34,34.csv,2 | |||
35,35.csv,2 | |||
36,36.csv,2 | |||
37,37.csv,2 | |||
38,38.csv,2 | |||
39,39.csv,2 | |||
40,40.csv,2 | |||
41,41.csv,2 | |||
42,42.csv,2 | |||
43,43.csv,2 | |||
44,44.csv,2 | |||
45,45.csv,2 | |||
46,46.csv,2 | |||
47,47.csv,2 | |||
48,48.csv,2 | |||
49,49.csv,2 | |||
50,50.csv,2 | |||
51,51.csv,2 | |||
52,52.csv,2 | |||
53,53.csv,2 | |||
54,54.csv,2 | |||
55,55.csv,2 | |||
56,56.csv,2 | |||
57,57.csv,2 | |||
58,58.csv,2 | |||
59,59.csv,2 | |||
60,60.csv,2 | |||
61,61.csv,2 | |||
62,62.csv,2 | |||
63,63.csv,2 | |||
64,64.csv,2 | |||
65,65.csv,2 | |||
66,66.csv,2 | |||
67,67.csv,2 | |||
68,68.csv,2 | |||
69,69.csv,2 | |||
70,70.csv,2 | |||
71,71.csv,2 | |||
72,72.csv,2 | |||
73,73.csv,2 | |||
74,74.csv,2 | |||
75,75.csv,2 | |||
76,76.csv,2 | |||
77,77.csv,2 | |||
78,78.csv,2 | |||
79,79.csv,2 | |||
80,80.csv,2 | |||
81,81.csv,2 | |||
82,82.csv,2 | |||
83,83.csv,2 | |||
84,84.csv,2 | |||
85,85.csv,2 | |||
86,86.csv,2 | |||
87,87.csv,2 | |||
88,88.csv,2 | |||
89,89.csv,2 | |||
90,90.csv,2 | |||
91,91.csv,2 | |||
92,92.csv,2 | |||
93,93.csv,2 | |||
94,94.csv,2 | |||
95,95.csv,2 | |||
96,96.csv,2 | |||
97,97.csv,2 | |||
98,98.csv,2 | |||
99,99.csv,2 | |||
34,34.csv,0 | |||
35,35.csv,0 | |||
36,36.csv,0 | |||
37,37.csv,0 | |||
38,38.csv,0 | |||
39,39.csv,0 | |||
40,40.csv,0 | |||
41,41.csv,0 | |||
42,42.csv,0 | |||
43,43.csv,0 | |||
44,44.csv,0 | |||
45,45.csv,0 | |||
46,46.csv,0 | |||
47,47.csv,0 | |||
48,48.csv,0 | |||
49,49.csv,0 | |||
50,50.csv,0 | |||
51,51.csv,0 | |||
52,52.csv,0 | |||
53,53.csv,0 | |||
54,54.csv,0 | |||
55,55.csv,0 | |||
56,56.csv,0 | |||
57,57.csv,0 | |||
58,58.csv,0 | |||
59,59.csv,0 | |||
60,60.csv,0 | |||
61,61.csv,0 | |||
62,62.csv,0 | |||
63,63.csv,0 | |||
64,64.csv,0 | |||
65,65.csv,0 | |||
66,66.csv,0 | |||
67,67.csv,0 | |||
68,68.csv,0 | |||
69,69.csv,0 | |||
70,70.csv,0 | |||
71,71.csv,0 | |||
72,72.csv,0 | |||
73,73.csv,0 | |||
74,74.csv,0 | |||
75,75.csv,0 | |||
76,76.csv,0 | |||
77,77.csv,0 | |||
78,78.csv,0 | |||
79,79.csv,0 | |||
80,80.csv,0 | |||
81,81.csv,0 | |||
82,82.csv,0 | |||
83,83.csv,0 | |||
84,84.csv,0 | |||
85,85.csv,0 | |||
86,86.csv,0 | |||
87,87.csv,0 | |||
88,88.csv,0 | |||
89,89.csv,0 | |||
90,90.csv,0 | |||
91,91.csv,0 | |||
92,92.csv,0 | |||
93,93.csv,0 | |||
94,94.csv,0 | |||
95,95.csv,0 | |||
96,96.csv,0 | |||
97,97.csv,0 | |||
98,98.csv,0 | |||
99,99.csv,0 | |||
100,100.csv,1 | |||
101,101.csv,1 | |||
102,102.csv,1 | |||
@@ -132,70 +132,70 @@ d3mIndex,system,label | |||
130,130.csv,1 | |||
131,131.csv,1 | |||
132,132.csv,1 | |||
133,133.csv,2 | |||
134,134.csv,2 | |||
135,135.csv,2 | |||
136,136.csv,2 | |||
137,137.csv,2 | |||
138,138.csv,2 | |||
139,139.csv,2 | |||
140,140.csv,2 | |||
141,141.csv,2 | |||
142,142.csv,2 | |||
143,143.csv,2 | |||
144,144.csv,2 | |||
145,145.csv,2 | |||
146,146.csv,2 | |||
147,147.csv,2 | |||
148,148.csv,2 | |||
149,149.csv,2 | |||
150,150.csv,2 | |||
151,151.csv,2 | |||
152,152.csv,2 | |||
153,153.csv,2 | |||
154,154.csv,2 | |||
155,155.csv,2 | |||
156,156.csv,2 | |||
157,157.csv,2 | |||
158,158.csv,2 | |||
159,159.csv,2 | |||
160,160.csv,2 | |||
161,161.csv,2 | |||
162,162.csv,2 | |||
163,163.csv,2 | |||
164,164.csv,2 | |||
165,165.csv,2 | |||
166,166.csv,2 | |||
167,167.csv,2 | |||
168,168.csv,2 | |||
169,169.csv,2 | |||
170,170.csv,2 | |||
171,171.csv,2 | |||
172,172.csv,2 | |||
173,173.csv,2 | |||
174,174.csv,2 | |||
175,175.csv,2 | |||
176,176.csv,2 | |||
177,177.csv,2 | |||
178,178.csv,2 | |||
179,179.csv,2 | |||
180,180.csv,2 | |||
181,181.csv,2 | |||
182,182.csv,2 | |||
183,183.csv,2 | |||
184,184.csv,2 | |||
185,185.csv,2 | |||
186,186.csv,2 | |||
187,187.csv,2 | |||
188,188.csv,2 | |||
189,189.csv,2 | |||
190,190.csv,2 | |||
191,191.csv,2 | |||
192,192.csv,2 | |||
193,193.csv,2 | |||
194,194.csv,2 | |||
195,195.csv,2 | |||
196,196.csv,2 | |||
197,197.csv,2 | |||
198,198.csv,2 | |||
199,199.csv,2 | |||
133,133.csv,0 | |||
134,134.csv,0 | |||
135,135.csv,0 | |||
136,136.csv,0 | |||
137,137.csv,0 | |||
138,138.csv,0 | |||
139,139.csv,0 | |||
140,140.csv,0 | |||
141,141.csv,0 | |||
142,142.csv,0 | |||
143,143.csv,0 | |||
144,144.csv,0 | |||
145,145.csv,0 | |||
146,146.csv,0 | |||
147,147.csv,0 | |||
148,148.csv,0 | |||
149,149.csv,0 | |||
150,150.csv,0 | |||
151,151.csv,0 | |||
152,152.csv,0 | |||
153,153.csv,0 | |||
154,154.csv,0 | |||
155,155.csv,0 | |||
156,156.csv,0 | |||
157,157.csv,0 | |||
158,158.csv,0 | |||
159,159.csv,0 | |||
160,160.csv,0 | |||
161,161.csv,0 | |||
162,162.csv,0 | |||
163,163.csv,0 | |||
164,164.csv,0 | |||
165,165.csv,0 | |||
166,166.csv,0 | |||
167,167.csv,0 | |||
168,168.csv,0 | |||
169,169.csv,0 | |||
170,170.csv,0 | |||
171,171.csv,0 | |||
172,172.csv,0 | |||
173,173.csv,0 | |||
174,174.csv,0 | |||
175,175.csv,0 | |||
176,176.csv,0 | |||
177,177.csv,0 | |||
178,178.csv,0 | |||
179,179.csv,0 | |||
180,180.csv,0 | |||
181,181.csv,0 | |||
182,182.csv,0 | |||
183,183.csv,0 | |||
184,184.csv,0 | |||
185,185.csv,0 | |||
186,186.csv,0 | |||
187,187.csv,0 | |||
188,188.csv,0 | |||
189,189.csv,0 | |||
190,190.csv,0 | |||
191,191.csv,0 | |||
192,192.csv,0 | |||
193,193.csv,0 | |||
194,194.csv,0 | |||
195,195.csv,0 | |||
196,196.csv,0 | |||
197,197.csv,0 | |||
198,198.csv,0 | |||
199,199.csv,0 |
@@ -1 +1 @@ | |||
{"id": "bfd8aedf-36be-4dad-af8a-c324a03db5f9", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2021-02-13T17:02:35.500457Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.6.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "c78138d9-9377-31dc-aee8-83d9df049c60", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.dataset_to_dataframe", "name": "Extract a DataFrame from a Dataset"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "81235c29-aeb9-3828-911a-1b25319b6998", "version": "0.6.0", "python_path": "d3m.primitives.tods.data_processing.column_parser", "name": "Parses strings into their types"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "642de2e7-5590-3cab-9266-2a53c326c461", "version": "0.0.1", "python_path": "d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler", "name": "Axis_wise_scale"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_ae", "name": "TODS.anomaly_detection_primitives.AutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "2530840a-07d4-3874-b7d8-9eb5e4ae2bf3", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.construct_predictions", "name": "Construct pipeline predictions output"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.5.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "01ad8ccf817150186ca15157a4f02ee1f738582137321a8a5a4a3252832ce555"} | |||
{"id": "924e9a77-da5f-4bcc-b9a0-ed65bbaf87fa", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2021-03-11T23:41:13.884494Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.6.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "c78138d9-9377-31dc-aee8-83d9df049c60", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.dataset_to_dataframe", "name": "Extract a DataFrame from a Dataset"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "81235c29-aeb9-3828-911a-1b25319b6998", "version": "0.6.0", "python_path": "d3m.primitives.tods.data_processing.column_parser", "name": "Parses strings into their types"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "f07ce875-bbc7-36c5-9cc1-ba4bfb7cf48e", "version": "0.1.0", "python_path": "d3m.primitives.tods.feature_analysis.statistical_maximum", "name": "Time Series Decompostional"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_ae", "name": "TODS.anomaly_detection_primitives.AutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "2530840a-07d4-3874-b7d8-9eb5e4ae2bf3", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.construct_predictions", "name": "Construct pipeline predictions output"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.5.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "bb1cb5328299d8d65cabc152092da553db267494fb12e6320c66110b2c48a265"} |
@@ -41,7 +41,9 @@ attributes = 'steps.2.produce' | |||
targets = 'steps.3.produce' | |||
# Step 4: processing | |||
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')) | |||
#step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')) | |||
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_maximum')) | |||
#step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_minimum')) | |||
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) | |||
step_4.add_output('produce') | |||
pipeline_description.add_step(step_4) | |||
@@ -64,7 +66,7 @@ pipeline_description.add_output(name='output predictions', data_reference='steps | |||
# Output to json | |||
data = pipeline_description.to_json() | |||
with open('example_pipeline.json', 'w') as f: | |||
with open('autoencoder_pipeline.json', 'w') as f: | |||
f.write(data) | |||
print(data) | |||
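A quick round-trip check of the file just written, using the d3m pipeline loader (a sketch; no primitive resolution is needed just to inspect the output reference):

from d3m.metadata import pipeline as pipeline_module

with open('autoencoder_pipeline.json') as f:
    loaded = pipeline_module.Pipeline.from_json(f.read())
# The final output should point at the construct_predictions step.
print(loaded.outputs[0]['data'])  # 'steps.6.produce'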
@@ -57,29 +57,37 @@ attributes = 'steps.4.produce' | |||
targets = 'steps.5.produce' | |||
# Step 6: processing | |||
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')) | |||
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_maximum')) | |||
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) | |||
step_6.add_output('produce') | |||
pipeline_description.add_step(step_6) | |||
# Step 7: algorithm | |||
step_7 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae')) | |||
#step_7 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae')) | |||
step_7 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ocsvm')) | |||
step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce') | |||
step_7.add_output('produce') | |||
step_7.add_output('produce_score') | |||
pipeline_description.add_step(step_7) | |||
# Step 8: Predictions | |||
step_8 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions')) | |||
step_8.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.7.produce') | |||
step_8.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
#step_8 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions')) | |||
step_8 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.system_wise_detection')) | |||
step_8.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.7.produce_score') | |||
#step_8.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_8.add_output('produce') | |||
pipeline_description.add_step(step_8) | |||
step_9 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions')) | |||
step_9.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.8.produce') | |||
step_9.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_9.add_output('produce') | |||
pipeline_description.add_step(step_9) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.8.produce') | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.9.produce') | |||
# Output to json | |||
data = pipeline_description.to_json() | |||
with open('example_pipeline.json', 'w') as f: | |||
with open('system_pipeline.json', 'w') as f: | |||
f.write(data) | |||
print(data) |
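The script now emits a ten-step graph, which is easier to audit from the JSON than from the builder code; a sketch that prints every step together with the data references it consumes:

import json

with open('system_pipeline.json') as f:
    steps = json.load(f)['steps']
for i, s in enumerate(steps):
    refs = {name: arg['data'] for name, arg in s['arguments'].items()}
    print('step {}: {} <- {}'.format(i, s['primitive']['python_path'], refs))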
@@ -1 +1 @@ | |||
{"id": "fe8ceeee-a513-45d8-9e28-b46e11f9c635", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2021-02-11T21:28:54.508699Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.8.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e", "version": "0.2.0", "python_path": "d3m.primitives.tods.common.denormalize", "name": "Denormalize datasets"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.dataset_to_dataframe", "name": "Extract a DataFrame from a Dataset", "digest": "ba00092121d8971b0aa8c1f4b99e97151ca39b44f549eecc03fc61a286567a36"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "989562ac-b50f-4462-99cb-abef80d765b2", "version": "0.1.0", "python_path": "d3m.primitives.tods.common.csv_reader", "name": "Columns CSV reader"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"use_columns": {"type": "VALUE", "data": [0, 1]}, "return_result": {"type": "VALUE", "data": "replace"}}}, {"type": "PRIMITIVE", "primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7", "version": "0.6.0", "python_path": "d3m.primitives.tods.data_processing.column_parser", "name": "Parses strings into their types", "digest": "ef87bfbd3b35a2d78337c5d3aba9847dfdf56c05c5289e50fe0db766ef8126e0"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"parse_semantic_types": {"type": "VALUE", "data": ["http://schema.org/Boolean", "http://schema.org/Integer", "http://schema.org/Float", "https://metadata.datadrivendiscovery.org/types/FloatVector"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type", "digest": "35ab3368a69e46da89e4dbb70dab762d4c020c43a9424db622e8ac2ae5c57c06"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type", "digest": "35ab3368a69e46da89e4dbb70dab762d4c020c43a9424db622e8ac2ae5c57c06"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "642de2e7-5590-3cab-9266-2a53c326c461", "version": "0.0.1", "python_path": "d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler", "name": "Axis_wise_scale"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", 
"python_path": "d3m.primitives.tods.detection_algorithm.pyod_ae", "name": "TODS.anomaly_detection_primitives.AutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.6.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.construct_predictions", "name": "Construct pipeline predictions output", "digest": "d981f367776ef05d7311b85b86af717a599c7fd363b04db7531bd21ab30a8844"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.7.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "7033f0a107adae468d509f5706a6a79dfcb965d4d5a8d3aef4b79017d33956ed"} | |||
{"id": "f9f918f3-4cd9-4d3c-9a84-8a95b18d3d7c", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2021-04-02T20:35:56.617972Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.9.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e", "version": "0.2.0", "python_path": "d3m.primitives.tods.common.denormalize", "name": "Denormalize datasets"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "c78138d9-9377-31dc-aee8-83d9df049c60", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.dataset_to_dataframe", "name": "Extract a DataFrame from a Dataset"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "989562ac-b50f-4462-99cb-abef80d765b2", "version": "0.1.0", "python_path": "d3m.primitives.tods.common.csv_reader", "name": "Columns CSV reader"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"use_columns": {"type": "VALUE", "data": [0, 1]}, "return_result": {"type": "VALUE", "data": "replace"}}}, {"type": "PRIMITIVE", "primitive": {"id": "81235c29-aeb9-3828-911a-1b25319b6998", "version": "0.6.0", "python_path": "d3m.primitives.tods.data_processing.column_parser", "name": "Parses strings into their types"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"parse_semantic_types": {"type": "VALUE", "data": ["http://schema.org/Boolean", "http://schema.org/Integer", "http://schema.org/Float", "https://metadata.datadrivendiscovery.org/types/FloatVector"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "f07ce875-bbc7-36c5-9cc1-ba4bfb7cf48e", "version": "0.1.0", "python_path": "d3m.primitives.tods.feature_analysis.statistical_maximum", "name": "Time Series Decompostional"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "b454adf7-5820-3e6f-8383-619f13fb1cb6", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_ocsvm", "name": "TODS.anomaly_detection_primitives.OCSVMPrimitive"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.6.produce"}}, "outputs": [{"id": "produce_score"}]}, {"type": "PRIMITIVE", "primitive": {"id": "01d36760-235c-3cdd-95dd-3c682c634c49", 
"version": "0.1.0", "python_path": "d3m.primitives.tods.detection_algorithm.system_wise_detection", "name": "Sytem_Wise_Anomaly_Detection_Primitive"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.7.produce_score"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "2530840a-07d4-3874-b7d8-9eb5e4ae2bf3", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.construct_predictions", "name": "Construct pipeline predictions output"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.8.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "c91336de994b3e7089bc3de1728dde5b458c3b9d4ecae7a9c94a26da1219d3f3"} |
@@ -6,19 +6,18 @@ import pandas as pd | |||
from tods import generate_dataset, load_pipeline, evaluate_pipeline | |||
this_path = os.path.dirname(os.path.abspath(__file__)) | |||
#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset | |||
default_data_path = os.path.join(this_path, '../../datasets/anomaly/raw_data/yahoo_sub_5.csv') | |||
parser = argparse.ArgumentParser(description='Arguments for running a predefined pipeline.')
parser.add_argument('--table_path', type=str, default=os.path.join(this_path, '../../datasets/anomaly/raw_data/yahoo_sub_5.csv'), | |||
parser.add_argument('--table_path', type=str, default=default_data_path, | |||
help='Input the path of the input data table') | |||
parser.add_argument('--target_index', type=int, default=6, | |||
help='Index of the ground truth (for evaluation)') | |||
parser.add_argument('--metric',type=str, default='F1_MACRO', | |||
help='Evaluation Metric (F1, F1_MACRO)') | |||
parser.add_argument('--pipeline_path', default=os.path.join(this_path, './example_pipelines/autoencoder_pipeline.json'), | |||
parser.add_argument('--pipeline_path', | |||
default=os.path.join(this_path, './example_pipelines/autoencoder_pipeline.json'), | |||
help='Input the path of the pre-built pipeline description') | |||
# parser.add_argument('--pipeline_path', default=os.path.join(this_path, '../tods/resources/default_pipeline.json'), | |||
# help='Input the path of the pre-built pipeline description') | |||
args = parser.parse_args() | |||
@@ -37,4 +36,5 @@ pipeline = load_pipeline(pipeline_path) | |||
# Run the pipeline | |||
pipeline_result = evaluate_pipeline(dataset, pipeline, metric) | |||
print(pipeline_result) | |||
#raise pipeline_result.error[0] | |||
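For reference, the same run can be driven programmatically with the helpers the script imports; a sketch using the script's own default paths (relative to the script's directory):

import pandas as pd
from tods import generate_dataset, load_pipeline, evaluate_pipeline

df = pd.read_csv('../../datasets/anomaly/raw_data/yahoo_sub_5.csv')
dataset = generate_dataset(df, 6)  # target_index=6, as in the argparse defaults
pipeline = load_pipeline('./example_pipelines/autoencoder_pipeline.json')
print(evaluate_pipeline(dataset, pipeline, 'F1_MACRO'))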
@@ -68,7 +68,7 @@ class CSVReaderPrimitive(primitives.FileReaderPrimitiveBase): # pragma: no co | |||
# This should be done by primitives later on. | |||
dtype=str, | |||
# No header row is assumed; the first row is kept as data.
header=0, | |||
header=None, | |||
# We want empty strings and not NaNs. | |||
na_filter=False, | |||
encoding='utf8', | |||
@@ -92,7 +92,7 @@ class CSVReaderPrimitive(primitives.FileReaderPrimitiveBase): # pragma: no co | |||
data = container.DataFrame(data, { | |||
'schema': metadata_base.CONTAINER_SCHEMA_VERSION, | |||
'structural_type': container.DataFrame, | |||
}, generate_metadata=False) | |||
}, generate_metadata=True) | |||
assert column_names is not None | |||
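The header switch above is standard pandas behavior: header=0 consumes the first row as column names, while header=None keeps it as data, which is what headerless system CSVs need. A standalone illustration:

import io
import pandas as pd

raw = '0.12,0.34\n0.56,0.78\n'
named = pd.read_csv(io.StringIO(raw), dtype=str, header=0, na_filter=False)   # 1 row; columns '0.12', '0.34'
kept = pd.read_csv(io.StringIO(raw), dtype=str, header=None, na_filter=False) # 2 rows; columns 0, 1
print(len(named), len(kept))  # 1 2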
@@ -0,0 +1,200 @@ | |||
import typing | |||
from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple | |||
import logging | |||
import abc | |||
from d3m.primitive_interfaces import generator, transformer | |||
from d3m.primitive_interfaces.base import * | |||
from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase | |||
from d3m.metadata import base as metadata_base, hyperparams, params | |||
from d3m import container | |||
from d3m import utils | |||
__all__ = ('TODSTransformerPrimitiveBase',) | |||
class TODSTransformerPrimitiveBase(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
A base class for primitives which are not fitted at all and can | |||
simply produce (useful) outputs from inputs directly. As such they | |||
also do not have any state (params). | |||
This class is parameterized using only three type variables, ``Inputs``, | |||
``Outputs``, and ``Hyperparams``. | |||
""" | |||
def __init__(self, *, hyperparams: Hyperparams) -> None: | |||
super().__init__(hyperparams=hyperparams) | |||
    def produce(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]:
        # A scalar cell has shape (), while system-wise data nests a whole
        # (rows, cols) DataFrame inside each cell of the first column.
        is_system = len(inputs.iloc[0, 0].shape) != 0
        if is_system:
            outputs = self._forward(inputs, '_produce')
        else:
            outputs = self._produce(inputs=inputs)
            outputs = outputs.value
        return CallResult(outputs)
    @abc.abstractmethod
    def _produce(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]:
        """
        Make predictions for a single system's DataFrame.
        """
def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: | |||
""" | |||
A noop. | |||
""" | |||
return CallResult(None) | |||
def get_params(self) -> None: | |||
""" | |||
A noop. | |||
""" | |||
return None | |||
def set_params(self, *, params: None) -> None: | |||
""" | |||
A noop. | |||
""" | |||
return | |||
    def _forward(self, data, method):
        """
        Feed each system's nested DataFrame through `method` one by one and
        write the result back into the outer frame.
        """
        col_name = list(data.columns)[0]
        produce_func = getattr(self, method, None)
        for i, _ in data.iterrows():
            sys_data = data.iloc[i][col_name]
            out = produce_func(inputs=sys_data)
            data.iloc[i][col_name] = out.value
        return data
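The is_system test above relies on a specific nesting convention; here is a sketch of the layout it expects (the 'system' column name and the values are illustrative):

import pandas as pd
from d3m import container

inner_a = container.DataFrame(pd.DataFrame({'value_0': [0.1, 0.9, 0.2]}))
inner_b = container.DataFrame(pd.DataFrame({'value_0': [0.4, 0.3, 0.8]}))
outer = container.DataFrame({'system': [inner_a, inner_b]})

# Each outer cell is itself a DataFrame, so its .shape is (3, 1) rather than
# the () of a scalar cell, and the base class dispatches system by system.
print(len(outer.iloc[0, 0].shape) != 0)  # True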
class TODSUnsupervisedLearnerPrimitiveBase(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): | |||
def __init__(self, *, hyperparams: Hyperparams, | |||
random_seed: int=0, | |||
docker_containers: Dict[str, DockerContainer] = None) -> None: | |||
super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) | |||
    def produce(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]:
        # Scalar cell -> shape (); a nested DataFrame cell -> system-wise data.
        is_system = len(inputs.iloc[0, 0].shape) != 0
        if is_system:
            outputs = self._forward(inputs, '_produce')
        else:
            outputs = self._produce(inputs=inputs)
            outputs = outputs.value
        return CallResult(outputs)

    def produce_score(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]:
        # Scalar cell -> shape (); a nested DataFrame cell -> system-wise data.
        is_system = len(inputs.iloc[0, 0].shape) != 0
        if is_system:
            outputs = self._forward(inputs, '_produce_score')
        else:
            outputs = self._produce_score(inputs=inputs)
            outputs = outputs.value
        return CallResult(outputs)
    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
        """
        Fit the primitive; for system-wise data, each system's nested
        DataFrame is set as training data and fitted in turn.
        """
        # Scalar cell -> shape (); a nested DataFrame cell -> system-wise data.
        is_system = len(self._inputs.iloc[0, 0].shape) != 0
        if is_system:
            data = self._inputs
            col_name = list(data.columns)[0]
            for i, _ in data.iterrows():
                sys_data = data.iloc[i][col_name]
                self.set_training_data(inputs=sys_data)
                self._fit()
        else:
            self._fit()
        return CallResult(None)
def fit_multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, timeout: float = None, iterations: int = None) -> MultiCallResult: | |||
is_system = len(inputs.iloc[0, 0].shape) != 0  # scalar cell -> shape (); a nested DataFrame cell -> system-wise data
if is_system: | |||
data = inputs | |||
produce_method = produce_methods[0] | |||
col_name = list(data.columns)[0] | |||
results = [] | |||
for i, _ in data.iterrows(): | |||
sys_data = data.iloc[i][col_name] | |||
self.set_training_data(inputs=sys_data) | |||
fit_result = self._fit() | |||
if produce_method == "produce": | |||
out = self._produce(inputs=sys_data, timeout=timeout) | |||
else: | |||
out = self._produce_score(inputs=sys_data, timeout=timeout) | |||
data.iloc[i][col_name] = out.value | |||
results.append(out) | |||
iterations_done = None | |||
for result in results: | |||
if result.iterations_done is not None: | |||
if iterations_done is None: | |||
iterations_done = result.iterations_done | |||
else: | |||
iterations_done = max(iterations_done, result.iterations_done) | |||
return MultiCallResult( | |||
values={produce_method: data}, | |||
has_finished=all(result.has_finished for result in results), | |||
iterations_done=iterations_done, | |||
) | |||
else: | |||
return self._fit_multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs) | |||
    @abc.abstractmethod
    def _produce(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]:
        """
        Abstract method: produce outputs for a single system's DataFrame.
        """

    @abc.abstractmethod
    def _produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        """
        Abstract method: produce outlier scores for a single system's DataFrame.
        """

    @abc.abstractmethod
    def _fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
        """
        Abstract method: fit on the training data previously set via set_training_data().
        """
def get_params(self) -> None: | |||
""" | |||
A noop. | |||
""" | |||
return None | |||
def set_params(self, *, params: None) -> None: | |||
""" | |||
A noop. | |||
""" | |||
return | |||
    def _forward(self, data, method):
        """
        Feed each system's nested DataFrame through `method` one by one and
        write the result back into the outer frame.
        """
        col_name = list(data.columns)[0]
        produce_func = getattr(self, method, None)
        for i, _ in data.iterrows():
            sys_data = data.iloc[i][col_name]
            out = produce_func(inputs=sys_data)
            data.iloc[i][col_name] = out.value
        return data
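A concrete transformer on top of the base class only has to implement _produce for a single system's frame; the system-wise dispatch comes for free. A minimal, hypothetical sketch (omitting the primitive metadata a registrable d3m primitive also needs):

class MaxAbsPrimitive(TODSTransformerPrimitiveBase):
    """Hypothetical example: per-system maximum of absolute values."""

    def _produce(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]:
        result = inputs.abs().max().to_frame().T
        return CallResult(container.DataFrame(result))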
@@ -95,7 +95,7 @@ class Hyperparams(Hyperparams_ODBase): | |||
) | |||
epochs = hyperparams.Hyperparameter[int]( | |||
default=100, | |||
default=1, | |||
description='Number of epochs to train the model.', | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] | |||
) | |||
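Dropping the default from 100 epochs to 1 keeps example pipelines fast; a run that needs real training can override the value per step when building a pipeline (the step wiring below is illustrative):

from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import PrimitiveStep

step = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step.add_hyperparameter(name='epochs', argument_type=ArgumentType.VALUE, data=100)
step.add_output('produce')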
@@ -335,7 +335,7 @@ class AutoEncoderPrimitive(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Para | |||
Returns: | |||
None | |||
""" | |||
return super().fit() | |||
return super()._fit() | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||
""" | |||
@@ -347,7 +347,7 @@ class AutoEncoderPrimitive(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Para | |||
Container DataFrame | |||
1 marks Outliers, 0 marks normal. | |||
""" | |||
return super().produce(inputs=inputs, timeout=timeout, iterations=iterations) | |||
return super()._produce(inputs=inputs, timeout=timeout, iterations=iterations) | |||
def get_params(self) -> Params: | |||
""" | |||
@@ -142,7 +142,6 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, | |||
self.logger.info('System wise Detection Input Primitive called') | |||
# Get cols to fit. | |||
self._fitted = False | |||
self._training_inputs, self._training_indices = self._get_columns_to_fit(inputs, self.hyperparams) | |||
@@ -316,12 +315,8 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, | |||
def _write(self, inputs: Inputs): | |||
inputs.to_csv(str(time.time()) + '.csv') | |||
def _system_wise_detection(self,X,method_type,window_size,contamination): | |||
systemIds = X.system_id.unique() | |||
groupedX = X.groupby(X.system_id) | |||
systemIds = [int(idx) for idx in X.index] | |||
transformed_X = [] | |||
if(method_type=="max"): | |||
@@ -330,17 +325,17 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, | |||
""" | |||
maxOutlierScorePerSystemList = [] | |||
for systemId in systemIds: | |||
systemDf = groupedX.get_group(systemId) | |||
maxOutlierScorePerSystemList.append(np.max(np.abs(systemDf["value_0"].values))) | |||
systemDf = X.iloc[systemId]['system'] | |||
maxOutlierScorePerSystemList.append(np.max(np.abs(systemDf.iloc[:,0].values))) | |||
ranking = np.sort(maxOutlierScorePerSystemList) | |||
threshold = ranking[int((1 - contamination) * len(ranking))] | |||
self.threshold = threshold | |||
mask = (maxOutlierScorePerSystemList >= threshold) | |||
mask = (maxOutlierScorePerSystemList > threshold) | |||
ranking[mask] = 1 | |||
ranking[np.logical_not(mask)] = 0 | |||
for iter in range(len(systemIds)): | |||
transformed_X.append([systemIds[iter],ranking[iter]]) | |||
transformed_X.append(ranking[iter]) | |||
if (method_type == "avg"): | |||
""" | |||
@@ -348,60 +343,72 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, | |||
""" | |||
avgOutlierScorePerSystemList = [] | |||
for systemId in systemIds: | |||
systemDf = groupedX.get_group(systemId) | |||
avgOutlierScorePerSystemList.append(np.mean(np.abs(systemDf["value_0"].values))) | |||
systemDf = X.iloc[systemId]['system'] | |||
avgOutlierScorePerSystemList.append(np.mean(np.abs(systemDf.iloc[:,0].values))) | |||
ranking = np.sort(avgOutlierScorePerSystemList) | |||
threshold = ranking[int((1 - contamination) * len(ranking))] | |||
self.threshold = threshold | |||
mask = (avgOutlierScorePerSystemList >= threshold) | |||
mask = (avgOutlierScorePerSystemList > threshold) | |||
ranking[mask] = 1 | |||
ranking[np.logical_not(mask)] = 0 | |||
for iter in range(len(systemIds)): | |||
transformed_X.append([systemIds[iter], ranking[iter]]) | |||
transformed_X.append(ranking[iter])
if (method_type == "sliding_window_sum"): | |||
""" | |||
Sytems are sorted based on max of max of reconstruction errors in each window" | |||
Systems are sorted based on max of sum of reconstruction errors in each window
""" | |||
OutlierScorePerSystemList = [] | |||
maxOutlierScorePerSystemList = [] | |||
for systemId in systemIds: | |||
systemDf = groupedX.get_group(systemId) | |||
column_value = systemDf["value_0"].values | |||
column_score = np.zeros(len(column_value)) | |||
systemDf = X.iloc[systemId]['system'] | |||
column_value = systemDf.iloc[:,0].values | |||
column_score = [] | |||
for iter in range(window_size - 1, len(column_value)): | |||
sequence = column_value[iter - window_size + 1:iter + 1] | |||
column_score[iter] = np.sum(np.abs(sequence)) | |||
column_score[:window_size - 1] = column_score[window_size - 1] | |||
OutlierScorePerSystemList.append(column_score.tolist()) | |||
OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) | |||
column_score.append(np.sum(np.abs(sequence))) | |||
#column_score[:window_size - 1] = column_score[window_size - 1] | |||
maxOutlierScorePerSystemList.append(np.max(column_score)) | |||
#OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) | |||
maxOutlierScorePerSystemList = OutlierScorePerSystemList.max(axis=1).tolist() | |||
#maxOutlierScorePerSystemList = OutlierScorePerSystemList.max(axis=1).tolist() | |||
ranking = np.sort(maxOutlierScorePerSystemList) | |||
threshold = ranking[int((1 - contamination) * len(ranking))] | |||
self.threshold = threshold | |||
mask = (maxOutlierScorePerSystemList >= threshold) | |||
mask = (maxOutlierScorePerSystemList > threshold) | |||
ranking[mask] = 1 | |||
ranking[np.logical_not(mask)] = 0 | |||
for iter in range(len(systemIds)): | |||
transformed_X.append([systemIds[iter], ranking[iter]]) | |||
transformed_X.append(ranking[iter])
if (method_type == "majority_voting_sliding_window_sum"): | |||
""" | |||
System with the most votes based on max of sum of reconstruction errors in each window
""" | |||
OutlierScorePerSystemList = [] | |||
max_time_points = 0 | |||
for systemId in systemIds: | |||
systemDf = X.iloc[systemId]['system'] | |||
max_time_points = max(max_time_points,systemDf.shape[0]) | |||
for systemId in systemIds: | |||
systemDf = groupedX.get_group(systemId) | |||
column_value = systemDf["value_0"].values | |||
column_score = np.zeros(len(column_value)) | |||
column_value = np.zeros(max_time_points) | |||
systemDf = X.iloc[systemId]['system'] | |||
column_value_actual = systemDf.iloc[:, 0].values | |||
column_value[0:len(column_value_actual)] = column_value_actual | |||
column_value[len(column_value_actual):]= column_value_actual[-1] | |||
column_score = [] | |||
for iter in range(window_size - 1, len(column_value)): | |||
sequence = column_value[iter - window_size + 1:iter + 1] | |||
column_score[iter] = np.sum(np.abs(sequence)) | |||
column_score[:window_size - 1] = column_score[window_size - 1] | |||
OutlierScorePerSystemList.append(column_score.tolist()) | |||
column_score.append(np.sum(np.abs(sequence))) | |||
OutlierScorePerSystemList.append(column_score) | |||
OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) | |||
OutlierScorePerSystemList = ( | |||
OutlierScorePerSystemList == OutlierScorePerSystemList.max(axis=0)[None, :]).astype(int) | |||
@@ -409,28 +416,39 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, | |||
ranking = np.sort(maxOutlierScorePerSystemList) | |||
threshold = ranking[int((1 - contamination) * len(ranking))] | |||
self.threshold = threshold | |||
mask = (maxOutlierScorePerSystemList >= threshold) | |||
mask = (maxOutlierScorePerSystemList > threshold) | |||
ranking[mask] = 1 | |||
ranking[np.logical_not(mask)] = 0 | |||
for iter in range(len(systemIds)): | |||
transformed_X.append([systemIds[iter], ranking[iter]]) | |||
transformed_X.append(ranking[iter])
if (method_type == "majority_voting_sliding_window_max"): | |||
""" | |||
System with the most votes based on max of max of reconstruction errors in each window
""" | |||
OutlierScorePerSystemList = [] | |||
max_time_points = 0 | |||
for systemId in systemIds: | |||
systemDf = groupedX.get_group(systemId) | |||
column_value = systemDf["value_0"].values | |||
column_score = np.zeros(len(column_value)) | |||
systemDf = X.iloc[systemId]['system'] | |||
max_time_points = max(max_time_points, systemDf.shape[0]) | |||
for systemId in systemIds: | |||
column_value = np.zeros(max_time_points) | |||
systemDf = X.iloc[systemId]['system'] | |||
column_value_actual = systemDf.iloc[:, 0].values | |||
column_value[0:len(column_value_actual)] = column_value_actual | |||
column_value[len(column_value_actual):] = column_value_actual[-1] | |||
column_score = [] | |||
for iter in range(window_size - 1, len(column_value)): | |||
sequence = column_value[iter - window_size + 1:iter + 1] | |||
column_score[iter] = np.max(np.abs(sequence)) | |||
column_score[:window_size - 1] = column_score[window_size - 1] | |||
OutlierScorePerSystemList.append(column_score.tolist()) | |||
column_score.append(np.max(np.abs(sequence))) | |||
OutlierScorePerSystemList.append(column_score) | |||
OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) | |||
OutlierScorePerSystemList = ( | |||
OutlierScorePerSystemList == OutlierScorePerSystemList.max(axis=0)[None, :]).astype(int) | |||
@@ -439,11 +457,11 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, | |||
ranking = np.sort(maxOutlierScorePerSystemList) | |||
threshold = ranking[int((1 - contamination) * len(ranking))] | |||
self.threshold = threshold | |||
mask = (maxOutlierScorePerSystemList >= threshold) | |||
mask = (maxOutlierScorePerSystemList > threshold) | |||
ranking[mask] = 1 | |||
ranking[np.logical_not(mask)] = 0 | |||
for iter in range(len(systemIds)): | |||
transformed_X.append([systemIds[iter], ranking[iter]]) | |||
transformed_X.append(ranking[iter]) | |||
return transformed_X | |||
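Every branch above ends with the same rank-and-threshold scheme; a worked example with contamination=0.5 and the strict '>' comparison this diff switches to (the branches reuse the sorted ranking array as the output buffer; the sketch computes the labels directly, which is equivalent):

import numpy as np

scores = np.array([0.2, 0.9, 0.4, 0.7])  # one outlier score per system
contamination = 0.5

ranking = np.sort(scores)                                     # [0.2 0.4 0.7 0.9]
threshold = ranking[int((1 - contamination) * len(ranking))]  # ranking[2] -> 0.7
labels = (scores > threshold).astype(int)
print(threshold, labels)  # 0.7 [0 1 0 0]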
@@ -0,0 +1,455 @@ | |||
import os
import time
import typing
import uuid
from collections import OrderedDict
from typing import Any, List, Optional

import numpy
import numpy as np
from numpy import ndarray
from scipy import sparse
import statsmodels.api as sm

from d3m import container, utils
from d3m import utils as d3m_utils
from d3m.base import utils as base_utils
from d3m.container import DataFrame as d3m_dataframe
from d3m.exceptions import PrimitiveNotFittedError
from d3m.metadata import hyperparams, params, base as metadata_base
from d3m.primitive_interfaces import base, transformer
__all__ = ('SystemWiseDetectionPrimitive',) | |||
Inputs = container.DataFrame | |||
Outputs = container.DataFrame | |||
class Params(params.Params):
    # TODO: make Params dynamic rather than hard-coded.
    use_column_names: Optional[Any]
class Hyperparams(hyperparams.Hyperparams):
    # Tuning parameter: sliding-window size; -1 means the entire time series is considered.
    window_size = hyperparams.Hyperparameter(default=10, semantic_types=[
        'https://metadata.datadrivendiscovery.org/types/TuningParameter',
    ], description="Window Size for decomposition")
method_type = hyperparams.Enumeration( | |||
values=['max', 'avg', 'sliding_window_sum','majority_voting_sliding_window_sum','majority_voting_sliding_window_max'], | |||
default='majority_voting_sliding_window_max', | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="The type of method used to find anomalous system", | |||
) | |||
contamination = hyperparams.Uniform( | |||
lower=0., | |||
upper=0.5, | |||
default=0.1, | |||
description='The amount of contamination of the data set, i.e. the proportion of outliers in the data set. ', | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] | |||
) | |||
#control parameter | |||
use_columns = hyperparams.Set( | |||
elements=hyperparams.Hyperparameter[int](-1), | |||
default=(), | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", | |||
) | |||
exclude_columns = hyperparams.Set( | |||
elements=hyperparams.Hyperparameter[int](-1), | |||
default=(), | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", | |||
) | |||
return_result = hyperparams.Enumeration( | |||
values=['append', 'replace', 'new'], | |||
default='new', | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", | |||
) | |||
use_semantic_types = hyperparams.UniformBool( | |||
default=False, | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" | |||
) | |||
add_index_columns = hyperparams.UniformBool( | |||
default=False, | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", | |||
) | |||
error_on_no_input = hyperparams.UniformBool( | |||
default=True, | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", | |||
) | |||
return_semantic_type = hyperparams.Enumeration[str]( | |||
values=['https://metadata.datadrivendiscovery.org/types/Attribute', | |||
'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], | |||
default='https://metadata.datadrivendiscovery.org/types/Attribute', | |||
description='Decides what semantic type to attach to generated attributes', | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] | |||
) | |||
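The block above can be exercised on its own: d3m Hyperparams classes expose their defaults as a ready-made instance, which is convenient when driving the primitive outside a pipeline:

hp = Hyperparams.defaults()
print(hp['method_type'], hp['window_size'], hp['contamination'])
# majority_voting_sliding_window_max 10 0.1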
class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
    """
    Primitive that flags anomalous systems based on system-wise outlier scores.
    """
metadata = metadata_base.PrimitiveMetadata({ | |||
"__author__": "DATA Lab at Texas A&M University", | |||
'name': 'Sytem_Wise_Anomaly_Detection_Primitive', | |||
'python_path': 'd3m.primitives.tods.detection_algorithm.system_wise_detection', | |||
'source': { | |||
'name': 'DATA Lab at Texas A&M University', | |||
'contact': 'mailto:khlai037@tamu.edu' | |||
}, | |||
"hyperparams_to_tune": ['window_size','method_type','contamination'], | |||
'version': '0.1.0', | |||
'algorithm_types': [ | |||
metadata_base.PrimitiveAlgorithmType.TODS_PRIMITIVE, | |||
], | |||
'primitive_family': metadata_base.PrimitiveFamily.ANOMALY_DETECTION, | |||
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'Sytem_Wise_Anomaly_Detection_Primitive')), | |||
}) | |||
def __init__(self, *, hyperparams: Hyperparams) -> None: | |||
super().__init__(hyperparams=hyperparams) | |||
self.primitiveNo = 0 | |||
    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
        """
        Args:
            inputs: Container DataFrame
            timeout: Default
            iterations: Default

        Returns:
            Container DataFrame with a 0/1 anomaly label per system
        """
self.logger.info('System wise Detection Input Primitive called') | |||
# Get cols to fit. | |||
self._fitted = False | |||
self._training_inputs, self._training_indices = self._get_columns_to_fit(inputs, self.hyperparams) | |||
self._input_column_names = self._training_inputs.columns | |||
if len(self._training_indices) > 0: | |||
# self._clf.fit(self._training_inputs) | |||
self._fitted = True | |||
else: | |||
if self.hyperparams['error_on_no_input']: | |||
raise RuntimeError("No input columns were selected") | |||
self.logger.warn("No input columns were selected") | |||
if not self._fitted: | |||
raise PrimitiveNotFittedError("Primitive not fitted.") | |||
system_wise_detection_input = inputs | |||
if self.hyperparams['use_semantic_types']: | |||
system_wise_detection_input = inputs.iloc[:, self._training_indices] | |||
output_columns = [] | |||
if len(self._training_indices) > 0: | |||
system_wise_detection_output = self._system_wise_detection(system_wise_detection_input,self.hyperparams["method_type"],self.hyperparams["window_size"],self.hyperparams["contamination"]) | |||
outputs = system_wise_detection_output | |||
if sparse.issparse(system_wise_detection_output): | |||
system_wise_detection_output = system_wise_detection_output.toarray() | |||
outputs = self._wrap_predictions(inputs, system_wise_detection_output) | |||
#if len(outputs.columns) == len(self._input_column_names): | |||
# outputs.columns = self._input_column_names | |||
output_columns = [outputs] | |||
else: | |||
if self.hyperparams['error_on_no_input']: | |||
raise RuntimeError("No input columns were selected") | |||
self.logger.warn("No input columns were selected") | |||
self.logger.info('System wise Detection Primitive returned') | |||
outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], | |||
add_index_columns=self.hyperparams['add_index_columns'], | |||
inputs=inputs, column_indices=self._training_indices, | |||
columns_list=output_columns) | |||
return base.CallResult(outputs) | |||
@classmethod | |||
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): | |||
""" | |||
Select columns to fit. | |||
Args: | |||
inputs: Container DataFrame | |||
hyperparams: d3m.metadata.hyperparams.Hyperparams | |||
Returns: | |||
list | |||
""" | |||
if not hyperparams['use_semantic_types']: | |||
return inputs, list(range(len(inputs.columns))) | |||
inputs_metadata = inputs.metadata | |||
def can_produce_column(column_index: int) -> bool: | |||
return cls._can_produce_column(inputs_metadata, column_index, hyperparams) | |||
use_columns = hyperparams['use_columns'] | |||
exclude_columns = hyperparams['exclude_columns'] | |||
columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, | |||
use_columns=use_columns, | |||
exclude_columns=exclude_columns, | |||
can_use_column=can_produce_column) | |||
return inputs.iloc[:, columns_to_produce], columns_to_produce | |||
# return columns_to_produce | |||
@classmethod | |||
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, | |||
hyperparams: Hyperparams) -> bool: | |||
""" | |||
Output whether a column can be processed. | |||
Args: | |||
inputs_metadata: d3m.metadata.base.DataMetadata | |||
column_index: int | |||
Returns: | |||
bool | |||
""" | |||
column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) | |||
accepted_structural_types = (int, float, numpy.integer, numpy.float64) | |||
accepted_semantic_types = set() | |||
accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") | |||
if not issubclass(column_metadata['structural_type'], accepted_structural_types): | |||
return False | |||
semantic_types = set(column_metadata.get('semantic_types', [])) | |||
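        # NOTE: the unconditional return below makes the semantic-type checks
        # that follow unreachable; every column with an accepted structural type is used.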
return True | |||
if len(semantic_types) == 0: | |||
cls.logger.warning("No semantic types found in column metadata") | |||
return False | |||
# Making sure all accepted_semantic_types are available in semantic_types | |||
if len(accepted_semantic_types - semantic_types) == 0: | |||
return True | |||
return False | |||
@classmethod | |||
def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], | |||
target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: | |||
""" | |||
Update metadata for selected columns.
Args: | |||
inputs_metadata: metadata_base.DataMetadata | |||
outputs: Container Dataframe | |||
target_columns_metadata: list | |||
Returns: | |||
d3m.metadata.base.DataMetadata | |||
""" | |||
outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) | |||
for column_index, column_metadata in enumerate(target_columns_metadata): | |||
column_metadata.pop("structural_type", None) | |||
outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) | |||
return outputs_metadata | |||
def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: | |||
""" | |||
Wrap predictions into dataframe | |||
Args: | |||
inputs: Container Dataframe | |||
predictions: array-like data (n_samples, n_features) | |||
Returns: | |||
Dataframe | |||
""" | |||
outputs = d3m_dataframe(predictions, generate_metadata=True) | |||
target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams,self.primitiveNo) | |||
outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) | |||
return outputs | |||
@classmethod | |||
def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams, primitiveNo): | |||
""" | |||
Add target columns metadata | |||
Args: | |||
outputs_metadata: metadata.base.DataMetadata | |||
hyperparams: d3m.metadata.hyperparams.Hyperparams | |||
Returns: | |||
List[OrderedDict] | |||
""" | |||
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||
target_columns_metadata: List[OrderedDict] = [] | |||
for column_index in range(outputs_length): | |||
column_name = "{0}{1}_{2}".format(cls.metadata.query()['name'], primitiveNo, column_index) | |||
column_metadata = OrderedDict() | |||
semantic_types = set() | |||
semantic_types.add(hyperparams["return_semantic_type"]) | |||
column_metadata['semantic_types'] = list(semantic_types) | |||
column_metadata["name"] = str(column_name) | |||
target_columns_metadata.append(column_metadata) | |||
return target_columns_metadata | |||
def _write(self, inputs: Inputs): | |||
inputs.to_csv(str(time.time()) + '.csv') | |||
    def _system_wise_detection(self, X, method_type, window_size, contamination):
        # Group the flat score frame by system id; each group holds one system's outlier scores.
        systemIds = X.system_id.unique()
        groupedX = X.groupby(X.system_id)
        transformed_X = []
if(method_type=="max"): | |||
""" | |||
Systems are sorted based on the maximum of reconstruction errors
""" | |||
maxOutlierScorePerSystemList = [] | |||
for systemId in systemIds: | |||
systemDf = groupedX.get_group(systemId) | |||
maxOutlierScorePerSystemList.append(np.max(np.abs(systemDf["value_0"].values))) | |||
ranking = np.sort(maxOutlierScorePerSystemList) | |||
threshold = ranking[int((1 - contamination) * len(ranking))] | |||
self.threshold = threshold | |||
mask = (maxOutlierScorePerSystemList >= threshold) | |||
ranking[mask] = 1 | |||
ranking[np.logical_not(mask)] = 0 | |||
for iter in range(len(systemIds)): | |||
transformed_X.append([systemIds[iter],ranking[iter]]) | |||
if (method_type == "avg"): | |||
""" | |||
Sytems are sorted based on average of reconstruction errors" | |||
""" | |||
avgOutlierScorePerSystemList = [] | |||
for systemId in systemIds: | |||
systemDf = groupedX.get_group(systemId) | |||
avgOutlierScorePerSystemList.append(np.mean(np.abs(systemDf["value_0"].values))) | |||
ranking = np.sort(avgOutlierScorePerSystemList) | |||
threshold = ranking[int((1 - contamination) * len(ranking))] | |||
self.threshold = threshold | |||
mask = (avgOutlierScorePerSystemList >= threshold) | |||
ranking[mask] = 1 | |||
ranking[np.logical_not(mask)] = 0 | |||
for iter in range(len(systemIds)): | |||
transformed_X.append([systemIds[iter], ranking[iter]]) | |||
if (method_type == "sliding_window_sum"): | |||
""" | |||
Sytems are sorted based on max of max of reconstruction errors in each window" | |||
""" | |||
OutlierScorePerSystemList = [] | |||
for systemId in systemIds: | |||
systemDf = groupedX.get_group(systemId) | |||
column_value = systemDf["value_0"].values | |||
column_score = np.zeros(len(column_value)) | |||
for iter in range(window_size - 1, len(column_value)): | |||
sequence = column_value[iter - window_size + 1:iter + 1] | |||
column_score[iter] = np.sum(np.abs(sequence)) | |||
column_score[:window_size - 1] = column_score[window_size - 1] | |||
OutlierScorePerSystemList.append(column_score.tolist()) | |||
OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) | |||
maxOutlierScorePerSystemList = OutlierScorePerSystemList.max(axis=1).tolist() | |||
ranking = np.sort(maxOutlierScorePerSystemList) | |||
threshold = ranking[int((1 - contamination) * len(ranking))] | |||
self.threshold = threshold | |||
mask = (maxOutlierScorePerSystemList >= threshold) | |||
ranking[mask] = 1 | |||
ranking[np.logical_not(mask)] = 0 | |||
for iter in range(len(systemIds)): | |||
transformed_X.append([systemIds[iter], ranking[iter]]) | |||
if (method_type == "majority_voting_sliding_window_sum"): | |||
""" | |||
Sytem with most vote based on max of sum of reconstruction errors in each window | |||
""" | |||
OutlierScorePerSystemList = [] | |||
for systemId in systemIds: | |||
systemDf = groupedX.get_group(systemId) | |||
column_value = systemDf["value_0"].values | |||
column_score = np.zeros(len(column_value)) | |||
for iter in range(window_size - 1, len(column_value)): | |||
sequence = column_value[iter - window_size + 1:iter + 1] | |||
column_score[iter] = np.sum(np.abs(sequence)) | |||
column_score[:window_size - 1] = column_score[window_size - 1] | |||
OutlierScorePerSystemList.append(column_score.tolist()) | |||
OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) | |||
OutlierScorePerSystemList = ( | |||
OutlierScorePerSystemList == OutlierScorePerSystemList.max(axis=0)[None, :]).astype(int) | |||
maxOutlierScorePerSystemList = OutlierScorePerSystemList.sum(axis=1).tolist() | |||
ranking = np.sort(maxOutlierScorePerSystemList) | |||
threshold = ranking[int((1 - contamination) * len(ranking))] | |||
self.threshold = threshold | |||
mask = (maxOutlierScorePerSystemList >= threshold) | |||
ranking[mask] = 1 | |||
ranking[np.logical_not(mask)] = 0 | |||
for iter in range(len(systemIds)): | |||
transformed_X.append([systemIds[iter], ranking[iter]]) | |||
if (method_type == "majority_voting_sliding_window_max"): | |||
""" | |||
Sytem with most vote based on max of max of reconstruction errors in each window | |||
""" | |||
OutlierScorePerSystemList = [] | |||
for systemId in systemIds: | |||
systemDf = groupedX.get_group(systemId) | |||
column_value = systemDf["value_0"].values | |||
column_score = np.zeros(len(column_value)) | |||
for iter in range(window_size - 1, len(column_value)): | |||
sequence = column_value[iter - window_size + 1:iter + 1] | |||
column_score[iter] = np.max(np.abs(sequence)) | |||
column_score[:window_size - 1] = column_score[window_size - 1] | |||
OutlierScorePerSystemList.append(column_score.tolist()) | |||
OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) | |||
OutlierScorePerSystemList = ( | |||
OutlierScorePerSystemList == OutlierScorePerSystemList.max(axis=0)[None, :]).astype(int) | |||
maxOutlierScorePerSystemList = OutlierScorePerSystemList.sum(axis=1).tolist() | |||
ranking = np.sort(maxOutlierScorePerSystemList) | |||
threshold = ranking[int((1 - contamination) * len(ranking))] | |||
self.threshold = threshold | |||
mask = (maxOutlierScorePerSystemList >= threshold) | |||
ranking[mask] = 1 | |||
ranking[np.logical_not(mask)] = 0 | |||
for iter in range(len(systemIds)): | |||
transformed_X.append([systemIds[iter], ranking[iter]]) | |||
return transformed_X | |||
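# A self-contained sketch of the contamination-threshold ranking used by the "max"
# branch above (hypothetical helper, not part of the TODS API): score each system by
# its maximum absolute reconstruction error and flag the top `contamination` fraction.
import numpy as np

def rank_systems_by_max(errors_per_system, contamination=0.1):
    """errors_per_system: list of 1-D arrays of reconstruction errors, one per system."""
    scores = np.array([np.max(np.abs(e)) for e in errors_per_system])
    threshold = np.sort(scores)[int((1 - contamination) * len(scores))]
    return (scores >= threshold).astype(int)  # 1 marks an anomalous system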
@@ -30,6 +30,7 @@ from d3m.primitive_interfaces.base import CallResult, DockerContainer, Primitive | |||
# # from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase | |||
from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase | |||
from ..common.TODSBasePrimitives import TODSUnsupervisedLearnerPrimitiveBase | |||
from d3m.primitive_interfaces.transformer import TransformerPrimitiveBase | |||
from d3m.primitive_interfaces.base import * | |||
@@ -141,7 +142,10 @@ class Hyperparams_ODBase(hyperparams.Hyperparams): | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] | |||
) | |||
class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): | |||
# OutlierDetectorBase.__doc__ = OutlierDetectorBase.__doc__ | |||
class UnsupervisedOutlierDetectorBase(TODSUnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): | |||
""" | |||
Parameters | |||
---------- | |||
@@ -234,7 +238,7 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O | |||
self.right_inds_[self.right_inds_ > len(self._inputs)] = len(self._inputs) | |||
# print(self.left_inds_, self.right_inds_) | |||
def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: | |||
def _fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: | |||
""" | |||
Fit model with training data. | |||
Args: | |||
@@ -248,6 +252,7 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O | |||
if self._fitted: # pragma: no cover | |||
return CallResult(None) | |||
self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) | |||
self._input_column_names = self._training_inputs.columns | |||
@@ -271,7 +276,7 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O | |||
return CallResult(None) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||
""" | |||
Process the testing data. | |||
Args: | |||
@@ -336,7 +341,7 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O | |||
return CallResult(outputs) | |||
def produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||
def _produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||
""" | |||
Process the testing data. | |||
Args: | |||
@@ -688,3 +693,553 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O | |||
# OutlierDetectorBase.__doc__ = OutlierDetectorBase.__doc__ | |||
class UnsupervisedOutlierDetectorBase2(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): | |||
""" | |||
Parameters | |||
---------- | |||
contamination : float in (0., 0.5), optional (default=0.1) | |||
The amount of contamination of the data set, i.e. | |||
the proportion of outliers in the data set. When fitting this is used | |||
to define the threshold on the decision function. | |||
Attributes | |||
---------- | |||
clf_.decision_scores_ : numpy array of shape (n_samples,) | |||
The outlier scores of the training data. | |||
The higher, the more abnormal. Outliers tend to have higher | |||
scores. This value is available once the detector is | |||
fitted. | |||
    clf_.threshold_ : float
        The threshold that separates outliers from inliers: samples whose
        decision_scores_ exceed threshold_ are outliers; the rest are inliers.
    clf_.labels_ : int, either 0 or 1
        The binary labels of the training data. 0 stands for inliers
        and 1 for outliers/anomalies. It is generated by applying
        ``threshold_`` on ``decision_scores_``.
    left_inds_ : ndarray
        One of the mappings from decision_score to data.
        For point outlier detection, left_inds_ exactly equals the index of each data point.
        For collective outlier detection, left_inds_ equals the start index of each subsequence.
    right_inds_ : ndarray
        The other mapping from decision_score to data.
        For point outlier detection, right_inds_ exactly equals the index of each data point plus 1.
        For collective outlier detection, right_inds_ equals the end index of each subsequence.
""" | |||
# probability_score: | |||
# window_size: int | |||
# The moving window size. | |||
__author__ = "DATALAB @Taxes A&M University" | |||
metadata: metadata_base.PrimitiveMetadata = None | |||
def __init__(self, *, | |||
hyperparams: Hyperparams, | |||
random_seed: int = 0, | |||
docker_containers: Dict[str, DockerContainer] = None) -> None: | |||
super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) | |||
self._clf = None | |||
self._clf_fit_parameter = {} | |||
self.primitiveNo = 0 | |||
self.window_size = hyperparams['window_size'] | |||
self.step_size = hyperparams['step_size'] | |||
self.left_inds_ = None | |||
self.right_inds_ = None | |||
self._inputs = None | |||
self._outputs = None | |||
self._training_inputs = None | |||
self._training_outputs = None | |||
self._target_names = None | |||
self._training_indices = None | |||
self._target_column_indices = None | |||
self._target_columns_metadata: List[OrderedDict] = None | |||
self._input_column_names = None | |||
self._fitted = False | |||
# | |||
@abc.abstractmethod | |||
def set_training_data(self, *, inputs: Inputs) -> None: | |||
""" | |||
Set training data for outlier detection. | |||
Args: | |||
inputs: Container DataFrame | |||
Returns: | |||
None | |||
""" | |||
self._inputs = inputs | |||
self._fitted = False | |||
def _set_subseq_inds(self): | |||
self.left_inds_ = getattr(self._clf, 'left_inds_', None) | |||
self.right_inds_ = getattr(self._clf, 'right_inds_', None) | |||
if self.left_inds_ is None or self.right_inds_ is None: | |||
self.left_inds_ = numpy.arange(0, len(self._inputs), self.step_size) | |||
self.right_inds_ = self.left_inds_ + self.window_size | |||
self.right_inds_[self.right_inds_ > len(self._inputs)] = len(self._inputs) | |||
# print(self.left_inds_, self.right_inds_) | |||
def _fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: | |||
""" | |||
Fit model with training data. | |||
Args: | |||
*: Container DataFrame. Time series data up to fit. | |||
Returns: | |||
None | |||
""" | |||
# print('Fit:', self._clf) | |||
if self._fitted: # pragma: no cover | |||
return CallResult(None) | |||
self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) | |||
self._input_column_names = self._training_inputs.columns | |||
if self._training_inputs is None: # pragma: no cover | |||
return CallResult(None) | |||
#print("self._training_indices ", self._training_indices) | |||
if len(self._training_indices) > 0: | |||
# print('Fit: ', self._clf) | |||
# print('Fit: ', self._training_inputs.values.shape) | |||
# print('Fit: ', self._clf.fit(self._training_inputs.values)) | |||
self._clf.fit(X=self._training_inputs.values, **self._clf_fit_parameter) | |||
self._fitted = True | |||
self._set_subseq_inds() | |||
else: # pragma: no cover | |||
if self.hyperparams['error_on_no_input']: | |||
raise RuntimeError("No input columns were selected") | |||
self.logger.warn("No input columns were selected") | |||
return CallResult(None) | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||
""" | |||
Process the testing data. | |||
Args: | |||
inputs: Container DataFrame. Time series data up to outlier detection. | |||
Returns: | |||
Container DataFrame | |||
1 marks Outliers, 0 marks normal. | |||
""" | |||
if not self._fitted: # pragma: no cover | |||
raise PrimitiveNotFittedError("Primitive not fitted.") | |||
sk_inputs = inputs | |||
if self.hyperparams['use_semantic_types']: | |||
sk_inputs = inputs.iloc[:, self._training_indices] | |||
output_columns = [] | |||
#print("skinputs ", sk_inputs.values) | |||
if len(self._training_indices) > 0: | |||
if self.hyperparams['return_subseq_inds']: | |||
if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD | |||
pred_label = self._clf.predict(sk_inputs.values) | |||
left_inds_ = numpy.arange(0, len(pred_label), self.step_size) | |||
right_inds_ = left_inds_ + self.window_size | |||
right_inds_[right_inds_ > len(pred_label)] = len(pred_label) | |||
else: | |||
pred_label, left_inds_, right_inds_ = self._clf.predict(sk_inputs.values) | |||
# print(pred_label.shape, left_inds_.shape, right_inds_.shape) | |||
# print(pred_label, left_inds_, right_inds_) | |||
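                    # Stack into three columns: [prediction, subsequence start index, subsequence end index]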
sk_output = numpy.concatenate((numpy.expand_dims(pred_label, axis=1), | |||
numpy.expand_dims(left_inds_, axis=1), | |||
numpy.expand_dims(right_inds_, axis=1)), axis=1) | |||
else: | |||
if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD | |||
sk_output = self._clf.predict(sk_inputs.values) | |||
else: | |||
sk_output, _, _ = self._clf.predict(sk_inputs.values) | |||
#print("sk output ", sk_output) | |||
if sparse.issparse(sk_output): # pragma: no cover | |||
sk_output = sk_output.toarray() | |||
outputs = self._wrap_predictions(inputs, sk_output) | |||
if len(outputs.columns) == len(self._input_column_names): | |||
outputs.columns = self._input_column_names | |||
output_columns = [outputs] | |||
else: # pragma: no cover | |||
if self.hyperparams['error_on_no_input']: | |||
raise RuntimeError("No input columns were selected") | |||
self.logger.warn("No input columns were selected") | |||
outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], | |||
add_index_columns=self.hyperparams['add_index_columns'], | |||
inputs=inputs, column_indices=self._training_indices, | |||
columns_list=output_columns) | |||
return CallResult(outputs) | |||
def _produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||
""" | |||
Process the testing data. | |||
Args: | |||
inputs: Container DataFrame. Time series data up to outlier detection. | |||
Returns: | |||
Container DataFrame | |||
1 marks Outliers, 0 marks normal. | |||
""" | |||
if not self._fitted: # pragma: no cover | |||
raise PrimitiveNotFittedError("Primitive not fitted.") | |||
sk_inputs = inputs | |||
if self.hyperparams['use_semantic_types']: | |||
sk_inputs = inputs.iloc[:, self._training_indices] | |||
output_columns = [] | |||
if len(self._training_indices) > 0: | |||
if self.hyperparams['return_subseq_inds']: | |||
if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD | |||
pred_score = self._clf.decision_function(sk_inputs.values).ravel() | |||
left_inds_ = numpy.arange(0, len(pred_score), self.step_size) | |||
right_inds_ = left_inds_ + self.window_size | |||
right_inds_[right_inds_ > len(pred_score)] = len(pred_score) | |||
else: | |||
pred_score, left_inds_, right_inds_ = self._clf.decision_function(sk_inputs.values) | |||
# print(pred_score.shape, left_inds_.shape, right_inds_.shape) | |||
sk_output = numpy.concatenate((numpy.expand_dims(pred_score, axis=1), | |||
numpy.expand_dims(left_inds_, axis=1), | |||
numpy.expand_dims(right_inds_, axis=1)), axis=1) | |||
else: | |||
if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD | |||
sk_output = self._clf.decision_function(sk_inputs.values) | |||
else: | |||
sk_output, _, _ = self._clf.decision_function(sk_inputs.values) | |||
if sparse.issparse(sk_output): # pragma: no cover | |||
sk_output = sk_output.toarray() | |||
outputs = self._wrap_predictions(inputs, sk_output) | |||
if len(outputs.columns) == len(self._input_column_names): | |||
outputs.columns = self._input_column_names | |||
output_columns = [outputs] | |||
else: # pragma: no cover | |||
if self.hyperparams['error_on_no_input']: | |||
raise RuntimeError("No input columns were selected") | |||
self.logger.warn("No input columns were selected") | |||
outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], | |||
add_index_columns=self.hyperparams['add_index_columns'], | |||
inputs=inputs, column_indices=self._training_indices, | |||
columns_list=output_columns) | |||
return CallResult(outputs) | |||
def get_params(self) -> Params_ODBase: | |||
""" | |||
Return parameters. | |||
Args: | |||
None | |||
Returns: | |||
class Params_ODBase | |||
""" | |||
if not self._fitted: | |||
return Params_ODBase( | |||
# decision_scores_=None, | |||
# threshold_=None, | |||
# labels_=None, | |||
left_inds_=None, | |||
right_inds_=None, | |||
clf_=copy.copy(self._clf), | |||
# Keep previous | |||
input_column_names=self._input_column_names, | |||
training_indices_=self._training_indices, | |||
target_names_=self._target_names, | |||
target_column_indices_=self._target_column_indices, | |||
target_columns_metadata_=self._target_columns_metadata | |||
) | |||
return Params_ODBase( | |||
# decision_scores_=getattr(self._clf, 'decision_scores_', None), | |||
# threshold_=getattr(self._clf, 'threshold_', None), | |||
# labels_=getattr(self._clf, 'labels_', None), | |||
left_inds_=self.left_inds_, # numpy.array(self.left_inds_) | |||
right_inds_=self.right_inds_, # numpy.array(self.right_inds_) | |||
clf_=copy.copy(self._clf), | |||
# Keep previous | |||
input_column_names=self._input_column_names, | |||
training_indices_=self._training_indices, | |||
target_names_=self._target_names, | |||
target_column_indices_=self._target_column_indices, | |||
target_columns_metadata_=self._target_columns_metadata | |||
) | |||
# pass | |||
def set_params(self, *, params: Params_ODBase) -> None: | |||
""" | |||
Set parameters for outlier detection. | |||
Args: | |||
params: class Params_ODBase | |||
Returns: | |||
None | |||
""" | |||
# self._clf.decision_scores_ = params['decision_scores_'] | |||
# self._clf.threshold_ = params['threshold_'] | |||
# self._clf.labels_ = params['labels_'] | |||
self.left_inds_ = params['left_inds_'] | |||
self.right_inds_ = params['right_inds_'] | |||
self._clf = copy.copy(params['clf_']) | |||
# Keep previous | |||
self._input_column_names = params['input_column_names'] | |||
self._training_indices = params['training_indices_'] | |||
self._target_names = params['target_names_'] | |||
self._target_column_indices = params['target_column_indices_'] | |||
self._target_columns_metadata = params['target_columns_metadata_'] | |||
# if params['decision_scores_'] is not None: | |||
# self._fitted = True | |||
# if params['threshold_'] is not None: | |||
# self._fitted = True | |||
# if params['labels_'] is not None: | |||
# self._fitted = True | |||
if params['left_inds_'] is not None: | |||
self._fitted = True | |||
if params['right_inds_'] is not None: | |||
self._fitted = True | |||
@classmethod | |||
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): # pragma: no cover | |||
""" | |||
Select columns to fit. | |||
Args: | |||
inputs: Container DataFrame | |||
hyperparams: d3m.metadata.hyperparams.Hyperparams | |||
        Returns:
            Tuple of the selected columns (DataFrame) and their indices (list)
""" | |||
#print("*******************get columns to fit***********") | |||
if not hyperparams['use_semantic_types']: | |||
return inputs, list(range(len(inputs.columns))) | |||
inputs_metadata = inputs.metadata | |||
#print("inputs_metadata ", inputs_metadata) | |||
def can_produce_column(column_index: int) -> bool: | |||
return cls._can_produce_column(inputs_metadata, column_index, hyperparams) | |||
columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, | |||
use_columns=hyperparams['use_columns'], | |||
exclude_columns=hyperparams['exclude_columns'], | |||
can_use_column=can_produce_column) | |||
#print("columns_to_produce ", columns_to_produce) | |||
return inputs.iloc[:, columns_to_produce], columns_to_produce | |||
# return columns_to_produce | |||
@classmethod | |||
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, | |||
hyperparams: Hyperparams) -> bool: # pragma: no cover | |||
""" | |||
Output whether a column can be processed. | |||
Args: | |||
            inputs_metadata: d3m.metadata.base.DataMetadata
            column_index: int
            hyperparams: d3m.metadata.hyperparams.Hyperparams
Returns: | |||
bool | |||
""" | |||
column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) | |||
#print("column metadasta ", ) | |||
accepted_structural_types = (int, float, numpy.integer, numpy.float64) | |||
accepted_semantic_types = set() | |||
accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") | |||
if not issubclass(column_metadata['structural_type'], accepted_structural_types): | |||
return False | |||
semantic_types = set(column_metadata.get('semantic_types', [])) | |||
#print("semantic_types ", column_metadata.get('semantic_types')) | |||
if len(semantic_types) == 0: | |||
cls.logger.warning("No semantic types found in column metadata") | |||
return False | |||
        # Ensure every accepted semantic type is present in the column's semantic types
        return accepted_semantic_types.issubset(semantic_types)
@classmethod | |||
def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: # pragma: no cover | |||
""" | |||
Output metadata of selected columns. | |||
Args: | |||
outputs_metadata: metadata_base.DataMetadata | |||
hyperparams: d3m.metadata.hyperparams.Hyperparams | |||
Returns: | |||
            List[OrderedDict]
""" | |||
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||
target_columns_metadata: List[OrderedDict] = [] | |||
for column_index in range(outputs_length): | |||
column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) | |||
# Update semantic types and prepare it for predicted targets. | |||
semantic_types = set(column_metadata.get('semantic_types', [])) | |||
            semantic_types_to_remove = set()
            add_semantic_types = set()
            add_semantic_types.add(hyperparams["return_semantic_type"])
semantic_types = semantic_types - semantic_types_to_remove | |||
semantic_types = semantic_types.union(add_semantic_types) | |||
column_metadata['semantic_types'] = list(semantic_types) | |||
target_columns_metadata.append(column_metadata) | |||
return target_columns_metadata | |||
@classmethod | |||
def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], | |||
target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: # pragma: no cover | |||
""" | |||
        Update metadata for selected columns.
Args: | |||
inputs_metadata: metadata_base.DataMetadata | |||
outputs: Container Dataframe | |||
target_columns_metadata: list | |||
Returns: | |||
d3m.metadata.base.DataMetadata | |||
""" | |||
outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) | |||
for column_index, column_metadata in enumerate(target_columns_metadata): | |||
column_metadata.pop("structural_type", None) | |||
outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) | |||
return outputs_metadata | |||
def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: # pragma: no cover | |||
""" | |||
Wrap predictions into dataframe | |||
Args: | |||
inputs: Container Dataframe | |||
predictions: array-like data (n_samples, n_features) | |||
Returns: | |||
Dataframe | |||
""" | |||
outputs = d3m_dataframe(predictions, generate_metadata=True) | |||
# target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata, | |||
# self.hyperparams) | |||
target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams, self.primitiveNo) | |||
outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) | |||
# print(outputs.metadata.to_internal_simple_structure()) | |||
return outputs | |||
@classmethod | |||
def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams, primitiveNo): # pragma: no cover | |||
""" | |||
Add target columns metadata | |||
Args: | |||
            outputs_metadata: metadata_base.DataMetadata
            hyperparams: d3m.metadata.hyperparams.Hyperparams
            primitiveNo: int
Returns: | |||
List[OrderedDict] | |||
""" | |||
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||
target_columns_metadata: List[OrderedDict] = [] | |||
for column_index in range(outputs_length): | |||
column_name = "{0}{1}_{2}".format(cls.metadata.query()['name'], primitiveNo, column_index) | |||
column_metadata = OrderedDict() | |||
semantic_types = set() | |||
semantic_types.add(hyperparams["return_semantic_type"]) | |||
column_metadata['semantic_types'] = list(semantic_types) | |||
column_metadata["name"] = str(column_name) | |||
target_columns_metadata.append(column_metadata) | |||
return target_columns_metadata | |||
@classmethod | |||
def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int], | |||
outputs_metadata: metadata_base.DataMetadata, hyperparams): # pragma: no cover | |||
""" | |||
        Copy metadata from the selected input columns to the output columns.
        Args:
            inputs_metadata: metadata_base.DataMetadata
            input_indices: list
            outputs_metadata: metadata_base.DataMetadata
            hyperparams: d3m.metadata.hyperparams.Hyperparams
        Returns:
            List[OrderedDict]
""" | |||
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||
target_columns_metadata: List[OrderedDict] = [] | |||
for column_index in input_indices: | |||
column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") | |||
if column_name is None: | |||
column_name = "output_{}".format(column_index) | |||
column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) | |||
semantic_types = set(column_metadata.get('semantic_types', [])) | |||
semantic_types_to_remove = set([]) | |||
add_semantic_types = set() | |||
add_semantic_types.add(hyperparams["return_semantic_type"]) | |||
semantic_types = semantic_types - semantic_types_to_remove | |||
semantic_types = semantic_types.union(add_semantic_types) | |||
column_metadata['semantic_types'] = list(semantic_types) | |||
column_metadata["name"] = str(column_name) | |||
target_columns_metadata.append(column_metadata) | |||
# If outputs has more columns than index, add Attribute Type to all remaining | |||
if outputs_length > len(input_indices): | |||
for column_index in range(len(input_indices), outputs_length): | |||
column_metadata = OrderedDict() | |||
semantic_types = set() | |||
semantic_types.add(hyperparams["return_semantic_type"]) | |||
column_name = "output_{}".format(column_index) | |||
column_metadata["semantic_types"] = list(semantic_types) | |||
column_metadata["name"] = str(column_name) | |||
target_columns_metadata.append(column_metadata) | |||
return target_columns_metadata |
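# A hedged usage sketch of the detector lifecycle defined above. MyDetectorPrimitive,
# MyDetectorHyperparams and df are hypothetical stand-ins; fit() and produce() are
# assumed to be the public wrappers that the TODS base classes route to the _fit(),
# _produce() and _produce_score() methods shown here.
#
#     hp = MyDetectorHyperparams.defaults()        # subclass of Hyperparams_ODBase
#     primitive = MyDetectorPrimitive(hyperparams=hp)
#     primitive.set_training_data(inputs=df)       # df: d3m container DataFrame
#     primitive.fit()
#     labels = primitive.produce(inputs=df).value        # 0 = inlier, 1 = outlier
#     scores = primitive.produce_score(inputs=df).value  # raw decision scores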
@@ -25,7 +25,7 @@ from d3m.primitive_interfaces import base, transformer | |||
from d3m.metadata import base as metadata_base, hyperparams | |||
from d3m.metadata import hyperparams, params, base as metadata_base | |||
from d3m.primitive_interfaces.base import CallResult, DockerContainer | |||
from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
from statsmodels.tsa.stattools import acf | |||
@@ -186,7 +186,7 @@ class ACF: | |||
class AutoCorrelationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class AutoCorrelationPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
A primitive that performs autocorrelation on a DataFrame | |||
acf() function documentation: https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.acf.html | |||
@@ -233,26 +233,8 @@ class AutoCorrelationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outp | |||
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'AutocorrelationPrimitive')), | |||
}) | |||
def __init__(self, *, | |||
hyperparams: Hyperparams, # | |||
random_seed: int = 0, | |||
docker_containers: Dict[str, DockerContainer] = None) -> None: | |||
super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) | |||
self._clf = ACF(unbiased = hyperparams['unbiased'], | |||
nlags = hyperparams['nlags'], | |||
qstat = hyperparams['qstat'], | |||
fft = hyperparams['fft'], | |||
alpha = hyperparams['alpha'], | |||
missing = hyperparams['missing'] | |||
) | |||
self.primitiveNo = PrimitiveCount.primitive_no | |||
PrimitiveCount.primitive_no+=1 | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
""" | |||
Process the testing data. | |||
Args: | |||
@@ -261,6 +243,16 @@ class AutoCorrelationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outp | |||
Returns: | |||
Container DataFrame after AutoCorrelation. | |||
""" | |||
self._clf = ACF(unbiased = self.hyperparams['unbiased'], | |||
nlags = self.hyperparams['nlags'], | |||
qstat = self.hyperparams['qstat'], | |||
fft = self.hyperparams['fft'], | |||
alpha = self.hyperparams['alpha'], | |||
missing = self.hyperparams['missing'] | |||
) | |||
self.primitiveNo = PrimitiveCount.primitive_no | |||
PrimitiveCount.primitive_no+=1 | |||
# Get cols to fit. | |||
self._fitted = False | |||
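# Design note on the hunk above: ACF construction and the PrimitiveCount bookkeeping
# move from __init__ into _produce, so the statsmodels wrapper is created only when
# the primitive is actually invoked.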
@@ -20,6 +20,7 @@ from d3m import utils | |||
from d3m.base import utils as base_utils | |||
from d3m.exceptions import PrimitiveNotFittedError | |||
from d3m.primitive_interfaces.base import CallResult, DockerContainer | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
import os.path | |||
@@ -118,7 +119,7 @@ class Hyperparams(hyperparams.Hyperparams): | |||
) | |||
class BKFilterPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class BKFilterPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
Filter a time series using the Baxter-King bandpass filter. | |||
@@ -173,7 +174,7 @@ class BKFilterPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hy | |||
}) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||
""" | |||
Process the testing data. | |||
Args: | |||
@@ -14,6 +14,7 @@ import math | |||
from scipy.fft import dct | |||
from collections import OrderedDict | |||
from typing import cast, Dict, List, Union, Sequence, Optional, Tuple | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
from scipy import sparse | |||
@@ -160,7 +161,7 @@ class DCT: | |||
class DiscreteCosineTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class DiscreteCosineTransformPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
Compute the 1-D discrete Cosine Transform. | |||
Return the Discrete Cosine Transform of arbitrary type sequence x. | |||
@@ -242,7 +243,7 @@ class DiscreteCosineTransformPrimitive(transformer.TransformerPrimitiveBase[Inpu | |||
workers = self.hyperparams['workers'] | |||
) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
""" | |||
Args: | |||
@@ -17,6 +17,7 @@ from typing import cast, Dict, List, Union, Sequence, Optional, Tuple | |||
from scipy import sparse | |||
from numpy import ndarray | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
__all__ = ('FastFourierTransformPrimitive',) | |||
@@ -157,7 +158,7 @@ class FFT: | |||
class FastFourierTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class FastFourierTransformPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
Compute the 1-D discrete Fourier Transform. | |||
This function computes the 1-D n-point discrete Fourier Transform (DFT) with the efficient Fast Fourier Transform (FFT) algorithm | |||
@@ -232,7 +233,7 @@ class FastFourierTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, | |||
workers = self.hyperparams['workers'] | |||
) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
""" | |||
Args: | |||
@@ -21,6 +21,7 @@ from d3m.base import utils as base_utils | |||
from d3m.exceptions import PrimitiveNotFittedError | |||
from d3m.primitive_interfaces.base import CallResult, DockerContainer | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
import statsmodels.api as sm | |||
@@ -101,7 +102,7 @@ class Hyperparams(hyperparams.Hyperparams): | |||
) | |||
class HPFilterPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class HPFilterPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
Filter a time series using the Hodrick-Prescott filter. | |||
@@ -150,7 +151,7 @@ class HPFilterPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hy | |||
}) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||
""" | |||
Process the testing data. | |||
Args: | |||
@@ -15,6 +15,7 @@ import numpy | |||
from numpy import ndarray | |||
import warnings | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
__all__ = ('NonNegativeMatrixFactorizationPrimitive',) | |||
@@ -211,7 +212,7 @@ class NMF: | |||
return result | |||
class NonNegativeMatrixFactorizationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class NonNegativeMatrixFactorizationPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
Calculates Latent factors of a given matrix of timeseries data | |||
@@ -299,7 +300,7 @@ class NonNegativeMatrixFactorizationPrimitive(transformer.TransformerPrimitiveBa | |||
learning_rate = self.hyperparams['learning_rate'], | |||
) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
        assert isinstance(inputs, container.DataFrame), type(inputs)
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base | |||
from d3m.base import utils as base_utils | |||
from d3m.exceptions import PrimitiveNotFittedError | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
__all__ = ('SpectralResidualTransformPrimitive',) | |||
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): | |||
class SpectralResidualTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class SpectralResidualTransformPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
Primitive to find Spectral Residual Transform of time series | |||
""" | |||
@@ -110,7 +111,7 @@ class SpectralResidualTransformPrimitive(transformer.TransformerPrimitiveBase[In | |||
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'SpectralResidualTransformPrimitive')), | |||
}) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
""" | |||
Args: | |||
@@ -20,6 +20,7 @@ from d3m.primitive_interfaces import base, transformer | |||
from d3m.container import DataFrame as d3m_dataframe | |||
from d3m.metadata import hyperparams, params, base as metadata_base | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
from d3m.base import utils as base_utils | |||
from d3m.exceptions import PrimitiveNotFittedError | |||
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): | |||
class StatisticalAbsEnergyPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class StatisticalAbsEnergyPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
Primitive to find abs_energy of time series | |||
""" | |||
@@ -112,7 +113,7 @@ class StatisticalAbsEnergyPrimitive(transformer.TransformerPrimitiveBase[Inputs, | |||
}) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
""" | |||
Args: | |||
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base | |||
from d3m.base import utils as base_utils | |||
from d3m.exceptions import PrimitiveNotFittedError | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
__all__ = ('StatisticalAbsSumPrimitive',) | |||
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): | |||
class StatisticalAbsSumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class StatisticalAbsSumPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
Primitive to find abs_sum of time series | |||
""" | |||
@@ -109,7 +110,7 @@ class StatisticalAbsSumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Ou | |||
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalAbsSumPrimitive')), | |||
}) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
""" | |||
Args: | |||
@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base | |||
from d3m.base import utils as base_utils | |||
from d3m.exceptions import PrimitiveNotFittedError | |||
from d3m.exceptions import UnexpectedValueError | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
__all__ = ('StatisticalGmeanPrimitive',) | |||
@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams): | |||
class StatisticalGmeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class StatisticalGmeanPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
    Primitive to find gmean of time series.
    Will only take positive values as inputs.
@@ -111,7 +112,7 @@ class StatisticalGmeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Out | |||
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalGmeanPrimitive')), | |||
}) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
""" | |||
Args: | |||
@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base | |||
from d3m.base import utils as base_utils | |||
from d3m.exceptions import PrimitiveNotFittedError | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
__all__ = ('StatisticalHmeanPrimitive',) | |||
@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams): | |||
class StatisticalHmeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class StatisticalHmeanPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
    Primitive to find the harmonic mean of time series.
    The harmonic mean is only defined if all elements are greater than or equal to zero.
@@ -113,7 +114,7 @@ class StatisticalHmeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Out | |||
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalHmeanPrimitive')), | |||
}) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
""" | |||
Args: | |||
@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base | |||
from d3m.base import utils as base_utils | |||
from d3m.exceptions import PrimitiveNotFittedError | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
__all__ = ('StatisticalKurtosisPrimitive',) | |||
@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams): | |||
class StatisticalKurtosisPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class StatisticalKurtosisPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
Primitive to find kurtosis of time series | |||
""" | |||
@@ -110,7 +111,7 @@ class StatisticalKurtosisPrimitive(transformer.TransformerPrimitiveBase[Inputs, | |||
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalKurtosisPrimitive')), | |||
}) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
""" | |||
Args: | |||
@@ -9,11 +9,11 @@ from numpy import ndarray | |||
from collections import OrderedDict | |||
from scipy import sparse | |||
import os | |||
import uuid | |||
import numpy | |||
import typing | |||
import time | |||
import uuid | |||
from d3m import container | |||
from d3m.primitive_interfaces import base, transformer | |||
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base | |||
from d3m.base import utils as base_utils | |||
from d3m.exceptions import PrimitiveNotFittedError | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
__all__ = ('StatisticalMaximumPrimitive',) | |||
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): | |||
class StatisticalMaximumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class StatisticalMaximumPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
Primitive to find maximum of time series | |||
""" | |||
@@ -110,7 +111,7 @@ class StatisticalMaximumPrimitive(transformer.TransformerPrimitiveBase[Inputs, O | |||
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMaximumPrimitive')), | |||
}) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
""" | |||
Args: | |||
@@ -159,11 +160,11 @@ class StatisticalMaximumPrimitive(transformer.TransformerPrimitiveBase[Inputs, O | |||
if self.hyperparams['error_on_no_input']: | |||
raise RuntimeError("No input columns were selected") | |||
self.logger.warn("No input columns were selected") | |||
outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], | |||
add_index_columns=self.hyperparams['add_index_columns'], | |||
inputs=inputs, column_indices=self._training_indices, | |||
columns_list=output_columns) | |||
self.logger.info('Statistical Maximum Primitive returned') | |||
return base.CallResult(outputs) | |||
@@ -314,6 +315,6 @@ class StatisticalMaximumPrimitive(transformer.TransformerPrimitiveBase[Inputs, O | |||
sequence = column_value[iter-window_size+1:iter+1] | |||
column_maximum[iter] = np.max(sequence) | |||
column_maximum[:window_size-1] = column_maximum[window_size-1] | |||
transformed_X[column + "_maximum"] = column_maximum | |||
transformed_X[str(column) + "_maximum"] = column_maximum | |||
return transformed_X |
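# A minimal NumPy sketch of the trailing-window maximum computed in the hunk above
# (standalone helper, not part of the primitive's API): each position takes the max
# over the previous window_size values, and the first window_size - 1 positions are
# back-filled with the first full-window value.
import numpy as np

def rolling_max(values, window_size):
    out = np.zeros(len(values))
    for i in range(window_size - 1, len(values)):
        out[i] = np.max(values[i - window_size + 1:i + 1])
    out[:window_size - 1] = out[window_size - 1]
    return out

# rolling_max(np.array([1., 5., 2., 4., 3.]), window_size=2) -> [5., 5., 5., 4., 4.]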
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base | |||
from d3m.base import utils as base_utils | |||
from d3m.exceptions import PrimitiveNotFittedError | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
__all__ = ('StatisticalMeanPrimitive',) | |||
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): | |||
class StatisticalMeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class StatisticalMeanPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
Primitive to find mean of time series | |||
""" | |||
@@ -110,7 +111,7 @@ class StatisticalMeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outp | |||
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMeanPrimitive')), | |||
}) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
""" | |||
Args: | |||
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base | |||
from d3m.base import utils as base_utils | |||
from d3m.exceptions import PrimitiveNotFittedError | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
__all__ = ('StatisticalMeanAbsPrimitive',) | |||
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): | |||
class StatisticalMeanAbsPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class StatisticalMeanAbsPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
Primitive to find mean_abs of time series | |||
""" | |||
@@ -109,7 +110,7 @@ class StatisticalMeanAbsPrimitive(transformer.TransformerPrimitiveBase[Inputs, O | |||
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMeanAbsPrimitive')), | |||
}) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
""" | |||
Args: | |||
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base | |||
from d3m.base import utils as base_utils | |||
from d3m.exceptions import PrimitiveNotFittedError | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
__all__ = ('StatisticalMeanAbsTemporalDerivativePrimitive',) | |||
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): | |||
class StatisticalMeanAbsTemporalDerivativePrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class StatisticalMeanAbsTemporalDerivativePrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
Primitive to find mean_abs_temporal_derivative of time series | |||
""" | |||
@@ -110,7 +111,7 @@ class StatisticalMeanAbsTemporalDerivativePrimitive(transformer.TransformerPrimi | |||
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMeanAbsTemporalDerivativePrimitive')), | |||
}) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
""" | |||
Args: | |||
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base | |||
from d3m.base import utils as base_utils | |||
from d3m.exceptions import PrimitiveNotFittedError | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
__all__ = ('StatisticalMeanTemporalDerivativePrimitive',) | |||
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): | |||
class StatisticalMeanTemporalDerivativePrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class StatisticalMeanTemporalDerivativePrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
Primitive to find mean_temporal_derivative of time series | |||
""" | |||
@@ -110,7 +111,7 @@ class StatisticalMeanTemporalDerivativePrimitive(transformer.TransformerPrimitiv | |||
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMeanTemporalDerivativePrimitive')), | |||
}) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
""" | |||
Args: | |||
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base | |||
from d3m.base import utils as base_utils | |||
from d3m.exceptions import PrimitiveNotFittedError | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
__all__ = ('StatisticalMedianPrimitive',) | |||
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): | |||
class StatisticalMedianPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class StatisticalMedianPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
Primitive to find median of time series | |||
""" | |||
@@ -110,7 +111,7 @@ class StatisticalMedianPrimitive(transformer.TransformerPrimitiveBase[Inputs, Ou | |||
}) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
""" | |||
Args: | |||
@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base | |||
from d3m.base import utils as base_utils | |||
from d3m.exceptions import PrimitiveNotFittedError | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
__all__ = ('StatisticalMedianAbsoluteDeviationPrimitive',) | |||
@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams): | |||
class StatisticalMedianAbsoluteDeviationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class StatisticalMedianAbsoluteDeviationPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
Primitive to find median_absolute_deviation of time series | |||
""" | |||
@@ -111,7 +112,7 @@ class StatisticalMedianAbsoluteDeviationPrimitive(transformer.TransformerPrimiti | |||
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMedianAbsoluteDeviationPrimitive')), | |||
}) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
""" | |||
Args: | |||
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base | |||
from d3m.base import utils as base_utils | |||
from d3m.exceptions import PrimitiveNotFittedError | |||
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase | |||
__all__ = ('StatisticalMinimumPrimitive',) | |||
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams): | |||
class StatisticalMinimumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
class StatisticalMinimumPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
Primitive to find minimum of time series | |||
""" | |||
@@ -110,7 +111,7 @@ class StatisticalMinimumPrimitive(transformer.TransformerPrimitiveBase[Inputs, O | |||
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMinimumPrimitive')), | |||
}) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
""" | |||
Args: | |||
@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
 __all__ = ('StatisticalSkewPrimitive',)
@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalSkewPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalSkewPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find skew of time series
     """
@@ -111,7 +112,7 @@ class StatisticalSkewPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outp
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalSkewPrimitive')),
     })
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
 __all__ = ('StatisticalStdPrimitive',)
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalStdPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalStdPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find std of time series
     """
@@ -110,7 +111,7 @@ class StatisticalStdPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outpu
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalStdPrimitive')),
     })
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
 __all__ = ('StatisticalVarPrimitive',)
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalVarPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalVarPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find var of time series
     """
@@ -109,7 +110,7 @@ class StatisticalVarPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outpu
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalVarPrimitive')),
     })
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
 __all__ = ('StatisticalVariationPrimitive',)
@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalVariationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalVariationPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find variation of time series
     """
@@ -112,7 +113,7 @@ class StatisticalVariationPrimitive(transformer.TransformerPrimitiveBase[Inputs,
     })
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
 __all__ = ('StatisticalVecSumPrimitive',)
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalVecSumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalVecSumPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find vec_sum of time series
     """
@@ -110,7 +111,7 @@ class StatisticalVecSumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Ou
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalVecSumPrimitive')),
     })
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
 __all__ = ('StatisticalWillisonAmplitudePrimitive',)
@@ -91,7 +92,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalWillisonAmplitudePrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalWillisonAmplitudePrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find willison amplitude of time series
     """
@@ -114,7 +115,7 @@ class StatisticalWillisonAmplitudePrimitive(transformer.TransformerPrimitiveBase
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalWillisonAmplitudePrimitive')),
     })
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
 __all__ = ('StatisticalZeroCrossingPrimitive',)
@@ -83,7 +84,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalZeroCrossingPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalZeroCrossingPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find zero_crossing of time series. Adds a column indicating zero crossings: on the i-th row, 1 marks a zero crossing and 0 marks a normal point.
     """
@@ -105,7 +106,7 @@ class StatisticalZeroCrossingPrimitive(transformer.TransformerPrimitiveBase[Inpu
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalZeroCrossingPrimitive')),
     })
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
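
One thing these hunks leave untouched is each primitive's metadata `id`, generated with `uuid.uuid3`. Since `uuid.uuid3` produces a name-based (MD5) UUID, the same namespace/name pair always yields the same id, so renaming `produce` cannot silently change primitive identities:

```python
import uuid

# Name-based UUIDs are deterministic: same namespace + name, same id.
a = uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalZeroCrossingPrimitive')
b = uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalZeroCrossingPrimitive')
assert a == b   # unlike uuid.uuid4(), which is random
print(str(a))   # stable across runs and machines
```
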
@@ -22,6 +22,7 @@ from d3m.exceptions import PrimitiveNotFittedError
 from d3m.primitive_interfaces.base import CallResult, DockerContainer
 from d3m.primitive_interfaces import base, transformer
 # from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
 Inputs = d3m_dataframe
@@ -161,7 +162,7 @@ class Hyperparams(hyperparams.Hyperparams):
         semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
     )
-class TRMFPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class TRMFPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """Temporal Regularized Matrix Factorization.
     Parameters
@@ -241,7 +242,7 @@ class TRMFPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperp
     })
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
         """
         Process the testing data.
         Args:
@@ -19,6 +19,7 @@ from collections import OrderedDict
 from scipy import sparse
 import logging
 import uuid
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
 __all__ = ('WaveletTransformPrimitive',)
@@ -148,7 +149,7 @@ class Hyperparams(hyperparams.Hyperparams):
     )
-class WaveletTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class WaveletTransformPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     A primitive of Multilevel 1D Discrete Wavelet Transform of data.
     See `PyWavelet documentation <https://pywavelets.readthedocs.io/en/latest/ref/>`_ for details.
@@ -203,7 +204,7 @@ class WaveletTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Out
     )
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Process the testing data.
         Args:
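
For pipeline code, nothing should change at the call site: `produce` presumably remains the public entry point, now inherited from the shared base, and `_produce` is the internal hook. A hedged usage sketch (the import path is assumed from the tests that follow, and the base class body is not part of this diff):

```python
from d3m import container
from tods.feature_analysis import StatisticalMean  # path assumed from the tests below

main = container.DataFrame(
    {'timestamp': [1, 2, 3, 4], 'values': [1.0, 2.0, 3.0, 4.0]},
    generate_metadata=True,
)

hp = StatisticalMean.StatisticalMeanPrimitive.metadata.get_hyperparams().defaults()
primitive = StatisticalMean.StatisticalMeanPrimitive(hyperparams=hp)

result = primitive.produce(inputs=main)  # public API, unchanged for callers
output = result.value                    # CallResult wraps the output DataFrame
```
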
@@ -126,7 +126,7 @@ class ABODTest(unittest.TestCase):
             'selector': ['__ALL_ELEMENTS__', 0],
             'metadata': {
                 'name': 'Angle-base Outlier Detection Primitive0_0',
-                'structural_type': 'numpy.float64',
+                'structural_type': 'numpy.int64',
                 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute']
             },
         }])
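
The expected `structural_type` flips from `numpy.float64` to `numpy.int64` here and in the detection tests below, which is consistent with the detectors now emitting integer 0/1 anomaly labels: d3m metadata records the actual dtype of each column. A quick way to see that, assuming a 64-bit platform where integer columns default to int64:

```python
from d3m import container
from d3m.metadata import base as metadata_base

# An integer label column is recorded with structural_type numpy.int64.
labels = container.DataFrame({'label': [0, 1, 0]}, generate_metadata=True)
col_meta = labels.metadata.query((metadata_base.ALL_ELEMENTS, 0))
print(col_meta['structural_type'])  # <class 'numpy.int64'>
```
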
@@ -63,8 +63,6 @@ class HBOSTest(unittest.TestCase):
         primitive.fit()
         new_main = primitive.produce(inputs=main).value
         new_main_score = primitive.produce_score(inputs=main).value
-        print(new_main)
-        print(new_main_score)
         self.assertEqual(utils.to_json_structure(new_main.metadata.to_internal_simple_structure()), [{
@@ -125,7 +123,7 @@ class HBOSTest(unittest.TestCase):
             'selector': ['__ALL_ELEMENTS__', 0],
             'metadata': {
                 'name': 'HBOS0_0',
-                'structural_type': 'numpy.float64',
+                'structural_type': 'numpy.int64',
                 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute']
             },
         }])
@@ -5,7 +5,7 @@ from d3m.metadata import base as metadata_base
 from tods.detection_algorithm.Telemanom import TelemanomPrimitive
-class SODTest(unittest.TestCase):
+class TelemanomTest(unittest.TestCase):
     def test_basic(self):
         self.maxDiff = None
         main = container.DataFrame({'a': [1., 2., 3., 4.,5,6,7,8,9], 'b': [2., 3., 4., 5.,6,7,8,9,10], 'c': [3., 4., 5., 6.,7,8,9,10,11]},
@@ -91,21 +91,21 @@ class SODTest(unittest.TestCase):
             'selector': ['__ALL_ELEMENTS__', 0],
             'metadata': {
                 'name': 'Telemanom0_0',
-                'structural_type': 'numpy.float64',
+                'structural_type': 'numpy.int64',
                 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
             },
         }, {
             'selector': ['__ALL_ELEMENTS__', 1],
             'metadata': {
-                'structural_type': 'numpy.float64',
                 'name': 'Telemanom0_1',
+                'structural_type': 'numpy.int64',
                 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
             },
         }, {
             'selector': ['__ALL_ELEMENTS__', 2],
             'metadata': {
-                'structural_type': 'numpy.float64',
                 'name': 'Telemanom0_2',
+                'structural_type': 'numpy.int64',
                 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
             }
         }])
@@ -66,7 +66,7 @@ class AutoCorrelationTestCase(unittest.TestCase):
         hyperparams_class = AutoCorrelation.AutoCorrelationPrimitive.metadata.get_hyperparams().defaults()
         hyperparams_class = hyperparams_class.replace({'nlags': 2})
         primitive = AutoCorrelation.AutoCorrelationPrimitive(hyperparams=hyperparams_class)
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         print(new_main)
         # new_main_drop = new_main['value_acf']
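
All of the transformation tests below switch from `produce` to `_produce` in the same way. The hook still returns a `CallResult`, so `.value` unwraps the output frame exactly as before; a condensed version of the pattern, with data mirrored from the StatisticalStd test further down (import path assumed):

```python
from d3m import container
from tods.feature_analysis import StatisticalStd  # import path assumed

main = container.DataFrame(
    {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0]},
    generate_metadata=True,
)

hp = StatisticalStd.StatisticalStdPrimitive.metadata.get_hyperparams().defaults()
primitive = StatisticalStd.StatisticalStdPrimitive(hyperparams=hp)

output_main = primitive._produce(inputs=main).value  # CallResult -> DataFrame
print(output_main)
```
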
@@ -54,7 +54,7 @@ class BKFilterTest(unittest.TestCase):
         hyperparams_class = BKFilter.BKFilterPrimitive.metadata.get_hyperparams()
         primitive = BKFilter.BKFilterPrimitive(hyperparams=hyperparams_class.defaults())
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         print(new_main)
@@ -66,7 +66,7 @@ class DctTestCase(unittest.TestCase):
             'return_result':'append',
         })
         primitive = DiscreteCosineTransform.DiscreteCosineTransformPrimitive(hyperparams=hp)
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         c = pd.DataFrame({"A":[1,2,3], "B":['a','b','c'],'A_dct_coeff':[1.200000e+01,-3.464102e+00,-4.440892e-16]})
@@ -67,7 +67,7 @@ class FftTestCase(unittest.TestCase):
             'return_result':'append',
         })
         primitive = FastFourierTransform.FastFourierTransformPrimitive(hyperparams=hp)
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         c = pd.DataFrame({"A":[1,2,3], "B":['a','b','c'],'A_fft_abs':[6.000000,1.732051,1.732051],'A_fft_phse':[-0.000000,2.617994,-2.617994]})
@@ -54,7 +54,7 @@ class HPFilterTest(unittest.TestCase):
         hyperparams_class = HPFilter.HPFilterPrimitive.metadata.get_hyperparams()
         primitive = HPFilter.HPFilterPrimitive(hyperparams=hyperparams_class.defaults())
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         print(new_main)
@@ -74,7 +74,7 @@ class NmfTestCase(unittest.TestCase):
             'H': b,
         })
         primitive = NonNegativeMatrixFactorization.NonNegativeMatrixFactorizationPrimitive(hyperparams=hp)
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         print("new_main",new_main)
         c = pd.DataFrame({"A":[1,2,3,np.nan,np.nan], "B":[4,5,6,np.nan,np.nan],
@@ -55,7 +55,7 @@ class SpectralResidualTransformTestCase(unittest.TestCase):
         primitive = SpectralResidualTransform.SpectralResidualTransformPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalStdTestCase(unittest.TestCase):
         primitive = StatisticalStd.StatisticalStdPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalAbsEnergyTestCase(unittest.TestCase):
         primitive = StatisticalAbsEnergy.StatisticalAbsEnergyPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalAbsSumTestCase(unittest.TestCase):
         primitive = StatisticalAbsSum.StatisticalAbsSumPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, -5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalGmeanTestCase(unittest.TestCase):
         primitive = StatisticalGmean.StatisticalGmeanPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_gmean', 'b_gmean']])
         expected_output = container.DataFrame(
@@ -56,7 +56,7 @@ class StatisticalHmeanTestCase(unittest.TestCase):
         primitive = StatisticalHmean.StatisticalHmeanPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         #print(output_main[['values_hmean', 'b_hmean']])
         expected_output = container.DataFrame(
@@ -56,7 +56,7 @@ class StatisticalKurtosisTestCase(unittest.TestCase):
         primitive = StatisticalKurtosis.StatisticalKurtosisPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_kurtosis', 'b_kurtosis']])
         expected_output = container.DataFrame(
@@ -56,7 +56,7 @@ class StatisticalMaximumTestCase(unittest.TestCase):
         primitive = StatisticalMaximum.StatisticalMaximumPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalMeanTestCase(unittest.TestCase):
         primitive = StatisticalMean.StatisticalMeanPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalMeanAbsTestCase(unittest.TestCase):
         primitive = StatisticalMeanAbs.StatisticalMeanAbsPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalMeanAbsTemporalDerivativeTestCase(unittest.TestCase):
         primitive = StatisticalMeanAbsTemporalDerivative.StatisticalMeanAbsTemporalDerivativePrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_mean_abs_temporal_derivative', 'b_mean_abs_temporal_derivative']])
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalMeanTemporalDerivativeTestCase(unittest.TestCase):
         primitive = StatisticalMeanTemporalDerivative.StatisticalMeanTemporalDerivativePrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_mean_temporal_derivative', 'b_mean_temporal_derivative']])
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalMedianTestCase(unittest.TestCase):
         primitive = StatisticalMedian.StatisticalMedianPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalMedianAbsoluteDeviationTestCase(unittest.TestCase):
         primitive = StatisticalMedianAbsoluteDeviation.StatisticalMedianAbsoluteDeviationPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_median_absolute_deviation', 'b_median_absolute_deviation']])
         expected_output = container.DataFrame(
@@ -56,7 +56,7 @@ class StatisticalMinimumTestCase(unittest.TestCase):
         primitive = StatisticalMinimum.StatisticalMinimumPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalSkewTestCase(unittest.TestCase):
         primitive = StatisticalSkew.StatisticalSkewPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_skew', 'b_skew']])
         expected_output = container.DataFrame(
@@ -56,7 +56,7 @@ class StatisticalVarTestCase(unittest.TestCase):
         primitive = StatisticalVar.StatisticalVarPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalVariationTestCase(unittest.TestCase):
         primitive = StatisticalVariation.StatisticalVariationPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_variation', 'b_variation']])
         expected_output = container.DataFrame(
@@ -56,7 +56,7 @@ class StatisticalVecSumTestCase(unittest.TestCase):
         primitive = StatisticalVecSum.StatisticalVecSumPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, -5.0, 6.0],
@@ -57,7 +57,7 @@ class StatisticalWillisonAmplitudeTestCase(unittest.TestCase):
         primitive = StatisticalWillisonAmplitude.StatisticalWillisonAmplitudePrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_willison_amplitude', 'b_willison_amplitude']])
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -55,7 +55,7 @@ class StatisticalZeroCrossingTestCase(unittest.TestCase):
         primitive = StatisticalZeroCrossing.StatisticalZeroCrossingPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)
         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, -2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class TRMFTest(unittest.TestCase):
         primitive = TRMF.TRMFPrimitive(hyperparams=hyperparams_class.defaults())
         # primitive.set_training_data(inputs=main)
         # primitive.fit()
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         print(new_main)
@@ -28,7 +28,7 @@ class WaveletTransformerTestCase(unittest.TestCase):
             'return_result': 'new'})
         primitive = WaveletTransformPrimitive(hyperparams=hyperparams)
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         # print(new_main)
         # print(mean_mse, std_mse)
@@ -89,7 +89,7 @@ class WaveletTransformerTestCase(unittest.TestCase):
         hyperparams = hyperparams_default.replace({'inverse': 1})
         primitive = WaveletTransformPrimitive(hyperparams=hyperparams)
-        main_recover = primitive.produce(inputs=main).value
+        main_recover = primitive._produce(inputs=main).value
         self.assertAlmostEqual(main_recover.values.tolist(), main.values.tolist(), delta=1e-6)
         # print(main.metadata.to_internal_simple_structure())
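
The final assertion checks the wavelet round trip: running the primitive again with `inverse: 1` should reconstruct the original input to within 1e-6. Stripped of the primitive machinery, that is the standard PyWavelets guarantee the primitive's docstring points to (the wavelet and level below are arbitrary illustrative choices, not the primitive's defaults):

```python
import numpy as np
import pywt

signal = np.sin(np.linspace(0.0, 3.0, 16))
coeffs = pywt.wavedec(signal, 'db2', level=2)  # forward multilevel DWT
recovered = pywt.waverec(coeffs, 'db2')        # inverse transform

assert np.allclose(recovered, signal, atol=1e-6)  # reconstruction within tolerance
```
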