@@ -33,72 +33,72 @@ d3mIndex,system,label
 31,31.csv,1
 32,32.csv,1
 33,33.csv,1
-34,34.csv,2
-35,35.csv,2
-36,36.csv,2
-37,37.csv,2
-38,38.csv,2
-39,39.csv,2
-40,40.csv,2
-41,41.csv,2
-42,42.csv,2
-43,43.csv,2
-44,44.csv,2
-45,45.csv,2
-46,46.csv,2
-47,47.csv,2
-48,48.csv,2
-49,49.csv,2
-50,50.csv,2
-51,51.csv,2
-52,52.csv,2
-53,53.csv,2
-54,54.csv,2
-55,55.csv,2
-56,56.csv,2
-57,57.csv,2
-58,58.csv,2
-59,59.csv,2
-60,60.csv,2
-61,61.csv,2
-62,62.csv,2
-63,63.csv,2
-64,64.csv,2
-65,65.csv,2
-66,66.csv,2
-67,67.csv,2
-68,68.csv,2
-69,69.csv,2
-70,70.csv,2
-71,71.csv,2
-72,72.csv,2
-73,73.csv,2
-74,74.csv,2
-75,75.csv,2
-76,76.csv,2
-77,77.csv,2
-78,78.csv,2
-79,79.csv,2
-80,80.csv,2
-81,81.csv,2
-82,82.csv,2
-83,83.csv,2
-84,84.csv,2
-85,85.csv,2
-86,86.csv,2
-87,87.csv,2
-88,88.csv,2
-89,89.csv,2
-90,90.csv,2
-91,91.csv,2
-92,92.csv,2
-93,93.csv,2
-94,94.csv,2
-95,95.csv,2
-96,96.csv,2
-97,97.csv,2
-98,98.csv,2
-99,99.csv,2
+34,34.csv,0
+35,35.csv,0
+36,36.csv,0
+37,37.csv,0
+38,38.csv,0
+39,39.csv,0
+40,40.csv,0
+41,41.csv,0
+42,42.csv,0
+43,43.csv,0
+44,44.csv,0
+45,45.csv,0
+46,46.csv,0
+47,47.csv,0
+48,48.csv,0
+49,49.csv,0
+50,50.csv,0
+51,51.csv,0
+52,52.csv,0
+53,53.csv,0
+54,54.csv,0
+55,55.csv,0
+56,56.csv,0
+57,57.csv,0
+58,58.csv,0
+59,59.csv,0
+60,60.csv,0
+61,61.csv,0
+62,62.csv,0
+63,63.csv,0
+64,64.csv,0
+65,65.csv,0
+66,66.csv,0
+67,67.csv,0
+68,68.csv,0
+69,69.csv,0
+70,70.csv,0
+71,71.csv,0
+72,72.csv,0
+73,73.csv,0
+74,74.csv,0
+75,75.csv,0
+76,76.csv,0
+77,77.csv,0
+78,78.csv,0
+79,79.csv,0
+80,80.csv,0
+81,81.csv,0
+82,82.csv,0
+83,83.csv,0
+84,84.csv,0
+85,85.csv,0
+86,86.csv,0
+87,87.csv,0
+88,88.csv,0
+89,89.csv,0
+90,90.csv,0
+91,91.csv,0
+92,92.csv,0
+93,93.csv,0
+94,94.csv,0
+95,95.csv,0
+96,96.csv,0
+97,97.csv,0
+98,98.csv,0
+99,99.csv,0
 100,100.csv,1
 101,101.csv,1
 102,102.csv,1
@@ -132,70 +132,70 @@ d3mIndex,system,label
 130,130.csv,1
 131,131.csv,1
 132,132.csv,1
-133,133.csv,2
-134,134.csv,2
-135,135.csv,2
-136,136.csv,2
-137,137.csv,2
-138,138.csv,2
-139,139.csv,2
-140,140.csv,2
-141,141.csv,2
-142,142.csv,2
-143,143.csv,2
-144,144.csv,2
-145,145.csv,2
-146,146.csv,2
-147,147.csv,2
-148,148.csv,2
-149,149.csv,2
-150,150.csv,2
-151,151.csv,2
-152,152.csv,2
-153,153.csv,2
-154,154.csv,2
-155,155.csv,2
-156,156.csv,2
-157,157.csv,2
-158,158.csv,2
-159,159.csv,2
-160,160.csv,2
-161,161.csv,2
-162,162.csv,2
-163,163.csv,2
-164,164.csv,2
-165,165.csv,2
-166,166.csv,2
-167,167.csv,2
-168,168.csv,2
-169,169.csv,2
-170,170.csv,2
-171,171.csv,2
-172,172.csv,2
-173,173.csv,2
-174,174.csv,2
-175,175.csv,2
-176,176.csv,2
-177,177.csv,2
-178,178.csv,2
-179,179.csv,2
-180,180.csv,2
-181,181.csv,2
-182,182.csv,2
-183,183.csv,2
-184,184.csv,2
-185,185.csv,2
-186,186.csv,2
-187,187.csv,2
-188,188.csv,2
-189,189.csv,2
-190,190.csv,2
-191,191.csv,2
-192,192.csv,2
-193,193.csv,2
-194,194.csv,2
-195,195.csv,2
-196,196.csv,2
-197,197.csv,2
-198,198.csv,2
-199,199.csv,2
+133,133.csv,0
+134,134.csv,0
+135,135.csv,0
+136,136.csv,0
+137,137.csv,0
+138,138.csv,0
+139,139.csv,0
+140,140.csv,0
+141,141.csv,0
+142,142.csv,0
+143,143.csv,0
+144,144.csv,0
+145,145.csv,0
+146,146.csv,0
+147,147.csv,0
+148,148.csv,0
+149,149.csv,0
+150,150.csv,0
+151,151.csv,0
+152,152.csv,0
+153,153.csv,0
+154,154.csv,0
+155,155.csv,0
+156,156.csv,0
+157,157.csv,0
+158,158.csv,0
+159,159.csv,0
+160,160.csv,0
+161,161.csv,0
+162,162.csv,0
+163,163.csv,0
+164,164.csv,0
+165,165.csv,0
+166,166.csv,0
+167,167.csv,0
+168,168.csv,0
+169,169.csv,0
+170,170.csv,0
+171,171.csv,0
+172,172.csv,0
+173,173.csv,0
+174,174.csv,0
+175,175.csv,0
+176,176.csv,0
+177,177.csv,0
+178,178.csv,0
+179,179.csv,0
+180,180.csv,0
+181,181.csv,0
+182,182.csv,0
+183,183.csv,0
+184,184.csv,0
+185,185.csv,0
+186,186.csv,0
+187,187.csv,0
+188,188.csv,0
+189,189.csv,0
+190,190.csv,0
+191,191.csv,0
+192,192.csv,0
+193,193.csv,0
+194,194.csv,0
+195,195.csv,0
+196,196.csv,0
+197,197.csv,0
+198,198.csv,0
+199,199.csv,0
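Note: the two hunks above relabel systems 34-99 and 133-199 from class 2 to class 0, so the label column becomes a conventional binary target in which 1 presumably marks anomalous systems and 0 normal ones. A minimal sketch for checking the result with pandas (the local filename labels.csv is hypothetical):

import pandas as pd

labels = pd.read_csv('labels.csv')      # columns: d3mIndex, system, label
print(labels['label'].value_counts())   # after the change, only 0 and 1 should appear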
@@ -1 +1 @@
{"id": "bfd8aedf-36be-4dad-af8a-c324a03db5f9", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2021-02-13T17:02:35.500457Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.6.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "c78138d9-9377-31dc-aee8-83d9df049c60", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.dataset_to_dataframe", "name": "Extract a DataFrame from a Dataset"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "81235c29-aeb9-3828-911a-1b25319b6998", "version": "0.6.0", "python_path": "d3m.primitives.tods.data_processing.column_parser", "name": "Parses strings into their types"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "642de2e7-5590-3cab-9266-2a53c326c461", "version": "0.0.1", "python_path": "d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler", "name": "Axis_wise_scale"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_ae", "name": "TODS.anomaly_detection_primitives.AutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "2530840a-07d4-3874-b7d8-9eb5e4ae2bf3", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.construct_predictions", "name": "Construct pipeline predictions output"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.5.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "01ad8ccf817150186ca15157a4f02ee1f738582137321a8a5a4a3252832ce555"} | |||||
{"id": "924e9a77-da5f-4bcc-b9a0-ed65bbaf87fa", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2021-03-11T23:41:13.884494Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.6.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "c78138d9-9377-31dc-aee8-83d9df049c60", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.dataset_to_dataframe", "name": "Extract a DataFrame from a Dataset"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "81235c29-aeb9-3828-911a-1b25319b6998", "version": "0.6.0", "python_path": "d3m.primitives.tods.data_processing.column_parser", "name": "Parses strings into their types"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "f07ce875-bbc7-36c5-9cc1-ba4bfb7cf48e", "version": "0.1.0", "python_path": "d3m.primitives.tods.feature_analysis.statistical_maximum", "name": "Time Series Decompostional"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_ae", "name": "TODS.anomaly_detection_primitives.AutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "2530840a-07d4-3874-b7d8-9eb5e4ae2bf3", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.construct_predictions", "name": "Construct pipeline predictions output"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.5.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "bb1cb5328299d8d65cabc152092da553db267494fb12e6320c66110b2c48a265"} |
@@ -41,7 +41,9 @@ attributes = 'steps.2.produce'
 targets = 'steps.3.produce'
 
 # Step 4: processing
-step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler'))
+#step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler'))
+step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_maximum'))
+#step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_minimum'))
 step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
 step_4.add_output('produce')
 pipeline_description.add_step(step_4)
@@ -64,7 +66,7 @@ pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')
 
 # Output to json
 data = pipeline_description.to_json()
-with open('example_pipeline.json', 'w') as f:
+with open('autoencoder_pipeline.json', 'w') as f:
     f.write(data)
 print(data)
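The emitted autoencoder_pipeline.json can be exercised directly with the TODS helpers used by the runner script further below; a minimal sketch, reusing the dataset path, target index, and metric from that script:

import pandas as pd
from tods import generate_dataset, load_pipeline, evaluate_pipeline

df = pd.read_csv('../../datasets/anomaly/raw_data/yahoo_sub_5.csv')
dataset = generate_dataset(df, 6)                        # column 6 holds the ground truth
pipeline = load_pipeline('autoencoder_pipeline.json')
print(evaluate_pipeline(dataset, pipeline, 'F1_MACRO'))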
@@ -57,29 +57,37 @@ attributes = 'steps.4.produce'
 targets = 'steps.5.produce'
 
 # Step 6: processing
-step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler'))
+step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_maximum'))
 step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
 step_6.add_output('produce')
 pipeline_description.add_step(step_6)
 
 # Step 7: algorithm
-step_7 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
+#step_7 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
+step_7 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ocsvm'))
 step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce')
-step_7.add_output('produce')
+step_7.add_output('produce_score')
 pipeline_description.add_step(step_7)
 
 # Step 8: Predictions
-step_8 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
-step_8.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.7.produce')
-step_8.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
+#step_8 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
+step_8 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.system_wise_detection'))
+step_8.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.7.produce_score')
+#step_8.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
 step_8.add_output('produce')
 pipeline_description.add_step(step_8)
 
+step_9 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
+step_9.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.8.produce')
+step_9.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
+step_9.add_output('produce')
+pipeline_description.add_step(step_9)
+
 # Final Output
-pipeline_description.add_output(name='output predictions', data_reference='steps.8.produce')
+pipeline_description.add_output(name='output predictions', data_reference='steps.9.produce')
 
 # Output to json
 data = pipeline_description.to_json()
-with open('example_pipeline.json', 'w') as f:
+with open('system_pipeline.json', 'w') as f:
     f.write(data)
 print(data)
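Since the system-wise pipeline now chains ten steps, a structural sanity check before running it can save a debugging round-trip. A minimal sketch using the d3m SDK's Pipeline class (from_json and check are part of d3m.metadata.pipeline; the default resolver needs the TODS primitives installed):

from d3m.metadata.pipeline import Pipeline

with open('system_pipeline.json') as f:
    pipeline = Pipeline.from_json(f.read())
pipeline.check()  # raises if a step references a missing output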
@@ -1 +1 @@
{"id": "fe8ceeee-a513-45d8-9e28-b46e11f9c635", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2021-02-11T21:28:54.508699Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.8.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e", "version": "0.2.0", "python_path": "d3m.primitives.tods.common.denormalize", "name": "Denormalize datasets"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.dataset_to_dataframe", "name": "Extract a DataFrame from a Dataset", "digest": "ba00092121d8971b0aa8c1f4b99e97151ca39b44f549eecc03fc61a286567a36"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "989562ac-b50f-4462-99cb-abef80d765b2", "version": "0.1.0", "python_path": "d3m.primitives.tods.common.csv_reader", "name": "Columns CSV reader"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"use_columns": {"type": "VALUE", "data": [0, 1]}, "return_result": {"type": "VALUE", "data": "replace"}}}, {"type": "PRIMITIVE", "primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7", "version": "0.6.0", "python_path": "d3m.primitives.tods.data_processing.column_parser", "name": "Parses strings into their types", "digest": "ef87bfbd3b35a2d78337c5d3aba9847dfdf56c05c5289e50fe0db766ef8126e0"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"parse_semantic_types": {"type": "VALUE", "data": ["http://schema.org/Boolean", "http://schema.org/Integer", "http://schema.org/Float", "https://metadata.datadrivendiscovery.org/types/FloatVector"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type", "digest": "35ab3368a69e46da89e4dbb70dab762d4c020c43a9424db622e8ac2ae5c57c06"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type", "digest": "35ab3368a69e46da89e4dbb70dab762d4c020c43a9424db622e8ac2ae5c57c06"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "642de2e7-5590-3cab-9266-2a53c326c461", "version": "0.0.1", "python_path": "d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler", "name": "Axis_wise_scale"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", 
"python_path": "d3m.primitives.tods.detection_algorithm.pyod_ae", "name": "TODS.anomaly_detection_primitives.AutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.6.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.construct_predictions", "name": "Construct pipeline predictions output", "digest": "d981f367776ef05d7311b85b86af717a599c7fd363b04db7531bd21ab30a8844"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.7.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "7033f0a107adae468d509f5706a6a79dfcb965d4d5a8d3aef4b79017d33956ed"} | |||||
{"id": "f9f918f3-4cd9-4d3c-9a84-8a95b18d3d7c", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2021-04-02T20:35:56.617972Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.9.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e", "version": "0.2.0", "python_path": "d3m.primitives.tods.common.denormalize", "name": "Denormalize datasets"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "c78138d9-9377-31dc-aee8-83d9df049c60", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.dataset_to_dataframe", "name": "Extract a DataFrame from a Dataset"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "989562ac-b50f-4462-99cb-abef80d765b2", "version": "0.1.0", "python_path": "d3m.primitives.tods.common.csv_reader", "name": "Columns CSV reader"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"use_columns": {"type": "VALUE", "data": [0, 1]}, "return_result": {"type": "VALUE", "data": "replace"}}}, {"type": "PRIMITIVE", "primitive": {"id": "81235c29-aeb9-3828-911a-1b25319b6998", "version": "0.6.0", "python_path": "d3m.primitives.tods.data_processing.column_parser", "name": "Parses strings into their types"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"parse_semantic_types": {"type": "VALUE", "data": ["http://schema.org/Boolean", "http://schema.org/Integer", "http://schema.org/Float", "https://metadata.datadrivendiscovery.org/types/FloatVector"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "f07ce875-bbc7-36c5-9cc1-ba4bfb7cf48e", "version": "0.1.0", "python_path": "d3m.primitives.tods.feature_analysis.statistical_maximum", "name": "Time Series Decompostional"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "b454adf7-5820-3e6f-8383-619f13fb1cb6", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_ocsvm", "name": "TODS.anomaly_detection_primitives.OCSVMPrimitive"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.6.produce"}}, "outputs": [{"id": "produce_score"}]}, {"type": "PRIMITIVE", "primitive": {"id": "01d36760-235c-3cdd-95dd-3c682c634c49", 
"version": "0.1.0", "python_path": "d3m.primitives.tods.detection_algorithm.system_wise_detection", "name": "Sytem_Wise_Anomaly_Detection_Primitive"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.7.produce_score"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "2530840a-07d4-3874-b7d8-9eb5e4ae2bf3", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.construct_predictions", "name": "Construct pipeline predictions output"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.8.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "c91336de994b3e7089bc3de1728dde5b458c3b9d4ecae7a9c94a26da1219d3f3"} |
@@ -6,19 +6,18 @@ import pandas as pd
 from tods import generate_dataset, load_pipeline, evaluate_pipeline
 
 this_path = os.path.dirname(os.path.abspath(__file__))
-#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset
+default_data_path = os.path.join(this_path, '../../datasets/anomaly/raw_data/yahoo_sub_5.csv')
 
 parser = argparse.ArgumentParser(description='Arguments for running predefined pipeline.')
-parser.add_argument('--table_path', type=str, default=os.path.join(this_path, '../../datasets/anomaly/raw_data/yahoo_sub_5.csv'),
+parser.add_argument('--table_path', type=str, default=default_data_path,
                     help='Input the path of the input data table')
 parser.add_argument('--target_index', type=int, default=6,
                     help='Index of the ground truth (for evaluation)')
 parser.add_argument('--metric', type=str, default='F1_MACRO',
                     help='Evaluation Metric (F1, F1_MACRO)')
-parser.add_argument('--pipeline_path', default=os.path.join(this_path, './example_pipelines/autoencoder_pipeline.json'),
+parser.add_argument('--pipeline_path',
+                    default=os.path.join(this_path, './example_pipelines/autoencoder_pipeline.json'),
                     help='Input the path of the pre-built pipeline description')
-# parser.add_argument('--pipeline_path', default=os.path.join(this_path, '../tods/resources/default_pipeline.json'),
-#                     help='Input the path of the pre-built pipeline description')
 
 args = parser.parse_args()
@@ -37,4 +36,5 @@ pipeline = load_pipeline(pipeline_path)
 
 # Run the pipeline
 pipeline_result = evaluate_pipeline(dataset, pipeline, metric)
 print(pipeline_result)
+#raise pipeline_result.error[0]
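The newly added comment hints at a fail-fast variant of this runner; a minimal sketch of that pattern, assuming pipeline_result.error is empty or None on success (the comment above references exactly this field):

pipeline_result = evaluate_pipeline(dataset, pipeline, metric)
print(pipeline_result)
if pipeline_result.error:
    raise pipeline_result.error[0]  # surface the first pipeline error instead of only printing it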
@@ -68,7 +68,7 @@ class CSVReaderPrimitive(primitives.FileReaderPrimitiveBase):  # pragma: no cover
             # This should be done by primitives later on.
             dtype=str,
             # We always expect one row header.
-            header=0,
+            header=None,
             # We want empty strings and not NaNs.
             na_filter=False,
             encoding='utf8',
@@ -92,7 +92,7 @@ class CSVReaderPrimitive(primitives.FileReaderPrimitiveBase):  # pragma: no cover
         data = container.DataFrame(data, {
             'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
             'structural_type': container.DataFrame,
-        }, generate_metadata=False)
+        }, generate_metadata=True)
 
         assert column_names is not None
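These two one-line changes alter how raw per-system CSVs enter the pipeline: header=None makes pandas treat the first row as data rather than as column names, and generate_metadata=True lets the wrapped DataFrame carry d3m metadata for the downstream semantic-type primitives. The header behaviour in isolation, as a quick pandas check:

import io
import pandas as pd

raw = '0.5,1\n0.7,0\n'
print(pd.read_csv(io.StringIO(raw), dtype=str, header=0, na_filter=False))     # 1 row; the first line became the header
print(pd.read_csv(io.StringIO(raw), dtype=str, header=None, na_filter=False))  # 2 rows; columns are named 0 and 1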
@@ -0,0 +1,200 @@
+import typing
+from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple
+import logging
+import abc
+
+from d3m.primitive_interfaces import generator, transformer
+from d3m.primitive_interfaces.base import *
+from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase
+from d3m.metadata import base as metadata_base, hyperparams, params
+from d3m import container
+from d3m import utils
+
+__all__ = ('TODSTransformerPrimitiveBase',)
+
+
+class TODSTransformerPrimitiveBase(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+    """
+    A base class for primitives which are not fitted at all and can
+    simply produce (useful) outputs from inputs directly. As such they
+    also do not have any state (params).
+
+    This class is parameterized using only three type variables, ``Inputs``,
+    ``Outputs``, and ``Hyperparams``.
+    """
+
+    def __init__(self, *, hyperparams: Hyperparams) -> None:
+        super().__init__(hyperparams=hyperparams)
+
+    def produce(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]:
+        # If the first cell has a shape (i.e. it is not a scalar entry), the input is
+        # system-wise data: one sub-DataFrame per system instead of a single series.
+        is_system = len(inputs.iloc[0, 0].shape) != 0
+        if is_system:
+            outputs = self._forward(inputs, '_produce')
+        else:
+            outputs = self._produce(inputs=inputs)
+            outputs = outputs.value
+        return CallResult(outputs)
+
+    @abc.abstractmethod
+    def _produce(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]:
+        """
+        Make the predictions.
+        """
+        #return CallResult(container.DataFrame)
+
+    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
+        """
+        A noop.
+        """
+        return CallResult(None)
+
+    def get_params(self) -> None:
+        """
+        A noop.
+        """
+        return None
+
+    def set_params(self, *, params: None) -> None:
+        """
+        A noop.
+        """
+        return
+
+    def _forward(self, data, method):
+        """
+        Generic forwarding helper: feed each system's sub-DataFrame to the
+        primitive one by one and write the result back in place.
+        """
+        col_name = list(data.columns)[0]
+        for i, _ in data.iterrows():
+            sys_data = data.iloc[i][col_name]
+            produce_func = getattr(self, method, None)
+            out = produce_func(inputs=sys_data)
+            data.iloc[i][col_name] = out.value
+        return data
+
+
+class TODSUnsupervisedLearnerPrimitiveBase(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
+
+    def __init__(self, *, hyperparams: Hyperparams,
+                 random_seed: int = 0,
+                 docker_containers: Dict[str, DockerContainer] = None) -> None:
+        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
+
+    def produce(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]:
+        is_system = len(inputs.iloc[0, 0].shape) != 0  # non-scalar first cell means system-wise data (one sub-DataFrame per system)
+        if is_system:
+            outputs = self._forward(inputs, '_produce')
+        else:
+            outputs = self._produce(inputs=inputs)
+            outputs = outputs.value
+        return CallResult(outputs)
+
+    def produce_score(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]:
+        is_system = len(inputs.iloc[0, 0].shape) != 0  # see produce() above
+        if is_system:
+            outputs = self._forward(inputs, '_produce_score')
+        else:
+            outputs = self._produce_score(inputs=inputs)  # was self._produce, an apparent copy-paste slip
+            outputs = outputs.value
+        return CallResult(outputs)
+
+    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
+        """
+        Fit the primitive; for system-wise data, each system's sub-DataFrame is
+        fed to ``_fit`` in turn.
+        """
+        is_system = len(self._inputs.iloc[0, 0].shape) != 0
+        if is_system:
+            data = self._inputs  # was `inputs`, which is undefined in this scope
+            col_name = list(data.columns)[0]
+            for i, _ in data.iterrows():
+                sys_data = data.iloc[i][col_name]
+                self.set_training_data(inputs=sys_data)
+                self._fit()
+        else:
+            self._fit()
+        return CallResult(None)
+
+    def fit_multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, timeout: float = None, iterations: int = None) -> MultiCallResult:
+        is_system = len(inputs.iloc[0, 0].shape) != 0
+        if is_system:
+            data = inputs
+            produce_method = produce_methods[0]
+            col_name = list(data.columns)[0]
+            results = []
+            for i, _ in data.iterrows():
+                sys_data = data.iloc[i][col_name]
+                self.set_training_data(inputs=sys_data)
+                fit_result = self._fit()
+                if produce_method == "produce":
+                    out = self._produce(inputs=sys_data, timeout=timeout)
+                else:
+                    out = self._produce_score(inputs=sys_data, timeout=timeout)
+                data.iloc[i][col_name] = out.value
+                results.append(out)
+
+            iterations_done = None
+            for result in results:
+                if result.iterations_done is not None:
+                    if iterations_done is None:
+                        iterations_done = result.iterations_done
+                    else:
+                        iterations_done = max(iterations_done, result.iterations_done)
+            return MultiCallResult(
+                values={produce_method: data},
+                has_finished=all(result.has_finished for result in results),
+                iterations_done=iterations_done,
+            )
+        else:
+            return self._fit_multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs)
+
+    @abc.abstractmethod
+    def _produce(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]:
+        """
+        Abstract method: produce outputs for a single system's data.
+        """
+
+    @abc.abstractmethod
+    def _produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
+        """
+        Abstract method: produce outlier scores for a single system's data.
+        """
+
+    @abc.abstractmethod
+    def _fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
+        """
+        Abstract method: fit the underlying detector.
+        """
+
+    def get_params(self) -> None:
+        """
+        A noop.
+        """
+        return None
+
+    def set_params(self, *, params: None) -> None:
+        """
+        A noop.
+        """
+        return
+
+    def _forward(self, data, method):
+        """
+        Generic forwarding helper: feed each system's sub-DataFrame to the
+        primitive one by one and write the result back in place.
+        """
+        col_name = list(data.columns)[0]
+        for i, _ in data.iterrows():
+            sys_data = data.iloc[i][col_name]
+            produce_func = getattr(self, method, None)
+            out = produce_func(inputs=sys_data)
+            data.iloc[i][col_name] = out.value
+        return data
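A minimal sketch of how a concrete detector sits on top of TODSUnsupervisedLearnerPrimitiveBase; the class name and the z-score rule are hypothetical, not TODS primitives, and metadata plus concrete Params/Hyperparams classes are elided:

class ZScoreDetector(TODSUnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
    def set_training_data(self, *, inputs: container.DataFrame) -> None:
        self._inputs = inputs

    def _fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
        self._mean = self._inputs.mean()
        self._std = self._inputs.std().replace(0, 1.0)  # guard against zero variance
        return CallResult(None)

    def _produce_score(self, *, inputs, timeout: float = None, iterations: int = None):
        score = ((inputs - self._mean) / self._std).abs()
        return CallResult(container.DataFrame(score))

    def _produce(self, *, inputs, timeout: float = None, iterations: int = None):
        labels = (self._produce_score(inputs=inputs).value > 3).astype(int)  # 3-sigma rule
        return CallResult(container.DataFrame(labels))

The inherited produce, produce_score, and fit wrappers then decide per call whether to run these hooks once on a plain DataFrame or once per system via _forward.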
@@ -95,7 +95,7 @@ class Hyperparams(Hyperparams_ODBase):
     )
 
     epochs = hyperparams.Hyperparameter[int](
-        default=100,
+        default=1,
         description='Number of epochs to train the model.',
         semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
     )
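Lowering the default from 100 epochs to 1 presumably makes the bundled pipelines cheap to smoke-test; a pipeline that needs a real training run can pin the value back when the step is built (add_hyperparameter is the standard d3m PrimitiveStep API):

from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import PrimitiveStep

step = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
step.add_hyperparameter(name='epochs', argument_type=ArgumentType.VALUE, data=100)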
@@ -335,7 +335,7 @@ class AutoEncoderPrimitive(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Params, Hyperparams]):
         Returns:
             None
         """
-        return super().fit()
+        return super()._fit()
 
     def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
         """
@@ -347,7 +347,7 @@ class AutoEncoderPrimitive(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Params, Hyperparams]):
             Container DataFrame
             1 marks Outliers, 0 marks normal.
         """
-        return super().produce(inputs=inputs, timeout=timeout, iterations=iterations)
+        return super()._produce(inputs=inputs, timeout=timeout, iterations=iterations)
 
     def get_params(self) -> Params:
         """
@@ -142,7 +142,6 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
         self.logger.info('System wise Detection Input Primitive called')
 
         # Get cols to fit.
         self._fitted = False
         self._training_inputs, self._training_indices = self._get_columns_to_fit(inputs, self.hyperparams)
@@ -316,12 +315,8 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     def _write(self, inputs: Inputs):
        inputs.to_csv(str(time.time()) + '.csv')
 
     def _system_wise_detection(self,X,method_type,window_size,contamination):
-        systemIds = X.system_id.unique()
-        groupedX = X.groupby(X.system_id)
+        systemIds = [int(idx) for idx in X.index]
         transformed_X = []
 
         if(method_type=="max"):
@@ -330,17 +325,17 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
             """
             maxOutlierScorePerSystemList = []
             for systemId in systemIds:
-                systemDf = groupedX.get_group(systemId)
-                maxOutlierScorePerSystemList.append(np.max(np.abs(systemDf["value_0"].values)))
+                systemDf = X.iloc[systemId]['system']
+                maxOutlierScorePerSystemList.append(np.max(np.abs(systemDf.iloc[:, 0].values)))
             ranking = np.sort(maxOutlierScorePerSystemList)
             threshold = ranking[int((1 - contamination) * len(ranking))]
             self.threshold = threshold
-            mask = (maxOutlierScorePerSystemList >= threshold)
+            mask = (maxOutlierScorePerSystemList > threshold)
             ranking[mask] = 1
             ranking[np.logical_not(mask)] = 0
             for iter in range(len(systemIds)):
-                transformed_X.append([systemIds[iter], ranking[iter]])
+                transformed_X.append(ranking[iter])
 
         if (method_type == "avg"):
             """
@@ -348,60 +343,72 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
             """
             avgOutlierScorePerSystemList = []
             for systemId in systemIds:
-                systemDf = groupedX.get_group(systemId)
-                avgOutlierScorePerSystemList.append(np.mean(np.abs(systemDf["value_0"].values)))
+                systemDf = X.iloc[systemId]['system']
+                avgOutlierScorePerSystemList.append(np.mean(np.abs(systemDf.iloc[:, 0].values)))
             ranking = np.sort(avgOutlierScorePerSystemList)
             threshold = ranking[int((1 - contamination) * len(ranking))]
             self.threshold = threshold
-            mask = (avgOutlierScorePerSystemList >= threshold)
+            mask = (avgOutlierScorePerSystemList > threshold)
             ranking[mask] = 1
             ranking[np.logical_not(mask)] = 0
             for iter in range(len(systemIds)):
-                transformed_X.append([systemIds[iter], ranking[iter]])
+                transformed_X.append(ranking[iter])
 
         if (method_type == "sliding_window_sum"):
             """
-            Systems are sorted based on the max of the max of reconstruction errors in each window
+            Systems are sorted based on the max of the sum of reconstruction errors in each window
             """
-            OutlierScorePerSystemList = []
+            maxOutlierScorePerSystemList = []
             for systemId in systemIds:
-                systemDf = groupedX.get_group(systemId)
-                column_value = systemDf["value_0"].values
-                column_score = np.zeros(len(column_value))
+                systemDf = X.iloc[systemId]['system']
+                column_value = systemDf.iloc[:, 0].values
+                column_score = []
                 for iter in range(window_size - 1, len(column_value)):
                     sequence = column_value[iter - window_size + 1:iter + 1]
-                    column_score[iter] = np.sum(np.abs(sequence))
-                column_score[:window_size - 1] = column_score[window_size - 1]
-                OutlierScorePerSystemList.append(column_score.tolist())
-            OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList)
+                    column_score.append(np.sum(np.abs(sequence)))
+                #column_score[:window_size - 1] = column_score[window_size - 1]
+                maxOutlierScorePerSystemList.append(np.max(column_score))
+            #OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList)
-            maxOutlierScorePerSystemList = OutlierScorePerSystemList.max(axis=1).tolist()
+            #maxOutlierScorePerSystemList = OutlierScorePerSystemList.max(axis=1).tolist()
             ranking = np.sort(maxOutlierScorePerSystemList)
             threshold = ranking[int((1 - contamination) * len(ranking))]
             self.threshold = threshold
-            mask = (maxOutlierScorePerSystemList >= threshold)
+            mask = (maxOutlierScorePerSystemList > threshold)
             ranking[mask] = 1
             ranking[np.logical_not(mask)] = 0
             for iter in range(len(systemIds)):
-                transformed_X.append([systemIds[iter], ranking[iter]])
+                transformed_X.append(ranking[iter])
 
         if (method_type == "majority_voting_sliding_window_sum"):
             """
             System with the most votes based on the max of the sum of reconstruction errors in each window
             """
             OutlierScorePerSystemList = []
+            max_time_points = 0
+            for systemId in systemIds:
+                systemDf = X.iloc[systemId]['system']
+                max_time_points = max(max_time_points, systemDf.shape[0])
             for systemId in systemIds:
-                systemDf = groupedX.get_group(systemId)
-                column_value = systemDf["value_0"].values
-                column_score = np.zeros(len(column_value))
+                column_value = np.zeros(max_time_points)
+                systemDf = X.iloc[systemId]['system']
+                column_value_actual = systemDf.iloc[:, 0].values
+                column_value[0:len(column_value_actual)] = column_value_actual
+                column_value[len(column_value_actual):] = column_value_actual[-1]
+                column_score = []
                 for iter in range(window_size - 1, len(column_value)):
                     sequence = column_value[iter - window_size + 1:iter + 1]
-                    column_score[iter] = np.sum(np.abs(sequence))
-                column_score[:window_size - 1] = column_score[window_size - 1]
-                OutlierScorePerSystemList.append(column_score.tolist())
+                    column_score.append(np.sum(np.abs(sequence)))
+                OutlierScorePerSystemList.append(column_score)
             OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList)
             OutlierScorePerSystemList = (
                 OutlierScorePerSystemList == OutlierScorePerSystemList.max(axis=0)[None, :]).astype(int)
@@ -409,28 +416,39 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
             ranking = np.sort(maxOutlierScorePerSystemList)
             threshold = ranking[int((1 - contamination) * len(ranking))]
             self.threshold = threshold
-            mask = (maxOutlierScorePerSystemList >= threshold)
+            mask = (maxOutlierScorePerSystemList > threshold)
             ranking[mask] = 1
             ranking[np.logical_not(mask)] = 0
             for iter in range(len(systemIds)):
-                transformed_X.append([systemIds[iter], ranking[iter]])
+                transformed_X.append(ranking[iter])
 
         if (method_type == "majority_voting_sliding_window_max"):
             """
             System with the most votes based on the max of the max of reconstruction errors in each window
             """
             OutlierScorePerSystemList = []
+            max_time_points = 0
             for systemId in systemIds:
-                systemDf = groupedX.get_group(systemId)
-                column_value = systemDf["value_0"].values
-                column_score = np.zeros(len(column_value))
+                systemDf = X.iloc[systemId]['system']
+                max_time_points = max(max_time_points, systemDf.shape[0])
+            for systemId in systemIds:
+                column_value = np.zeros(max_time_points)
+                systemDf = X.iloc[systemId]['system']
+                column_value_actual = systemDf.iloc[:, 0].values
+                column_value[0:len(column_value_actual)] = column_value_actual
+                column_value[len(column_value_actual):] = column_value_actual[-1]
+                column_score = []
                 for iter in range(window_size - 1, len(column_value)):
                     sequence = column_value[iter - window_size + 1:iter + 1]
-                    column_score[iter] = np.max(np.abs(sequence))
-                column_score[:window_size - 1] = column_score[window_size - 1]
-                OutlierScorePerSystemList.append(column_score.tolist())
+                    column_score.append(np.max(np.abs(sequence)))
+                OutlierScorePerSystemList.append(column_score)
             OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList)
             OutlierScorePerSystemList = (
                 OutlierScorePerSystemList == OutlierScorePerSystemList.max(axis=0)[None, :]).astype(int)
@@ -439,11 +457,11 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
             ranking = np.sort(maxOutlierScorePerSystemList)
             threshold = ranking[int((1 - contamination) * len(ranking))]
             self.threshold = threshold
-            mask = (maxOutlierScorePerSystemList >= threshold)
+            mask = (maxOutlierScorePerSystemList > threshold)
             ranking[mask] = 1
             ranking[np.logical_not(mask)] = 0
             for iter in range(len(systemIds)):
-                transformed_X.append([systemIds[iter], ranking[iter]])
+                transformed_X.append(ranking[iter])
 
         return transformed_X
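Each branch above now compares scores to the contamination threshold with a strict >, where the old code used >=; systems whose score exactly equals the threshold are therefore no longer flagged. A small worked example of the shared thresholding logic:

import numpy as np

scores = np.array([0.2, 0.9, 0.4, 0.1, 0.7])
contamination = 0.4
ranking = np.sort(scores)                                     # [0.1, 0.2, 0.4, 0.7, 0.9]
threshold = ranking[int((1 - contamination) * len(ranking))]  # ranking[3] = 0.7
print((scores >= threshold).astype(int))                      # [0 1 0 0 1] -> 40% flagged
print((scores > threshold).astype(int))                       # [0 1 0 0 0] -> the tie at the threshold is excluded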
@@ -0,0 +1,455 @@
+import os
+from typing import Any, Optional, List
+import statsmodels.api as sm
+import numpy as np
+from d3m import container, utils as d3m_utils
+from d3m import utils
+from numpy import ndarray
+from collections import OrderedDict
+from scipy import sparse
+import os
+import numpy
+import typing
+import time
+from d3m import container
+from d3m.primitive_interfaces import base, transformer
+from d3m.container import DataFrame as d3m_dataframe
+from d3m.metadata import hyperparams, params, base as metadata_base
+from d3m.base import utils as base_utils
+import uuid
+from d3m.exceptions import PrimitiveNotFittedError
+
+__all__ = ('SystemWiseDetectionPrimitive',)
+
+Inputs = container.DataFrame
+Outputs = container.DataFrame
+
+
+class Params(params.Params):
+    # TODO: how to make params dynamic
+    use_column_names: Optional[Any]
+
+
+class Hyperparams(hyperparams.Hyperparams):
+    # Tuning parameters
+    window_size = hyperparams.Hyperparameter(default=10, semantic_types=[
+        'https://metadata.datadrivendiscovery.org/types/TuningParameter',
+    ], description="Window Size for decomposition")  # a value of -1 means the entire time series is considered
+    method_type = hyperparams.Enumeration(
+        values=['max', 'avg', 'sliding_window_sum', 'majority_voting_sliding_window_sum', 'majority_voting_sliding_window_max'],
+        default='majority_voting_sliding_window_max',
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="The type of method used to find the anomalous system",
+    )
+    contamination = hyperparams.Uniform(
+        lower=0.,
+        upper=0.5,
+        default=0.1,
+        description='The amount of contamination of the data set, i.e. the proportion of outliers in the data set.',
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
+    )
+
+    # Control parameters
+    use_columns = hyperparams.Set(
+        elements=hyperparams.Hyperparameter[int](-1),
+        default=(),
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
+    )
+    exclude_columns = hyperparams.Set(
+        elements=hyperparams.Hyperparameter[int](-1),
+        default=(),
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
+    )
+    return_result = hyperparams.Enumeration(
+        values=['append', 'replace', 'new'],
+        default='new',
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
+    )
+    use_semantic_types = hyperparams.UniformBool(
+        default=False,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe"
+    )
+    add_index_columns = hyperparams.UniformBool(
+        default=False,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
+    )
+    error_on_no_input = hyperparams.UniformBool(
+        default=True,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
+    )
+    return_semantic_type = hyperparams.Enumeration[str](
+        values=['https://metadata.datadrivendiscovery.org/types/Attribute',
+                'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'],
+        default='https://metadata.datadrivendiscovery.org/types/Attribute',
+        description='Decides what semantic type to attach to generated attributes',
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
+    )
+
+
+class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+    """
+    Primitive that flags anomalous systems in system-wise time-series data.
+    """
+
+    metadata = metadata_base.PrimitiveMetadata({
+        "__author__": "DATA Lab at Texas A&M University",
+        # Spelling kept as-is: the uuid3 id below and the pipeline JSONs reference this exact string.
+        'name': 'Sytem_Wise_Anomaly_Detection_Primitive',
+        'python_path': 'd3m.primitives.tods.detection_algorithm.system_wise_detection',
+        'source': {
+            'name': 'DATA Lab at Texas A&M University',
+            'contact': 'mailto:khlai037@tamu.edu'
+        },
+        "hyperparams_to_tune": ['window_size', 'method_type', 'contamination'],
+        'version': '0.1.0',
+        'algorithm_types': [
+            metadata_base.PrimitiveAlgorithmType.TODS_PRIMITIVE,
+        ],
+        'primitive_family': metadata_base.PrimitiveFamily.ANOMALY_DETECTION,
+        'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'Sytem_Wise_Anomaly_Detection_Primitive')),
+    })
+
+    def __init__(self, *, hyperparams: Hyperparams) -> None:
+        super().__init__(hyperparams=hyperparams)
+        self.primitiveNo = 0
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||||
""" | |||||
Args: | |||||
inputs: Container DataFrame | |||||
timeout: Default | |||||
iterations: Default | |||||
Returns: | |||||
Container DataFrame containing abs_energy of time series | |||||
""" | |||||
self.logger.info('System wise Detection Input Primitive called') | |||||
# Get cols to fit. | |||||
self._fitted = False | |||||
self._training_inputs, self._training_indices = self._get_columns_to_fit(inputs, self.hyperparams) | |||||
self._input_column_names = self._training_inputs.columns | |||||
if len(self._training_indices) > 0: | |||||
# self._clf.fit(self._training_inputs) | |||||
self._fitted = True | |||||
else: | |||||
if self.hyperparams['error_on_no_input']: | |||||
raise RuntimeError("No input columns were selected") | |||||
self.logger.warning("No input columns were selected")
if not self._fitted: | |||||
raise PrimitiveNotFittedError("Primitive not fitted.") | |||||
system_wise_detection_input = inputs | |||||
if self.hyperparams['use_semantic_types']: | |||||
system_wise_detection_input = inputs.iloc[:, self._training_indices] | |||||
output_columns = [] | |||||
if len(self._training_indices) > 0: | |||||
system_wise_detection_output = self._system_wise_detection(system_wise_detection_input, self.hyperparams["method_type"], self.hyperparams["window_size"], self.hyperparams["contamination"])
outputs = system_wise_detection_output | |||||
if sparse.issparse(system_wise_detection_output): | |||||
system_wise_detection_output = system_wise_detection_output.toarray() | |||||
outputs = self._wrap_predictions(inputs, system_wise_detection_output) | |||||
#if len(outputs.columns) == len(self._input_column_names): | |||||
# outputs.columns = self._input_column_names | |||||
output_columns = [outputs] | |||||
else: | |||||
if self.hyperparams['error_on_no_input']: | |||||
raise RuntimeError("No input columns were selected") | |||||
self.logger.warning("No input columns were selected")
self.logger.info('System-wise Detection Primitive returned')
outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], | |||||
add_index_columns=self.hyperparams['add_index_columns'], | |||||
inputs=inputs, column_indices=self._training_indices, | |||||
columns_list=output_columns) | |||||
return base.CallResult(outputs) | |||||
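# Produce usage sketch (hypothetical values; assumes `df` is a d3m container
# DataFrame with a 'system_id' column and a 'value_0' reconstruction-error column):
#
#   primitive = SystemWiseDetectionPrimitive(hyperparams=Hyperparams.defaults())
#   labels = primitive.produce(inputs=df).value   # CallResult.value holds the output DataFrame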
@classmethod | |||||
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): | |||||
""" | |||||
Select columns to fit. | |||||
Args: | |||||
inputs: Container DataFrame | |||||
hyperparams: d3m.metadata.hyperparams.Hyperparams | |||||
Returns: | |||||
Tuple of the selected columns (DataFrame) and their indices (list)
""" | |||||
if not hyperparams['use_semantic_types']: | |||||
return inputs, list(range(len(inputs.columns))) | |||||
inputs_metadata = inputs.metadata | |||||
def can_produce_column(column_index: int) -> bool: | |||||
return cls._can_produce_column(inputs_metadata, column_index, hyperparams) | |||||
use_columns = hyperparams['use_columns'] | |||||
exclude_columns = hyperparams['exclude_columns'] | |||||
columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, | |||||
use_columns=use_columns, | |||||
exclude_columns=exclude_columns, | |||||
can_use_column=can_produce_column) | |||||
return inputs.iloc[:, columns_to_produce], columns_to_produce | |||||
# return columns_to_produce | |||||
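# Note: when use_semantic_types is False the whole frame passes through untouched;
# otherwise base_utils.get_columns_to_use applies use_columns/exclude_columns and
# the _can_produce_column predicate below to pick numeric Attribute columns.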
@classmethod | |||||
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, | |||||
hyperparams: Hyperparams) -> bool: | |||||
""" | |||||
Output whether a column can be processed. | |||||
Args: | |||||
inputs_metadata: d3m.metadata.base.DataMetadata | |||||
column_index: int | |||||
Returns: | |||||
bool | |||||
""" | |||||
column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) | |||||
accepted_structural_types = (int, float, numpy.integer, numpy.float64) | |||||
accepted_semantic_types = set() | |||||
accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") | |||||
if not issubclass(column_metadata['structural_type'], accepted_structural_types): | |||||
return False | |||||
semantic_types = set(column_metadata.get('semantic_types', [])) | |||||
if len(semantic_types) == 0: | |||||
cls.logger.warning("No semantic types found in column metadata") | |||||
return False | |||||
# Making sure all accepted_semantic_types are available in semantic_types | |||||
if len(accepted_semantic_types - semantic_types) == 0: | |||||
return True | |||||
return False | |||||
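# Example of the filter above: a column with structural_type float and semantic
# types {'.../Attribute', '.../Time'} passes, since every accepted semantic type
# is present; a column typed only as '.../PrimaryKey' is rejected.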
@classmethod | |||||
def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], | |||||
target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: | |||||
""" | |||||
Update metadata for selected columns.
Args: | |||||
inputs_metadata: metadata_base.DataMetadata | |||||
outputs: Container Dataframe | |||||
target_columns_metadata: list | |||||
Returns: | |||||
d3m.metadata.base.DataMetadata | |||||
""" | |||||
outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) | |||||
for column_index, column_metadata in enumerate(target_columns_metadata): | |||||
column_metadata.pop("structural_type", None) | |||||
outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) | |||||
return outputs_metadata | |||||
def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: | |||||
""" | |||||
Wrap predictions into dataframe | |||||
Args: | |||||
inputs: Container Dataframe | |||||
predictions: array-like data (n_samples, n_features) | |||||
Returns: | |||||
Dataframe | |||||
""" | |||||
outputs = d3m_dataframe(predictions, generate_metadata=True) | |||||
target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams,self.primitiveNo) | |||||
outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) | |||||
return outputs | |||||
@classmethod | |||||
def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams, primitiveNo): | |||||
""" | |||||
Add target columns metadata | |||||
Args: | |||||
outputs_metadata: metadata.base.DataMetadata | |||||
hyperparams: d3m.metadata.hyperparams.Hyperparams | |||||
Returns: | |||||
List[OrderedDict] | |||||
""" | |||||
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||||
target_columns_metadata: List[OrderedDict] = [] | |||||
for column_index in range(outputs_length): | |||||
column_name = "{0}{1}_{2}".format(cls.metadata.query()['name'], primitiveNo, column_index) | |||||
column_metadata = OrderedDict() | |||||
semantic_types = set() | |||||
semantic_types.add(hyperparams["return_semantic_type"]) | |||||
column_metadata['semantic_types'] = list(semantic_types) | |||||
column_metadata["name"] = str(column_name) | |||||
target_columns_metadata.append(column_metadata) | |||||
return target_columns_metadata | |||||
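# Example: with primitiveNo 0, the first generated column is named
# "<primitive name>0_0" and carries the semantic type chosen by the
# return_semantic_type hyperparameter.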
def _write(self, inputs: Inputs):
# Debug helper: dump the inputs to a timestamped CSV file.
inputs.to_csv(str(time.time()) + '.csv')
def _system_wise_detection(self, X, method_type, window_size, contamination):
# Group rows by system id so each system's reconstruction errors can be
# scored together; the per-method branches below use groupedX.get_group().
systemIds = X.system_id.unique()
groupedX = X.groupby(X.system_id)
transformed_X = []
if(method_type=="max"): | |||||
""" | |||||
Sytems are sorted based on maximum of reconstruction errors" | |||||
""" | |||||
maxOutlierScorePerSystemList = [] | |||||
for systemId in systemIds: | |||||
systemDf = groupedX.get_group(systemId) | |||||
#systemDf = X[systemId]['system'] | |||||
maxOutlierScorePerSystemList.append(np.max(np.abs(systemDf["value_0"].values))) | |||||
ranking = np.sort(maxOutlierScorePerSystemList) | |||||
threshold = ranking[int((1 - contamination) * len(ranking))] | |||||
self.threshold = threshold | |||||
mask = (np.asarray(maxOutlierScorePerSystemList) >= threshold)
ranking[mask] = 1 | |||||
ranking[np.logical_not(mask)] = 0 | |||||
for iter in range(len(systemIds)): | |||||
transformed_X.append([systemIds[iter],ranking[iter]]) | |||||
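# Worked example for the "max" ranking (hypothetical scores): with per-system
# maxima [1, 5, 3, 9] and contamination 0.25, np.sort gives [1, 3, 5, 9] and
# threshold = ranking[int(0.75 * 4)] = ranking[3] = 9, so only the system with
# score 9 is labeled 1. The binary labels overwrite `ranking` in the *unsorted*
# order of `mask`, so transformed_X stays aligned with systemIds.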
if (method_type == "avg"): | |||||
""" | |||||
Sytems are sorted based on average of reconstruction errors" | |||||
""" | |||||
avgOutlierScorePerSystemList = [] | |||||
for systemId in systemIds: | |||||
systemDf = groupedX.get_group(systemId) | |||||
avgOutlierScorePerSystemList.append(np.mean(np.abs(systemDf["value_0"].values))) | |||||
ranking = np.sort(avgOutlierScorePerSystemList) | |||||
threshold = ranking[int((1 - contamination) * len(ranking))] | |||||
self.threshold = threshold | |||||
mask = (np.asarray(avgOutlierScorePerSystemList) >= threshold)
ranking[mask] = 1 | |||||
ranking[np.logical_not(mask)] = 0 | |||||
for iter in range(len(systemIds)): | |||||
transformed_X.append([systemIds[iter], ranking[iter]]) | |||||
if (method_type == "sliding_window_sum"): | |||||
""" | |||||
Sytems are sorted based on max of max of reconstruction errors in each window" | |||||
""" | |||||
OutlierScorePerSystemList = [] | |||||
for systemId in systemIds: | |||||
systemDf = groupedX.get_group(systemId) | |||||
column_value = systemDf["value_0"].values | |||||
column_score = np.zeros(len(column_value)) | |||||
for iter in range(window_size - 1, len(column_value)): | |||||
sequence = column_value[iter - window_size + 1:iter + 1] | |||||
column_score[iter] = np.sum(np.abs(sequence)) | |||||
column_score[:window_size - 1] = column_score[window_size - 1] | |||||
OutlierScorePerSystemList.append(column_score.tolist()) | |||||
OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) | |||||
maxOutlierScorePerSystemList = OutlierScorePerSystemList.max(axis=1).tolist() | |||||
ranking = np.sort(maxOutlierScorePerSystemList) | |||||
threshold = ranking[int((1 - contamination) * len(ranking))] | |||||
self.threshold = threshold | |||||
mask = (np.asarray(maxOutlierScorePerSystemList) >= threshold)
ranking[mask] = 1 | |||||
ranking[np.logical_not(mask)] = 0 | |||||
for iter in range(len(systemIds)): | |||||
transformed_X.append([systemIds[iter], ranking[iter]]) | |||||
if (method_type == "majority_voting_sliding_window_sum"): | |||||
""" | |||||
Sytem with most vote based on max of sum of reconstruction errors in each window | |||||
""" | |||||
OutlierScorePerSystemList = [] | |||||
for systemId in systemIds: | |||||
systemDf = groupedX.get_group(systemId) | |||||
column_value = systemDf["value_0"].values | |||||
column_score = np.zeros(len(column_value)) | |||||
for iter in range(window_size - 1, len(column_value)): | |||||
sequence = column_value[iter - window_size + 1:iter + 1] | |||||
column_score[iter] = np.sum(np.abs(sequence)) | |||||
column_score[:window_size - 1] = column_score[window_size - 1] | |||||
OutlierScorePerSystemList.append(column_score.tolist()) | |||||
OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) | |||||
OutlierScorePerSystemList = ( | |||||
OutlierScorePerSystemList == OutlierScorePerSystemList.max(axis=0)[None, :]).astype(int) | |||||
maxOutlierScorePerSystemList = OutlierScorePerSystemList.sum(axis=1).tolist() | |||||
ranking = np.sort(maxOutlierScorePerSystemList) | |||||
threshold = ranking[int((1 - contamination) * len(ranking))] | |||||
self.threshold = threshold | |||||
mask = (np.asarray(maxOutlierScorePerSystemList) >= threshold)
ranking[mask] = 1 | |||||
ranking[np.logical_not(mask)] = 0 | |||||
for iter in range(len(systemIds)): | |||||
transformed_X.append([systemIds[iter], ranking[iter]]) | |||||
if (method_type == "majority_voting_sliding_window_max"): | |||||
""" | |||||
Sytem with most vote based on max of max of reconstruction errors in each window | |||||
""" | |||||
OutlierScorePerSystemList = [] | |||||
for systemId in systemIds: | |||||
systemDf = groupedX.get_group(systemId) | |||||
column_value = systemDf["value_0"].values | |||||
column_score = np.zeros(len(column_value)) | |||||
for iter in range(window_size - 1, len(column_value)): | |||||
sequence = column_value[iter - window_size + 1:iter + 1] | |||||
column_score[iter] = np.max(np.abs(sequence)) | |||||
column_score[:window_size - 1] = column_score[window_size - 1] | |||||
OutlierScorePerSystemList.append(column_score.tolist()) | |||||
OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) | |||||
OutlierScorePerSystemList = ( | |||||
OutlierScorePerSystemList == OutlierScorePerSystemList.max(axis=0)[None, :]).astype(int) | |||||
maxOutlierScorePerSystemList = OutlierScorePerSystemList.sum(axis=1).tolist() | |||||
ranking = np.sort(maxOutlierScorePerSystemList) | |||||
threshold = ranking[int((1 - contamination) * len(ranking))] | |||||
self.threshold = threshold | |||||
mask = (np.asarray(maxOutlierScorePerSystemList) >= threshold)
ranking[mask] = 1 | |||||
ranking[np.logical_not(mask)] = 0 | |||||
for iter in range(len(systemIds)): | |||||
transformed_X.append([systemIds[iter], ranking[iter]]) | |||||
return transformed_X | |||||
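# The return value is a list of [system_id, label] pairs, e.g. [[0, 0], [1, 1], ...],
# which produce() wraps back into a container DataFrame via _wrap_predictions.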
@@ -30,6 +30,7 @@ from d3m.primitive_interfaces.base import CallResult, DockerContainer, Primitive
# # from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase
from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase
+from ..common.TODSBasePrimitives import TODSUnsupervisedLearnerPrimitiveBase
from d3m.primitive_interfaces.transformer import TransformerPrimitiveBase
from d3m.primitive_interfaces.base import *
@@ -141,7 +142,10 @@ class Hyperparams_ODBase(hyperparams.Hyperparams):
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
)
-class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
+# OutlierDetectorBase.__doc__ = OutlierDetectorBase.__doc__
+class UnsupervisedOutlierDetectorBase(TODSUnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
"""
Parameters
----------
@@ -234,7 +238,7 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O
self.right_inds_[self.right_inds_ > len(self._inputs)] = len(self._inputs)
# print(self.left_inds_, self.right_inds_)
-def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
+def _fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
"""
Fit model with training data.
Args:
@@ -248,6 +252,7 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O
if self._fitted: # pragma: no cover
return CallResult(None)
self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams)
self._input_column_names = self._training_inputs.columns
@@ -271,7 +276,7 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O
return CallResult(None)
-def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
+def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
"""
Process the testing data.
Args:
@@ -336,7 +341,7 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O
return CallResult(outputs)
-def produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
+def _produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
"""
Process the testing data.
Args:
@@ -688,3 +693,553 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O
# OutlierDetectorBase.__doc__ = OutlierDetectorBase.__doc__
class UnsupervisedOutlierDetectorBase2(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): | |||||
""" | |||||
Parameters | |||||
---------- | |||||
contamination : float in (0., 0.5), optional (default=0.1) | |||||
The amount of contamination of the data set, i.e. | |||||
the proportion of outliers in the data set. When fitting this is used | |||||
to define the threshold on the decision function. | |||||
Attributes | |||||
---------- | |||||
clf_.decision_scores_ : numpy array of shape (n_samples,) | |||||
The outlier scores of the training data. | |||||
The higher, the more abnormal. Outliers tend to have higher | |||||
scores. This value is available once the detector is | |||||
fitted. | |||||
clf_.threshold_ : float within (0, 1)
Outliers have decision_scores_ greater than threshold_;
inliers have decision_scores_ less than threshold_.
clf_.labels_ : int, either 0 or 1
The binary labels of the training data. 0 stands for inliers
and 1 for outliers/anomalies. It is generated by applying
``threshold_`` on ``decision_scores_``.
left_inds_ : ndarray, | |||||
One of the mapping from decision_score to data. | |||||
For point outlier detection, left_inds_ exactly equals the index of each data point. | |||||
For Collective outlier detection, left_inds_ equals the start index of each subsequence. | |||||
right_inds_ : ndarray,
One of the mapping from decision_score to data.
For point outlier detection, right_inds_ exactly equals the index of each data point plus 1.
For Collective outlier detection, right_inds_ equals the ending index of each subsequence.
""" | |||||
# probability_score: | |||||
# window_size: int | |||||
# The moving window size. | |||||
__author__ = "DATALAB @Taxes A&M University" | |||||
metadata: metadata_base.PrimitiveMetadata = None | |||||
def __init__(self, *, | |||||
hyperparams: Hyperparams, | |||||
random_seed: int = 0, | |||||
docker_containers: Dict[str, DockerContainer] = None) -> None: | |||||
super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) | |||||
self._clf = None | |||||
self._clf_fit_parameter = {} | |||||
self.primitiveNo = 0 | |||||
self.window_size = hyperparams['window_size'] | |||||
self.step_size = hyperparams['step_size'] | |||||
self.left_inds_ = None | |||||
self.right_inds_ = None | |||||
self._inputs = None | |||||
self._outputs = None | |||||
self._training_inputs = None | |||||
self._training_outputs = None | |||||
self._target_names = None | |||||
self._training_indices = None | |||||
self._target_column_indices = None | |||||
self._target_columns_metadata: List[OrderedDict] = None | |||||
self._input_column_names = None | |||||
self._fitted = False | |||||
# | |||||
@abc.abstractmethod | |||||
def set_training_data(self, *, inputs: Inputs) -> None: | |||||
""" | |||||
Set training data for outlier detection. | |||||
Args: | |||||
inputs: Container DataFrame | |||||
Returns: | |||||
None | |||||
""" | |||||
self._inputs = inputs | |||||
self._fitted = False | |||||
def _set_subseq_inds(self): | |||||
self.left_inds_ = getattr(self._clf, 'left_inds_', None) | |||||
self.right_inds_ = getattr(self._clf, 'right_inds_', None) | |||||
if self.left_inds_ is None or self.right_inds_ is None: | |||||
self.left_inds_ = numpy.arange(0, len(self._inputs), self.step_size) | |||||
self.right_inds_ = self.left_inds_ + self.window_size | |||||
self.right_inds_[self.right_inds_ > len(self._inputs)] = len(self._inputs) | |||||
# print(self.left_inds_, self.right_inds_) | |||||
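# Example of the fallback indices (assumed: 5 input rows, window_size=3,
# step_size=1): left_inds_ = [0, 1, 2, 3, 4] and right_inds_ = [3, 4, 5, 5, 5]
# after clipping, so each score maps to the subsequence inputs[left:right].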
def _fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: | |||||
""" | |||||
Fit model with training data. | |||||
Args: | |||||
None. Uses the training inputs set via set_training_data.
Returns: | |||||
None | |||||
""" | |||||
# print('Fit:', self._clf) | |||||
if self._fitted: # pragma: no cover | |||||
return CallResult(None) | |||||
self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams)
self._input_column_names = self._training_inputs.columns
if self._training_inputs is None: # pragma: no cover | |||||
return CallResult(None) | |||||
#print("self._training_indices ", self._training_indices) | |||||
if len(self._training_indices) > 0: | |||||
# print('Fit: ', self._clf) | |||||
# print('Fit: ', self._training_inputs.values.shape) | |||||
# print('Fit: ', self._clf.fit(self._training_inputs.values)) | |||||
self._clf.fit(X=self._training_inputs.values, **self._clf_fit_parameter) | |||||
self._fitted = True | |||||
self._set_subseq_inds() | |||||
else: # pragma: no cover | |||||
if self.hyperparams['error_on_no_input']: | |||||
raise RuntimeError("No input columns were selected") | |||||
self.logger.warning("No input columns were selected")
return CallResult(None) | |||||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||||
""" | |||||
Process the testing data. | |||||
Args: | |||||
inputs: Container DataFrame. Time series data up to outlier detection. | |||||
Returns: | |||||
Container DataFrame | |||||
1 marks Outliers, 0 marks normal. | |||||
""" | |||||
if not self._fitted: # pragma: no cover | |||||
raise PrimitiveNotFittedError("Primitive not fitted.") | |||||
sk_inputs = inputs | |||||
if self.hyperparams['use_semantic_types']: | |||||
sk_inputs = inputs.iloc[:, self._training_indices] | |||||
output_columns = [] | |||||
#print("skinputs ", sk_inputs.values) | |||||
if len(self._training_indices) > 0: | |||||
if self.hyperparams['return_subseq_inds']: | |||||
if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD | |||||
pred_label = self._clf.predict(sk_inputs.values) | |||||
left_inds_ = numpy.arange(0, len(pred_label), self.step_size) | |||||
right_inds_ = left_inds_ + self.window_size | |||||
right_inds_[right_inds_ > len(pred_label)] = len(pred_label) | |||||
else: | |||||
pred_label, left_inds_, right_inds_ = self._clf.predict(sk_inputs.values) | |||||
# print(pred_label.shape, left_inds_.shape, right_inds_.shape) | |||||
# print(pred_label, left_inds_, right_inds_) | |||||
sk_output = numpy.concatenate((numpy.expand_dims(pred_label, axis=1), | |||||
numpy.expand_dims(left_inds_, axis=1), | |||||
numpy.expand_dims(right_inds_, axis=1)), axis=1) | |||||
else: | |||||
if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD | |||||
sk_output = self._clf.predict(sk_inputs.values) | |||||
else: | |||||
sk_output, _, _ = self._clf.predict(sk_inputs.values) | |||||
#print("sk output ", sk_output) | |||||
if sparse.issparse(sk_output): # pragma: no cover | |||||
sk_output = sk_output.toarray() | |||||
outputs = self._wrap_predictions(inputs, sk_output) | |||||
if len(outputs.columns) == len(self._input_column_names): | |||||
outputs.columns = self._input_column_names | |||||
output_columns = [outputs] | |||||
else: # pragma: no cover | |||||
if self.hyperparams['error_on_no_input']: | |||||
raise RuntimeError("No input columns were selected") | |||||
self.logger.warning("No input columns were selected")
outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], | |||||
add_index_columns=self.hyperparams['add_index_columns'], | |||||
inputs=inputs, column_indices=self._training_indices, | |||||
columns_list=output_columns) | |||||
return CallResult(outputs) | |||||
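# With return_subseq_inds enabled, each output row is
# [predicted label, left index, right index]; otherwise only the labels
# (1 = outlier, 0 = normal) are returned.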
def _produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||||
""" | |||||
Process the testing data. | |||||
Args: | |||||
inputs: Container DataFrame. Time series data up to outlier detection. | |||||
Returns: | |||||
Container DataFrame | |||||
1 marks Outliers, 0 marks normal. | |||||
""" | |||||
if not self._fitted: # pragma: no cover | |||||
raise PrimitiveNotFittedError("Primitive not fitted.") | |||||
sk_inputs = inputs | |||||
if self.hyperparams['use_semantic_types']: | |||||
sk_inputs = inputs.iloc[:, self._training_indices] | |||||
output_columns = [] | |||||
if len(self._training_indices) > 0: | |||||
if self.hyperparams['return_subseq_inds']: | |||||
if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD | |||||
pred_score = self._clf.decision_function(sk_inputs.values).ravel() | |||||
left_inds_ = numpy.arange(0, len(pred_score), self.step_size) | |||||
right_inds_ = left_inds_ + self.window_size | |||||
right_inds_[right_inds_ > len(pred_score)] = len(pred_score) | |||||
else: | |||||
pred_score, left_inds_, right_inds_ = self._clf.decision_function(sk_inputs.values) | |||||
# print(pred_score.shape, left_inds_.shape, right_inds_.shape) | |||||
sk_output = numpy.concatenate((numpy.expand_dims(pred_score, axis=1), | |||||
numpy.expand_dims(left_inds_, axis=1), | |||||
numpy.expand_dims(right_inds_, axis=1)), axis=1) | |||||
else: | |||||
if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD | |||||
sk_output = self._clf.decision_function(sk_inputs.values) | |||||
else: | |||||
sk_output, _, _ = self._clf.decision_function(sk_inputs.values) | |||||
if sparse.issparse(sk_output): # pragma: no cover | |||||
sk_output = sk_output.toarray() | |||||
outputs = self._wrap_predictions(inputs, sk_output) | |||||
if len(outputs.columns) == len(self._input_column_names): | |||||
outputs.columns = self._input_column_names | |||||
output_columns = [outputs] | |||||
else: # pragma: no cover | |||||
if self.hyperparams['error_on_no_input']: | |||||
raise RuntimeError("No input columns were selected") | |||||
self.logger.warning("No input columns were selected")
outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], | |||||
add_index_columns=self.hyperparams['add_index_columns'], | |||||
inputs=inputs, column_indices=self._training_indices, | |||||
columns_list=output_columns) | |||||
return CallResult(outputs) | |||||
def get_params(self) -> Params_ODBase: | |||||
""" | |||||
Return parameters. | |||||
Args: | |||||
None | |||||
Returns: | |||||
class Params_ODBase | |||||
""" | |||||
if not self._fitted: | |||||
return Params_ODBase( | |||||
# decision_scores_=None, | |||||
# threshold_=None, | |||||
# labels_=None, | |||||
left_inds_=None, | |||||
right_inds_=None, | |||||
clf_=copy.copy(self._clf), | |||||
# Keep previous | |||||
input_column_names=self._input_column_names, | |||||
training_indices_=self._training_indices, | |||||
target_names_=self._target_names, | |||||
target_column_indices_=self._target_column_indices, | |||||
target_columns_metadata_=self._target_columns_metadata | |||||
) | |||||
return Params_ODBase( | |||||
# decision_scores_=getattr(self._clf, 'decision_scores_', None), | |||||
# threshold_=getattr(self._clf, 'threshold_', None), | |||||
# labels_=getattr(self._clf, 'labels_', None), | |||||
left_inds_=self.left_inds_, # numpy.array(self.left_inds_) | |||||
right_inds_=self.right_inds_, # numpy.array(self.right_inds_) | |||||
clf_=copy.copy(self._clf), | |||||
# Keep previous | |||||
input_column_names=self._input_column_names, | |||||
training_indices_=self._training_indices, | |||||
target_names_=self._target_names, | |||||
target_column_indices_=self._target_column_indices, | |||||
target_columns_metadata_=self._target_columns_metadata | |||||
) | |||||
# pass | |||||
def set_params(self, *, params: Params_ODBase) -> None: | |||||
""" | |||||
Set parameters for outlier detection. | |||||
Args: | |||||
params: class Params_ODBase | |||||
Returns: | |||||
None | |||||
""" | |||||
# self._clf.decision_scores_ = params['decision_scores_'] | |||||
# self._clf.threshold_ = params['threshold_'] | |||||
# self._clf.labels_ = params['labels_'] | |||||
self.left_inds_ = params['left_inds_'] | |||||
self.right_inds_ = params['right_inds_'] | |||||
self._clf = copy.copy(params['clf_']) | |||||
# Keep previous | |||||
self._input_column_names = params['input_column_names'] | |||||
self._training_indices = params['training_indices_'] | |||||
self._target_names = params['target_names_'] | |||||
self._target_column_indices = params['target_column_indices_'] | |||||
self._target_columns_metadata = params['target_columns_metadata_'] | |||||
# if params['decision_scores_'] is not None: | |||||
# self._fitted = True | |||||
# if params['threshold_'] is not None: | |||||
# self._fitted = True | |||||
# if params['labels_'] is not None: | |||||
# self._fitted = True | |||||
if params['left_inds_'] is not None: | |||||
self._fitted = True | |||||
if params['right_inds_'] is not None: | |||||
self._fitted = True | |||||
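# get_params/set_params round-trip sketch (assumes a fitted instance `det` of a
# hypothetical subclass MyDetectorPrimitive):
#
#   params = det.get_params()
#   fresh = MyDetectorPrimitive(hyperparams=hp)
#   fresh.set_params(params=params)   # restores clf_ and the subsequence index mappings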
@classmethod | |||||
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): # pragma: no cover | |||||
""" | |||||
Select columns to fit. | |||||
Args: | |||||
inputs: Container DataFrame | |||||
hyperparams: d3m.metadata.hyperparams.Hyperparams | |||||
Returns: | |||||
Tuple of the selected columns (DataFrame) and their indices (list)
""" | |||||
#print("*******************get columns to fit***********") | |||||
if not hyperparams['use_semantic_types']: | |||||
return inputs, list(range(len(inputs.columns))) | |||||
inputs_metadata = inputs.metadata | |||||
#print("inputs_metadata ", inputs_metadata) | |||||
def can_produce_column(column_index: int) -> bool: | |||||
return cls._can_produce_column(inputs_metadata, column_index, hyperparams) | |||||
columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, | |||||
use_columns=hyperparams['use_columns'], | |||||
exclude_columns=hyperparams['exclude_columns'], | |||||
can_use_column=can_produce_column) | |||||
#print("columns_to_produce ", columns_to_produce) | |||||
return inputs.iloc[:, columns_to_produce], columns_to_produce | |||||
# return columns_to_produce | |||||
@classmethod | |||||
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, | |||||
hyperparams: Hyperparams) -> bool: # pragma: no cover | |||||
""" | |||||
Output whether a column can be processed. | |||||
Args: | |||||
inputs_metadata: d3m.metadata.base.DataMetadata | |||||
column_index: int | |||||
Returns: | |||||
bool | |||||
""" | |||||
column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) | |||||
#print("column metadasta ", ) | |||||
accepted_structural_types = (int, float, numpy.integer, numpy.float64) | |||||
accepted_semantic_types = set() | |||||
accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") | |||||
if not issubclass(column_metadata['structural_type'], accepted_structural_types): | |||||
return False | |||||
semantic_types = set(column_metadata.get('semantic_types', [])) | |||||
#print("semantic_types ", column_metadata.get('semantic_types')) | |||||
if len(semantic_types) == 0: | |||||
cls.logger.warning("No semantic types found in column metadata") | |||||
return False | |||||
# Making sure all accepted_semantic_types are available in semantic_types | |||||
if len(accepted_semantic_types - semantic_types) == 0: | |||||
return True | |||||
return False | |||||
@classmethod | |||||
def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: # pragma: no cover | |||||
""" | |||||
Output metadata of selected columns. | |||||
Args: | |||||
outputs_metadata: metadata_base.DataMetadata | |||||
hyperparams: d3m.metadata.hyperparams.Hyperparams | |||||
Returns: | |||||
List[OrderedDict]
""" | |||||
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||||
target_columns_metadata: List[OrderedDict] = [] | |||||
for column_index in range(outputs_length): | |||||
column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) | |||||
# Update semantic types and prepare it for predicted targets. | |||||
semantic_types = set(column_metadata.get('semantic_types', [])) | |||||
semantic_types_to_remove = set([]) | |||||
add_semantic_types = set()
add_semantic_types.add(hyperparams["return_semantic_type"]) | |||||
semantic_types = semantic_types - semantic_types_to_remove | |||||
semantic_types = semantic_types.union(add_semantic_types) | |||||
column_metadata['semantic_types'] = list(semantic_types) | |||||
target_columns_metadata.append(column_metadata) | |||||
return target_columns_metadata | |||||
@classmethod | |||||
def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], | |||||
target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: # pragma: no cover | |||||
""" | |||||
Update metadata for selected columns.
Args: | |||||
inputs_metadata: metadata_base.DataMetadata | |||||
outputs: Container Dataframe | |||||
target_columns_metadata: list | |||||
Returns: | |||||
d3m.metadata.base.DataMetadata | |||||
""" | |||||
outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) | |||||
for column_index, column_metadata in enumerate(target_columns_metadata): | |||||
column_metadata.pop("structural_type", None) | |||||
outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) | |||||
return outputs_metadata | |||||
def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: # pragma: no cover | |||||
""" | |||||
Wrap predictions into dataframe | |||||
Args: | |||||
inputs: Container Dataframe | |||||
predictions: array-like data (n_samples, n_features) | |||||
Returns: | |||||
Dataframe | |||||
""" | |||||
outputs = d3m_dataframe(predictions, generate_metadata=True) | |||||
# target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata, | |||||
# self.hyperparams) | |||||
target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams, self.primitiveNo) | |||||
outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) | |||||
# print(outputs.metadata.to_internal_simple_structure()) | |||||
return outputs | |||||
@classmethod | |||||
def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams, primitiveNo): # pragma: no cover | |||||
""" | |||||
Add target columns metadata | |||||
Args: | |||||
outputs_metadata: metadata.base.DataMetadata | |||||
hyperparams: d3m.metadata.hyperparams.Hyperparams | |||||
Returns: | |||||
List[OrderedDict] | |||||
""" | |||||
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||||
target_columns_metadata: List[OrderedDict] = [] | |||||
for column_index in range(outputs_length): | |||||
column_name = "{0}{1}_{2}".format(cls.metadata.query()['name'], primitiveNo, column_index) | |||||
column_metadata = OrderedDict() | |||||
semantic_types = set() | |||||
semantic_types.add(hyperparams["return_semantic_type"]) | |||||
column_metadata['semantic_types'] = list(semantic_types) | |||||
column_metadata["name"] = str(column_name) | |||||
target_columns_metadata.append(column_metadata) | |||||
return target_columns_metadata | |||||
@classmethod | |||||
def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int], | |||||
outputs_metadata: metadata_base.DataMetadata, hyperparams): # pragma: no cover | |||||
""" | |||||
Update metadata for selected columns.
Args: | |||||
inputs_metadata: metadata.base.DataMetadata | |||||
input_indices: list | |||||
outputs_metadata: metadata.base.DataMetadata | |||||
hyperparams: d3m.metadata.hyperparams.Hyperparams | |||||
Returns: | |||||
List[OrderedDict]
""" | |||||
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||||
target_columns_metadata: List[OrderedDict] = [] | |||||
for column_index in input_indices: | |||||
column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") | |||||
if column_name is None: | |||||
column_name = "output_{}".format(column_index) | |||||
column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) | |||||
semantic_types = set(column_metadata.get('semantic_types', [])) | |||||
semantic_types_to_remove = set([]) | |||||
add_semantic_types = set() | |||||
add_semantic_types.add(hyperparams["return_semantic_type"]) | |||||
semantic_types = semantic_types - semantic_types_to_remove | |||||
semantic_types = semantic_types.union(add_semantic_types) | |||||
column_metadata['semantic_types'] = list(semantic_types) | |||||
column_metadata["name"] = str(column_name) | |||||
target_columns_metadata.append(column_metadata) | |||||
# If outputs has more columns than index, add Attribute Type to all remaining | |||||
if outputs_length > len(input_indices): | |||||
for column_index in range(len(input_indices), outputs_length): | |||||
column_metadata = OrderedDict() | |||||
semantic_types = set() | |||||
semantic_types.add(hyperparams["return_semantic_type"]) | |||||
column_name = "output_{}".format(column_index) | |||||
column_metadata["semantic_types"] = list(semantic_types) | |||||
column_metadata["name"] = str(column_name) | |||||
target_columns_metadata.append(column_metadata) | |||||
return target_columns_metadata |
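# Subclassing sketch (hypothetical primitive; assumes a PyOD-style detector
# exposing fit/predict/decision_function, e.g. pyod.models.iforest.IForest):
#
#   class IsolationForestPrimitive(UnsupervisedOutlierDetectorBase2):
#       def __init__(self, *, hyperparams, random_seed=0, docker_containers=None):
#           super().__init__(hyperparams=hyperparams, random_seed=random_seed,
#                            docker_containers=docker_containers)
#           self._clf = IForest(contamination=hyperparams['contamination'])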
@@ -25,7 +25,7 @@ from d3m.primitive_interfaces import base, transformer
from d3m.metadata import base as metadata_base, hyperparams
from d3m.metadata import hyperparams, params, base as metadata_base
from d3m.primitive_interfaces.base import CallResult, DockerContainer
-from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
from statsmodels.tsa.stattools import acf
@@ -186,7 +186,7 @@ class ACF:
-class AutoCorrelationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class AutoCorrelationPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
A primitive that performs autocorrelation on a DataFrame
acf() function documentation: https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.acf.html
@@ -233,26 +233,8 @@ class AutoCorrelationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outp
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'AutocorrelationPrimitive')),
})
-def __init__(self, *,
-hyperparams: Hyperparams, #
-random_seed: int = 0,
-docker_containers: Dict[str, DockerContainer] = None) -> None:
-super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
-self._clf = ACF(unbiased = hyperparams['unbiased'],
-nlags = hyperparams['nlags'],
-qstat = hyperparams['qstat'],
-fft = hyperparams['fft'],
-alpha = hyperparams['alpha'],
-missing = hyperparams['missing']
-)
-self.primitiveNo = PrimitiveCount.primitive_no
-PrimitiveCount.primitive_no+=1
-def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""
Process the testing data.
Args:
@@ -261,6 +243,16 @@ class AutoCorrelationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outp
Returns:
Container DataFrame after AutoCorrelation.
"""
+self._clf = ACF(unbiased = self.hyperparams['unbiased'],
+nlags = self.hyperparams['nlags'],
+qstat = self.hyperparams['qstat'],
+fft = self.hyperparams['fft'],
+alpha = self.hyperparams['alpha'],
+missing = self.hyperparams['missing']
+)
+self.primitiveNo = PrimitiveCount.primitive_no
+PrimitiveCount.primitive_no+=1
# Get cols to fit.
self._fitted = False
@@ -20,6 +20,7 @@ from d3m import utils
from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from d3m.primitive_interfaces.base import CallResult, DockerContainer
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
import os.path
@@ -118,7 +119,7 @@ class Hyperparams(hyperparams.Hyperparams):
)
-class BKFilterPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class BKFilterPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Filter a time series using the Baxter-King bandpass filter.
}) | }) | ||||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||||
""" | """ | ||||
Process the testing data. | Process the testing data. | ||||
Args: | Args: | ||||
@@ -14,6 +14,7 @@ import math
from scipy.fft import dct
from collections import OrderedDict
from typing import cast, Dict, List, Union, Sequence, Optional, Tuple
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
from scipy import sparse
@@ -160,7 +161,7 @@ class DCT:
-class DiscreteCosineTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class DiscreteCosineTransformPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Compute the 1-D discrete Cosine Transform.
Return the Discrete Cosine Transform of arbitrary type sequence x.
@@ -242,7 +243,7 @@ class DiscreteCosineTransformPrimitive(transformer.TransformerPrimitiveBase[Inpu
workers = self.hyperparams['workers']
)
-def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""
Args:
@@ -17,6 +17,7 @@ from typing import cast, Dict, List, Union, Sequence, Optional, Tuple
from scipy import sparse
from numpy import ndarray
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
__all__ = ('FastFourierTransformPrimitive',)
@@ -157,7 +158,7 @@ class FFT:
-class FastFourierTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class FastFourierTransformPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Compute the 1-D discrete Fourier Transform.
This function computes the 1-D n-point discrete Fourier Transform (DFT) with the efficient Fast Fourier Transform (FFT) algorithm
@@ -232,7 +233,7 @@ class FastFourierTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs,
workers = self.hyperparams['workers']
)
-def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""
Args:
@@ -21,6 +21,7 @@ from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from d3m.primitive_interfaces.base import CallResult, DockerContainer
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
import statsmodels.api as sm
@@ -101,7 +102,7 @@ class Hyperparams(hyperparams.Hyperparams):
)
-class HPFilterPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class HPFilterPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Filter a time series using the Hodrick-Prescott filter.
}) | }) | ||||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||||
""" | """ | ||||
Process the testing data. | Process the testing data. | ||||
Args: | Args: | ||||
@@ -15,6 +15,7 @@ import numpy
from numpy import ndarray
import warnings
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
__all__ = ('NonNegativeMatrixFactorizationPrimitive',)
@@ -211,7 +212,7 @@ class NMF:
return result
-class NonNegativeMatrixFactorizationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class NonNegativeMatrixFactorizationPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Calculates Latent factors of a given matrix of timeseries data
@@ -299,7 +300,7 @@ class NonNegativeMatrixFactorizationPrimitive(transformer.TransformerPrimitiveBa
learning_rate = self.hyperparams['learning_rate'],
)
-def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
assert isinstance(inputs, container.DataFrame), type(dataframe)
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
__all__ = ('SpectralResidualTransformPrimitive',)
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class SpectralResidualTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class SpectralResidualTransformPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find Spectral Residual Transform of time series
"""
@@ -110,7 +111,7 @@ class SpectralResidualTransformPrimitive(transformer.TransformerPrimitiveBase[In
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'SpectralResidualTransformPrimitive')),
})
-def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""
Args:
@@ -20,6 +20,7 @@ from d3m.primitive_interfaces import base, transformer
from d3m.container import DataFrame as d3m_dataframe
from d3m.metadata import hyperparams, params, base as metadata_base
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalAbsEnergyPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalAbsEnergyPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find abs_energy of time series
"""
@@ -112,7 +113,7 @@ class StatisticalAbsEnergyPrimitive(transformer.TransformerPrimitiveBase[Inputs,
})
-def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""
Args:
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
__all__ = ('StatisticalAbsSumPrimitive',)
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalAbsSumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalAbsSumPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find abs_sum of time series
"""
@@ -109,7 +110,7 @@ class StatisticalAbsSumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Ou
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalAbsSumPrimitive')),
})
-def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""
Args:
@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from d3m.exceptions import UnexpectedValueError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
__all__ = ('StatisticalGmeanPrimitive',)
@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalGmeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalGmeanPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find gmean of time series.
Will only take positive values as inputs.
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalGmeanPrimitive')), | 'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalGmeanPrimitive')), | ||||
}) | }) | ||||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||||
""" | """ | ||||
Args: | Args: | ||||
@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
__all__ = ('StatisticalHmeanPrimitive',)
@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalHmeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalHmeanPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find Harmonic mean of time series
Harmonic mean only defined if all elements greater than or equal to zero
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalHmeanPrimitive')), | 'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalHmeanPrimitive')), | ||||
}) | }) | ||||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||||
""" | """ | ||||
Args: | Args: | ||||
@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase
__all__ = ('StatisticalKurtosisPrimitive',)
@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams):
-class StatisticalKurtosisPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalKurtosisPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find kurtosis of time series
"""
@@ -110,7 +111,7 @@ class StatisticalKurtosisPrimitive(transformer.TransformerPrimitiveBase[Inputs, | |||||
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalKurtosisPrimitive')), | 'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalKurtosisPrimitive')), | ||||
}) | }) | ||||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||||
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||||
""" | """ | ||||
Args: | Args: | ||||
@@ -9,11 +9,11 @@ from numpy import ndarray
 from collections import OrderedDict
 from scipy import sparse
 import os
-import uuid
 import numpy
 import typing
 import time
+import uuid

 from d3m import container
 from d3m.primitive_interfaces import base, transformer
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

 __all__ = ('StatisticalMaximumPrimitive',)
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):

-class StatisticalMaximumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalMaximumPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find maximum of time series
     """
@@ -110,7 +111,7 @@ class StatisticalMaximumPrimitive(transformer.TransformerPrimitiveBase[Inputs, O
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMaximumPrimitive')),
     })

-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -159,11 +160,11 @@ class StatisticalMaximumPrimitive(transformer.TransformerPrimitiveBase[Inputs, O
             if self.hyperparams['error_on_no_input']:
                 raise RuntimeError("No input columns were selected")
             self.logger.warn("No input columns were selected")
         outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'],
                                              add_index_columns=self.hyperparams['add_index_columns'],
                                              inputs=inputs, column_indices=self._training_indices,
                                              columns_list=output_columns)
         self.logger.info('Statistical Maximum Primitive returned')

         return base.CallResult(outputs)
@@ -314,6 +315,6 @@ class StatisticalMaximumPrimitive(transformer.TransformerPrimitiveBase[Inputs, O
                 sequence = column_value[iter-window_size+1:iter+1]
                 column_maximum[iter] = np.max(sequence)
             column_maximum[:window_size-1] = column_maximum[window_size-1]
-        transformed_X[column + "_maximum"] = column_maximum
+        transformed_X[str(column) + "_maximum"] = column_maximum
         return transformed_X
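
The hunk above shows the per-column transform each statistical primitive applies: a trailing window of window_size values, the statistic over that window, and the first window_size-1 rows backfilled with the earliest full-window value; the str() cast fixes string concatenation when the column label is not already a string. A standalone sketch of that sliding-window maximum (names mirror the diff; the loop is illustrative, not the primitive's exact code):

    import numpy as np

    def rolling_maximum(column_value: np.ndarray, window_size: int) -> np.ndarray:
        column_maximum = np.zeros(len(column_value))
        for i in range(window_size - 1, len(column_value)):
            sequence = column_value[i - window_size + 1:i + 1]  # trailing window
            column_maximum[i] = np.max(sequence)
        # Backfill the rows that lack a full window, as the primitive does.
        column_maximum[:window_size - 1] = column_maximum[window_size - 1]
        return column_maximum

    print(rolling_maximum(np.array([1.0, 3.0, 2.0, 5.0]), 2))  # [3. 3. 3. 5.]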
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

 __all__ = ('StatisticalMeanPrimitive',)
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):

-class StatisticalMeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalMeanPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find mean of time series
     """
@@ -110,7 +111,7 @@ class StatisticalMeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outp
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMeanPrimitive')),
     })

-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

 __all__ = ('StatisticalMeanAbsPrimitive',)
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):

-class StatisticalMeanAbsPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalMeanAbsPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find mean_abs of time series
     """
@@ -109,7 +110,7 @@ class StatisticalMeanAbsPrimitive(transformer.TransformerPrimitiveBase[Inputs, O
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMeanAbsPrimitive')),
     })

-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

 __all__ = ('StatisticalMeanAbsTemporalDerivativePrimitive',)
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):

-class StatisticalMeanAbsTemporalDerivativePrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalMeanAbsTemporalDerivativePrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find mean_abs_temporal_derivative of time series
     """
@@ -110,7 +111,7 @@ class StatisticalMeanAbsTemporalDerivativePrimitive(transformer.TransformerPrimi
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMeanAbsTemporalDerivativePrimitive')),
     })

-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

 __all__ = ('StatisticalMeanTemporalDerivativePrimitive',)
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):

-class StatisticalMeanTemporalDerivativePrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalMeanTemporalDerivativePrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find mean_temporal_derivative of time series
     """
@@ -110,7 +111,7 @@ class StatisticalMeanTemporalDerivativePrimitive(transformer.TransformerPrimitiv
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMeanTemporalDerivativePrimitive')),
     })

-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

 __all__ = ('StatisticalMedianPrimitive',)
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):

-class StatisticalMedianPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalMedianPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find median of time series
     """
@@ -110,7 +111,7 @@ class StatisticalMedianPrimitive(transformer.TransformerPrimitiveBase[Inputs, Ou
     })

-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

 __all__ = ('StatisticalMedianAbsoluteDeviationPrimitive',)
@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams):

-class StatisticalMedianAbsoluteDeviationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalMedianAbsoluteDeviationPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find median_absolute_deviation of time series
     """
@@ -111,7 +112,7 @@ class StatisticalMedianAbsoluteDeviationPrimitive(transformer.TransformerPrimiti
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMedianAbsoluteDeviationPrimitive')),
     })

-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

 __all__ = ('StatisticalMinimumPrimitive',)
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):

-class StatisticalMinimumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalMinimumPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find minimum of time series
     """
@@ -110,7 +111,7 @@ class StatisticalMinimumPrimitive(transformer.TransformerPrimitiveBase[Inputs, O
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMinimumPrimitive')),
     })

-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

 __all__ = ('StatisticalSkewPrimitive',)
@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams):

-class StatisticalSkewPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalSkewPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find skew of time series
     """
@@ -111,7 +112,7 @@ class StatisticalSkewPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outp
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalSkewPrimitive')),
     })

-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

 __all__ = ('StatisticalStdPrimitive',)
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):

-class StatisticalStdPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalStdPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find std of time series
     """
@@ -110,7 +111,7 @@ class StatisticalStdPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outpu
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalStdPrimitive')),
     })

-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

 __all__ = ('StatisticalVarPrimitive',)
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):

-class StatisticalVarPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalVarPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find var of time series
     """
@@ -109,7 +110,7 @@ class StatisticalVarPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outpu
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalVarPrimitive')),
     })

-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

 __all__ = ('StatisticalVariationPrimitive',)
@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams):

-class StatisticalVariationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalVariationPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find variation of time series
     """
@@ -112,7 +113,7 @@ class StatisticalVariationPrimitive(transformer.TransformerPrimitiveBase[Inputs,
     })

-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

 __all__ = ('StatisticalVecSumPrimitive',)
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):

-class StatisticalVecSumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalVecSumPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find vec_sum of time series
     """
@@ -110,7 +111,7 @@ class StatisticalVecSumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Ou
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalVecSumPrimitive')),
     })

-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

 __all__ = ('StatisticalWillisonAmplitudePrimitive',)
@@ -91,7 +92,7 @@ class Hyperparams(hyperparams.Hyperparams):

-class StatisticalWillisonAmplitudePrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalWillisonAmplitudePrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find willison amplitude of time series
     """
@@ -114,7 +115,7 @@ class StatisticalWillisonAmplitudePrimitive(transformer.TransformerPrimitiveBase
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalWillisonAmplitudePrimitive')),
     })

-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
 from d3m.base import utils as base_utils
 from d3m.exceptions import PrimitiveNotFittedError
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

 __all__ = ('StatisticalZeroCrossingPrimitive',)
@@ -83,7 +84,7 @@ class Hyperparams(hyperparams.Hyperparams):

-class StatisticalZeroCrossingPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalZeroCrossingPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     Primitive to find zero_crossing of time series. A column indicates zero crossing on the ith row: 1 marks a sign crossing, 0 marks a normal row.
     """
@@ -105,7 +106,7 @@ class StatisticalZeroCrossingPrimitive(transformer.TransformerPrimitiveBase[Inpu
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalZeroCrossingPrimitive')),
     })

-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Args:
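
Unpacking the docstring above: the primitive appends an indicator column in which 1 marks a row where the series changes sign and 0 marks a normal row. A minimal numpy illustration of the idea (not the primitive's exact implementation; the series matches the test case further down):

    import numpy as np

    values = np.array([1.0, -2.0, 3.0, 4.0])
    crossing = np.zeros(len(values), dtype=int)
    crossing[1:] = (np.sign(values[1:]) != np.sign(values[:-1])).astype(int)
    print(crossing)  # [0 1 1 0]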
@@ -22,6 +22,7 @@ from d3m.exceptions import PrimitiveNotFittedError
 from d3m.primitive_interfaces.base import CallResult, DockerContainer
 from d3m.primitive_interfaces import base, transformer
 # from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

 Inputs = d3m_dataframe
@@ -161,7 +162,7 @@
         semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
     )

-class TRMFPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class TRMFPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """Temporal Regularized Matrix Factorization.

     Parameters
@@ -241,7 +242,7 @@ class TRMFPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperp
     })

-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
         """
         Process the testing data.
         Args:
@@ -19,6 +19,7 @@ from collections import OrderedDict
 from scipy import sparse
 import logging
 import uuid
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

 __all__ = ('WaveletTransformPrimitive',)
@@ -148,7 +149,7 @@
     )

-class WaveletTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class WaveletTransformPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
     """
     A primitive for multilevel 1D discrete wavelet transform of data.
     See `PyWavelets documentation <https://pywavelets.readthedocs.io/en/latest/ref/>`_ for details.
@@ -203,7 +204,7 @@ class WaveletTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Out
     )

-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+    def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
         """
         Process the testing data.
         Args:
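
The primitive wraps the multilevel 1D DWT from PyWavelets, per the linked documentation. A quick standalone illustration of the decomposition and its inverse (the primitive's wavelet/level hyperparameters presumably map onto these pywt arguments):

    import pywt

    signal = [3, 7, 1, 1, -2, 5, 4, 6]
    coeffs = pywt.wavedec(signal, wavelet='db1', level=2)  # [cA2, cD2, cD1]
    reconstructed = pywt.waverec(coeffs, wavelet='db1')    # inverse transform
    print([c.tolist() for c in coeffs])
    print(reconstructed.tolist())  # recovers the original signal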
@@ -126,7 +126,7 @@ class ABODTest(unittest.TestCase):
             'selector': ['__ALL_ELEMENTS__', 0],
             'metadata': {
                 'name': 'Angle-base Outlier Detection Primitive0_0',
-                'structural_type': 'numpy.float64',
+                'structural_type': 'numpy.int64',
                 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute']
             },
         }])
@@ -63,8 +63,6 @@ class HBOSTest(unittest.TestCase):
         primitive.fit()
         new_main = primitive.produce(inputs=main).value
         new_main_score = primitive.produce_score(inputs=main).value
-        print(new_main)
-        print(new_main_score)

         self.assertEqual(utils.to_json_structure(new_main.metadata.to_internal_simple_structure()), [{
@@ -125,7 +123,7 @@ class HBOSTest(unittest.TestCase):
             'selector': ['__ALL_ELEMENTS__', 0],
             'metadata': {
                 'name': 'HBOS0_0',
-                'structural_type': 'numpy.float64',
+                'structural_type': 'numpy.int64',
                 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute']
             },
         }])
@@ -5,7 +5,7 @@ from d3m.metadata import base as metadata_base
 from tods.detection_algorithm.Telemanom import TelemanomPrimitive

-class SODTest(unittest.TestCase):
+class TelemanomTest(unittest.TestCase):
     def test_basic(self):
         self.maxDiff = None
         main = container.DataFrame({'a': [1., 2., 3., 4.,5,6,7,8,9], 'b': [2., 3., 4., 5.,6,7,8,9,10], 'c': [3., 4., 5., 6.,7,8,9,10,11]},
@@ -91,21 +91,21 @@ class SODTest(unittest.TestCase):
             'selector': ['__ALL_ELEMENTS__', 0],
             'metadata': {
                 'name': 'Telemanom0_0',
-                'structural_type': 'numpy.float64',
+                'structural_type': 'numpy.int64',
                 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
             },
         }, {
             'selector': ['__ALL_ELEMENTS__', 1],
             'metadata': {
-                'structural_type': 'numpy.float64',
                 'name': 'Telemanom0_1',
+                'structural_type': 'numpy.int64',
                 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
             },
         }, {
             'selector': ['__ALL_ELEMENTS__', 2],
             'metadata': {
-                'structural_type': 'numpy.float64',
                 'name': 'Telemanom0_2',
+                'structural_type': 'numpy.int64',
                 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
             }
         }])
@@ -66,7 +66,7 @@ class AutoCorrelationTestCase(unittest.TestCase):
         hyperparams_class = AutoCorrelation.AutoCorrelationPrimitive.metadata.get_hyperparams().defaults()
         hyperparams_class = hyperparams_class.replace({'nlags': 2})
         primitive = AutoCorrelation.AutoCorrelationPrimitive(hyperparams=hyperparams_class)
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         print(new_main)

         # new_main_drop = new_main['value_acf']
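
Every remaining test hunk follows the same mechanical update: build a container.DataFrame, take the primitive's default hyperparams (optionally .replace()-ing a few), construct the primitive, and call the renamed _produce hook directly. Schematically, using StatisticalMean as the example (import path assumed from the usual TODS layout; adjust to your checkout):

    from d3m import container
    from tods.feature_analysis import StatisticalMean

    main = container.DataFrame(
        {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0]},
        generate_metadata=True)

    hp = StatisticalMean.StatisticalMeanPrimitive.metadata.get_hyperparams().defaults()
    primitive = StatisticalMean.StatisticalMeanPrimitive(hyperparams=hp)
    output_main = primitive._produce(inputs=main).value  # was primitive.produce(...)
    print(output_main)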
@@ -54,7 +54,7 @@ class BKFilterTest(unittest.TestCase):
         hyperparams_class = BKFilter.BKFilterPrimitive.metadata.get_hyperparams()
         primitive = BKFilter.BKFilterPrimitive(hyperparams=hyperparams_class.defaults())
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         print(new_main)
@@ -66,7 +66,7 @@ class DctTestCase(unittest.TestCase):
             'return_result':'append',
         })
         primitive = DiscreteCosineTransform.DiscreteCosineTransformPrimitive(hyperparams=hp)
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value

         c = pd.DataFrame({"A":[1,2,3], "B":['a','b','c'],'A_dct_coeff':[1.200000e+01,-3.464102e+00,-4.440892e-16]})
@@ -67,7 +67,7 @@ class FftTestCase(unittest.TestCase):
             'return_result':'append',
         })
         primitive = FastFourierTransform.FastFourierTransformPrimitive(hyperparams=hp)
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value

         c = pd.DataFrame({"A":[1,2,3], "B":['a','b','c'],'A_fft_abs':[6.000000,1.732051,1.732051],'A_fft_phse':[-0.000000,2.617994,-2.617994]})
@@ -54,7 +54,7 @@ class HPFilterTest(unittest.TestCase):
         hyperparams_class = HPFilter.HPFilterPrimitive.metadata.get_hyperparams()
         primitive = HPFilter.HPFilterPrimitive(hyperparams=hyperparams_class.defaults())
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         print(new_main)
@@ -74,7 +74,7 @@ class NmfTestCase(unittest.TestCase):
             'H': b,
         })
         primitive = NonNegativeMatrixFactorization.NonNegativeMatrixFactorizationPrimitive(hyperparams=hp)
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         print("new_main",new_main)

         c = pd.DataFrame({"A":[1,2,3,np.nan,np.nan], "B":[4,5,6,np.nan,np.nan],
@@ -55,7 +55,7 @@ class SpectralResidualTransformTestCase(unittest.TestCase):
         primitive = SpectralResidualTransform.SpectralResidualTransformPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value

         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalStdTestCase(unittest.TestCase):
         primitive = StatisticalStd.StatisticalStdPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)

         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalAbsEnergyTestCase(unittest.TestCase):
         primitive = StatisticalAbsEnergy.StatisticalAbsEnergyPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)

         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalAbsSumTestCase(unittest.TestCase):
         primitive = StatisticalAbsSum.StatisticalAbsSumPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value

         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, -5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalGmeanTestCase(unittest.TestCase):
         primitive = StatisticalGmean.StatisticalGmeanPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_gmean', 'b_gmean']])

         expected_output = container.DataFrame(
@@ -56,7 +56,7 @@ class StatisticalHmeanTestCase(unittest.TestCase):
         primitive = StatisticalHmean.StatisticalHmeanPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         #print(output_main[['values_hmean', 'b_hmean']])

         expected_output = container.DataFrame(
@@ -56,7 +56,7 @@ class StatisticalKurtosisTestCase(unittest.TestCase):
         primitive = StatisticalKurtosis.StatisticalKurtosisPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_kurtosis', 'b_kurtosis']])

         expected_output = container.DataFrame(
@@ -56,7 +56,7 @@ class StatisticalMaximumTestCase(unittest.TestCase):
         primitive = StatisticalMaximum.StatisticalMaximumPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)

         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalMeanTestCase(unittest.TestCase):
         primitive = StatisticalMean.StatisticalMeanPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)

         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalMeanAbsTestCase(unittest.TestCase):
         primitive = StatisticalMeanAbs.StatisticalMeanAbsPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)

         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalMeanAbsTemporalDerivativeTestCase(unittest.TestCase):
         primitive = StatisticalMeanAbsTemporalDerivative.StatisticalMeanAbsTemporalDerivativePrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_mean_abs_temporal_derivative', 'b_mean_abs_temporal_derivative']])

         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalMeanTemporalDerivativeTestCase(unittest.TestCase):
         primitive = StatisticalMeanTemporalDerivative.StatisticalMeanTemporalDerivativePrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_mean_temporal_derivative', 'b_mean_temporal_derivative']])

         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalMedianTestCase(unittest.TestCase):
         primitive = StatisticalMedian.StatisticalMedianPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value

         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalMedianAbsoluteDeviationTestCase(unittest.TestCase):
         primitive = StatisticalMedianAbsoluteDeviation.StatisticalMedianAbsoluteDeviationPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_median_absolute_deviation', 'b_median_absolute_deviation']])

         expected_output = container.DataFrame(
@@ -56,7 +56,7 @@ class StatisticalMinimumTestCase(unittest.TestCase):
         primitive = StatisticalMinimum.StatisticalMinimumPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)

         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalSkewTestCase(unittest.TestCase):
         primitive = StatisticalSkew.StatisticalSkewPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_skew', 'b_skew']])

         expected_output = container.DataFrame(
@@ -56,7 +56,7 @@ class StatisticalVarTestCase(unittest.TestCase):
         primitive = StatisticalVar.StatisticalVarPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)

         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class StatisticalVariationTestCase(unittest.TestCase):
         primitive = StatisticalVariation.StatisticalVariationPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_variation', 'b_variation']])

         expected_output = container.DataFrame(
@@ -56,7 +56,7 @@ class StatisticalVecSumTestCase(unittest.TestCase):
         primitive = StatisticalVecSum.StatisticalVecSumPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)

         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, -5.0, 6.0],
@@ -57,7 +57,7 @@ class StatisticalWillisonAmplitudeTestCase(unittest.TestCase):
         primitive = StatisticalWillisonAmplitude.StatisticalWillisonAmplitudePrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main[['values_willison_amplitude', 'b_willison_amplitude']])

         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -55,7 +55,7 @@ class StatisticalZeroCrossingTestCase(unittest.TestCase):
         primitive = StatisticalZeroCrossing.StatisticalZeroCrossingPrimitive(hyperparams=hp)
-        output_main = primitive.produce(inputs=main).value
+        output_main = primitive._produce(inputs=main).value
         print(output_main)

         expected_output = container.DataFrame(
             {'timestamp': [1, 3, 2, 5], 'values': [1.0, -2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],
@@ -56,7 +56,7 @@ class TRMFTest(unittest.TestCase):
         primitive = TRMF.TRMFPrimitive(hyperparams=hyperparams_class.defaults())
         # primitive.set_training_data(inputs=main)
         # primitive.fit()
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         print(new_main)
@@ -28,7 +28,7 @@ class WaveletTransformerTestCase(unittest.TestCase):
                                                  'return_result': 'new'})
         primitive = WaveletTransformPrimitive(hyperparams=hyperparams)
-        new_main = primitive.produce(inputs=main).value
+        new_main = primitive._produce(inputs=main).value
         # print(new_main)
         # print(mean_mse, std_mse)
@@ -89,7 +89,7 @@ class WaveletTransformerTestCase(unittest.TestCase):
         hyperparams = hyperparams_default.replace({'inverse': 1})
         primitive = WaveletTransformPrimitive(hyperparams=hyperparams)
-        main_recover = primitive.produce(inputs=main).value
+        main_recover = primitive._produce(inputs=main).value

         self.assertAlmostEqual(main_recover.values.tolist(), main.values.tolist(), delta=1e-6)
         # print(main.metadata.to_internal_simple_structure())