
add base_class for system-wise data

master
lhenry15 committed 4 years ago
commit c9cfcea65f
73 changed files with 1575 additions and 319 deletions
  1. +133 -133  datasets/anomaly/system_wise/sample/train.csv
  2. +1 -1  examples/axolotl_interface/example_pipelines/autoencoder_pipeline.json
  3. +4 -2  examples/axolotl_interface/example_pipelines/script/build_AutoEncoder_pipeline.py
  4. +16 -8  examples/axolotl_interface/example_pipelines/script/build_system_pipeline.py
  5. +1 -1  examples/axolotl_interface/example_pipelines/system_pipeline.json
  6. +5 -5  examples/axolotl_interface/run_pipeline.py
  7. +2 -2  tods/common/CSVReader.py
  8. +200 -0  tods/common/TODSBasePrimitives.py
  9. +3 -3  tods/detection_algorithm/PyodAE.py
  10. +60 -42  tods/detection_algorithm/SystemWiseDetection.py
  11. +455 -0  tods/detection_algorithm/SystemWiseDetection_bkup.py
  12. +559 -4  tods/detection_algorithm/UODBasePrimitive.py
  13. +13 -21  tods/feature_analysis/AutoCorrelation.py
  14. +3 -2  tods/feature_analysis/BKFilter.py
  15. +3 -2  tods/feature_analysis/DiscreteCosineTransform.py
  16. +3 -2  tods/feature_analysis/FastFourierTransform.py
  17. +3 -2  tods/feature_analysis/HPFilter.py
  18. +3 -2  tods/feature_analysis/NonNegativeMatrixFactorization.py
  19. +3 -2  tods/feature_analysis/SpectralResidualTransform.py
  20. +3 -2  tods/feature_analysis/StatisticalAbsEnergy.py
  21. +3 -2  tods/feature_analysis/StatisticalAbsSum.py
  22. +3 -2  tods/feature_analysis/StatisticalGmean.py
  23. +3 -2  tods/feature_analysis/StatisticalHmean.py
  24. +3 -2  tods/feature_analysis/StatisticalKurtosis.py
  25. +6 -5  tods/feature_analysis/StatisticalMaximum.py
  26. +3 -2  tods/feature_analysis/StatisticalMean.py
  27. +3 -2  tods/feature_analysis/StatisticalMeanAbs.py
  28. +3 -2  tods/feature_analysis/StatisticalMeanAbsTemporalDerivative.py
  29. +3 -2  tods/feature_analysis/StatisticalMeanTemporalDerivative.py
  30. +3 -2  tods/feature_analysis/StatisticalMedian.py
  31. +3 -2  tods/feature_analysis/StatisticalMedianAbsoluteDeviation.py
  32. +3 -2  tods/feature_analysis/StatisticalMinimum.py
  33. +3 -2  tods/feature_analysis/StatisticalSkew.py
  34. +3 -2  tods/feature_analysis/StatisticalStd.py
  35. +3 -2  tods/feature_analysis/StatisticalVar.py
  36. +3 -2  tods/feature_analysis/StatisticalVariation.py
  37. +3 -2  tods/feature_analysis/StatisticalVecSum.py
  38. +3 -2  tods/feature_analysis/StatisticalWillisonAmplitude.py
  39. +3 -2  tods/feature_analysis/StatisticalZeroCrossing.py
  40. +3 -2  tods/feature_analysis/TRMF.py
  41. +3 -2  tods/feature_analysis/WaveletTransform.py
  42. +1 -1  tods/tests/detection_algorithm/test_PyodABOD.py
  43. +1 -3  tods/tests/detection_algorithm/test_PyodHBOS.py
  44. +4 -4  tods/tests/detection_algorithm/test_Telemanom.py
  45. +1 -1  tods/tests/feature_analysis/test_Autocorrelation.py
  46. +1 -1  tods/tests/feature_analysis/test_BKFilter.py
  47. +1 -1  tods/tests/feature_analysis/test_DiscreteCosineTransform.py
  48. +1 -1  tods/tests/feature_analysis/test_FastFourierTransform.py
  49. +1 -1  tods/tests/feature_analysis/test_HPFilter.py
  50. +1 -1  tods/tests/feature_analysis/test_NonNegativeMatrixFactorization.py
  51. +1 -1  tods/tests/feature_analysis/test_SpectralResidualTransform.py
  52. +1 -1  tods/tests/feature_analysis/test_StastiticalStd.py
  53. +1 -1  tods/tests/feature_analysis/test_StatisticalAbsEnergy.py
  54. +1 -1  tods/tests/feature_analysis/test_StatisticalAbsSum.py
  55. +1 -1  tods/tests/feature_analysis/test_StatisticalGmean.py
  56. +1 -1  tods/tests/feature_analysis/test_StatisticalHmean.py
  57. +1 -1  tods/tests/feature_analysis/test_StatisticalKurtosis.py
  58. +1 -1  tods/tests/feature_analysis/test_StatisticalMaximum.py
  59. +1 -1  tods/tests/feature_analysis/test_StatisticalMean.py
  60. +1 -1  tods/tests/feature_analysis/test_StatisticalMeanAbs.py
  61. +1 -1  tods/tests/feature_analysis/test_StatisticalMeanAbsTemporalDerivative.py
  62. +1 -1  tods/tests/feature_analysis/test_StatisticalMeanTemporalDerivative.py
  63. +1 -1  tods/tests/feature_analysis/test_StatisticalMedian.py
  64. +1 -1  tods/tests/feature_analysis/test_StatisticalMedianAbsoluteDeviation.py
  65. +1 -1  tods/tests/feature_analysis/test_StatisticalMinimum.py
  66. +1 -1  tods/tests/feature_analysis/test_StatisticalSkew.py
  67. +1 -1  tods/tests/feature_analysis/test_StatisticalVar.py
  68. +1 -1  tods/tests/feature_analysis/test_StatisticalVariation.py
  69. +1 -1  tods/tests/feature_analysis/test_StatisticalVecSum.py
  70. +1 -1  tods/tests/feature_analysis/test_StatisticalWillisonAmplitude.py
  71. +1 -1  tods/tests/feature_analysis/test_StatisticalZeroCrossing.py
  72. +1 -1  tods/tests/feature_analysis/test_TRMF.py
  73. +2 -2  tods/tests/feature_analysis/test_WaveletTransformer.py
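
For orientation: the "system-wise" data this commit targets nests one DataFrame per system inside a parent DataFrame, and the new base classes detect this by checking the shape of the first cell. A minimal pandas sketch of that layout and check (illustrative only; the column names are assumptions, not the exact TODS containers):

import pandas as pd

# Two per-system time series, as read from e.g. 31.csv and 32.csv.
sys_a = pd.DataFrame({'value_0': [0.1, 0.4, 0.2]})
sys_b = pd.DataFrame({'value_0': [0.3, 0.9, 0.8]})

# System-wise layout: one row per system, each cell holds a whole series.
system_wise = pd.DataFrame({'system': [sys_a, sys_b]})

# The check used by the new base classes: a numeric cell has shape (),
# an embedded DataFrame has a non-empty shape such as (3, 1).
print(len(system_wise.iloc[0, 0].shape) != 0)  # True  -> system-wise data
print(len(sys_a.iloc[0, 0].shape) != 0)        # False -> plain data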

datasets/anomaly/system_wise/sample/train.csv (+133 -133)

@@ -33,72 +33,72 @@ d3mIndex,system,label
31,31.csv,1
32,32.csv,1
33,33.csv,1
-34,34.csv,2
-35,35.csv,2
-36,36.csv,2
-37,37.csv,2
-38,38.csv,2
-39,39.csv,2
-40,40.csv,2
-41,41.csv,2
-42,42.csv,2
-43,43.csv,2
-44,44.csv,2
-45,45.csv,2
-46,46.csv,2
-47,47.csv,2
-48,48.csv,2
-49,49.csv,2
-50,50.csv,2
-51,51.csv,2
-52,52.csv,2
-53,53.csv,2
-54,54.csv,2
-55,55.csv,2
-56,56.csv,2
-57,57.csv,2
-58,58.csv,2
-59,59.csv,2
-60,60.csv,2
-61,61.csv,2
-62,62.csv,2
-63,63.csv,2
-64,64.csv,2
-65,65.csv,2
-66,66.csv,2
-67,67.csv,2
-68,68.csv,2
-69,69.csv,2
-70,70.csv,2
-71,71.csv,2
-72,72.csv,2
-73,73.csv,2
-74,74.csv,2
-75,75.csv,2
-76,76.csv,2
-77,77.csv,2
-78,78.csv,2
-79,79.csv,2
-80,80.csv,2
-81,81.csv,2
-82,82.csv,2
-83,83.csv,2
-84,84.csv,2
-85,85.csv,2
-86,86.csv,2
-87,87.csv,2
-88,88.csv,2
-89,89.csv,2
-90,90.csv,2
-91,91.csv,2
-92,92.csv,2
-93,93.csv,2
-94,94.csv,2
-95,95.csv,2
-96,96.csv,2
-97,97.csv,2
-98,98.csv,2
-99,99.csv,2
+34,34.csv,0
+35,35.csv,0
+36,36.csv,0
+37,37.csv,0
+38,38.csv,0
+39,39.csv,0
+40,40.csv,0
+41,41.csv,0
+42,42.csv,0
+43,43.csv,0
+44,44.csv,0
+45,45.csv,0
+46,46.csv,0
+47,47.csv,0
+48,48.csv,0
+49,49.csv,0
+50,50.csv,0
+51,51.csv,0
+52,52.csv,0
+53,53.csv,0
+54,54.csv,0
+55,55.csv,0
+56,56.csv,0
+57,57.csv,0
+58,58.csv,0
+59,59.csv,0
+60,60.csv,0
+61,61.csv,0
+62,62.csv,0
+63,63.csv,0
+64,64.csv,0
+65,65.csv,0
+66,66.csv,0
+67,67.csv,0
+68,68.csv,0
+69,69.csv,0
+70,70.csv,0
+71,71.csv,0
+72,72.csv,0
+73,73.csv,0
+74,74.csv,0
+75,75.csv,0
+76,76.csv,0
+77,77.csv,0
+78,78.csv,0
+79,79.csv,0
+80,80.csv,0
+81,81.csv,0
+82,82.csv,0
+83,83.csv,0
+84,84.csv,0
+85,85.csv,0
+86,86.csv,0
+87,87.csv,0
+88,88.csv,0
+89,89.csv,0
+90,90.csv,0
+91,91.csv,0
+92,92.csv,0
+93,93.csv,0
+94,94.csv,0
+95,95.csv,0
+96,96.csv,0
+97,97.csv,0
+98,98.csv,0
+99,99.csv,0
100,100.csv,1
101,101.csv,1
102,102.csv,1
@@ -132,70 +132,70 @@ d3mIndex,system,label
130,130.csv,1
131,131.csv,1
132,132.csv,1
-133,133.csv,2
-134,134.csv,2
-135,135.csv,2
-136,136.csv,2
-137,137.csv,2
-138,138.csv,2
-139,139.csv,2
-140,140.csv,2
-141,141.csv,2
-142,142.csv,2
-143,143.csv,2
-144,144.csv,2
-145,145.csv,2
-146,146.csv,2
-147,147.csv,2
-148,148.csv,2
-149,149.csv,2
-150,150.csv,2
-151,151.csv,2
-152,152.csv,2
-153,153.csv,2
-154,154.csv,2
-155,155.csv,2
-156,156.csv,2
-157,157.csv,2
-158,158.csv,2
-159,159.csv,2
-160,160.csv,2
-161,161.csv,2
-162,162.csv,2
-163,163.csv,2
-164,164.csv,2
-165,165.csv,2
-166,166.csv,2
-167,167.csv,2
-168,168.csv,2
-169,169.csv,2
-170,170.csv,2
-171,171.csv,2
-172,172.csv,2
-173,173.csv,2
-174,174.csv,2
-175,175.csv,2
-176,176.csv,2
-177,177.csv,2
-178,178.csv,2
-179,179.csv,2
-180,180.csv,2
-181,181.csv,2
-182,182.csv,2
-183,183.csv,2
-184,184.csv,2
-185,185.csv,2
-186,186.csv,2
-187,187.csv,2
-188,188.csv,2
-189,189.csv,2
-190,190.csv,2
-191,191.csv,2
-192,192.csv,2
-193,193.csv,2
-194,194.csv,2
-195,195.csv,2
-196,196.csv,2
-197,197.csv,2
-198,198.csv,2
-199,199.csv,2
+133,133.csv,0
+134,134.csv,0
+135,135.csv,0
+136,136.csv,0
+137,137.csv,0
+138,138.csv,0
+139,139.csv,0
+140,140.csv,0
+141,141.csv,0
+142,142.csv,0
+143,143.csv,0
+144,144.csv,0
+145,145.csv,0
+146,146.csv,0
+147,147.csv,0
+148,148.csv,0
+149,149.csv,0
+150,150.csv,0
+151,151.csv,0
+152,152.csv,0
+153,153.csv,0
+154,154.csv,0
+155,155.csv,0
+156,156.csv,0
+157,157.csv,0
+158,158.csv,0
+159,159.csv,0
+160,160.csv,0
+161,161.csv,0
+162,162.csv,0
+163,163.csv,0
+164,164.csv,0
+165,165.csv,0
+166,166.csv,0
+167,167.csv,0
+168,168.csv,0
+169,169.csv,0
+170,170.csv,0
+171,171.csv,0
+172,172.csv,0
+173,173.csv,0
+174,174.csv,0
+175,175.csv,0
+176,176.csv,0
+177,177.csv,0
+178,178.csv,0
+179,179.csv,0
+180,180.csv,0
+181,181.csv,0
+182,182.csv,0
+183,183.csv,0
+184,184.csv,0
+185,185.csv,0
+186,186.csv,0
+187,187.csv,0
+188,188.csv,0
+189,189.csv,0
+190,190.csv,0
+191,191.csv,0
+192,192.csv,0
+193,193.csv,0
+194,194.csv,0
+195,195.csv,0
+196,196.csv,0
+197,197.csv,0
+198,198.csv,0
+199,199.csv,0

examples/axolotl_interface/example_pipelines/autoencoder_pipeline.json (+1 -1)

@@ -1 +1 @@
{"id": "bfd8aedf-36be-4dad-af8a-c324a03db5f9", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2021-02-13T17:02:35.500457Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.6.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "c78138d9-9377-31dc-aee8-83d9df049c60", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.dataset_to_dataframe", "name": "Extract a DataFrame from a Dataset"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "81235c29-aeb9-3828-911a-1b25319b6998", "version": "0.6.0", "python_path": "d3m.primitives.tods.data_processing.column_parser", "name": "Parses strings into their types"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "642de2e7-5590-3cab-9266-2a53c326c461", "version": "0.0.1", "python_path": "d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler", "name": "Axis_wise_scale"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_ae", "name": "TODS.anomaly_detection_primitives.AutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "2530840a-07d4-3874-b7d8-9eb5e4ae2bf3", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.construct_predictions", "name": "Construct pipeline predictions output"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.5.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "01ad8ccf817150186ca15157a4f02ee1f738582137321a8a5a4a3252832ce555"}
{"id": "924e9a77-da5f-4bcc-b9a0-ed65bbaf87fa", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2021-03-11T23:41:13.884494Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.6.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "c78138d9-9377-31dc-aee8-83d9df049c60", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.dataset_to_dataframe", "name": "Extract a DataFrame from a Dataset"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "81235c29-aeb9-3828-911a-1b25319b6998", "version": "0.6.0", "python_path": "d3m.primitives.tods.data_processing.column_parser", "name": "Parses strings into their types"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "f07ce875-bbc7-36c5-9cc1-ba4bfb7cf48e", "version": "0.1.0", "python_path": "d3m.primitives.tods.feature_analysis.statistical_maximum", "name": "Time Series Decompostional"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_ae", "name": "TODS.anomaly_detection_primitives.AutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "2530840a-07d4-3874-b7d8-9eb5e4ae2bf3", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.construct_predictions", "name": "Construct pipeline predictions output"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.5.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "bb1cb5328299d8d65cabc152092da553db267494fb12e6320c66110b2c48a265"}

examples/axolotl_interface/example_pipelines/script/build_AutoEncoder_pipeline.py (+4 -2)

@@ -41,7 +41,9 @@ attributes = 'steps.2.produce'
 targets = 'steps.3.produce'
 
 # Step 4: processing
-step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler'))
+#step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler'))
+step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_maximum'))
+#step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_minimum'))
 step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
 step_4.add_output('produce')
 pipeline_description.add_step(step_4)
@@ -64,7 +66,7 @@ pipeline_description.add_output(name='output predictions', data_reference='steps
 
 # Output to json
 data = pipeline_description.to_json()
-with open('example_pipeline.json', 'w') as f:
+with open('autoencoder_pipeline.json', 'w') as f:
     f.write(data)
 print(data)


examples/axolotl_interface/example_pipelines/script/build_system_pipeline.py (+16 -8)

@@ -57,29 +57,37 @@ attributes = 'steps.4.produce'
 targets = 'steps.5.produce'
 
 # Step 6: processing
-step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler'))
+step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_maximum'))
 step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
 step_6.add_output('produce')
 pipeline_description.add_step(step_6)
 
 # Step 7: algorithm
-step_7 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
+#step_7 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
+step_7 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ocsvm'))
 step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce')
-step_7.add_output('produce')
+step_7.add_output('produce_score')
 pipeline_description.add_step(step_7)
 
 # Step 8: Predictions
-step_8 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
-step_8.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.7.produce')
-step_8.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
+#step_8 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
+step_8 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.system_wise_detection'))
+step_8.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.7.produce_score')
+#step_8.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
 step_8.add_output('produce')
 pipeline_description.add_step(step_8)
 
+step_9 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
+step_9.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.8.produce')
+step_9.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
+step_9.add_output('produce')
+pipeline_description.add_step(step_9)
+
 # Final Output
-pipeline_description.add_output(name='output predictions', data_reference='steps.8.produce')
+pipeline_description.add_output(name='output predictions', data_reference='steps.9.produce')
 
 # Output to json
 data = pipeline_description.to_json()
-with open('example_pipeline.json', 'w') as f:
+with open('system_pipeline.json', 'w') as f:
     f.write(data)
 print(data)
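
As a usage sketch (not part of this commit), the emitted system_pipeline.json can then be evaluated through the same axolotl interface that run_pipeline.py below uses. The dataset path and target index here are assumptions for the bundled system-wise sample, and whether system-wise loading needs extra arguments (e.g. the directory of per-system CSVs) is not visible in this diff:

import pandas as pd
from tods import generate_dataset, load_pipeline, evaluate_pipeline

# Assumed paths/indices for the sample data set touched by this commit.
df = pd.read_csv('datasets/anomaly/system_wise/sample/train.csv')
dataset = generate_dataset(df, 2)  # column 2 ('label') holds the ground truth
pipeline = load_pipeline('system_pipeline.json')
print(evaluate_pipeline(dataset, pipeline, 'F1_MACRO'))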

examples/axolotl_interface/example_pipelines/system_pipeline.json (+1 -1)

@@ -1 +1 @@
{"id": "fe8ceeee-a513-45d8-9e28-b46e11f9c635", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2021-02-11T21:28:54.508699Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.8.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e", "version": "0.2.0", "python_path": "d3m.primitives.tods.common.denormalize", "name": "Denormalize datasets"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.dataset_to_dataframe", "name": "Extract a DataFrame from a Dataset", "digest": "ba00092121d8971b0aa8c1f4b99e97151ca39b44f549eecc03fc61a286567a36"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "989562ac-b50f-4462-99cb-abef80d765b2", "version": "0.1.0", "python_path": "d3m.primitives.tods.common.csv_reader", "name": "Columns CSV reader"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"use_columns": {"type": "VALUE", "data": [0, 1]}, "return_result": {"type": "VALUE", "data": "replace"}}}, {"type": "PRIMITIVE", "primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7", "version": "0.6.0", "python_path": "d3m.primitives.tods.data_processing.column_parser", "name": "Parses strings into their types", "digest": "ef87bfbd3b35a2d78337c5d3aba9847dfdf56c05c5289e50fe0db766ef8126e0"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"parse_semantic_types": {"type": "VALUE", "data": ["http://schema.org/Boolean", "http://schema.org/Integer", "http://schema.org/Float", "https://metadata.datadrivendiscovery.org/types/FloatVector"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type", "digest": "35ab3368a69e46da89e4dbb70dab762d4c020c43a9424db622e8ac2ae5c57c06"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type", "digest": "35ab3368a69e46da89e4dbb70dab762d4c020c43a9424db622e8ac2ae5c57c06"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "642de2e7-5590-3cab-9266-2a53c326c461", "version": "0.0.1", "python_path": "d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler", "name": "Axis_wise_scale"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", 
"python_path": "d3m.primitives.tods.detection_algorithm.pyod_ae", "name": "TODS.anomaly_detection_primitives.AutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.6.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.construct_predictions", "name": "Construct pipeline predictions output", "digest": "d981f367776ef05d7311b85b86af717a599c7fd363b04db7531bd21ab30a8844"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.7.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "7033f0a107adae468d509f5706a6a79dfcb965d4d5a8d3aef4b79017d33956ed"}
{"id": "f9f918f3-4cd9-4d3c-9a84-8a95b18d3d7c", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2021-04-02T20:35:56.617972Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.9.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e", "version": "0.2.0", "python_path": "d3m.primitives.tods.common.denormalize", "name": "Denormalize datasets"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "c78138d9-9377-31dc-aee8-83d9df049c60", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.dataset_to_dataframe", "name": "Extract a DataFrame from a Dataset"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "989562ac-b50f-4462-99cb-abef80d765b2", "version": "0.1.0", "python_path": "d3m.primitives.tods.common.csv_reader", "name": "Columns CSV reader"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"use_columns": {"type": "VALUE", "data": [0, 1]}, "return_result": {"type": "VALUE", "data": "replace"}}}, {"type": "PRIMITIVE", "primitive": {"id": "81235c29-aeb9-3828-911a-1b25319b6998", "version": "0.6.0", "python_path": "d3m.primitives.tods.data_processing.column_parser", "name": "Parses strings into their types"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"parse_semantic_types": {"type": "VALUE", "data": ["http://schema.org/Boolean", "http://schema.org/Integer", "http://schema.org/Float", "https://metadata.datadrivendiscovery.org/types/FloatVector"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "a996cd89-ddf0-367f-8e7f-8c013cbc2891", "version": "0.4.0", "python_path": "d3m.primitives.tods.data_processing.extract_columns_by_semantic_types", "name": "Extracts columns by semantic type"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "f07ce875-bbc7-36c5-9cc1-ba4bfb7cf48e", "version": "0.1.0", "python_path": "d3m.primitives.tods.feature_analysis.statistical_maximum", "name": "Time Series Decompostional"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "b454adf7-5820-3e6f-8383-619f13fb1cb6", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_ocsvm", "name": "TODS.anomaly_detection_primitives.OCSVMPrimitive"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.6.produce"}}, "outputs": [{"id": "produce_score"}]}, {"type": "PRIMITIVE", "primitive": {"id": "01d36760-235c-3cdd-95dd-3c682c634c49", 
"version": "0.1.0", "python_path": "d3m.primitives.tods.detection_algorithm.system_wise_detection", "name": "Sytem_Wise_Anomaly_Detection_Primitive"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.7.produce_score"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "2530840a-07d4-3874-b7d8-9eb5e4ae2bf3", "version": "0.3.0", "python_path": "d3m.primitives.tods.data_processing.construct_predictions", "name": "Construct pipeline predictions output"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.8.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "c91336de994b3e7089bc3de1728dde5b458c3b9d4ecae7a9c94a26da1219d3f3"}

examples/axolotl_interface/run_pipeline.py (+5 -5)

@@ -6,19 +6,18 @@ import pandas as pd
 from tods import generate_dataset, load_pipeline, evaluate_pipeline
 
 this_path = os.path.dirname(os.path.abspath(__file__))
 #table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset
+default_data_path = os.path.join(this_path, '../../datasets/anomaly/raw_data/yahoo_sub_5.csv')
 
 parser = argparse.ArgumentParser(description='Arguments for running predefined pipeline.')
-parser.add_argument('--table_path', type=str, default=os.path.join(this_path, '../../datasets/anomaly/raw_data/yahoo_sub_5.csv'),
+parser.add_argument('--table_path', type=str, default=default_data_path,
                     help='Input the path of the input data table')
 parser.add_argument('--target_index', type=int, default=6,
                     help='Index of the ground truth (for evaluation)')
 parser.add_argument('--metric', type=str, default='F1_MACRO',
                     help='Evaluation Metric (F1, F1_MACRO)')
-parser.add_argument('--pipeline_path', default=os.path.join(this_path, './example_pipelines/autoencoder_pipeline.json'),
+parser.add_argument('--pipeline_path',
+                    default=os.path.join(this_path, './example_pipelines/autoencoder_pipeline.json'),
                     help='Input the path of the pre-built pipeline description')
 # parser.add_argument('--pipeline_path', default=os.path.join(this_path, '../tods/resources/default_pipeline.json'),
 #                     help='Input the path of the pre-built pipeline description')
 
 args = parser.parse_args()
 
@@ -37,4 +36,5 @@ pipeline = load_pipeline(pipeline_path)
 # Run the pipeline
 pipeline_result = evaluate_pipeline(dataset, pipeline, metric)
 print(pipeline_result)
+#raise pipeline_result.error[0]


tods/common/CSVReader.py (+2 -2)

@@ -68,7 +68,7 @@ class CSVReaderPrimitive(primitives.FileReaderPrimitiveBase): # pragma: no co
             # This should be done by primitives later on.
             dtype=str,
             # We always expect one row header.
-            header=0,
+            header=None,
             # We want empty strings and not NaNs.
             na_filter=False,
             encoding='utf8',
@@ -92,7 +92,7 @@ class CSVReaderPrimitive(primitives.FileReaderPrimitiveBase):   # pragma: no co
         data = container.DataFrame(data, {
             'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
             'structural_type': container.DataFrame,
-        }, generate_metadata=False)
+        }, generate_metadata=True)
 
         assert column_names is not None
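
Aside: the effect of the header switch, shown with plain pandas (standalone illustration, not TODS code):

import io
import pandas as pd

raw = 'v1,v2\n1.0,2.0'

# header=0 consumes the first row as column names; header=None keeps
# every row as data, which is what headerless per-system CSVs need.
print(pd.read_csv(io.StringIO(raw), header=0).shape)     # (1, 2)
print(pd.read_csv(io.StringIO(raw), header=None).shape)  # (2, 2)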



tods/common/TODSBasePrimitives.py (+200 -0)

@@ -0,0 +1,200 @@
import typing
from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple
import logging
import abc

from d3m.primitive_interfaces import generator, transformer
from d3m.primitive_interfaces.base import *
from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase

from d3m.metadata import base as metadata_base, hyperparams, params
from d3m import container
from d3m import utils

__all__ = ('TODSTransformerPrimitiveBase',)

class TODSTransformerPrimitiveBase(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
    """
    A base class for primitives which are not fitted at all and can
    simply produce (useful) outputs from inputs directly. As such they
    also do not have any state (params).

    This class is parameterized using only three type variables, ``Inputs``,
    ``Outputs``, and ``Hyperparams``.
    """

    def __init__(self, *, hyperparams: Hyperparams) -> None:
        super().__init__(hyperparams=hyperparams)

    def produce(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]:
        # Check the shape of the first cell: a scalar entry (shape ()) means plain
        # time-series data; an embedded (row, col) DataFrame means system-wise data.
        is_system = len(inputs.iloc[0, 0].shape) != 0
        if is_system:
            outputs = self._forward(inputs, '_produce')
        else:
            outputs = self._produce(inputs=inputs)
            outputs = outputs.value

        return CallResult(outputs)

    @abc.abstractmethod
    def _produce(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]:
        """
        Make the predictions on a single (non-nested) DataFrame.
        """
        #return CallResult(container.DataFrame)

    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
        """
        A noop.
        """
        return CallResult(None)

    def get_params(self) -> None:
        """
        A noop.
        """
        return None

    def set_params(self, *, params: None) -> None:
        """
        A noop.
        """
        return

    def _forward(self, data, method):
        """
        General forward function that feeds system data one-by-one to the primitive.
        """
        col_name = list(data.columns)[0]
        for i, _ in data.iterrows():
            sys_data = data.iloc[i][col_name]
            produce_func = getattr(self, method, None)
            out = produce_func(inputs=sys_data)
            data.iloc[i][col_name] = out.value
        return data

class TODSUnsupervisedLearnerPrimitiveBase(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):

    def __init__(self, *, hyperparams: Hyperparams,
                 random_seed: int = 0,
                 docker_containers: Dict[str, DockerContainer] = None) -> None:
        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)

    def produce(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]:
        # Scalar first cell: plain data; (row, col)-shaped cell: system-wise data.
        is_system = len(inputs.iloc[0, 0].shape) != 0
        if is_system:
            outputs = self._forward(inputs, '_produce')
        else:
            outputs = self._produce(inputs=inputs)
            outputs = outputs.value

        return CallResult(outputs)

    def produce_score(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]:
        is_system = len(inputs.iloc[0, 0].shape) != 0
        if is_system:
            outputs = self._forward(inputs, '_produce_score')
        else:
            outputs = self._produce_score(inputs=inputs)
            outputs = outputs.value

        return CallResult(outputs)

    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
        """
        Fit the primitive; for system-wise data, fit on each system's DataFrame in turn.
        """
        is_system = len(self._inputs.iloc[0, 0].shape) != 0
        if is_system:
            data = self._inputs
            col_name = list(data.columns)[0]
            for i, _ in data.iterrows():
                sys_data = data.iloc[i][col_name]
                self.set_training_data(inputs=sys_data)
                self._fit()
        else:
            self._fit()

        return CallResult(None)

    def fit_multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, timeout: float = None, iterations: int = None) -> MultiCallResult:
        is_system = len(inputs.iloc[0, 0].shape) != 0
        if is_system:
            data = inputs
            produce_method = produce_methods[0]
            col_name = list(data.columns)[0]
            results = []
            for i, _ in data.iterrows():
                # Fit and produce on each embedded per-system DataFrame separately.
                sys_data = data.iloc[i][col_name]
                self.set_training_data(inputs=sys_data)
                fit_result = self._fit()
                if produce_method == "produce":
                    out = self._produce(inputs=sys_data, timeout=timeout)
                else:
                    out = self._produce_score(inputs=sys_data, timeout=timeout)
                data.iloc[i][col_name] = out.value
                results.append(out)
            iterations_done = None
            for result in results:
                if result.iterations_done is not None:
                    if iterations_done is None:
                        iterations_done = result.iterations_done
                    else:
                        iterations_done = max(iterations_done, result.iterations_done)
            return MultiCallResult(
                values={produce_method: data},
                has_finished=all(result.has_finished for result in results),
                iterations_done=iterations_done,
            )
        else:
            return self._fit_multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs)

    @abc.abstractmethod
    def _produce(self, *, inputs: container.DataFrame, timeout: float = None, iterations: int = None) -> CallResult[container.DataFrame]:
        """
        Abstract method: produce predictions for a single (non-nested) DataFrame.
        """

    @abc.abstractmethod
    def _produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        """
        Abstract method: produce outlier scores for a single (non-nested) DataFrame.
        """

    @abc.abstractmethod
    def _fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
        """
        Abstract method: fit on the training data set via set_training_data.
        """

    def get_params(self) -> None:
        """
        A noop.
        """
        return None

    def set_params(self, *, params: None) -> None:
        """
        A noop.
        """
        return

    def _forward(self, data, method):
        """
        General forward function that feeds system data one-by-one to the primitive.
        """
        col_name = list(data.columns)[0]
        for i, _ in data.iterrows():
            sys_data = data.iloc[i][col_name]
            produce_func = getattr(self, method, None)
            out = produce_func(inputs=sys_data)
            data.iloc[i][col_name] = out.value
        return data
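
To make the intended use concrete, a minimal sketch of a primitive built on this base class (the subclass name and transform are hypothetical; real primitives also declare metadata and hyperparams). Only _produce is implemented; the inherited produce() decides whether to call it once, or once per embedded per-system DataFrame via _forward:

from d3m import container
from d3m.primitive_interfaces.base import CallResult
# assumption: from tods.common.TODSBasePrimitives import TODSTransformerPrimitiveBase

class AbsValuePrimitive(TODSTransformerPrimitiveBase):  # hypothetical example
    def _produce(self, *, inputs: container.DataFrame, timeout: float = None,
                 iterations: int = None) -> CallResult[container.DataFrame]:
        # Runs on one plain DataFrame; for system-wise inputs the inherited
        # produce() routes each embedded per-system DataFrame through here.
        return CallResult(inputs.abs())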

tods/detection_algorithm/PyodAE.py (+3 -3)

@@ -95,7 +95,7 @@ class Hyperparams(Hyperparams_ODBase):
     )
 
     epochs = hyperparams.Hyperparameter[int](
-        default=100,
+        default=1,
         description='Number of epochs to train the model.',
         semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
     )
@@ -335,7 +335,7 @@ class AutoEncoderPrimitive(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Para
         Returns:
             None
         """
-        return super().fit()
+        return super()._fit()
 
     def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
         """
@@ -347,7 +347,7 @@ class AutoEncoderPrimitive(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Para
             Container DataFrame
             1 marks Outliers, 0 marks normal.
         """
-        return super().produce(inputs=inputs, timeout=timeout, iterations=iterations)
+        return super()._produce(inputs=inputs, timeout=timeout, iterations=iterations)

def get_params(self) -> Params:
"""


tods/detection_algorithm/SystemWiseDetection.py (+60 -42)

@@ -142,7 +142,6 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs,

         self.logger.info('System wise Detection Input Primitive called')
 
-        # Get cols to fit.
         self._fitted = False
         self._training_inputs, self._training_indices = self._get_columns_to_fit(inputs, self.hyperparams)
@@ -316,12 +315,8 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs,
     def _write(self, inputs: Inputs):
         inputs.to_csv(str(time.time()) + '.csv')
 
-
-
-
     def _system_wise_detection(self,X,method_type,window_size,contamination):
-        systemIds = X.system_id.unique()
-        groupedX = X.groupby(X.system_id)
+        systemIds = [int(idx) for idx in X.index]
 
         transformed_X = []
         if(method_type=="max"):
@@ -330,17 +325,17 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs,
"""
maxOutlierScorePerSystemList = []
for systemId in systemIds:
systemDf = groupedX.get_group(systemId)
maxOutlierScorePerSystemList.append(np.max(np.abs(systemDf["value_0"].values)))
systemDf = X.iloc[systemId]['system']
maxOutlierScorePerSystemList.append(np.max(np.abs(systemDf.iloc[:,0].values)))

ranking = np.sort(maxOutlierScorePerSystemList)
threshold = ranking[int((1 - contamination) * len(ranking))]
self.threshold = threshold
mask = (maxOutlierScorePerSystemList >= threshold)
mask = (maxOutlierScorePerSystemList > threshold)
ranking[mask] = 1
ranking[np.logical_not(mask)] = 0
for iter in range(len(systemIds)):
transformed_X.append([systemIds[iter],ranking[iter]])
transformed_X.append(ranking[iter])

if (method_type == "avg"):
"""
@@ -348,60 +343,72 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs,
"""
avgOutlierScorePerSystemList = []
for systemId in systemIds:
systemDf = groupedX.get_group(systemId)
avgOutlierScorePerSystemList.append(np.mean(np.abs(systemDf["value_0"].values)))
systemDf = X.iloc[systemId]['system']
avgOutlierScorePerSystemList.append(np.mean(np.abs(systemDf.iloc[:,0].values)))

ranking = np.sort(avgOutlierScorePerSystemList)
threshold = ranking[int((1 - contamination) * len(ranking))]
self.threshold = threshold
mask = (avgOutlierScorePerSystemList >= threshold)
mask = (avgOutlierScorePerSystemList > threshold)
ranking[mask] = 1
ranking[np.logical_not(mask)] = 0
for iter in range(len(systemIds)):
transformed_X.append([systemIds[iter], ranking[iter]])
transformed_X.append( ranking[iter])

if (method_type == "sliding_window_sum"):
"""
Sytems are sorted based on max of max of reconstruction errors in each window"
Sytems are sorted based on max of sum of reconstruction errors in each window"
"""
OutlierScorePerSystemList = []
maxOutlierScorePerSystemList = []
for systemId in systemIds:
systemDf = groupedX.get_group(systemId)
column_value = systemDf["value_0"].values
column_score = np.zeros(len(column_value))
systemDf = X.iloc[systemId]['system']
column_value = systemDf.iloc[:,0].values
column_score = []
for iter in range(window_size - 1, len(column_value)):
sequence = column_value[iter - window_size + 1:iter + 1]
column_score[iter] = np.sum(np.abs(sequence))
column_score[:window_size - 1] = column_score[window_size - 1]
OutlierScorePerSystemList.append(column_score.tolist())
OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList)
column_score.append(np.sum(np.abs(sequence)))
#column_score[:window_size - 1] = column_score[window_size - 1]

maxOutlierScorePerSystemList.append(np.max(column_score))
#OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList)

maxOutlierScorePerSystemList = OutlierScorePerSystemList.max(axis=1).tolist()
#maxOutlierScorePerSystemList = OutlierScorePerSystemList.max(axis=1).tolist()

ranking = np.sort(maxOutlierScorePerSystemList)
threshold = ranking[int((1 - contamination) * len(ranking))]
self.threshold = threshold
mask = (maxOutlierScorePerSystemList >= threshold)
mask = (maxOutlierScorePerSystemList > threshold)
ranking[mask] = 1
ranking[np.logical_not(mask)] = 0
for iter in range(len(systemIds)):
transformed_X.append([systemIds[iter], ranking[iter]])
transformed_X.append( ranking[iter])


if (method_type == "majority_voting_sliding_window_sum"):
"""
Sytem with most vote based on max of sum of reconstruction errors in each window
"""
OutlierScorePerSystemList = []
max_time_points = 0
for systemId in systemIds:
systemDf = X.iloc[systemId]['system']
max_time_points = max(max_time_points,systemDf.shape[0])

for systemId in systemIds:
systemDf = groupedX.get_group(systemId)
column_value = systemDf["value_0"].values
column_score = np.zeros(len(column_value))
column_value = np.zeros(max_time_points)
systemDf = X.iloc[systemId]['system']
column_value_actual = systemDf.iloc[:, 0].values
column_value[0:len(column_value_actual)] = column_value_actual
column_value[len(column_value_actual):]= column_value_actual[-1]
column_score = []
for iter in range(window_size - 1, len(column_value)):
sequence = column_value[iter - window_size + 1:iter + 1]
column_score[iter] = np.sum(np.abs(sequence))
column_score[:window_size - 1] = column_score[window_size - 1]
OutlierScorePerSystemList.append(column_score.tolist())
column_score.append(np.sum(np.abs(sequence)))

OutlierScorePerSystemList.append(column_score)

OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList)

OutlierScorePerSystemList = (
OutlierScorePerSystemList == OutlierScorePerSystemList.max(axis=0)[None, :]).astype(int)

@@ -409,28 +416,39 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs,

             ranking = np.sort(maxOutlierScorePerSystemList)
             threshold = ranking[int((1 - contamination) * len(ranking))]
 
             self.threshold = threshold
-            mask = (maxOutlierScorePerSystemList >= threshold)
+            mask = (maxOutlierScorePerSystemList > threshold)
             ranking[mask] = 1
             ranking[np.logical_not(mask)] = 0
             for iter in range(len(systemIds)):
-                transformed_X.append([systemIds[iter], ranking[iter]])
+                transformed_X.append( ranking[iter])
 
 
         if (method_type == "majority_voting_sliding_window_max"):
             """
             Sytem with most vote based on max of max of reconstruction errors in each window
             """
             OutlierScorePerSystemList = []
+            max_time_points = 0
             for systemId in systemIds:
-                systemDf = groupedX.get_group(systemId)
-                column_value = systemDf["value_0"].values
-                column_score = np.zeros(len(column_value))
+                systemDf = X.iloc[systemId]['system']
+                max_time_points = max(max_time_points, systemDf.shape[0])
+
+            for systemId in systemIds:
+                column_value = np.zeros(max_time_points)
+                systemDf = X.iloc[systemId]['system']
+                column_value_actual = systemDf.iloc[:, 0].values
+                column_value[0:len(column_value_actual)] = column_value_actual
+                column_value[len(column_value_actual):] = column_value_actual[-1]
+                column_score = []
                 for iter in range(window_size - 1, len(column_value)):
                     sequence = column_value[iter - window_size + 1:iter + 1]
-                    column_score[iter] = np.max(np.abs(sequence))
-                column_score[:window_size - 1] = column_score[window_size - 1]
-                OutlierScorePerSystemList.append(column_score.tolist())
+                    column_score.append(np.max(np.abs(sequence)))
+                OutlierScorePerSystemList.append(column_score)
             OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList)
 
             OutlierScorePerSystemList = (
                 OutlierScorePerSystemList == OutlierScorePerSystemList.max(axis=0)[None, :]).astype(int)
 
@@ -439,11 +457,11 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs,
             ranking = np.sort(maxOutlierScorePerSystemList)
             threshold = ranking[int((1 - contamination) * len(ranking))]
             self.threshold = threshold
-            mask = (maxOutlierScorePerSystemList >= threshold)
+            mask = (maxOutlierScorePerSystemList > threshold)
             ranking[mask] = 1
             ranking[np.logical_not(mask)] = 0
             for iter in range(len(systemIds)):
-                transformed_X.append([systemIds[iter], ranking[iter]])
+                transformed_X.append(ranking[iter])
 
         return transformed_X
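
Every branch above ends with the same contamination-quantile cut over per-system scores. A standalone NumPy sketch of just that step (the function name is ours; it mirrors the change from >= to >, so ties at the threshold are no longer flagged):

import numpy as np

def flag_systems(scores, contamination=0.1):
    # Label roughly the top `contamination` fraction of systems as anomalous (1).
    scores = np.asarray(scores, dtype=float)
    ranking = np.sort(scores)
    threshold = ranking[int((1 - contamination) * len(ranking))]
    return (scores > threshold).astype(int)

print(flag_systems([1.0, 2.0, 3.0, 4.0, 5.0], contamination=0.4))  # [0 0 0 0 1]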



tods/detection_algorithm/SystemWiseDetection_bkup.py (+455 -0)

@@ -0,0 +1,455 @@
import os
from typing import Any,Optional,List
import statsmodels.api as sm
import numpy as np
from d3m import container, utils as d3m_utils
from d3m import utils

from numpy import ndarray
from collections import OrderedDict
from scipy import sparse
import os

import numpy
import typing
import time

from d3m import container
from d3m.primitive_interfaces import base, transformer

from d3m.container import DataFrame as d3m_dataframe
from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
import uuid
from d3m.exceptions import PrimitiveNotFittedError

__all__ = ('SystemWiseDetectionPrimitive',)

Inputs = container.DataFrame
Outputs = container.DataFrame

class Params(params.Params):
#to-do : how to make params dynamic
use_column_names: Optional[Any]



class Hyperparams(hyperparams.Hyperparams):

#Tuning Parameter
#default -1 considers entire time series is considered
window_size = hyperparams.Hyperparameter(default=10, semantic_types=[
'https://metadata.datadrivendiscovery.org/types/TuningParameter',
], description="Window Size for decomposition")

method_type = hyperparams.Enumeration(
values=['max', 'avg', 'sliding_window_sum','majority_voting_sliding_window_sum','majority_voting_sliding_window_max'],
default='majority_voting_sliding_window_max',
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="The type of method used to find anomalous system",
)
contamination = hyperparams.Uniform(
lower=0.,
upper=0.5,
default=0.1,
description='The amount of contamination of the data set, i.e. the proportion of outliers in the data set. ',
semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
)

#control parameter
use_columns = hyperparams.Set(
elements=hyperparams.Hyperparameter[int](-1),
default=(),
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
)
exclude_columns = hyperparams.Set(
elements=hyperparams.Hyperparameter[int](-1),
default=(),
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
)
return_result = hyperparams.Enumeration(
values=['append', 'replace', 'new'],
default='new',
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
)
use_semantic_types = hyperparams.UniformBool(
default=False,
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe"
)
add_index_columns = hyperparams.UniformBool(
default=False,
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
)
error_on_no_input = hyperparams.UniformBool(
default=True,
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
)

return_semantic_type = hyperparams.Enumeration[str](
values=['https://metadata.datadrivendiscovery.org/types/Attribute',
'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'],
default='https://metadata.datadrivendiscovery.org/types/Attribute',
description='Decides what semantic type to attach to generated attributes',
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
)



class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find abs_energy of time series
"""

metadata = metadata_base.PrimitiveMetadata({
"__author__": "DATA Lab at Texas A&M University",
'name': 'Sytem_Wise_Anomaly_Detection_Primitive',
'python_path': 'd3m.primitives.tods.detection_algorithm.system_wise_detection',
'source': {
'name': 'DATA Lab at Texas A&M University',
'contact': 'mailto:khlai037@tamu.edu'
},
"hyperparams_to_tune": ['window_size','method_type','contamination'],
'version': '0.1.0',
'algorithm_types': [
metadata_base.PrimitiveAlgorithmType.TODS_PRIMITIVE,
],
'primitive_family': metadata_base.PrimitiveFamily.ANOMALY_DETECTION,
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'Sytem_Wise_Anomaly_Detection_Primitive')),
})

def __init__(self, *, hyperparams: Hyperparams) -> None:
super().__init__(hyperparams=hyperparams)
self.primitiveNo = 0

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:
inputs: Container DataFrame
timeout: Default
iterations: Default

Returns:
Container DataFrame containing abs_energy of time series
"""

self.logger.info('System wise Detection Input Primitive called')
# Get cols to fit.
self._fitted = False
self._training_inputs, self._training_indices = self._get_columns_to_fit(inputs, self.hyperparams)
self._input_column_names = self._training_inputs.columns

if len(self._training_indices) > 0:
# self._clf.fit(self._training_inputs)
self._fitted = True
else:
if self.hyperparams['error_on_no_input']:
raise RuntimeError("No input columns were selected")
self.logger.warn("No input columns were selected")

if not self._fitted:
raise PrimitiveNotFittedError("Primitive not fitted.")
system_wise_detection_input = inputs
if self.hyperparams['use_semantic_types']:
system_wise_detection_input = inputs.iloc[:, self._training_indices]
output_columns = []
if len(self._training_indices) > 0:
system_wise_detection_output = self._system_wise_detection(system_wise_detection_input,self.hyperparams["method_type"],self.hyperparams["window_size"],self.hyperparams["contamination"])
outputs = system_wise_detection_output


if sparse.issparse(system_wise_detection_output):
system_wise_detection_output = system_wise_detection_output.toarray()
outputs = self._wrap_predictions(inputs, system_wise_detection_output)

#if len(outputs.columns) == len(self._input_column_names):
# outputs.columns = self._input_column_names

output_columns = [outputs]


else:
if self.hyperparams['error_on_no_input']:
raise RuntimeError("No input columns were selected")
self.logger.warn("No input columns were selected")


self.logger.info('System wise Detection Primitive returned')
outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'],
add_index_columns=self.hyperparams['add_index_columns'],
inputs=inputs, column_indices=self._training_indices,
columns_list=output_columns)
return base.CallResult(outputs)

@classmethod
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams):
"""
Select columns to fit.
Args:
inputs: Container DataFrame
hyperparams: d3m.metadata.hyperparams.Hyperparams

Returns:
list
"""
if not hyperparams['use_semantic_types']:
return inputs, list(range(len(inputs.columns)))

inputs_metadata = inputs.metadata

def can_produce_column(column_index: int) -> bool:
return cls._can_produce_column(inputs_metadata, column_index, hyperparams)

use_columns = hyperparams['use_columns']
exclude_columns = hyperparams['exclude_columns']

columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata,
use_columns=use_columns,
exclude_columns=exclude_columns,
can_use_column=can_produce_column)
return inputs.iloc[:, columns_to_produce], columns_to_produce
# return columns_to_produce

@classmethod
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int,
hyperparams: Hyperparams) -> bool:
"""
Output whether a column can be processed.
Args:
inputs_metadata: d3m.metadata.base.DataMetadata
column_index: int

Returns:
bool
"""
column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index))

accepted_structural_types = (int, float, numpy.integer, numpy.float64)
accepted_semantic_types = set()
accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute")
if not issubclass(column_metadata['structural_type'], accepted_structural_types):
return False

semantic_types = set(column_metadata.get('semantic_types', []))
return True
if len(semantic_types) == 0:
cls.logger.warning("No semantic types found in column metadata")
return False

# Making sure all accepted_semantic_types are available in semantic_types
if len(accepted_semantic_types - semantic_types) == 0:
return True

return False

@classmethod
def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata:
"""
Updata metadata for selected columns.
Args:
inputs_metadata: metadata_base.DataMetadata
outputs: Container Dataframe
target_columns_metadata: list

Returns:
d3m.metadata.base.DataMetadata
"""
outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)

for column_index, column_metadata in enumerate(target_columns_metadata):
column_metadata.pop("structural_type", None)
outputs_metadata = outputs_metadata.update_column(column_index, column_metadata)

return outputs_metadata

def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
"""
Wrap predictions into dataframe
Args:
inputs: Container Dataframe
predictions: array-like data (n_samples, n_features)

Returns:
Dataframe
"""
outputs = d3m_dataframe(predictions, generate_metadata=True)
target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams,self.primitiveNo)
outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata)

return outputs

@classmethod
def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams, primitiveNo):
"""
Add target columns metadata
Args:
outputs_metadata: metadata.base.DataMetadata
hyperparams: d3m.metadata.hyperparams.Hyperparams

Returns:
List[OrderedDict]
"""
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
target_columns_metadata: List[OrderedDict] = []
for column_index in range(outputs_length):
column_name = "{0}{1}_{2}".format(cls.metadata.query()['name'], primitiveNo, column_index)
column_metadata = OrderedDict()
semantic_types = set()
semantic_types.add(hyperparams["return_semantic_type"])
column_metadata['semantic_types'] = list(semantic_types)

column_metadata["name"] = str(column_name)
target_columns_metadata.append(column_metadata)

return target_columns_metadata

def _write(self, inputs: Inputs):
inputs.to_csv(str(time.time()) + '.csv')

def _system_wise_detection(self,X,method_type,window_size,contamination):
#systemIds = X.system_id.unique()
systemIds = [int(idx) for idx in X.index]
#groupedX = X.groupby(X.system_id)
print(systemIds)
print(X.iloc[0])
systemDf = X.iloc(systemIds[0])['system']
print(systemDf)
exit()

transformed_X = []
if(method_type=="max"):
"""
Sytems are sorted based on maximum of reconstruction errors"
"""
maxOutlierScorePerSystemList = []
for systemId in systemIds:
systemDf = groupedX.get_group(systemId)
#systemDf = X[systemId]['system']
maxOutlierScorePerSystemList.append(np.max(np.abs(systemDf["value_0"].values)))

ranking = np.sort(maxOutlierScorePerSystemList)
threshold = ranking[int((1 - contamination) * len(ranking))]
self.threshold = threshold
mask = (maxOutlierScorePerSystemList >= threshold)
ranking[mask] = 1
ranking[np.logical_not(mask)] = 0
for iter in range(len(systemIds)):
transformed_X.append([systemIds[iter],ranking[iter]])

if (method_type == "avg"):
"""
Sytems are sorted based on average of reconstruction errors"
"""
avgOutlierScorePerSystemList = []
for systemId in systemIds:
systemDf = groupedX.get_group(systemId)
avgOutlierScorePerSystemList.append(np.mean(np.abs(systemDf["value_0"].values)))

ranking = np.sort(avgOutlierScorePerSystemList)
threshold = ranking[int((1 - contamination) * len(ranking))]
self.threshold = threshold
mask = (avgOutlierScorePerSystemList >= threshold)
ranking[mask] = 1
ranking[np.logical_not(mask)] = 0
for iter in range(len(systemIds)):
transformed_X.append([systemIds[iter], ranking[iter]])

if (method_type == "sliding_window_sum"):
"""
Sytems are sorted based on max of max of reconstruction errors in each window"
"""
OutlierScorePerSystemList = []
for systemId in systemIds:
systemDf = groupedX.get_group(systemId)
column_value = systemDf["value_0"].values
column_score = np.zeros(len(column_value))
for iter in range(window_size - 1, len(column_value)):
sequence = column_value[iter - window_size + 1:iter + 1]
column_score[iter] = np.sum(np.abs(sequence))
column_score[:window_size - 1] = column_score[window_size - 1]
OutlierScorePerSystemList.append(column_score.tolist())
OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList)

maxOutlierScorePerSystemList = OutlierScorePerSystemList.max(axis=1).tolist()

ranking = np.sort(maxOutlierScorePerSystemList)
threshold = ranking[int((1 - contamination) * len(ranking))]
self.threshold = threshold
mask = (maxOutlierScorePerSystemList >= threshold)
ranking[mask] = 1
ranking[np.logical_not(mask)] = 0
for iter in range(len(systemIds)):
transformed_X.append([systemIds[iter], ranking[iter]])

if (method_type == "majority_voting_sliding_window_sum"):
"""
Sytem with most vote based on max of sum of reconstruction errors in each window
"""
OutlierScorePerSystemList = []
for systemId in systemIds:
systemDf = groupedX.get_group(systemId)
column_value = systemDf["value_0"].values
column_score = np.zeros(len(column_value))
for iter in range(window_size - 1, len(column_value)):
sequence = column_value[iter - window_size + 1:iter + 1]
column_score[iter] = np.sum(np.abs(sequence))
column_score[:window_size - 1] = column_score[window_size - 1]
OutlierScorePerSystemList.append(column_score.tolist())
OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList)
OutlierScorePerSystemList = (
OutlierScorePerSystemList == OutlierScorePerSystemList.max(axis=0)[None, :]).astype(int)

maxOutlierScorePerSystemList = OutlierScorePerSystemList.sum(axis=1).tolist()

ranking = np.sort(maxOutlierScorePerSystemList)
threshold = ranking[int((1 - contamination) * len(ranking))]
self.threshold = threshold
mask = (maxOutlierScorePerSystemList >= threshold)
ranking[mask] = 1
ranking[np.logical_not(mask)] = 0
for iter in range(len(systemIds)):
transformed_X.append([systemIds[iter], ranking[iter]])

if (method_type == "majority_voting_sliding_window_max"):
"""
Sytem with most vote based on max of max of reconstruction errors in each window
"""
OutlierScorePerSystemList = []
for systemId in systemIds:
systemDf = groupedX.get_group(systemId)
column_value = systemDf["value_0"].values
column_score = np.zeros(len(column_value))
for iter in range(window_size - 1, len(column_value)):
sequence = column_value[iter - window_size + 1:iter + 1]
column_score[iter] = np.max(np.abs(sequence))
column_score[:window_size - 1] = column_score[window_size - 1]
OutlierScorePerSystemList.append(column_score.tolist())
OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList)
OutlierScorePerSystemList = (
OutlierScorePerSystemList == OutlierScorePerSystemList.max(axis=0)[None, :]).astype(int)

maxOutlierScorePerSystemList = OutlierScorePerSystemList.sum(axis=1).tolist()

ranking = np.sort(maxOutlierScorePerSystemList)
threshold = ranking[int((1 - contamination) * len(ranking))]
self.threshold = threshold
mask = (maxOutlierScorePerSystemList >= threshold)
ranking[mask] = 1
ranking[np.logical_not(mask)] = 0
for iter in range(len(systemIds)):
transformed_X.append([systemIds[iter], ranking[iter]])

return transformed_X
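# A minimal sketch (with made-up scores) of the contamination-based
# thresholding shared by every branch above: the top ``contamination``
# fraction of systems is labeled anomalous.
#
#   scores = np.array([0.2, 0.9, 0.1, 0.4])              # one score per system
#   ranking = np.sort(scores)                            # [0.1, 0.2, 0.4, 0.9]
#   threshold = ranking[int((1 - 0.25) * len(ranking))]  # contamination=0.25 -> 0.9
#   labels = (scores >= threshold).astype(int)           # [0, 1, 0, 0]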





+ 559
- 4
tods/detection_algorithm/UODBasePrimitive.py View File

@@ -30,6 +30,7 @@ from d3m.primitive_interfaces.base import CallResult, DockerContainer, Primitive

# # from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase
from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase
from ..common.TODSBasePrimitives import TODSUnsupervisedLearnerPrimitiveBase
from d3m.primitive_interfaces.transformer import TransformerPrimitiveBase

from d3m.primitive_interfaces.base import *
@@ -141,7 +142,10 @@ class Hyperparams_ODBase(hyperparams.Hyperparams):
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
)

class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):

# OutlierDetectorBase.__doc__ = OutlierDetectorBase.__doc__

class UnsupervisedOutlierDetectorBase(TODSUnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
"""
Parameters
----------
@@ -234,7 +238,7 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O
self.right_inds_[self.right_inds_ > len(self._inputs)] = len(self._inputs)
# print(self.left_inds_, self.right_inds_)

def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
def _fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
"""
Fit model with training data.
Args:
@@ -248,6 +252,7 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O
if self._fitted: # pragma: no cover
return CallResult(None)


self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams)
self._input_column_names = self._training_inputs.columns

@@ -271,7 +276,7 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O

return CallResult(None)

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
"""
Process the testing data.
Args:
@@ -336,7 +341,7 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O
return CallResult(outputs)

def produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
def _produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
"""
Process the testing data.
Args:
@@ -688,3 +693,553 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O


# OutlierDetectorBase.__doc__ = OutlierDetectorBase.__doc__

class UnsupervisedOutlierDetectorBase2(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
"""
Parameters
----------
contamination : float in (0., 0.5), optional (default=0.1)
The amount of contamination of the data set, i.e.
the proportion of outliers in the data set. When fitting this is used
to define the threshold on the decision function.

Attributes
----------
clf_.decision_scores_ : numpy array of shape (n_samples,)
The outlier scores of the training data.
The higher, the more abnormal. Outliers tend to have higher
scores. This value is available once the detector is
fitted.

clf_.threshold_ : float within (0, 1)
The threshold that separates outliers from inliers:
outliers have decision_scores_ greater than threshold_,
inliers have decision_scores_ less than threshold_.

clf_.labels_ : int, either 0 or 1
The binary labels of the training data. 0 stands for inliers
and 1 for outliers/anomalies. It is generated by applying
``threshold_`` on ``decision_scores_``.

left_inds_ : ndarray
One of the mappings from decision_score to data.
For point outlier detection, left_inds_ exactly equals the index of each data point.
For collective outlier detection, left_inds_ equals the start index of each subsequence.

right_inds_ : ndarray
The other mapping from decision_score to data.
For point outlier detection, right_inds_ exactly equals the index of each data point plus 1.
For collective outlier detection, right_inds_ equals the ending index of each subsequence.
"""
# probability_score:
# window_size: int
# The moving window size.

__author__ = "DATALAB @Taxes A&M University"
metadata: metadata_base.PrimitiveMetadata = None

def __init__(self, *,
hyperparams: Hyperparams,
random_seed: int = 0,
docker_containers: Dict[str, DockerContainer] = None) -> None:
super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)

self._clf = None
self._clf_fit_parameter = {}
self.primitiveNo = 0

self.window_size = hyperparams['window_size']
self.step_size = hyperparams['step_size']
self.left_inds_ = None
self.right_inds_ = None

self._inputs = None
self._outputs = None
self._training_inputs = None
self._training_outputs = None
self._target_names = None
self._training_indices = None
self._target_column_indices = None
self._target_columns_metadata: List[OrderedDict] = None
self._input_column_names = None
self._fitted = False
#
@abc.abstractmethod
def set_training_data(self, *, inputs: Inputs) -> None:
"""
Set training data for outlier detection.
Args:
inputs: Container DataFrame

Returns:
None
"""
self._inputs = inputs
self._fitted = False

def _set_subseq_inds(self):

self.left_inds_ = getattr(self._clf, 'left_inds_', None)
self.right_inds_ = getattr(self._clf, 'right_inds_', None)

if self.left_inds_ is None or self.right_inds_ is None:
self.left_inds_ = numpy.arange(0, len(self._inputs), self.step_size)
self.right_inds_ = self.left_inds_ + self.window_size
self.right_inds_[self.right_inds_ > len(self._inputs)] = len(self._inputs)
# print(self.left_inds_, self.right_inds_)
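# Example (hypothetical sizes): with len(self._inputs)=10, step_size=4 and
# window_size=5, the fallback above yields left_inds_=[0, 4, 8] and
# right_inds_=[5, 9, 10] (the last window is clipped to the input length).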

def _fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
"""
Fit model with training data.
Args:
*: Container DataFrame. Time series data up to fit.

Returns:
None
"""
# print('Fit:', self._clf)

if self._fitted: # pragma: no cover
return CallResult(None)

self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams)
self._input_column_names = self._training_inputs.columns

if self._training_inputs is None: # pragma: no cover
return CallResult(None)
#print("self._training_indices ", self._training_indices)
if len(self._training_indices) > 0:

# print('Fit: ', self._clf)
# print('Fit: ', self._training_inputs.values.shape)
# print('Fit: ', self._clf.fit(self._training_inputs.values))

self._clf.fit(X=self._training_inputs.values, **self._clf_fit_parameter)
self._fitted = True
self._set_subseq_inds()

else: # pragma: no cover
if self.hyperparams['error_on_no_input']:
raise RuntimeError("No input columns were selected")
self.logger.warn("No input columns were selected")

return CallResult(None)

def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
"""
Process the testing data.
Args:
inputs: Container DataFrame. Time series data up to outlier detection.

Returns:
Container DataFrame
1 marks Outliers, 0 marks normal.
"""

if not self._fitted: # pragma: no cover
raise PrimitiveNotFittedError("Primitive not fitted.")
sk_inputs = inputs
if self.hyperparams['use_semantic_types']:
sk_inputs = inputs.iloc[:, self._training_indices]
output_columns = []
#print("skinputs ", sk_inputs.values)
if len(self._training_indices) > 0:

if self.hyperparams['return_subseq_inds']:

if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD
pred_label = self._clf.predict(sk_inputs.values)
left_inds_ = numpy.arange(0, len(pred_label), self.step_size)
right_inds_ = left_inds_ + self.window_size
right_inds_[right_inds_ > len(pred_label)] = len(pred_label)
else:
pred_label, left_inds_, right_inds_ = self._clf.predict(sk_inputs.values)

# print(pred_label.shape, left_inds_.shape, right_inds_.shape)
# print(pred_label, left_inds_, right_inds_)

sk_output = numpy.concatenate((numpy.expand_dims(pred_label, axis=1),
numpy.expand_dims(left_inds_, axis=1),
numpy.expand_dims(right_inds_, axis=1)), axis=1)


else:
if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD
sk_output = self._clf.predict(sk_inputs.values)

else:
sk_output, _, _ = self._clf.predict(sk_inputs.values)

#print("sk output ", sk_output)
if sparse.issparse(sk_output): # pragma: no cover
sk_output = sk_output.toarray()

outputs = self._wrap_predictions(inputs, sk_output)
if len(outputs.columns) == len(self._input_column_names):
outputs.columns = self._input_column_names
output_columns = [outputs]
else: # pragma: no cover
if self.hyperparams['error_on_no_input']:
raise RuntimeError("No input columns were selected")
self.logger.warn("No input columns were selected")
outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'],
add_index_columns=self.hyperparams['add_index_columns'],
inputs=inputs, column_indices=self._training_indices,
columns_list=output_columns)
return CallResult(outputs)
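# When ``return_subseq_inds`` is True, each output row is laid out as
# [label, left_ind, right_ind]; e.g. a hypothetical row [1., 4., 9.] marks
# the subsequence inputs[4:9] as anomalous.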

def _produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
"""
Process the testing data.
Args:
inputs: Container DataFrame. Time series data up to outlier detection.

Returns:
Container DataFrame
Outlier scores of the testing data (the higher, the more abnormal).
"""

if not self._fitted: # pragma: no cover
raise PrimitiveNotFittedError("Primitive not fitted.")
sk_inputs = inputs
if self.hyperparams['use_semantic_types']:
sk_inputs = inputs.iloc[:, self._training_indices]
output_columns = []
if len(self._training_indices) > 0:

if self.hyperparams['return_subseq_inds']:

if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD
pred_score = self._clf.decision_function(sk_inputs.values).ravel()
left_inds_ = numpy.arange(0, len(pred_score), self.step_size)
right_inds_ = left_inds_ + self.window_size
right_inds_[right_inds_ > len(pred_score)] = len(pred_score)

else:
pred_score, left_inds_, right_inds_ = self._clf.decision_function(sk_inputs.values)

# print(pred_score.shape, left_inds_.shape, right_inds_.shape)

sk_output = numpy.concatenate((numpy.expand_dims(pred_score, axis=1),
numpy.expand_dims(left_inds_, axis=1),
numpy.expand_dims(right_inds_, axis=1)), axis=1)

else:
if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD
sk_output = self._clf.decision_function(sk_inputs.values)

else:
sk_output, _, _ = self._clf.decision_function(sk_inputs.values)

if sparse.issparse(sk_output): # pragma: no cover
sk_output = sk_output.toarray()
outputs = self._wrap_predictions(inputs, sk_output)
if len(outputs.columns) == len(self._input_column_names):
outputs.columns = self._input_column_names
output_columns = [outputs]
else: # pragma: no cover
if self.hyperparams['error_on_no_input']:
raise RuntimeError("No input columns were selected")
self.logger.warn("No input columns were selected")

outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'],
add_index_columns=self.hyperparams['add_index_columns'],
inputs=inputs, column_indices=self._training_indices,
columns_list=output_columns)
return CallResult(outputs)


def get_params(self) -> Params_ODBase:
"""
Return parameters.
Args:
None

Returns:
class Params_ODBase
"""

if not self._fitted:
return Params_ODBase(
# decision_scores_=None,
# threshold_=None,
# labels_=None,
left_inds_=None,
right_inds_=None,
clf_=copy.copy(self._clf),

# Keep previous
input_column_names=self._input_column_names,
training_indices_=self._training_indices,
target_names_=self._target_names,
target_column_indices_=self._target_column_indices,
target_columns_metadata_=self._target_columns_metadata
)

return Params_ODBase(
# decision_scores_=getattr(self._clf, 'decision_scores_', None),
# threshold_=getattr(self._clf, 'threshold_', None),
# labels_=getattr(self._clf, 'labels_', None),
left_inds_=self.left_inds_, # numpy.array(self.left_inds_)
right_inds_=self.right_inds_, # numpy.array(self.right_inds_)
clf_=copy.copy(self._clf),

# Keep previous
input_column_names=self._input_column_names,
training_indices_=self._training_indices,
target_names_=self._target_names,
target_column_indices_=self._target_column_indices,
target_columns_metadata_=self._target_columns_metadata
)


def set_params(self, *, params: Params_ODBase) -> None:
"""
Set parameters for outlier detection.
Args:
params: class Params_ODBase

Returns:
None
"""

# self._clf.decision_scores_ = params['decision_scores_']
# self._clf.threshold_ = params['threshold_']
# self._clf.labels_ = params['labels_']
self.left_inds_ = params['left_inds_']
self.right_inds_ = params['right_inds_']
self._clf = copy.copy(params['clf_'])

# Keep previous
self._input_column_names = params['input_column_names']
self._training_indices = params['training_indices_']
self._target_names = params['target_names_']
self._target_column_indices = params['target_column_indices_']
self._target_columns_metadata = params['target_columns_metadata_']


# if params['decision_scores_'] is not None:
# self._fitted = True
# if params['threshold_'] is not None:
# self._fitted = True
# if params['labels_'] is not None:
# self._fitted = True
if params['left_inds_'] is not None:
self._fitted = True
if params['right_inds_'] is not None:
self._fitted = True
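# Hypothetical round-trip sketch: ``params = primitive.get_params()`` on a
# fitted instance, followed by ``fresh.set_params(params=params)`` on a new
# instance, restores the fitted detector and the subsequence index mappings.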

@classmethod
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): # pragma: no cover
"""
Select columns to fit.
Args:
inputs: Container DataFrame
hyperparams: d3m.metadata.hyperparams.Hyperparams

Returns:
list
"""
#print("*******************get columns to fit***********")
if not hyperparams['use_semantic_types']:
return inputs, list(range(len(inputs.columns)))

inputs_metadata = inputs.metadata
#print("inputs_metadata ", inputs_metadata)
def can_produce_column(column_index: int) -> bool:
return cls._can_produce_column(inputs_metadata, column_index, hyperparams)
columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata,
use_columns=hyperparams['use_columns'],
exclude_columns=hyperparams['exclude_columns'],
can_use_column=can_produce_column)
#print("columns_to_produce ", columns_to_produce)
return inputs.iloc[:, columns_to_produce], columns_to_produce
# return columns_to_produce


@classmethod
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int,
hyperparams: Hyperparams) -> bool: # pragma: no cover
"""
Output whether a column can be processed.
Args:
inputs_metadata: d3m.metadata.base.DataMetadata
column_index: int

Returns:
bool
"""
column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index))
#print("column metadasta ", )
accepted_structural_types = (int, float, numpy.integer, numpy.float64)
accepted_semantic_types = set()
accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute")
if not issubclass(column_metadata['structural_type'], accepted_structural_types):
return False

semantic_types = set(column_metadata.get('semantic_types', []))
#print("semantic_types ", column_metadata.get('semantic_types'))
if len(semantic_types) == 0:
cls.logger.warning("No semantic types found in column metadata")
return False

# Making sure all accepted_semantic_types are available in semantic_types
if len(accepted_semantic_types - semantic_types) == 0:
return True

return False
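# Example: with accepted_semantic_types = {".../types/Attribute"}, a column
# whose semantic_types include ".../types/Attribute" (possibly among others)
# gives len(accepted_semantic_types - semantic_types) == 0 and is selected;
# a column without that type is skipped.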


@classmethod
def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: # pragma: no cover
"""
Output metadata of selected columns.
Args:
outputs_metadata: metadata_base.DataMetadata
hyperparams: d3m.metadata.hyperparams.Hyperparams

Returns:
d3m.metadata.base.DataMetadata
"""
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']

target_columns_metadata: List[OrderedDict] = []
for column_index in range(outputs_length):
column_metadata = OrderedDict(outputs_metadata.query_column(column_index))

# Update semantic types and prepare it for predicted targets.
semantic_types = set(column_metadata.get('semantic_types', []))
semantic_types_to_remove = set()
add_semantic_types = set()
add_semantic_types.add(hyperparams["return_semantic_type"])
semantic_types = semantic_types - semantic_types_to_remove
semantic_types = semantic_types.union(add_semantic_types)
column_metadata['semantic_types'] = list(semantic_types)

target_columns_metadata.append(column_metadata)

return target_columns_metadata


@classmethod
def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: # pragma: no cover
"""
Update metadata for selected columns.
Args:
inputs_metadata: metadata_base.DataMetadata
outputs: Container Dataframe
target_columns_metadata: list

Returns:
d3m.metadata.base.DataMetadata
"""
outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)

for column_index, column_metadata in enumerate(target_columns_metadata):
column_metadata.pop("structural_type", None)
outputs_metadata = outputs_metadata.update_column(column_index, column_metadata)

return outputs_metadata


def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: # pragma: no cover
"""
Wrap predictions into dataframe
Args:
inputs: Container Dataframe
predictions: array-like data (n_samples, n_features)

Returns:
Dataframe
"""
outputs = d3m_dataframe(predictions, generate_metadata=True)
# target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata,
# self.hyperparams)
target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams, self.primitiveNo)
outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata)
# print(outputs.metadata.to_internal_simple_structure())

return outputs

@classmethod
def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams, primitiveNo): # pragma: no cover
"""
Add target columns metadata
Args:
outputs_metadata: metadata.base.DataMetadata
hyperparams: d3m.metadata.hyperparams.Hyperparams

Returns:
List[OrderedDict]
"""
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
target_columns_metadata: List[OrderedDict] = []
for column_index in range(outputs_length):
column_name = "{0}{1}_{2}".format(cls.metadata.query()['name'], primitiveNo, column_index)
column_metadata = OrderedDict()
semantic_types = set()
semantic_types.add(hyperparams["return_semantic_type"])
column_metadata['semantic_types'] = list(semantic_types)

column_metadata["name"] = str(column_name)
target_columns_metadata.append(column_metadata)

return target_columns_metadata

@classmethod
def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int],
outputs_metadata: metadata_base.DataMetadata, hyperparams): # pragma: no cover
"""
Update metadata for selected columns.
Args:
inputs_metadata: metadata.base.DataMetadata
input_indices: list
outputs_metadata: metadata.base.DataMetadata
hyperparams: d3m.metadata.hyperparams.Hyperparams

Returns:
d3m.metadata.base.DataMetadata
"""
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
target_columns_metadata: List[OrderedDict] = []
for column_index in input_indices:
column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name")
if column_name is None:
column_name = "output_{}".format(column_index)

column_metadata = OrderedDict(inputs_metadata.query_column(column_index))
semantic_types = set(column_metadata.get('semantic_types', []))
semantic_types_to_remove = set([])
add_semantic_types = set()
add_semantic_types.add(hyperparams["return_semantic_type"])
semantic_types = semantic_types - semantic_types_to_remove
semantic_types = semantic_types.union(add_semantic_types)
column_metadata['semantic_types'] = list(semantic_types)

column_metadata["name"] = str(column_name)
target_columns_metadata.append(column_metadata)

# If outputs has more columns than index, add Attribute Type to all remaining
if outputs_length > len(input_indices):
for column_index in range(len(input_indices), outputs_length):
column_metadata = OrderedDict()
semantic_types = set()
semantic_types.add(hyperparams["return_semantic_type"])
column_name = "output_{}".format(column_index)
column_metadata["semantic_types"] = list(semantic_types)
column_metadata["name"] = str(column_name)
target_columns_metadata.append(column_metadata)

return target_columns_metadata

+ 13
- 21
tods/feature_analysis/AutoCorrelation.py View File

@@ -25,7 +25,7 @@ from d3m.primitive_interfaces import base, transformer
from d3m.metadata import base as metadata_base, hyperparams
from d3m.metadata import hyperparams, params, base as metadata_base
from d3m.primitive_interfaces.base import CallResult, DockerContainer
from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

from statsmodels.tsa.stattools import acf

@@ -186,7 +186,7 @@ class ACF:



class AutoCorrelationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class AutoCorrelationPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
A primitive that performs autocorrelation on a DataFrame
acf() function documentation: https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.acf.html
@@ -233,26 +233,8 @@ class AutoCorrelationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outp
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'AutocorrelationPrimitive')),
})

def __init__(self, *,
hyperparams: Hyperparams, #
random_seed: int = 0,
docker_containers: Dict[str, DockerContainer] = None) -> None:
super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)


self._clf = ACF(unbiased = hyperparams['unbiased'],
nlags = hyperparams['nlags'],
qstat = hyperparams['qstat'],
fft = hyperparams['fft'],
alpha = hyperparams['alpha'],
missing = hyperparams['missing']
)

self.primitiveNo = PrimitiveCount.primitive_no
PrimitiveCount.primitive_no+=1


def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""
Process the testing data.
Args:
@@ -261,6 +243,16 @@ class AutoCorrelationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outp
Returns:
Container DataFrame after AutoCorrelation.
"""
self._clf = ACF(unbiased = self.hyperparams['unbiased'],
nlags = self.hyperparams['nlags'],
qstat = self.hyperparams['qstat'],
fft = self.hyperparams['fft'],
alpha = self.hyperparams['alpha'],
missing = self.hyperparams['missing']
)

self.primitiveNo = PrimitiveCount.primitive_no
PrimitiveCount.primitive_no+=1

# Get cols to fit.
self._fitted = False
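
The same refactoring recurs in every file below: the public produce() becomes
_produce(), and each primitive now inherits from a TODS base class that owns
the public entry point. A minimal, self-contained sketch of the
template-method pattern this implies; the class and method bodies here are
illustrative assumptions, not the actual TODSBasePrimitives code:

class TODSTransformerPrimitiveBaseSketch:
    # The base class owns the public API and dispatches to a subclass hook.
    def produce(self, *, inputs, timeout=None, iterations=None):
        # A real base class could add system-wise pre/post-processing here.
        return self._produce(inputs=inputs, timeout=timeout, iterations=iterations)

    def _produce(self, *, inputs, timeout=None, iterations=None):
        raise NotImplementedError

class IdentityPrimitive(TODSTransformerPrimitiveBaseSketch):
    # A subclass only implements the protected hook.
    def _produce(self, *, inputs, timeout=None, iterations=None):
        return inputs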


+ 3
- 2
tods/feature_analysis/BKFilter.py View File

@@ -20,6 +20,7 @@ from d3m import utils
from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from d3m.primitive_interfaces.base import CallResult, DockerContainer
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase


import os.path
@@ -118,7 +119,7 @@ class Hyperparams(hyperparams.Hyperparams):
)

class BKFilterPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class BKFilterPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Filter a time series using the Baxter-King bandpass filter.

@@ -173,7 +174,7 @@ class BKFilterPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hy
})


def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
"""
Process the testing data.
Args:


+ 3
- 2
tods/feature_analysis/DiscreteCosineTransform.py View File

@@ -14,6 +14,7 @@ import math
from scipy.fft import dct
from collections import OrderedDict
from typing import cast, Dict, List, Union, Sequence, Optional, Tuple
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase


from scipy import sparse
@@ -160,7 +161,7 @@ class DCT:


class DiscreteCosineTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class DiscreteCosineTransformPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Compute the 1-D discrete Cosine Transform.
Return the Discrete Cosine Transform of arbitrary type sequence x.
@@ -242,7 +243,7 @@ class DiscreteCosineTransformPrimitive(transformer.TransformerPrimitiveBase[Inpu
workers = self.hyperparams['workers']
)

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/FastFourierTransform.py View File

@@ -17,6 +17,7 @@ from typing import cast, Dict, List, Union, Sequence, Optional, Tuple

from scipy import sparse
from numpy import ndarray
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('FastFourierTransformPrimitive',)

@@ -157,7 +158,7 @@ class FFT:


class FastFourierTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class FastFourierTransformPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Compute the 1-D discrete Fourier Transform.
This function computes the 1-D n-point discrete Fourier Transform (DFT) with the efficient Fast Fourier Transform (FFT) algorithm
@@ -232,7 +233,7 @@ class FastFourierTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs,
workers = self.hyperparams['workers']
)

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/HPFilter.py View File

@@ -21,6 +21,7 @@ from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from d3m.primitive_interfaces.base import CallResult, DockerContainer

from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

import statsmodels.api as sm

@@ -101,7 +102,7 @@ class Hyperparams(hyperparams.Hyperparams):
)

class HPFilterPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class HPFilterPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Filter a time series using the Hodrick-Prescott filter.

@@ -150,7 +151,7 @@ class HPFilterPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hy
})


def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
"""
Process the testing data.
Args:


+ 3
- 2
tods/feature_analysis/NonNegativeMatrixFactorization.py View File

@@ -15,6 +15,7 @@ import numpy
from numpy import ndarray
import warnings

from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase


__all__ = ('NonNegativeMatrixFactorizationPrimitive',)
@@ -211,7 +212,7 @@ class NMF:
return result


class NonNegativeMatrixFactorizationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class NonNegativeMatrixFactorizationPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Calculates Latent factors of a given matrix of timeseries data

@@ -299,7 +300,7 @@ class NonNegativeMatrixFactorizationPrimitive(transformer.TransformerPrimitiveBa
learning_rate = self.hyperparams['learning_rate'],
)

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:

assert isinstance(inputs, container.DataFrame), type(inputs)



+ 3
- 2
tods/feature_analysis/SpectralResidualTransform.py View File

@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('SpectralResidualTransformPrimitive',)

@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):



class SpectralResidualTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class SpectralResidualTransformPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find Spectral Residual Transform of time series
"""
@@ -110,7 +111,7 @@ class SpectralResidualTransformPrimitive(transformer.TransformerPrimitiveBase[In
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'SpectralResidualTransformPrimitive')),
})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/StatisticalAbsEnergy.py View File

@@ -20,6 +20,7 @@ from d3m.primitive_interfaces import base, transformer

from d3m.container import DataFrame as d3m_dataframe
from d3m.metadata import hyperparams, params, base as metadata_base
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalAbsEnergyPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalAbsEnergyPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find abs_energy of time series
"""
@@ -112,7 +113,7 @@ class StatisticalAbsEnergyPrimitive(transformer.TransformerPrimitiveBase[Inputs,

})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/StatisticalAbsSum.py View File

@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalAbsSumPrimitive',)

@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalAbsSumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalAbsSumPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find abs_sum of time series
"""
@@ -109,7 +110,7 @@ class StatisticalAbsSumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Ou
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalAbsSumPrimitive')),
})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/StatisticalGmean.py View File

@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base
from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from d3m.exceptions import UnexpectedValueError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalGmeanPrimitive',)

@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalGmeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalGmeanPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find gmean of time series.
Will only take positive values as inputs.
@@ -111,7 +112,7 @@ class StatisticalGmeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Out
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalGmeanPrimitive')),
})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/StatisticalHmean.py View File

@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalHmeanPrimitive',)

@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalHmeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalHmeanPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find the harmonic mean of a time series.
The harmonic mean is only defined if all elements are greater than or equal to zero.
@@ -113,7 +114,7 @@ class StatisticalHmeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Out
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalHmeanPrimitive')),
})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/StatisticalKurtosis.py View File

@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalKurtosisPrimitive',)

@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalKurtosisPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalKurtosisPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find kurtosis of time series
"""
@@ -110,7 +111,7 @@ class StatisticalKurtosisPrimitive(transformer.TransformerPrimitiveBase[Inputs,
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalKurtosisPrimitive')),
})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 6
- 5
tods/feature_analysis/StatisticalMaximum.py View File

@@ -9,11 +9,11 @@ from numpy import ndarray
from collections import OrderedDict
from scipy import sparse
import os
import uuid

import numpy
import typing
import time
import uuid

from d3m import container
from d3m.primitive_interfaces import base, transformer
@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalMaximumPrimitive',)

@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalMaximumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalMaximumPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find maximum of time series
"""
@@ -110,7 +111,7 @@ class StatisticalMaximumPrimitive(transformer.TransformerPrimitiveBase[Inputs, O
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMaximumPrimitive')),
})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:
@@ -159,11 +160,11 @@ class StatisticalMaximumPrimitive(transformer.TransformerPrimitiveBase[Inputs, O
if self.hyperparams['error_on_no_input']:
raise RuntimeError("No input columns were selected")
self.logger.warn("No input columns were selected")

outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'],
add_index_columns=self.hyperparams['add_index_columns'],
inputs=inputs, column_indices=self._training_indices,
columns_list=output_columns)

self.logger.info('Statistical Maximum Primitive returned')

return base.CallResult(outputs)
@@ -314,6 +315,6 @@ class StatisticalMaximumPrimitive(transformer.TransformerPrimitiveBase[Inputs, O
sequence = column_value[iter-window_size+1:iter+1]
column_maximum[iter] = np.max(sequence)
column_maximum[:window_size-1] = column_maximum[window_size-1]
transformed_X[column + "_maximum"] = column_maximum
transformed_X[str(column) + "_maximum"] = column_maximum

return transformed_X
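
For reference, the loop above computes a trailing-window maximum and then
backfills the first window_size - 1 entries. Under the same assumptions (a
numeric column with a default integer index), pandas' rolling API gives an
equivalent result; a sketch, not the primitive's actual code:

import pandas as pd

values = pd.Series([1.0, 3.0, 2.0, 5.0, 4.0])
window_size = 3
rolled = values.rolling(window_size).max()
# Backfill the warm-up region, mirroring column_maximum[:window_size-1] above.
rolled.iloc[:window_size - 1] = rolled.iloc[window_size - 1]
# rolled -> [3.0, 3.0, 3.0, 5.0, 5.0]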

+ 3
- 2
tods/feature_analysis/StatisticalMean.py View File

@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalMeanPrimitive',)

@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalMeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalMeanPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find mean of time series
"""
@@ -110,7 +111,7 @@ class StatisticalMeanPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outp
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMeanPrimitive')),
})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/StatisticalMeanAbs.py View File

@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalMeanAbsPrimitive',)

@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalMeanAbsPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalMeanAbsPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find mean_abs of time series
"""
@@ -109,7 +110,7 @@ class StatisticalMeanAbsPrimitive(transformer.TransformerPrimitiveBase[Inputs, O
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMeanAbsPrimitive')),
})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/StatisticalMeanAbsTemporalDerivative.py View File

@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalMeanAbsTemporalDerivativePrimitive',)

@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalMeanAbsTemporalDerivativePrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalMeanAbsTemporalDerivativePrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find mean_abs_temporal_derivative of time series
"""
@@ -110,7 +111,7 @@ class StatisticalMeanAbsTemporalDerivativePrimitive(transformer.TransformerPrimi
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMeanAbsTemporalDerivativePrimitive')),
})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/StatisticalMeanTemporalDerivative.py View File

@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalMeanTemporalDerivativePrimitive',)

@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalMeanTemporalDerivativePrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalMeanTemporalDerivativePrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find mean_temporal_derivative of time series
"""
@@ -110,7 +111,7 @@ class StatisticalMeanTemporalDerivativePrimitive(transformer.TransformerPrimitiv
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMeanTemporalDerivativePrimitive')),
})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/StatisticalMedian.py View File

@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalMedianPrimitive',)

@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalMedianPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalMedianPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find median of time series
"""
@@ -110,7 +111,7 @@ class StatisticalMedianPrimitive(transformer.TransformerPrimitiveBase[Inputs, Ou

})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/StatisticalMedianAbsoluteDeviation.py View File

@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalMedianAbsoluteDeviationPrimitive',)

@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalMedianAbsoluteDeviationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalMedianAbsoluteDeviationPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find median_absolute_deviation of time series
"""
@@ -111,7 +112,7 @@ class StatisticalMedianAbsoluteDeviationPrimitive(transformer.TransformerPrimiti
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMedianAbsoluteDeviationPrimitive')),
})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/StatisticalMinimum.py View File

@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalMinimumPrimitive',)

@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalMinimumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalMinimumPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find minimum of time series
"""
@@ -110,7 +111,7 @@ class StatisticalMinimumPrimitive(transformer.TransformerPrimitiveBase[Inputs, O
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalMinimumPrimitive')),
})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/StatisticalSkew.py View File

@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalSkewPrimitive',)

@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalSkewPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalSkewPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find skew of time series
"""
@@ -111,7 +112,7 @@ class StatisticalSkewPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outp
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalSkewPrimitive')),
})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/StatisticalStd.py View File

@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalStdPrimitive',)

@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalStdPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalStdPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find std of time series
"""
@@ -110,7 +111,7 @@ class StatisticalStdPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outpu
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalStdPrimitive')),
})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/StatisticalVar.py View File

@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalVarPrimitive',)

@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalVarPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalVarPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find var of time series
"""
@@ -109,7 +110,7 @@ class StatisticalVarPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outpu
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalVarPrimitive')),
})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/StatisticalVariation.py View File

@@ -24,6 +24,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalVariationPrimitive',)

@@ -88,7 +89,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalVariationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalVariationPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find variation of time series
"""
@@ -112,7 +113,7 @@ class StatisticalVariationPrimitive(transformer.TransformerPrimitiveBase[Inputs,

})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/StatisticalVecSum.py View File

@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalVecSumPrimitive',)

@@ -87,7 +88,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalVecSumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalVecSumPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find vec_sum of time series
"""
@@ -110,7 +111,7 @@ class StatisticalVecSumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Ou
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalVecSumPrimitive')),
})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/StatisticalWillisonAmplitude.py View File

@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalWillisonAmplitudePrimitive',)

@@ -91,7 +92,7 @@ class Hyperparams(hyperparams.Hyperparams):



class StatisticalWillisonAmplitudePrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
class StatisticalWillisonAmplitudePrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find willison amplitude of time series
"""
@@ -114,7 +115,7 @@ class StatisticalWillisonAmplitudePrimitive(transformer.TransformerPrimitiveBase
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalWillisonAmplitudePrimitive')),
})

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+ 3
- 2
tods/feature_analysis/StatisticalZeroCrossing.py View File

@@ -23,6 +23,7 @@ from d3m.metadata import hyperparams, params, base as metadata_base

from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('StatisticalZeroCrossingPrimitive',)

@@ -83,7 +84,7 @@ class Hyperparams(hyperparams.Hyperparams):



-class StatisticalZeroCrossingPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class StatisticalZeroCrossingPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
Primitive to find zero_crossing of time series. Adds a column indicating whether a zero crossing occurs at the i-th row: 1 marks a crossing, 0 marks normal.
"""
@@ -105,7 +106,7 @@ class StatisticalZeroCrossingPrimitive(transformer.TransformerPrimitiveBase[Inpu
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'StatisticalZeroCrossingPrimitive')),
})

-def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""

Args:


+3 -2 tods/feature_analysis/TRMF.py

@@ -22,6 +22,7 @@ from d3m.exceptions import PrimitiveNotFittedError
from d3m.primitive_interfaces.base import CallResult, DockerContainer
from d3m.primitive_interfaces import base, transformer
# from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase


Inputs = d3m_dataframe
@@ -161,7 +162,7 @@ class Hyperparams(hyperparams.Hyperparams):
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
)

-class TRMFPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class TRMFPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""Temporal Regularized Matrix Factorization.

Parameters
@@ -241,7 +242,7 @@ class TRMFPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperp
})

-def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
+def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
"""
Process the testing data.
Args:


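As background for the TRMF hunks: temporal regularized matrix factorization models a multivariate series matrix as a low-rank product, with an autoregressive penalty tying the time factors together. A toy numpy sketch of just the low-rank reconstruction step (shapes and rank are illustrative; the primitive's real behavior is governed by the hyperparameters shown above):

    import numpy as np

    rng = np.random.default_rng(0)
    F = rng.normal(size=(5, 2))    # series-by-rank loading matrix
    X = rng.normal(size=(2, 50))   # rank-by-time temporal factors
    Y_hat = F @ X                  # low-rank reconstruction of a 5x50 series
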
+3 -2 tods/feature_analysis/WaveletTransform.py

@@ -19,6 +19,7 @@ from collections import OrderedDict
from scipy import sparse
import logging
import uuid
+from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase

__all__ = ('WaveletTransformPrimitive',)

@@ -148,7 +149,7 @@ class Hyperparams(hyperparams.Hyperparams):
)


-class WaveletTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+class WaveletTransformPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
A primitive of Multilevel 1D Discrete Wavelet Transform of data.
See `PyWavelet documentation <https://pywavelets.readthedocs.io/en/latest/ref/>`_ for details.
@@ -203,7 +204,7 @@ class WaveletTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Out
)


-def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+def _produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""
Process the testing data.
Args:

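Seen from the subclass side, each of these per-file edits is the same small change. A hedged before/after for a generic primitive (MyPrimitive is a made-up name, not a class in the repo):

    # Illustrative only; mirrors the diffs above.
    from d3m.primitive_interfaces import base
    from tods.common.TODSBasePrimitives import TODSTransformerPrimitiveBase

    class MyPrimitive(TODSTransformerPrimitiveBase):
        # before: class MyPrimitive(transformer.TransformerPrimitiveBase[...])
        #         with a public def produce(...)
        def _produce(self, *, inputs, timeout=None, iterations=None):
            outputs = inputs  # the unchanged per-dataframe computation
            return base.CallResult(outputs)

Callers keep invoking produce(); it is now inherited from the TODS base class.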

+1 -1 tods/tests/detection_algorithm/test_PyodABOD.py

@@ -126,7 +126,7 @@ class ABODTest(unittest.TestCase):
'selector': ['__ALL_ELEMENTS__', 0],
'metadata': {
'name': 'Angle-base Outlier Detection Primitive0_0',
-'structural_type': 'numpy.float64',
+'structural_type': 'numpy.int64',
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute']
},
}])

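Note the pattern in these detection-algorithm tests: every expected structural_type flips from numpy.float64 to numpy.int64 for the default produce() output, while produce_score() (kept in the HBOS test below) still yields floats. A plausible reading, sketched here with an illustrative 0.5 threshold, is that produce() now reports binary anomaly labels rather than raw scores:

    import numpy as np

    scores = np.array([0.1, 0.9, 0.2, 0.8])   # produce_score(): numpy.float64
    labels = (scores > 0.5).astype(np.int64)  # produce(): numpy.int64 labels
    print(labels)                             # [0 1 0 1]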

+1 -3 tods/tests/detection_algorithm/test_PyodHBOS.py

@@ -63,8 +63,6 @@ class HBOSTest(unittest.TestCase):
primitive.fit()
new_main = primitive.produce(inputs=main).value
new_main_score = primitive.produce_score(inputs=main).value
-print(new_main)
-print(new_main_score)


self.assertEqual(utils.to_json_structure(new_main.metadata.to_internal_simple_structure()), [{
@@ -125,7 +123,7 @@ class HBOSTest(unittest.TestCase):
'selector': ['__ALL_ELEMENTS__', 0],
'metadata': {
'name': 'HBOS0_0',
-'structural_type': 'numpy.float64',
+'structural_type': 'numpy.int64',
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute']
},
}])


+4 -4 tods/tests/detection_algorithm/test_Telemanom.py

@@ -5,7 +5,7 @@ from d3m.metadata import base as metadata_base
from tods.detection_algorithm.Telemanom import TelemanomPrimitive


-class SODTest(unittest.TestCase):
+class TelemanomTest(unittest.TestCase):
def test_basic(self):
self.maxDiff = None
main = container.DataFrame({'a': [1., 2., 3., 4.,5,6,7,8,9], 'b': [2., 3., 4., 5.,6,7,8,9,10], 'c': [3., 4., 5., 6.,7,8,9,10,11]},
@@ -91,21 +91,21 @@ class SODTest(unittest.TestCase):
'selector': ['__ALL_ELEMENTS__', 0],
'metadata': {
'name': 'Telemanom0_0',
-'structural_type': 'numpy.float64',
+'structural_type': 'numpy.int64',
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
},
}, {
'selector': ['__ALL_ELEMENTS__', 1],
'metadata': {
-'structural_type': 'numpy.float64',
'name': 'Telemanom0_1',
+'structural_type': 'numpy.int64',
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
},
}, {
'selector': ['__ALL_ELEMENTS__', 2],
'metadata': {
-'structural_type': 'numpy.float64',
'name': 'Telemanom0_2',
+'structural_type': 'numpy.int64',
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
}
}])


+1 -1 tods/tests/feature_analysis/test_Autocorrelation.py

@@ -66,7 +66,7 @@ class AutoCorrelationTestCase(unittest.TestCase):
hyperparams_class = AutoCorrelation.AutoCorrelationPrimitive.metadata.get_hyperparams().defaults()
hyperparams_class = hyperparams_class.replace({'nlags': 2})
primitive = AutoCorrelation.AutoCorrelationPrimitive(hyperparams=hyperparams_class)
-new_main = primitive.produce(inputs=main).value
+new_main = primitive._produce(inputs=main).value
print(new_main)
# new_main_drop = new_main['value_acf']


+1 -1 tods/tests/feature_analysis/test_BKFilter.py

@@ -54,7 +54,7 @@ class BKFilterTest(unittest.TestCase):

hyperparams_class = BKFilter.BKFilterPrimitive.metadata.get_hyperparams()
primitive = BKFilter.BKFilterPrimitive(hyperparams=hyperparams_class.defaults())
-new_main = primitive.produce(inputs=main).value
+new_main = primitive._produce(inputs=main).value
print(new_main)




+1 -1 tods/tests/feature_analysis/test_DiscreteCosineTransform.py

@@ -66,7 +66,7 @@ class DctTestCase(unittest.TestCase):
'return_result':'append',
})
primitive = DiscreteCosineTransform.DiscreteCosineTransformPrimitive(hyperparams=hp)
-new_main = primitive.produce(inputs=main).value
+new_main = primitive._produce(inputs=main).value

c = pd.DataFrame({"A":[1,2,3], "B":['a','b','c'],'A_dct_coeff':[1.200000e+01,-3.464102e+00,-4.440892e-16]})



+1 -1 tods/tests/feature_analysis/test_FastFourierTransform.py

@@ -67,7 +67,7 @@ class FftTestCase(unittest.TestCase):
'return_result':'append',
})
primitive = FastFourierTransform.FastFourierTransformPrimitive(hyperparams=hp)
-new_main = primitive.produce(inputs=main).value
+new_main = primitive._produce(inputs=main).value

c = pd.DataFrame({"A":[1,2,3], "B":['a','b','c'],'A_fft_abs':[6.000000,1.732051,1.732051],'A_fft_phse':[-0.000000,2.617994,-2.617994]})



+1 -1 tods/tests/feature_analysis/test_HPFilter.py

@@ -54,7 +54,7 @@ class HPFilterTest(unittest.TestCase):

hyperparams_class = HPFilter.HPFilterPrimitive.metadata.get_hyperparams()
primitive = HPFilter.HPFilterPrimitive(hyperparams=hyperparams_class.defaults())
-new_main = primitive.produce(inputs=main).value
+new_main = primitive._produce(inputs=main).value
print(new_main)




+1 -1 tods/tests/feature_analysis/test_NonNegativeMatrixFactorization.py

@@ -74,7 +74,7 @@ class NmfTestCase(unittest.TestCase):
'H': b,
})
primitive = NonNegativeMatrixFactorization.NonNegativeMatrixFactorizationPrimitive(hyperparams=hp)
-new_main = primitive.produce(inputs=main).value
+new_main = primitive._produce(inputs=main).value

print("new_main",new_main)
c = pd.DataFrame({"A":[1,2,3,np.nan,np.nan], "B":[4,5,6,np.nan,np.nan],


+1 -1 tods/tests/feature_analysis/test_SpectralResidualTransform.py

@@ -55,7 +55,7 @@ class SpectralResidualTransformTestCase(unittest.TestCase):

primitive = SpectralResidualTransform.SpectralResidualTransformPrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value

expected_output = container.DataFrame(
{'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],


+1 -1 tods/tests/feature_analysis/test_StastiticalStd.py

@@ -56,7 +56,7 @@ class StatisticalStdTestCase(unittest.TestCase):

primitive = StatisticalStd.StatisticalStdPrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value
print(output_main)
expected_output = container.DataFrame(
{'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],


+1 -1 tods/tests/feature_analysis/test_StatisticalAbsEnergy.py

@@ -56,7 +56,7 @@ class StatisticalAbsEnergyTestCase(unittest.TestCase):

primitive = StatisticalAbsEnergy.StatisticalAbsEnergyPrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value
print(output_main)
expected_output = container.DataFrame(
{'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],


+1 -1 tods/tests/feature_analysis/test_StatisticalAbsSum.py

@@ -56,7 +56,7 @@ class StatisticalAbsSumTestCase(unittest.TestCase):

primitive = StatisticalAbsSum.StatisticalAbsSumPrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value

expected_output = container.DataFrame(
{'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, -5.0, 6.0],


+1 -1 tods/tests/feature_analysis/test_StatisticalGmean.py

@@ -56,7 +56,7 @@ class StatisticalGmeanTestCase(unittest.TestCase):

primitive = StatisticalGmean.StatisticalGmeanPrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value
print(output_main[['values_gmean', 'b_gmean']])

expected_output = container.DataFrame(


+1 -1 tods/tests/feature_analysis/test_StatisticalHmean.py

@@ -56,7 +56,7 @@ class StatisticalHmeanTestCase(unittest.TestCase):

primitive = StatisticalHmean.StatisticalHmeanPrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value
#print(output_main[['values_hmean', 'b_hmean']])

expected_output = container.DataFrame(


+1 -1 tods/tests/feature_analysis/test_StatisticalKurtosis.py

@@ -56,7 +56,7 @@ class StatisticalKurtosisTestCase(unittest.TestCase):

primitive = StatisticalKurtosis.StatisticalKurtosisPrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value
print(output_main[['values_kurtosis', 'b_kurtosis']])

expected_output = container.DataFrame(


+1 -1 tods/tests/feature_analysis/test_StatisticalMaximum.py

@@ -56,7 +56,7 @@ class StatisticalMaximumTestCase(unittest.TestCase):

primitive = StatisticalMaximum.StatisticalMaximumPrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value
print(output_main)
expected_output = container.DataFrame(
{'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],


+1 -1 tods/tests/feature_analysis/test_StatisticalMean.py

@@ -56,7 +56,7 @@ class StatisticalMeanTestCase(unittest.TestCase):

primitive = StatisticalMean.StatisticalMeanPrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value
print(output_main)
expected_output = container.DataFrame(
{'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],


+1 -1 tods/tests/feature_analysis/test_StatisticalMeanAbs.py

@@ -56,7 +56,7 @@ class StatisticalMeanAbsTestCase(unittest.TestCase):

primitive = StatisticalMeanAbs.StatisticalMeanAbsPrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value
print(output_main)
expected_output = container.DataFrame(
{'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],


+1 -1 tods/tests/feature_analysis/test_StatisticalMeanAbsTemporalDerivative.py

@@ -56,7 +56,7 @@ class StatisticalMeanAbsTemporalDerivativeTestCase(unittest.TestCase):

primitive = StatisticalMeanAbsTemporalDerivative.StatisticalMeanAbsTemporalDerivativePrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value
print(output_main[['values_mean_abs_temporal_derivative', 'b_mean_abs_temporal_derivative']])
expected_output = container.DataFrame(
{'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],


+1 -1 tods/tests/feature_analysis/test_StatisticalMeanTemporalDerivative.py

@@ -56,7 +56,7 @@ class StatisticalMeanTemporalDerivativeTestCase(unittest.TestCase):

primitive = StatisticalMeanTemporalDerivative.StatisticalMeanTemporalDerivativePrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value
print(output_main[['values_mean_temporal_derivative', 'b_mean_temporal_derivative']])
expected_output = container.DataFrame(
{'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],


+1 -1 tods/tests/feature_analysis/test_StatisticalMedian.py

@@ -56,7 +56,7 @@ class StatisticalMedianTestCase(unittest.TestCase):

primitive = StatisticalMedian.StatisticalMedianPrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value

expected_output = container.DataFrame(
{'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],


+1 -1 tods/tests/feature_analysis/test_StatisticalMedianAbsoluteDeviation.py

@@ -56,7 +56,7 @@ class StatisticalMedianAbsoluteDeviationTestCase(unittest.TestCase):

primitive = StatisticalMedianAbsoluteDeviation.StatisticalMedianAbsoluteDeviationPrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value
print(output_main[['values_median_absolute_deviation', 'b_median_absolute_deviation']])

expected_output = container.DataFrame(


+1 -1 tods/tests/feature_analysis/test_StatisticalMinimum.py

@@ -56,7 +56,7 @@ class StatisticalMinimumTestCase(unittest.TestCase):

primitive = StatisticalMinimum.StatisticalMinimumPrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value
print(output_main)
expected_output = container.DataFrame(
{'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],


+1 -1 tods/tests/feature_analysis/test_StatisticalSkew.py

@@ -56,7 +56,7 @@ class StatisticalSkewTestCase(unittest.TestCase):

primitive = StatisticalSkew.StatisticalSkewPrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value
print(output_main[['values_skew', 'b_skew']])

expected_output = container.DataFrame(


+1 -1 tods/tests/feature_analysis/test_StatisticalVar.py

@@ -56,7 +56,7 @@ class StatisticalVarTestCase(unittest.TestCase):

primitive = StatisticalVar.StatisticalVarPrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value
print(output_main)
expected_output = container.DataFrame(
{'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],


+1 -1 tods/tests/feature_analysis/test_StatisticalVariation.py

@@ -56,7 +56,7 @@ class StatisticalVariationTestCase(unittest.TestCase):

primitive = StatisticalVariation.StatisticalVariationPrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value
print(output_main[['values_variation', 'b_variation']])

expected_output = container.DataFrame(


+1 -1 tods/tests/feature_analysis/test_StatisticalVecSum.py

@@ -56,7 +56,7 @@ class StatisticalVecSumTestCase(unittest.TestCase):

primitive = StatisticalVecSum.StatisticalVecSumPrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value
print(output_main)
expected_output = container.DataFrame(
{'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, -5.0, 6.0],


+1 -1 tods/tests/feature_analysis/test_StatisticalWillisonAmplitude.py

@@ -57,7 +57,7 @@ class StatisticalWillisonAmplitudeTestCase(unittest.TestCase):

primitive = StatisticalWillisonAmplitude.StatisticalWillisonAmplitudePrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value
print(output_main[['values_willison_amplitude', 'b_willison_amplitude']])
expected_output = container.DataFrame(
{'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],


+1 -1 tods/tests/feature_analysis/test_StatisticalZeroCrossing.py

@@ -55,7 +55,7 @@ class StatisticalZeroCrossingTestCase(unittest.TestCase):

primitive = StatisticalZeroCrossing.StatisticalZeroCrossingPrimitive(hyperparams=hp)

-output_main = primitive.produce(inputs=main).value
+output_main = primitive._produce(inputs=main).value
print(output_main)
expected_output = container.DataFrame(
{'timestamp': [1, 3, 2, 5], 'values': [1.0, -2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0],


+1 -1 tods/tests/feature_analysis/test_TRMF.py

@@ -56,7 +56,7 @@ class TRMFTest(unittest.TestCase):
primitive = TRMF.TRMFPrimitive(hyperparams=hyperparams_class.defaults())
# primitive.set_training_data(inputs=main)
# primitive.fit()
-new_main = primitive.produce(inputs=main).value
+new_main = primitive._produce(inputs=main).value
print(new_main)




+2 -2 tods/tests/feature_analysis/test_WaveletTransformer.py

@@ -28,7 +28,7 @@ class WaveletTransformerTestCase(unittest.TestCase):
'return_result': 'new'})

primitive = WaveletTransformPrimitive(hyperparams=hyperparams)
-new_main = primitive.produce(inputs=main).value
+new_main = primitive._produce(inputs=main).value

# print(new_main)
# print(mean_mse, std_mse)
@@ -89,7 +89,7 @@ class WaveletTransformerTestCase(unittest.TestCase):
hyperparams = hyperparams_default.replace({'inverse': 1})

primitive = WaveletTransformPrimitive(hyperparams=hyperparams)
-main_recover = primitive.produce(inputs=main).value
+main_recover = primitive._produce(inputs=main).value

self.assertAlmostEqual(main_recover.values.tolist(), main.values.tolist(), delta=1e-6)
# print(main.metadata.to_internal_simple_structure())
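The second hunk above asserts a reconstruction property: with inverse=1, the primitive's output matches the original series to within 1e-6. The same round-trip property can be reproduced independently with PyWavelets (the 'db2' wavelet is illustrative; the primitive's configured wavelet may differ):

    import numpy as np
    import pywt

    x = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0])
    coeffs = pywt.wavedec(x, 'db2')      # multilevel forward DWT
    x_rec = pywt.waverec(coeffs, 'db2')  # inverse DWT reconstruction
    assert np.allclose(x, x_rec, atol=1e-6)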

