Browse Source

fix concept drift feature

tags/v1.6.0
mindspore-ci-bot wuxiaoyu (F) 4 years ago
parent
commit
a82050a984
4 changed files with 49 additions and 60 deletions
  1. +0
    -1
      examples/reliability/concept_drift_time_series.py
  2. +10
    -7
      mindarmour/reliability/concept_drift/README.md
  3. +39
    -40
      mindarmour/reliability/concept_drift/concept_drift_check_time_series.py
  4. +0
    -12
      setup.py

+ 0
- 1
examples/reliability/concept_drift_time_series.py View File

@@ -34,7 +34,6 @@ Please choose one column or multiple columns.
import numpy as np
from mindarmour import ConceptDriftCheckTimeSeries


# input data
DATA_FILE = r'archive/individual_stocks_5yr/individual_stocks_5yr/AEE_data.csv'
data = np.loadtxt(DATA_FILE, str, delimiter=",")


+ 10
- 7
mindarmour/reliability/concept_drift/README.md View File

@@ -53,11 +53,11 @@ concept = ConceptDriftCheckTimeSeries(window_size=100, rolling_window=10, step=1
need_label=False)
```

>window_size(int): Size of a concept window, belongs to [10, 1/3*len(input data)]. If the data is periodic, usually window_size equals 2-5 periods, such as, for monthly/weekly data, the data volume of 30/7 days is a period. Default: 100.
>window_size(int): Size of a concept window, no less than 10. Given the input data, window_size must lie in [10, 1/3*len(input data)]. If the data is periodic, window_size usually equals 2-5 periods; for example, for monthly/weekly data, the data volume of 30/7 days is one period. Default: 100.
rolling_window(int): Smoothing window size, belongs to [1, window_size]. Default: 10.
step(int): The jump length of the sliding window, belongs to [1, window_size]. Default: 10.
threshold_index(float): The threshold index, (-∞,+∞), Default: 1.5.
need_label(bool: False or True. If need_label=True, concept drift labels are needed. Default: False.
threshold_index(float): The threshold index. Default: 1.5.
need_label(bool): False or True. If need_label=True, concept drift labels are needed. Default: False.

### Data

@@ -74,19 +74,22 @@ data = data[1:, 2].astype('float64') # here we choose one column or multiple co

```python
drift_score, threshold, concept_drift_location = concept.concept_check(data)
# the result is saved as pdf named 'concept_drift_check.pdf'
```

>drift_score(numpy.ndarray): The concept drift score of the example series.
threshold(float): The threshold to judge concept drift.
concept_drift_location(list): The location of the concept drift.
concept_drift_location(list): The location of the concept drift.


## Script Description

```python
```bash
├── mindarmour
├── reliability # reliability features, including concept drift detection
├──concept_drift
├──concept_drift.py
├──readme.md
├──__init__.py
├──concept_drift_check_time_series.py
├──README.md
```


+ 39
- 40
mindarmour/reliability/concept_drift/concept_drift_check_time_series.py View File

@@ -24,23 +24,24 @@ from mindarmour.utils._check_param import check_param_type, check_param_in_range

class ConceptDriftCheckTimeSeries:
"""
Concept is used for example series distribution change detection.
ConceptDriftCheckTimeSeries is used for example series distribution change detection.

Args:
window_size(int): Size of a concept window, belongs to [10, 1/3*len(input data)].
If the data is periodic, usually window_size equals 2-5 periods, such as,
for monthly/weekly data, the data volume of 30/7 days is a period. Default: 100.
window_size(int): Size of a concept window, no less than 10. Given the input data,
window_size must lie in [10, 1/3*len(input data)]. If the data is periodic, window_size
usually equals 2-5 periods; for example, for monthly/weekly data, the data volume
of 30/7 days is one period. Default: 100.
rolling_window(int): Smoothing window size, belongs to [1, window_size]. Default: 10.
step(int): The jump length of the sliding window, belongs to [1, window_size]. Default: 10.
threshold_index(float): The threshold index, (-∞, +∞), Default: 1.5.
threshold_index(float): The threshold index, :math:`(-\infty, +\infty)`. Default: 1.5.
need_label(bool): False or True. If need_label=True, concept drift labels are needed.
Default: False.

Examples:
>>> concept = ConceptDriftCheckTimeSeries(window_size=100, rolling_window=10,
>>> step=10, threshold_index=1.5, need_label=False)
>>> data_example = np.array([np.random.rand(1000),
>>> np.random.rand(1000),np.random.rand(1000)]).T
>>> data_example = 5*np.random.rand(1000)
>>> data_example[200: 800] = 20*np.random.rand(600)
>>> score, threshold, concept_drift_location = concept.concept_check(data_example)
"""

@@ -48,10 +49,7 @@ class ConceptDriftCheckTimeSeries:
step=10, threshold_index=1.5, need_label=False):
self.window_size = check_param_type('window_size', window_size, int)
self.rolling_window = check_param_type('rolling_window', rolling_window, int)
self.rolling_window = check_param_in_range('rolling_window',
rolling_window, 1, window_size)
self.step = check_param_type('step', step, int)
self.step = check_param_in_range('step', step, 1, window_size)
self.threshold_index = check_param_type('threshold_index', threshold_index, float)
self.need_label = check_param_type('need_label', need_label, bool)
self._in_size = window_size
@@ -66,8 +64,8 @@ class ConceptDriftCheckTimeSeries:
window_data(numpy.ndarray): The input data (in one window).

Returns:
w_out(numpy.ndarray): The output weight of reservoir model.
x_state(numpy.ndarray): The state of the reservoir model in the latent space.
- numpy.ndarray, the output weight of reservoir model.
- numpy.ndarray, the state of the reservoir model in the latent space.

Examples:
>>> input_data = np.random.rand(100)
@@ -99,7 +97,7 @@ class ConceptDriftCheckTimeSeries:
data_y(numpy.ndarray): Data y.

Returns:
distance_score_mean(float): Distance between data_x and data_y.
- float, distance between data_x and data_y.

Examples:
>>> x = np.random.rand(100)
@@ -125,7 +123,7 @@ class ConceptDriftCheckTimeSeries:
data(numpy.ndarray): Input data.

Returns:
smooth_data(numpy.ndarray): Data after smoothing.
- numpy.ndarray, data after smoothing.

Examples:
>>> data_example = np.random.rand(100)
@@ -150,28 +148,30 @@ class ConceptDriftCheckTimeSeries:

def concept_check(self, data):
"""
Find concept drift locations in a example series.
Find concept drift locations in a data series.

Args:
data(numpy.ndarray): Input data. The shape of data could be (n,1) or (n,m).
Note that each column (m columns) is one data series.

Returns:
drift_score(numpy.ndarray): The concept drift score of the example series.
threshold(float): The threshold to judge concept drift.
concept_drift_location(list): The location of the concept drift.
- numpy.ndarray, the concept drift score of the example series.
- float, the threshold to judge concept drift.
- list, the location of the concept drift.

Examples:
>>> concept = ConceptDriftCheckTimeSeries(window_size=100, rolling_window=10,
>>> step=10, threshold_index=1.5, need_label=False)
>>> data_example = np.array([np.random.rand(1000),
>>> np.random.rand(1000), np.random.rand(1000)]).T
>>> score, drift_threshold, point = concept.concept_check(data_example)
>>> data_example = 5*np.random.rand(1000)
>>> data_example[200: 800] = 20*np.random.rand(600)
>>> score, drift_threshold, drift_location = concept.concept_check(data_example)
"""
# data check
data = _check_array_not_empty('data', data)
data = check_param_type('data', data, np.ndarray)
check_param_in_range('window_size', self.window_size, 10, int((1 / 3)*len(data)))
check_param_in_range('rolling_window', self.rolling_window, 1, self.window_size)
check_param_in_range('step', self.step, 1, self.window_size)
original_data = data
data = self._data_process(data)
# calculate drift score
@@ -190,39 +190,38 @@ class ConceptDriftCheckTimeSeries:
# find drift blocks
concept_drift_location, drift_point = _drift_blocks(drift_score,
label_continue, label_location)
# show result
_plot_show(original_data, threshold, concept_drift_location,
drift_point, drift_score)
# save result
_result_save(original_data, threshold, concept_drift_location, drift_point, drift_score)
return drift_score, threshold, concept_drift_location


def _plot_show(original_data, threshold, concept_location, drift_point, drift_score):
def _result_save(original_data, threshold, concept_location, drift_point, drift_score):
"""
To show the result.
To save the result.

Args:
original_data(numpy.ndarray): The input data.
threshold(float): The concept drift threshold.
concept_location(list): The concept drift locations(x-axis).
drift_point(list): The precise drift location of a drift.
drift_point(list): The precise drift point of a drift.
drift_score(numpy.ndarray): The drift score of input data.
"""
plt.figure(figsize=(20, 8))
plt.subplot(2, 1, 1)
# Plot input data and drift points
plt.plot(original_data, label="data")
plt.title('concept drift check, threshold=' + str(threshold))
plt.title('concept drift check, threshold=' + str(threshold), fontsize=25)
plt.scatter(concept_location, np.ones(len(concept_location)),
marker='*', s=200, color="b", label="concept drift occurred")
for _, i in enumerate(drift_point):
plt.axvline(x=i, color='r', linestyle='--')
plt.legend()
plt.legend(fontsize=15)
plt.subplot(2, 1, 2)
# Plot drift score
plt.plot(drift_score, label="drift_score")
plt.axhline(y=threshold, color='r', linestyle='--', label="threshold")
plt.legend()
plt.show()
plt.legend(fontsize=15)
plt.savefig('concept_drift_check.pdf')


def _original_label(original_data, threshold, drift_score, window_size, step_size):
@@ -238,9 +237,9 @@ def _original_label(original_data, threshold, drift_score, window_size, step_siz
step_size(int): The jump length of the sliding window.

Returns:
label(list): The drift label of input data.
- list, the drift label of input data.
0 means no drift, and 1 means drift happens.
label_location(list): The locations of drifts(x-axis).
- list, the locations of drifts(x-axis).
"""
label = []
label_location = []
@@ -262,7 +261,7 @@ def _label_continue_process(label):
label(list): The original drift label.

Returns:
label_continue(numpy.ndarray): The continual drift label.
- numpy.ndarray, the continual drift label.
Drift may be flagged sporadically; to avoid frequent alarms,
the labels are post-processed into continuous runs.
"""
@@ -290,7 +289,7 @@ def _continue_block(location):
location(numpy.ndarray): The locations of concept drift.

Returns:
area(list): Continue blocks of concept drift.
- list, continue blocks of concept drift.
"""
area = []
for _, loc in groupby(enumerate(location), _find_loc):
@@ -309,8 +308,8 @@ def _drift_blocks(drift_score, label_continue, label_location):
label_location(numpy.ndarray): The locations of concept drift(x-axis).

Returns:
concept_location(list): The concept drift locations(x-axis) after continual blocks finding.
drift_point(list): Return a precise beginning location of a drift.
- list, the concept drift locations(x-axis) after continual blocks finding.
- list, the precise beginning location of each drift.
"""
# Find drift blocks
area = _continue_block(np.where(label_continue == 1)[0])
@@ -341,7 +340,7 @@ def _w_generate(res_size, in_size, input_data):
input_data(numpy.ndarray): Input data.

Returns:
x_state(numpy.ndarray): The state of reservoir.
- numpy.ndarray, the state of reservoir.
"""
# Weight generates randomly
np.random.seed(42)
@@ -373,7 +372,7 @@ def _cal_distance(matrix1, matrix2):
matrix2(numpy.ndarray): Input array.

Returns:
distance(numpy.ndarray): Distance between two arrays.
- numpy.ndarray, distance between two arrays.
"""
w_mean_x = np.mean(matrix1, axis=0)
w_mean_y = np.mean(matrix2, axis=0)
@@ -390,7 +389,7 @@ def _cal_threshold(distance, threshold_index):
threshold_index(float): Threshold adjusted index, (-∞, +∞).

Returns:
threshold(float): [0, 1].
- float, [0, 1].
"""
distance = distance[distance > 0]
# Threshold calculation


+ 0
- 12
setup.py View File

@@ -24,17 +24,6 @@ version = '1.2.0'
cur_dir = os.path.dirname(os.path.realpath(__file__))
pkg_dir = os.path.join(cur_dir, 'build')

try:
from wheel.bdist_wheel import bdist_wheel as _bdist_wheel


class bdist_wheel(_bdist_wheel):
def finalize_options(self):
_bdist_wheel.finalize_options(self)
self.root_is_pure = False
except ImportError:
bdist_wheel = None


def write_version(file):
file.write("__version__ = '{}'\n".format(version))
@@ -122,7 +111,6 @@ setup(
cmdclass={
'egg_info': EggInfo,
'build_py': BuildPy,
'bdist_wheel': bdist_wheel
},
install_requires=[
'scipy >= 1.5.3',


Loading…
Cancel
Save