It seems that when I use KitNet together with a RunningAveragePostprocessor, I get NaN
scores out of the RunningAveragePostprocessor.
If I run this (with the postprocessor step commented out, printing the raw model scores):
# Import modules.
from sklearn.utils import shuffle
from pysad.evaluation import AUROCMetric
from pysad.models import xStream, RobustRandomCutForest, KNNCAD, ExactStorm, HalfSpaceTrees, IForestASD, KitNet
from pysad.utils import ArrayStreamer
from pysad.transform.postprocessing import RunningAveragePostprocessor
from pysad.transform.preprocessing import InstanceUnitNormScaler
from pysad.utils import Data
from tqdm import tqdm
import numpy as np
# This example demonstrates the usage of the most modules in PySAD framework.
if __name__ == "__main__":
    # Reproduction script, variant 1: stream the data through KitNet and print
    # the raw model score of every instance. The running-average
    # postprocessor is constructed but deliberately NOT applied here.
    np.random.seed(61)  # Fix random seed.
    n_initial = 100

    # Get data to stream.
    data = Data("data")
    X_all, y_all = data.get_data("arrhythmia.mat")
    # X_all, y_all = shuffle(X_all, y_all)

    # Split off the first n_initial rows; only the remainder is streamed below.
    X_initial, y_initial = X_all[:n_initial], y_all[:n_initial]
    X_stream, y_stream = X_all[n_initial:], y_all[n_initial:]

    iterator = ArrayStreamer(shuffle=False)  # Init streamer to simulate streaming data.

    model = KitNet(
        max_size_ae=10,
        grace_feature_mapping=100,
        grace_anomaly_detector=100,
        learning_rate=0.1,
        hidden_ratio=0.75,
    )
    preprocessor = InstanceUnitNormScaler()  # Init normalizer.
    postprocessor = RunningAveragePostprocessor(window_size=5)  # Init running average postprocessor.
    auroc = AUROCMetric()  # Init area-under-ROC-curve metric.

    for X, y in tqdm(iterator.iter(X_stream, y_stream)):  # Stream data.
        X = preprocessor.fit_transform_partial(X)  # Fit preprocessor to and transform the instance.

        score = model.fit_score_partial(X)  # Fit model to and score the instance.
        print(score)

        # Postprocessing intentionally disabled in this variant:
        # score = postprocessor.fit_transform_partial(score)  # Apply running averaging to the score.
        # print(score)

        auroc.update(y, score)  # Update AUROC metric.

    # Output resulting AUROC metric (runs once, after the whole stream is consumed).
    print("\nAUROC: ", auroc.get())
I see output that looks generally OK, but it seems a NaN
got in, and that breaks things when it comes to computing the AUROC:
/usr/local/lib/python3.6/dist-packages/sklearn/utils/deprecation.py:143: FutureWarning: The sklearn.utils.testing module is deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.utils. Anything that cannot be imported from sklearn.utils is now part of the private API.
warnings.warn(message, FutureWarning)
0it [00:00, ?it/s]/usr/local/lib/python3.6/dist-packages/pysad/models/kitnet_model/dA.py:119: RuntimeWarning: invalid value encountered in true_divide
x = (x - self.norm_min) / (self.norm_max - self.norm_min + 0.0000000000000001)
101it [00:00, 948.75it/s]Feature-Mapper: train-mode, Anomaly-Detector: off-mode
0.0
...
0.0
The Feature-Mapper found a mapping: 274 features to 136 autoencoders.
Feature-Mapper: execute-mode, Anomaly-Detector: train-mode
nan
176861904806278.84
1.2789157528725288
0.04468589042395759
0.1220238749287982
0.059888825651861544
0.09122945608076023
...
0.1389761646050123
/usr/local/lib/python3.6/dist-packages/pysad/models/kitnet_model/utils.py:14: RuntimeWarning: overflow encountered in exp
return 1. / (1 + numpy.exp(-x))
220it [00:03, 54.62it/s]0.12782183995180338
49677121607436.65
136071359600522.08
0.10972949863882411
...
0.1299215446450402
0.1567376498625513
0.1494816850581486
352it [00:05, 69.36it/s]
0.1402801274133297
0.18201141940107077
52873910494109.26
0.13997148683334693
0.13615269873450922
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-3-8af057e15ede> in <module>()
47
48 # Output resulting AUROCS metric.
---> 49 print("\nAUROC: ", auroc.get())
6 frames
/usr/local/lib/python3.6/dist-packages/sklearn/utils/validation.py in _assert_all_finite(X, allow_nan, msg_dtype)
97 msg_err.format
98 (type_err,
---> 99 msg_dtype if msg_dtype is not None else X.dtype)
100 )
101 # for object dtype data, we only check for NaNs (GH-13254)
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
I think the issue is the `nan` score
that is printed right after the lines "The Feature-Mapper found a mapping: 274 features to 136 autoencoders." and "Feature-Mapper: execute-mode, Anomaly-Detector: train-mode".
This might be OK on its own, but if I then use it with a RunningAveragePostprocessor,
the NaN seems to break the running average, so every score from then on is NaN:
# Import modules.
from sklearn.utils import shuffle
from pysad.evaluation import AUROCMetric
from pysad.models import xStream, RobustRandomCutForest, KNNCAD, ExactStorm, HalfSpaceTrees, IForestASD, KitNet
from pysad.utils import ArrayStreamer
from pysad.transform.postprocessing import RunningAveragePostprocessor
from pysad.transform.preprocessing import InstanceUnitNormScaler
from pysad.utils import Data
from tqdm import tqdm
import numpy as np
# This example demonstrates the usage of the most modules in PySAD framework.
if __name__ == "__main__":
    # Reproduction script, variant 2: stream the data through KitNet, pass
    # each score through the running-average postprocessor, and print the
    # postprocessed score of every instance.
    np.random.seed(61)  # Fix random seed.
    n_initial = 100

    # Get data to stream.
    data = Data("data")
    X_all, y_all = data.get_data("arrhythmia.mat")
    # X_all, y_all = shuffle(X_all, y_all)

    # Split off the first n_initial rows; only the remainder is streamed below.
    X_initial, y_initial = X_all[:n_initial], y_all[:n_initial]
    X_stream, y_stream = X_all[n_initial:], y_all[n_initial:]

    iterator = ArrayStreamer(shuffle=False)  # Init streamer to simulate streaming data.

    model = KitNet(
        max_size_ae=10,
        grace_feature_mapping=100,
        grace_anomaly_detector=100,
        learning_rate=0.1,
        hidden_ratio=0.75,
    )
    preprocessor = InstanceUnitNormScaler()  # Init normalizer.
    postprocessor = RunningAveragePostprocessor(window_size=5)  # Init running average postprocessor.
    auroc = AUROCMetric()  # Init area-under-ROC-curve metric.

    for X, y in tqdm(iterator.iter(X_stream, y_stream)):  # Stream data.
        X = preprocessor.fit_transform_partial(X)  # Fit preprocessor to and transform the instance.

        score = model.fit_score_partial(X)  # Fit model to and score the instance.
        # print(score)  # Raw model score; disabled in this variant.

        score = postprocessor.fit_transform_partial(score)  # Apply running averaging to the score.
        print(score)

        auroc.update(y, score)  # Update AUROC metric.

    # Output resulting AUROC metric (runs once, after the whole stream is consumed).
    print("\nAUROC: ", auroc.get())
So the output, with the NaN being propagated through the running average, is:
/usr/local/lib/python3.6/dist-packages/sklearn/utils/deprecation.py:143: FutureWarning: The sklearn.utils.testing module is deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.utils. Anything that cannot be imported from sklearn.utils is now part of the private API.
warnings.warn(message, FutureWarning)
0it [00:00, ?it/s]/usr/local/lib/python3.6/dist-packages/pysad/models/kitnet_model/dA.py:119: RuntimeWarning: invalid value encountered in true_divide
x = (x - self.norm_min) / (self.norm_max - self.norm_min + 0.0000000000000001)
101it [00:00, 881.82it/s]Feature-Mapper: train-mode, Anomaly-Detector: off-mode
0.0
0.0
0.0
...
0.0
The Feature-Mapper found a mapping: 274 features to 136 autoencoders.
Feature-Mapper: execute-mode, Anomaly-Detector: train-mode
nan
nan
nan
nan
185it [00:02, 46.04it/s]nan
nan
nan
193it [00:02, 42.56it/s]nan
nan
nan
200it [00:02, 41.06it/s]nan
nan
nan
nan
Feature-Mapper: execute-mode, Anomaly-Detector: exeute-mode
nan
nan
206it [00:02, 45.11it/s]/usr/local/lib/python3.6/dist-packages/pysad/models/kitnet_model/utils.py:14: RuntimeWarning: overflow encountered in exp
return 1. / (1 + numpy.exp(-x))
213it [00:02, 49.93it/s]nan
nan
nan
nan
nan
nan
...