# Libraries

```python
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.neighbors import LocalOutlierFactor
```

# Load data

```python
# Simulate a dataset: a noisy linear relation y = 3 * X - 2, stored as column
# vectors, with a fixed seed so the figures are reproducible.
np.random.seed(0)
m = 100
X = np.random.randn(m).reshape(-1, 1)
y = 3 * X - 2 + np.random.randn(m).reshape(-1, 1)

# Append one hand-made point (X=2, y=-8) that lies far from the linear trend.
X = np.concatenate((X, [[2]]), axis=0)
y = np.concatenate((y, [[-8]]), axis=0)

plt.scatter(X, y)
```

    <matplotlib.collections.PathCollection at 0x7029325cd610>

![png](LocaOutlierFactor_3_1.png)

```python
# IQR method
```

```python
# One boxplot per variable: each variable is inspected on its own.
fig, ax = plt.subplots(1, 2, figsize=(9, 4))
ax[0].boxplot(y)
ax[0].set_title("y")
ax[1].boxplot(X, vert=False)
ax[1].set_title("X")
plt.show()
```

![png](LocaOutlierFactor_5_0.png)

on ne détecte pas d'outliers avec les boxplots --> la méthode du IQR ne suffit pas

```python
# Stack X and y side by side so the detector sees both coordinates of each point.
data = np.concatenate((X, y), axis=1)
```

```python
# contamination=0.01 sets the expected share of outliers in the data set.
detection_model = LocalOutlierFactor(contamination=0.01, n_neighbors=5)

# fit_predict labels each sample 1 (inlier) or -1 (outlier); the labels drive
# the point colours in the scatter plot.
outliers = detection_model.fit_predict(data)
plt.scatter(X, y, c=outliers, cmap='bwr_r')
```

    <matplotlib.collections.PathCollection at 0x702931979310>

![png](LocaOutlierFactor_8_1.png)