# Librairies ```python import numpy as np import matplotlib.pyplot as plt import seaborn as sns import pandas as pd ``` # Load Data ```python df = sns.load_dataset('diamonds') df.head() ``` <div> <style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style> <table border="1" class="dataframe"> <thead> <tr style="text-align: right;"> <th></th> <th>carat</th> <th>cut</th> <th>color</th> <th>clarity</th> <th>depth</th> <th>table</th> <th>price</th> <th>x</th> <th>y</th> <th>z</th> </tr> </thead> <tbody> <tr> <th>0</th> <td>0.23</td> <td>Ideal</td> <td>E</td> <td>SI2</td> <td>61.5</td> <td>55.0</td> <td>326</td> <td>3.95</td> <td>3.98</td> <td>2.43</td> </tr> <tr> <th>1</th> <td>0.21</td> <td>Premium</td> <td>E</td> <td>SI1</td> <td>59.8</td> <td>61.0</td> <td>326</td> <td>3.89</td> <td>3.84</td> <td>2.31</td> </tr> <tr> <th>2</th> <td>0.23</td> <td>Good</td> <td>E</td> <td>VS1</td> <td>56.9</td> <td>65.0</td> <td>327</td> <td>4.05</td> <td>4.07</td> <td>2.31</td> </tr> <tr> <th>3</th> <td>0.29</td> <td>Premium</td> <td>I</td> <td>VS2</td> <td>62.4</td> <td>58.0</td> <td>334</td> <td>4.20</td> <td>4.23</td> <td>2.63</td> </tr> <tr> <th>4</th> <td>0.31</td> <td>Good</td> <td>J</td> <td>SI2</td> <td>63.3</td> <td>58.0</td> <td>335</td> <td>4.34</td> <td>4.35</td> <td>2.75</td> </tr> </tbody> </table> </div> ```python df["carat"].plot(kind='box', vert=False) ``` <Axes: > ![png](method_IQR_4_1.png) ```python def iqr_outlier_detection(df, column, threshold=1.5): Q1 = df[column].quantile(0.25) Q3 = df[column].quantile(0.75) IQR = Q3 - Q1 lower_bound = Q1 - threshold * IQR upper_bound = Q3 + threshold * IQR outliers = df[(df[column] < lower_bound) | (df[column] > upper_bound)] return outliers ``` ```python iqr_outlier_detection(df, 'carat', threshold=1.5) ``` <div> <style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style> <table border="1" class="dataframe"> <thead> <tr style="text-align: right;"> <th></th> <th>carat</th> <th>cut</th> <th>color</th> <th>clarity</th> <th>depth</th> <th>table</th> <th>price</th> <th>x</th> <th>y</th> <th>z</th> </tr> </thead> <tbody> <tr> <th>12246</th> <td>2.06</td> <td>Premium</td> <td>J</td> <td>I1</td> <td>61.2</td> <td>58.0</td> <td>5203</td> <td>8.10</td> <td>8.07</td> <td>4.95</td> </tr> <tr> <th>13002</th> <td>2.14</td> <td>Fair</td> <td>J</td> <td>I1</td> <td>69.4</td> <td>57.0</td> <td>5405</td> <td>7.74</td> <td>7.70</td> <td>5.36</td> </tr> <tr> <th>13118</th> <td>2.15</td> <td>Fair</td> <td>J</td> <td>I1</td> <td>65.5</td> <td>57.0</td> <td>5430</td> <td>8.01</td> <td>7.95</td> <td>5.23</td> </tr> <tr> <th>13757</th> <td>2.22</td> <td>Fair</td> <td>J</td> <td>I1</td> <td>66.7</td> <td>56.0</td> <td>5607</td> <td>8.04</td> <td>8.02</td> <td>5.36</td> </tr> <tr> <th>13991</th> <td>2.01</td> <td>Fair</td> <td>I</td> <td>I1</td> <td>67.4</td> <td>58.0</td> <td>5696</td> <td>7.71</td> <td>7.64</td> <td>5.17</td> </tr> <tr> <th>...</th> <td>...</td> <td>...</td> <td>...</td> <td>...</td> <td>...</td> <td>...</td> <td>...</td> <td>...</td> <td>...</td> <td>...</td> </tr> <tr> <th>27741</th> <td>2.15</td> <td>Ideal</td> <td>G</td> <td>SI2</td> <td>62.6</td> <td>54.0</td> <td>18791</td> <td>8.29</td> <td>8.35</td> <td>5.21</td> </tr> <tr> <th>27742</th> <td>2.04</td> <td>Premium</td> <td>H</td> <td>SI1</td> <td>58.1</td> <td>60.0</td> <td>18795</td> <td>8.37</td> <td>8.28</td> <td>4.84</td> </tr> <tr> <th>27744</th> <td>2.29</td> <td>Premium</td> <td>I</td> <td>SI1</td> <td>61.8</td> <td>59.0</td> <td>18797</td> <td>8.52</td> <td>8.45</td> <td>5.24</td> </tr> <tr> <th>27746</th> <td>2.07</td> <td>Ideal</td> <td>G</td> <td>SI2</td> <td>62.5</td> <td>55.0</td> <td>18804</td> <td>8.20</td> <td>8.13</td> <td>5.11</td> </tr> <tr> <th>27749</th> <td>2.29</td> <td>Premium</td> <td>I</td> <td>VS2</td> <td>60.8</td> <td>60.0</td> <td>18823</td> <td>8.50</td> <td>8.47</td> <td>5.16</td> </tr> </tbody> </table> <p>1889 rows × 10 columns</p> </div>