# Bibliothèques
```python
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
from scipy.stats import chisquare
```
# Data
```python
# Load seaborn's "tips" example dataset into a DataFrame and preview its first five rows.
df = sns.load_dataset(name="tips")
df.head(n=5)
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>total_bill</th>
<th>tip</th>
<th>sex</th>
<th>smoker</th>
<th>day</th>
<th>time</th>
<th>size</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>16.99</td>
<td>1.01</td>
<td>Female</td>
<td>No</td>
<td>Sun</td>
<td>Dinner</td>
<td>2</td>
</tr>
<tr>
<th>1</th>
<td>10.34</td>
<td>1.66</td>
<td>Male</td>
<td>No</td>
<td>Sun</td>
<td>Dinner</td>
<td>3</td>
</tr>
<tr>
<th>2</th>
<td>21.01</td>
<td>3.50</td>
<td>Male</td>
<td>No</td>
<td>Sun</td>
<td>Dinner</td>
<td>3</td>
</tr>
<tr>
<th>3</th>
<td>23.68</td>
<td>3.31</td>
<td>Male</td>
<td>No</td>
<td>Sun</td>
<td>Dinner</td>
<td>2</td>
</tr>
<tr>
<th>4</th>
<td>24.59</td>
<td>3.61</td>
<td>Female</td>
<td>No</td>
<td>Sun</td>
<td>Dinner</td>
<td>4</td>
</tr>
</tbody>
</table>
</div>
# Test Chisquare
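H0 : "les clients donnent les pourboires: 20% le jeudi, 10% le vendredi, 40% le samedi, et 30% le dimanche"

Remarque : le test porte sur le nombre de lignes (une ligne par addition) observées chaque jour, et non sur le montant des pourboires.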
```python
# Theoretical share of rows per day under H0, expressed as proportions
# (not percentages), in the order Thur, Fri, Sat, Sun.
expected_frequencies = np.array(
    [
        0.2,  # Thur
        0.1,  # Fri
        0.4,  # Sat
        0.3,  # Sun
    ]
)
```
```python
# Observed share of rows per day, as proportions rounded to two decimals.
# sort=False keeps the column's own ordering instead of sorting by frequency.
(
    df["day"]
    .value_counts(sort=False, normalize=True)
    .round(decimals=2)
)
```
day
Thur 0.25
Fri 0.08
Sat 0.36
Sun 0.31
Name: proportion, dtype: float64
```python
# Observed number of rows per day.
# The days are requested explicitly so the counts line up with the H0
# proportions (Thur, Fri, Sat, Sun) without relying on how the "day" column
# happens to be ordered; a day with no rows is counted as 0.
# The counts are integers, so no rounding is applied.
day_order = ["Thur", "Fri", "Sat", "Sun"]
observed_frequencies = (
    df["day"]
    .value_counts()
    .reindex(day_order, fill_value=0)
    .to_numpy()
)
observed_frequencies
```
array([62, 19, 87, 76], dtype=int64)
```python
# Theoretical number of rows per day under H0 (order: Thur, Fri, Sat, Sun).
# The proportions are restated here instead of rescaling `expected_frequencies`
# in place, so that re-running this cell does not multiply the values by
# len(df) a second time.
expected_proportions = np.array([0.2, 0.1, 0.4, 0.3])
expected_frequencies = expected_proportions * len(df)
expected_frequencies
```
array([48.8, 24.4, 97.6, 73.2])
```python
# Chi-square goodness-of-fit test: compares the observed counts per day with
# the counts expected under H0 and returns the statistic and its p-value.
chisquare(
    f_obs=observed_frequencies,
    f_exp=expected_frequencies,
)
```
Power_divergenceResult(statistic=6.023907103825136, pvalue=0.11045286802428235)
# En résumé
```python
# One-cell recap: state H0, rebuild both count vectors, run the chi-square
# goodness-of-fit test and print the conclusion.
print("H0 :\"les clients donnent les pourboires: 20% le jeudi, 10% le vendredi, 40% le samedi, et 30% le dimanche")
print()

# Days in the order used by the H0 proportions below.
day_order = ["Thur", "Fri", "Sat", "Sun"]

# Expected counts under H0: theoretical proportions scaled by the sample size.
expected_frequencies = np.array([0.2, 0.1, 0.4, 0.3]) * len(df)

# Observed counts, explicitly aligned on day_order (a missing day counts as 0).
# The counts are integers, so no rounding is needed.
observed_frequencies = (
    df["day"]
    .value_counts()
    .reindex(day_order, fill_value=0)
    .to_numpy()
)

# Significance level: H0 is rejected only when the p-value falls below it.
alpha = 0.02
p_value = chisquare(f_obs=observed_frequencies, f_exp=expected_frequencies).pvalue

if p_value < alpha:
    print("Nous avons suffisamment d'éléments pour rejeter H0")
else:
    print("Nous n'avons pas suffisamment d'éléments pour rejeter H0")
```
H0 :"les clients donnent les pourboires: 20% le jeudi, 10% le vendredi, 40% le samedi, et 30% le dimanche
Nous n'avons pas suffisamment d'éléments pour rejeter H0