# Bibliothèques
```python
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
from scipy.stats import chi2_contingency
# Report the version of each library used below, one line per library.
for label, module in (
    ("Pandas", pd),
    ("Numpy", np),
    ("Seaborn", sns),
    ("Scipy", scipy),
    ("Matplotlib", matplotlib),
):
    print(f"{label} version: {module.__version__}")
```
Pandas version: 2.2.2
Numpy version: 1.26.4
Seaborn version: 0.13.2
Scipy version: 1.13.1
Matplotlib version: 3.9.2
# Data
```python
# Load seaborn's "tips" example dataset, then preview its first five rows.
df = sns.load_dataset("tips")
df.head(n=5)
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>total_bill</th>
<th>tip</th>
<th>sex</th>
<th>smoker</th>
<th>day</th>
<th>time</th>
<th>size</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>16.99</td>
<td>1.01</td>
<td>Female</td>
<td>No</td>
<td>Sun</td>
<td>Dinner</td>
<td>2</td>
</tr>
<tr>
<th>1</th>
<td>10.34</td>
<td>1.66</td>
<td>Male</td>
<td>No</td>
<td>Sun</td>
<td>Dinner</td>
<td>3</td>
</tr>
<tr>
<th>2</th>
<td>21.01</td>
<td>3.50</td>
<td>Male</td>
<td>No</td>
<td>Sun</td>
<td>Dinner</td>
<td>3</td>
</tr>
<tr>
<th>3</th>
<td>23.68</td>
<td>3.31</td>
<td>Male</td>
<td>No</td>
<td>Sun</td>
<td>Dinner</td>
<td>2</td>
</tr>
<tr>
<th>4</th>
<td>24.59</td>
<td>3.61</td>
<td>Female</td>
<td>No</td>
<td>Sun</td>
<td>Dinner</td>
<td>4</td>
</tr>
</tbody>
</table>
</div>
# Chi2 Contingency
H0 : "Il n'y a pas de relation entre le nombre de personnes (size) et le jour de la semaine (day)"
```python
# Observed counts: one row per party size, one column per day of the week.
contigency_table = pd.crosstab(index=df["size"], columns=df["day"])
contigency_table
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>day</th>
<th>Thur</th>
<th>Fri</th>
<th>Sat</th>
<th>Sun</th>
</tr>
<tr>
<th>size</th>
<th></th>
<th></th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<th>1</th>
<td>1</td>
<td>1</td>
<td>2</td>
<td>0</td>
</tr>
<tr>
<th>2</th>
<td>48</td>
<td>16</td>
<td>53</td>
<td>39</td>
</tr>
<tr>
<th>3</th>
<td>4</td>
<td>1</td>
<td>18</td>
<td>15</td>
</tr>
<tr>
<th>4</th>
<td>5</td>
<td>1</td>
<td>13</td>
<td>18</td>
</tr>
<tr>
<th>5</th>
<td>1</td>
<td>0</td>
<td>1</td>
<td>3</td>
</tr>
<tr>
<th>6</th>
<td>3</td>
<td>0</td>
<td>0</td>
<td>1</td>
</tr>
</tbody>
</table>
</div>
```python
# Chi-square test of independence on the observed size x day counts; the
# result carries the statistic, p-value, degrees of freedom and the table of
# frequencies expected under H0.
chi2_contingency(observed=contigency_table)
```
Chi2ContingencyResult(statistic=29.632849936919712, pvalue=0.013316478351860587, dof=15, expected_freq=array([[ 1.01639344, 0.31147541, 1.42622951, 1.24590164],
[39.63934426, 12.14754098, 55.62295082, 48.59016393],
[ 9.6557377 , 2.95901639, 13.54918033, 11.83606557],
[ 9.40163934, 2.88114754, 13.19262295, 11.52459016],
[ 1.2704918 , 0.38934426, 1.78278689, 1.55737705],
[ 1.01639344, 0.31147541, 1.42622951, 1.24590164]]))
```python
# Frequencies expected under H0, shown as a labelled table rounded to whole
# numbers for readability.
expected = chi2_contingency(contigency_table).expected_freq
size_labels = df["size"].sort_values().unique()
day_labels = df["day"].sort_values().unique()
pd.DataFrame(expected, index=size_labels, columns=day_labels).round()
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>Thur</th>
<th>Fri</th>
<th>Sat</th>
<th>Sun</th>
</tr>
</thead>
<tbody>
<tr>
<th>1</th>
<td>1.0</td>
<td>0.0</td>
<td>1.0</td>
<td>1.0</td>
</tr>
<tr>
<th>2</th>
<td>40.0</td>
<td>12.0</td>
<td>56.0</td>
<td>49.0</td>
</tr>
<tr>
<th>3</th>
<td>10.0</td>
<td>3.0</td>
<td>14.0</td>
<td>12.0</td>
</tr>
<tr>
<th>4</th>
<td>9.0</td>
<td>3.0</td>
<td>13.0</td>
<td>12.0</td>
</tr>
<tr>
<th>5</th>
<td>1.0</td>
<td>0.0</td>
<td>2.0</td>
<td>2.0</td>
</tr>
<tr>
<th>6</th>
<td>1.0</td>
<td>0.0</td>
<td>1.0</td>
<td>1.0</td>
</tr>
</tbody>
</table>
</div>
```python
# Residuals between the frequencies expected under H0 and the observed counts.
# The expected array is labelled with the contingency table's own index and
# columns so the subtraction below aligns cell-for-cell by construction, and
# it is left unrounded so small expected counts (< 1) are not collapsed to 0
# before the difference is taken.
expected_freq = pd.DataFrame(
    chi2_contingency(contigency_table).expected_freq,
    index=contigency_table.index,
    columns=contigency_table.columns,
)
diff = expected_freq - contigency_table  # > 0: fewer observations than expected

# center=0 pins the neutral colour of the diverging palette to "no difference";
# fmt=".1f" rounds only what is displayed, not the underlying values.
sns.heatmap(diff, annot=True, fmt=".1f", cmap="coolwarm", center=0)
plt.show()
```

# En résumé
```python
# Final decision: compare the chi-square p-value with the chosen significance
# level and print whether H0 (independence of size and day) is rejected.
# NOTE(review): the expected-frequency table computed for this data contains
# several cells below 5, so the chi-square approximation may be unreliable
# here -- consider merging sparse size categories before concluding.
alpha = 0.02
p_value = chi2_contingency(contigency_table).pvalue

print("H0 :\"Il n'y a pas de relation entre le nombre de personnes (size) et le jour de la semaine (day)\"")
print()

verdict = (
    "Nous avons suffisamment d'éléments pour rejeter H0"
    if p_value < alpha
    else "Nous n'avons pas suffisamment d'éléments pour rejeter H0"
)
print(verdict)
```
H0 :"Il n'y a pas de relation entre le nombre de personnes (size) et le jour de la semaine (day)"
Nous avons suffisamment d'éléments pour rejeter H0