```python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.preprocessing import PolynomialFeatures
# Record the installed scikit-learn version so the outputs shown below can be
# matched to the library version that produced them.
print("sklearn version :", sklearn.__version__)
```
sklearn version : 1.5.1
```python
# Load seaborn's "mpg" example dataset.
df = sns.load_dataset("mpg")
# Select the column with a list of labels so that X is a two-dimensional
# DataFrame (one column) rather than a one-dimensional Series, which is the
# input layout PolynomialFeatures works with.
X = df.loc[:, ["mpg"]]
```
```python
# Default settings: degree 2 with a leading bias column of ones, so every row
# is expanded to [1, x, x^2].
polynomial_transform = PolynomialFeatures()
# fit_transform fits the transformer on X and expands X in a single call.
# The int cast is only there for a compact display (fractional parts are dropped).
polynomial_transform.fit_transform(X).astype(int)
```
array([[ 1, 18, 324],
[ 1, 15, 225],
[ 1, 18, 324],
...,
[ 1, 32, 1024],
[ 1, 28, 784],
[ 1, 31, 961]])
```python
# Drop the bias column (the leading column of ones): rows become [x, x^2].
polynomial_transform = PolynomialFeatures(include_bias=False)
# Show only the first ten expanded rows, cast to int for a compact display.
polynomial_transform.fit_transform(X)[:10].astype(int)
```
array([[ 18, 324],
[ 15, 225],
[ 18, 324],
[ 16, 256],
[ 17, 289],
[ 15, 225],
[ 14, 196],
[ 14, 196],
[ 14, 196],
[ 15, 225]])
```python
# Change the degree of the polynomial: with degree=3 and no bias column,
# every row is expanded to [x, x^2, x^3].
polynomial_transform = PolynomialFeatures(
    degree=3,
    include_bias=False,
)
# Fit and expand in one call; the int cast is for display only.
polynomial_transform.fit_transform(X).astype(int)
```
array([[ 18, 324, 5832],
[ 15, 225, 3375],
[ 18, 324, 5832],
...,
[ 32, 1024, 32768],
[ 28, 784, 21952],
[ 31, 961, 29791]])
# Combinaison de plusieurs variables
```python
# Use two input variables so that the expansion also produces cross terms.
X = df.loc[:, ["mpg", "cylinders"]]
```
```python
# Degree-2 expansion of two variables without the bias column:
# every row becomes [x1, x2, x1^2, x1*x2, x2^2].
polynomial_transform = PolynomialFeatures(
    degree=2,
    include_bias=False,
)
# Fit and expand in one call; the int cast is for display only.
polynomial_transform.fit_transform(X).astype(int)
```
array([[ 18, 8, 324, 144, 64],
[ 15, 8, 225, 120, 64],
[ 18, 8, 324, 144, 64],
...,
[ 32, 4, 1024, 128, 16],
[ 28, 4, 784, 112, 16],
[ 31, 4, 961, 124, 16]])
```python
# Keep only the interaction between the two variables (x1*x2), without the
# squared terms: every row becomes [x1, x2, x1*x2].
polynomial_transform = PolynomialFeatures(
    degree=2,
    include_bias=False,
    interaction_only=True,
)
# Fit and expand in one call; the int cast is for display only.
polynomial_transform.fit_transform(X).astype(int)
```
array([[ 18, 8, 144],
[ 15, 8, 120],
[ 18, 8, 144],
...,
[ 32, 4, 128],
[ 28, 4, 112],
[ 31, 4, 124]])