import pandas as pd
import numpy as np
# Load the auto-mpg data set and give its nine columns readable names.
# NOTE(review): read_csv treats the first line of the file as a header by
# default, so if auto-mpg.csv has no header row its first record is consumed
# here and then lost when the columns are renamed below -- confirm, and pass
# header=None if that is the case.
df = pd.read_csv('auto-mpg.csv')
df.columns = [
    'mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
    # NOTE(review): 'accelerationi' looks like a typo for 'acceleration';
    # kept unchanged so any existing lookups by this name still work.
    'accelerationi',
    'model year', 'origin', 'name',
]

# Rows whose horsepower is the placeholder '?' are converted to NaN and then
# dropped. The result of replace() is assigned back to the column on purpose:
# calling replace(..., inplace=True) on df['horsepower'] is an inplace call on
# a chained selection, which emits a FutureWarning in pandas 2.2 and no longer
# updates df once copy-on-write is the default (pandas 3.0).
df['horsepower'] = df['horsepower'].replace('?', np.nan)
df.dropna(subset=['horsepower'], axis=0, how='any', inplace=True)

# The column was read as strings because of the '?' entries; make it numeric.
df['horsepower'] = df['horsepower'].astype('float')

# Max scaling: divide every value by the column maximum.
df['horsepower'] = df['horsepower'] / df['horsepower'].max()
print(df['horsepower'].head())
0 0.717391
1 0.652174
2 0.652174
3 0.608696
4 0.860870
Name: horsepower, dtype: float64
$$ x' = \frac{x - \min(x)}{\max(x) - \min(x)} $$
각 원소 $x$에서 열의 최솟값을 뺀 뒤 (최댓값 - 최솟값)으로 나누면, 위와 같은 방식으로 모든 값을 0과 1 사이로 정규화할 수 있다.
import pandas as pd
import numpy as np
# Load the auto-mpg data set and give its nine columns readable names.
# NOTE(review): read_csv treats the first line of the file as a header by
# default, so if auto-mpg.csv has no header row its first record is consumed
# here and then lost when the columns are renamed below -- confirm, and pass
# header=None if that is the case.
df = pd.read_csv('auto-mpg.csv')
df.columns = [
    'mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
    # NOTE(review): 'accelerationi' looks like a typo for 'acceleration';
    # kept unchanged so any existing lookups by this name still work.
    'accelerationi',
    'model year', 'origin', 'name',
]

# Rows whose horsepower is the placeholder '?' are converted to NaN and then
# dropped. The result of replace() is assigned back to the column on purpose:
# calling replace(..., inplace=True) on df['horsepower'] is an inplace call on
# a chained selection, which emits a FutureWarning in pandas 2.2 and no longer
# updates df once copy-on-write is the default (pandas 3.0).
df['horsepower'] = df['horsepower'].replace('?', np.nan)
df.dropna(subset=['horsepower'], axis=0, how='any', inplace=True)

# The column was read as strings because of the '?' entries; make it numeric.
df['horsepower'] = df['horsepower'].astype('float')

# Min-max scaling: (x - min) / (max - min).
# min_x holds each value's offset from the column minimum (the numerator);
# min_max holds the column's range (the denominator).
min_x = df['horsepower'] - df['horsepower'].min()
min_max = df['horsepower'].max() - df['horsepower'].min()
df['horsepower'] = min_x / min_max
print(df['horsepower'].head())
0 0.646739
1 0.565217
2 0.565217
3 0.510870
4 0.826087
Name: horsepower, dtype: float64