In [1]:
%matplotlib inline
import numpy as np
from sklearn.datasets import load_iris
import pandas as pd
import matplotlib.pyplot as plt
In [2]:
# Load the bundled iris dataset and wrap it in a DataFrame: one column per
# measured feature, plus a trailing 'species' column with the integer target.
iris_data = load_iris()
iris = pd.DataFrame(
    data=iris_data.data,
    columns=iris_data.feature_names,
).assign(species=iris_data.target)

# Show the class names so the integer codes in 'species' can be interpreted.
print(iris_data.target_names)
iris.head()
['setosa' 'versicolor' 'virginica']
Out[2]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) species
0 5.1 3.5 1.4 0.2 0
1 4.9 3.0 1.4 0.2 0
2 4.7 3.2 1.3 0.2 0
3 4.6 3.1 1.5 0.2 0
4 5.0 3.6 1.4 0.2 0
In [3]:
# Scatter of sepal length (column 0) against petal length (column 2),
# with each point coloured by its integer species label.
x = iris.iloc[:, 0]
y = iris.iloc[:, 2]

fig, ax = plt.subplots()
ax.scatter(x, y, c=iris['species'])
ax.set_xlabel("sepal length (cm)")
ax.set_ylabel("petal length (cm)")
plt.show()

Regression: predicting petal length from sepal length (virginica only)

In [4]:
from sklearn.linear_model import LinearRegression

# Fit on a single species only: label 2 is 'virginica' in the target_names
# printed when the data was loaded. Filter once and reuse the subset.
virginica = iris.loc[iris['species'] == 2]

# scikit-learn expects X as a 2-D (n_samples, n_features) array, so the single
# sepal-length feature (column 0) is reshaped into one column.
x = virginica.iloc[:, 0].values.reshape(-1, 1)
# Target: petal length (column 2), left as a 1-D Series.
y = virginica.iloc[:, 2]

model = LinearRegression()
model.fit(x, y)

# predict() takes the same 2-D layout as fit(): one row per sample. A bare
# scalar (predict(6.5)) is rejected by current scikit-learn with a ValueError,
# so the single query point is passed as a 1x1 nested list.
print("Predicted petal length for 6.5cm sepal length:", model.predict([[6.5]]))
Predicted petal length for 6.5cm sepal length: [ 5.48599289]
In [5]:
# Plot the samples used for the fit (x, y from the regression cell) together
# with the fitted line evaluated on an evenly spaced grid of sepal lengths.
# predict() needs a 2-D (n_samples, 1) input, hence the reshape.
x_new = np.linspace(4.5, 8.0, 100).reshape(-1, 1)
y_new = model.predict(x_new)

fig, ax = plt.subplots()
ax.scatter(x, y, label="observed")
# Draw the fit in a contrasting colour; by default it would share the
# scatter's colour and be hard to tell apart from the points.
ax.plot(x_new, y_new, color="red", label="linear fit")
# Label the axes so the figure can be read on its own, matching the
# labelled scatter plot earlier in the notebook.
ax.set_xlabel("sepal length (cm)")
ax.set_ylabel("petal length (cm)")
ax.legend()
plt.show()