# -*- coding: utf-8 -*-
# ---
# jupyter:
#   jupytext_format_version: '1.2'
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
#   language_info:
#     codemirror_mode:
#       name: ipython
#       version: 3
#     file_extension: .py
#     mimetype: text/x-python
#     name: python
#     nbconvert_exporter: python
#     pygments_lexer: ipython3
#     version: 3.5.2
# ---

# ## Datasets

# ## Moons
#

# +
# The IPython magic below is kept in jupytext's commented form so that this
# script stays valid plain Python; jupytext restores it as an active magic
# when the script is converted back to a notebook.
# %matplotlib inline
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt

# generate sample data: two interleaving half circles with Gaussian noise;
# seeding the global NumPy generator makes the sample reproducible
np.random.seed(0)
X, y = datasets.make_moons(200, noise=0.20)

# plot data, coloured by class label
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
plt.show()
# -

# ## XOR

# +
import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessClassifier

# 200 standard-normal points in 2-D; a point is labelled True when exactly
# one of its two coordinates is positive (XOR of the two sign tests)
rng = np.random.RandomState(0)
X = rng.randn(200, 2)
Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)

# plot data, coloured by the XOR label
plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Spectral)
plt.show()
# -

# ## Digits

# +
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits

# load data
digits = load_digits()

# copied from notebook 02_sklearn_data.ipynb
fig = plt.figure(figsize=(6, 6))  # figure size in inches
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)

# plot the first 64 digits on an 8x8 grid: each image is 8x8 pixels
# (subplot positions are 1-based, hence start=1)
for position, (image, label) in enumerate(
        zip(digits.images[:64], digits.target[:64]), start=1):
    ax = fig.add_subplot(8, 8, position, xticks=[], yticks=[])
    ax.imshow(image, cmap=plt.cm.binary)

    # label the image with the target value
    ax.text(0, 7, str(label))
# -

# ## Iris
#
# This data set consists of the petal and sepal measurements of 3 different types of irises (Setosa, Versicolour, and Virginica), stored in a 150x4 numpy.ndarray.
#
# The rows are the samples and the columns are: Sepal Length, Sepal Width, Petal Length and Petal Width.
#

# +
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
from sklearn.decomposition import PCA

# import some data to play with: all feature columns and the class labels
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Plot the samples: two side-by-side scatter plots coloured by class
plt.figure(figsize=(15, 5))
plt.subplots_adjust(bottom=.05, top=.9, left=.05, right=.95)

# left panel: feature columns 0 and 1
plt.subplot(121)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Set1, edgecolor='k')
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')

# right panel: feature columns 2 and 3
plt.subplot(122)
plt.scatter(X[:, 2], X[:, 3], c=y, cmap=plt.cm.Set1, edgecolor='k')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

plt.show()

# +
from sklearn.manifold import Isomap

# project the iris features (X and y come from the previous cell) onto
# 2 components with Isomap, using 5 neighbours per point
iso = Isomap(n_neighbors=5, n_components=2)
proj = iso.fit_transform(X)

plt.figure(figsize=(15, 9))
plt.scatter(proj[:, 0], proj[:, 1], c=y)
plt.colorbar()
plt.show()
# -

# ## blobs
#

# +
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs

# Generate 500 two-dimensional samples in 3 Gaussian blobs (standard
# deviation 1.0) centred at (-5, -5), (0, 0) and (5, 5). The labels returned
# by make_blobs are used unchanged, so y holds one class per blob.
# shuffle=False leaves the samples in generation order, and random_state=42
# makes the draw reproducible.
centers = [(-5, -5), (0, 0), (5, 5)]
n_samples = 500

X, y = make_blobs(n_samples=n_samples, n_features=2, cluster_std=1.0,
                  centers=centers, shuffle=False, random_state=42)

plt.figure(figsize=(15, 9))
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.colorbar()
plt.show()