# ## Multi-class recognition
# ### Load & show the data
# +
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
# load data
digits = load_digits()
# copied from notebook 02_sklearn_data.ipynb
fig = plt.figure(figsize=(6, 6)) # figure size in inches
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
# plot the digits: each image is 8x8 pixels
for i in range(64):
    ax = fig.add_subplot(8, 8, i + 1, xticks=[], yticks=[])
    ax.imshow(digits.images[i], cmap=plt.cm.binary)
    # label the image with the target value
    ax.text(0, 7, str(digits.target[i]))
# -
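# Before going further, it helps to confirm how the dataset is laid out: `digits.images` holds the 8x8 pixel grids, while `digits.data` holds the same images flattened into 64-element feature vectors (a minimal sanity check using only attributes of the `load_digits` bunch):
# +
print(digits.images.shape)  # (1797, 8, 8) -- one 8x8 image per sample
print(digits.data.shape)    # (1797, 64)   -- each image flattened to 64 features
print(digits.target.shape)  # (1797,)      -- the digit label for each sample
# -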
# ### Visualizing the Data
#
# A good first step for many problems is to visualize the data using one of the dimensionality reduction techniques we saw earlier. We'll start with the most straightforward one, Principal Component Analysis (PCA).
#
# PCA seeks orthogonal linear combinations of the features that show the greatest variance, and as such can give you a good idea of the structure of the data set. Here we'll use PCA with the randomized SVD solver (`svd_solver="randomized"`), because it's faster for large N.
# +
from sklearn.decomposition import PCA
pca = PCA(n_components=2, svd_solver="randomized")
proj = pca.fit_transform(digits.data)
plt.scatter(proj[:, 0], proj[:, 1], c=digits.target)
plt.colorbar()
# -
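# As a quick check on how informative this 2D projection is, we can look at the fraction of the total variance the two components capture; `explained_variance_ratio_` is an attribute of any fitted PCA object:
# +
# variance captured by each of the two principal components
print(pca.explained_variance_ratio_)
print("total variance explained: %.2f" % pca.explained_variance_ratio_.sum())
# -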
# A weakness of PCA is that it produces a linear dimensionality reduction:
# this may miss some interesting relationships in the data. If we want to
# see a nonlinear mapping of the data, we can use one of the several
# methods in the `manifold` module. Here we'll use [Isomap](https://blog.csdn.net/VictoriaW/article/details/78497316) (a contraction
# of Isometric Mapping), which is a manifold learning method based on
# graph theory:
# +
from sklearn.manifold import Isomap
iso = Isomap(n_neighbors=5, n_components=2)
proj = iso.fit_transform(digits.data)
plt.scatter(proj[:, 0], proj[:, 1], c=digits.target)
plt.colorbar()
# -
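# Because Isomap builds a nearest-neighbor graph and approximates geodesic distances along it, the embedding depends on `n_neighbors`. As a quick sketch (the value 10 below is just an illustrative choice, not from the original), we can refit with a larger neighborhood and compare the projections:
# +
# refit Isomap with a larger neighborhood (illustrative value)
iso10 = Isomap(n_neighbors=10, n_components=2)
proj10 = iso10.fit_transform(digits.data)
plt.scatter(proj10[:, 0], proj10[:, 1], c=digits.target)
plt.colorbar()
# -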
# ## Program
# +
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
# load digits data
digits, dig_label = load_digits(return_X_y=True)
print(digits.shape)
# draw one digit
plt.gray()
plt.matshow(digits[0].reshape([8, 8]))
plt.show()
# calculate train/test data number
N = len(digits)
N_train = int(N*0.8)
# split into train/test sets and fit a classifier
# (assumption: a sequential 80/20 split and a default LogisticRegression,
#  inferred from N_train above and the predictions used below)
x_train, y_train = digits[:N_train], dig_label[:N_train]
x_test, y_test = digits[N_train:], dig_label[N_train:]
lr = LogisticRegression()
lr.fit(x_train, y_train)
# predict on train/test data
pred_train = lr.predict(x_train)
pred_test = lr.predict(x_test)
# calculate train/test accuracy
acc_train = accuracy_score(y_train, pred_train)
acc_test = accuracy_score(y_test, pred_test)
print("accuracy train = %f, accuracy_test = %f" % (acc_train, acc_test)
print("accuracy train = %f, accuracy_test = %f" % (acc_train, acc_test))
score_train = lr.score(x_train, y_train)
score_test = lr.score(x_test, y_test)
print("score_train = %f, score_test = %f" % (score_train, score_test))
# +
from sklearn.metrics import confusion_matrix
# plot confusion matrix
cm = confusion_matrix(y_test, pred_test)