You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

convert(MNIST2Matlab).m 3.0 kB

8 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. % Version 1.000
  2. %
  3. % Code provided by Ruslan Salakhutdinov and Geoff Hinton
  4. %
  5. % Permission is granted for anyone to copy, use, modify, or distribute this
  6. % program and accompanying programs and documents for any purpose, provided
  7. % this copyright notice is retained and prominently displayed, along with
  8. % a note saying that the original programs are available from our
  9. % web page.
  10. % The programs and documents are distributed without any warranty, express or
  11. % implied. As the programs were written for research purposes only, they have
  12. % not been tested to the degree that would be advisable in any important
  13. % application. All use of these programs is entirely at the user's own risk.
  14. % This program reads raw MNIST files available at
  15. % http://yann.lecun.com/exdb/mnist/
  16. % and converts them to files in matlab format
  17. % Before using this program you first need to download files:
  18. % train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz
  19. % t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz
  20. % and gunzip them. You need to allocate some space for this.
  21. % This program was originally written by Yee Whye Teh
  % ---- Work with test files first ----
  % Reads the raw MNIST test images/labels, writes each image as one text
  % row (784 pixel values) into test<d>.ascii according to its label d,
  % then reloads each per-digit text file and saves it as test<d>.mat
  % holding the matrix D (one image per row).
  fprintf(1,'You first need to download files:\n train-images-idx3-ubyte.gz\n train-labels-idx1-ubyte.gz\n t10k-images-idx3-ubyte.gz\n t10k-labels-idx1-ubyte.gz\n from http://yann.lecun.com/exdb/mnist/\n and gunzip them \n');

  n = 1000;        % images processed per chunk
  nchunks = 10;    % 10 chunks of 1000 = 10,000 test images

  % The idx headers are big-endian 32-bit integers, so open the files with
  % the 'ieee-be' machine format; pixel/label bytes are unaffected by it.
  f = fopen('t10k-images-idx3-ubyte','r','ieee-be');
  if f < 0
    error('Cannot open t10k-images-idx3-ubyte: download and gunzip it first.');
  end
  g = fopen('t10k-labels-idx1-ubyte','r','ieee-be');
  if g < 0
    fclose(f);
    error('Cannot open t10k-labels-idx1-ubyte: download and gunzip it first.');
  end

  % Image header: magic 2051, image count, rows, cols.
  % Label header: magic 2049, label count.
  [a,count] = fread(f,4,'int32');
  [l,count] = fread(g,2,'int32');
  headerOk = numel(a) == 4 && numel(l) == 2 && ...
             a(1) == 2051 && l(1) == 2049 && ...
             a(3) == 28 && a(4) == 28 && ...
             a(2) >= nchunks*n && l(2) >= nchunks*n;
  if ~headerOk
    fclose(f);
    fclose(g);
    error('MNIST test files have an unexpected header (still gzipped or wrong file?).');
  end

  fprintf(1,'Starting to convert Test MNIST images (prints 10 dots) \n');

  % One output text file per digit class 0..9.
  Df = cell(1,10);
  for d = 0:9
    Df{d+1} = fopen(['test' num2str(d) '.ascii'],'w');
    if Df{d+1} < 0
      error('Cannot create test%d.ascii in the current directory.', d);
    end
  end

  for i = 1:nchunks
    fprintf('.');
    rawimages = fread(f,28*28*n,'uchar');
    rawlabels = fread(g,n,'uchar');
    rawimages = reshape(rawimages,28*28,n);   % one image per column
    for j = 1:n
      % Append image j as a single row to the file of its class.
      fprintf(Df{rawlabels(j)+1},'%3d ',rawimages(:,j));
      fprintf(Df{rawlabels(j)+1},'\n');
    end
  end
  fprintf(1,'\n');

  % Release the input files; the original script left them open.
  fclose(f);
  fclose(g);

  for d = 0:9
    fclose(Df{d+1});
    D = load(['test' num2str(d) '.ascii'],'-ascii');
    fprintf('%5d Digits of class %d\n',size(D,1),d);
    save(['test' num2str(d) '.mat'],'D','-mat');
  end
  % ---- Work with training files second ----
  % Reads the raw MNIST training images/labels, writes each image as one
  % text row (784 pixel values) into digit<d>.ascii according to its label
  % d, then reloads each per-digit text file and saves it as digit<d>.mat
  % holding the matrix D (one image per row). Finally removes all
  % intermediate *.ascii files from the current directory.
  n = 1000;        % images processed per chunk
  nchunks = 60;    % 60 chunks of 1000 = 60,000 training images

  % The idx headers are big-endian 32-bit integers, so open the files with
  % the 'ieee-be' machine format; pixel/label bytes are unaffected by it.
  f = fopen('train-images-idx3-ubyte','r','ieee-be');
  if f < 0
    error('Cannot open train-images-idx3-ubyte: download and gunzip it first.');
  end
  g = fopen('train-labels-idx1-ubyte','r','ieee-be');
  if g < 0
    fclose(f);
    error('Cannot open train-labels-idx1-ubyte: download and gunzip it first.');
  end

  % Image header: magic 2051, image count, rows, cols.
  % Label header: magic 2049, label count.
  [a,count] = fread(f,4,'int32');
  [l,count] = fread(g,2,'int32');
  headerOk = numel(a) == 4 && numel(l) == 2 && ...
             a(1) == 2051 && l(1) == 2049 && ...
             a(3) == 28 && a(4) == 28 && ...
             a(2) >= nchunks*n && l(2) >= nchunks*n;
  if ~headerOk
    fclose(f);
    fclose(g);
    error('MNIST training files have an unexpected header (still gzipped or wrong file?).');
  end

  fprintf(1,'Starting to convert Training MNIST images (prints 60 dots)\n');

  % One output text file per digit class 0..9.
  Df = cell(1,10);
  for d = 0:9
    Df{d+1} = fopen(['digit' num2str(d) '.ascii'],'w');
    if Df{d+1} < 0
      error('Cannot create digit%d.ascii in the current directory.', d);
    end
  end

  for i = 1:nchunks
    fprintf('.');
    rawimages = fread(f,28*28*n,'uchar');
    rawlabels = fread(g,n,'uchar');
    rawimages = reshape(rawimages,28*28,n);   % one image per column
    for j = 1:n
      % Append image j as a single row to the file of its class.
      fprintf(Df{rawlabels(j)+1},'%3d ',rawimages(:,j));
      fprintf(Df{rawlabels(j)+1},'\n');
    end
  end
  fprintf(1,'\n');

  % Release the input files; the original script left them open.
  fclose(f);
  fclose(g);

  for d = 0:9
    fclose(Df{d+1});
    D = load(['digit' num2str(d) '.ascii'],'-ascii');
    fprintf('%5d Digits of class %d\n',size(D,1),d);
    save(['digit' num2str(d) '.mat'],'D','-mat');
  end

  % Remove the intermediate text files. The built-in delete works on every
  % platform, whereas shelling out to 'rm' fails on Windows.
  delete('*.ascii');

机器学习

Contributors (1)