You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

1-numpy_tutorial.ipynb 164 kB

6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago

  1. {
  2. "cells": [
  3. {
  4. "cell_type": "markdown",
  5. "metadata": {},
  6. "source": [
  7. "# Numpy - 多维数据的数组"
  8. ]
  9. },
  10. {
  11. "cell_type": "markdown",
  12. "metadata": {},
  13. "source": [
  14. "J.R. Johansson (jrjohansson at gmail.com)\n",
  15. "\n",
  16. "最新的[IPython notebook](http://ipython.org/notebook.html)课程可以在[http://github.com/jrjohansson/scientific-python-lectures](http://github.com/jrjohansson/scientific-python-lectures) 找到.\n",
  17. "\n",
  18. "其他有关这个课程的参考书在这里标注出[http://jrjohansson.github.io](http://jrjohansson.github.io).\n"
  19. ]
  20. },
  21. {
  22. "cell_type": "code",
  23. "execution_count": 1,
  24. "metadata": {},
  25. "outputs": [],
  26. "source": [
  27. "# 这一行的作用会在课程4中回答\n",
  28. "%matplotlib inline\n",
  29. "import matplotlib.pyplot as plt"
  30. ]
  31. },
  32. {
  33. "cell_type": "markdown",
  34. "metadata": {},
  35. "source": [
  36. "## 1. 简介"
  37. ]
  38. },
  39. {
  40. "cell_type": "markdown",
  41. "metadata": {},
  42. "source": [
  43. "这个`numpy`包(模块)用在几乎所有使用Python的数值计算中。它是一个为Python提供高性能向量,矩阵和高维数据结构的模块。它是用C和Fortran语言实现的,因此当计算向量化数据(用向量和矩阵表示)时,性能非常好。\n",
  44. "\n",
  45. "为了使用`numpy`模块,你先要像下面的例子一样导入这个模块:"
  46. ]
  47. },
  48. {
  49. "cell_type": "code",
  50. "execution_count": 2,
  51. "metadata": {},
  52. "outputs": [],
  53. "source": [
  54. "from numpy import *\n",
  55. "import numpy as np"
  56. ]
  57. },
  58. {
  59. "cell_type": "markdown",
  60. "metadata": {},
  61. "source": [
  62. "在`numpy`模块中,用于向量,矩阵和高维数据集的术语是*数组*。\n",
  63. "\n",
  64. "**建议大家使用第二种导入方法** `import numpy as np`\n"
  65. ]
  66. },
  67. {
  68. "cell_type": "markdown",
  69. "metadata": {},
  70. "source": [
  71. "## 2. 创建`numpy`数组"
  72. ]
  73. },
  74. {
  75. "cell_type": "markdown",
  76. "metadata": {},
  77. "source": [
  78. "有很多种方法去初始化新的numpy数组, 例如从\n",
  79. "\n",
  80. "* Python列表或元组\n",
  81. "* 使用专门用来创建numpy arrays的函数,例如 `arange`, `linspace`等\n",
  82. "* 从文件中读取数据"
  83. ]
  84. },
  85. {
  86. "cell_type": "markdown",
  87. "metadata": {},
  88. "source": [
  89. "### 2.1 从列表中"
  90. ]
  91. },
  92. {
  93. "cell_type": "markdown",
  94. "metadata": {},
  95. "source": [
  96. "例如,为了从Python列表创建新的向量和矩阵我们可以用`numpy.array`函数。\n"
  97. ]
  98. },
  99. {
  100. "cell_type": "code",
  101. "execution_count": 3,
  102. "metadata": {},
  103. "outputs": [
  104. {
  105. "data": {
  106. "text/plain": [
  107. "array([1, 2, 3, 4])"
  108. ]
  109. },
  110. "execution_count": 3,
  111. "metadata": {},
  112. "output_type": "execute_result"
  113. }
  114. ],
  115. "source": [
  116. "import numpy as np\n",
  117. "\n",
  118. "# a vector: the argument to the array function is a Python list\n",
  119. "v = np.array([1,2,3,4])\n",
  120. "\n",
  121. "v"
  122. ]
  123. },
  124. {
  125. "cell_type": "code",
  126. "execution_count": 4,
  127. "metadata": {},
  128. "outputs": [
  129. {
  130. "name": "stdout",
  131. "output_type": "stream",
  132. "text": [
  133. "[[1 2]\n",
  134. " [3 4]\n",
  135. " [5 6]]\n",
  136. "(3, 2)\n"
  137. ]
  138. }
  139. ],
  140. "source": [
  141. "# 矩阵:数组函数的参数是一个嵌套的Python列表\n",
  142. "M = array([[1, 2], [3, 4], [5, 6]])\n",
  143. "\n",
  144. "print(M)\n",
  145. "print(M.shape)"
  146. ]
  147. },
  148. {
  149. "cell_type": "markdown",
  150. "metadata": {},
  151. "source": [
  152. "`v`和`M`两个都是属于`numpy`模块提供的`ndarray`类型。"
  153. ]
  154. },
  155. {
  156. "cell_type": "code",
  157. "execution_count": 5,
  158. "metadata": {},
  159. "outputs": [
  160. {
  161. "data": {
  162. "text/plain": [
  163. "(numpy.ndarray, numpy.ndarray)"
  164. ]
  165. },
  166. "execution_count": 5,
  167. "metadata": {},
  168. "output_type": "execute_result"
  169. }
  170. ],
  171. "source": [
  172. "type(v), type(M)"
  173. ]
  174. },
  175. {
  176. "cell_type": "markdown",
  177. "metadata": {},
  178. "source": [
  179. "`v`和`M`之间的区别仅在于他们的形状。我们可以用属性函数`ndarray.shape`得到数组形状的信息。"
  180. ]
  181. },
  182. {
  183. "cell_type": "code",
  184. "execution_count": 6,
  185. "metadata": {},
  186. "outputs": [
  187. {
  188. "data": {
  189. "text/plain": [
  190. "(4,)"
  191. ]
  192. },
  193. "execution_count": 6,
  194. "metadata": {},
  195. "output_type": "execute_result"
  196. }
  197. ],
  198. "source": [
  199. "v.shape"
  200. ]
  201. },
  202. {
  203. "cell_type": "code",
  204. "execution_count": 7,
  205. "metadata": {},
  206. "outputs": [
  207. {
  208. "data": {
  209. "text/plain": [
  210. "(3, 2)"
  211. ]
  212. },
  213. "execution_count": 7,
  214. "metadata": {},
  215. "output_type": "execute_result"
  216. }
  217. ],
  218. "source": [
  219. "M.shape"
  220. ]
  221. },
  222. {
  223. "cell_type": "markdown",
  224. "metadata": {},
  225. "source": [
  226. "通过属性函数`ndarray.size`我们可以得到数组中元素的个数"
  227. ]
  228. },
  229. {
  230. "cell_type": "code",
  231. "execution_count": 8,
  232. "metadata": {},
  233. "outputs": [
  234. {
  235. "data": {
  236. "text/plain": [
  237. "6"
  238. ]
  239. },
  240. "execution_count": 8,
  241. "metadata": {},
  242. "output_type": "execute_result"
  243. }
  244. ],
  245. "source": [
  246. "M.size"
  247. ]
  248. },
  249. {
  250. "cell_type": "markdown",
  251. "metadata": {},
  252. "source": [
  253. "同样,我们可以用函数`numpy.shape`和`numpy.size`"
  254. ]
  255. },
  256. {
  257. "cell_type": "code",
  258. "execution_count": 9,
  259. "metadata": {},
  260. "outputs": [
  261. {
  262. "data": {
  263. "text/plain": [
  264. "(3, 2)"
  265. ]
  266. },
  267. "execution_count": 9,
  268. "metadata": {},
  269. "output_type": "execute_result"
  270. }
  271. ],
  272. "source": [
  273. "np.shape(M)"
  274. ]
  275. },
  276. {
  277. "cell_type": "code",
  278. "execution_count": 10,
  279. "metadata": {},
  280. "outputs": [
  281. {
  282. "data": {
  283. "text/plain": [
  284. "6"
  285. ]
  286. },
  287. "execution_count": 10,
  288. "metadata": {},
  289. "output_type": "execute_result"
  290. }
  291. ],
  292. "source": [
  293. "np.size(M)"
  294. ]
  295. },
  296. {
  297. "cell_type": "markdown",
  298. "metadata": {},
  299. "source": [
  300. "到目前为止`numpy.ndarray`看起来非常像Python列表(或嵌套列表)。为什么不简单地使用Python列表来进行计算,而不是创建一个新的数组类型?\n",
  301. "\n",
  302. "下面有几个原因:\n",
  303. "\n",
  304. "* Python列表非常通用。它们可以包含任何类型的对象。它们是动态类型的。它们不支持矩阵和点乘等数学函数。由于动态类型的关系,为Python列表实现这类函数的效率不是很高。\n",
  305. "* Numpy数组是**静态类型的**和**同构的**。元素的类型是在创建数组时确定的。\n",
  306. "* Numpy数组是内存高效的。\n",
  307. "* 由于是静态类型,`numpy`数组的乘法和加法等数学函数可以用编译语言(C和Fortran)快速实现。\n",
  308. "\n",
  309. "利用`ndarray`的属性函数`dtype`(数据类型),我们可以看出数组的数据是哪种类型。\n"
  310. ]
  311. },
  312. {
  313. "cell_type": "code",
  314. "execution_count": 11,
  315. "metadata": {},
  316. "outputs": [
  317. {
  318. "data": {
  319. "text/plain": [
  320. "dtype('int64')"
  321. ]
  322. },
  323. "execution_count": 11,
  324. "metadata": {},
  325. "output_type": "execute_result"
  326. }
  327. ],
  328. "source": [
  329. "M.dtype"
  330. ]
  331. },
  332. {
  333. "cell_type": "markdown",
  334. "metadata": {},
  335. "source": [
  336. "如果我们试图给一个numpy数组中的元素赋一个错误类型的值,我们会得到一个错误:"
  337. ]
  338. },
  339. {
  340. "cell_type": "code",
  341. "execution_count": 12,
  342. "metadata": {},
  343. "outputs": [
  344. {
  345. "ename": "ValueError",
  346. "evalue": "invalid literal for int() with base 10: 'hello'",
  347. "output_type": "error",
  348. "traceback": [
  349. "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
  350. "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
  351. "\u001b[0;32m<ipython-input-12-e1f336250f69>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mM\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"hello\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
  352. "\u001b[0;31mValueError\u001b[0m: invalid literal for int() with base 10: 'hello'"
  353. ]
  354. }
  355. ],
  356. "source": [
  357. "M[0,0] = \"hello\""
  358. ]
  359. },
  360. {
  361. "cell_type": "markdown",
  362. "metadata": {},
  363. "source": [
  364. "如果我们想的话,我们可以利用`dtype`关键字参数显式地定义我们创建的数组数据类型:"
  365. ]
  366. },
  367. {
  368. "cell_type": "code",
  369. "execution_count": 13,
  370. "metadata": {},
  371. "outputs": [
  372. {
  373. "data": {
  374. "text/plain": [
  375. "array([[1.+0.j, 2.+0.j],\n",
  376. " [3.+0.j, 4.+0.j]])"
  377. ]
  378. },
  379. "execution_count": 13,
  380. "metadata": {},
  381. "output_type": "execute_result"
  382. }
  383. ],
  384. "source": [
  385. "M = np.array([[1, 2], [3, 4]], dtype=complex)\n",
  386. "\n",
  387. "M"
  388. ]
  389. },
  390. {
  391. "cell_type": "markdown",
  392. "metadata": {},
  393. "source": [
  394. "常见的可以与`dtype`一起使用的数据类型有:`int`, `float`, `complex`, `bool`, `object`等\n",
  395. "\n",
  396. "我们也可以显式地定义数据类型的大小,例如:`int64`, `int16`, `float128`, `complex128`。"
  397. ]
  398. },
  399. {
  400. "cell_type": "markdown",
  401. "metadata": {},
  402. "source": [
  403. "### 2.2 使用数组生成函数"
  404. ]
  405. },
  406. {
  407. "cell_type": "markdown",
  408. "metadata": {},
  409. "source": [
  410. "对于较大的数组,使用显式的Python列表人为地初始化数据是不切实际的。除此之外我们可以用`numpy`的很多函数得到不同类型的数组。有一些常用的分别是:"
  411. ]
  412. },
  413. {
  414. "cell_type": "markdown",
  415. "metadata": {},
  416. "source": [
  417. "#### arange"
  418. ]
  419. },
  420. {
  421. "cell_type": "code",
  422. "execution_count": 14,
  423. "metadata": {},
  424. "outputs": [
  425. {
  426. "name": "stdout",
  427. "output_type": "stream",
  428. "text": [
  429. "[0 1 2 3 4 5 6 7 8 9]\n",
  430. "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n"
  431. ]
  432. }
  433. ],
  434. "source": [
  435. "# 创建一个范围\n",
  436. "\n",
  437. "x = np.arange(0, 10, 1) # 参数:start, stop, step: \n",
  438. "y = range(0, 10, 1)\n",
  439. "print(x)\n",
  440. "print(list(y))"
  441. ]
  442. },
  443. {
  444. "cell_type": "code",
  445. "execution_count": 15,
  446. "metadata": {},
  447. "outputs": [
  448. {
  449. "data": {
  450. "text/plain": [
  451. "array([-1.00000000e+00, -9.00000000e-01, -8.00000000e-01, -7.00000000e-01,\n",
  452. " -6.00000000e-01, -5.00000000e-01, -4.00000000e-01, -3.00000000e-01,\n",
  453. " -2.00000000e-01, -1.00000000e-01, -2.22044605e-16, 1.00000000e-01,\n",
  454. " 2.00000000e-01, 3.00000000e-01, 4.00000000e-01, 5.00000000e-01,\n",
  455. " 6.00000000e-01, 7.00000000e-01, 8.00000000e-01, 9.00000000e-01])"
  456. ]
  457. },
  458. "execution_count": 15,
  459. "metadata": {},
  460. "output_type": "execute_result"
  461. }
  462. ],
  463. "source": [
  464. "x = np.arange(-1, 1, 0.1)\n",
  465. "\n",
  466. "x"
  467. ]
  468. },
  469. {
  470. "cell_type": "markdown",
  471. "metadata": {},
  472. "source": [
  473. "#### linspace and logspace"
  474. ]
  475. },
  476. {
  477. "cell_type": "code",
  478. "execution_count": 16,
  479. "metadata": {},
  480. "outputs": [
  481. {
  482. "data": {
  483. "text/plain": [
  484. "array([ 0. , 0.41666667, 0.83333333, 1.25 , 1.66666667,\n",
  485. " 2.08333333, 2.5 , 2.91666667, 3.33333333, 3.75 ,\n",
  486. " 4.16666667, 4.58333333, 5. , 5.41666667, 5.83333333,\n",
  487. " 6.25 , 6.66666667, 7.08333333, 7.5 , 7.91666667,\n",
  488. " 8.33333333, 8.75 , 9.16666667, 9.58333333, 10. ])"
  489. ]
  490. },
  491. "execution_count": 16,
  492. "metadata": {},
  493. "output_type": "execute_result"
  494. }
  495. ],
  496. "source": [
  497. "# 使用linspace两边的端点也被包含进去\n",
  498. "np.linspace(0, 10, 25)"
  499. ]
  500. },
  501. {
  502. "cell_type": "code",
  503. "execution_count": 17,
  504. "metadata": {},
  505. "outputs": [
  506. {
  507. "data": {
  508. "text/plain": [
  509. "array([1.00000000e+00, 3.03773178e+00, 9.22781435e+00, 2.80316249e+01,\n",
  510. " 8.51525577e+01, 2.58670631e+02, 7.85771994e+02, 2.38696456e+03,\n",
  511. " 7.25095809e+03, 2.20264658e+04])"
  512. ]
  513. },
  514. "execution_count": 17,
  515. "metadata": {},
  516. "output_type": "execute_result"
  517. }
  518. ],
  519. "source": [
  520. "np.logspace(0, 10, 10, base=e)"
  521. ]
  522. },
  523. {
  524. "cell_type": "markdown",
  525. "metadata": {},
  526. "source": [
  527. "#### mgrid"
  528. ]
  529. },
  530. {
  531. "cell_type": "code",
  532. "execution_count": 18,
  533. "metadata": {},
  534. "outputs": [],
  535. "source": [
  536. "x, y = np.mgrid[0:5, 0:5] # 和MATLAB中的meshgrid类似"
  537. ]
  538. },
  539. {
  540. "cell_type": "code",
  541. "execution_count": 19,
  542. "metadata": {},
  543. "outputs": [
  544. {
  545. "data": {
  546. "text/plain": [
  547. "array([[0, 0, 0, 0, 0],\n",
  548. " [1, 1, 1, 1, 1],\n",
  549. " [2, 2, 2, 2, 2],\n",
  550. " [3, 3, 3, 3, 3],\n",
  551. " [4, 4, 4, 4, 4]])"
  552. ]
  553. },
  554. "execution_count": 19,
  555. "metadata": {},
  556. "output_type": "execute_result"
  557. }
  558. ],
  559. "source": [
  560. "x"
  561. ]
  562. },
  563. {
  564. "cell_type": "code",
  565. "execution_count": 20,
  566. "metadata": {},
  567. "outputs": [
  568. {
  569. "data": {
  570. "text/plain": [
  571. "array([[0, 1, 2, 3, 4],\n",
  572. " [0, 1, 2, 3, 4],\n",
  573. " [0, 1, 2, 3, 4],\n",
  574. " [0, 1, 2, 3, 4],\n",
  575. " [0, 1, 2, 3, 4]])"
  576. ]
  577. },
  578. "execution_count": 20,
  579. "metadata": {},
  580. "output_type": "execute_result"
  581. }
  582. ],
  583. "source": [
  584. "y"
  585. ]
  586. },
  587. {
  588. "cell_type": "markdown",
  589. "metadata": {},
  590. "source": [
  591. "#### random data"
  592. ]
  593. },
  594. {
  595. "cell_type": "code",
  596. "execution_count": 21,
  597. "metadata": {},
  598. "outputs": [],
  599. "source": [
  600. "from numpy import random"
  601. ]
  602. },
  603. {
  604. "cell_type": "code",
  605. "execution_count": 22,
  606. "metadata": {},
  607. "outputs": [
  608. {
  609. "data": {
  610. "text/plain": [
  611. "array([[0.31850549, 0.64755869, 0.93737096, 0.06141188, 0.17055487],\n",
  612. " [0.95771684, 0.88466718, 0.81119863, 0.95268744, 0.73734857],\n",
  613. " [0.51036326, 0.8779331 , 0.41560197, 0.300393 , 0.42244209],\n",
  614. " [0.50866631, 0.84322931, 0.34459543, 0.47379641, 0.03312725],\n",
  615. " [0.96519922, 0.20557788, 0.38343937, 0.21493144, 0.27541461]])"
  616. ]
  617. },
  618. "execution_count": 22,
  619. "metadata": {},
  620. "output_type": "execute_result"
  621. }
  622. ],
  623. "source": [
  624. "# 均匀随机数在[0,1)区间\n",
  625. "random.rand(5,5)"
  626. ]
  627. },
  628. {
  629. "cell_type": "code",
  630. "execution_count": 23,
  631. "metadata": {},
  632. "outputs": [
  633. {
  634. "data": {
  635. "text/plain": [
  636. "array([[ 1.12204579, 2.90667688, -1.06379302, 1.52801804, 1.34553205],\n",
  637. " [ 2.22610261, -0.18597008, 1.12948162, -1.44339033, 0.14366645],\n",
  638. " [ 0.12767746, -0.04534549, 0.1536468 , 0.7333602 , 0.96510913],\n",
  639. " [ 0.30848743, -2.31710677, 0.37803085, -0.52433003, 1.39883453],\n",
  640. " [-0.52307504, 0.40612781, 0.48341866, -1.96277249, 1.1671546 ]])"
  641. ]
  642. },
  643. "execution_count": 23,
  644. "metadata": {},
  645. "output_type": "execute_result"
  646. }
  647. ],
  648. "source": [
  649. "# 标准正态分布随机数\n",
  650. "random.randn(5,5)"
  651. ]
  652. },
  653. {
  654. "cell_type": "markdown",
  655. "metadata": {},
  656. "source": [
  657. "#### diag"
  658. ]
  659. },
  660. {
  661. "cell_type": "code",
  662. "execution_count": 24,
  663. "metadata": {},
  664. "outputs": [
  665. {
  666. "data": {
  667. "text/plain": [
  668. "array([[1, 0, 0],\n",
  669. " [0, 2, 0],\n",
  670. " [0, 0, 3]])"
  671. ]
  672. },
  673. "execution_count": 24,
  674. "metadata": {},
  675. "output_type": "execute_result"
  676. }
  677. ],
  678. "source": [
  679. "# 一个对角矩阵\n",
  680. "np.diag([1,2,3])"
  681. ]
  682. },
  683. {
  684. "cell_type": "code",
  685. "execution_count": 25,
  686. "metadata": {},
  687. "outputs": [
  688. {
  689. "data": {
  690. "text/plain": [
  691. "array([[0, 1, 0, 0],\n",
  692. " [0, 0, 2, 0],\n",
  693. " [0, 0, 0, 3],\n",
  694. " [0, 0, 0, 0]])"
  695. ]
  696. },
  697. "execution_count": 25,
  698. "metadata": {},
  699. "output_type": "execute_result"
  700. }
  701. ],
  702. "source": [
  703. "# 从主对角线偏移的对角线\n",
  704. "diag([1,2,3], k=1) "
  705. ]
  706. },
  707. {
  708. "cell_type": "markdown",
  709. "metadata": {},
  710. "source": [
  711. "#### zeros and ones"
  712. ]
  713. },
  714. {
  715. "cell_type": "code",
  716. "execution_count": 26,
  717. "metadata": {},
  718. "outputs": [
  719. {
  720. "data": {
  721. "text/plain": [
  722. "array([[0., 0., 0.],\n",
  723. " [0., 0., 0.],\n",
  724. " [0., 0., 0.]])"
  725. ]
  726. },
  727. "execution_count": 26,
  728. "metadata": {},
  729. "output_type": "execute_result"
  730. }
  731. ],
  732. "source": [
  733. "np.zeros((3,3))"
  734. ]
  735. },
  736. {
  737. "cell_type": "code",
  738. "execution_count": 27,
  739. "metadata": {},
  740. "outputs": [
  741. {
  742. "data": {
  743. "text/plain": [
  744. "array([[1., 1., 1.],\n",
  745. " [1., 1., 1.],\n",
  746. " [1., 1., 1.]])"
  747. ]
  748. },
  749. "execution_count": 27,
  750. "metadata": {},
  751. "output_type": "execute_result"
  752. }
  753. ],
  754. "source": [
  755. "np.ones((3,3))"
  756. ]
  757. },
  758. {
  759. "cell_type": "markdown",
  760. "metadata": {},
  761. "source": [
  762. "## 3. 文件 I/O"
  763. ]
  764. },
  765. {
  766. "cell_type": "markdown",
  767. "metadata": {},
  768. "source": [
  769. "### 3.1 逗号分隔值 (CSV)"
  770. ]
  771. },
  772. {
  773. "cell_type": "markdown",
  774. "metadata": {},
  775. "source": [
  776. "对于数据文件来说一种非常常见的文件格式是逗号分隔值(CSV),或者相关的格式例如TSV(制表符分隔的值)。为了从这些文件中读取数据到Numpy数组中,我们可以用`numpy.genfromtxt`函数。例如:"
  777. ]
  778. },
  779. {
  780. "cell_type": "code",
  781. "execution_count": 28,
  782. "metadata": {},
  783. "outputs": [
  784. {
  785. "name": "stdout",
  786. "output_type": "stream",
  787. "text": [
  788. "1800 1 1 -6.1 -6.1 -6.1 1\r\n",
  789. "1800 1 2 -15.4 -15.4 -15.4 1\r\n",
  790. "1800 1 3 -15.0 -15.0 -15.0 1\r\n",
  791. "1800 1 4 -19.3 -19.3 -19.3 1\r\n",
  792. "1800 1 5 -16.8 -16.8 -16.8 1\r\n",
  793. "1800 1 6 -11.4 -11.4 -11.4 1\r\n",
  794. "1800 1 7 -7.6 -7.6 -7.6 1\r\n",
  795. "1800 1 8 -7.1 -7.1 -7.1 1\r\n",
  796. "1800 1 9 -10.1 -10.1 -10.1 1\r\n",
  797. "1800 1 10 -9.5 -9.5 -9.5 1\r\n"
  798. ]
  799. }
  800. ],
  801. "source": [
  802. "!head stockholm_td_adj.dat"
  803. ]
  804. },
  805. {
  806. "cell_type": "code",
  807. "execution_count": 29,
  808. "metadata": {},
  809. "outputs": [],
  810. "source": [
  811. "import numpy as np\n",
  812. "data = np.genfromtxt('stockholm_td_adj.dat')"
  813. ]
  814. },
  815. {
  816. "cell_type": "code",
  817. "execution_count": 30,
  818. "metadata": {},
  819. "outputs": [
  820. {
  821. "data": {
  822. "text/plain": [
  823. "(77431, 7)"
  824. ]
  825. },
  826. "execution_count": 30,
  827. "metadata": {},
  828. "output_type": "execute_result"
  829. }
  830. ],
  831. "source": [
  832. "data.shape"
  833. ]
  834. },
  835. {
  836. "cell_type": "code",
  837. "execution_count": 31,
  838. "metadata": {},
  839. "outputs": [
  840. {
  841. "data": {
  842. "image/png": "\n",
  843. "text/plain": [
  844. "<Figure size 1008x288 with 1 Axes>"
  845. ]
  846. },
  847. "metadata": {
  848. "needs_background": "light"
  849. },
  850. "output_type": "display_data"
  851. }
  852. ],
  853. "source": [
  854. "%matplotlib inline\n",
  855. "import matplotlib.pyplot as plt\n",
  856. "\n",
  857. "fig, ax = plt.subplots(figsize=(14,4))\n",
  858. "ax.plot(data[:,0]+data[:,1]/12.0+data[:,2]/365, data[:,5])\n",
  859. "ax.axis('tight')\n",
  860. "ax.set_title('tempeatures in Stockholm')\n",
  861. "ax.set_xlabel('year')\n",
  862. "ax.set_ylabel('temperature (C)');"
  863. ]
  864. },
  865. {
  866. "cell_type": "markdown",
  867. "metadata": {},
  868. "source": [
  869. "使用`numpy.savetxt`我们可以将一个Numpy数组以CSV格式存入:"
  870. ]
  871. },
  872. {
  873. "cell_type": "code",
  874. "execution_count": 32,
  875. "metadata": {},
  876. "outputs": [
  877. {
  878. "data": {
  879. "text/plain": [
  880. "array([[0.73171836, 0.46544202, 0.72372739],\n",
  881. " [0.32390603, 0.09679475, 0.95467059],\n",
  882. " [0.36051701, 0.78361037, 0.00716923]])"
  883. ]
  884. },
  885. "execution_count": 32,
  886. "metadata": {},
  887. "output_type": "execute_result"
  888. }
  889. ],
  890. "source": [
  891. "M = np.random.rand(3,3)\n",
  892. "\n",
  893. "M"
  894. ]
  895. },
  896. {
  897. "cell_type": "code",
  898. "execution_count": 33,
  899. "metadata": {},
  900. "outputs": [],
  901. "source": [
  902. "np.savetxt(\"random-matrix.csv\", M)"
  903. ]
  904. },
  905. {
  906. "cell_type": "code",
  907. "execution_count": 34,
  908. "metadata": {},
  909. "outputs": [
  910. {
  911. "name": "stdout",
  912. "output_type": "stream",
  913. "text": [
  914. "7.317183558113176112e-01 4.654420244898096470e-01 7.237273924754552556e-01\r\n",
  915. "3.239060308567449642e-01 9.679474636543183852e-02 9.546705930168928322e-01\r\n",
  916. "3.605170063363589694e-01 7.836103655978251536e-01 7.169228636445423852e-03\r\n"
  917. ]
  918. }
  919. ],
  920. "source": [
  921. "!cat random-matrix.csv"
  922. ]
  923. },
  924. {
  925. "cell_type": "code",
  926. "execution_count": 35,
  927. "metadata": {},
  928. "outputs": [
  929. {
  930. "name": "stdout",
  931. "output_type": "stream",
  932. "text": [
  933. "0.73172 0.46544 0.72373\r\n",
  934. "0.32391 0.09679 0.95467\r\n",
  935. "0.36052 0.78361 0.00717\r\n"
  936. ]
  937. }
  938. ],
  939. "source": [
  940. "np.savetxt(\"random-matrix.csv\", M, fmt='%.5f') # fmt 确定格式\n",
  941. "\n",
  942. "!cat random-matrix.csv"
  943. ]
  944. },
  945. {
  946. "cell_type": "markdown",
  947. "metadata": {},
  948. "source": [
  949. "### 3.2 numpy 的本地文件格式"
  950. ]
  951. },
  952. {
  953. "cell_type": "markdown",
  954. "metadata": {},
  955. "source": [
  956. "numpy的本地文件格式在存储和读取numpy数组时非常有用。可以利用函数`numpy.save`和`numpy.load`:"
  957. ]
  958. },
  959. {
  960. "cell_type": "code",
  961. "execution_count": 36,
  962. "metadata": {},
  963. "outputs": [
  964. {
  965. "name": "stdout",
  966. "output_type": "stream",
  967. "text": [
  968. "random-matrix.npy: data\r\n"
  969. ]
  970. }
  971. ],
  972. "source": [
  973. "np.save(\"random-matrix.npy\", M)\n",
  974. "\n",
  975. "!file random-matrix.npy"
  976. ]
  977. },
  978. {
  979. "cell_type": "code",
  980. "execution_count": 37,
  981. "metadata": {},
  982. "outputs": [
  983. {
  984. "data": {
  985. "text/plain": [
  986. "array([[0.73171836, 0.46544202, 0.72372739],\n",
  987. " [0.32390603, 0.09679475, 0.95467059],\n",
  988. " [0.36051701, 0.78361037, 0.00716923]])"
  989. ]
  990. },
  991. "execution_count": 37,
  992. "metadata": {},
  993. "output_type": "execute_result"
  994. }
  995. ],
  996. "source": [
  997. "np.load(\"random-matrix.npy\")"
  998. ]
  999. },
  1000. {
  1001. "cell_type": "markdown",
  1002. "metadata": {},
  1003. "source": [
  1004. "## 4. 更多Numpy数组的性质"
  1005. ]
  1006. },
  1007. {
  1008. "cell_type": "code",
  1009. "execution_count": 38,
  1010. "metadata": {},
  1011. "outputs": [
  1012. {
  1013. "name": "stdout",
  1014. "output_type": "stream",
  1015. "text": [
  1016. "float64\n",
  1017. "8\n"
  1018. ]
  1019. }
  1020. ],
  1021. "source": [
  1022. "print(M.dtype)\n",
  1023. "print(M.itemsize) # 每个元素的字节数\n"
  1024. ]
  1025. },
  1026. {
  1027. "cell_type": "code",
  1028. "execution_count": 39,
  1029. "metadata": {},
  1030. "outputs": [
  1031. {
  1032. "data": {
  1033. "text/plain": [
  1034. "72"
  1035. ]
  1036. },
  1037. "execution_count": 39,
  1038. "metadata": {},
  1039. "output_type": "execute_result"
  1040. }
  1041. ],
  1042. "source": [
  1043. "M.nbytes # 字节数"
  1044. ]
  1045. },
  1046. {
  1047. "cell_type": "code",
  1048. "execution_count": 40,
  1049. "metadata": {},
  1050. "outputs": [
  1051. {
  1052. "data": {
  1053. "text/plain": [
  1054. "2"
  1055. ]
  1056. },
  1057. "execution_count": 40,
  1058. "metadata": {},
  1059. "output_type": "execute_result"
  1060. }
  1061. ],
  1062. "source": [
  1063. "M.ndim # 维度"
  1064. ]
  1065. },
  1066. {
  1067. "cell_type": "markdown",
  1068. "metadata": {},
  1069. "source": [
  1070. "## 5. 操作数组"
  1071. ]
  1072. },
  1073. {
  1074. "cell_type": "markdown",
  1075. "metadata": {},
  1076. "source": [
  1077. "### 5.1 索引"
  1078. ]
  1079. },
  1080. {
  1081. "cell_type": "markdown",
  1082. "metadata": {},
  1083. "source": [
  1084. "我们可以用方括号和下标索引元素:"
  1085. ]
  1086. },
  1087. {
  1088. "cell_type": "code",
  1089. "execution_count": 41,
  1090. "metadata": {},
  1091. "outputs": [
  1092. {
  1093. "data": {
  1094. "text/plain": [
  1095. "1"
  1096. ]
  1097. },
  1098. "execution_count": 41,
  1099. "metadata": {},
  1100. "output_type": "execute_result"
  1101. }
  1102. ],
  1103. "source": [
  1104. "v = np.array([1, 2, 3, 4, 5])\n",
  1105. "# v 是一个向量,仅仅只有一维,取一个索引\n",
  1106. "v[0]"
  1107. ]
  1108. },
  1109. {
  1110. "cell_type": "code",
  1111. "execution_count": 42,
  1112. "metadata": {},
  1113. "outputs": [
  1114. {
  1115. "name": "stdout",
  1116. "output_type": "stream",
  1117. "text": [
  1118. "0.09679474636543184\n",
  1119. "0.09679474636543184\n",
  1120. "[0.32390603 0.09679475 0.95467059]\n"
  1121. ]
  1122. }
  1123. ],
  1124. "source": [
  1125. "\n",
  1126. "# M 是一个矩阵或者是一个二维的数组,取两个索引 \n",
  1127. "print(M[1,1])\n",
  1128. "print(M[1][1])\n",
  1129. "print(M[1])"
  1130. ]
  1131. },
  1132. {
  1133. "cell_type": "markdown",
  1134. "metadata": {},
  1135. "source": [
  1136. "如果我们省略了多维数组的某个索引,它将会返回整行(或者更一般地说,一个 N-1 维的数组)"
  1137. ]
  1138. },
  1139. {
  1140. "cell_type": "code",
  1141. "execution_count": 43,
  1142. "metadata": {},
  1143. "outputs": [
  1144. {
  1145. "data": {
  1146. "text/plain": [
  1147. "array([[0.73171836, 0.46544202, 0.72372739],\n",
  1148. " [0.32390603, 0.09679475, 0.95467059],\n",
  1149. " [0.36051701, 0.78361037, 0.00716923]])"
  1150. ]
  1151. },
  1152. "execution_count": 43,
  1153. "metadata": {},
  1154. "output_type": "execute_result"
  1155. }
  1156. ],
  1157. "source": [
  1158. "M"
  1159. ]
  1160. },
  1161. {
  1162. "cell_type": "code",
  1163. "execution_count": 44,
  1164. "metadata": {},
  1165. "outputs": [
  1166. {
  1167. "data": {
  1168. "text/plain": [
  1169. "array([0.32390603, 0.09679475, 0.95467059])"
  1170. ]
  1171. },
  1172. "execution_count": 44,
  1173. "metadata": {},
  1174. "output_type": "execute_result"
  1175. }
  1176. ],
  1177. "source": [
  1178. "M[1]"
  1179. ]
  1180. },
  1181. {
  1182. "cell_type": "markdown",
  1183. "metadata": {},
  1184. "source": [
  1185. "相同的事情可以利用`:`而不是索引来实现:"
  1186. ]
  1187. },
  1188. {
  1189. "cell_type": "code",
  1190. "execution_count": 45,
  1191. "metadata": {},
  1192. "outputs": [
  1193. {
  1194. "data": {
  1195. "text/plain": [
  1196. "array([0.32390603, 0.09679475, 0.95467059])"
  1197. ]
  1198. },
  1199. "execution_count": 45,
  1200. "metadata": {},
  1201. "output_type": "execute_result"
  1202. }
  1203. ],
  1204. "source": [
  1205. "M[1,:] # 行 1"
  1206. ]
  1207. },
  1208. {
  1209. "cell_type": "code",
  1210. "execution_count": 46,
  1211. "metadata": {},
  1212. "outputs": [
  1213. {
  1214. "data": {
  1215. "text/plain": [
  1216. "array([0.46544202, 0.09679475, 0.78361037])"
  1217. ]
  1218. },
  1219. "execution_count": 46,
  1220. "metadata": {},
  1221. "output_type": "execute_result"
  1222. }
  1223. ],
  1224. "source": [
  1225. "M[:,1] # 列 1"
  1226. ]
  1227. },
  1228. {
  1229. "cell_type": "markdown",
  1230. "metadata": {},
  1231. "source": [
  1232. "我们可以用索引赋新的值给数组中的元素:"
  1233. ]
  1234. },
  1235. {
  1236. "cell_type": "code",
  1237. "execution_count": 47,
  1238. "metadata": {},
  1239. "outputs": [],
  1240. "source": [
  1241. "M[0,0] = 1"
  1242. ]
  1243. },
  1244. {
  1245. "cell_type": "code",
  1246. "execution_count": 48,
  1247. "metadata": {},
  1248. "outputs": [
  1249. {
  1250. "data": {
  1251. "text/plain": [
  1252. "array([[1. , 0.46544202, 0.72372739],\n",
  1253. " [0.32390603, 0.09679475, 0.95467059],\n",
  1254. " [0.36051701, 0.78361037, 0.00716923]])"
  1255. ]
  1256. },
  1257. "execution_count": 48,
  1258. "metadata": {},
  1259. "output_type": "execute_result"
  1260. }
  1261. ],
  1262. "source": [
  1263. "M"
  1264. ]
  1265. },
  1266. {
  1267. "cell_type": "code",
  1268. "execution_count": 49,
  1269. "metadata": {},
  1270. "outputs": [],
  1271. "source": [
  1272. "# 对行和列也同样有用\n",
  1273. "M[1,:] = 0\n",
  1274. "M[:,2] = -1"
  1275. ]
  1276. },
  1277. {
  1278. "cell_type": "code",
  1279. "execution_count": 50,
  1280. "metadata": {},
  1281. "outputs": [
  1282. {
  1283. "data": {
  1284. "text/plain": [
  1285. "array([[ 1. , 0.46544202, -1. ],\n",
  1286. " [ 0. , 0. , -1. ],\n",
  1287. " [ 0.36051701, 0.78361037, -1. ]])"
  1288. ]
  1289. },
  1290. "execution_count": 50,
  1291. "metadata": {},
  1292. "output_type": "execute_result"
  1293. }
  1294. ],
  1295. "source": [
  1296. "M"
  1297. ]
  1298. },
  1299. {
  1300. "cell_type": "markdown",
  1301. "metadata": {},
  1302. "source": [
  1303. "### 5.2 切片索引"
  1304. ]
  1305. },
  1306. {
  1307. "cell_type": "markdown",
  1308. "metadata": {},
  1309. "source": [
  1310. "切片索引是语法`M[lower:upper:step]`的技术名称,用于提取数组的一部分:"
  1311. ]
  1312. },
  1313. {
  1314. "cell_type": "code",
  1315. "execution_count": 51,
  1316. "metadata": {},
  1317. "outputs": [
  1318. {
  1319. "data": {
  1320. "text/plain": [
  1321. "array([1, 2, 3, 4, 5])"
  1322. ]
  1323. },
  1324. "execution_count": 51,
  1325. "metadata": {},
  1326. "output_type": "execute_result"
  1327. }
  1328. ],
  1329. "source": [
  1330. "A = np.array([1,2,3,4,5])\n",
  1331. "A"
  1332. ]
  1333. },
  1334. {
  1335. "cell_type": "code",
  1336. "execution_count": 52,
  1337. "metadata": {},
  1338. "outputs": [
  1339. {
  1340. "data": {
  1341. "text/plain": [
  1342. "array([2, 3])"
  1343. ]
  1344. },
  1345. "execution_count": 52,
  1346. "metadata": {},
  1347. "output_type": "execute_result"
  1348. }
  1349. ],
  1350. "source": [
  1351. "A[1:3]"
  1352. ]
  1353. },
  1354. {
  1355. "cell_type": "markdown",
  1356. "metadata": {},
  1357. "source": [
  1358. "切片是*可变的*: 如果给切片赋了新值,那么被提取切片的原始数组也会被修改:\n"
  1359. ]
  1360. },
  1361. {
  1362. "cell_type": "code",
  1363. "execution_count": 53,
  1364. "metadata": {},
  1365. "outputs": [
  1366. {
  1367. "data": {
  1368. "text/plain": [
  1369. "array([ 1, -2, -3, 4, 5])"
  1370. ]
  1371. },
  1372. "execution_count": 53,
  1373. "metadata": {},
  1374. "output_type": "execute_result"
  1375. }
  1376. ],
  1377. "source": [
  1378. "A[1:3] = [-2,-3] # auto convert type\n",
  1379. "A[1:3] = np.array([-2, -3]) \n",
  1380. "\n",
  1381. "A"
  1382. ]
  1383. },
  1384. {
  1385. "cell_type": "markdown",
  1386. "metadata": {},
  1387. "source": [
  1388. "我们可以省略`M[lower:upper:step]`三个参数中的任意一个\n",
  1389. "We can omit any of the three parameters in `M[lower:upper:step]`:"
  1390. ]
  1391. },
  1392. {
  1393. "cell_type": "code",
  1394. "execution_count": 54,
  1395. "metadata": {},
  1396. "outputs": [
  1397. {
  1398. "data": {
  1399. "text/plain": [
  1400. "array([ 1, -2, -3, 4, 5])"
  1401. ]
  1402. },
  1403. "execution_count": 54,
  1404. "metadata": {},
  1405. "output_type": "execute_result"
  1406. }
  1407. ],
  1408. "source": [
  1409. "A[::] # lower, upper, step 都取默认值"
  1410. ]
  1411. },
  1412. {
  1413. "cell_type": "code",
  1414. "execution_count": 55,
  1415. "metadata": {},
  1416. "outputs": [
  1417. {
  1418. "data": {
  1419. "text/plain": [
  1420. "array([ 1, -2, -3, 4, 5])"
  1421. ]
  1422. },
  1423. "execution_count": 55,
  1424. "metadata": {},
  1425. "output_type": "execute_result"
  1426. }
  1427. ],
  1428. "source": [
  1429. "A[:]"
  1430. ]
  1431. },
  1432. {
  1433. "cell_type": "code",
  1434. "execution_count": 56,
  1435. "metadata": {},
  1436. "outputs": [
  1437. {
  1438. "data": {
  1439. "text/plain": [
  1440. "array([ 1, -3, 5])"
  1441. ]
  1442. },
  1443. "execution_count": 56,
  1444. "metadata": {},
  1445. "output_type": "execute_result"
  1446. }
  1447. ],
  1448. "source": [
  1449. "A[::2] # step is 2, lower and upper 代表数组的开始和结束"
  1450. ]
  1451. },
  1452. {
  1453. "cell_type": "code",
  1454. "execution_count": 57,
  1455. "metadata": {},
  1456. "outputs": [
  1457. {
  1458. "data": {
  1459. "text/plain": [
  1460. "array([ 1, -2, -3])"
  1461. ]
  1462. },
  1463. "execution_count": 57,
  1464. "metadata": {},
  1465. "output_type": "execute_result"
  1466. }
  1467. ],
  1468. "source": [
  1469. "A[:3] # 前3个元素"
  1470. ]
  1471. },
  1472. {
  1473. "cell_type": "code",
  1474. "execution_count": 58,
  1475. "metadata": {},
  1476. "outputs": [
  1477. {
  1478. "data": {
  1479. "text/plain": [
  1480. "array([4, 5])"
  1481. ]
  1482. },
  1483. "execution_count": 58,
  1484. "metadata": {},
  1485. "output_type": "execute_result"
  1486. }
  1487. ],
  1488. "source": [
  1489. "A[3:] # 从索引3开始的元素"
  1490. ]
  1491. },
  1492. {
  1493. "cell_type": "markdown",
  1494. "metadata": {},
  1495. "source": [
  1496. "负索引从数组的末尾开始计数(正索引从开头开始计数):"
  1497. ]
  1498. },
  1499. {
  1500. "cell_type": "code",
  1501. "execution_count": 59,
  1502. "metadata": {},
  1503. "outputs": [],
  1504. "source": [
  1505. "A = np.array([1,2,3,4,5])"
  1506. ]
  1507. },
  1508. {
  1509. "cell_type": "code",
  1510. "execution_count": 60,
  1511. "metadata": {},
  1512. "outputs": [
  1513. {
  1514. "data": {
  1515. "text/plain": [
  1516. "5"
  1517. ]
  1518. },
  1519. "execution_count": 60,
  1520. "metadata": {},
  1521. "output_type": "execute_result"
  1522. }
  1523. ],
  1524. "source": [
  1525. "A[-1] # 数组中最后一个元素"
  1526. ]
  1527. },
  1528. {
  1529. "cell_type": "code",
  1530. "execution_count": 61,
  1531. "metadata": {},
  1532. "outputs": [
  1533. {
  1534. "data": {
  1535. "text/plain": [
  1536. "array([3, 4, 5])"
  1537. ]
  1538. },
  1539. "execution_count": 61,
  1540. "metadata": {},
  1541. "output_type": "execute_result"
  1542. }
  1543. ],
  1544. "source": [
  1545. "A[-3:] # 最后三个元素"
  1546. ]
  1547. },
  1548. {
  1549. "cell_type": "markdown",
  1550. "metadata": {},
  1551. "source": [
  1552. "切片索引在多维数组上的工作方式完全相同:"
  1553. ]
  1554. },
  1555. {
  1556. "cell_type": "code",
  1557. "execution_count": 62,
  1558. "metadata": {},
  1559. "outputs": [
  1560. {
  1561. "data": {
  1562. "text/plain": [
  1563. "array([[ 0, 1, 2, 3, 4],\n",
  1564. " [10, 11, 12, 13, 14],\n",
  1565. " [20, 21, 22, 23, 24],\n",
  1566. " [30, 31, 32, 33, 34],\n",
  1567. " [40, 41, 42, 43, 44]])"
  1568. ]
  1569. },
  1570. "execution_count": 62,
  1571. "metadata": {},
  1572. "output_type": "execute_result"
  1573. }
  1574. ],
  1575. "source": [
  1576. "A = np.array([[n+m*10 for n in range(5)] for m in range(5)])\n",
  1577. "\n",
  1578. "A"
  1579. ]
  1580. },
  1581. {
  1582. "cell_type": "code",
  1583. "execution_count": 63,
  1584. "metadata": {},
  1585. "outputs": [
  1586. {
  1587. "data": {
  1588. "text/plain": [
  1589. "array([[11, 12, 13],\n",
  1590. " [21, 22, 23],\n",
  1591. " [31, 32, 33]])"
  1592. ]
  1593. },
  1594. "execution_count": 63,
  1595. "metadata": {},
  1596. "output_type": "execute_result"
  1597. }
  1598. ],
  1599. "source": [
  1600. "# 原始数组中的一个块\n",
  1601. "A[1:4, 1:4]"
  1602. ]
  1603. },
  1604. {
  1605. "cell_type": "code",
  1606. "execution_count": 64,
  1607. "metadata": {},
  1608. "outputs": [
  1609. {
  1610. "data": {
  1611. "text/plain": [
  1612. "array([[ 0, 2, 4],\n",
  1613. " [20, 22, 24],\n",
  1614. " [40, 42, 44]])"
  1615. ]
  1616. },
  1617. "execution_count": 64,
  1618. "metadata": {},
  1619. "output_type": "execute_result"
  1620. }
  1621. ],
  1622. "source": [
  1623. "# 步长\n",
  1624. "A[::2, ::2]"
  1625. ]
  1626. },
  1627. {
  1628. "cell_type": "markdown",
  1629. "metadata": {},
  1630. "source": [
  1631. "### 5.3 花式索引"
  1632. ]
  1633. },
  1634. {
  1635. "cell_type": "markdown",
  1636. "metadata": {},
  1637. "source": [
  1638. "花式索引(Fancy indexing)是指用一个数组或列表来代替单个索引的用法:"
  1639. ]
  1640. },
  1641. {
  1642. "cell_type": "code",
  1643. "execution_count": 65,
  1644. "metadata": {},
  1645. "outputs": [
  1646. {
  1647. "name": "stdout",
  1648. "output_type": "stream",
  1649. "text": [
  1650. "[[10 11 12 13 14]\n",
  1651. " [20 21 22 23 24]\n",
  1652. " [30 31 32 33 34]]\n",
  1653. "[[ 0 1 2 3 4]\n",
  1654. " [10 11 12 13 14]\n",
  1655. " [20 21 22 23 24]\n",
  1656. " [30 31 32 33 34]\n",
  1657. " [40 41 42 43 44]]\n"
  1658. ]
  1659. }
  1660. ],
  1661. "source": [
  1662. "row_indices = [1, 2, 3]\n",
  1663. "print(A[row_indices])\n",
  1664. "print(A)"
  1665. ]
  1666. },
  1667. {
  1668. "cell_type": "code",
  1669. "execution_count": 66,
  1670. "metadata": {},
  1671. "outputs": [
  1672. {
  1673. "data": {
  1674. "text/plain": [
  1675. "array([11, 22, 34])"
  1676. ]
  1677. },
  1678. "execution_count": 66,
  1679. "metadata": {},
  1680. "output_type": "execute_result"
  1681. }
  1682. ],
  1683. "source": [
  1684. "col_indices = [1, 2, -1] # 索引-1 代表最后一个元素\n",
  1685. "A[row_indices, col_indices]"
  1686. ]
  1687. },
  1688. {
  1689. "cell_type": "markdown",
  1690. "metadata": {},
  1691. "source": [
  1692. "我们也可以使用索引掩码:如果索引掩码是一个数据类型为`bool`的Numpy数组,那么某个元素是否被选中(True 为选中,False 为不选中)取决于索引掩码在对应位置上的值:"
  1693. ]
  1694. },
  1695. {
  1696. "cell_type": "code",
  1697. "execution_count": 67,
  1698. "metadata": {},
  1699. "outputs": [
  1700. {
  1701. "data": {
  1702. "text/plain": [
  1703. "array([0, 1, 2, 3, 4])"
  1704. ]
  1705. },
  1706. "execution_count": 67,
  1707. "metadata": {},
  1708. "output_type": "execute_result"
  1709. }
  1710. ],
  1711. "source": [
  1712. "B = array([n for n in range(5)])\n",
  1713. "B"
  1714. ]
  1715. },
  1716. {
  1717. "cell_type": "code",
  1718. "execution_count": 68,
  1719. "metadata": {},
  1720. "outputs": [
  1721. {
  1722. "data": {
  1723. "text/plain": [
  1724. "array([0, 2])"
  1725. ]
  1726. },
  1727. "execution_count": 68,
  1728. "metadata": {},
  1729. "output_type": "execute_result"
  1730. }
  1731. ],
  1732. "source": [
  1733. "row_mask = array([True, False, True, False, False])\n",
  1734. "B[row_mask]"
  1735. ]
  1736. },
  1737. {
  1738. "cell_type": "code",
  1739. "execution_count": 69,
  1740. "metadata": {},
  1741. "outputs": [
  1742. {
  1743. "data": {
  1744. "text/plain": [
  1745. "array([0, 2])"
  1746. ]
  1747. },
  1748. "execution_count": 69,
  1749. "metadata": {},
  1750. "output_type": "execute_result"
  1751. }
  1752. ],
  1753. "source": [
  1754. "# 相同的事情\n",
  1755. "row_mask = array([1,0,1,0,0], dtype=bool)\n",
  1756. "B[row_mask]"
  1757. ]
  1758. },
  1759. {
  1760. "cell_type": "markdown",
  1761. "metadata": {},
  1762. "source": [
  1763. "这个特性对于有条件地从数组中选择元素非常有用,例如使用比较运算符:"
  1764. ]
  1765. },
  1766. {
  1767. "cell_type": "code",
  1768. "execution_count": 70,
  1769. "metadata": {},
  1770. "outputs": [
  1771. {
  1772. "data": {
  1773. "text/plain": [
  1774. "array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. ,\n",
  1775. " 6.5, 7. , 7.5, 8. , 8.5, 9. , 9.5])"
  1776. ]
  1777. },
  1778. "execution_count": 70,
  1779. "metadata": {},
  1780. "output_type": "execute_result"
  1781. }
  1782. ],
  1783. "source": [
  1784. "x = np.arange(0, 10, 0.5)\n",
  1785. "x"
  1786. ]
  1787. },
  1788. {
  1789. "cell_type": "code",
  1790. "execution_count": 71,
  1791. "metadata": {},
  1792. "outputs": [
  1793. {
  1794. "data": {
  1795. "text/plain": [
  1796. "array([False, False, False, False, False, False, False, False, False,\n",
  1797. " False, False, True, True, True, True, False, False, False,\n",
  1798. " False, False])"
  1799. ]
  1800. },
  1801. "execution_count": 71,
  1802. "metadata": {},
  1803. "output_type": "execute_result"
  1804. }
  1805. ],
  1806. "source": [
  1807. "mask = (5 < x) * (x < 7.5)\n",
  1808. "\n",
  1809. "mask"
  1810. ]
  1811. },
  1812. {
  1813. "cell_type": "code",
  1814. "execution_count": 72,
  1815. "metadata": {},
  1816. "outputs": [
  1817. {
  1818. "data": {
  1819. "text/plain": [
  1820. "array([5.5, 6. , 6.5, 7. ])"
  1821. ]
  1822. },
  1823. "execution_count": 72,
  1824. "metadata": {},
  1825. "output_type": "execute_result"
  1826. }
  1827. ],
  1828. "source": [
  1829. "x[mask]"
  1830. ]
  1831. },
  1832. {
  1833. "cell_type": "code",
  1834. "execution_count": 73,
  1835. "metadata": {},
  1836. "outputs": [
  1837. {
  1838. "data": {
  1839. "text/plain": [
  1840. "array([3.5, 4. , 4.5, 5. , 5.5])"
  1841. ]
  1842. },
  1843. "execution_count": 73,
  1844. "metadata": {},
  1845. "output_type": "execute_result"
  1846. }
  1847. ],
  1848. "source": [
  1849. "x[(3<x) * (x<6)]"
  1850. ]
  1851. },
  1852. {
  1853. "cell_type": "markdown",
  1854. "metadata": {},
  1855. "source": [
  1856. "## 6. 用于从数组中提取数据和创建数组的函数"
  1857. ]
  1858. },
  1859. {
  1860. "cell_type": "markdown",
  1861. "metadata": {},
  1862. "source": [
  1863. "### 6.1 where"
  1864. ]
  1865. },
  1866. {
  1867. "cell_type": "markdown",
  1868. "metadata": {},
  1869. "source": [
  1870. "索引掩码可以使用`where`函数转换为位置索引"
  1871. ]
  1872. },
  1873. {
  1874. "cell_type": "code",
  1875. "execution_count": 76,
  1876. "metadata": {},
  1877. "outputs": [
  1878. {
  1879. "data": {
  1880. "text/plain": [
  1881. "(array([11, 12, 13, 14]),)"
  1882. ]
  1883. },
  1884. "execution_count": 76,
  1885. "metadata": {},
  1886. "output_type": "execute_result"
  1887. }
  1888. ],
  1889. "source": [
  1890. "indices = np.where(mask)\n",
  1891. "\n",
  1892. "indices"
  1893. ]
  1894. },
  1895. {
  1896. "cell_type": "code",
  1897. "execution_count": 75,
  1898. "metadata": {},
  1899. "outputs": [
  1900. {
  1901. "data": {
  1902. "text/plain": [
  1903. "array([5.5, 6. , 6.5, 7. ])"
  1904. ]
  1905. },
  1906. "execution_count": 75,
  1907. "metadata": {},
  1908. "output_type": "execute_result"
  1909. }
  1910. ],
  1911. "source": [
  1912. "x[indices] # 这个索引等同于花式索引x[mask]"
  1913. ]
  1914. },
  1915. {
  1916. "cell_type": "markdown",
  1917. "metadata": {},
  1918. "source": [
  1919. "### 6.2 diag"
  1920. ]
  1921. },
  1922. {
  1923. "cell_type": "markdown",
  1924. "metadata": {},
  1925. "source": [
  1926. "使用diag函数,我们还可以提取一个数组的对角线和亚对角线:"
  1927. ]
  1928. },
  1929. {
  1930. "cell_type": "code",
  1931. "execution_count": 74,
  1932. "metadata": {},
  1933. "outputs": [
  1934. {
  1935. "data": {
  1936. "text/plain": [
  1937. "array([ 0, 11, 22, 33, 44])"
  1938. ]
  1939. },
  1940. "execution_count": 74,
  1941. "metadata": {},
  1942. "output_type": "execute_result"
  1943. }
  1944. ],
  1945. "source": [
  1946. "diag(A)"
  1947. ]
  1948. },
  1949. {
  1950. "cell_type": "code",
  1951. "execution_count": 75,
  1952. "metadata": {},
  1953. "outputs": [
  1954. {
  1955. "data": {
  1956. "text/plain": [
  1957. "array([10, 21, 32, 43])"
  1958. ]
  1959. },
  1960. "execution_count": 75,
  1961. "metadata": {},
  1962. "output_type": "execute_result"
  1963. }
  1964. ],
  1965. "source": [
  1966. "diag(A, -1)"
  1967. ]
  1968. },
  1969. {
  1970. "cell_type": "markdown",
  1971. "metadata": {},
  1972. "source": [
  1973. "### 6.3 take"
  1974. ]
  1975. },
  1976. {
  1977. "cell_type": "markdown",
  1978. "metadata": {},
  1979. "source": [
  1980. "`take` 函数和上面描述的花式索引类似"
  1981. ]
  1982. },
  1983. {
  1984. "cell_type": "code",
  1985. "execution_count": 76,
  1986. "metadata": {},
  1987. "outputs": [
  1988. {
  1989. "data": {
  1990. "text/plain": [
  1991. "array([-3, -2, -1, 0, 1, 2])"
  1992. ]
  1993. },
  1994. "execution_count": 76,
  1995. "metadata": {},
  1996. "output_type": "execute_result"
  1997. }
  1998. ],
  1999. "source": [
  2000. "v2 = arange(-3,3)\n",
  2001. "v2"
  2002. ]
  2003. },
  2004. {
  2005. "cell_type": "code",
  2006. "execution_count": 77,
  2007. "metadata": {},
  2008. "outputs": [
  2009. {
  2010. "data": {
  2011. "text/plain": [
  2012. "array([-2, 0, 2])"
  2013. ]
  2014. },
  2015. "execution_count": 77,
  2016. "metadata": {},
  2017. "output_type": "execute_result"
  2018. }
  2019. ],
  2020. "source": [
  2021. "row_indices = [1, 3, 5]\n",
  2022. "v2[row_indices] # 花式索引"
  2023. ]
  2024. },
  2025. {
  2026. "cell_type": "code",
  2027. "execution_count": 78,
  2028. "metadata": {},
  2029. "outputs": [
  2030. {
  2031. "data": {
  2032. "text/plain": [
  2033. "array([-2, 0, 2])"
  2034. ]
  2035. },
  2036. "execution_count": 78,
  2037. "metadata": {},
  2038. "output_type": "execute_result"
  2039. }
  2040. ],
  2041. "source": [
  2042. "v2.take(row_indices)"
  2043. ]
  2044. },
  2045. {
  2046. "cell_type": "markdown",
  2047. "metadata": {},
  2048. "source": [
  2049. "但是`take`也可以作用于列表和其他对象:"
  2050. ]
  2051. },
  2052. {
  2053. "cell_type": "code",
  2054. "execution_count": 79,
  2055. "metadata": {},
  2056. "outputs": [
  2057. {
  2058. "data": {
  2059. "text/plain": [
  2060. "array([-2, 0, 2])"
  2061. ]
  2062. },
  2063. "execution_count": 79,
  2064. "metadata": {},
  2065. "output_type": "execute_result"
  2066. }
  2067. ],
  2068. "source": [
  2069. "take([-3, -2, -1, 0, 1, 2], row_indices)"
  2070. ]
  2071. },
  2072. {
  2073. "cell_type": "markdown",
  2074. "metadata": {},
  2075. "source": [
  2076. "### 6.4 choose"
  2077. ]
  2078. },
  2079. {
  2080. "cell_type": "markdown",
  2081. "metadata": {},
  2082. "source": [
  2083. "通过从几个数组中选择元素来构造一个数组:"
  2084. ]
  2085. },
  2086. {
  2087. "cell_type": "code",
  2088. "execution_count": 49,
  2089. "metadata": {},
  2090. "outputs": [
  2091. {
  2092. "data": {
  2093. "text/plain": [
  2094. "array([ 5, -2, 5, -2])"
  2095. ]
  2096. },
  2097. "execution_count": 49,
  2098. "metadata": {},
  2099. "output_type": "execute_result"
  2100. }
  2101. ],
  2102. "source": [
  2103. "which = [1, 0, 1, 0]\n",
  2104. "choices = [[-2,-2,-2,-2], [5,5,5,5]]\n",
  2105. "\n",
  2106. "np.choose(which, choices)"
  2107. ]
  2108. },
  2109. {
  2110. "cell_type": "markdown",
  2111. "metadata": {},
  2112. "source": [
  2113. "## 7. 线性代数"
  2114. ]
  2115. },
  2116. {
  2117. "cell_type": "markdown",
  2118. "metadata": {},
  2119. "source": [
  2120. "向量化代码是使用Python/Numpy编写高效数值计算的关键。这意味着尽可能多的程序应该用矩阵和向量运算来表示,比如矩阵-矩阵乘法。"
  2121. ]
  2122. },
  2123. {
  2124. "cell_type": "markdown",
  2125. "metadata": {},
  2126. "source": [
  2127. "### 7.1 Scalar-array 操作"
  2128. ]
  2129. },
  2130. {
  2131. "cell_type": "markdown",
  2132. "metadata": {},
  2133. "source": [
  2134. "我们可以使用常用的算术运算符对数组和标量进行乘、加、减和除运算。"
  2135. ]
  2136. },
  2137. {
  2138. "cell_type": "code",
  2139. "execution_count": 93,
  2140. "metadata": {},
  2141. "outputs": [],
  2142. "source": [
  2143. "v1 = np.arange(0, 5)"
  2144. ]
  2145. },
  2146. {
  2147. "cell_type": "code",
  2148. "execution_count": 94,
  2149. "metadata": {},
  2150. "outputs": [
  2151. {
  2152. "data": {
  2153. "text/plain": [
  2154. "array([0, 2, 4, 6, 8])"
  2155. ]
  2156. },
  2157. "execution_count": 94,
  2158. "metadata": {},
  2159. "output_type": "execute_result"
  2160. }
  2161. ],
  2162. "source": [
  2163. "v1 * 2"
  2164. ]
  2165. },
  2166. {
  2167. "cell_type": "code",
  2168. "execution_count": 95,
  2169. "metadata": {},
  2170. "outputs": [
  2171. {
  2172. "data": {
  2173. "text/plain": [
  2174. "array([2, 3, 4, 5, 6])"
  2175. ]
  2176. },
  2177. "execution_count": 95,
  2178. "metadata": {},
  2179. "output_type": "execute_result"
  2180. }
  2181. ],
  2182. "source": [
  2183. "v1 + 2"
  2184. ]
  2185. },
  2186. {
  2187. "cell_type": "code",
  2188. "execution_count": 96,
  2189. "metadata": {},
  2190. "outputs": [
  2191. {
  2192. "data": {
  2193. "text/plain": [
  2194. "(array([[ 0, 2, 4, 6, 8],\n",
  2195. " [20, 22, 24, 26, 28],\n",
  2196. " [40, 42, 44, 46, 48],\n",
  2197. " [60, 62, 64, 66, 68],\n",
  2198. " [80, 82, 84, 86, 88]]), array([[ 2, 3, 4, 5, 6],\n",
  2199. " [12, 13, 14, 15, 16],\n",
  2200. " [22, 23, 24, 25, 26],\n",
  2201. " [32, 33, 34, 35, 36],\n",
  2202. " [42, 43, 44, 45, 46]]))"
  2203. ]
  2204. },
  2205. "execution_count": 96,
  2206. "metadata": {},
  2207. "output_type": "execute_result"
  2208. }
  2209. ],
  2210. "source": [
  2211. "A * 2, A + 2"
  2212. ]
  2213. },
  2214. {
  2215. "cell_type": "markdown",
  2216. "metadata": {},
  2217. "source": [
  2218. "### 7.2 数组间的元素操作"
  2219. ]
  2220. },
  2221. {
  2222. "cell_type": "markdown",
  2223. "metadata": {},
  2224. "source": [
  2225. "当我们对数组进行加法、减法、乘法和除法时,默认的行为是**element-wise**操作:"
  2226. ]
  2227. },
  2228. {
  2229. "cell_type": "code",
  2230. "execution_count": 97,
  2231. "metadata": {},
  2232. "outputs": [
  2233. {
  2234. "data": {
  2235. "text/plain": [
  2236. "array([[0.41002411, 0.52156709, 0.77687362],\n",
  2237. " [0.86406459, 0.00587552, 0.04683701]])"
  2238. ]
  2239. },
  2240. "execution_count": 97,
  2241. "metadata": {},
  2242. "output_type": "execute_result"
  2243. }
  2244. ],
  2245. "source": [
  2246. "A = np.random.rand(2, 3)\n",
  2247. "\n",
  2248. "A * A # element-wise 乘法"
  2249. ]
  2250. },
  2251. {
  2252. "cell_type": "code",
  2253. "execution_count": 98,
  2254. "metadata": {},
  2255. "outputs": [
  2256. {
  2257. "data": {
  2258. "text/plain": [
  2259. "array([ 0, 1, 4, 9, 16])"
  2260. ]
  2261. },
  2262. "execution_count": 98,
  2263. "metadata": {},
  2264. "output_type": "execute_result"
  2265. }
  2266. ],
  2267. "source": [
  2268. "v1 * v1"
  2269. ]
  2270. },
  2271. {
  2272. "cell_type": "markdown",
  2273. "metadata": {},
  2274. "source": [
  2275. "如果我们用兼容的形状进行数组的乘法,我们会得到每一行的对位相乘结果:"
  2276. ]
  2277. },
  2278. {
  2279. "cell_type": "code",
  2280. "execution_count": 99,
  2281. "metadata": {},
  2282. "outputs": [
  2283. {
  2284. "data": {
  2285. "text/plain": [
  2286. "((2, 3), (5,))"
  2287. ]
  2288. },
  2289. "execution_count": 99,
  2290. "metadata": {},
  2291. "output_type": "execute_result"
  2292. }
  2293. ],
  2294. "source": [
  2295. "A.shape, v1.shape"
  2296. ]
  2297. },
  2298. {
  2299. "cell_type": "code",
  2300. "execution_count": 100,
  2301. "metadata": {},
  2302. "outputs": [
  2303. {
  2304. "ename": "ValueError",
  2305. "evalue": "operands could not be broadcast together with shapes (2,3) (5,) ",
  2306. "output_type": "error",
  2307. "traceback": [
  2308. "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
  2309. "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
  2310. "\u001b[0;32m<ipython-input-100-1af134c5c5d0>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mA\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mv1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
  2311. "\u001b[0;31mValueError\u001b[0m: operands could not be broadcast together with shapes (2,3) (5,) "
  2312. ]
  2313. }
  2314. ],
  2315. "source": [
  2316. "A * v1"
  2317. ]
  2318. },
  2319. {
  2320. "cell_type": "markdown",
  2321. "metadata": {},
  2322. "source": [
  2323. "### 7.4 矩阵代数"
  2324. ]
  2325. },
  2326. {
  2327. "cell_type": "markdown",
  2328. "metadata": {},
  2329. "source": [
  2330. "那么矩阵的乘法呢?有两种方法。我们可以使用`dot`函数,它对两个参数执行矩阵-矩阵乘法、矩阵-向量乘法或向量内积运算"
  2331. ]
  2332. },
  2333. {
  2334. "cell_type": "code",
  2335. "execution_count": 102,
  2336. "metadata": {},
  2337. "outputs": [
  2338. {
  2339. "data": {
  2340. "text/plain": [
  2341. "array([[0.3767892 , 1.47079714, 0.31117826, 1.29726746, 0.51486767],\n",
  2342. " [0.25604237, 0.97247777, 0.34479677, 0.93969314, 0.3976715 ],\n",
  2343. " [0.81557228, 1.22841789, 0.86636095, 0.93499185, 0.28560187],\n",
  2344. " [0.52515694, 1.56792282, 1.1443364 , 1.84965072, 0.74141231],\n",
  2345. " [0.78004097, 1.51298694, 1.22023006, 1.42991218, 0.71648303]])"
  2346. ]
  2347. },
  2348. "execution_count": 102,
  2349. "metadata": {},
  2350. "output_type": "execute_result"
  2351. }
  2352. ],
  2353. "source": [
  2354. "A = np.random.rand(5, 5)\n",
  2355. "v = np.random.rand(5, 1)\n",
  2356. "\n",
  2357. "np.dot(A, A)"
  2358. ]
  2359. },
  2360. {
  2361. "cell_type": "code",
  2362. "execution_count": 107,
  2363. "metadata": {},
  2364. "outputs": [
  2365. {
  2366. "data": {
  2367. "text/plain": [
  2368. "array([3.03824466, 2.65209134, 2.94637897, 6.50153897, 5.54270391])"
  2369. ]
  2370. },
  2371. "execution_count": 107,
  2372. "metadata": {},
  2373. "output_type": "execute_result"
  2374. }
  2375. ],
  2376. "source": [
  2377. "np.dot(A, v1)"
  2378. ]
  2379. },
  2380. {
  2381. "cell_type": "code",
  2382. "execution_count": 108,
  2383. "metadata": {},
  2384. "outputs": [
  2385. {
  2386. "data": {
  2387. "text/plain": [
  2388. "30"
  2389. ]
  2390. },
  2391. "execution_count": 108,
  2392. "metadata": {},
  2393. "output_type": "execute_result"
  2394. }
  2395. ],
  2396. "source": [
  2397. "np.dot(v1, v1)"
  2398. ]
  2399. },
  2400. {
  2401. "cell_type": "markdown",
  2402. "metadata": {},
  2403. "source": [
  2404. "另外,我们可以将数组对象转换为`matrix`类型。这将改变标准算术运算符`+, -, *` 的行为,使其按矩阵代数的规则运算。"
  2405. ]
  2406. },
  2407. {
  2408. "cell_type": "code",
  2409. "execution_count": 111,
  2410. "metadata": {},
  2411. "outputs": [],
  2412. "source": [
  2413. "M = np.matrix(A)\n",
  2414. "v = np.matrix(v1).T # make it a column vector"
  2415. ]
  2416. },
  2417. {
  2418. "cell_type": "code",
  2419. "execution_count": 112,
  2420. "metadata": {},
  2421. "outputs": [
  2422. {
  2423. "data": {
  2424. "text/plain": [
  2425. "matrix([[0],\n",
  2426. " [1],\n",
  2427. " [2],\n",
  2428. " [3],\n",
  2429. " [4]])"
  2430. ]
  2431. },
  2432. "execution_count": 112,
  2433. "metadata": {},
  2434. "output_type": "execute_result"
  2435. }
  2436. ],
  2437. "source": [
  2438. "v"
  2439. ]
  2440. },
  2441. {
  2442. "cell_type": "code",
  2443. "execution_count": 113,
  2444. "metadata": {},
  2445. "outputs": [
  2446. {
  2447. "data": {
  2448. "text/plain": [
  2449. "matrix([[0.3767892 , 1.47079714, 0.31117826, 1.29726746, 0.51486767],\n",
  2450. " [0.25604237, 0.97247777, 0.34479677, 0.93969314, 0.3976715 ],\n",
  2451. " [0.81557228, 1.22841789, 0.86636095, 0.93499185, 0.28560187],\n",
  2452. " [0.52515694, 1.56792282, 1.1443364 , 1.84965072, 0.74141231],\n",
  2453. " [0.78004097, 1.51298694, 1.22023006, 1.42991218, 0.71648303]])"
  2454. ]
  2455. },
  2456. "execution_count": 113,
  2457. "metadata": {},
  2458. "output_type": "execute_result"
  2459. }
  2460. ],
  2461. "source": [
  2462. "M * M"
  2463. ]
  2464. },
  2465. {
  2466. "cell_type": "code",
  2467. "execution_count": 114,
  2468. "metadata": {},
  2469. "outputs": [
  2470. {
  2471. "data": {
  2472. "text/plain": [
  2473. "matrix([[3.03824466],\n",
  2474. " [2.65209134],\n",
  2475. " [2.94637897],\n",
  2476. " [6.50153897],\n",
  2477. " [5.54270391]])"
  2478. ]
  2479. },
  2480. "execution_count": 114,
  2481. "metadata": {},
  2482. "output_type": "execute_result"
  2483. }
  2484. ],
  2485. "source": [
  2486. "M * v"
  2487. ]
  2488. },
  2489. {
  2490. "cell_type": "code",
  2491. "execution_count": 117,
  2492. "metadata": {},
  2493. "outputs": [
  2494. {
  2495. "data": {
  2496. "text/plain": [
  2497. "matrix([[30]])"
  2498. ]
  2499. },
  2500. "execution_count": 117,
  2501. "metadata": {},
  2502. "output_type": "execute_result"
  2503. }
  2504. ],
  2505. "source": [
  2506. "# 內积\n",
  2507. "v.T * v"
  2508. ]
  2509. },
  2510. {
  2511. "cell_type": "code",
  2512. "execution_count": 118,
  2513. "metadata": {},
  2514. "outputs": [
  2515. {
  2516. "data": {
  2517. "text/plain": [
  2518. "matrix([[3.03824466],\n",
  2519. " [3.65209134],\n",
  2520. " [4.94637897],\n",
  2521. " [9.50153897],\n",
  2522. " [9.54270391]])"
  2523. ]
  2524. },
  2525. "execution_count": 118,
  2526. "metadata": {},
  2527. "output_type": "execute_result"
  2528. }
  2529. ],
  2530. "source": [
  2531. "# 对于矩阵对象,适用标准的矩阵代数\n",
  2532. "v + M*v"
  2533. ]
  2534. },
  2535. {
  2536. "cell_type": "markdown",
  2537. "metadata": {},
  2538. "source": [
  2539. "如果我们尝试对形状不匹配的矩阵进行加、减或乘运算,就会得到错误:"
  2540. ]
  2541. },
  2542. {
  2543. "cell_type": "code",
  2544. "execution_count": 125,
  2545. "metadata": {},
  2546. "outputs": [],
  2547. "source": [
  2548. "v = np.matrix([1,2,3,4,5,6]).T"
  2549. ]
  2550. },
  2551. {
  2552. "cell_type": "code",
  2553. "execution_count": 123,
  2554. "metadata": {},
  2555. "outputs": [
  2556. {
  2557. "data": {
  2558. "text/plain": [
  2559. "((5, 5), (5, 1))"
  2560. ]
  2561. },
  2562. "execution_count": 123,
  2563. "metadata": {},
  2564. "output_type": "execute_result"
  2565. }
  2566. ],
  2567. "source": [
  2568. "np.shape(M), np.shape(v)"
  2569. ]
  2570. },
  2571. {
  2572. "cell_type": "code",
  2573. "execution_count": 124,
  2574. "metadata": {},
  2575. "outputs": [
  2576. {
  2577. "data": {
  2578. "text/plain": [
  2579. "matrix([[5.06458489],\n",
  2580. " [4.08471675],\n",
  2581. " [4.990684 ],\n",
  2582. " [9.17423165],\n",
  2583. " [8.08502244]])"
  2584. ]
  2585. },
  2586. "execution_count": 124,
  2587. "metadata": {},
  2588. "output_type": "execute_result"
  2589. }
  2590. ],
  2591. "source": [
  2592. "M * v"
  2593. ]
  2594. },
  2595. {
  2596. "cell_type": "markdown",
  2597. "metadata": {},
  2598. "source": [
  2599. "另请参阅相关函数:`inner`, `outer`, `cross`, `kron`, `tensordot`。例如,可以使用`help(kron)`查看说明。"
  2600. ]
  2601. },
  2602. {
  2603. "cell_type": "markdown",
  2604. "metadata": {},
  2605. "source": [
  2606. "### 7.5 数组/矩阵转换"
  2607. ]
  2608. },
  2609. {
  2610. "cell_type": "markdown",
  2611. "metadata": {},
  2612. "source": [
  2613. "上面我们已经用`.T`对矩阵对象`v`进行了转置。我们也可以用`transpose`函数实现同样的效果。\n",
  2614. "\n",
  2615. "变换矩阵对象的其他数学函数有:"
  2616. ]
  2617. },
  2618. {
  2619. "cell_type": "code",
  2620. "execution_count": 126,
  2621. "metadata": {},
  2622. "outputs": [
  2623. {
  2624. "name": "stdout",
  2625. "output_type": "stream",
  2626. "text": [
  2627. "[[0.04208911 0.65828119 0.21987187 0.10069326]\n",
  2628. " [0.61960112 0.52726045 0.35884175 0.51931613]\n",
  2629. " [0.66708619 0.76886997 0.06792093 0.6548313 ]]\n",
  2630. "[[0.04208911 0.61960112 0.66708619]\n",
  2631. " [0.65828119 0.52726045 0.76886997]\n",
  2632. " [0.21987187 0.35884175 0.06792093]\n",
  2633. " [0.10069326 0.51931613 0.6548313 ]]\n"
  2634. ]
  2635. }
  2636. ],
  2637. "source": [
  2638. "A = np.random.rand(3,4)\n",
  2639. "print(A)\n",
  2640. "print(A.T)"
  2641. ]
  2642. },
  2643. {
  2644. "cell_type": "code",
  2645. "execution_count": 127,
  2646. "metadata": {},
  2647. "outputs": [
  2648. {
  2649. "data": {
  2650. "text/plain": [
  2651. "matrix([[0.+1.j, 0.+2.j],\n",
  2652. " [0.+3.j, 0.+4.j]])"
  2653. ]
  2654. },
  2655. "execution_count": 127,
  2656. "metadata": {},
  2657. "output_type": "execute_result"
  2658. }
  2659. ],
  2660. "source": [
  2661. "C = np.matrix([[1j, 2j], [3j, 4j]])\n",
  2662. "C"
  2663. ]
  2664. },
  2665. {
  2666. "cell_type": "code",
  2667. "execution_count": 128,
  2668. "metadata": {},
  2669. "outputs": [
  2670. {
  2671. "data": {
  2672. "text/plain": [
  2673. "matrix([[0.-1.j, 0.-2.j],\n",
  2674. " [0.-3.j, 0.-4.j]])"
  2675. ]
  2676. },
  2677. "execution_count": 128,
  2678. "metadata": {},
  2679. "output_type": "execute_result"
  2680. }
  2681. ],
  2682. "source": [
  2683. "conjugate(C)"
  2684. ]
  2685. },
  2686. {
  2687. "cell_type": "markdown",
  2688. "metadata": {},
  2689. "source": [
  2690. "厄米共轭:转置+共轭"
  2691. ]
  2692. },
  2693. {
  2694. "cell_type": "code",
  2695. "execution_count": 129,
  2696. "metadata": {},
  2697. "outputs": [
  2698. {
  2699. "data": {
  2700. "text/plain": [
  2701. "matrix([[0.-1.j, 0.-3.j],\n",
  2702. " [0.-2.j, 0.-4.j]])"
  2703. ]
  2704. },
  2705. "execution_count": 129,
  2706. "metadata": {},
  2707. "output_type": "execute_result"
  2708. }
  2709. ],
  2710. "source": [
  2711. "C.H"
  2712. ]
  2713. },
  2714. {
  2715. "cell_type": "markdown",
  2716. "metadata": {},
  2717. "source": [
  2718. "我们可以用`real`和`imag`提取复数数组的实部和虚部:"
  2719. ]
  2720. },
  2721. {
  2722. "cell_type": "code",
  2723. "execution_count": 130,
  2724. "metadata": {},
  2725. "outputs": [
  2726. {
  2727. "data": {
  2728. "text/plain": [
  2729. "matrix([[0., 0.],\n",
  2730. " [0., 0.]])"
  2731. ]
  2732. },
  2733. "execution_count": 130,
  2734. "metadata": {},
  2735. "output_type": "execute_result"
  2736. }
  2737. ],
  2738. "source": [
  2739. "real(C) # same as: C.real"
  2740. ]
  2741. },
  2742. {
  2743. "cell_type": "code",
  2744. "execution_count": 131,
  2745. "metadata": {},
  2746. "outputs": [
  2747. {
  2748. "data": {
  2749. "text/plain": [
  2750. "matrix([[1., 2.],\n",
  2751. " [3., 4.]])"
  2752. ]
  2753. },
  2754. "execution_count": 131,
  2755. "metadata": {},
  2756. "output_type": "execute_result"
  2757. }
  2758. ],
  2759. "source": [
  2760. "imag(C) # same as: C.imag"
  2761. ]
  2762. },
  2763. {
  2764. "cell_type": "markdown",
  2765. "metadata": {},
  2766. "source": [
  2767. "或者计算复数的辐角和绝对值:"
  2768. ]
  2769. },
  2770. {
  2771. "cell_type": "code",
  2772. "execution_count": 106,
  2773. "metadata": {},
  2774. "outputs": [
  2775. {
  2776. "data": {
  2777. "text/plain": [
  2778. "array([[ 0.78539816, 1.10714872],\n",
  2779. " [ 1.24904577, 1.32581766]])"
  2780. ]
  2781. },
  2782. "execution_count": 106,
  2783. "metadata": {},
  2784. "output_type": "execute_result"
  2785. }
  2786. ],
  2787. "source": [
  2788. "angle(C+1) # heads up MATLAB Users, angle is used instead of arg"
  2789. ]
  2790. },
  2791. {
  2792. "cell_type": "code",
  2793. "execution_count": 107,
  2794. "metadata": {},
  2795. "outputs": [
  2796. {
  2797. "data": {
  2798. "text/plain": [
  2799. "matrix([[ 1., 2.],\n",
  2800. " [ 3., 4.]])"
  2801. ]
  2802. },
  2803. "execution_count": 107,
  2804. "metadata": {},
  2805. "output_type": "execute_result"
  2806. }
  2807. ],
  2808. "source": [
  2809. "abs(C)"
  2810. ]
  2811. },
  2812. {
  2813. "cell_type": "markdown",
  2814. "metadata": {},
  2815. "source": [
  2816. "### 7.6 矩阵计算"
  2817. ]
  2818. },
  2819. {
  2820. "cell_type": "markdown",
  2821. "metadata": {},
  2822. "source": [
  2823. "#### 求逆"
  2824. ]
  2825. },
  2826. {
  2827. "cell_type": "code",
  2828. "execution_count": 132,
  2829. "metadata": {},
  2830. "outputs": [
  2831. {
  2832. "data": {
  2833. "text/plain": [
  2834. "matrix([[0.+2.j , 0.-1.j ],\n",
  2835. " [0.-1.5j, 0.+0.5j]])"
  2836. ]
  2837. },
  2838. "execution_count": 132,
  2839. "metadata": {},
  2840. "output_type": "execute_result"
  2841. }
  2842. ],
  2843. "source": [
  2844. "np.linalg.inv(C) # equivalent to C.I "
  2845. ]
  2846. },
  2847. {
  2848. "cell_type": "code",
  2849. "execution_count": 133,
  2850. "metadata": {},
  2851. "outputs": [
  2852. {
  2853. "data": {
  2854. "text/plain": [
  2855. "matrix([[1.00000000e+00+0.j, 0.00000000e+00+0.j],\n",
  2856. " [2.22044605e-16+0.j, 1.00000000e+00+0.j]])"
  2857. ]
  2858. },
  2859. "execution_count": 133,
  2860. "metadata": {},
  2861. "output_type": "execute_result"
  2862. }
  2863. ],
  2864. "source": [
  2865. "C.I * C"
  2866. ]
  2867. },
  2868. {
  2869. "cell_type": "markdown",
  2870. "metadata": {},
  2871. "source": [
  2872. "#### 行列式"
  2873. ]
  2874. },
  2875. {
  2876. "cell_type": "code",
  2877. "execution_count": 134,
  2878. "metadata": {},
  2879. "outputs": [
  2880. {
  2881. "data": {
  2882. "text/plain": [
  2883. "(2.0000000000000004+0j)"
  2884. ]
  2885. },
  2886. "execution_count": 134,
  2887. "metadata": {},
  2888. "output_type": "execute_result"
  2889. }
  2890. ],
  2891. "source": [
  2892. "np.linalg.det(C)"
  2893. ]
  2894. },
  2895. {
  2896. "cell_type": "code",
  2897. "execution_count": 135,
  2898. "metadata": {},
  2899. "outputs": [
  2900. {
  2901. "data": {
  2902. "text/plain": [
  2903. "(0.49999999999999967+0j)"
  2904. ]
  2905. },
  2906. "execution_count": 135,
  2907. "metadata": {},
  2908. "output_type": "execute_result"
  2909. }
  2910. ],
  2911. "source": [
  2912. "linalg.det(C.I)"
  2913. ]
  2914. },
  2915. {
  2916. "cell_type": "markdown",
  2917. "metadata": {},
  2918. "source": [
  2919. "### 7.7 数据处理"
  2920. ]
  2921. },
  2922. {
  2923. "cell_type": "markdown",
  2924. "metadata": {},
  2925. "source": [
  2926. "通常将数据集存储在Numpy数组中是非常有用的。Numpy提供了许多函数用于计算数组中数据集的统计。\n",
  2927. "\n",
  2928. "例如,让我们从上面使用的斯德哥尔摩温度数据集计算一些属性。"
  2929. ]
  2930. },
  2931. {
  2932. "cell_type": "code",
  2933. "execution_count": 136,
  2934. "metadata": {},
  2935. "outputs": [
  2936. {
  2937. "data": {
  2938. "text/plain": [
  2939. "(77431, 7)"
  2940. ]
  2941. },
  2942. "execution_count": 136,
  2943. "metadata": {},
  2944. "output_type": "execute_result"
  2945. }
  2946. ],
  2947. "source": [
  2948. "# 提醒一下,温度数据集存储在变量data中:\n",
  2949. "np.shape(data)"
  2950. ]
  2951. },
  2952. {
  2953. "cell_type": "markdown",
  2954. "metadata": {},
  2955. "source": [
  2956. "#### mean"
  2957. ]
  2958. },
  2959. {
  2960. "cell_type": "code",
  2961. "execution_count": 88,
  2962. "metadata": {},
  2963. "outputs": [
  2964. {
  2965. "name": "stdout",
  2966. "output_type": "stream",
  2967. "text": [
  2968. "(77431, 7)\n"
  2969. ]
  2970. },
  2971. {
  2972. "data": {
  2973. "text/plain": [
  2974. "6.197109684751585"
  2975. ]
  2976. },
  2977. "execution_count": 88,
  2978. "metadata": {},
  2979. "output_type": "execute_result"
  2980. }
  2981. ],
  2982. "source": [
  2983. "# 温度数据在索引为3的列(即第四列)中\n",
  2984. "print(data.shape)\n",
  2985. "np.mean(data[:,3])"
  2986. ]
  2987. },
  2988. {
  2989. "cell_type": "code",
  2990. "execution_count": 137,
  2991. "metadata": {},
  2992. "outputs": [
  2993. {
  2994. "data": {
  2995. "text/plain": [
  2996. "0.4764047026464162"
  2997. ]
  2998. },
  2999. "execution_count": 137,
  3000. "metadata": {},
  3001. "output_type": "execute_result"
  3002. }
  3003. ],
  3004. "source": [
  3005. "A = np.random.rand(4, 3)\n",
  3006. "np.mean(A)"
  3007. ]
  3008. },
  3009. {
  3010. "cell_type": "markdown",
  3011. "metadata": {},
  3012. "source": [
  3013. "在过去的200年里,斯德哥尔摩的日平均气温大约是6.2 °C。"
  3014. ]
  3015. },
  3016. {
  3017. "cell_type": "markdown",
  3018. "metadata": {},
  3019. "source": [
  3020. "#### 标准差和方差"
  3021. ]
  3022. },
  3023. {
  3024. "cell_type": "code",
  3025. "execution_count": 138,
  3026. "metadata": {},
  3027. "outputs": [
  3028. {
  3029. "data": {
  3030. "text/plain": [
  3031. "(8.282271621340573, 68.59602320966341)"
  3032. ]
  3033. },
  3034. "execution_count": 138,
  3035. "metadata": {},
  3036. "output_type": "execute_result"
  3037. }
  3038. ],
  3039. "source": [
  3040. "np.std(data[:,3]), np.var(data[:,3])"
  3041. ]
  3042. },
  3043. {
  3044. "cell_type": "markdown",
  3045. "metadata": {},
  3046. "source": [
  3047. "#### 最小值和最大值"
  3048. ]
  3049. },
  3050. {
  3051. "cell_type": "code",
  3052. "execution_count": 139,
  3053. "metadata": {},
  3054. "outputs": [
  3055. {
  3056. "data": {
  3057. "text/plain": [
  3058. "-25.8"
  3059. ]
  3060. },
  3061. "execution_count": 139,
  3062. "metadata": {},
  3063. "output_type": "execute_result"
  3064. }
  3065. ],
  3066. "source": [
  3067. "# 最低日平均温度\n",
  3068. "data[:,3].min()"
  3069. ]
  3070. },
  3071. {
  3072. "cell_type": "code",
  3073. "execution_count": 140,
  3074. "metadata": {},
  3075. "outputs": [
  3076. {
  3077. "data": {
  3078. "text/plain": [
  3079. "28.3"
  3080. ]
  3081. },
  3082. "execution_count": 140,
  3083. "metadata": {},
  3084. "output_type": "execute_result"
  3085. }
  3086. ],
  3087. "source": [
  3088. "# 最高日平均温度\n",
  3089. "data[:,3].max()"
  3090. ]
  3091. },
  3092. {
  3093. "cell_type": "markdown",
  3094. "metadata": {},
  3095. "source": [
  3096. "#### sum, prod, and trace"
  3097. ]
  3098. },
  3099. {
  3100. "cell_type": "code",
  3101. "execution_count": 141,
  3102. "metadata": {},
  3103. "outputs": [
  3104. {
  3105. "data": {
  3106. "text/plain": [
  3107. "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])"
  3108. ]
  3109. },
  3110. "execution_count": 141,
  3111. "metadata": {},
  3112. "output_type": "execute_result"
  3113. }
  3114. ],
  3115. "source": [
  3116. "d = np.arange(0, 10)\n",
  3117. "d"
  3118. ]
  3119. },
  3120. {
  3121. "cell_type": "code",
  3122. "execution_count": 142,
  3123. "metadata": {},
  3124. "outputs": [
  3125. {
  3126. "data": {
  3127. "text/plain": [
  3128. "45"
  3129. ]
  3130. },
  3131. "execution_count": 142,
  3132. "metadata": {},
  3133. "output_type": "execute_result"
  3134. }
  3135. ],
  3136. "source": [
  3137. "# 将所有的元素相加\n",
  3138. "np.sum(d)"
  3139. ]
  3140. },
  3141. {
  3142. "cell_type": "code",
  3143. "execution_count": 143,
  3144. "metadata": {},
  3145. "outputs": [
  3146. {
  3147. "data": {
  3148. "text/plain": [
  3149. "3628800"
  3150. ]
  3151. },
  3152. "execution_count": 143,
  3153. "metadata": {},
  3154. "output_type": "execute_result"
  3155. }
  3156. ],
  3157. "source": [
  3158. "# 所有元素的乘积\n",
  3159. "np.prod(d+1)"
  3160. ]
  3161. },
  3162. {
  3163. "cell_type": "code",
  3164. "execution_count": 144,
  3165. "metadata": {},
  3166. "outputs": [
  3167. {
  3168. "data": {
  3169. "text/plain": [
  3170. "array([ 0, 1, 3, 6, 10, 15, 21, 28, 36, 45])"
  3171. ]
  3172. },
  3173. "execution_count": 144,
  3174. "metadata": {},
  3175. "output_type": "execute_result"
  3176. }
  3177. ],
  3178. "source": [
  3179. "# 累计求和\n",
  3180. "np.cumsum(d)"
  3181. ]
  3182. },
  3183. {
  3184. "cell_type": "code",
  3185. "execution_count": 147,
  3186. "metadata": {},
  3187. "outputs": [
  3188. {
  3189. "data": {
  3190. "text/plain": [
  3191. "array([ 1, 2, 6, 24, 120, 720, 5040,\n",
  3192. " 40320, 362880, 3628800])"
  3193. ]
  3194. },
  3195. "execution_count": 147,
  3196. "metadata": {},
  3197. "output_type": "execute_result"
  3198. }
  3199. ],
  3200. "source": [
  3201. "# 累计乘积\n",
  3202. "np.cumprod(d+1)"
  3203. ]
  3204. },
  3205. {
  3206. "cell_type": "code",
  3207. "execution_count": 148,
  3208. "metadata": {},
  3209. "outputs": [
  3210. {
  3211. "data": {
  3212. "text/plain": [
  3213. "1.04879166276667"
  3214. ]
  3215. },
  3216. "execution_count": 148,
  3217. "metadata": {},
  3218. "output_type": "execute_result"
  3219. }
  3220. ],
  3221. "source": [
  3222. "# 计算对角线元素的和,和diag(A).sum()一样\n",
  3223. "np.trace(A)"
  3224. ]
  3225. },
  3226. {
  3227. "cell_type": "markdown",
  3228. "metadata": {},
  3229. "source": [
  3230. "### 7.8 数组子集的计算"
  3231. ]
  3232. },
  3233. {
  3234. "cell_type": "markdown",
  3235. "metadata": {},
  3236. "source": [
  3237. "我们可以使用索引、花式索引和从数组中提取数据的其他方法(如上所述)来计算数组中的数据子集。\n",
  3238. "\n",
  3239. "例如,让我们回到温度数据集:"
  3240. ]
  3241. },
  3242. {
  3243. "cell_type": "code",
  3244. "execution_count": 149,
  3245. "metadata": {},
  3246. "outputs": [
  3247. {
  3248. "name": "stdout",
  3249. "output_type": "stream",
  3250. "text": [
  3251. "1800 1 1 -6.1 -6.1 -6.1 1\r\n",
  3252. "1800 1 2 -15.4 -15.4 -15.4 1\r\n",
  3253. "1800 1 3 -15.0 -15.0 -15.0 1\r\n"
  3254. ]
  3255. }
  3256. ],
  3257. "source": [
  3258. "!head -n 3 stockholm_td_adj.dat"
  3259. ]
  3260. },
  3261. {
  3262. "cell_type": "markdown",
  3263. "metadata": {},
  3264. "source": [
  3265. "数据集的格式是:年,月,日,日平均气温,低,高,位置。\n",
  3266. "\n",
  3267. "如果我们对某个特定月份(比如二月)的平均温度感兴趣,那么可以创建一个索引掩码,并用它来选取该月的数据:"
  3268. ]
  3269. },
  3270. {
  3271. "cell_type": "code",
  3272. "execution_count": 99,
  3273. "metadata": {},
  3274. "outputs": [
  3275. {
  3276. "data": {
  3277. "text/plain": [
  3278. "array([ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.])"
  3279. ]
  3280. },
  3281. "execution_count": 99,
  3282. "metadata": {},
  3283. "output_type": "execute_result"
  3284. }
  3285. ],
  3286. "source": [
  3287. "np.unique(data[:,1]) # 月份列的取值为1到12"
  3288. ]
  3289. },
  3290. {
  3291. "cell_type": "code",
  3292. "execution_count": 150,
  3293. "metadata": {},
  3294. "outputs": [
  3295. {
  3296. "name": "stdout",
  3297. "output_type": "stream",
  3298. "text": [
  3299. "[False False False ... False False False]\n"
  3300. ]
  3301. }
  3302. ],
  3303. "source": [
  3304. "mask_feb = data[:,1] == 2\n",
  3305. "print(mask_feb)"
  3306. ]
  3307. },
  3308. {
  3309. "cell_type": "code",
  3310. "execution_count": 151,
  3311. "metadata": {},
  3312. "outputs": [
  3313. {
  3314. "name": "stdout",
  3315. "output_type": "stream",
  3316. "text": [
  3317. "-3.212109570736596\n",
  3318. "5.090390768766271\n"
  3319. ]
  3320. }
  3321. ],
  3322. "source": [
  3323. "# 温度数据在索引为3的列中\n",
  3324. "print(np.mean(data[mask_feb,3]))\n",
  3325. "print(np.std(data[mask_feb,3]))"
  3326. ]
  3327. },
  3328. {
  3329. "cell_type": "markdown",
  3330. "metadata": {},
  3331. "source": [
  3332. "有了这些工具,我们就有了非常强大的数据处理能力。例如,提取每年每个月的平均气温只需要几行代码:"
  3333. ]
  3334. },
  3335. {
  3336. "cell_type": "code",
  3337. "execution_count": 153,
  3338. "metadata": {},
  3339. "outputs": [
  3340. {
  3341. "data": {
  3342. "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEKCAYAAAAfGVI8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAEhtJREFUeJzt3X20ZXVdx/H3JyYTeQiNiQwcL7pYuIgQbRZpWKFGYZhUy8opjcrEInyoVjVZLfAfG1PyYdXSRiGfMRepYTOiRgE9mDooIagE0aBDyEMWkRUGfPvj7NE7E/fezb3n7H3v/b1fa511z/6dfff+7jV37uf+9m/v305VIUlq19eNXYAkaVwGgSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlxG8YuoI/DDz+85ubmxi5DktaUq6666s6q2rjUemsiCObm5ti1a9fYZUjSmpLk5j7reWpIkhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1Lg1cUOZtBbMbd0x9W3u3nb61Lcp7c8egSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DjnGpLWmGnPaeR8RrJHIEmNMwgkqXEGgSQ1ziCQpMYZBJLUOINAkho3syBIcmGS25NcO6/tvCS3JLm6e/3grPYvSepnlj2CtwCnPUD7a6rqxO61c4b7lyT1MLMgqKorgS/NavuSpOkYY4zgnCTXdKeOHj7C/iVJ8wwdBG8AHgucCNwKnL/QiknOSrIrya477rhjqPokqTmDBkFV3VZV91XV/cCbgJMWWXd7VW2uqs0bN24crkhJasygQZDkkfMWfwS4dqF1JUnDmNnso0kuAk4BDk+yBzgXOCXJiUABu4EXzmr/0l7Tnq0TnLFT68vMgqCqtjxA8wWz2p8kaXm8s1iSGmcQSFLjDAJJapxBIEmNMwgkqXEGgSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJIaZxBIUuMMAklqnEEgSY0zCCSpcQaBJDXOIJCkxhkEktQ4g0CSGrdhOd+U5M+r6pnTLkbS6jG3dcdUt7d72+lT3Z6mZ7k9ghdMtQpJ0mh69QiSPAR4HFDA9VV160yrkiQNZskgSHI68Ebgn4AARyd5YVV9cNbFSZJmr0+P4HzgqVV1I0CSxwI7AINAktaBPmMEd+8Ngc5NwN0zqkeSNLA+PYJdSXYC72EyRvBjwCeS/ChAVb13hvVJkmasTxA8FLgN+N5u+Q7gQOCHmASDQSBJa9iSQVBVPztEIZKkcfS5auho4EXA3Pz1q+pZsytLkjSUPqeG3g9cAHwAuH+25UiShtYnCP6nql4/80okSaPoEwSvS3Iu8GHgnr2NVfXJmVUlSRpMnyD4duB5wNP42qmh6pYlSWtcnyD4MeAxVfWVB7PhJBcCzwRur6rju7ZHAH/CZOB5N/DjVfVvD2a7kqTp6nNn8bXAYcvY9luA0/Zr2wpcVlXHAJd1y5KkEfXpERwGfC7JJ9h3jGDRy0er6sokc/s1nwGc0r1/K3A58Bv9SpUkzUKfIDh3ivs7Yt4U1l8EjpjitiVJy9DnzuIrkjwaOKaq/iLJw4ADVrrjqqoktdDnSc4CzgLYtGnTSncnSVrAkmMESV4AXAz8Udd0JJObzJbjtiSP7Lb7SOD2hVasqu1VtbmqNm/cuHGZu5MkLaXPYPEvAScD/wFQVTcA37zM/V0CnNm9PxP4s2VuR5I0JX2C4J75l44m2cDkPoJFJbkI+ChwbJI9SZ4PbANOTXID8H3dsiRpRH0Gi69I8jLgwCSnAmczmXdoUVW1ZYGPnv4g6tM6Nrd1x9S3uXvb6VPfprTe9ekRbGXyDIJPAy8EdlbVb820KknSYPr0CF5UVa8D3rS3Ic
lLujZJ0hrXp0dw5gO0/cyU65AkjWTBHkGSLcBPAkcnuWTeR4cAX5p1YZKkYSx2aujvgFuBw4Hz57XfDVwzy6IkScNZMAiq6mbgZuDJw5UjSRpanzECSdI6ZhBIUuMMAklq3LKCIMl5U65DkjSS5fYIrppqFZKk0SwrCKpqybmGJElrw5JTTCR5/QM03wXsqiqnkZakNa5Pj+ChwInADd3rBOAo4PlJXjvD2iRJA+gz6dwJwMlVdR9AkjcAfw08hcmMpJKkNaxPj+DhwMHzlg8CHtEFwz0zqUqSNJg+PYLfA65OcjkQ4HuAVyQ5CPiLGdYmSRrAkkFQVRck2Qmc1DW9rKr+pXv/azOrTJI0iD5XDX0AeBdwSVV9efYlSZKG1GeM4NXAdwOfSXJxkmcneeiM65IkDaTPqaErmDzA/gDgacALgAuBQ2dcmyRpAH0Gi0lyIPBDwE8ATwTeOsuiJEnD6TNG8B4mA8WXAn8AXFFV98+6MEnSMPr0CC4Atuy9oUyStL70GSP4UJLjkxzHZLqJve1vm2llkqRB9Dk1dC5wCnAcsBN4BvA3gEEgSetAn1NDzwYeD3yqqn42yRHAO2ZblqQWzG3dMfVt7t52+tS3ud71uY/gv7vB4XuTHArcDjxqtmVJkobSp0ewK8lhwJuYPJnsP4GPzrQqSdJg+gwWn929fWOSS4FDq+qa2ZYlSRpKrxvK9qqq3TOqQ5I0kuU+vF6StE4YBJLUuCWDIMn5Sb5tiGIkScPr0yP4LLA9yceS/EKSb5x1UZKk4SwZBFX15qo6GfhpYA64Jsm7kjx11sVJkmav1xhB9yyCx3WvO4F/AH4lybtnWJskaQB95hp6DZNnEVwGvKKqPt599Mok18+yOEnS7PW5j+Aa4LcXeF7xSQ/QJklaQxYMgiRP7N7+A3Bskn0+r6pPVtVdy9lpkt3A3cB9wL1VtXk525EkrdxiPYLzF/msmDy/eCWeWlV3rnAbkqQVWjAIqsqrgiSpAX0fXv9dTC4d/er6K3xCWQEfTlLAH1XV9hVsS5K0An2uGno78Fjgaibn9GHyi3wlQfCUqrolyTcDH0nyuaq6cr/9ngWcBbBp06YV7EqStJg+PYLNwHFVVdPaaVXd0n29Pcn7mFx9dOV+62wHtgNs3rx5avuWJO2rzw1l1wLfMq0dJjkoySF73wPf3+1DkjSCxS4f/QCTU0CHAJ9J8nHgnr2fV9WzlrnPI4D3dZejbgDeVVWXLnNbkqQVWuzU0KtnscOqugl4/Cy2LUl68Ba7fPQKgCSvrKrfmP9ZklcCV8y4NknSAPqMEZz6AG3PmHYhkqRxLDZG8IvA2cBjksx/WP0hwN/NujBJ0jAWGyN4F/BB4HeBrfPa766qL820KknSYBYbI7gLuAvY0j2P4Ihu/YOTHFxVnx+oRknSDPW5s/gc4DzgNuD+rrmAE2ZXliRpKH3uLH4pcGxV/eusi9HqMbd1x1S3t3vb6VPdnqTp6XPV0BeYnCKSJK1DfXoENwGXJ9nBvncW//7MqpIkDaZPEHy+ez2ke0mS1pElg6CqXg6Q5OBu+T9nXZQkaThLjhEkOT7Jp4DrgOuSXJXk22ZfmiRpCH0Gi7cDv1JVj66qRwO/CrxptmVJkobSJwgOqqq/2rtQVZcDB82sIknSoHpdNZTkd4C3d8vPZXIlkSRpHejTI/g5YCPw3u61sWuTJK0Dfa4a+jfgxQPUIkkawWLTUF+y2Deu4FGVkqRVZLEewZOZTC9xEfAxIINUJEka1GJB8C1Mnk62BfhJYAdwUVVdN0RhkqRhLDhYXFX3VdWlVXUm8CTgRiZzDp0zWHWSpJlbdLA4yTcApzPpFcwBrwfeN/uyJElDWWyw+G3A8cBO4OVVde1gVUmSBrNYj+C5wJeBlwAvTr46VhygqurQGdcmSRrAYs8s7nOzmSStetN+4h6sr6fu+ctekhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJ
IaZxBIUuMMAklqnEEgSY0zCCSpcQaBJDVulCBIclqS65PcmGTrGDVIkiYGD4IkBwB/CDwDOA7YkuS4oeuQJE2M0SM4Cbixqm6qqq8A7wbOGKEOSRLjBMGRwBfmLe/p2iRJI0hVDbvD5NnAaVX1893y84DvrKpz9lvvLOAsgE2bNn3HzTffvKz9DfWIurW6n/X0uD1pbKvtkZhJrqqqzUutN0aP4BbgUfOWj+ra9lFV26tqc1Vt3rhx42DFSVJrxgiCTwDHJDk6yUOA5wCXjFCHJAnYMPQOq+reJOcAHwIOAC6squuGrkOSNDF4EABU1U5g5xj7liTtyzuLJalxBoEkNc4gkKTGjTJGoOXzun9J02aPQJIaZxBIUuMMAklqnGMEkjQla3UMzx6BJDXOIJCkxhkEktQ4g0CSGmcQSFLjDAJJapxBIEmNMwgkqXEGgSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlx6/7BNGv1QRGSNBR7BJLUuHXfIxiKPQ9Ja5U9AklqnEEgSY0zCCSpcQaBJDXOIJCkxhkEktQ4g0CSGmcQSFLjDAJJalyqauwalpTkbuD6seuYksOBO8cuYorW0/Gsp2MBj2c1G+pYHl1VG5daaa1MMXF9VW0eu4hpSLJrvRwLrK/jWU/HAh7ParbajsVTQ5LUOINAkhq3VoJg+9gFTNF6OhZYX8ezno4FPJ7VbFUdy5oYLJYkzc5a6RFIkmZkVQdBktOSXJ/kxiRbx65nJZI8KslfJflMkuuSvGTsmlYqyQFJPpXkz8euZaWSHJbk4iSfS/LZJE8eu6aVSPLL3c/ZtUkuSvLQsWvqK8mFSW5Pcu28tkck+UiSG7qvDx+zxgdjgeN5Vfezdk2S9yU5bMwaV20QJDkA+EPgGcBxwJYkx41b1YrcC/xqVR0HPAn4pTV+PAAvAT47dhFT8jrg0qp6HPB41vBxJTkSeDGwuaqOBw4AnjNuVQ/KW4DT9mvbClxWVccAl3XLa8Vb+P/H8xHg+Ko6AfhH4DeHLmq+VRsEwEnAjVV1U1V9BXg3cMbINS1bVd1aVZ/s3t/N5BfNkeNWtXxJjgJOB948di0rleQbge8BLgCoqq9U1b+PW9WKbQAOTLIBeBjwLyPX01tVXQl8ab/mM4C3du/fCvzwoEWtwAMdT1V9uKru7Rb/Hjhq8MLmWc1BcCTwhXnLe1jDvzjnSzIHPAH42LiVrMhrgV8H7h+7kCk4GrgD+OPuVNebkxw0dlHLVVW3AK8GPg/cCtxVVR8et6oVO6Kqbu3efxE4YsxipuzngA+OWcBqDoJ1KcnBwJ8CL62q/xi7nuVI8kzg9qq6auxapmQD8ETgDVX1BODLrK1TD/vozp+fwSTgvhU4KMlzx61qempyqeO6uNwxyW8xOW38zjHrWM1BcAvwqHnLR3Vta1aSr2cSAu+sqveOXc8KnAw8K8luJqfsnpbkHeOWtCJ7gD1VtbeHdjGTYFirvg/456q6o6r+F3gv8F0j17RStyV5JED39faR61mxJD8DPBP4qRr5Ov7VHASfAI5JcnSShzAZ7Lpk5JqWLUmYnIP+bFX9/tj1rERV/WZVHVVVc0z+Xf6yqtbsX5xV9UXgC0mO7ZqeDnxmxJJW6vPAk5I8rPu5ezprePC7cwlwZvf+TODPRqxlxZKcxuTU6rOq6r/GrmfVBkE3kHIO8CEmP8Tvqarrxq1qRU4Gnsfkr+eru9cPjl2UvupFwDuTXAOcCLxi5HqWrevZXAx8Evg0k//nq+pO1sUkuQj4KHBskj1Jng9sA05NcgOTHs+2MWt8MBY4nj8ADgE+0v0ueOOoNXpnsSS1bdX2CCRJwzAIJKlxBoEkNc4gkKTGGQSS1DiDQAKS1Pyb4pJsSHLHcmdW7WYzPXve8inrYZZWrU8GgTTxZeD4JAd2y6eysjvZDwPOXnItaRUwCKSv2clkRlWALcBFez/o5sN/fzd//N8nOaFrP6+bb/7yJD
cleXH3LduAx3Y3C72qazt43jMP3tnd9SuNziCQvubdwHO6h7icwL6zw74c+FQ3f/zLgLfN++xxwA8wmTr93G5Oqa3AP1XViVX1a916TwBeyuT5Go9hcre5NDqDQOpU1TXAHJPewM79Pn4K8PZuvb8EvinJod1nO6rqnqq6k8lkaAtNkfzxqtpTVfcDV3f7kka3YewCpFXmEiZz+Z8CfFPP77ln3vv7WPj/Vd/1pEHZI5D2dSHw8qr69H7tfw38FEyuAALuXOJ5EnczmVRMWvX8i0Sap6r2AK9/gI/OAy7sZif9L742JfJC2/nXJH/bPbD8g8COadcqTYuzj0pS4zw1JEmNMwgkqXEGgSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWrc/wHL2ncwPAAPTwAAAABJRU5ErkJggg==\n",
  3343. "text/plain": [
  3344. "<Figure size 432x288 with 1 Axes>"
  3345. ]
  3346. },
  3347. "metadata": {
  3348. "needs_background": "light"
  3349. },
  3350. "output_type": "display_data"
  3351. }
  3352. ],
  3353. "source": [
  3354. "months = np.arange(1,13)\n",
  3355. "monthly_mean = [np.mean(data[data[:,1] == month, 3]) for month in months]\n",
  3356. "\n",
  3357. "fig, ax = plt.subplots()\n",
  3358. "ax.bar(months, monthly_mean)\n",
  3359. "ax.set_xlabel(\"Month\")\n",
  3360. "ax.set_ylabel(\"Monthly avg. temp.\");"
  3361. ]
  3362. },
  3363. {
  3364. "cell_type": "markdown",
  3365. "metadata": {},
  3366. "source": [
  3367. "### 7.9 高维数据的计算"
  3368. ]
  3369. },
  3370. {
  3371. "cell_type": "markdown",
  3372. "metadata": {},
  3373. "source": [
  3374. "当`min`, `max`等函数应用于高维数组时,有时需要对整个数组进行计算,有时则只需按行或按列进行计算。通过`axis`参数,我们可以指定函数的计算方式:"
  3375. ]
  3376. },
  3377. {
  3378. "cell_type": "code",
  3379. "execution_count": 157,
  3380. "metadata": {},
  3381. "outputs": [
  3382. {
  3383. "data": {
  3384. "text/plain": [
  3385. "array([[0.99782852, 0.15992805, 0.31262638],\n",
  3386. " [0.51702607, 0.45658172, 0.66789036],\n",
  3387. " [0.77771351, 0.42574723, 0.14011317]])"
  3388. ]
  3389. },
  3390. "execution_count": 157,
  3391. "metadata": {},
  3392. "output_type": "execute_result"
  3393. }
  3394. ],
  3395. "source": [
  3396. "import numpy as np\n",
  3397. "\n",
  3398. "m = np.random.rand(3,3)\n",
  3399. "m"
  3400. ]
  3401. },
  3402. {
  3403. "cell_type": "code",
  3404. "execution_count": 158,
  3405. "metadata": {},
  3406. "outputs": [
  3407. {
  3408. "data": {
  3409. "text/plain": [
  3410. "0.997828517861979"
  3411. ]
  3412. },
  3413. "execution_count": 158,
  3414. "metadata": {},
  3415. "output_type": "execute_result"
  3416. }
  3417. ],
  3418. "source": [
  3419. "# global max\n",
  3420. "m.max()"
  3421. ]
  3422. },
  3423. {
  3424. "cell_type": "code",
  3425. "execution_count": 159,
  3426. "metadata": {},
  3427. "outputs": [
  3428. {
  3429. "data": {
  3430. "text/plain": [
  3431. "array([0.99782852, 0.45658172, 0.66789036])"
  3432. ]
  3433. },
  3434. "execution_count": 159,
  3435. "metadata": {},
  3436. "output_type": "execute_result"
  3437. }
  3438. ],
  3439. "source": [
  3440. "# max in each column\n",
  3441. "m.max(axis=0)"
  3442. ]
  3443. },
  3444. {
  3445. "cell_type": "code",
  3446. "execution_count": 160,
  3447. "metadata": {},
  3448. "outputs": [
  3449. {
  3450. "data": {
  3451. "text/plain": [
  3452. "array([0.99782852, 0.66789036, 0.77771351])"
  3453. ]
  3454. },
  3455. "execution_count": 160,
  3456. "metadata": {},
  3457. "output_type": "execute_result"
  3458. }
  3459. ],
  3460. "source": [
  3461. "# max in each row\n",
  3462. "m.max(axis=1)"
  3463. ]
  3464. },
  3465. {
  3466. "cell_type": "markdown",
  3467. "metadata": {},
  3468. "source": [
  3469. "`array`和`matrix`类中的许多其他函数和方法也接受同样的(可选的)关键字参数`axis`。"
  3470. ]
  3471. },
  3472. {
  3473. "cell_type": "markdown",
  3474. "metadata": {},
  3475. "source": [
  3476. "## 8. 数组的重塑、调整大小和堆叠"
  3477. ]
  3478. },
  3479. {
  3480. "cell_type": "markdown",
  3481. "metadata": {},
  3482. "source": [
  3483. "Numpy数组的形状可以被修改而无需复制底层数据,这使得即使对于大型数组也能有较快的操作。"
  3484. ]
  3485. },
  3486. {
  3487. "cell_type": "code",
  3488. "execution_count": 162,
  3489. "metadata": {},
  3490. "outputs": [
  3491. {
  3492. "name": "stdout",
  3493. "output_type": "stream",
  3494. "text": [
  3495. "[[0.97579482 0.78668761 0.61373444]\n",
  3496. " [0.58850244 0.9784108 0.08465447]\n",
  3497. " [0.57262123 0.44795615 0.75564229]\n",
  3498. " [0.36770219 0.34095592 0.16259103]]\n"
  3499. ]
  3500. }
  3501. ],
  3502. "source": [
  3503. "import numpy as np\n",
  3504. "\n",
  3505. "A = np.random.rand(4, 3)\n",
  3506. "print(A)"
  3507. ]
  3508. },
  3509. {
  3510. "cell_type": "code",
  3511. "execution_count": 163,
  3512. "metadata": {},
  3513. "outputs": [
  3514. {
  3515. "name": "stdout",
  3516. "output_type": "stream",
  3517. "text": [
  3518. "4 3\n"
  3519. ]
  3520. }
  3521. ],
  3522. "source": [
  3523. "n, m = A.shape\n",
  3524. "print(n, m)"
  3525. ]
  3526. },
  3527. {
  3528. "cell_type": "code",
  3529. "execution_count": 166,
  3530. "metadata": {},
  3531. "outputs": [
  3532. {
  3533. "data": {
  3534. "text/plain": [
  3535. "array([[0.97579482, 0.78668761, 0.61373444, 0.58850244, 0.9784108 ,\n",
  3536. " 0.08465447, 0.57262123, 0.44795615, 0.75564229, 0.36770219,\n",
  3537. " 0.34095592, 0.16259103]])"
  3538. ]
  3539. },
  3540. "execution_count": 166,
  3541. "metadata": {},
  3542. "output_type": "execute_result"
  3543. }
  3544. ],
  3545. "source": [
  3546. "B = A.reshape((1,n*m))\n",
  3547. "B"
  3548. ]
  3549. },
  3550. {
  3551. "cell_type": "code",
  3552. "execution_count": 167,
  3553. "metadata": {},
  3554. "outputs": [
  3555. {
  3556. "name": "stdout",
  3557. "output_type": "stream",
  3558. "text": [
  3559. "[[0.97579482]\n",
  3560. " [0.78668761]\n",
  3561. " [0.61373444]\n",
  3562. " [0.58850244]\n",
  3563. " [0.9784108 ]\n",
  3564. " [0.08465447]\n",
  3565. " [0.57262123]\n",
  3566. " [0.44795615]\n",
  3567. " [0.75564229]\n",
  3568. " [0.36770219]\n",
  3569. " [0.34095592]\n",
  3570. " [0.16259103]]\n"
  3571. ]
  3572. }
  3573. ],
  3574. "source": [
  3575. "B2 = A.reshape((n*m, 1))\n",
  3576. "print(B2)"
  3577. ]
  3578. },
  3579. {
  3580. "cell_type": "code",
  3581. "execution_count": 168,
  3582. "metadata": {},
  3583. "outputs": [
  3584. {
  3585. "data": {
  3586. "text/plain": [
  3587. "array([[5. , 5. , 5. , 5. , 5. ,\n",
  3588. " 0.08465447, 0.57262123, 0.44795615, 0.75564229, 0.36770219,\n",
  3589. " 0.34095592, 0.16259103]])"
  3590. ]
  3591. },
  3592. "execution_count": 168,
  3593. "metadata": {},
  3594. "output_type": "execute_result"
  3595. }
  3596. ],
  3597. "source": [
  3598. "B[0,0:5] = 5 # modify the array\n",
  3599. "\n",
  3600. "B"
  3601. ]
  3602. },
  3603. {
  3604. "cell_type": "code",
  3605. "execution_count": 169,
  3606. "metadata": {},
  3607. "outputs": [
  3608. {
  3609. "data": {
  3610. "text/plain": [
  3611. "array([[5. , 5. , 5. ],\n",
  3612. " [5. , 5. , 0.08465447],\n",
  3613. " [0.57262123, 0.44795615, 0.75564229],\n",
  3614. " [0.36770219, 0.34095592, 0.16259103]])"
  3615. ]
  3616. },
  3617. "execution_count": 169,
  3618. "metadata": {},
  3619. "output_type": "execute_result"
  3620. }
  3621. ],
  3622. "source": [
  3623. "A # and the original variable is also changed. B is only a different view of the same data"
  3624. ]
  3625. },
  3626. {
  3627. "cell_type": "markdown",
  3628. "metadata": {},
  3629. "source": [
  3630. "We can also use the function `flatten` to turn a higher-dimensional array into a vector. But this function creates a copy of the data."
  3631. ]
  3632. },
  3633. {
  3634. "cell_type": "code",
  3635. "execution_count": 170,
  3636. "metadata": {},
  3637. "outputs": [
  3638. {
  3639. "data": {
  3640. "text/plain": [
  3641. "array([5. , 5. , 5. , 5. , 5. ,\n",
  3642. " 0.08465447, 0.57262123, 0.44795615, 0.75564229, 0.36770219,\n",
  3643. " 0.34095592, 0.16259103])"
  3644. ]
  3645. },
  3646. "execution_count": 170,
  3647. "metadata": {},
  3648. "output_type": "execute_result"
  3649. }
  3650. ],
  3651. "source": [
  3652. "B = A.flatten()\n",
  3653. "\n",
  3654. "B"
  3655. ]
  3656. },
  3657. {
  3658. "cell_type": "code",
  3659. "execution_count": 171,
  3660. "metadata": {},
  3661. "outputs": [
  3662. {
  3663. "name": "stdout",
  3664. "output_type": "stream",
  3665. "text": [
  3666. "(12,)\n"
  3667. ]
  3668. }
  3669. ],
  3670. "source": [
  3671. "print(B.shape)"
  3672. ]
  3673. },
  3674. {
  3675. "cell_type": "code",
  3676. "execution_count": 172,
  3677. "metadata": {},
  3678. "outputs": [
  3679. {
  3680. "name": "stdout",
  3681. "output_type": "stream",
  3682. "text": [
  3683. "[0.0643267 0.02070895 0.01127191 0.36318507 0.26309744 0.8332378\n",
  3684. " 0.79477743 0.52745619 0.35675021 0.55907373 0.18993756 0.15919449\n",
  3685. " 0.54789401 0.23186893 0.02898541 0.43545343 0.80684175 0.44014057\n",
  3686. " 0.05129167 0.95111801 0.40743132 0.57197596 0.6692788 0.80824496\n",
  3687. " 0.40301441 0.84369196 0.95294593 0.14876807 0.58005171 0.30849079\n",
  3688. " 0.27846197 0.01062528 0.62870079 0.6416306 0.76945123 0.39443503\n",
  3689. " 0.76619764 0.42833327 0.60720341 0.16246792 0.76067082 0.27134944\n",
  3690. " 0.36268568 0.78501742 0.36935191 0.43410334 0.10594888 0.12941728\n",
  3691. " 0.51760718 0.57260509 0.09756568 0.13216908 0.32918105 0.9338644\n",
  3692. " 0.71681907 0.58218819 0.58798528 0.81665138 0.73604797 0.91730101]\n"
  3693. ]
  3694. }
  3695. ],
  3696. "source": [
  3697. "T = np.random.rand(3, 4, 5)\n",
  3698. "T2 = T.flatten()\n",
  3699. "print(T2)"
  3700. ]
  3701. },
  3702. {
  3703. "cell_type": "code",
  3704. "execution_count": 176,
  3705. "metadata": {},
  3706. "outputs": [
  3707. {
  3708. "data": {
  3709. "text/plain": [
  3710. "array([10. , 10. , 10. , 10. , 10. ,\n",
  3711. " 0.08465447, 0.57262123, 0.44795615, 0.75564229, 0.36770219,\n",
  3712. " 0.34095592, 0.16259103])"
  3713. ]
  3714. },
  3715. "execution_count": 176,
  3716. "metadata": {},
  3717. "output_type": "execute_result"
  3718. }
  3719. ],
  3720. "source": [
  3721. "B[0:5] = 10\n",
  3722. "\n",
  3723. "B"
  3724. ]
  3725. },
  3726. {
  3727. "cell_type": "code",
  3728. "execution_count": 177,
  3729. "metadata": {},
  3730. "outputs": [
  3731. {
  3732. "data": {
  3733. "text/plain": [
  3734. "array([[5. , 5. , 5. ],\n",
  3735. " [5. , 5. , 0.08465447],\n",
  3736. " [0.57262123, 0.44795615, 0.75564229],\n",
  3737. " [0.36770219, 0.34095592, 0.16259103]])"
  3738. ]
  3739. },
  3740. "execution_count": 177,
  3741. "metadata": {},
  3742. "output_type": "execute_result"
  3743. }
  3744. ],
  3745. "source": [
  3746. "A # 现在A并没有改变,因为B的数值是A的复制,并不指向同样的值。"
  3747. ]
  3748. },
  3749. {
  3750. "cell_type": "markdown",
  3751. "metadata": {},
  3752. "source": [
  3753. "## 9. 添加新的维度:newaxis"
  3754. ]
  3755. },
  3756. {
  3757. "cell_type": "markdown",
  3758. "metadata": {},
  3759. "source": [
  3760. "有了`newaxis`,我们可以在数组中插入新的维度,例如将一个向量转换为列或行矩阵:"
  3761. ]
  3762. },
  3763. {
  3764. "cell_type": "code",
  3765. "execution_count": 178,
  3766. "metadata": {},
  3767. "outputs": [],
  3768. "source": [
  3769. "v = np.array([1,2,3])"
  3770. ]
  3771. },
  3772. {
  3773. "cell_type": "code",
  3774. "execution_count": 179,
  3775. "metadata": {},
  3776. "outputs": [
  3777. {
  3778. "data": {
  3779. "text/plain": [
  3780. "(3,)"
  3781. ]
  3782. },
  3783. "execution_count": 179,
  3784. "metadata": {},
  3785. "output_type": "execute_result"
  3786. }
  3787. ],
  3788. "source": [
  3789. "np.shape(v)"
  3790. ]
  3791. },
  3792. {
  3793. "cell_type": "code",
  3794. "execution_count": 180,
  3795. "metadata": {},
  3796. "outputs": [
  3797. {
  3798. "name": "stdout",
  3799. "output_type": "stream",
  3800. "text": [
  3801. "[1 2 3]\n"
  3802. ]
  3803. }
  3804. ],
  3805. "source": [
  3806. "print(v)"
  3807. ]
  3808. },
  3809. {
  3810. "cell_type": "code",
  3811. "execution_count": 182,
  3812. "metadata": {},
  3813. "outputs": [
  3814. {
  3815. "name": "stdout",
  3816. "output_type": "stream",
  3817. "text": [
  3818. "(3, 1)\n"
  3819. ]
  3820. }
  3821. ],
  3822. "source": [
  3823. "v2 = v.reshape(3, 1)\n",
  3824. "print(v2.shape)"
  3825. ]
  3826. },
  3827. {
  3828. "cell_type": "code",
  3829. "execution_count": 190,
  3830. "metadata": {},
  3831. "outputs": [
  3832. {
  3833. "name": "stdout",
  3834. "output_type": "stream",
  3835. "text": [
  3836. "(3,)\n",
  3837. "(3, 1)\n"
  3838. ]
  3839. }
  3840. ],
  3841. "source": [
  3842. "# 做一个向量v的列矩阵\n",
  3843. "v2 = v[:, np.newaxis]\n",
  3844. "print(v.shape)\n",
  3845. "print(v2.shape)\n"
  3846. ]
  3847. },
  3848. {
  3849. "cell_type": "code",
  3850. "execution_count": 191,
  3851. "metadata": {},
  3852. "outputs": [
  3853. {
  3854. "data": {
  3855. "text/plain": [
  3856. "(3, 1)"
  3857. ]
  3858. },
  3859. "execution_count": 191,
  3860. "metadata": {},
  3861. "output_type": "execute_result"
  3862. }
  3863. ],
  3864. "source": [
  3865. "# 列矩阵\n",
  3866. "v[:,newaxis].shape"
  3867. ]
  3868. },
  3869. {
  3870. "cell_type": "code",
  3871. "execution_count": 144,
  3872. "metadata": {},
  3873. "outputs": [
  3874. {
  3875. "data": {
  3876. "text/plain": [
  3877. "(1, 3)"
  3878. ]
  3879. },
  3880. "execution_count": 144,
  3881. "metadata": {},
  3882. "output_type": "execute_result"
  3883. }
  3884. ],
  3885. "source": [
  3886. "# 行矩阵\n",
  3887. "v[newaxis,:].shape"
  3888. ]
  3889. },
  3890. {
  3891. "cell_type": "markdown",
  3892. "metadata": {},
  3893. "source": [
  3894. "## 10. 叠加和重复数组"
  3895. ]
  3896. },
  3897. {
  3898. "cell_type": "markdown",
  3899. "metadata": {},
  3900. "source": [
  3901. "利用函数`repeat`, `tile`, `vstack`, `hstack`, 和`concatenate` 我们可以用较小的向量和矩阵来创建更大的向量和矩阵:"
  3902. ]
  3903. },
  3904. {
  3905. "cell_type": "markdown",
  3906. "metadata": {},
  3907. "source": [
  3908. "### 10.1 tile and repeat"
  3909. ]
  3910. },
  3911. {
  3912. "cell_type": "code",
  3913. "execution_count": 192,
  3914. "metadata": {},
  3915. "outputs": [],
  3916. "source": [
  3917. "a = np.array([[1, 2], [3, 4]])"
  3918. ]
  3919. },
  3920. {
  3921. "cell_type": "code",
  3922. "execution_count": 194,
  3923. "metadata": {},
  3924. "outputs": [
  3925. {
  3926. "name": "stdout",
  3927. "output_type": "stream",
  3928. "text": [
  3929. "[[1 2]\n",
  3930. " [3 4]]\n"
  3931. ]
  3932. },
  3933. {
  3934. "data": {
  3935. "text/plain": [
  3936. "array([1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4])"
  3937. ]
  3938. },
  3939. "execution_count": 194,
  3940. "metadata": {},
  3941. "output_type": "execute_result"
  3942. }
  3943. ],
  3944. "source": [
  3945. "print(a)\n",
  3946. "\n",
  3947. "# 重复每一个元素三次\n",
  3948. "np.repeat(a, 3)"
  3949. ]
  3950. },
  3951. {
  3952. "cell_type": "code",
  3953. "execution_count": 195,
  3954. "metadata": {},
  3955. "outputs": [
  3956. {
  3957. "data": {
  3958. "text/plain": [
  3959. "array([[1, 2, 1, 2, 1, 2],\n",
  3960. " [3, 4, 3, 4, 3, 4]])"
  3961. ]
  3962. },
  3963. "execution_count": 195,
  3964. "metadata": {},
  3965. "output_type": "execute_result"
  3966. }
  3967. ],
  3968. "source": [
  3969. "# tile the matrix 3 times \n",
  3970. "np.tile(a, 3)"
  3971. ]
  3972. },
  3973. {
  3974. "cell_type": "code",
  3975. "execution_count": 196,
  3976. "metadata": {},
  3977. "outputs": [
  3978. {
  3979. "data": {
  3980. "text/plain": [
  3981. "array([[1, 2, 1, 2, 1, 2],\n",
  3982. " [3, 4, 3, 4, 3, 4]])"
  3983. ]
  3984. },
  3985. "execution_count": 196,
  3986. "metadata": {},
  3987. "output_type": "execute_result"
  3988. }
  3989. ],
  3990. "source": [
  3991. "# 更好的方案\n",
  3992. "np.tile(a, (1, 3))"
  3993. ]
  3994. },
  3995. {
  3996. "cell_type": "code",
  3997. "execution_count": 34,
  3998. "metadata": {},
  3999. "outputs": [
  4000. {
  4001. "data": {
  4002. "text/plain": [
  4003. "array([[1, 2],\n",
  4004. " [3, 4],\n",
  4005. " [1, 2],\n",
  4006. " [3, 4],\n",
  4007. " [1, 2],\n",
  4008. " [3, 4]])"
  4009. ]
  4010. },
  4011. "execution_count": 34,
  4012. "metadata": {},
  4013. "output_type": "execute_result"
  4014. }
  4015. ],
  4016. "source": [
  4017. "np.tile(a, (3, 1))"
  4018. ]
  4019. },
  4020. {
  4021. "cell_type": "markdown",
  4022. "metadata": {},
  4023. "source": [
  4024. "### 10.2 concatenate"
  4025. ]
  4026. },
  4027. {
  4028. "cell_type": "code",
  4029. "execution_count": 197,
  4030. "metadata": {},
  4031. "outputs": [],
  4032. "source": [
  4033. "b = np.array([[5, 6]])"
  4034. ]
  4035. },
  4036. {
  4037. "cell_type": "code",
  4038. "execution_count": 198,
  4039. "metadata": {},
  4040. "outputs": [
  4041. {
  4042. "data": {
  4043. "text/plain": [
  4044. "array([[1, 2],\n",
  4045. " [3, 4],\n",
  4046. " [5, 6]])"
  4047. ]
  4048. },
  4049. "execution_count": 198,
  4050. "metadata": {},
  4051. "output_type": "execute_result"
  4052. }
  4053. ],
  4054. "source": [
  4055. "np.concatenate((a, b), axis=0)"
  4056. ]
  4057. },
  4058. {
  4059. "cell_type": "code",
  4060. "execution_count": 200,
  4061. "metadata": {},
  4062. "outputs": [
  4063. {
  4064. "data": {
  4065. "text/plain": [
  4066. "array([[1, 2, 5],\n",
  4067. " [3, 4, 6]])"
  4068. ]
  4069. },
  4070. "execution_count": 200,
  4071. "metadata": {},
  4072. "output_type": "execute_result"
  4073. }
  4074. ],
  4075. "source": [
  4076. "np.concatenate((a, b.T), axis=1)"
  4077. ]
  4078. },
  4079. {
  4080. "cell_type": "markdown",
  4081. "metadata": {},
  4082. "source": [
  4083. "### 10.3 hstack and vstack"
  4084. ]
  4085. },
  4086. {
  4087. "cell_type": "code",
  4088. "execution_count": 201,
  4089. "metadata": {},
  4090. "outputs": [
  4091. {
  4092. "data": {
  4093. "text/plain": [
  4094. "array([[1, 2],\n",
  4095. " [3, 4],\n",
  4096. " [5, 6]])"
  4097. ]
  4098. },
  4099. "execution_count": 201,
  4100. "metadata": {},
  4101. "output_type": "execute_result"
  4102. }
  4103. ],
  4104. "source": [
  4105. "np.vstack((a,b))"
  4106. ]
  4107. },
  4108. {
  4109. "cell_type": "code",
  4110. "execution_count": 202,
  4111. "metadata": {},
  4112. "outputs": [
  4113. {
  4114. "data": {
  4115. "text/plain": [
  4116. "array([[1, 2, 5],\n",
  4117. " [3, 4, 6]])"
  4118. ]
  4119. },
  4120. "execution_count": 202,
  4121. "metadata": {},
  4122. "output_type": "execute_result"
  4123. }
  4124. ],
  4125. "source": [
  4126. "np.hstack((a,b.T))"
  4127. ]
  4128. },
  4129. {
  4130. "cell_type": "markdown",
  4131. "metadata": {},
  4132. "source": [
  4133. "## 11. 复制和“深度复制”"
  4134. ]
  4135. },
  4136. {
  4137. "cell_type": "markdown",
  4138. "metadata": {},
  4139. "source": [
  4140. "为了获得高性能,Python中的赋值通常不复制底层对象。例如,在函数之间传递对象时,这一点非常重要,可以避免不必要的大量内存复制(技术术语:通过引用传递)。"
  4141. ]
  4142. },
  4143. {
  4144. "cell_type": "code",
  4145. "execution_count": 203,
  4146. "metadata": {},
  4147. "outputs": [
  4148. {
  4149. "data": {
  4150. "text/plain": [
  4151. "array([[1, 2],\n",
  4152. " [3, 4]])"
  4153. ]
  4154. },
  4155. "execution_count": 203,
  4156. "metadata": {},
  4157. "output_type": "execute_result"
  4158. }
  4159. ],
  4160. "source": [
  4161. "A = np.array([[1, 2], [3, 4]])\n",
  4162. "\n",
  4163. "A"
  4164. ]
  4165. },
  4166. {
  4167. "cell_type": "code",
  4168. "execution_count": 204,
  4169. "metadata": {},
  4170. "outputs": [],
  4171. "source": [
  4172. "# 现在B和A指的是同一个数组数据\n",
  4173. "B = A "
  4174. ]
  4175. },
  4176. {
  4177. "cell_type": "code",
  4178. "execution_count": 205,
  4179. "metadata": {},
  4180. "outputs": [
  4181. {
  4182. "data": {
  4183. "text/plain": [
  4184. "array([[10, 2],\n",
  4185. " [ 3, 4]])"
  4186. ]
  4187. },
  4188. "execution_count": 205,
  4189. "metadata": {},
  4190. "output_type": "execute_result"
  4191. }
  4192. ],
  4193. "source": [
  4194. "# 改变B影响A\n",
  4195. "B[0,0] = 10\n",
  4196. "\n",
  4197. "B"
  4198. ]
  4199. },
  4200. {
  4201. "cell_type": "code",
  4202. "execution_count": 206,
  4203. "metadata": {},
  4204. "outputs": [
  4205. {
  4206. "data": {
  4207. "text/plain": [
  4208. "array([[10, 2],\n",
  4209. " [ 3, 4]])"
  4210. ]
  4211. },
  4212. "execution_count": 206,
  4213. "metadata": {},
  4214. "output_type": "execute_result"
  4215. }
  4216. ],
  4217. "source": [
  4218. "A"
  4219. ]
  4220. },
  4221. {
  4222. "cell_type": "markdown",
  4223. "metadata": {},
  4224. "source": [
  4225. "如果我们想避免这种行为,那么当我们从`A`中复制一个新的完全独立的对象`B`时,我们需要使用函数`copy`来做一个所谓的“深度复制”:"
  4226. ]
  4227. },
  4228. {
  4229. "cell_type": "code",
  4230. "execution_count": 207,
  4231. "metadata": {},
  4232. "outputs": [],
  4233. "source": [
  4234. "B = np.copy(A)"
  4235. ]
  4236. },
  4237. {
  4238. "cell_type": "code",
  4239. "execution_count": 208,
  4240. "metadata": {},
  4241. "outputs": [
  4242. {
  4243. "data": {
  4244. "text/plain": [
  4245. "array([[-5, 2],\n",
  4246. " [ 3, 4]])"
  4247. ]
  4248. },
  4249. "execution_count": 208,
  4250. "metadata": {},
  4251. "output_type": "execute_result"
  4252. }
  4253. ],
  4254. "source": [
  4255. "# 现在如果我们改变B,A不受影响\n",
  4256. "B[0,0] = -5\n",
  4257. "\n",
  4258. "B"
  4259. ]
  4260. },
  4261. {
  4262. "cell_type": "code",
  4263. "execution_count": 209,
  4264. "metadata": {},
  4265. "outputs": [
  4266. {
  4267. "data": {
  4268. "text/plain": [
  4269. "array([[10, 2],\n",
  4270. " [ 3, 4]])"
  4271. ]
  4272. },
  4273. "execution_count": 209,
  4274. "metadata": {},
  4275. "output_type": "execute_result"
  4276. }
  4277. ],
  4278. "source": [
  4279. "A"
  4280. ]
  4281. },
  4282. {
  4283. "cell_type": "markdown",
  4284. "metadata": {},
  4285. "source": [
  4286. "## 12. 遍历数组元素"
  4287. ]
  4288. },
  4289. {
  4290. "cell_type": "markdown",
  4291. "metadata": {},
  4292. "source": [
  4293. "通常,我们希望尽可能避免遍历数组元素(不惜一切代价)。原因是在像Python(或MATLAB)这样的解释型语言中,迭代与向量化操作相比真的很慢。\n",
  4294. "\n",
  4295. "然而,有时迭代是不可避免的。对于这种情况,Python的`for`循环是最方便的遍历数组的方法:"
  4296. ]
  4297. },
  4298. {
  4299. "cell_type": "code",
  4300. "execution_count": 210,
  4301. "metadata": {},
  4302. "outputs": [
  4303. {
  4304. "name": "stdout",
  4305. "output_type": "stream",
  4306. "text": [
  4307. "1\n",
  4308. "2\n",
  4309. "3\n",
  4310. "4\n"
  4311. ]
  4312. }
  4313. ],
  4314. "source": [
  4315. "v = np.array([1,2,3,4])\n",
  4316. "\n",
  4317. "for element in v:\n",
  4318. " print(element)"
  4319. ]
  4320. },
  4321. {
  4322. "cell_type": "code",
  4323. "execution_count": 211,
  4324. "metadata": {},
  4325. "outputs": [
  4326. {
  4327. "name": "stdout",
  4328. "output_type": "stream",
  4329. "text": [
  4330. "row [1 2]\n",
  4331. "1\n",
  4332. "2\n",
  4333. "row [3 4]\n",
  4334. "3\n",
  4335. "4\n"
  4336. ]
  4337. }
  4338. ],
  4339. "source": [
  4340. "M = np.array([[1,2], [3,4]])\n",
  4341. "\n",
  4342. "for row in M:\n",
  4343. " print(\"row\", row)\n",
  4344. " \n",
  4345. " for element in row:\n",
  4346. " print(element)"
  4347. ]
  4348. },
  4349. {
  4350. "cell_type": "markdown",
  4351. "metadata": {},
  4352. "source": [
  4354. "当我们需要遍历一个数组的每个元素并修改它的元素时,使用`enumerate`函数可以方便地在`for`循环中获得元素及其索引:"
  4355. ]
  4356. },
  4357. {
  4358. "cell_type": "code",
  4359. "execution_count": 162,
  4360. "metadata": {},
  4361. "outputs": [
  4362. {
  4363. "name": "stdout",
  4364. "output_type": "stream",
  4365. "text": [
  4366. "('row_idx', 0, 'row', array([1, 2]))\n",
  4367. "('col_idx', 0, 'element', 1)\n",
  4368. "('col_idx', 1, 'element', 2)\n",
  4369. "('row_idx', 1, 'row', array([3, 4]))\n",
  4370. "('col_idx', 0, 'element', 3)\n",
  4371. "('col_idx', 1, 'element', 4)\n"
  4372. ]
  4373. }
  4374. ],
  4375. "source": [
  4376. "for row_idx, row in enumerate(M):\n",
  4377. " print(\"row_idx\", row_idx, \"row\", row)\n",
  4378. " \n",
  4379. " for col_idx, element in enumerate(row):\n",
  4380. " print(\"col_idx\", col_idx, \"element\", element)\n",
  4381. " \n",
  4382. " # 更新矩阵:对每个元素求平方\n",
  4383. " M[row_idx, col_idx] = element ** 2"
  4384. ]
  4385. },
  4386. {
  4387. "cell_type": "code",
  4388. "execution_count": 163,
  4389. "metadata": {},
  4390. "outputs": [
  4391. {
  4392. "data": {
  4393. "text/plain": [
  4394. "array([[ 1, 4],\n",
  4395. " [ 9, 16]])"
  4396. ]
  4397. },
  4398. "execution_count": 163,
  4399. "metadata": {},
  4400. "output_type": "execute_result"
  4401. }
  4402. ],
  4403. "source": [
  4404. "# 现在矩阵里的每一个元素都已经求得平方\n",
  4405. "M"
  4406. ]
  4407. },
  4408. {
  4409. "cell_type": "markdown",
  4410. "metadata": {},
  4411. "source": [
  4412. "## 13. 向量化函数"
  4413. ]
  4414. },
  4415. {
  4416. "cell_type": "markdown",
  4417. "metadata": {},
  4418. "source": [
  4419. "正如前面多次提到的,为了获得良好的性能,我们应该尽量避免对向量和矩阵中的元素进行循环,而应该使用向量化算法。将标量算法转换为向量化算法的第一步是确保我们编写的函数能够处理向量输入。"
  4420. ]
  4421. },
  4422. {
  4423. "cell_type": "code",
  4424. "execution_count": 213,
  4425. "metadata": {},
  4426. "outputs": [],
  4427. "source": [
  4428. "def Theta(x):\n",
  4429. " \"\"\"\n",
  4430. " Heaviside阶跃函数的标量实现\n",
  4431. " \"\"\"\n",
  4432. " if x >= 0:\n",
  4433. " return 1\n",
  4434. " else:\n",
  4435. " return 0"
  4436. ]
  4437. },
  4438. {
  4439. "cell_type": "code",
  4440. "execution_count": 214,
  4441. "metadata": {},
  4442. "outputs": [
  4443. {
  4444. "ename": "ValueError",
  4445. "evalue": "The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()",
  4446. "output_type": "error",
  4447. "traceback": [
  4448. "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
  4449. "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
  4450. "\u001b[0;32m<ipython-input-214-2cb2062a7e18>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mTheta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
  4451. "\u001b[0;32m<ipython-input-213-f72d7f42be84>\u001b[0m in \u001b[0;36mTheta\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mScalar\u001b[0m \u001b[0mimplemenation\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mHeaviside\u001b[0m \u001b[0mstep\u001b[0m \u001b[0mfunction\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \"\"\"\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m>=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  4452. "\u001b[0;31mValueError\u001b[0m: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()"
  4453. ]
  4454. }
  4455. ],
  4456. "source": [
  4457. "Theta(array([-3,-2,-1,0,1,2,3]))"
  4458. ]
  4459. },
  4460. {
  4461. "cell_type": "markdown",
  4462. "metadata": {},
  4463. "source": [
  4464. "这个操作并不可行,因为我们编写的`Theta`函数并不能处理向量输入。\n",
  4465. "\n",
  4466. "为了得到向量化的版本,我们可以使用Numpy函数`vectorize`。在许多情况下,它可以自动向量化一个函数:"
  4467. ]
  4468. },
  4469. {
  4470. "cell_type": "code",
  4471. "execution_count": 215,
  4472. "metadata": {},
  4473. "outputs": [],
  4474. "source": [
  4475. "Theta_vec = np.vectorize(Theta)"
  4476. ]
  4477. },
  4478. {
  4479. "cell_type": "code",
  4480. "execution_count": 216,
  4481. "metadata": {},
  4482. "outputs": [
  4483. {
  4484. "data": {
  4485. "text/plain": [
  4486. "array([0, 0, 0, 1, 1, 1, 1])"
  4487. ]
  4488. },
  4489. "execution_count": 216,
  4490. "metadata": {},
  4491. "output_type": "execute_result"
  4492. }
  4493. ],
  4494. "source": [
  4495. "Theta_vec(np.array([-3,-2,-1,0,1,2,3]))"
  4496. ]
  4497. },
  4498. {
  4499. "cell_type": "markdown",
  4500. "metadata": {},
  4501. "source": [
  4502. "我们也可以从一开始就把函数实现为能够接受向量输入的形式(编写时需要花费更多精力,但可能会有更好的性能):"
  4503. ]
  4504. },
  4505. {
  4506. "cell_type": "code",
  4507. "execution_count": 217,
  4508. "metadata": {},
  4509. "outputs": [],
  4510. "source": [
  4511. "def Theta(x):\n",
  4512. " \"\"\"\n",
  4513. " Heaviside阶跃函数的矢量感知实现。\n",
  4514. " \"\"\"\n",
  4515. " return 1 * (x >= 0)"
  4516. ]
  4517. },
  4518. {
  4519. "cell_type": "code",
  4520. "execution_count": 219,
  4521. "metadata": {},
  4522. "outputs": [
  4523. {
  4524. "data": {
  4525. "text/plain": [
  4526. "array([0, 0, 0, 1, 1, 1, 1])"
  4527. ]
  4528. },
  4529. "execution_count": 219,
  4530. "metadata": {},
  4531. "output_type": "execute_result"
  4532. }
  4533. ],
  4534. "source": [
  4535. "Theta(np.array([-3,-2,-1,0,1,2,3]))"
  4536. ]
  4537. },
  4538. {
  4539. "cell_type": "code",
  4540. "execution_count": 221,
  4541. "metadata": {},
  4542. "outputs": [
  4543. {
  4544. "name": "stdout",
  4545. "output_type": "stream",
  4546. "text": [
  4547. "[False False False True True True True]\n"
  4548. ]
  4549. },
  4550. {
  4551. "data": {
  4552. "text/plain": [
  4553. "array([0, 0, 0, 1, 1, 1, 1])"
  4554. ]
  4555. },
  4556. "execution_count": 221,
  4557. "metadata": {},
  4558. "output_type": "execute_result"
  4559. }
  4560. ],
  4561. "source": [
  4562. "a = np.array([-3,-2,-1,0,1,2,3])\n",
  4563. "b = a>=0\n",
  4564. "print(b)\n",
  4565. "b*1"
  4566. ]
  4567. },
  4568. {
  4569. "cell_type": "code",
  4570. "execution_count": 222,
  4571. "metadata": {},
  4572. "outputs": [
  4573. {
  4574. "data": {
  4575. "text/plain": [
  4576. "(0, 1)"
  4577. ]
  4578. },
  4579. "execution_count": 222,
  4580. "metadata": {},
  4581. "output_type": "execute_result"
  4582. }
  4583. ],
  4584. "source": [
  4585. "# 同样适用于标量\n",
  4586. "Theta(-1.2), Theta(2.6)"
  4587. ]
  4588. },
  4589. {
  4590. "cell_type": "markdown",
  4591. "metadata": {},
  4592. "source": [
  4593. "## 14. 在条件中使用数组"
  4594. ]
  4595. },
  4596. {
  4597. "cell_type": "markdown",
  4598. "metadata": {},
  4599. "source": [
  4600. "当在条件中使用数组时(例如`if`语句和其他布尔表达式),需要使用`any`或者`all`,它们分别要求数组中的任意一个元素或者所有元素为`True`。"
  4601. ]
  4602. },
  4603. {
  4604. "cell_type": "code",
  4605. "execution_count": 223,
  4606. "metadata": {},
  4607. "outputs": [
  4608. {
  4609. "data": {
  4610. "text/plain": [
  4611. "array([[1, 2],\n",
  4612. " [3, 4]])"
  4613. ]
  4614. },
  4615. "execution_count": 223,
  4616. "metadata": {},
  4617. "output_type": "execute_result"
  4618. }
  4619. ],
  4620. "source": [
  4621. "M = np.array([[1, 2], [3, 4]])\n",
  4622. "M"
  4623. ]
  4624. },
  4625. {
  4626. "cell_type": "code",
  4627. "execution_count": 224,
  4628. "metadata": {},
  4629. "outputs": [
  4630. {
  4631. "data": {
  4632. "text/plain": [
  4633. "True"
  4634. ]
  4635. },
  4636. "execution_count": 224,
  4637. "metadata": {},
  4638. "output_type": "execute_result"
  4639. }
  4640. ],
  4641. "source": [
  4642. "(M > 2).any()"
  4643. ]
  4644. },
  4645. {
  4646. "cell_type": "code",
  4647. "execution_count": 225,
  4648. "metadata": {},
  4649. "outputs": [
  4650. {
  4651. "name": "stdout",
  4652. "output_type": "stream",
  4653. "text": [
  4654. "at least one element in M is larger than 2\n"
  4655. ]
  4656. }
  4657. ],
  4658. "source": [
  4659. "if (M > 2).any():\n",
  4660. " print(\"at least one element in M is larger than 2\")\n",
  4661. "else:\n",
  4662. " print(\"no element in M is larger than 2\")"
  4663. ]
  4664. },
  4665. {
  4666. "cell_type": "code",
  4667. "execution_count": 226,
  4668. "metadata": {},
  4669. "outputs": [
  4670. {
  4671. "name": "stdout",
  4672. "output_type": "stream",
  4673. "text": [
  4674. "all elements in M are not larger than 5\n"
  4675. ]
  4676. }
  4677. ],
  4678. "source": [
  4679. "if (M > 5).all():\n",
  4680. " print(\"all elements in M are larger than 5\")\n",
  4681. "else:\n",
  4682. " print(\"all elements in M are not larger than 5\")"
  4683. ]
  4684. },
  4685. {
  4686. "cell_type": "markdown",
  4687. "metadata": {},
  4688. "source": [
  4689. "## 15. 类型转换"
  4690. ]
  4691. },
  4692. {
  4693. "cell_type": "markdown",
  4694. "metadata": {},
  4695. "source": [
  4696. "因为Numpy数组是*静态类型*,数组的类型一旦创建就不会改变。但是我们可以用`astype`函数(参见类似的“asarray”函数)显式地转换一个数组的类型到其他的类型,这总是创建一个新类型的新数组。"
  4697. ]
  4698. },
  4699. {
  4700. "cell_type": "code",
  4701. "execution_count": 227,
  4702. "metadata": {},
  4703. "outputs": [
  4704. {
  4705. "data": {
  4706. "text/plain": [
  4707. "dtype('int64')"
  4708. ]
  4709. },
  4710. "execution_count": 227,
  4711. "metadata": {},
  4712. "output_type": "execute_result"
  4713. }
  4714. ],
  4715. "source": [
  4716. "M.dtype"
  4717. ]
  4718. },
  4719. {
  4720. "cell_type": "code",
  4721. "execution_count": 228,
  4722. "metadata": {},
  4723. "outputs": [
  4724. {
  4725. "data": {
  4726. "text/plain": [
  4727. "array([[1., 2.],\n",
  4728. " [3., 4.]])"
  4729. ]
  4730. },
  4731. "execution_count": 228,
  4732. "metadata": {},
  4733. "output_type": "execute_result"
  4734. }
  4735. ],
  4736. "source": [
  4737. "M2 = M.astype(float)\n",
  4738. "\n",
  4739. "M2"
  4740. ]
  4741. },
  4742. {
  4743. "cell_type": "code",
  4744. "execution_count": 229,
  4745. "metadata": {},
  4746. "outputs": [
  4747. {
  4748. "data": {
  4749. "text/plain": [
  4750. "dtype('float64')"
  4751. ]
  4752. },
  4753. "execution_count": 229,
  4754. "metadata": {},
  4755. "output_type": "execute_result"
  4756. }
  4757. ],
  4758. "source": [
  4759. "M2.dtype"
  4760. ]
  4761. },
  4762. {
  4763. "cell_type": "code",
  4764. "execution_count": 230,
  4765. "metadata": {},
  4766. "outputs": [
  4767. {
  4768. "data": {
  4769. "text/plain": [
  4770. "array([[ True, True],\n",
  4771. " [ True, True]])"
  4772. ]
  4773. },
  4774. "execution_count": 230,
  4775. "metadata": {},
  4776. "output_type": "execute_result"
  4777. }
  4778. ],
  4779. "source": [
  4780. "M3 = M.astype(bool)\n",
  4781. "\n",
  4782. "M3"
  4783. ]
  4784. },
  4785. {
  4786. "cell_type": "markdown",
  4787. "metadata": {},
  4788. "source": [
  4789. "## 进一步的阅读"
  4790. ]
  4791. },
  4792. {
  4793. "cell_type": "markdown",
  4794. "metadata": {},
  4795. "source": [
  4796. "* https://numpy.org\n",
  4797. "* https://numpy.org/doc/stable/user/quickstart.html\n",
  4798. "* https://numpy.org/doc/stable/user/numpy-for-matlab-users.html - 一个针对MATLAB使用者的Numpy教程。"
  4799. ]
  4800. },
  4801. {
  4802. "cell_type": "markdown",
  4803. "metadata": {},
  4804. "source": [
  4805. "## 版本"
  4806. ]
  4807. },
  4808. {
  4809. "cell_type": "code",
  4810. "execution_count": 178,
  4811. "metadata": {},
  4812. "outputs": [
  4813. {
  4814. "data": {
  4815. "application/json": {
  4816. "Software versions": [
  4817. {
  4818. "module": "Python",
  4819. "version": "2.7.10 64bit [GCC 4.2.1 (Apple Inc. build 5577)]"
  4820. },
  4821. {
  4822. "module": "IPython",
  4823. "version": "3.2.1"
  4824. },
  4825. {
  4826. "module": "OS",
  4827. "version": "Darwin 14.1.0 x86_64 i386 64bit"
  4828. },
  4829. {
  4830. "module": "numpy",
  4831. "version": "1.9.2"
  4832. }
  4833. ]
  4834. },
  4835. "text/html": [
  4836. "<table><tr><th>Software</th><th>Version</th></tr><tr><td>Python</td><td>2.7.10 64bit [GCC 4.2.1 (Apple Inc. build 5577)]</td></tr><tr><td>IPython</td><td>3.2.1</td></tr><tr><td>OS</td><td>Darwin 14.1.0 x86_64 i386 64bit</td></tr><tr><td>numpy</td><td>1.9.2</td></tr><tr><td colspan='2'>Sat Aug 15 11:02:09 2015 JST</td></tr></table>"
  4837. ],
  4838. "text/latex": [
  4839. "\\begin{tabular}{|l|l|}\\hline\n",
  4840. "{\\bf Software} & {\\bf Version} \\\\ \\hline\\hline\n",
  4841. "Python & 2.7.10 64bit [GCC 4.2.1 (Apple Inc. build 5577)] \\\\ \\hline\n",
  4842. "IPython & 3.2.1 \\\\ \\hline\n",
  4843. "OS & Darwin 14.1.0 x86\\_64 i386 64bit \\\\ \\hline\n",
  4844. "numpy & 1.9.2 \\\\ \\hline\n",
  4845. "\\hline \\multicolumn{2}{|l|}{Sat Aug 15 11:02:09 2015 JST} \\\\ \\hline\n",
  4846. "\\end{tabular}\n"
  4847. ],
  4848. "text/plain": [
  4849. "Software versions\n",
  4850. "Python 2.7.10 64bit [GCC 4.2.1 (Apple Inc. build 5577)]\n",
  4851. "IPython 3.2.1\n",
  4852. "OS Darwin 14.1.0 x86_64 i386 64bit\n",
  4853. "numpy 1.9.2\n",
  4854. "Sat Aug 15 11:02:09 2015 JST"
  4855. ]
  4856. },
  4857. "execution_count": 178,
  4858. "metadata": {},
  4859. "output_type": "execute_result"
  4860. }
  4861. ],
  4862. "source": [
  4863. "%reload_ext version_information\n",
  4864. "\n",
  4865. "%version_information numpy"
  4866. ]
  4867. }
  4868. ],
  4869. "metadata": {
  4870. "kernelspec": {
  4871. "display_name": "Python 3",
  4872. "language": "python",
  4873. "name": "python3"
  4874. },
  4875. "language_info": {
  4876. "codemirror_mode": {
  4877. "name": "ipython",
  4878. "version": 3
  4879. },
  4880. "file_extension": ".py",
  4881. "mimetype": "text/x-python",
  4882. "name": "python",
  4883. "nbconvert_exporter": "python",
  4884. "pygments_lexer": "ipython3",
  4885. "version": "3.6.8"
  4886. }
  4887. },
  4888. "nbformat": 4,
  4889. "nbformat_minor": 1
  4890. }

机器学习越来越多地应用于飞行器、机器人等领域,其目的是利用计算机实现类似人类的智能,从而实现装备的智能化与无人化。本课程旨在引导学生掌握机器学习的基本知识、典型方法与技术,通过具体的应用案例激发学生对该学科的兴趣,鼓励学生从人工智能的角度来分析、解决飞行器、机器人所面临的问题和挑战。本课程主要内容包括Python编程基础,机器学习模型,无监督学习、监督学习、深度学习基础知识与实现,并学习如何利用机器学习解决实际问题,从而全面提升自身的综合能力。