You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

1-numpy_tutorial.ipynb 164 kB

6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago

  1. {
  2. "cells": [
  3. {
  4. "cell_type": "markdown",
  5. "metadata": {},
  6. "source": [
  7. "# Numpy - 多维数据的数组"
  8. ]
  9. },
  10. {
  11. "cell_type": "markdown",
  12. "metadata": {},
  13. "source": [
  14. "J.R. Johansson (jrjohansson at gmail.com)\n",
  15. "\n",
  16. "最新的[IPython notebook](http://ipython.org/notebook.html)课程可以在[http://github.com/jrjohansson/scientific-python-lectures](http://github.com/jrjohansson/scientific-python-lectures) 找到.\n",
  17. "\n",
  18. "其他有关这个课程的参考书在这里标注出[http://jrjohansson.github.io](http://jrjohansson.github.io).\n"
  19. ]
  20. },
  21. {
  22. "cell_type": "code",
  23. "execution_count": 1,
  24. "metadata": {},
  25. "outputs": [],
  26. "source": [
  27. "# 这一行的作用会在课程4中回答\n",
  28. "%matplotlib inline\n",
  29. "import matplotlib.pyplot as plt"
  30. ]
  31. },
  32. {
  33. "cell_type": "markdown",
  34. "metadata": {},
  35. "source": [
  36. "## 简介"
  37. ]
  38. },
  39. {
  40. "cell_type": "markdown",
  41. "metadata": {},
  42. "source": [
  43. "这个`numpy`包(模块)用在几乎所有使用Python的数值计算中。它是一个为Python提供高性能向量,矩阵和高维数据结构的模块。它是用C和Fortran语言实现的,因此当计算被向量化(用向量和矩阵表示)时,性能非常好。\n",
  44. "\n",
  45. "为了使用`numpy`模块,你先要像下面的例子一样导入这个模块:"
  46. ]
  47. },
  48. {
  49. "cell_type": "code",
  50. "execution_count": 2,
  51. "metadata": {},
  52. "outputs": [],
  53. "source": [
  54. "from numpy import *\n",
  55. "import numpy as np"
  56. ]
  57. },
  58. {
  59. "cell_type": "markdown",
  60. "metadata": {},
  61. "source": [
  62. "在`numpy`模块中,用于向量,矩阵和高维数据集的术语是*数组*。\n",
  63. "\n"
  64. ]
  65. },
  66. {
  67. "cell_type": "markdown",
  68. "metadata": {},
  69. "source": [
  70. "## 创建`numpy`数组"
  71. ]
  72. },
  73. {
  74. "cell_type": "markdown",
  75. "metadata": {},
  76. "source": [
  77. "有很多种方法去初始化新的numpy数组, 例如从\n",
  78. "\n",
  79. "* Python列表或元组\n",
  80. "* 使用专门用来创建numpy arrays的函数,例如 `arange`, `linspace`等\n",
  81. "* 从文件中读取数据"
  82. ]
  83. },
  84. {
  85. "cell_type": "markdown",
  86. "metadata": {},
  87. "source": [
  88. "### 从列表中"
  89. ]
  90. },
  91. {
  92. "cell_type": "markdown",
  93. "metadata": {},
  94. "source": [
  95. "例如,为了从Python列表创建新的向量和矩阵我们可以用`numpy.array`函数。\n"
  96. ]
  97. },
  98. {
  99. "cell_type": "code",
  100. "execution_count": 3,
  101. "metadata": {},
  102. "outputs": [
  103. {
  104. "data": {
  105. "text/plain": [
  106. "array([1, 2, 3, 4])"
  107. ]
  108. },
  109. "execution_count": 3,
  110. "metadata": {},
  111. "output_type": "execute_result"
  112. }
  113. ],
  114. "source": [
  115. "import numpy as np\n",
  116. "\n",
  117. "# a vector: the argument to the array function is a Python list\n",
  118. "v = np.array([1,2,3,4])\n",
  119. "\n",
  120. "v"
  121. ]
  122. },
  123. {
  124. "cell_type": "code",
  125. "execution_count": 4,
  126. "metadata": {},
  127. "outputs": [
  128. {
  129. "name": "stdout",
  130. "output_type": "stream",
  131. "text": [
  132. "[[1 2]\n",
  133. " [3 4]\n",
  134. " [5 6]]\n",
  135. "(3, 2)\n"
  136. ]
  137. }
  138. ],
  139. "source": [
  140. "# 矩阵:数组函数的参数是一个嵌套的Python列表\n",
  141. "M = array([[1, 2], [3, 4], [5, 6]])\n",
  142. "\n",
  143. "print(M)\n",
  144. "print(M.shape)"
  145. ]
  146. },
  147. {
  148. "cell_type": "markdown",
  149. "metadata": {},
  150. "source": [
  151. "`v`和`M`两个都是属于`numpy`模块提供的`ndarray`类型。"
  152. ]
  153. },
  154. {
  155. "cell_type": "code",
  156. "execution_count": 5,
  157. "metadata": {},
  158. "outputs": [
  159. {
  160. "data": {
  161. "text/plain": [
  162. "(numpy.ndarray, numpy.ndarray)"
  163. ]
  164. },
  165. "execution_count": 5,
  166. "metadata": {},
  167. "output_type": "execute_result"
  168. }
  169. ],
  170. "source": [
  171. "type(v), type(M)"
  172. ]
  173. },
  174. {
  175. "cell_type": "markdown",
  176. "metadata": {},
  177. "source": [
  178. "`v`和`M`之间的区别仅在于它们的形状。我们可以用属性`ndarray.shape`得到数组形状的信息。"
  179. ]
  180. },
  181. {
  182. "cell_type": "code",
  183. "execution_count": 6,
  184. "metadata": {},
  185. "outputs": [
  186. {
  187. "data": {
  188. "text/plain": [
  189. "(4,)"
  190. ]
  191. },
  192. "execution_count": 6,
  193. "metadata": {},
  194. "output_type": "execute_result"
  195. }
  196. ],
  197. "source": [
  198. "v.shape"
  199. ]
  200. },
  201. {
  202. "cell_type": "code",
  203. "execution_count": 7,
  204. "metadata": {},
  205. "outputs": [
  206. {
  207. "data": {
  208. "text/plain": [
  209. "(3, 2)"
  210. ]
  211. },
  212. "execution_count": 7,
  213. "metadata": {},
  214. "output_type": "execute_result"
  215. }
  216. ],
  217. "source": [
  218. "M.shape"
  219. ]
  220. },
  221. {
  222. "cell_type": "markdown",
  223. "metadata": {},
  224. "source": [
  225. "通过属性`ndarray.size`我们可以得到数组中元素的个数"
  226. ]
  227. },
  228. {
  229. "cell_type": "code",
  230. "execution_count": 8,
  231. "metadata": {},
  232. "outputs": [
  233. {
  234. "data": {
  235. "text/plain": [
  236. "6"
  237. ]
  238. },
  239. "execution_count": 8,
  240. "metadata": {},
  241. "output_type": "execute_result"
  242. }
  243. ],
  244. "source": [
  245. "M.size"
  246. ]
  247. },
  248. {
  249. "cell_type": "markdown",
  250. "metadata": {},
  251. "source": [
  252. "同样,我们可以用函数`numpy.shape`和`numpy.size`"
  253. ]
  254. },
  255. {
  256. "cell_type": "code",
  257. "execution_count": 9,
  258. "metadata": {},
  259. "outputs": [
  260. {
  261. "data": {
  262. "text/plain": [
  263. "(3, 2)"
  264. ]
  265. },
  266. "execution_count": 9,
  267. "metadata": {},
  268. "output_type": "execute_result"
  269. }
  270. ],
  271. "source": [
  272. "np.shape(M)"
  273. ]
  274. },
  275. {
  276. "cell_type": "code",
  277. "execution_count": 10,
  278. "metadata": {},
  279. "outputs": [
  280. {
  281. "data": {
  282. "text/plain": [
  283. "6"
  284. ]
  285. },
  286. "execution_count": 10,
  287. "metadata": {},
  288. "output_type": "execute_result"
  289. }
  290. ],
  291. "source": [
  292. "np.size(M)"
  293. ]
  294. },
  295. {
  296. "cell_type": "markdown",
  297. "metadata": {},
  298. "source": [
  299. "到目前为止`numpy.ndarray`看起来非常像Python列表(或嵌套列表)。为什么不简单地使用Python列表来进行计算,而不是创建一个新的数组类型?\n",
  300. "\n",
  301. "下面有几个原因:\n",
  302. "\n",
  303. "* Python列表非常通用。它们可以包含任何类型的对象。它们是动态类型的。它们不支持矩阵和点乘等数学函数。由于动态类型的关系,为Python列表实现这类函数的效率不是很高。\n",
  304. "* Numpy数组是**静态类型的**和**同构的**。元素的类型是在创建数组时确定的。\n",
  305. "* Numpy数组是内存高效的。\n",
  306. "* 由于是静态类型,`numpy`数组的乘法和加法等数学函数可以用编译语言(C和Fortran)快速实现。\n",
  307. "\n",
  308. "利用`ndarray`的属性`dtype`(数据类型),我们可以看出数组的数据是哪种类型。\n"
  309. ]
  310. },
  311. {
  312. "cell_type": "code",
  313. "execution_count": 11,
  314. "metadata": {},
  315. "outputs": [
  316. {
  317. "data": {
  318. "text/plain": [
  319. "dtype('int64')"
  320. ]
  321. },
  322. "execution_count": 11,
  323. "metadata": {},
  324. "output_type": "execute_result"
  325. }
  326. ],
  327. "source": [
  328. "M.dtype"
  329. ]
  330. },
  331. {
  332. "cell_type": "markdown",
  333. "metadata": {},
  334. "source": [
  335. "如果我们试图给一个numpy数组中的元素赋一个错误类型的值,我们会得到一个错误:"
  336. ]
  337. },
  338. {
  339. "cell_type": "code",
  340. "execution_count": 12,
  341. "metadata": {},
  342. "outputs": [
  343. {
  344. "ename": "ValueError",
  345. "evalue": "invalid literal for int() with base 10: 'hello'",
  346. "output_type": "error",
  347. "traceback": [
  348. "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
  349. "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
  350. "\u001b[0;32m<ipython-input-12-e1f336250f69>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mM\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"hello\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
  351. "\u001b[0;31mValueError\u001b[0m: invalid literal for int() with base 10: 'hello'"
  352. ]
  353. }
  354. ],
  355. "source": [
  356. "M[0,0] = \"hello\""
  357. ]
  358. },
  359. {
  360. "cell_type": "markdown",
  361. "metadata": {},
  362. "source": [
  363. "如果我们想的话,我们可以利用`dtype`关键字参数显式地定义我们创建的数组数据类型:"
  364. ]
  365. },
  366. {
  367. "cell_type": "code",
  368. "execution_count": 13,
  369. "metadata": {},
  370. "outputs": [
  371. {
  372. "data": {
  373. "text/plain": [
  374. "array([[1.+0.j, 2.+0.j],\n",
  375. " [3.+0.j, 4.+0.j]])"
  376. ]
  377. },
  378. "execution_count": 13,
  379. "metadata": {},
  380. "output_type": "execute_result"
  381. }
  382. ],
  383. "source": [
  384. "M = np.array([[1, 2], [3, 4]], dtype=complex)\n",
  385. "\n",
  386. "M"
  387. ]
  388. },
  389. {
  390. "cell_type": "markdown",
  391. "metadata": {},
  392. "source": [
  393. "可以与`dtype`一起使用的常见数据类型有:`int`, `float`, `complex`, `bool`, `object`等\n",
  394. "\n",
  395. "我们也可以显式地定义数据类型的大小,例如:`int64`, `int16`, `float128`, `complex128`。"
  396. ]
  397. },
  398. {
  399. "cell_type": "markdown",
  400. "metadata": {},
  401. "source": [
  402. "### 使用数组生成函数"
  403. ]
  404. },
  405. {
  406. "cell_type": "markdown",
  407. "metadata": {},
  408. "source": [
  409. "对于较大的数组,使用显式的Python列表人为地初始化数据是不切实际的。作为替代,我们可以用`numpy`的很多函数生成不同类型的数组。其中比较常用的有:"
  410. ]
  411. },
  412. {
  413. "cell_type": "markdown",
  414. "metadata": {},
  415. "source": [
  416. "#### arange"
  417. ]
  418. },
  419. {
  420. "cell_type": "code",
  421. "execution_count": 14,
  422. "metadata": {},
  423. "outputs": [
  424. {
  425. "name": "stdout",
  426. "output_type": "stream",
  427. "text": [
  428. "[0 1 2 3 4 5 6 7 8 9]\n",
  429. "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n"
  430. ]
  431. }
  432. ],
  433. "source": [
  434. "# 创建一个范围\n",
  435. "\n",
  436. "x = np.arange(0, 10, 1) # 参数:start, stop, step: \n",
  437. "y = range(0, 10, 1)\n",
  438. "print(x)\n",
  439. "print(list(y))"
  440. ]
  441. },
  442. {
  443. "cell_type": "code",
  444. "execution_count": 15,
  445. "metadata": {},
  446. "outputs": [
  447. {
  448. "data": {
  449. "text/plain": [
  450. "array([-1.00000000e+00, -9.00000000e-01, -8.00000000e-01, -7.00000000e-01,\n",
  451. " -6.00000000e-01, -5.00000000e-01, -4.00000000e-01, -3.00000000e-01,\n",
  452. " -2.00000000e-01, -1.00000000e-01, -2.22044605e-16, 1.00000000e-01,\n",
  453. " 2.00000000e-01, 3.00000000e-01, 4.00000000e-01, 5.00000000e-01,\n",
  454. " 6.00000000e-01, 7.00000000e-01, 8.00000000e-01, 9.00000000e-01])"
  455. ]
  456. },
  457. "execution_count": 15,
  458. "metadata": {},
  459. "output_type": "execute_result"
  460. }
  461. ],
  462. "source": [
  463. "x = np.arange(-1, 1, 0.1)\n",
  464. "\n",
  465. "x"
  466. ]
  467. },
  468. {
  469. "cell_type": "markdown",
  470. "metadata": {},
  471. "source": [
  472. "#### linspace and logspace"
  473. ]
  474. },
  475. {
  476. "cell_type": "code",
  477. "execution_count": 16,
  478. "metadata": {},
  479. "outputs": [
  480. {
  481. "data": {
  482. "text/plain": [
  483. "array([ 0. , 0.41666667, 0.83333333, 1.25 , 1.66666667,\n",
  484. " 2.08333333, 2.5 , 2.91666667, 3.33333333, 3.75 ,\n",
  485. " 4.16666667, 4.58333333, 5. , 5.41666667, 5.83333333,\n",
  486. " 6.25 , 6.66666667, 7.08333333, 7.5 , 7.91666667,\n",
  487. " 8.33333333, 8.75 , 9.16666667, 9.58333333, 10. ])"
  488. ]
  489. },
  490. "execution_count": 16,
  491. "metadata": {},
  492. "output_type": "execute_result"
  493. }
  494. ],
  495. "source": [
  496. "# 使用linspace两边的端点也被包含进去\n",
  497. "np.linspace(0, 10, 25)"
  498. ]
  499. },
  500. {
  501. "cell_type": "code",
  502. "execution_count": 17,
  503. "metadata": {},
  504. "outputs": [
  505. {
  506. "data": {
  507. "text/plain": [
  508. "array([1.00000000e+00, 3.03773178e+00, 9.22781435e+00, 2.80316249e+01,\n",
  509. " 8.51525577e+01, 2.58670631e+02, 7.85771994e+02, 2.38696456e+03,\n",
  510. " 7.25095809e+03, 2.20264658e+04])"
  511. ]
  512. },
  513. "execution_count": 17,
  514. "metadata": {},
  515. "output_type": "execute_result"
  516. }
  517. ],
  518. "source": [
  519. "np.logspace(0, 10, 10, base=e)"
  520. ]
  521. },
  522. {
  523. "cell_type": "markdown",
  524. "metadata": {},
  525. "source": [
  526. "#### mgrid"
  527. ]
  528. },
  529. {
  530. "cell_type": "code",
  531. "execution_count": 18,
  532. "metadata": {},
  533. "outputs": [],
  534. "source": [
  535. "x, y = np.mgrid[0:5, 0:5] # 和MATLAB中的meshgrid类似"
  536. ]
  537. },
  538. {
  539. "cell_type": "code",
  540. "execution_count": 19,
  541. "metadata": {},
  542. "outputs": [
  543. {
  544. "data": {
  545. "text/plain": [
  546. "array([[0, 0, 0, 0, 0],\n",
  547. " [1, 1, 1, 1, 1],\n",
  548. " [2, 2, 2, 2, 2],\n",
  549. " [3, 3, 3, 3, 3],\n",
  550. " [4, 4, 4, 4, 4]])"
  551. ]
  552. },
  553. "execution_count": 19,
  554. "metadata": {},
  555. "output_type": "execute_result"
  556. }
  557. ],
  558. "source": [
  559. "x"
  560. ]
  561. },
  562. {
  563. "cell_type": "code",
  564. "execution_count": 20,
  565. "metadata": {},
  566. "outputs": [
  567. {
  568. "data": {
  569. "text/plain": [
  570. "array([[0, 1, 2, 3, 4],\n",
  571. " [0, 1, 2, 3, 4],\n",
  572. " [0, 1, 2, 3, 4],\n",
  573. " [0, 1, 2, 3, 4],\n",
  574. " [0, 1, 2, 3, 4]])"
  575. ]
  576. },
  577. "execution_count": 20,
  578. "metadata": {},
  579. "output_type": "execute_result"
  580. }
  581. ],
  582. "source": [
  583. "y"
  584. ]
  585. },
  586. {
  587. "cell_type": "markdown",
  588. "metadata": {},
  589. "source": [
  590. "#### random data"
  591. ]
  592. },
  593. {
  594. "cell_type": "code",
  595. "execution_count": 21,
  596. "metadata": {},
  597. "outputs": [],
  598. "source": [
  599. "from numpy import random"
  600. ]
  601. },
  602. {
  603. "cell_type": "code",
  604. "execution_count": 22,
  605. "metadata": {},
  606. "outputs": [
  607. {
  608. "data": {
  609. "text/plain": [
  610. "array([[0.31850549, 0.64755869, 0.93737096, 0.06141188, 0.17055487],\n",
  611. " [0.95771684, 0.88466718, 0.81119863, 0.95268744, 0.73734857],\n",
  612. " [0.51036326, 0.8779331 , 0.41560197, 0.300393 , 0.42244209],\n",
  613. " [0.50866631, 0.84322931, 0.34459543, 0.47379641, 0.03312725],\n",
  614. " [0.96519922, 0.20557788, 0.38343937, 0.21493144, 0.27541461]])"
  615. ]
  616. },
  617. "execution_count": 22,
  618. "metadata": {},
  619. "output_type": "execute_result"
  620. }
  621. ],
  622. "source": [
  623. "# 均匀随机数在[0,1)区间\n",
  624. "random.rand(5,5)"
  625. ]
  626. },
  627. {
  628. "cell_type": "code",
  629. "execution_count": 23,
  630. "metadata": {},
  631. "outputs": [
  632. {
  633. "data": {
  634. "text/plain": [
  635. "array([[ 1.12204579, 2.90667688, -1.06379302, 1.52801804, 1.34553205],\n",
  636. " [ 2.22610261, -0.18597008, 1.12948162, -1.44339033, 0.14366645],\n",
  637. " [ 0.12767746, -0.04534549, 0.1536468 , 0.7333602 , 0.96510913],\n",
  638. " [ 0.30848743, -2.31710677, 0.37803085, -0.52433003, 1.39883453],\n",
  639. " [-0.52307504, 0.40612781, 0.48341866, -1.96277249, 1.1671546 ]])"
  640. ]
  641. },
  642. "execution_count": 23,
  643. "metadata": {},
  644. "output_type": "execute_result"
  645. }
  646. ],
  647. "source": [
  648. "# 标准正态分布随机数\n",
  649. "random.randn(5,5)"
  650. ]
  651. },
  652. {
  653. "cell_type": "markdown",
  654. "metadata": {},
  655. "source": [
  656. "#### diag"
  657. ]
  658. },
  659. {
  660. "cell_type": "code",
  661. "execution_count": 24,
  662. "metadata": {},
  663. "outputs": [
  664. {
  665. "data": {
  666. "text/plain": [
  667. "array([[1, 0, 0],\n",
  668. " [0, 2, 0],\n",
  669. " [0, 0, 3]])"
  670. ]
  671. },
  672. "execution_count": 24,
  673. "metadata": {},
  674. "output_type": "execute_result"
  675. }
  676. ],
  677. "source": [
  678. "# 一个对角矩阵\n",
  679. "np.diag([1,2,3])"
  680. ]
  681. },
  682. {
  683. "cell_type": "code",
  684. "execution_count": 25,
  685. "metadata": {},
  686. "outputs": [
  687. {
  688. "data": {
  689. "text/plain": [
  690. "array([[0, 1, 0, 0],\n",
  691. " [0, 0, 2, 0],\n",
  692. " [0, 0, 0, 3],\n",
  693. " [0, 0, 0, 0]])"
  694. ]
  695. },
  696. "execution_count": 25,
  697. "metadata": {},
  698. "output_type": "execute_result"
  699. }
  700. ],
  701. "source": [
  702. "# 从主对角线偏移的对角线\n",
  703. "diag([1,2,3], k=1) "
  704. ]
  705. },
  706. {
  707. "cell_type": "markdown",
  708. "metadata": {},
  709. "source": [
  710. "#### zeros and ones"
  711. ]
  712. },
  713. {
  714. "cell_type": "code",
  715. "execution_count": 26,
  716. "metadata": {},
  717. "outputs": [
  718. {
  719. "data": {
  720. "text/plain": [
  721. "array([[0., 0., 0.],\n",
  722. " [0., 0., 0.],\n",
  723. " [0., 0., 0.]])"
  724. ]
  725. },
  726. "execution_count": 26,
  727. "metadata": {},
  728. "output_type": "execute_result"
  729. }
  730. ],
  731. "source": [
  732. "np.zeros((3,3))"
  733. ]
  734. },
  735. {
  736. "cell_type": "code",
  737. "execution_count": 27,
  738. "metadata": {},
  739. "outputs": [
  740. {
  741. "data": {
  742. "text/plain": [
  743. "array([[1., 1., 1.],\n",
  744. " [1., 1., 1.],\n",
  745. " [1., 1., 1.]])"
  746. ]
  747. },
  748. "execution_count": 27,
  749. "metadata": {},
  750. "output_type": "execute_result"
  751. }
  752. ],
  753. "source": [
  754. "np.ones((3,3))"
  755. ]
  756. },
  757. {
  758. "cell_type": "markdown",
  759. "metadata": {},
  760. "source": [
  761. "## 文件 I/O"
  762. ]
  763. },
  764. {
  765. "cell_type": "markdown",
  766. "metadata": {},
  767. "source": [
  768. "### 逗号分隔值 (CSV)"
  769. ]
  770. },
  771. {
  772. "cell_type": "markdown",
  773. "metadata": {},
  774. "source": [
  775. "对于数据文件来说一种非常常见的文件格式是逗号分隔值(CSV),或者有关的格式例如TSV(制表符分隔的值)。为了从这些文件中读取数据到Numpy数组中,我们可以用`numpy.genfromtxt`函数。例如:"
  776. ]
  777. },
  778. {
  779. "cell_type": "code",
  780. "execution_count": 28,
  781. "metadata": {},
  782. "outputs": [
  783. {
  784. "name": "stdout",
  785. "output_type": "stream",
  786. "text": [
  787. "1800 1 1 -6.1 -6.1 -6.1 1\r\n",
  788. "1800 1 2 -15.4 -15.4 -15.4 1\r\n",
  789. "1800 1 3 -15.0 -15.0 -15.0 1\r\n",
  790. "1800 1 4 -19.3 -19.3 -19.3 1\r\n",
  791. "1800 1 5 -16.8 -16.8 -16.8 1\r\n",
  792. "1800 1 6 -11.4 -11.4 -11.4 1\r\n",
  793. "1800 1 7 -7.6 -7.6 -7.6 1\r\n",
  794. "1800 1 8 -7.1 -7.1 -7.1 1\r\n",
  795. "1800 1 9 -10.1 -10.1 -10.1 1\r\n",
  796. "1800 1 10 -9.5 -9.5 -9.5 1\r\n"
  797. ]
  798. }
  799. ],
  800. "source": [
  801. "!head stockholm_td_adj.dat"
  802. ]
  803. },
  804. {
  805. "cell_type": "code",
  806. "execution_count": 29,
  807. "metadata": {},
  808. "outputs": [],
  809. "source": [
  810. "import numpy as np\n",
  811. "data = np.genfromtxt('stockholm_td_adj.dat')"
  812. ]
  813. },
  814. {
  815. "cell_type": "code",
  816. "execution_count": 30,
  817. "metadata": {},
  818. "outputs": [
  819. {
  820. "data": {
  821. "text/plain": [
  822. "(77431, 7)"
  823. ]
  824. },
  825. "execution_count": 30,
  826. "metadata": {},
  827. "output_type": "execute_result"
  828. }
  829. ],
  830. "source": [
  831. "data.shape"
  832. ]
  833. },
  834. {
  835. "cell_type": "code",
  836. "execution_count": 31,
  837. "metadata": {},
  838. "outputs": [
  839. {
  840. "data": {
  841. "image/png": "\n",
  842. "text/plain": [
  843. "<Figure size 1008x288 with 1 Axes>"
  844. ]
  845. },
  846. "metadata": {
  847. "needs_background": "light"
  848. },
  849. "output_type": "display_data"
  850. }
  851. ],
  852. "source": [
  853. "%matplotlib inline\n",
  854. "import matplotlib.pyplot as plt\n",
  855. "\n",
  856. "fig, ax = plt.subplots(figsize=(14,4))\n",
  857. "ax.plot(data[:,0]+data[:,1]/12.0+data[:,2]/365, data[:,5])\n",
  858. "ax.axis('tight')\n",
  859. "ax.set_title('tempeatures in Stockholm')\n",
  860. "ax.set_xlabel('year')\n",
  861. "ax.set_ylabel('temperature (C)');"
  862. ]
  863. },
  864. {
  865. "cell_type": "markdown",
  866. "metadata": {},
  867. "source": [
  868. "使用`numpy.savetxt`我们可以将一个Numpy数组以CSV格式存入:"
  869. ]
  870. },
  871. {
  872. "cell_type": "code",
  873. "execution_count": 32,
  874. "metadata": {},
  875. "outputs": [
  876. {
  877. "data": {
  878. "text/plain": [
  879. "array([[0.73171836, 0.46544202, 0.72372739],\n",
  880. " [0.32390603, 0.09679475, 0.95467059],\n",
  881. " [0.36051701, 0.78361037, 0.00716923]])"
  882. ]
  883. },
  884. "execution_count": 32,
  885. "metadata": {},
  886. "output_type": "execute_result"
  887. }
  888. ],
  889. "source": [
  890. "M = np.random.rand(3,3)\n",
  891. "\n",
  892. "M"
  893. ]
  894. },
  895. {
  896. "cell_type": "code",
  897. "execution_count": 33,
  898. "metadata": {},
  899. "outputs": [],
  900. "source": [
  901. "np.savetxt(\"random-matrix.csv\", M)"
  902. ]
  903. },
  904. {
  905. "cell_type": "code",
  906. "execution_count": 34,
  907. "metadata": {},
  908. "outputs": [
  909. {
  910. "name": "stdout",
  911. "output_type": "stream",
  912. "text": [
  913. "7.317183558113176112e-01 4.654420244898096470e-01 7.237273924754552556e-01\r\n",
  914. "3.239060308567449642e-01 9.679474636543183852e-02 9.546705930168928322e-01\r\n",
  915. "3.605170063363589694e-01 7.836103655978251536e-01 7.169228636445423852e-03\r\n"
  916. ]
  917. }
  918. ],
  919. "source": [
  920. "!cat random-matrix.csv"
  921. ]
  922. },
  923. {
  924. "cell_type": "code",
  925. "execution_count": 35,
  926. "metadata": {},
  927. "outputs": [
  928. {
  929. "name": "stdout",
  930. "output_type": "stream",
  931. "text": [
  932. "0.73172 0.46544 0.72373\r\n",
  933. "0.32391 0.09679 0.95467\r\n",
  934. "0.36052 0.78361 0.00717\r\n"
  935. ]
  936. }
  937. ],
  938. "source": [
  939. "np.savetxt(\"random-matrix.csv\", M, fmt='%.5f') # fmt 确定格式\n",
  940. "\n",
  941. "!cat random-matrix.csv"
  942. ]
  943. },
  944. {
  945. "cell_type": "markdown",
  946. "metadata": {},
  947. "source": [
  948. "### Numpy 的本地文件格式"
  949. ]
  950. },
  951. {
  952. "cell_type": "markdown",
  953. "metadata": {},
  954. "source": [
  955. "这种格式在存储和读取numpy数组时非常有用。可以使用函数`numpy.save`和`numpy.load`:"
  956. ]
  957. },
  958. {
  959. "cell_type": "code",
  960. "execution_count": 36,
  961. "metadata": {},
  962. "outputs": [
  963. {
  964. "name": "stdout",
  965. "output_type": "stream",
  966. "text": [
  967. "random-matrix.npy: data\r\n"
  968. ]
  969. }
  970. ],
  971. "source": [
  972. "np.save(\"random-matrix.npy\", M)\n",
  973. "\n",
  974. "!file random-matrix.npy"
  975. ]
  976. },
  977. {
  978. "cell_type": "code",
  979. "execution_count": 37,
  980. "metadata": {},
  981. "outputs": [
  982. {
  983. "data": {
  984. "text/plain": [
  985. "array([[0.73171836, 0.46544202, 0.72372739],\n",
  986. " [0.32390603, 0.09679475, 0.95467059],\n",
  987. " [0.36051701, 0.78361037, 0.00716923]])"
  988. ]
  989. },
  990. "execution_count": 37,
  991. "metadata": {},
  992. "output_type": "execute_result"
  993. }
  994. ],
  995. "source": [
  996. "np.load(\"random-matrix.npy\")"
  997. ]
  998. },
  999. {
  1000. "cell_type": "markdown",
  1001. "metadata": {},
  1002. "source": [
  1003. "## 更多Numpy数组的性质"
  1004. ]
  1005. },
  1006. {
  1007. "cell_type": "code",
  1008. "execution_count": 38,
  1009. "metadata": {},
  1010. "outputs": [
  1011. {
  1012. "name": "stdout",
  1013. "output_type": "stream",
  1014. "text": [
  1015. "float64\n",
  1016. "8\n"
  1017. ]
  1018. }
  1019. ],
  1020. "source": [
  1021. "print(M.dtype)\n",
  1022. "print(M.itemsize) # 每个元素的字节数\n"
  1023. ]
  1024. },
  1025. {
  1026. "cell_type": "code",
  1027. "execution_count": 39,
  1028. "metadata": {},
  1029. "outputs": [
  1030. {
  1031. "data": {
  1032. "text/plain": [
  1033. "72"
  1034. ]
  1035. },
  1036. "execution_count": 39,
  1037. "metadata": {},
  1038. "output_type": "execute_result"
  1039. }
  1040. ],
  1041. "source": [
  1042. "M.nbytes # 字节数"
  1043. ]
  1044. },
  1045. {
  1046. "cell_type": "code",
  1047. "execution_count": 40,
  1048. "metadata": {},
  1049. "outputs": [
  1050. {
  1051. "data": {
  1052. "text/plain": [
  1053. "2"
  1054. ]
  1055. },
  1056. "execution_count": 40,
  1057. "metadata": {},
  1058. "output_type": "execute_result"
  1059. }
  1060. ],
  1061. "source": [
  1062. "M.ndim # 维度"
  1063. ]
  1064. },
  1065. {
  1066. "cell_type": "markdown",
  1067. "metadata": {},
  1068. "source": [
  1069. "## 操作数组"
  1070. ]
  1071. },
  1072. {
  1073. "cell_type": "markdown",
  1074. "metadata": {},
  1075. "source": [
  1076. "### 索引"
  1077. ]
  1078. },
  1079. {
  1080. "cell_type": "markdown",
  1081. "metadata": {},
  1082. "source": [
  1083. "我们可以用方括号和下标索引元素:"
  1084. ]
  1085. },
  1086. {
  1087. "cell_type": "code",
  1088. "execution_count": 41,
  1089. "metadata": {},
  1090. "outputs": [
  1091. {
  1092. "data": {
  1093. "text/plain": [
  1094. "1"
  1095. ]
  1096. },
  1097. "execution_count": 41,
  1098. "metadata": {},
  1099. "output_type": "execute_result"
  1100. }
  1101. ],
  1102. "source": [
  1103. "v = np.array([1, 2, 3, 4, 5])\n",
  1104. "# v 是一个向量,仅仅只有一维,取一个索引\n",
  1105. "v[0]"
  1106. ]
  1107. },
  1108. {
  1109. "cell_type": "code",
  1110. "execution_count": 42,
  1111. "metadata": {},
  1112. "outputs": [
  1113. {
  1114. "name": "stdout",
  1115. "output_type": "stream",
  1116. "text": [
  1117. "0.09679474636543184\n",
  1118. "0.09679474636543184\n",
  1119. "[0.32390603 0.09679475 0.95467059]\n"
  1120. ]
  1121. }
  1122. ],
  1123. "source": [
  1124. "\n",
  1125. "# M 是一个矩阵或者是一个二维的数组,取两个索引 \n",
  1126. "print(M[1,1])\n",
  1127. "print(M[1][1])\n",
  1128. "print(M[1])"
  1129. ]
  1130. },
  1131. {
  1132. "cell_type": "markdown",
  1133. "metadata": {},
  1134. "source": [
  1135. "如果我们省略了多维数组的某个索引,它将会返回整行(或者更一般地说,一个 N-1 维的数组):"
  1136. ]
  1137. },
  1138. {
  1139. "cell_type": "code",
  1140. "execution_count": 43,
  1141. "metadata": {},
  1142. "outputs": [
  1143. {
  1144. "data": {
  1145. "text/plain": [
  1146. "array([[0.73171836, 0.46544202, 0.72372739],\n",
  1147. " [0.32390603, 0.09679475, 0.95467059],\n",
  1148. " [0.36051701, 0.78361037, 0.00716923]])"
  1149. ]
  1150. },
  1151. "execution_count": 43,
  1152. "metadata": {},
  1153. "output_type": "execute_result"
  1154. }
  1155. ],
  1156. "source": [
  1157. "M"
  1158. ]
  1159. },
  1160. {
  1161. "cell_type": "code",
  1162. "execution_count": 44,
  1163. "metadata": {},
  1164. "outputs": [
  1165. {
  1166. "data": {
  1167. "text/plain": [
  1168. "array([0.32390603, 0.09679475, 0.95467059])"
  1169. ]
  1170. },
  1171. "execution_count": 44,
  1172. "metadata": {},
  1173. "output_type": "execute_result"
  1174. }
  1175. ],
  1176. "source": [
  1177. "M[1]"
  1178. ]
  1179. },
  1180. {
  1181. "cell_type": "markdown",
  1182. "metadata": {},
  1183. "source": [
  1184. "用`:`代替索引也可以实现同样的效果:"
  1185. ]
  1186. },
  1187. {
  1188. "cell_type": "code",
  1189. "execution_count": 45,
  1190. "metadata": {},
  1191. "outputs": [
  1192. {
  1193. "data": {
  1194. "text/plain": [
  1195. "array([0.32390603, 0.09679475, 0.95467059])"
  1196. ]
  1197. },
  1198. "execution_count": 45,
  1199. "metadata": {},
  1200. "output_type": "execute_result"
  1201. }
  1202. ],
  1203. "source": [
  1204. "M[1,:] # 行 1"
  1205. ]
  1206. },
  1207. {
  1208. "cell_type": "code",
  1209. "execution_count": 46,
  1210. "metadata": {},
  1211. "outputs": [
  1212. {
  1213. "data": {
  1214. "text/plain": [
  1215. "array([0.46544202, 0.09679475, 0.78361037])"
  1216. ]
  1217. },
  1218. "execution_count": 46,
  1219. "metadata": {},
  1220. "output_type": "execute_result"
  1221. }
  1222. ],
  1223. "source": [
  1224. "M[:,1] # 列 1"
  1225. ]
  1226. },
  1227. {
  1228. "cell_type": "markdown",
  1229. "metadata": {},
  1230. "source": [
  1231. "我们可以用索引赋新的值给数组中的元素:"
  1232. ]
  1233. },
  1234. {
  1235. "cell_type": "code",
  1236. "execution_count": 47,
  1237. "metadata": {},
  1238. "outputs": [],
  1239. "source": [
  1240. "M[0,0] = 1"
  1241. ]
  1242. },
  1243. {
  1244. "cell_type": "code",
  1245. "execution_count": 48,
  1246. "metadata": {},
  1247. "outputs": [
  1248. {
  1249. "data": {
  1250. "text/plain": [
  1251. "array([[1. , 0.46544202, 0.72372739],\n",
  1252. " [0.32390603, 0.09679475, 0.95467059],\n",
  1253. " [0.36051701, 0.78361037, 0.00716923]])"
  1254. ]
  1255. },
  1256. "execution_count": 48,
  1257. "metadata": {},
  1258. "output_type": "execute_result"
  1259. }
  1260. ],
  1261. "source": [
  1262. "M"
  1263. ]
  1264. },
  1265. {
  1266. "cell_type": "code",
  1267. "execution_count": 49,
  1268. "metadata": {},
  1269. "outputs": [],
  1270. "source": [
  1271. "# 对行和列也同样有用\n",
  1272. "M[1,:] = 0\n",
  1273. "M[:,2] = -1"
  1274. ]
  1275. },
  1276. {
  1277. "cell_type": "code",
  1278. "execution_count": 50,
  1279. "metadata": {},
  1280. "outputs": [
  1281. {
  1282. "data": {
  1283. "text/plain": [
  1284. "array([[ 1. , 0.46544202, -1. ],\n",
  1285. " [ 0. , 0. , -1. ],\n",
  1286. " [ 0.36051701, 0.78361037, -1. ]])"
  1287. ]
  1288. },
  1289. "execution_count": 50,
  1290. "metadata": {},
  1291. "output_type": "execute_result"
  1292. }
  1293. ],
  1294. "source": [
  1295. "M"
  1296. ]
  1297. },
  1298. {
  1299. "cell_type": "markdown",
  1300. "metadata": {},
  1301. "source": [
  1302. "### 切片索引"
  1303. ]
  1304. },
  1305. {
  1306. "cell_type": "markdown",
  1307. "metadata": {},
  1308. "source": [
  1309. "切片索引是语法`M[lower:upper:step]`的技术名称,用于提取数组的一部分:"
  1310. ]
  1311. },
  1312. {
  1313. "cell_type": "code",
  1314. "execution_count": 51,
  1315. "metadata": {},
  1316. "outputs": [
  1317. {
  1318. "data": {
  1319. "text/plain": [
  1320. "array([1, 2, 3, 4, 5])"
  1321. ]
  1322. },
  1323. "execution_count": 51,
  1324. "metadata": {},
  1325. "output_type": "execute_result"
  1326. }
  1327. ],
  1328. "source": [
  1329. "A = np.array([1,2,3,4,5])\n",
  1330. "A"
  1331. ]
  1332. },
  1333. {
  1334. "cell_type": "code",
  1335. "execution_count": 52,
  1336. "metadata": {},
  1337. "outputs": [
  1338. {
  1339. "data": {
  1340. "text/plain": [
  1341. "array([2, 3])"
  1342. ]
  1343. },
  1344. "execution_count": 52,
  1345. "metadata": {},
  1346. "output_type": "execute_result"
  1347. }
  1348. ],
  1349. "source": [
  1350. "A[1:3]"
  1351. ]
  1352. },
  1353. {
  1354. "cell_type": "markdown",
  1355. "metadata": {},
  1356. "source": [
  1357. "切片是*可变的*: 如果给切片赋新值,那么提取该切片的原始数组也会被修改:\n"
  1358. ]
  1359. },
  1360. {
  1361. "cell_type": "code",
  1362. "execution_count": 53,
  1363. "metadata": {},
  1364. "outputs": [
  1365. {
  1366. "data": {
  1367. "text/plain": [
  1368. "array([ 1, -2, -3, 4, 5])"
  1369. ]
  1370. },
  1371. "execution_count": 53,
  1372. "metadata": {},
  1373. "output_type": "execute_result"
  1374. }
  1375. ],
  1376. "source": [
  1377. "A[1:3] = [-2,-3] # auto convert type\n",
  1378. "A[1:3] = np.array([-2, -3]) \n",
  1379. "\n",
  1380. "A"
  1381. ]
  1382. },
  1383. {
  1384. "cell_type": "markdown",
  1385. "metadata": {},
  1386. "source": [
  1387. "我们可以省略`M[lower:upper:step]`三个参数中的任意一个:"
  1389. ]
  1390. },
  1391. {
  1392. "cell_type": "code",
  1393. "execution_count": 54,
  1394. "metadata": {},
  1395. "outputs": [
  1396. {
  1397. "data": {
  1398. "text/plain": [
  1399. "array([ 1, -2, -3, 4, 5])"
  1400. ]
  1401. },
  1402. "execution_count": 54,
  1403. "metadata": {},
  1404. "output_type": "execute_result"
  1405. }
  1406. ],
  1407. "source": [
  1408. "A[::] # lower, upper, step 都取默认值"
  1409. ]
  1410. },
  1411. {
  1412. "cell_type": "code",
  1413. "execution_count": 55,
  1414. "metadata": {},
  1415. "outputs": [
  1416. {
  1417. "data": {
  1418. "text/plain": [
  1419. "array([ 1, -2, -3, 4, 5])"
  1420. ]
  1421. },
  1422. "execution_count": 55,
  1423. "metadata": {},
  1424. "output_type": "execute_result"
  1425. }
  1426. ],
  1427. "source": [
  1428. "A[:]"
  1429. ]
  1430. },
  1431. {
  1432. "cell_type": "code",
  1433. "execution_count": 56,
  1434. "metadata": {},
  1435. "outputs": [
  1436. {
  1437. "data": {
  1438. "text/plain": [
  1439. "array([ 1, -3, 5])"
  1440. ]
  1441. },
  1442. "execution_count": 56,
  1443. "metadata": {},
  1444. "output_type": "execute_result"
  1445. }
  1446. ],
  1447. "source": [
  1448. "A[::2] # step is 2, lower and upper 代表数组的开始和结束"
  1449. ]
  1450. },
  1451. {
  1452. "cell_type": "code",
  1453. "execution_count": 57,
  1454. "metadata": {},
  1455. "outputs": [
  1456. {
  1457. "data": {
  1458. "text/plain": [
  1459. "array([ 1, -2, -3])"
  1460. ]
  1461. },
  1462. "execution_count": 57,
  1463. "metadata": {},
  1464. "output_type": "execute_result"
  1465. }
  1466. ],
  1467. "source": [
  1468. "A[:3] # 前3个元素"
  1469. ]
  1470. },
  1471. {
  1472. "cell_type": "code",
  1473. "execution_count": 58,
  1474. "metadata": {},
  1475. "outputs": [
  1476. {
  1477. "data": {
  1478. "text/plain": [
  1479. "array([4, 5])"
  1480. ]
  1481. },
  1482. "execution_count": 58,
  1483. "metadata": {},
  1484. "output_type": "execute_result"
  1485. }
  1486. ],
  1487. "source": [
  1488. "A[3:] # 从索引3开始的元素"
  1489. ]
  1490. },
  1491. {
  1492. "cell_type": "markdown",
  1493. "metadata": {},
  1494. "source": [
  1495. "负索引从数组的末尾开始计数(正索引从开头开始计数):"
  1496. ]
  1497. },
  1498. {
  1499. "cell_type": "code",
  1500. "execution_count": 59,
  1501. "metadata": {},
  1502. "outputs": [],
  1503. "source": [
  1504. "A = np.array([1,2,3,4,5])"
  1505. ]
  1506. },
  1507. {
  1508. "cell_type": "code",
  1509. "execution_count": 60,
  1510. "metadata": {},
  1511. "outputs": [
  1512. {
  1513. "data": {
  1514. "text/plain": [
  1515. "5"
  1516. ]
  1517. },
  1518. "execution_count": 60,
  1519. "metadata": {},
  1520. "output_type": "execute_result"
  1521. }
  1522. ],
  1523. "source": [
  1524. "A[-1] # 数组中最后一个元素"
  1525. ]
  1526. },
  1527. {
  1528. "cell_type": "code",
  1529. "execution_count": 61,
  1530. "metadata": {},
  1531. "outputs": [
  1532. {
  1533. "data": {
  1534. "text/plain": [
  1535. "array([3, 4, 5])"
  1536. ]
  1537. },
  1538. "execution_count": 61,
  1539. "metadata": {},
  1540. "output_type": "execute_result"
  1541. }
  1542. ],
  1543. "source": [
  1544. "A[-3:] # 最后三个元素"
  1545. ]
  1546. },
  1547. {
  1548. "cell_type": "markdown",
  1549. "metadata": {},
  1550. "source": [
  1551. "索引切片对多维数组的工作方式完全相同:"
  1552. ]
  1553. },
  1554. {
  1555. "cell_type": "code",
  1556. "execution_count": 62,
  1557. "metadata": {},
  1558. "outputs": [
  1559. {
  1560. "data": {
  1561. "text/plain": [
  1562. "array([[ 0, 1, 2, 3, 4],\n",
  1563. " [10, 11, 12, 13, 14],\n",
  1564. " [20, 21, 22, 23, 24],\n",
  1565. " [30, 31, 32, 33, 34],\n",
  1566. " [40, 41, 42, 43, 44]])"
  1567. ]
  1568. },
  1569. "execution_count": 62,
  1570. "metadata": {},
  1571. "output_type": "execute_result"
  1572. }
  1573. ],
  1574. "source": [
  1575. "A = np.array([[n+m*10 for n in range(5)] for m in range(5)])\n",
  1576. "\n",
  1577. "A"
  1578. ]
  1579. },
  1580. {
  1581. "cell_type": "code",
  1582. "execution_count": 63,
  1583. "metadata": {},
  1584. "outputs": [
  1585. {
  1586. "data": {
  1587. "text/plain": [
  1588. "array([[11, 12, 13],\n",
  1589. " [21, 22, 23],\n",
  1590. " [31, 32, 33]])"
  1591. ]
  1592. },
  1593. "execution_count": 63,
  1594. "metadata": {},
  1595. "output_type": "execute_result"
  1596. }
  1597. ],
  1598. "source": [
  1599. "# 原始数组中的一个块\n",
  1600. "A[1:4, 1:4]"
  1601. ]
  1602. },
  1603. {
  1604. "cell_type": "code",
  1605. "execution_count": 64,
  1606. "metadata": {},
  1607. "outputs": [
  1608. {
  1609. "data": {
  1610. "text/plain": [
  1611. "array([[ 0, 2, 4],\n",
  1612. " [20, 22, 24],\n",
  1613. " [40, 42, 44]])"
  1614. ]
  1615. },
  1616. "execution_count": 64,
  1617. "metadata": {},
  1618. "output_type": "execute_result"
  1619. }
  1620. ],
  1621. "source": [
  1622. "# 步长\n",
  1623. "A[::2, ::2]"
  1624. ]
  1625. },
  1626. {
  1627. "cell_type": "markdown",
  1628. "metadata": {},
  1629. "source": [
  1630. "### 花式索引"
  1631. ]
  1632. },
  1633. {
  1634. "cell_type": "markdown",
  1635. "metadata": {},
  1636. "source": [
  1637. "花式索引(fancy indexing)指的是用数组或列表代替单个下标来进行索引:"
  1638. ]
  1639. },
  1640. {
  1641. "cell_type": "code",
  1642. "execution_count": 65,
  1643. "metadata": {},
  1644. "outputs": [
  1645. {
  1646. "name": "stdout",
  1647. "output_type": "stream",
  1648. "text": [
  1649. "[[10 11 12 13 14]\n",
  1650. " [20 21 22 23 24]\n",
  1651. " [30 31 32 33 34]]\n",
  1652. "[[ 0 1 2 3 4]\n",
  1653. " [10 11 12 13 14]\n",
  1654. " [20 21 22 23 24]\n",
  1655. " [30 31 32 33 34]\n",
  1656. " [40 41 42 43 44]]\n"
  1657. ]
  1658. }
  1659. ],
  1660. "source": [
  1661. "row_indices = [1, 2, 3]\n",
  1662. "print(A[row_indices])\n",
  1663. "print(A)"
  1664. ]
  1665. },
  1666. {
  1667. "cell_type": "code",
  1668. "execution_count": 66,
  1669. "metadata": {},
  1670. "outputs": [
  1671. {
  1672. "data": {
  1673. "text/plain": [
  1674. "array([11, 22, 34])"
  1675. ]
  1676. },
  1677. "execution_count": 66,
  1678. "metadata": {},
  1679. "output_type": "execute_result"
  1680. }
  1681. ],
  1682. "source": [
  1683. "col_indices = [1, 2, -1] # 索引-1 代表最后一个元素\n",
  1684. "A[row_indices, col_indices]"
  1685. ]
  1686. },
  1687. {
  1688. "cell_type": "markdown",
  1689. "metadata": {},
  1690. "source": [
  1691. "我们也可以使用索引掩码:如果索引掩码是一个数据类型为`bool`的Numpy数组,那么每个元素是否被选中(True 为选中,False 为不选中)取决于索引掩码在对应位置上的值:"
  1692. ]
  1693. },
  1694. {
  1695. "cell_type": "code",
  1696. "execution_count": 67,
  1697. "metadata": {},
  1698. "outputs": [
  1699. {
  1700. "data": {
  1701. "text/plain": [
  1702. "array([0, 1, 2, 3, 4])"
  1703. ]
  1704. },
  1705. "execution_count": 67,
  1706. "metadata": {},
  1707. "output_type": "execute_result"
  1708. }
  1709. ],
  1710. "source": [
  1711. "B = array([n for n in range(5)])\n",
  1712. "B"
  1713. ]
  1714. },
  1715. {
  1716. "cell_type": "code",
  1717. "execution_count": 68,
  1718. "metadata": {},
  1719. "outputs": [
  1720. {
  1721. "data": {
  1722. "text/plain": [
  1723. "array([0, 2])"
  1724. ]
  1725. },
  1726. "execution_count": 68,
  1727. "metadata": {},
  1728. "output_type": "execute_result"
  1729. }
  1730. ],
  1731. "source": [
  1732. "row_mask = array([True, False, True, False, False])\n",
  1733. "B[row_mask]"
  1734. ]
  1735. },
  1736. {
  1737. "cell_type": "code",
  1738. "execution_count": 69,
  1739. "metadata": {},
  1740. "outputs": [
  1741. {
  1742. "data": {
  1743. "text/plain": [
  1744. "array([0, 2])"
  1745. ]
  1746. },
  1747. "execution_count": 69,
  1748. "metadata": {},
  1749. "output_type": "execute_result"
  1750. }
  1751. ],
  1752. "source": [
  1753. "# 相同的事情\n",
  1754. "row_mask = array([1,0,1,0,0], dtype=bool)\n",
  1755. "B[row_mask]"
  1756. ]
  1757. },
  1758. {
  1759. "cell_type": "markdown",
  1760. "metadata": {},
  1761. "source": [
  1762. "这个特性对于有条件地从数组中选择元素非常有用,例如使用比较运算符:"
  1763. ]
  1764. },
  1765. {
  1766. "cell_type": "code",
  1767. "execution_count": 70,
  1768. "metadata": {},
  1769. "outputs": [
  1770. {
  1771. "data": {
  1772. "text/plain": [
  1773. "array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. ,\n",
  1774. " 6.5, 7. , 7.5, 8. , 8.5, 9. , 9.5])"
  1775. ]
  1776. },
  1777. "execution_count": 70,
  1778. "metadata": {},
  1779. "output_type": "execute_result"
  1780. }
  1781. ],
  1782. "source": [
  1783. "x = np.arange(0, 10, 0.5)\n",
  1784. "x"
  1785. ]
  1786. },
  1787. {
  1788. "cell_type": "code",
  1789. "execution_count": 71,
  1790. "metadata": {},
  1791. "outputs": [
  1792. {
  1793. "data": {
  1794. "text/plain": [
  1795. "array([False, False, False, False, False, False, False, False, False,\n",
  1796. " False, False, True, True, True, True, False, False, False,\n",
  1797. " False, False])"
  1798. ]
  1799. },
  1800. "execution_count": 71,
  1801. "metadata": {},
  1802. "output_type": "execute_result"
  1803. }
  1804. ],
  1805. "source": [
  1806. "mask = (5 < x) * (x < 7.5)\n",
  1807. "\n",
  1808. "mask"
  1809. ]
  1810. },
  1811. {
  1812. "cell_type": "code",
  1813. "execution_count": 72,
  1814. "metadata": {},
  1815. "outputs": [
  1816. {
  1817. "data": {
  1818. "text/plain": [
  1819. "array([5.5, 6. , 6.5, 7. ])"
  1820. ]
  1821. },
  1822. "execution_count": 72,
  1823. "metadata": {},
  1824. "output_type": "execute_result"
  1825. }
  1826. ],
  1827. "source": [
  1828. "x[mask]"
  1829. ]
  1830. },
  1831. {
  1832. "cell_type": "code",
  1833. "execution_count": 73,
  1834. "metadata": {},
  1835. "outputs": [
  1836. {
  1837. "data": {
  1838. "text/plain": [
  1839. "array([3.5, 4. , 4.5, 5. , 5.5])"
  1840. ]
  1841. },
  1842. "execution_count": 73,
  1843. "metadata": {},
  1844. "output_type": "execute_result"
  1845. }
  1846. ],
  1847. "source": [
  1848. "x[(3<x) * (x<6)]"
  1849. ]
  1850. },
  1851. {
  1852. "cell_type": "markdown",
  1853. "metadata": {},
  1854. "source": [
  1855. "## 用于从数组中提取数据和创建数组的函数"
  1856. ]
  1857. },
  1858. {
  1859. "cell_type": "markdown",
  1860. "metadata": {},
  1861. "source": [
  1862. "### where"
  1863. ]
  1864. },
  1865. {
  1866. "cell_type": "markdown",
  1867. "metadata": {},
  1868. "source": [
  1869. "索引掩码可以使用`where`函数转换为位置索引"
  1870. ]
  1871. },
  1872. {
  1873. "cell_type": "code",
  1874. "execution_count": 76,
  1875. "metadata": {},
  1876. "outputs": [
  1877. {
  1878. "data": {
  1879. "text/plain": [
  1880. "(array([11, 12, 13, 14]),)"
  1881. ]
  1882. },
  1883. "execution_count": 76,
  1884. "metadata": {},
  1885. "output_type": "execute_result"
  1886. }
  1887. ],
  1888. "source": [
  1889. "indices = np.where(mask)\n",
  1890. "\n",
  1891. "indices"
  1892. ]
  1893. },
  1894. {
  1895. "cell_type": "code",
  1896. "execution_count": 75,
  1897. "metadata": {},
  1898. "outputs": [
  1899. {
  1900. "data": {
  1901. "text/plain": [
  1902. "array([5.5, 6. , 6.5, 7. ])"
  1903. ]
  1904. },
  1905. "execution_count": 75,
  1906. "metadata": {},
  1907. "output_type": "execute_result"
  1908. }
  1909. ],
  1910. "source": [
  1911. "x[indices] # 这个索引等同于花式索引x[mask]"
  1912. ]
  1913. },
  1914. {
  1915. "cell_type": "markdown",
  1916. "metadata": {},
  1917. "source": [
  1918. "### diag"
  1919. ]
  1920. },
  1921. {
  1922. "cell_type": "markdown",
  1923. "metadata": {},
  1924. "source": [
  1925. "使用diag函数,我们还可以提取一个数组的对角线和亚对角线:"
  1926. ]
  1927. },
  1928. {
  1929. "cell_type": "code",
  1930. "execution_count": 74,
  1931. "metadata": {},
  1932. "outputs": [
  1933. {
  1934. "data": {
  1935. "text/plain": [
  1936. "array([ 0, 11, 22, 33, 44])"
  1937. ]
  1938. },
  1939. "execution_count": 74,
  1940. "metadata": {},
  1941. "output_type": "execute_result"
  1942. }
  1943. ],
  1944. "source": [
  1945. "diag(A)"
  1946. ]
  1947. },
  1948. {
  1949. "cell_type": "code",
  1950. "execution_count": 75,
  1951. "metadata": {},
  1952. "outputs": [
  1953. {
  1954. "data": {
  1955. "text/plain": [
  1956. "array([10, 21, 32, 43])"
  1957. ]
  1958. },
  1959. "execution_count": 75,
  1960. "metadata": {},
  1961. "output_type": "execute_result"
  1962. }
  1963. ],
  1964. "source": [
  1965. "diag(A, -1)"
  1966. ]
  1967. },
  1968. {
  1969. "cell_type": "markdown",
  1970. "metadata": {},
  1971. "source": [
  1972. "### take"
  1973. ]
  1974. },
  1975. {
  1976. "cell_type": "markdown",
  1977. "metadata": {},
  1978. "source": [
  1979. "`take` 函数和上面描述的花式索引类似"
  1980. ]
  1981. },
  1982. {
  1983. "cell_type": "code",
  1984. "execution_count": 76,
  1985. "metadata": {},
  1986. "outputs": [
  1987. {
  1988. "data": {
  1989. "text/plain": [
  1990. "array([-3, -2, -1, 0, 1, 2])"
  1991. ]
  1992. },
  1993. "execution_count": 76,
  1994. "metadata": {},
  1995. "output_type": "execute_result"
  1996. }
  1997. ],
  1998. "source": [
  1999. "v2 = arange(-3,3)\n",
  2000. "v2"
  2001. ]
  2002. },
  2003. {
  2004. "cell_type": "code",
  2005. "execution_count": 77,
  2006. "metadata": {},
  2007. "outputs": [
  2008. {
  2009. "data": {
  2010. "text/plain": [
  2011. "array([-2, 0, 2])"
  2012. ]
  2013. },
  2014. "execution_count": 77,
  2015. "metadata": {},
  2016. "output_type": "execute_result"
  2017. }
  2018. ],
  2019. "source": [
  2020. "row_indices = [1, 3, 5]\n",
  2021. "v2[row_indices] # 花式索引"
  2022. ]
  2023. },
  2024. {
  2025. "cell_type": "code",
  2026. "execution_count": 78,
  2027. "metadata": {},
  2028. "outputs": [
  2029. {
  2030. "data": {
  2031. "text/plain": [
  2032. "array([-2, 0, 2])"
  2033. ]
  2034. },
  2035. "execution_count": 78,
  2036. "metadata": {},
  2037. "output_type": "execute_result"
  2038. }
  2039. ],
  2040. "source": [
  2041. "v2.take(row_indices)"
  2042. ]
  2043. },
  2044. {
  2045. "cell_type": "markdown",
  2046. "metadata": {},
  2047. "source": [
  2048. "但是`take`也可以作用在列表和其他对象上:"
  2049. ]
  2050. },
  2051. {
  2052. "cell_type": "code",
  2053. "execution_count": 79,
  2054. "metadata": {},
  2055. "outputs": [
  2056. {
  2057. "data": {
  2058. "text/plain": [
  2059. "array([-2, 0, 2])"
  2060. ]
  2061. },
  2062. "execution_count": 79,
  2063. "metadata": {},
  2064. "output_type": "execute_result"
  2065. }
  2066. ],
  2067. "source": [
  2068. "take([-3, -2, -1, 0, 1, 2], row_indices)"
  2069. ]
  2070. },
  2071. {
  2072. "cell_type": "markdown",
  2073. "metadata": {},
  2074. "source": [
  2075. "### choose"
  2076. ]
  2077. },
  2078. {
  2079. "cell_type": "markdown",
  2080. "metadata": {},
  2081. "source": [
  2082. "通过从几个数组中选择元素来构造一个数组:"
  2083. ]
  2084. },
  2085. {
  2086. "cell_type": "code",
  2087. "execution_count": 49,
  2088. "metadata": {},
  2089. "outputs": [
  2090. {
  2091. "data": {
  2092. "text/plain": [
  2093. "array([ 5, -2, 5, -2])"
  2094. ]
  2095. },
  2096. "execution_count": 49,
  2097. "metadata": {},
  2098. "output_type": "execute_result"
  2099. }
  2100. ],
  2101. "source": [
  2102. "which = [1, 0, 1, 0]\n",
  2103. "choices = [[-2,-2,-2,-2], [5,5,5,5]]\n",
  2104. "\n",
  2105. "np.choose(which, choices)"
  2106. ]
  2107. },
  2108. {
  2109. "cell_type": "markdown",
  2110. "metadata": {},
  2111. "source": [
  2112. "## 线性代数"
  2113. ]
  2114. },
  2115. {
  2116. "cell_type": "markdown",
  2117. "metadata": {},
  2118. "source": [
  2119. "向量化代码是使用Python/Numpy编写高效数值计算的关键。这意味着尽可能多的程序应该用矩阵和向量运算来表示,比如矩阵-矩阵乘法。"
  2120. ]
  2121. },
  2122. {
  2123. "cell_type": "markdown",
  2124. "metadata": {},
  2125. "source": [
  2126. "### Scalar-array 操作"
  2127. ]
  2128. },
  2129. {
  2130. "cell_type": "markdown",
  2131. "metadata": {},
  2132. "source": [
  2133. "我们可以使用常用的算术运算符对数组和标量进行乘、加、减和除运算。"
  2134. ]
  2135. },
  2136. {
  2137. "cell_type": "code",
  2138. "execution_count": 93,
  2139. "metadata": {},
  2140. "outputs": [],
  2141. "source": [
  2142. "v1 = np.arange(0, 5)"
  2143. ]
  2144. },
  2145. {
  2146. "cell_type": "code",
  2147. "execution_count": 94,
  2148. "metadata": {},
  2149. "outputs": [
  2150. {
  2151. "data": {
  2152. "text/plain": [
  2153. "array([0, 2, 4, 6, 8])"
  2154. ]
  2155. },
  2156. "execution_count": 94,
  2157. "metadata": {},
  2158. "output_type": "execute_result"
  2159. }
  2160. ],
  2161. "source": [
  2162. "v1 * 2"
  2163. ]
  2164. },
  2165. {
  2166. "cell_type": "code",
  2167. "execution_count": 95,
  2168. "metadata": {},
  2169. "outputs": [
  2170. {
  2171. "data": {
  2172. "text/plain": [
  2173. "array([2, 3, 4, 5, 6])"
  2174. ]
  2175. },
  2176. "execution_count": 95,
  2177. "metadata": {},
  2178. "output_type": "execute_result"
  2179. }
  2180. ],
  2181. "source": [
  2182. "v1 + 2"
  2183. ]
  2184. },
  2185. {
  2186. "cell_type": "code",
  2187. "execution_count": 96,
  2188. "metadata": {},
  2189. "outputs": [
  2190. {
  2191. "data": {
  2192. "text/plain": [
  2193. "(array([[ 0, 2, 4, 6, 8],\n",
  2194. " [20, 22, 24, 26, 28],\n",
  2195. " [40, 42, 44, 46, 48],\n",
  2196. " [60, 62, 64, 66, 68],\n",
  2197. " [80, 82, 84, 86, 88]]), array([[ 2, 3, 4, 5, 6],\n",
  2198. " [12, 13, 14, 15, 16],\n",
  2199. " [22, 23, 24, 25, 26],\n",
  2200. " [32, 33, 34, 35, 36],\n",
  2201. " [42, 43, 44, 45, 46]]))"
  2202. ]
  2203. },
  2204. "execution_count": 96,
  2205. "metadata": {},
  2206. "output_type": "execute_result"
  2207. }
  2208. ],
  2209. "source": [
  2210. "A * 2, A + 2"
  2211. ]
  2212. },
  2213. {
  2214. "cell_type": "markdown",
  2215. "metadata": {},
  2216. "source": [
  2217. "### 数组间的元素操作"
  2218. ]
  2219. },
  2220. {
  2221. "cell_type": "markdown",
  2222. "metadata": {},
  2223. "source": [
  2224. "当我们对数组进行加法、减法、乘法和除法时,默认的行为是**element-wise**操作:"
  2225. ]
  2226. },
  2227. {
  2228. "cell_type": "code",
  2229. "execution_count": 97,
  2230. "metadata": {},
  2231. "outputs": [
  2232. {
  2233. "data": {
  2234. "text/plain": [
  2235. "array([[0.41002411, 0.52156709, 0.77687362],\n",
  2236. " [0.86406459, 0.00587552, 0.04683701]])"
  2237. ]
  2238. },
  2239. "execution_count": 97,
  2240. "metadata": {},
  2241. "output_type": "execute_result"
  2242. }
  2243. ],
  2244. "source": [
  2245. "A = np.random.rand(2, 3)\n",
  2246. "\n",
  2247. "A * A # element-wise 乘法"
  2248. ]
  2249. },
  2250. {
  2251. "cell_type": "code",
  2252. "execution_count": 98,
  2253. "metadata": {},
  2254. "outputs": [
  2255. {
  2256. "data": {
  2257. "text/plain": [
  2258. "array([ 0, 1, 4, 9, 16])"
  2259. ]
  2260. },
  2261. "execution_count": 98,
  2262. "metadata": {},
  2263. "output_type": "execute_result"
  2264. }
  2265. ],
  2266. "source": [
  2267. "v1 * v1"
  2268. ]
  2269. },
  2270. {
  2271. "cell_type": "markdown",
  2272. "metadata": {},
  2273. "source": [
  2274. "如果我们对形状兼容的数组做乘法,会得到逐行的对位相乘结果;如果形状不兼容(例如下面的 (2,3) 和 (5,)),则会因无法广播而报错:"
  2275. ]
  2276. },
  2277. {
  2278. "cell_type": "code",
  2279. "execution_count": 99,
  2280. "metadata": {},
  2281. "outputs": [
  2282. {
  2283. "data": {
  2284. "text/plain": [
  2285. "((2, 3), (5,))"
  2286. ]
  2287. },
  2288. "execution_count": 99,
  2289. "metadata": {},
  2290. "output_type": "execute_result"
  2291. }
  2292. ],
  2293. "source": [
  2294. "A.shape, v1.shape"
  2295. ]
  2296. },
  2297. {
  2298. "cell_type": "code",
  2299. "execution_count": 100,
  2300. "metadata": {},
  2301. "outputs": [
  2302. {
  2303. "ename": "ValueError",
  2304. "evalue": "operands could not be broadcast together with shapes (2,3) (5,) ",
  2305. "output_type": "error",
  2306. "traceback": [
  2307. "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
  2308. "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
  2309. "\u001b[0;32m<ipython-input-100-1af134c5c5d0>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mA\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mv1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
  2310. "\u001b[0;31mValueError\u001b[0m: operands could not be broadcast together with shapes (2,3) (5,) "
  2311. ]
  2312. }
  2313. ],
  2314. "source": [
  2315. "A * v1"
  2316. ]
  2317. },
  2318. {
  2319. "cell_type": "markdown",
  2320. "metadata": {},
  2321. "source": [
  2322. "### 矩阵代数"
  2323. ]
  2324. },
  2325. {
  2326. "cell_type": "markdown",
  2327. "metadata": {},
  2328. "source": [
  2329. "那么矩阵的乘法呢?有两种方法。我们可以使用`dot`函数,它对两个参数应用矩阵-矩阵、矩阵-向量或向量内积乘法:"
  2330. ]
  2331. },
  2332. {
  2333. "cell_type": "code",
  2334. "execution_count": 102,
  2335. "metadata": {},
  2336. "outputs": [
  2337. {
  2338. "data": {
  2339. "text/plain": [
  2340. "array([[0.3767892 , 1.47079714, 0.31117826, 1.29726746, 0.51486767],\n",
  2341. " [0.25604237, 0.97247777, 0.34479677, 0.93969314, 0.3976715 ],\n",
  2342. " [0.81557228, 1.22841789, 0.86636095, 0.93499185, 0.28560187],\n",
  2343. " [0.52515694, 1.56792282, 1.1443364 , 1.84965072, 0.74141231],\n",
  2344. " [0.78004097, 1.51298694, 1.22023006, 1.42991218, 0.71648303]])"
  2345. ]
  2346. },
  2347. "execution_count": 102,
  2348. "metadata": {},
  2349. "output_type": "execute_result"
  2350. }
  2351. ],
  2352. "source": [
  2353. "A = np.random.rand(5, 5)\n",
  2354. "v = np.random.rand(5, 1)\n",
  2355. "\n",
  2356. "np.dot(A, A)"
  2357. ]
  2358. },
  2359. {
  2360. "cell_type": "code",
  2361. "execution_count": 107,
  2362. "metadata": {},
  2363. "outputs": [
  2364. {
  2365. "data": {
  2366. "text/plain": [
  2367. "array([3.03824466, 2.65209134, 2.94637897, 6.50153897, 5.54270391])"
  2368. ]
  2369. },
  2370. "execution_count": 107,
  2371. "metadata": {},
  2372. "output_type": "execute_result"
  2373. }
  2374. ],
  2375. "source": [
  2376. "np.dot(A, v1)"
  2377. ]
  2378. },
  2379. {
  2380. "cell_type": "code",
  2381. "execution_count": 108,
  2382. "metadata": {},
  2383. "outputs": [
  2384. {
  2385. "data": {
  2386. "text/plain": [
  2387. "30"
  2388. ]
  2389. },
  2390. "execution_count": 108,
  2391. "metadata": {},
  2392. "output_type": "execute_result"
  2393. }
  2394. ],
  2395. "source": [
  2396. "np.dot(v1, v1)"
  2397. ]
  2398. },
  2399. {
  2400. "cell_type": "markdown",
  2401. "metadata": {},
  2402. "source": [
  2403. "另外,我们可以将数组对象转换(cast)为`matrix`类型。这将改变标准算术运算符`+, -, *` 的行为,使其按矩阵代数的规则运算。"
  2404. ]
  2405. },
  2406. {
  2407. "cell_type": "code",
  2408. "execution_count": 111,
  2409. "metadata": {},
  2410. "outputs": [],
  2411. "source": [
  2412. "M = np.matrix(A)\n",
  2413. "v = np.matrix(v1).T # make it a column vector"
  2414. ]
  2415. },
  2416. {
  2417. "cell_type": "code",
  2418. "execution_count": 112,
  2419. "metadata": {},
  2420. "outputs": [
  2421. {
  2422. "data": {
  2423. "text/plain": [
  2424. "matrix([[0],\n",
  2425. " [1],\n",
  2426. " [2],\n",
  2427. " [3],\n",
  2428. " [4]])"
  2429. ]
  2430. },
  2431. "execution_count": 112,
  2432. "metadata": {},
  2433. "output_type": "execute_result"
  2434. }
  2435. ],
  2436. "source": [
  2437. "v"
  2438. ]
  2439. },
  2440. {
  2441. "cell_type": "code",
  2442. "execution_count": 113,
  2443. "metadata": {},
  2444. "outputs": [
  2445. {
  2446. "data": {
  2447. "text/plain": [
  2448. "matrix([[0.3767892 , 1.47079714, 0.31117826, 1.29726746, 0.51486767],\n",
  2449. " [0.25604237, 0.97247777, 0.34479677, 0.93969314, 0.3976715 ],\n",
  2450. " [0.81557228, 1.22841789, 0.86636095, 0.93499185, 0.28560187],\n",
  2451. " [0.52515694, 1.56792282, 1.1443364 , 1.84965072, 0.74141231],\n",
  2452. " [0.78004097, 1.51298694, 1.22023006, 1.42991218, 0.71648303]])"
  2453. ]
  2454. },
  2455. "execution_count": 113,
  2456. "metadata": {},
  2457. "output_type": "execute_result"
  2458. }
  2459. ],
  2460. "source": [
  2461. "M * M"
  2462. ]
  2463. },
  2464. {
  2465. "cell_type": "code",
  2466. "execution_count": 114,
  2467. "metadata": {},
  2468. "outputs": [
  2469. {
  2470. "data": {
  2471. "text/plain": [
  2472. "matrix([[3.03824466],\n",
  2473. " [2.65209134],\n",
  2474. " [2.94637897],\n",
  2475. " [6.50153897],\n",
  2476. " [5.54270391]])"
  2477. ]
  2478. },
  2479. "execution_count": 114,
  2480. "metadata": {},
  2481. "output_type": "execute_result"
  2482. }
  2483. ],
  2484. "source": [
  2485. "M * v"
  2486. ]
  2487. },
  2488. {
  2489. "cell_type": "code",
  2490. "execution_count": 117,
  2491. "metadata": {},
  2492. "outputs": [
  2493. {
  2494. "data": {
  2495. "text/plain": [
  2496. "matrix([[30]])"
  2497. ]
  2498. },
  2499. "execution_count": 117,
  2500. "metadata": {},
  2501. "output_type": "execute_result"
  2502. }
  2503. ],
  2504. "source": [
  2505. "# 內积\n",
  2506. "v.T * v"
  2507. ]
  2508. },
  2509. {
  2510. "cell_type": "code",
  2511. "execution_count": 118,
  2512. "metadata": {},
  2513. "outputs": [
  2514. {
  2515. "data": {
  2516. "text/plain": [
  2517. "matrix([[3.03824466],\n",
  2518. " [3.65209134],\n",
  2519. " [4.94637897],\n",
  2520. " [9.50153897],\n",
  2521. " [9.54270391]])"
  2522. ]
  2523. },
  2524. "execution_count": 118,
  2525. "metadata": {},
  2526. "output_type": "execute_result"
  2527. }
  2528. ],
  2529. "source": [
  2530. "# 对于矩阵对象,适用标准的矩阵代数\n",
  2531. "v + M*v"
  2532. ]
  2533. },
  2534. {
  2535. "cell_type": "markdown",
  2536. "metadata": {},
  2537. "source": [
  2538. "如果我们尝试对形状不匹配的矩阵进行加、减或乘运算,就会得到错误:"
  2539. ]
  2540. },
  2541. {
  2542. "cell_type": "code",
  2543. "execution_count": 125,
  2544. "metadata": {},
  2545. "outputs": [],
  2546. "source": [
  2547. "v = np.matrix([1,2,3,4,5,6]).T"
  2548. ]
  2549. },
  2550. {
  2551. "cell_type": "code",
  2552. "execution_count": 123,
  2553. "metadata": {},
  2554. "outputs": [
  2555. {
  2556. "data": {
  2557. "text/plain": [
  2558. "((5, 5), (5, 1))"
  2559. ]
  2560. },
  2561. "execution_count": 123,
  2562. "metadata": {},
  2563. "output_type": "execute_result"
  2564. }
  2565. ],
  2566. "source": [
  2567. "np.shape(M), np.shape(v)"
  2568. ]
  2569. },
  2570. {
  2571. "cell_type": "code",
  2572. "execution_count": 124,
  2573. "metadata": {},
  2574. "outputs": [
  2575. {
  2576. "data": {
  2577. "text/plain": [
  2578. "matrix([[5.06458489],\n",
  2579. " [4.08471675],\n",
  2580. " [4.990684 ],\n",
  2581. " [9.17423165],\n",
  2582. " [8.08502244]])"
  2583. ]
  2584. },
  2585. "execution_count": 124,
  2586. "metadata": {},
  2587. "output_type": "execute_result"
  2588. }
  2589. ],
  2590. "source": [
  2591. "M * v"
  2592. ]
  2593. },
  2594. {
  2595. "cell_type": "markdown",
  2596. "metadata": {},
  2597. "source": [
  2598. "另请参阅相关函数:`inner`, `outer`, `cross`, `kron`, `tensordot`。例如可以试试`help(kron)`。"
  2599. ]
  2600. },
  2601. {
  2602. "cell_type": "markdown",
  2603. "metadata": {},
  2604. "source": [
  2605. "### 数组/矩阵转换"
  2606. ]
  2607. },
  2608. {
  2609. "cell_type": "markdown",
  2610. "metadata": {},
  2611. "source": [
  2612. "上面我们用`.T`对矩阵对象`v`进行了转置。我们也可以使用`transpose`函数来实现同样的效果。\n",
  2613. "\n",
  2614. "变换矩阵对象的其他数学函数有:"
  2615. ]
  2616. },
  2617. {
  2618. "cell_type": "code",
  2619. "execution_count": 126,
  2620. "metadata": {},
  2621. "outputs": [
  2622. {
  2623. "name": "stdout",
  2624. "output_type": "stream",
  2625. "text": [
  2626. "[[0.04208911 0.65828119 0.21987187 0.10069326]\n",
  2627. " [0.61960112 0.52726045 0.35884175 0.51931613]\n",
  2628. " [0.66708619 0.76886997 0.06792093 0.6548313 ]]\n",
  2629. "[[0.04208911 0.61960112 0.66708619]\n",
  2630. " [0.65828119 0.52726045 0.76886997]\n",
  2631. " [0.21987187 0.35884175 0.06792093]\n",
  2632. " [0.10069326 0.51931613 0.6548313 ]]\n"
  2633. ]
  2634. }
  2635. ],
  2636. "source": [
  2637. "A = np.random.rand(3,4)\n",
  2638. "print(A)\n",
  2639. "print(A.T)"
  2640. ]
  2641. },
  2642. {
  2643. "cell_type": "code",
  2644. "execution_count": 127,
  2645. "metadata": {},
  2646. "outputs": [
  2647. {
  2648. "data": {
  2649. "text/plain": [
  2650. "matrix([[0.+1.j, 0.+2.j],\n",
  2651. " [0.+3.j, 0.+4.j]])"
  2652. ]
  2653. },
  2654. "execution_count": 127,
  2655. "metadata": {},
  2656. "output_type": "execute_result"
  2657. }
  2658. ],
  2659. "source": [
  2660. "C = np.matrix([[1j, 2j], [3j, 4j]])\n",
  2661. "C"
  2662. ]
  2663. },
  2664. {
  2665. "cell_type": "code",
  2666. "execution_count": 128,
  2667. "metadata": {},
  2668. "outputs": [
  2669. {
  2670. "data": {
  2671. "text/plain": [
  2672. "matrix([[0.-1.j, 0.-2.j],\n",
  2673. " [0.-3.j, 0.-4.j]])"
  2674. ]
  2675. },
  2676. "execution_count": 128,
  2677. "metadata": {},
  2678. "output_type": "execute_result"
  2679. }
  2680. ],
  2681. "source": [
  2682. "conjugate(C)"
  2683. ]
  2684. },
  2685. {
  2686. "cell_type": "markdown",
  2687. "metadata": {},
  2688. "source": [
  2689. "厄米共轭:转置+共轭"
  2690. ]
  2691. },
  2692. {
  2693. "cell_type": "code",
  2694. "execution_count": 129,
  2695. "metadata": {},
  2696. "outputs": [
  2697. {
  2698. "data": {
  2699. "text/plain": [
  2700. "matrix([[0.-1.j, 0.-3.j],\n",
  2701. " [0.-2.j, 0.-4.j]])"
  2702. ]
  2703. },
  2704. "execution_count": 129,
  2705. "metadata": {},
  2706. "output_type": "execute_result"
  2707. }
  2708. ],
  2709. "source": [
  2710. "C.H"
  2711. ]
  2712. },
  2713. {
  2714. "cell_type": "markdown",
  2715. "metadata": {},
  2716. "source": [
  2717. "我们可以用`real`和`imag`提取复数数组的实部和虚部:"
  2718. ]
  2719. },
  2720. {
  2721. "cell_type": "code",
  2722. "execution_count": 130,
  2723. "metadata": {},
  2724. "outputs": [
  2725. {
  2726. "data": {
  2727. "text/plain": [
  2728. "matrix([[0., 0.],\n",
  2729. " [0., 0.]])"
  2730. ]
  2731. },
  2732. "execution_count": 130,
  2733. "metadata": {},
  2734. "output_type": "execute_result"
  2735. }
  2736. ],
  2737. "source": [
  2738. "real(C) # same as: C.real"
  2739. ]
  2740. },
  2741. {
  2742. "cell_type": "code",
  2743. "execution_count": 131,
  2744. "metadata": {},
  2745. "outputs": [
  2746. {
  2747. "data": {
  2748. "text/plain": [
  2749. "matrix([[1., 2.],\n",
  2750. " [3., 4.]])"
  2751. ]
  2752. },
  2753. "execution_count": 131,
  2754. "metadata": {},
  2755. "output_type": "execute_result"
  2756. }
  2757. ],
  2758. "source": [
  2759. "imag(C) # same as: C.imag"
  2760. ]
  2761. },
  2762. {
  2763. "cell_type": "markdown",
  2764. "metadata": {},
  2765. "source": [
  2766. "或者复数的辐角和绝对值"
  2767. ]
  2768. },
  2769. {
  2770. "cell_type": "code",
  2771. "execution_count": 106,
  2772. "metadata": {},
  2773. "outputs": [
  2774. {
  2775. "data": {
  2776. "text/plain": [
  2777. "array([[ 0.78539816, 1.10714872],\n",
  2778. " [ 1.24904577, 1.32581766]])"
  2779. ]
  2780. },
  2781. "execution_count": 106,
  2782. "metadata": {},
  2783. "output_type": "execute_result"
  2784. }
  2785. ],
  2786. "source": [
  2787. "angle(C+1) # heads up MATLAB Users, angle is used instead of arg"
  2788. ]
  2789. },
  2790. {
  2791. "cell_type": "code",
  2792. "execution_count": 107,
  2793. "metadata": {},
  2794. "outputs": [
  2795. {
  2796. "data": {
  2797. "text/plain": [
  2798. "matrix([[ 1., 2.],\n",
  2799. " [ 3., 4.]])"
  2800. ]
  2801. },
  2802. "execution_count": 107,
  2803. "metadata": {},
  2804. "output_type": "execute_result"
  2805. }
  2806. ],
  2807. "source": [
  2808. "abs(C)"
  2809. ]
  2810. },
  2811. {
  2812. "cell_type": "markdown",
  2813. "metadata": {},
  2814. "source": [
  2815. "### 矩阵计算"
  2816. ]
  2817. },
  2818. {
  2819. "cell_type": "markdown",
  2820. "metadata": {},
  2821. "source": [
  2822. "#### 求逆"
  2823. ]
  2824. },
  2825. {
  2826. "cell_type": "code",
  2827. "execution_count": 132,
  2828. "metadata": {},
  2829. "outputs": [
  2830. {
  2831. "data": {
  2832. "text/plain": [
  2833. "matrix([[0.+2.j , 0.-1.j ],\n",
  2834. " [0.-1.5j, 0.+0.5j]])"
  2835. ]
  2836. },
  2837. "execution_count": 132,
  2838. "metadata": {},
  2839. "output_type": "execute_result"
  2840. }
  2841. ],
  2842. "source": [
  2843. "np.linalg.inv(C) # equivalent to C.I "
  2844. ]
  2845. },
  2846. {
  2847. "cell_type": "code",
  2848. "execution_count": 133,
  2849. "metadata": {},
  2850. "outputs": [
  2851. {
  2852. "data": {
  2853. "text/plain": [
  2854. "matrix([[1.00000000e+00+0.j, 0.00000000e+00+0.j],\n",
  2855. " [2.22044605e-16+0.j, 1.00000000e+00+0.j]])"
  2856. ]
  2857. },
  2858. "execution_count": 133,
  2859. "metadata": {},
  2860. "output_type": "execute_result"
  2861. }
  2862. ],
  2863. "source": [
  2864. "C.I * C"
  2865. ]
  2866. },
  2867. {
  2868. "cell_type": "markdown",
  2869. "metadata": {},
  2870. "source": [
  2871. "#### 行列式"
  2872. ]
  2873. },
  2874. {
  2875. "cell_type": "code",
  2876. "execution_count": 134,
  2877. "metadata": {},
  2878. "outputs": [
  2879. {
  2880. "data": {
  2881. "text/plain": [
  2882. "(2.0000000000000004+0j)"
  2883. ]
  2884. },
  2885. "execution_count": 134,
  2886. "metadata": {},
  2887. "output_type": "execute_result"
  2888. }
  2889. ],
  2890. "source": [
  2891. "np.linalg.det(C)"
  2892. ]
  2893. },
  2894. {
  2895. "cell_type": "code",
  2896. "execution_count": 135,
  2897. "metadata": {},
  2898. "outputs": [
  2899. {
  2900. "data": {
  2901. "text/plain": [
  2902. "(0.49999999999999967+0j)"
  2903. ]
  2904. },
  2905. "execution_count": 135,
  2906. "metadata": {},
  2907. "output_type": "execute_result"
  2908. }
  2909. ],
  2910. "source": [
  2911. "linalg.det(C.I)"
  2912. ]
  2913. },
  2914. {
  2915. "cell_type": "markdown",
  2916. "metadata": {},
  2917. "source": [
  2918. "### 数据处理"
  2919. ]
  2920. },
  2921. {
  2922. "cell_type": "markdown",
  2923. "metadata": {},
  2924. "source": [
  2925. "将数据集存储在Numpy数组中通常非常有用。Numpy提供了许多函数,用于计算数组中数据集的统计量。\n",
  2926. "\n",
  2927. "例如,让我们从上面使用的斯德哥尔摩温度数据集计算一些属性。"
  2928. ]
  2929. },
  2930. {
  2931. "cell_type": "code",
  2932. "execution_count": 136,
  2933. "metadata": {},
  2934. "outputs": [
  2935. {
  2936. "data": {
  2937. "text/plain": [
  2938. "(77431, 7)"
  2939. ]
  2940. },
  2941. "execution_count": 136,
  2942. "metadata": {},
  2943. "output_type": "execute_result"
  2944. }
  2945. ],
  2946. "source": [
  2947. "# 提醒一下,温度数据集存储在变量data中:\n",
  2948. "np.shape(data)"
  2949. ]
  2950. },
  2951. {
  2952. "cell_type": "markdown",
  2953. "metadata": {},
  2954. "source": [
  2955. "#### mean"
  2956. ]
  2957. },
  2958. {
  2959. "cell_type": "code",
  2960. "execution_count": 88,
  2961. "metadata": {},
  2962. "outputs": [
  2963. {
  2964. "name": "stdout",
  2965. "output_type": "stream",
  2966. "text": [
  2967. "(77431, 7)\n"
  2968. ]
  2969. },
  2970. {
  2971. "data": {
  2972. "text/plain": [
  2973. "6.197109684751585"
  2974. ]
  2975. },
  2976. "execution_count": 88,
  2977. "metadata": {},
  2978. "output_type": "execute_result"
  2979. }
  2980. ],
  2981. "source": [
  2982. "# 温度数据在索引为3的列(即第4列)中\n",
  2983. "print(data.shape)\n",
  2984. "np.mean(data[:,3])"
  2985. ]
  2986. },
  2987. {
  2988. "cell_type": "code",
  2989. "execution_count": 137,
  2990. "metadata": {},
  2991. "outputs": [
  2992. {
  2993. "data": {
  2994. "text/plain": [
  2995. "0.4764047026464162"
  2996. ]
  2997. },
  2998. "execution_count": 137,
  2999. "metadata": {},
  3000. "output_type": "execute_result"
  3001. }
  3002. ],
  3003. "source": [
  3004. "A = np.random.rand(4, 3)\n",
  3005. "np.mean(A)"
  3006. ]
  3007. },
  3008. {
  3009. "cell_type": "markdown",
  3010. "metadata": {},
  3011. "source": [
  3012. "在过去的200年里,斯德哥尔摩每天的平均气温大约是6.2 C。"
  3013. ]
  3014. },
  3015. {
  3016. "cell_type": "markdown",
  3017. "metadata": {},
  3018. "source": [
  3019. "#### 标准差和方差"
  3020. ]
  3021. },
  3022. {
  3023. "cell_type": "code",
  3024. "execution_count": 138,
  3025. "metadata": {},
  3026. "outputs": [
  3027. {
  3028. "data": {
  3029. "text/plain": [
  3030. "(8.282271621340573, 68.59602320966341)"
  3031. ]
  3032. },
  3033. "execution_count": 138,
  3034. "metadata": {},
  3035. "output_type": "execute_result"
  3036. }
  3037. ],
  3038. "source": [
  3039. "np.std(data[:,3]), np.var(data[:,3])"
  3040. ]
  3041. },
  3042. {
  3043. "cell_type": "markdown",
  3044. "metadata": {},
  3045. "source": [
  3046. "#### 最小值和最大值"
  3047. ]
  3048. },
  3049. {
  3050. "cell_type": "code",
  3051. "execution_count": 139,
  3052. "metadata": {},
  3053. "outputs": [
  3054. {
  3055. "data": {
  3056. "text/plain": [
  3057. "-25.8"
  3058. ]
  3059. },
  3060. "execution_count": 139,
  3061. "metadata": {},
  3062. "output_type": "execute_result"
  3063. }
  3064. ],
  3065. "source": [
  3066. "# 最低日平均温度\n",
  3067. "data[:,3].min()"
  3068. ]
  3069. },
  3070. {
  3071. "cell_type": "code",
  3072. "execution_count": 140,
  3073. "metadata": {},
  3074. "outputs": [
  3075. {
  3076. "data": {
  3077. "text/plain": [
  3078. "28.3"
  3079. ]
  3080. },
  3081. "execution_count": 140,
  3082. "metadata": {},
  3083. "output_type": "execute_result"
  3084. }
  3085. ],
  3086. "source": [
  3087. "# 最高日平均温度\n",
  3088. "data[:,3].max()"
  3089. ]
  3090. },
  3091. {
  3092. "cell_type": "markdown",
  3093. "metadata": {},
  3094. "source": [
  3095. "#### sum, prod, and trace"
  3096. ]
  3097. },
  3098. {
  3099. "cell_type": "code",
  3100. "execution_count": 141,
  3101. "metadata": {},
  3102. "outputs": [
  3103. {
  3104. "data": {
  3105. "text/plain": [
  3106. "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])"
  3107. ]
  3108. },
  3109. "execution_count": 141,
  3110. "metadata": {},
  3111. "output_type": "execute_result"
  3112. }
  3113. ],
  3114. "source": [
  3115. "d = np.arange(0, 10)\n",
  3116. "d"
  3117. ]
  3118. },
  3119. {
  3120. "cell_type": "code",
  3121. "execution_count": 142,
  3122. "metadata": {},
  3123. "outputs": [
  3124. {
  3125. "data": {
  3126. "text/plain": [
  3127. "45"
  3128. ]
  3129. },
  3130. "execution_count": 142,
  3131. "metadata": {},
  3132. "output_type": "execute_result"
  3133. }
  3134. ],
  3135. "source": [
  3136. "# 将所有的元素相加\n",
  3137. "np.sum(d)"
  3138. ]
  3139. },
  3140. {
  3141. "cell_type": "code",
  3142. "execution_count": 143,
  3143. "metadata": {},
  3144. "outputs": [
  3145. {
  3146. "data": {
  3147. "text/plain": [
  3148. "3628800"
  3149. ]
  3150. },
  3151. "execution_count": 143,
  3152. "metadata": {},
  3153. "output_type": "execute_result"
  3154. }
  3155. ],
  3156. "source": [
  3157. "# 所有元素的乘积\n",
  3158. "np.prod(d+1)"
  3159. ]
  3160. },
  3161. {
  3162. "cell_type": "code",
  3163. "execution_count": 144,
  3164. "metadata": {},
  3165. "outputs": [
  3166. {
  3167. "data": {
  3168. "text/plain": [
  3169. "array([ 0, 1, 3, 6, 10, 15, 21, 28, 36, 45])"
  3170. ]
  3171. },
  3172. "execution_count": 144,
  3173. "metadata": {},
  3174. "output_type": "execute_result"
  3175. }
  3176. ],
  3177. "source": [
  3178. "# 累计求和\n",
  3179. "np.cumsum(d)"
  3180. ]
  3181. },
  3182. {
  3183. "cell_type": "code",
  3184. "execution_count": 147,
  3185. "metadata": {},
  3186. "outputs": [
  3187. {
  3188. "data": {
  3189. "text/plain": [
  3190. "array([ 1, 2, 6, 24, 120, 720, 5040,\n",
  3191. " 40320, 362880, 3628800])"
  3192. ]
  3193. },
  3194. "execution_count": 147,
  3195. "metadata": {},
  3196. "output_type": "execute_result"
  3197. }
  3198. ],
  3199. "source": [
  3200. "# 累积乘积\n",
  3201. "np.cumprod(d+1)"
  3202. ]
  3203. },
  3204. {
  3205. "cell_type": "code",
  3206. "execution_count": 148,
  3207. "metadata": {},
  3208. "outputs": [
  3209. {
  3210. "data": {
  3211. "text/plain": [
  3212. "1.04879166276667"
  3213. ]
  3214. },
  3215. "execution_count": 148,
  3216. "metadata": {},
  3217. "output_type": "execute_result"
  3218. }
  3219. ],
  3220. "source": [
  3221. "# 计算对角线元素的和,和diag(A).sum()一样\n",
  3222. "np.trace(A)"
  3223. ]
  3224. },
  3225. {
  3226. "cell_type": "markdown",
  3227. "metadata": {},
  3228. "source": [
  3229. "### 数组子集的计算"
  3230. ]
  3231. },
  3232. {
  3233. "cell_type": "markdown",
  3234. "metadata": {},
  3235. "source": [
  3236. "我们可以使用索引、花式索引以及从数组中提取数据的其他方法(如上所述),对数组中的数据子集进行计算。\n",
  3237. "\n",
  3238. "例如,让我们回到温度数据集:"
  3239. ]
  3240. },
  3241. {
  3242. "cell_type": "code",
  3243. "execution_count": 149,
  3244. "metadata": {},
  3245. "outputs": [
  3246. {
  3247. "name": "stdout",
  3248. "output_type": "stream",
  3249. "text": [
  3250. "1800 1 1 -6.1 -6.1 -6.1 1\r\n",
  3251. "1800 1 2 -15.4 -15.4 -15.4 1\r\n",
  3252. "1800 1 3 -15.0 -15.0 -15.0 1\r\n"
  3253. ]
  3254. }
  3255. ],
  3256. "source": [
  3257. "!head -n 3 stockholm_td_adj.dat"
  3258. ]
  3259. },
  3260. {
  3261. "cell_type": "markdown",
  3262. "metadata": {},
  3263. "source": [
  3264. "数据集的格式是:年,月,日,日平均气温,低,高,位置。\n",
  3265. "\n",
  3266. "如果我们对某个特定月份(比如二月)的平均温度感兴趣,那么可以创建一个索引掩码,并用它来选择该月的数据:"
  3267. ]
  3268. },
  3269. {
  3270. "cell_type": "code",
  3271. "execution_count": 99,
  3272. "metadata": {},
  3273. "outputs": [
  3274. {
  3275. "data": {
  3276. "text/plain": [
  3277. "array([ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.])"
  3278. ]
  3279. },
  3280. "execution_count": 99,
  3281. "metadata": {},
  3282. "output_type": "execute_result"
  3283. }
  3284. ],
  3285. "source": [
  3286. "np.unique(data[:,1]) # 月份列的取值为1到12"
  3287. ]
  3288. },
  3289. {
  3290. "cell_type": "code",
  3291. "execution_count": 150,
  3292. "metadata": {},
  3293. "outputs": [
  3294. {
  3295. "name": "stdout",
  3296. "output_type": "stream",
  3297. "text": [
  3298. "[False False False ... False False False]\n"
  3299. ]
  3300. }
  3301. ],
  3302. "source": [
  3303. "mask_feb = data[:,1] == 2\n",
  3304. "print(mask_feb)"
  3305. ]
  3306. },
  3307. {
  3308. "cell_type": "code",
  3309. "execution_count": 151,
  3310. "metadata": {},
  3311. "outputs": [
  3312. {
  3313. "name": "stdout",
  3314. "output_type": "stream",
  3315. "text": [
  3316. "-3.212109570736596\n",
  3317. "5.090390768766271\n"
  3318. ]
  3319. }
  3320. ],
  3321. "source": [
  3322. "# 温度数据在索引为3的列中\n",
  3323. "print(np.mean(data[mask_feb,3]))\n",
  3324. "print(np.std(data[mask_feb,3]))"
  3325. ]
  3326. },
  3327. {
  3328. "cell_type": "markdown",
  3329. "metadata": {},
  3330. "source": [
  3331. "有了这些工具,我们就有了非常强大的数据处理能力。例如,提取每年每个月的平均气温只需要几行代码:"
  3332. ]
  3333. },
  3334. {
  3335. "cell_type": "code",
  3336. "execution_count": 153,
  3337. "metadata": {},
  3338. "outputs": [
  3339. {
  3340. "data": {
  3341. "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEKCAYAAAAfGVI8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAEhtJREFUeJzt3X20ZXVdx/H3JyYTeQiNiQwcL7pYuIgQbRZpWKFGYZhUy8opjcrEInyoVjVZLfAfG1PyYdXSRiGfMRepYTOiRgE9mDooIagE0aBDyEMWkRUGfPvj7NE7E/fezb3n7H3v/b1fa511z/6dfff+7jV37uf+9m/v305VIUlq19eNXYAkaVwGgSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlxG8YuoI/DDz+85ubmxi5DktaUq6666s6q2rjUemsiCObm5ti1a9fYZUjSmpLk5j7reWpIkhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1Lg1cUOZtBbMbd0x9W3u3nb61Lcp7c8egSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DjnGpLWmGnPaeR8RrJHIEmNMwgkqXEGgSQ1ziCQpMYZBJLUOINAkho3syBIcmGS25NcO6/tvCS3JLm6e/3grPYvSepnlj2CtwCnPUD7a6rqxO61c4b7lyT1MLMgqKorgS/NavuSpOkYY4zgnCTXdKeOHj7C/iVJ8wwdBG8AHgucCNwKnL/QiknOSrIrya477rhjqPokqTmDBkFV3VZV91XV/cCbgJMWWXd7VW2uqs0bN24crkhJasygQZDkkfMWfwS4dqF1JUnDmNnso0kuAk4BDk+yBzgXOCXJiUABu4EXzmr/0l7Tnq0TnLFT68vMgqCqtjxA8wWz2p8kaXm8s1iSGmcQSFLjDAJJapxBIEmNMwgkqXEGgSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJIaZxBIUuMMAklqnEEgSY0zCCSpcQaBJDXOIJCkxhkEktQ4g0CSGrdhOd+U5M+r6pnTLkbS6jG3dcdUt7d72+lT3Z6mZ7k9ghdMtQpJ0mh69QiSPAR4HFDA9VV160yrkiQNZskgSHI68Ebgn4AARyd5YVV9cNbFSZJmr0+P4HzgqVV1I0CSxwI7AINAktaBPmMEd+8Ngc5NwN0zqkeSNLA+PYJdSXYC72EyRvBjwCeS/ChAVb13hvVJkmasTxA8FLgN+N5u+Q7gQOCHmASDQSBJa9iSQVBVPztEIZKkcfS5auho4EXA3Pz1q+pZsytLkjSUPqeG3g9cAHwAuH+25UiShtYnCP6nql4/80okSaPoEwSvS3Iu8GHgnr2NVfXJmVUlSRpMnyD4duB5wNP42qmh6pYlSWtcnyD4MeAxVfWVB7PhJBcCzwRur6rju7ZHAH/CZOB5N/DjVfVvD2a7kqTp6nNn8bXAYcvY9luA0/Zr2wpcVlXHAJd1y5KkEfXpERwGfC7JJ9h3jGDRy0er6sokc/s1nwGc0r1/K3A58Bv9SpUkzUKfIDh3ivs7Yt4U1l8EjpjitiVJy9DnzuIrkjwaOKaq/iLJw4ADVrrjqqoktdDnSc4CzgLYtGnTSncnSVrAkmMESV4AXAz8Udd0JJObzJbjtiSP7Lb7SOD2hVasqu1VtbmqNm/cuHGZu5MkLaXPYPEvAScD/wFQVTcA37zM/V0CnNm9PxP4s2VuR5I0JX2C4J75l44m2cDkPoJFJbkI+ChwbJI9SZ4PbANOTXID8H3dsiRpRH0Gi69I8jLgwCSnAmczmXdoUVW1ZYGPnv4g6tM6Nrd1x9S3uXvb6VPfprTe9ekRbGXyDIJPAy8EdlbVb820KknSYPr0CF5UVa8D3rS3Ic
lLujZJ0hrXp0dw5gO0/cyU65AkjWTBHkGSLcBPAkcnuWTeR4cAX5p1YZKkYSx2aujvgFuBw4Hz57XfDVwzy6IkScNZMAiq6mbgZuDJw5UjSRpanzECSdI6ZhBIUuMMAklq3LKCIMl5U65DkjSS5fYIrppqFZKk0SwrCKpqybmGJElrw5JTTCR5/QM03wXsqiqnkZakNa5Pj+ChwInADd3rBOAo4PlJXjvD2iRJA+gz6dwJwMlVdR9AkjcAfw08hcmMpJKkNaxPj+DhwMHzlg8CHtEFwz0zqUqSNJg+PYLfA65OcjkQ4HuAVyQ5CPiLGdYmSRrAkkFQVRck2Qmc1DW9rKr+pXv/azOrTJI0iD5XDX0AeBdwSVV9efYlSZKG1GeM4NXAdwOfSXJxkmcneeiM65IkDaTPqaErmDzA/gDgacALgAuBQ2dcmyRpAH0Gi0lyIPBDwE8ATwTeOsuiJEnD6TNG8B4mA8WXAn8AXFFV98+6MEnSMPr0CC4Atuy9oUyStL70GSP4UJLjkxzHZLqJve1vm2llkqRB9Dk1dC5wCnAcsBN4BvA3gEEgSetAn1NDzwYeD3yqqn42yRHAO2ZblqQWzG3dMfVt7t52+tS3ud71uY/gv7vB4XuTHArcDjxqtmVJkobSp0ewK8lhwJuYPJnsP4GPzrQqSdJg+gwWn929fWOSS4FDq+qa2ZYlSRpKrxvK9qqq3TOqQ5I0kuU+vF6StE4YBJLUuCWDIMn5Sb5tiGIkScPr0yP4LLA9yceS/EKSb5x1UZKk4SwZBFX15qo6GfhpYA64Jsm7kjx11sVJkmav1xhB9yyCx3WvO4F/AH4lybtnWJskaQB95hp6DZNnEVwGvKKqPt599Mok18+yOEnS7PW5j+Aa4LcXeF7xSQ/QJklaQxYMgiRP7N7+A3Bskn0+r6pPVtVdy9lpkt3A3cB9wL1VtXk525EkrdxiPYLzF/msmDy/eCWeWlV3rnAbkqQVWjAIqsqrgiSpAX0fXv9dTC4d/er6K3xCWQEfTlLAH1XV9hVsS5K0An2uGno78Fjgaibn9GHyi3wlQfCUqrolyTcDH0nyuaq6cr/9ngWcBbBp06YV7EqStJg+PYLNwHFVVdPaaVXd0n29Pcn7mFx9dOV+62wHtgNs3rx5avuWJO2rzw1l1wLfMq0dJjkoySF73wPf3+1DkjSCxS4f/QCTU0CHAJ9J8nHgnr2fV9WzlrnPI4D3dZejbgDeVVWXLnNbkqQVWuzU0KtnscOqugl4/Cy2LUl68Ba7fPQKgCSvrKrfmP9ZklcCV8y4NknSAPqMEZz6AG3PmHYhkqRxLDZG8IvA2cBjksx/WP0hwN/NujBJ0jAWGyN4F/BB4HeBrfPa766qL820KknSYBYbI7gLuAvY0j2P4Ihu/YOTHFxVnx+oRknSDPW5s/gc4DzgNuD+rrmAE2ZXliRpKH3uLH4pcGxV/eusi9HqMbd1x1S3t3vb6VPdnqTp6XPV0BeYnCKSJK1DfXoENwGXJ9nBvncW//7MqpIkDaZPEHy+ez2ke0mS1pElg6CqXg6Q5OBu+T9nXZQkaThLjhEkOT7Jp4DrgOuSXJXk22ZfmiRpCH0Gi7cDv1JVj66qRwO/CrxptmVJkobSJwgOqqq/2rtQVZcDB82sIknSoHpdNZTkd4C3d8vPZXIlkSRpHejTI/g5YCPw3u61sWuTJK0Dfa4a+jfgxQPUIkkawWLTUF+y2Deu4FGVkqRVZLEewZOZTC9xEfAxIINUJEka1GJB8C1Mnk62BfhJYAdwUVVdN0RhkqRhLDhYXFX3VdWlVXUm8CTgRiZzDp0zWHWSpJlbdLA4yTcApzPpFcwBrwfeN/uyJElDWWyw+G3A8cBO4OVVde1gVUmSBrNYj+C5wJeBlwAvTr46VhygqurQGdcmSRrAYs8s7nOzmSStetN+4h6sr6fu+ctekhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJ
IaZxBIUuMMAklqnEEgSY0zCCSpcQaBJDVulCBIclqS65PcmGTrGDVIkiYGD4IkBwB/CDwDOA7YkuS4oeuQJE2M0SM4Cbixqm6qqq8A7wbOGKEOSRLjBMGRwBfmLe/p2iRJI0hVDbvD5NnAaVX1893y84DvrKpz9lvvLOAsgE2bNn3HzTffvKz9DfWIurW6n/X0uD1pbKvtkZhJrqqqzUutN0aP4BbgUfOWj+ra9lFV26tqc1Vt3rhx42DFSVJrxgiCTwDHJDk6yUOA5wCXjFCHJAnYMPQOq+reJOcAHwIOAC6squuGrkOSNDF4EABU1U5g5xj7liTtyzuLJalxBoEkNc4gkKTGjTJGoOXzun9J02aPQJIaZxBIUuMMAklqnGMEkjQla3UMzx6BJDXOIJCkxhkEktQ4g0CSGmcQSFLjDAJJapxBIEmNMwgkqXEGgSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlx6/7BNGv1QRGSNBR7BJLUuHXfIxiKPQ9Ja5U9AklqnEEgSY0zCCSpcQaBJDXOIJCkxhkEktQ4g0CSGmcQSFLjDAJJalyqauwalpTkbuD6seuYksOBO8cuYorW0/Gsp2MBj2c1G+pYHl1VG5daaa1MMXF9VW0eu4hpSLJrvRwLrK/jWU/HAh7ParbajsVTQ5LUOINAkhq3VoJg+9gFTNF6OhZYX8ezno4FPJ7VbFUdy5oYLJYkzc5a6RFIkmZkVQdBktOSXJ/kxiRbx65nJZI8KslfJflMkuuSvGTsmlYqyQFJPpXkz8euZaWSHJbk4iSfS/LZJE8eu6aVSPLL3c/ZtUkuSvLQsWvqK8mFSW5Pcu28tkck+UiSG7qvDx+zxgdjgeN5Vfezdk2S9yU5bMwaV20QJDkA+EPgGcBxwJYkx41b1YrcC/xqVR0HPAn4pTV+PAAvAT47dhFT8jrg0qp6HPB41vBxJTkSeDGwuaqOBw4AnjNuVQ/KW4DT9mvbClxWVccAl3XLa8Vb+P/H8xHg+Ko6AfhH4DeHLmq+VRsEwEnAjVV1U1V9BXg3cMbINS1bVd1aVZ/s3t/N5BfNkeNWtXxJjgJOB948di0rleQbge8BLgCoqq9U1b+PW9WKbQAOTLIBeBjwLyPX01tVXQl8ab/mM4C3du/fCvzwoEWtwAMdT1V9uKru7Rb/Hjhq8MLmWc1BcCTwhXnLe1jDvzjnSzIHPAH42LiVrMhrgV8H7h+7kCk4GrgD+OPuVNebkxw0dlHLVVW3AK8GPg/cCtxVVR8et6oVO6Kqbu3efxE4YsxipuzngA+OWcBqDoJ1KcnBwJ8CL62q/xi7nuVI8kzg9qq6auxapmQD8ETgDVX1BODLrK1TD/vozp+fwSTgvhU4KMlzx61qempyqeO6uNwxyW8xOW38zjHrWM1BcAvwqHnLR3Vta1aSr2cSAu+sqveOXc8KnAw8K8luJqfsnpbkHeOWtCJ7gD1VtbeHdjGTYFirvg/456q6o6r+F3gv8F0j17RStyV5JED39faR61mxJD8DPBP4qRr5Ov7VHASfAI5JcnSShzAZ7Lpk5JqWLUmYnIP+bFX9/tj1rERV/WZVHVVVc0z+Xf6yqtbsX5xV9UXgC0mO7ZqeDnxmxJJW6vPAk5I8rPu5ezprePC7cwlwZvf+TODPRqxlxZKcxuTU6rOq6r/GrmfVBkE3kHIO8CEmP8Tvqarrxq1qRU4Gnsfkr+eru9cPjl2UvupFwDuTXAOcCLxi5HqWrevZXAx8Evg0k//nq+pO1sUkuQj4KHBskj1Jng9sA05NcgOTHs+2MWt8MBY4nj8ADgE+0v0ueOOoNXpnsSS1bdX2CCRJwzAIJKlxBoEkNc4gkKTGGQSS1DiDQAKS1Pyb4pJsSHLHcmdW7WYzPXve8inrYZZWrU8GgTTxZeD4JAd2y6eysjvZDwPOXnItaRUwCKSv2clkRlWALcBFez/o5sN/fzd//N8nOaFrP6+bb/7yJD
cleXH3LduAx3Y3C72qazt43jMP3tnd9SuNziCQvubdwHO6h7icwL6zw74c+FQ3f/zLgLfN++xxwA8wmTr93G5Oqa3AP1XViVX1a916TwBeyuT5Go9hcre5NDqDQOpU1TXAHJPewM79Pn4K8PZuvb8EvinJod1nO6rqnqq6k8lkaAtNkfzxqtpTVfcDV3f7kka3YewCpFXmEiZz+Z8CfFPP77ln3vv7WPj/Vd/1pEHZI5D2dSHw8qr69H7tfw38FEyuAALuXOJ5EnczmVRMWvX8i0Sap6r2AK9/gI/OAy7sZif9L742JfJC2/nXJH/bPbD8g8COadcqTYuzj0pS4zw1JEmNMwgkqXEGgSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWrc/wHL2ncwPAAPTwAAAABJRU5ErkJggg==\n",
  3342. "text/plain": [
  3343. "<Figure size 432x288 with 1 Axes>"
  3344. ]
  3345. },
  3346. "metadata": {
  3347. "needs_background": "light"
  3348. },
  3349. "output_type": "display_data"
  3350. }
  3351. ],
  3352. "source": [
  3353. "months = np.arange(1,13)\n",
  3354. "monthly_mean = [np.mean(data[data[:,1] == month, 3]) for month in months]\n",
  3355. "\n",
  3356. "fig, ax = plt.subplots()\n",
  3357. "ax.bar(months, monthly_mean)\n",
  3358. "ax.set_xlabel(\"Month\")\n",
  3359. "ax.set_ylabel(\"Monthly avg. temp.\");"
  3360. ]
  3361. },
  3362. {
  3363. "cell_type": "markdown",
  3364. "metadata": {},
  3365. "source": [
  3366. "### 高维数据的计算"
  3367. ]
  3368. },
  3369. {
  3370. "cell_type": "markdown",
  3371. "metadata": {},
  3372. "source": [
  3373. "当`min`、`max`等函数应用于高维数组时,有时需要对整个数组进行计算,有时则只需按行或按列计算。通过`axis`参数,我们可以指定函数的行为方式:"
  3374. ]
  3375. },
  3376. {
  3377. "cell_type": "code",
  3378. "execution_count": 157,
  3379. "metadata": {},
  3380. "outputs": [
  3381. {
  3382. "data": {
  3383. "text/plain": [
  3384. "array([[0.99782852, 0.15992805, 0.31262638],\n",
  3385. " [0.51702607, 0.45658172, 0.66789036],\n",
  3386. " [0.77771351, 0.42574723, 0.14011317]])"
  3387. ]
  3388. },
  3389. "execution_count": 157,
  3390. "metadata": {},
  3391. "output_type": "execute_result"
  3392. }
  3393. ],
  3394. "source": [
  3395. "import numpy as np\n",
  3396. "\n",
  3397. "m = np.random.rand(3,3)\n",
  3398. "m"
  3399. ]
  3400. },
  3401. {
  3402. "cell_type": "code",
  3403. "execution_count": 158,
  3404. "metadata": {},
  3405. "outputs": [
  3406. {
  3407. "data": {
  3408. "text/plain": [
  3409. "0.997828517861979"
  3410. ]
  3411. },
  3412. "execution_count": 158,
  3413. "metadata": {},
  3414. "output_type": "execute_result"
  3415. }
  3416. ],
  3417. "source": [
  3418. "# global max\n",
  3419. "m.max()"
  3420. ]
  3421. },
  3422. {
  3423. "cell_type": "code",
  3424. "execution_count": 159,
  3425. "metadata": {},
  3426. "outputs": [
  3427. {
  3428. "data": {
  3429. "text/plain": [
  3430. "array([0.99782852, 0.45658172, 0.66789036])"
  3431. ]
  3432. },
  3433. "execution_count": 159,
  3434. "metadata": {},
  3435. "output_type": "execute_result"
  3436. }
  3437. ],
  3438. "source": [
  3439. "# max in each column\n",
  3440. "m.max(axis=0)"
  3441. ]
  3442. },
  3443. {
  3444. "cell_type": "code",
  3445. "execution_count": 160,
  3446. "metadata": {},
  3447. "outputs": [
  3448. {
  3449. "data": {
  3450. "text/plain": [
  3451. "array([0.99782852, 0.66789036, 0.77771351])"
  3452. ]
  3453. },
  3454. "execution_count": 160,
  3455. "metadata": {},
  3456. "output_type": "execute_result"
  3457. }
  3458. ],
  3459. "source": [
  3460. "# max in each row\n",
  3461. "m.max(axis=1)"
  3462. ]
  3463. },
  3464. {
  3465. "cell_type": "markdown",
  3466. "metadata": {},
  3467. "source": [
  3468. "`array`和`matrix`类中的许多其他函数和方法也接受同样的(可选)关键字参数`axis`。"
  3469. ]
  3470. },
  3471. {
  3472. "cell_type": "markdown",
  3473. "metadata": {},
  3474. "source": [
  3475. "## 数组的重塑、调整大小和堆叠"
  3476. ]
  3477. },
  3478. {
  3479. "cell_type": "markdown",
  3480. "metadata": {},
  3481. "source": [
  3482. "Numpy数组的形状可以在不复制底层数据的情况下被修改,这使得即使对于大型数组,该操作也很快。"
  3483. ]
  3484. },
  3485. {
  3486. "cell_type": "code",
  3487. "execution_count": 162,
  3488. "metadata": {},
  3489. "outputs": [
  3490. {
  3491. "name": "stdout",
  3492. "output_type": "stream",
  3493. "text": [
  3494. "[[0.97579482 0.78668761 0.61373444]\n",
  3495. " [0.58850244 0.9784108 0.08465447]\n",
  3496. " [0.57262123 0.44795615 0.75564229]\n",
  3497. " [0.36770219 0.34095592 0.16259103]]\n"
  3498. ]
  3499. }
  3500. ],
  3501. "source": [
  3502. "import numpy as np\n",
  3503. "\n",
  3504. "A = np.random.rand(4, 3)\n",
  3505. "print(A)"
  3506. ]
  3507. },
  3508. {
  3509. "cell_type": "code",
  3510. "execution_count": 163,
  3511. "metadata": {},
  3512. "outputs": [
  3513. {
  3514. "name": "stdout",
  3515. "output_type": "stream",
  3516. "text": [
  3517. "4 3\n"
  3518. ]
  3519. }
  3520. ],
  3521. "source": [
  3522. "n, m = A.shape\n",
  3523. "print(n, m)"
  3524. ]
  3525. },
  3526. {
  3527. "cell_type": "code",
  3528. "execution_count": 166,
  3529. "metadata": {},
  3530. "outputs": [
  3531. {
  3532. "data": {
  3533. "text/plain": [
  3534. "array([[0.97579482, 0.78668761, 0.61373444, 0.58850244, 0.9784108 ,\n",
  3535. " 0.08465447, 0.57262123, 0.44795615, 0.75564229, 0.36770219,\n",
  3536. " 0.34095592, 0.16259103]])"
  3537. ]
  3538. },
  3539. "execution_count": 166,
  3540. "metadata": {},
  3541. "output_type": "execute_result"
  3542. }
  3543. ],
  3544. "source": [
  3545. "B = A.reshape((1,n*m))\n",
  3546. "B"
  3547. ]
  3548. },
  3549. {
  3550. "cell_type": "code",
  3551. "execution_count": 167,
  3552. "metadata": {},
  3553. "outputs": [
  3554. {
  3555. "name": "stdout",
  3556. "output_type": "stream",
  3557. "text": [
  3558. "[[0.97579482]\n",
  3559. " [0.78668761]\n",
  3560. " [0.61373444]\n",
  3561. " [0.58850244]\n",
  3562. " [0.9784108 ]\n",
  3563. " [0.08465447]\n",
  3564. " [0.57262123]\n",
  3565. " [0.44795615]\n",
  3566. " [0.75564229]\n",
  3567. " [0.36770219]\n",
  3568. " [0.34095592]\n",
  3569. " [0.16259103]]\n"
  3570. ]
  3571. }
  3572. ],
  3573. "source": [
  3574. "B2 = A.reshape((n*m, 1))\n",
  3575. "print(B2)"
  3576. ]
  3577. },
  3578. {
  3579. "cell_type": "code",
  3580. "execution_count": 168,
  3581. "metadata": {},
  3582. "outputs": [
  3583. {
  3584. "data": {
  3585. "text/plain": [
  3586. "array([[5. , 5. , 5. , 5. , 5. ,\n",
  3587. " 0.08465447, 0.57262123, 0.44795615, 0.75564229, 0.36770219,\n",
  3588. " 0.34095592, 0.16259103]])"
  3589. ]
  3590. },
  3591. "execution_count": 168,
  3592. "metadata": {},
  3593. "output_type": "execute_result"
  3594. }
  3595. ],
  3596. "source": [
  3597. "B[0,0:5] = 5 # modify the array\n",
  3598. "\n",
  3599. "B"
  3600. ]
  3601. },
  3602. {
  3603. "cell_type": "code",
  3604. "execution_count": 169,
  3605. "metadata": {},
  3606. "outputs": [
  3607. {
  3608. "data": {
  3609. "text/plain": [
  3610. "array([[5. , 5. , 5. ],\n",
  3611. " [5. , 5. , 0.08465447],\n",
  3612. " [0.57262123, 0.44795615, 0.75564229],\n",
  3613. " [0.36770219, 0.34095592, 0.16259103]])"
  3614. ]
  3615. },
  3616. "execution_count": 169,
  3617. "metadata": {},
  3618. "output_type": "execute_result"
  3619. }
  3620. ],
  3621. "source": [
  3622. "A # and the original variable is also changed. B is only a different view of the same data"
  3623. ]
  3624. },
  3625. {
  3626. "cell_type": "markdown",
  3627. "metadata": {},
  3628. "source": [
  3629. "我们也可以使用`flatten`函数将高维数组转换为向量。但该函数会创建数据的副本。"
  3630. ]
  3631. },
  3632. {
  3633. "cell_type": "code",
  3634. "execution_count": 170,
  3635. "metadata": {},
  3636. "outputs": [
  3637. {
  3638. "data": {
  3639. "text/plain": [
  3640. "array([5. , 5. , 5. , 5. , 5. ,\n",
  3641. " 0.08465447, 0.57262123, 0.44795615, 0.75564229, 0.36770219,\n",
  3642. " 0.34095592, 0.16259103])"
  3643. ]
  3644. },
  3645. "execution_count": 170,
  3646. "metadata": {},
  3647. "output_type": "execute_result"
  3648. }
  3649. ],
  3650. "source": [
  3651. "B = A.flatten()\n",
  3652. "\n",
  3653. "B"
  3654. ]
  3655. },
  3656. {
  3657. "cell_type": "code",
  3658. "execution_count": 171,
  3659. "metadata": {},
  3660. "outputs": [
  3661. {
  3662. "name": "stdout",
  3663. "output_type": "stream",
  3664. "text": [
  3665. "(12,)\n"
  3666. ]
  3667. }
  3668. ],
  3669. "source": [
  3670. "print(B.shape)"
  3671. ]
  3672. },
  3673. {
  3674. "cell_type": "code",
  3675. "execution_count": 172,
  3676. "metadata": {},
  3677. "outputs": [
  3678. {
  3679. "name": "stdout",
  3680. "output_type": "stream",
  3681. "text": [
  3682. "[0.0643267 0.02070895 0.01127191 0.36318507 0.26309744 0.8332378\n",
  3683. " 0.79477743 0.52745619 0.35675021 0.55907373 0.18993756 0.15919449\n",
  3684. " 0.54789401 0.23186893 0.02898541 0.43545343 0.80684175 0.44014057\n",
  3685. " 0.05129167 0.95111801 0.40743132 0.57197596 0.6692788 0.80824496\n",
  3686. " 0.40301441 0.84369196 0.95294593 0.14876807 0.58005171 0.30849079\n",
  3687. " 0.27846197 0.01062528 0.62870079 0.6416306 0.76945123 0.39443503\n",
  3688. " 0.76619764 0.42833327 0.60720341 0.16246792 0.76067082 0.27134944\n",
  3689. " 0.36268568 0.78501742 0.36935191 0.43410334 0.10594888 0.12941728\n",
  3690. " 0.51760718 0.57260509 0.09756568 0.13216908 0.32918105 0.9338644\n",
  3691. " 0.71681907 0.58218819 0.58798528 0.81665138 0.73604797 0.91730101]\n"
  3692. ]
  3693. }
  3694. ],
  3695. "source": [
  3696. "T = np.random.rand(3, 4, 5)\n",
  3697. "T2 = T.flatten()\n",
  3698. "print(T2)"
  3699. ]
  3700. },
  3701. {
  3702. "cell_type": "code",
  3703. "execution_count": 176,
  3704. "metadata": {},
  3705. "outputs": [
  3706. {
  3707. "data": {
  3708. "text/plain": [
  3709. "array([10. , 10. , 10. , 10. , 10. ,\n",
  3710. " 0.08465447, 0.57262123, 0.44795615, 0.75564229, 0.36770219,\n",
  3711. " 0.34095592, 0.16259103])"
  3712. ]
  3713. },
  3714. "execution_count": 176,
  3715. "metadata": {},
  3716. "output_type": "execute_result"
  3717. }
  3718. ],
  3719. "source": [
  3720. "B[0:5] = 10\n",
  3721. "\n",
  3722. "B"
  3723. ]
  3724. },
  3725. {
  3726. "cell_type": "code",
  3727. "execution_count": 177,
  3728. "metadata": {},
  3729. "outputs": [
  3730. {
  3731. "data": {
  3732. "text/plain": [
  3733. "array([[5. , 5. , 5. ],\n",
  3734. " [5. , 5. , 0.08465447],\n",
  3735. " [0.57262123, 0.44795615, 0.75564229],\n",
  3736. " [0.36770219, 0.34095592, 0.16259103]])"
  3737. ]
  3738. },
  3739. "execution_count": 177,
  3740. "metadata": {},
  3741. "output_type": "execute_result"
  3742. }
  3743. ],
  3744. "source": [
  3745. "A # 现在A并没有改变,因为B的数值是A的复制,并不指向同样的值。"
  3746. ]
  3747. },
  3748. {
  3749. "cell_type": "markdown",
  3750. "metadata": {},
  3751. "source": [
  3752. "## 添加新的维度:newaxis"
  3753. ]
  3754. },
  3755. {
  3756. "cell_type": "markdown",
  3757. "metadata": {},
  3758. "source": [
  3759. "有了`newaxis`,我们可以在数组中插入新的维度,例如将一个向量转换为列或行矩阵:"
  3760. ]
  3761. },
  3762. {
  3763. "cell_type": "code",
  3764. "execution_count": 178,
  3765. "metadata": {},
  3766. "outputs": [],
  3767. "source": [
  3768. "v = np.array([1,2,3])"
  3769. ]
  3770. },
  3771. {
  3772. "cell_type": "code",
  3773. "execution_count": 179,
  3774. "metadata": {},
  3775. "outputs": [
  3776. {
  3777. "data": {
  3778. "text/plain": [
  3779. "(3,)"
  3780. ]
  3781. },
  3782. "execution_count": 179,
  3783. "metadata": {},
  3784. "output_type": "execute_result"
  3785. }
  3786. ],
  3787. "source": [
  3788. "np.shape(v)"
  3789. ]
  3790. },
  3791. {
  3792. "cell_type": "code",
  3793. "execution_count": 180,
  3794. "metadata": {},
  3795. "outputs": [
  3796. {
  3797. "name": "stdout",
  3798. "output_type": "stream",
  3799. "text": [
  3800. "[1 2 3]\n"
  3801. ]
  3802. }
  3803. ],
  3804. "source": [
  3805. "print(v)"
  3806. ]
  3807. },
  3808. {
  3809. "cell_type": "code",
  3810. "execution_count": 182,
  3811. "metadata": {},
  3812. "outputs": [
  3813. {
  3814. "name": "stdout",
  3815. "output_type": "stream",
  3816. "text": [
  3817. "(3, 1)\n"
  3818. ]
  3819. }
  3820. ],
  3821. "source": [
  3822. "v2 = v.reshape(3, 1)\n",
  3823. "print(v2.shape)"
  3824. ]
  3825. },
  3826. {
  3827. "cell_type": "code",
  3828. "execution_count": 190,
  3829. "metadata": {},
  3830. "outputs": [
  3831. {
  3832. "name": "stdout",
  3833. "output_type": "stream",
  3834. "text": [
  3835. "(3,)\n",
  3836. "(3, 1)\n"
  3837. ]
  3838. }
  3839. ],
  3840. "source": [
  3841. "# 做一个向量v的列矩阵\n",
  3842. "v2 = v[:, np.newaxis]\n",
  3843. "print(v.shape)\n",
  3844. "print(v2.shape)\n"
  3845. ]
  3846. },
  3847. {
  3848. "cell_type": "code",
  3849. "execution_count": 191,
  3850. "metadata": {},
  3851. "outputs": [
  3852. {
  3853. "data": {
  3854. "text/plain": [
  3855. "(3, 1)"
  3856. ]
  3857. },
  3858. "execution_count": 191,
  3859. "metadata": {},
  3860. "output_type": "execute_result"
  3861. }
  3862. ],
  3863. "source": [
  3864. "# 列矩阵\n",
  3865. "v[:,newaxis].shape"
  3866. ]
  3867. },
  3868. {
  3869. "cell_type": "code",
  3870. "execution_count": 144,
  3871. "metadata": {},
  3872. "outputs": [
  3873. {
  3874. "data": {
  3875. "text/plain": [
  3876. "(1, 3)"
  3877. ]
  3878. },
  3879. "execution_count": 144,
  3880. "metadata": {},
  3881. "output_type": "execute_result"
  3882. }
  3883. ],
  3884. "source": [
  3885. "# 行矩阵\n",
  3886. "v[newaxis,:].shape"
  3887. ]
  3888. },
  3889. {
  3890. "cell_type": "markdown",
  3891. "metadata": {},
  3892. "source": [
  3893. "## 叠加和重复数组"
  3894. ]
  3895. },
  3896. {
  3897. "cell_type": "markdown",
  3898. "metadata": {},
  3899. "source": [
  3900. "利用函数`repeat`, `tile`, `vstack`, `hstack`, 和`concatenate` 我们可以用较小的向量和矩阵来创建更大的向量和矩阵:"
  3901. ]
  3902. },
  3903. {
  3904. "cell_type": "markdown",
  3905. "metadata": {},
  3906. "source": [
  3907. "### tile and repeat"
  3908. ]
  3909. },
  3910. {
  3911. "cell_type": "code",
  3912. "execution_count": 192,
  3913. "metadata": {},
  3914. "outputs": [],
  3915. "source": [
  3916. "a = np.array([[1, 2], [3, 4]])"
  3917. ]
  3918. },
  3919. {
  3920. "cell_type": "code",
  3921. "execution_count": 194,
  3922. "metadata": {},
  3923. "outputs": [
  3924. {
  3925. "name": "stdout",
  3926. "output_type": "stream",
  3927. "text": [
  3928. "[[1 2]\n",
  3929. " [3 4]]\n"
  3930. ]
  3931. },
  3932. {
  3933. "data": {
  3934. "text/plain": [
  3935. "array([1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4])"
  3936. ]
  3937. },
  3938. "execution_count": 194,
  3939. "metadata": {},
  3940. "output_type": "execute_result"
  3941. }
  3942. ],
  3943. "source": [
  3944. "print(a)\n",
  3945. "\n",
  3946. "# 重复每一个元素三次\n",
  3947. "np.repeat(a, 3)"
  3948. ]
  3949. },
  3950. {
  3951. "cell_type": "code",
  3952. "execution_count": 195,
  3953. "metadata": {},
  3954. "outputs": [
  3955. {
  3956. "data": {
  3957. "text/plain": [
  3958. "array([[1, 2, 1, 2, 1, 2],\n",
  3959. " [3, 4, 3, 4, 3, 4]])"
  3960. ]
  3961. },
  3962. "execution_count": 195,
  3963. "metadata": {},
  3964. "output_type": "execute_result"
  3965. }
  3966. ],
  3967. "source": [
  3968. "# tile the matrix 3 times \n",
  3969. "np.tile(a, 3)"
  3970. ]
  3971. },
  3972. {
  3973. "cell_type": "code",
  3974. "execution_count": 196,
  3975. "metadata": {},
  3976. "outputs": [
  3977. {
  3978. "data": {
  3979. "text/plain": [
  3980. "array([[1, 2, 1, 2, 1, 2],\n",
  3981. " [3, 4, 3, 4, 3, 4]])"
  3982. ]
  3983. },
  3984. "execution_count": 196,
  3985. "metadata": {},
  3986. "output_type": "execute_result"
  3987. }
  3988. ],
  3989. "source": [
  3990. "# 更好的方案\n",
  3991. "np.tile(a, (1, 3))"
  3992. ]
  3993. },
  3994. {
  3995. "cell_type": "code",
  3996. "execution_count": 34,
  3997. "metadata": {},
  3998. "outputs": [
  3999. {
  4000. "data": {
  4001. "text/plain": [
  4002. "array([[1, 2],\n",
  4003. " [3, 4],\n",
  4004. " [1, 2],\n",
  4005. " [3, 4],\n",
  4006. " [1, 2],\n",
  4007. " [3, 4]])"
  4008. ]
  4009. },
  4010. "execution_count": 34,
  4011. "metadata": {},
  4012. "output_type": "execute_result"
  4013. }
  4014. ],
  4015. "source": [
  4016. "np.tile(a, (3, 1))"
  4017. ]
  4018. },
  4019. {
  4020. "cell_type": "markdown",
  4021. "metadata": {},
  4022. "source": [
  4023. "### concatenate"
  4024. ]
  4025. },
  4026. {
  4027. "cell_type": "code",
  4028. "execution_count": 197,
  4029. "metadata": {},
  4030. "outputs": [],
  4031. "source": [
  4032. "b = np.array([[5, 6]])"
  4033. ]
  4034. },
  4035. {
  4036. "cell_type": "code",
  4037. "execution_count": 198,
  4038. "metadata": {},
  4039. "outputs": [
  4040. {
  4041. "data": {
  4042. "text/plain": [
  4043. "array([[1, 2],\n",
  4044. " [3, 4],\n",
  4045. " [5, 6]])"
  4046. ]
  4047. },
  4048. "execution_count": 198,
  4049. "metadata": {},
  4050. "output_type": "execute_result"
  4051. }
  4052. ],
  4053. "source": [
  4054. "np.concatenate((a, b), axis=0)"
  4055. ]
  4056. },
  4057. {
  4058. "cell_type": "code",
  4059. "execution_count": 200,
  4060. "metadata": {},
  4061. "outputs": [
  4062. {
  4063. "data": {
  4064. "text/plain": [
  4065. "array([[1, 2, 5],\n",
  4066. " [3, 4, 6]])"
  4067. ]
  4068. },
  4069. "execution_count": 200,
  4070. "metadata": {},
  4071. "output_type": "execute_result"
  4072. }
  4073. ],
  4074. "source": [
  4075. "np.concatenate((a, b.T), axis=1)"
  4076. ]
  4077. },
  4078. {
  4079. "cell_type": "markdown",
  4080. "metadata": {},
  4081. "source": [
  4082. "### hstack and vstack"
  4083. ]
  4084. },
  4085. {
  4086. "cell_type": "code",
  4087. "execution_count": 201,
  4088. "metadata": {},
  4089. "outputs": [
  4090. {
  4091. "data": {
  4092. "text/plain": [
  4093. "array([[1, 2],\n",
  4094. " [3, 4],\n",
  4095. " [5, 6]])"
  4096. ]
  4097. },
  4098. "execution_count": 201,
  4099. "metadata": {},
  4100. "output_type": "execute_result"
  4101. }
  4102. ],
  4103. "source": [
  4104. "np.vstack((a,b))"
  4105. ]
  4106. },
  4107. {
  4108. "cell_type": "code",
  4109. "execution_count": 202,
  4110. "metadata": {},
  4111. "outputs": [
  4112. {
  4113. "data": {
  4114. "text/plain": [
  4115. "array([[1, 2, 5],\n",
  4116. " [3, 4, 6]])"
  4117. ]
  4118. },
  4119. "execution_count": 202,
  4120. "metadata": {},
  4121. "output_type": "execute_result"
  4122. }
  4123. ],
  4124. "source": [
  4125. "np.hstack((a,b.T))"
  4126. ]
  4127. },
  4128. {
  4129. "cell_type": "markdown",
  4130. "metadata": {},
  4131. "source": [
  4132. "## 复制和“深度复制”"
  4133. ]
  4134. },
  4135. {
  4136. "cell_type": "markdown",
  4137. "metadata": {},
  4138. "source": [
  4139. "为了获得高性能,Python中的赋值通常不会复制底层对象。这一点在函数之间传递对象时尤为重要,可以避免不必要的大量内存复制(技术术语:按引用传递)。"
  4140. ]
  4141. },
  4142. {
  4143. "cell_type": "code",
  4144. "execution_count": 203,
  4145. "metadata": {},
  4146. "outputs": [
  4147. {
  4148. "data": {
  4149. "text/plain": [
  4150. "array([[1, 2],\n",
  4151. " [3, 4]])"
  4152. ]
  4153. },
  4154. "execution_count": 203,
  4155. "metadata": {},
  4156. "output_type": "execute_result"
  4157. }
  4158. ],
  4159. "source": [
  4160. "A = np.array([[1, 2], [3, 4]])\n",
  4161. "\n",
  4162. "A"
  4163. ]
  4164. },
  4165. {
  4166. "cell_type": "code",
  4167. "execution_count": 204,
  4168. "metadata": {},
  4169. "outputs": [],
  4170. "source": [
  4171. "# 现在B和A指的是同一个数组数据\n",
  4172. "B = A "
  4173. ]
  4174. },
  4175. {
  4176. "cell_type": "code",
  4177. "execution_count": 205,
  4178. "metadata": {},
  4179. "outputs": [
  4180. {
  4181. "data": {
  4182. "text/plain": [
  4183. "array([[10, 2],\n",
  4184. " [ 3, 4]])"
  4185. ]
  4186. },
  4187. "execution_count": 205,
  4188. "metadata": {},
  4189. "output_type": "execute_result"
  4190. }
  4191. ],
  4192. "source": [
  4193. "# 改变B影响A\n",
  4194. "B[0,0] = 10\n",
  4195. "\n",
  4196. "B"
  4197. ]
  4198. },
  4199. {
  4200. "cell_type": "code",
  4201. "execution_count": 206,
  4202. "metadata": {},
  4203. "outputs": [
  4204. {
  4205. "data": {
  4206. "text/plain": [
  4207. "array([[10, 2],\n",
  4208. " [ 3, 4]])"
  4209. ]
  4210. },
  4211. "execution_count": 206,
  4212. "metadata": {},
  4213. "output_type": "execute_result"
  4214. }
  4215. ],
  4216. "source": [
  4217. "A"
  4218. ]
  4219. },
  4220. {
  4221. "cell_type": "markdown",
  4222. "metadata": {},
  4223. "source": [
  4224. "如果我们想避免这种行为,那么当我们从`A`中复制一个新的完全独立的对象`B`时,我们需要使用函数`copy`来做一个所谓的“深度复制”:"
  4225. ]
  4226. },
  4227. {
  4228. "cell_type": "code",
  4229. "execution_count": 207,
  4230. "metadata": {},
  4231. "outputs": [],
  4232. "source": [
  4233. "B = np.copy(A)"
  4234. ]
  4235. },
  4236. {
  4237. "cell_type": "code",
  4238. "execution_count": 208,
  4239. "metadata": {},
  4240. "outputs": [
  4241. {
  4242. "data": {
  4243. "text/plain": [
  4244. "array([[-5, 2],\n",
  4245. " [ 3, 4]])"
  4246. ]
  4247. },
  4248. "execution_count": 208,
  4249. "metadata": {},
  4250. "output_type": "execute_result"
  4251. }
  4252. ],
  4253. "source": [
  4254. "# 现在如果我们改变B,A不受影响\n",
  4255. "B[0,0] = -5\n",
  4256. "\n",
  4257. "B"
  4258. ]
  4259. },
  4260. {
  4261. "cell_type": "code",
  4262. "execution_count": 209,
  4263. "metadata": {},
  4264. "outputs": [
  4265. {
  4266. "data": {
  4267. "text/plain": [
  4268. "array([[10, 2],\n",
  4269. " [ 3, 4]])"
  4270. ]
  4271. },
  4272. "execution_count": 209,
  4273. "metadata": {},
  4274. "output_type": "execute_result"
  4275. }
  4276. ],
  4277. "source": [
  4278. "A"
  4279. ]
  4280. },
  4281. {
  4282. "cell_type": "markdown",
  4283. "metadata": {},
  4284. "source": [
  4285. "## 遍历数组元素"
  4286. ]
  4287. },
  4288. {
  4289. "cell_type": "markdown",
  4290. "metadata": {},
  4291. "source": [
  4292. "通常,我们希望尽可能避免遍历数组元素(不惜一切代价)。原因是在像Python(或MATLAB)这样的解释型语言中,迭代与向量化操作相比真的很慢。\n",
  4293. "\n",
  4294. "然而,有时迭代是不可避免的。对于这种情况,Python的`for`循环是最方便的遍历数组的方法:"
  4295. ]
  4296. },
  4297. {
  4298. "cell_type": "code",
  4299. "execution_count": 210,
  4300. "metadata": {},
  4301. "outputs": [
  4302. {
  4303. "name": "stdout",
  4304. "output_type": "stream",
  4305. "text": [
  4306. "1\n",
  4307. "2\n",
  4308. "3\n",
  4309. "4\n"
  4310. ]
  4311. }
  4312. ],
  4313. "source": [
  4314. "v = np.array([1,2,3,4])\n",
  4315. "\n",
  4316. "for element in v:\n",
  4317. " print(element)"
  4318. ]
  4319. },
  4320. {
  4321. "cell_type": "code",
  4322. "execution_count": 211,
  4323. "metadata": {},
  4324. "outputs": [
  4325. {
  4326. "name": "stdout",
  4327. "output_type": "stream",
  4328. "text": [
  4329. "row [1 2]\n",
  4330. "1\n",
  4331. "2\n",
  4332. "row [3 4]\n",
  4333. "3\n",
  4334. "4\n"
  4335. ]
  4336. }
  4337. ],
  4338. "source": [
  4339. "M = np.array([[1,2], [3,4]])\n",
  4340. "\n",
  4341. "for row in M:\n",
  4342. " print(\"row\", row)\n",
  4343. " \n",
  4344. " for element in row:\n",
  4345. " print(element)"
  4346. ]
  4347. },
  4348. {
  4349. "cell_type": "markdown",
  4350. "metadata": {},
  4351. "source": [
  4353. "当我们需要遍历一个数组的每个元素并修改它的元素时,使用`enumerate`函数可以方便地在`for`循环中获得元素及其索引:"
  4354. ]
  4355. },
  4356. {
  4357. "cell_type": "code",
  4358. "execution_count": 162,
  4359. "metadata": {},
  4360. "outputs": [
  4361. {
  4362. "name": "stdout",
  4363. "output_type": "stream",
  4364. "text": [
  4365. "('row_idx', 0, 'row', array([1, 2]))\n",
  4366. "('col_idx', 0, 'element', 1)\n",
  4367. "('col_idx', 1, 'element', 2)\n",
  4368. "('row_idx', 1, 'row', array([3, 4]))\n",
  4369. "('col_idx', 0, 'element', 3)\n",
  4370. "('col_idx', 1, 'element', 4)\n"
  4371. ]
  4372. }
  4373. ],
  4374. "source": [
  4375. "for row_idx, row in enumerate(M):\n",
  4376. " print(\"row_idx\", row_idx, \"row\", row)\n",
  4377. " \n",
  4378. " for col_idx, element in enumerate(row):\n",
  4379. " print(\"col_idx\", col_idx, \"element\", element)\n",
  4380. " \n",
  4381. " # update the matrix M: square each element\n",
  4382. " M[row_idx, col_idx] = element ** 2"
  4383. ]
  4384. },
  4385. {
  4386. "cell_type": "code",
  4387. "execution_count": 163,
  4388. "metadata": {},
  4389. "outputs": [
  4390. {
  4391. "data": {
  4392. "text/plain": [
  4393. "array([[ 1, 4],\n",
  4394. " [ 9, 16]])"
  4395. ]
  4396. },
  4397. "execution_count": 163,
  4398. "metadata": {},
  4399. "output_type": "execute_result"
  4400. }
  4401. ],
  4402. "source": [
  4403. "# each element in M is now squared\n",
  4404. "M"
  4405. ]
  4406. },
  4407. {
  4408. "cell_type": "markdown",
  4409. "metadata": {},
  4410. "source": [
  4411. "## Vectorizing functions"
  4412. ]
  4413. },
  4414. {
  4415. "cell_type": "markdown",
  4416. "metadata": {},
  4417. "source": [
  4418. "As mentioned several times by now, to get good performance we should try to avoid looping over elements in our vectors and matrices, and instead use vectorized algorithms. The first step in converting a scalar algorithm to a vectorized algorithm is to make sure that the functions we write work with vector inputs."
  4419. ]
  4420. },
  4421. {
  4422. "cell_type": "code",
  4423. "execution_count": 213,
  4424. "metadata": {},
  4425. "outputs": [],
  4426. "source": [
  4427. "def Theta(x):\n",
  4428. " \"\"\"\n",
  4429. " Scalar implemenation of the Heaviside step function.\n",
  4430. " \"\"\"\n",
  4431. " if x >= 0:\n",
  4432. " return 1\n",
  4433. " else:\n",
  4434. " return 0"
  4435. ]
  4436. },
  4437. {
  4438. "cell_type": "code",
  4439. "execution_count": 214,
  4440. "metadata": {},
  4441. "outputs": [
  4442. {
  4443. "ename": "ValueError",
  4444. "evalue": "The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()",
  4445. "output_type": "error",
  4446. "traceback": [
  4447. "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
  4448. "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
  4449. "\u001b[0;32m<ipython-input-214-2cb2062a7e18>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mTheta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
  4450. "\u001b[0;32m<ipython-input-213-f72d7f42be84>\u001b[0m in \u001b[0;36mTheta\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mScalar\u001b[0m \u001b[0mimplemenation\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mHeaviside\u001b[0m \u001b[0mstep\u001b[0m \u001b[0mfunction\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \"\"\"\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m>=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  4451. "\u001b[0;31mValueError\u001b[0m: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()"
  4452. ]
  4453. }
  4454. ],
  4455. "source": [
  4456. "Theta(array([-3,-2,-1,0,1,2,3]))"
  4457. ]
  4458. },
  4459. {
  4460. "cell_type": "markdown",
  4461. "metadata": {},
  4462. "source": [
  4463. "OK, that didn't work because we didn't write the `Theta` function so that it can handle a vector input... \n",
  4464. "\n",
  4465. "To get a vectorized version of Theta we can use the Numpy function `vectorize`. In many cases it can automatically vectorize a function:"
  4466. ]
  4467. },
  4468. {
  4469. "cell_type": "code",
  4470. "execution_count": 215,
  4471. "metadata": {},
  4472. "outputs": [],
  4473. "source": [
  4474. "Theta_vec = np.vectorize(Theta)"
  4475. ]
  4476. },
  4477. {
  4478. "cell_type": "code",
  4479. "execution_count": 216,
  4480. "metadata": {},
  4481. "outputs": [
  4482. {
  4483. "data": {
  4484. "text/plain": [
  4485. "array([0, 0, 0, 1, 1, 1, 1])"
  4486. ]
  4487. },
  4488. "execution_count": 216,
  4489. "metadata": {},
  4490. "output_type": "execute_result"
  4491. }
  4492. ],
  4493. "source": [
  4494. "Theta_vec(np.array([-3,-2,-1,0,1,2,3]))"
  4495. ]
  4496. },
  4497. {
  4498. "cell_type": "markdown",
  4499. "metadata": {},
  4500. "source": [
  4501. "We can also implement the function to accept a vector input from the beginning (requires more effort but might give better performance):"
  4502. ]
  4503. },
  4504. {
  4505. "cell_type": "code",
  4506. "execution_count": 217,
  4507. "metadata": {},
  4508. "outputs": [],
  4509. "source": [
  4510. "def Theta(x):\n",
  4511. " \"\"\"\n",
  4512. " Vector-aware implementation of the Heaviside step function.\n",
  4513. " \"\"\"\n",
  4514. " return 1 * (x >= 0)"
  4515. ]
  4516. },
  4517. {
  4518. "cell_type": "code",
  4519. "execution_count": 219,
  4520. "metadata": {},
  4521. "outputs": [
  4522. {
  4523. "data": {
  4524. "text/plain": [
  4525. "array([0, 0, 0, 1, 1, 1, 1])"
  4526. ]
  4527. },
  4528. "execution_count": 219,
  4529. "metadata": {},
  4530. "output_type": "execute_result"
  4531. }
  4532. ],
  4533. "source": [
  4534. "Theta(np.array([-3,-2,-1,0,1,2,3]))"
  4535. ]
  4536. },
  4537. {
  4538. "cell_type": "code",
  4539. "execution_count": 221,
  4540. "metadata": {},
  4541. "outputs": [
  4542. {
  4543. "name": "stdout",
  4544. "output_type": "stream",
  4545. "text": [
  4546. "[False False False True True True True]\n"
  4547. ]
  4548. },
  4549. {
  4550. "data": {
  4551. "text/plain": [
  4552. "array([0, 0, 0, 1, 1, 1, 1])"
  4553. ]
  4554. },
  4555. "execution_count": 221,
  4556. "metadata": {},
  4557. "output_type": "execute_result"
  4558. }
  4559. ],
  4560. "source": [
  4561. "a = np.array([-3,-2,-1,0,1,2,3])\n",
  4562. "b = a>=0\n",
  4563. "print(b)\n",
  4564. "b*1"
  4565. ]
  4566. },
  4567. {
  4568. "cell_type": "code",
  4569. "execution_count": 222,
  4570. "metadata": {},
  4571. "outputs": [
  4572. {
  4573. "data": {
  4574. "text/plain": [
  4575. "(0, 1)"
  4576. ]
  4577. },
  4578. "execution_count": 222,
  4579. "metadata": {},
  4580. "output_type": "execute_result"
  4581. }
  4582. ],
  4583. "source": [
  4584. "# still works for scalars as well\n",
  4585. "Theta(-1.2), Theta(2.6)"
  4586. ]
  4587. },
  4588. {
  4589. "cell_type": "markdown",
  4590. "metadata": {},
  4591. "source": [
  4592. "## Using arrays in conditions"
  4593. ]
  4594. },
  4595. {
  4596. "cell_type": "markdown",
  4597. "metadata": {},
  4598. "source": [
  4599. "When using arrays in conditions, for example `if` statements and other boolean expressions, one needs to use `any` or `all`, which require that any or all elements in the array evaluate to `True`:"
  4600. ]
  4601. },
  4602. {
  4603. "cell_type": "code",
  4604. "execution_count": 223,
  4605. "metadata": {},
  4606. "outputs": [
  4607. {
  4608. "data": {
  4609. "text/plain": [
  4610. "array([[1, 2],\n",
  4611. " [3, 4]])"
  4612. ]
  4613. },
  4614. "execution_count": 223,
  4615. "metadata": {},
  4616. "output_type": "execute_result"
  4617. }
  4618. ],
  4619. "source": [
  4620. "M = np.array([[1, 2], [3, 4]])\n",
  4621. "M"
  4622. ]
  4623. },
  4624. {
  4625. "cell_type": "code",
  4626. "execution_count": 224,
  4627. "metadata": {},
  4628. "outputs": [
  4629. {
  4630. "data": {
  4631. "text/plain": [
  4632. "True"
  4633. ]
  4634. },
  4635. "execution_count": 224,
  4636. "metadata": {},
  4637. "output_type": "execute_result"
  4638. }
  4639. ],
  4640. "source": [
  4641. "(M > 2).any()"
  4642. ]
  4643. },
  4644. {
  4645. "cell_type": "code",
  4646. "execution_count": 225,
  4647. "metadata": {},
  4648. "outputs": [
  4649. {
  4650. "name": "stdout",
  4651. "output_type": "stream",
  4652. "text": [
  4653. "at least one element in M is larger than 2\n"
  4654. ]
  4655. }
  4656. ],
  4657. "source": [
  4658. "if (M > 2).any():\n",
  4659. " print(\"at least one element in M is larger than 2\")\n",
  4660. "else:\n",
  4661. " print(\"no element in M is larger than 2\")"
  4662. ]
  4663. },
  4664. {
  4665. "cell_type": "code",
  4666. "execution_count": 226,
  4667. "metadata": {},
  4668. "outputs": [
  4669. {
  4670. "name": "stdout",
  4671. "output_type": "stream",
  4672. "text": [
  4673. "all elements in M are not larger than 5\n"
  4674. ]
  4675. }
  4676. ],
  4677. "source": [
  4678. "if (M > 5).all():\n",
  4679. " print(\"all elements in M are larger than 5\")\n",
  4680. "else:\n",
  4681. " print(\"all elements in M are not larger than 5\")"
  4682. ]
  4683. },
  4684. {
  4685. "cell_type": "markdown",
  4686. "metadata": {},
  4687. "source": [
  4688. "## Type casting"
  4689. ]
  4690. },
  4691. {
  4692. "cell_type": "markdown",
  4693. "metadata": {},
  4694. "source": [
  4695. "Since Numpy arrays are *statically typed*, the type of an array does not change once created. But we can explicitly cast an array of some type to another using the `astype` method (see also the similar `asarray` function). This always creates a new array of the new type:"
  4696. ]
  4697. },
  4698. {
  4699. "cell_type": "code",
  4700. "execution_count": 227,
  4701. "metadata": {},
  4702. "outputs": [
  4703. {
  4704. "data": {
  4705. "text/plain": [
  4706. "dtype('int64')"
  4707. ]
  4708. },
  4709. "execution_count": 227,
  4710. "metadata": {},
  4711. "output_type": "execute_result"
  4712. }
  4713. ],
  4714. "source": [
  4715. "M.dtype"
  4716. ]
  4717. },
  4718. {
  4719. "cell_type": "code",
  4720. "execution_count": 228,
  4721. "metadata": {},
  4722. "outputs": [
  4723. {
  4724. "data": {
  4725. "text/plain": [
  4726. "array([[1., 2.],\n",
  4727. " [3., 4.]])"
  4728. ]
  4729. },
  4730. "execution_count": 228,
  4731. "metadata": {},
  4732. "output_type": "execute_result"
  4733. }
  4734. ],
  4735. "source": [
  4736. "M2 = M.astype(float)\n",
  4737. "\n",
  4738. "M2"
  4739. ]
  4740. },
  4741. {
  4742. "cell_type": "code",
  4743. "execution_count": 229,
  4744. "metadata": {},
  4745. "outputs": [
  4746. {
  4747. "data": {
  4748. "text/plain": [
  4749. "dtype('float64')"
  4750. ]
  4751. },
  4752. "execution_count": 229,
  4753. "metadata": {},
  4754. "output_type": "execute_result"
  4755. }
  4756. ],
  4757. "source": [
  4758. "M2.dtype"
  4759. ]
  4760. },
  4761. {
  4762. "cell_type": "code",
  4763. "execution_count": 230,
  4764. "metadata": {},
  4765. "outputs": [
  4766. {
  4767. "data": {
  4768. "text/plain": [
  4769. "array([[ True, True],\n",
  4770. " [ True, True]])"
  4771. ]
  4772. },
  4773. "execution_count": 230,
  4774. "metadata": {},
  4775. "output_type": "execute_result"
  4776. }
  4777. ],
  4778. "source": [
  4779. "M3 = M.astype(bool)\n",
  4780. "\n",
  4781. "M3"
  4782. ]
  4783. },
  4784. {
  4785. "cell_type": "markdown",
  4786. "metadata": {},
  4787. "source": [
  4788. "## Further reading"
  4789. ]
  4790. },
  4791. {
  4792. "cell_type": "markdown",
  4793. "metadata": {},
  4794. "source": [
  4795. "* https://numpy.org\n",
  4796. "* https://numpy.org/doc/stable/user/quickstart.html - NumPy quickstart (formerly the Tentative NumPy Tutorial).\n",
  4797. "* https://numpy.org/doc/stable/user/numpy-for-matlab-users.html - A Numpy guide for MATLAB users."
  4798. ]
  4799. },
  4800. {
  4801. "cell_type": "markdown",
  4802. "metadata": {},
  4803. "source": [
  4804. "## Versions"
  4805. ]
  4806. },
  4807. {
  4808. "cell_type": "code",
  4809. "execution_count": 178,
  4810. "metadata": {},
  4811. "outputs": [
  4812. {
  4813. "data": {
  4814. "application/json": {
  4815. "Software versions": [
  4816. {
  4817. "module": "Python",
  4818. "version": "2.7.10 64bit [GCC 4.2.1 (Apple Inc. build 5577)]"
  4819. },
  4820. {
  4821. "module": "IPython",
  4822. "version": "3.2.1"
  4823. },
  4824. {
  4825. "module": "OS",
  4826. "version": "Darwin 14.1.0 x86_64 i386 64bit"
  4827. },
  4828. {
  4829. "module": "numpy",
  4830. "version": "1.9.2"
  4831. }
  4832. ]
  4833. },
  4834. "text/html": [
  4835. "<table><tr><th>Software</th><th>Version</th></tr><tr><td>Python</td><td>2.7.10 64bit [GCC 4.2.1 (Apple Inc. build 5577)]</td></tr><tr><td>IPython</td><td>3.2.1</td></tr><tr><td>OS</td><td>Darwin 14.1.0 x86_64 i386 64bit</td></tr><tr><td>numpy</td><td>1.9.2</td></tr><tr><td colspan='2'>Sat Aug 15 11:02:09 2015 JST</td></tr></table>"
  4836. ],
  4837. "text/latex": [
  4838. "\\begin{tabular}{|l|l|}\\hline\n",
  4839. "{\\bf Software} & {\\bf Version} \\\\ \\hline\\hline\n",
  4840. "Python & 2.7.10 64bit [GCC 4.2.1 (Apple Inc. build 5577)] \\\\ \\hline\n",
  4841. "IPython & 3.2.1 \\\\ \\hline\n",
  4842. "OS & Darwin 14.1.0 x86\\_64 i386 64bit \\\\ \\hline\n",
  4843. "numpy & 1.9.2 \\\\ \\hline\n",
  4844. "\\hline \\multicolumn{2}{|l|}{Sat Aug 15 11:02:09 2015 JST} \\\\ \\hline\n",
  4845. "\\end{tabular}\n"
  4846. ],
  4847. "text/plain": [
  4848. "Software versions\n",
  4849. "Python 2.7.10 64bit [GCC 4.2.1 (Apple Inc. build 5577)]\n",
  4850. "IPython 3.2.1\n",
  4851. "OS Darwin 14.1.0 x86_64 i386 64bit\n",
  4852. "numpy 1.9.2\n",
  4853. "Sat Aug 15 11:02:09 2015 JST"
  4854. ]
  4855. },
  4856. "execution_count": 178,
  4857. "metadata": {},
  4858. "output_type": "execute_result"
  4859. }
  4860. ],
  4861. "source": [
  4862. "%reload_ext version_information\n",
  4863. "\n",
  4864. "%version_information numpy"
  4865. ]
  4866. }
  4867. ],
  4868. "metadata": {
  4869. "kernelspec": {
  4870. "display_name": "Python 3",
  4871. "language": "python",
  4872. "name": "python3"
  4873. },
  4874. "language_info": {
  4875. "codemirror_mode": {
  4876. "name": "ipython",
  4877. "version": 3
  4878. },
  4879. "file_extension": ".py",
  4880. "mimetype": "text/x-python",
  4881. "name": "python",
  4882. "nbconvert_exporter": "python",
  4883. "pygments_lexer": "ipython3",
  4884. "version": "3.6.8"
  4885. }
  4886. },
  4887. "nbformat": 4,
  4888. "nbformat_minor": 1
  4889. }

机器学习越来越多地应用到飞行器、机器人等领域,其目的是利用计算机实现类似人类的智能,从而实现装备的智能化与无人化。本课程旨在引导学生掌握机器学习的基本知识、典型方法与技术,通过具体的应用案例激发学生对该学科的兴趣,鼓励学生能够从人工智能的角度来分析、解决飞行器、机器人所面临的问题和挑战。本课程主要内容包括Python编程基础,机器学习模型,无监督学习、监督学习、深度学习基础知识与实现,并学习如何利用机器学习解决实际问题,从而全面提升自我的“综合能力”。