Python 基础与科学计算常用方法 本文使用的是Jupyter Notebook
,Python3
。你可以将代码直接复制到 Jupyter Notebook 中运行,以便更好的学习。
导入所需要的头文件 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 import numpy as npimport numpy as npimport matplotlib as mplfrom mpl_toolkits.mplot3d import Axes3Dfrom matplotlib import cmimport timefrom scipy.optimize import leastsqfrom scipy import statsimport scipy.optimize as optimport matplotlib.pyplot as pltfrom scipy.stats import norm, poissonimport mathimport scipyfrom scipy.interpolate import BarycentricInterpolatorfrom scipy.interpolate import CubicSpline
1 2 a = np.arange(0 , 60 , 10 ).reshape((-1 , 1 )) + np.arange(6 ) print (a)
1.使用 array 创建 标准 Python 的列表(list)中,元素本质是对象。 如:L = [1, 2, 3],需要 3 个指针和三个整数对象,对于数值运算比较浪费内存和 CPU。 因此,Numpy 提供了 ndarray(N-dimensional array object)对象:存储单一数据类型的多维数组。
1 2 3 4 5 6 L = [1 , 2 , 3 , 4 , 5 , 6 ] print ("L = " , L)a = np.array(L) print ("a = " , a)print (type(a), type(L))
1 2 3 b = np.array([[1 , 2 , 3 , 4 ], [5 , 6 , 7 , 8 ], [9 , 10 , 11 , 12 ]]) print (b)
1 2 3 print (a.shape)print (b.shape)
1 2 3 4 b.shape = 4 , 3 print (b)
1 2 3 4 5 6 b.shape = 2 , -1 print (b)print (b.shape)b.shape = 3 , 4 print (b)
1 2 3 4 c = b.reshape((4 , -1 )) print ("b = \n" , b)print ('c = \n' , c)
1 2 3 4 b[0 ][1 ] = 20 print ("b = \n" , b)print ("c = \n" , c)
1 2 3 print (a.dtype)print (b.dtype)
1 2 3 4 5 d = np.array([[1 , 2 , 3 , 4 ], [5 , 6 , 7 , 8 ], [9 , 10 , 11 , 12 ]], dtype=np.float) f = np.array([[1 , 2 , 3 , 4 ], [5 , 6 , 7 , 8 ], [9 , 10 , 11 , 12 ]], dtype=np.complex) print (d)print (f)
1 2 3 f = d.astype(np.int) print (f)
1 2 3 d.dtype = np.int print (d)
2.使用函数创建 如果生成一定规则的数据,可以使用 NumPy 提供的专门函数 arange 函数类似于 python 的 range 函数:指定起始值、终止值和步长来创建数组,和 Python 的 range 类似,arange 同样不包括终值;但 arange 可以生成浮点类型,而 range 只能是整数类型
1 2 a = np.arange(1 , 10 , 0.5 ) print (a)
1 2 3 b = np.linspace(1 , 10 , 10 ) print ('b = ' , b)
1 2 3 c = np.linspace(1 , 10 , 10 , endpoint=False ) print ('c = ' , c)
1 2 3 4 d = np.logspace(1 , 2 , 9 , endpoint=True ) print (d)
1 2 3 f = np.logspace(0 , 10 , 10 , endpoint=True , base=2 ) print (f)
1 2 3 4 s = 'abcdz' g = np.fromstring(s, dtype=np.int8) print (g)
3.存取 3.1 常规方法 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 a = np.arange(10 ) print (a)print (a[3 ])print (a[3 :6 ])print (a[:5 ])print (a[3 :])print (a[1 :9 :2 ])print (a[::-1 ])a[1 :4 ] = 10 , 20 , 30 print (a)b = a[2 :5 ] b[0 ] = 200 print (a)
3.2 整数/布尔数组存取 3.2.1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 a = np.logspace(0 , 9 , 10 , base=2 ) print (a)i = np.arange(0 , 10 , 2 ) print (i)b = a[i] print (b)b[2 ] = 1.6 print (b)print (a)
3.2.2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 a = np.random.rand(10 ) print (a)print (a > 0.5 )b = a[a > 0.5 ] print (b)a[a > 0.5 ] = 0.5 print (a)print (b)
3.3 二维数组的切片 1 2 3 4 5 6 7 8 a = np.arange(0 , 60 , 10 ) print ('a = ' , a)b = a.reshape((-1 , 1 )) print (b)c = np.arange(6 ) print (c)f = b + c print (f)
1 2 3 a = np.arange(0 , 60 , 10 ).reshape((-1 , 1 )) + np.arange(6 ) print (a)
1 2 3 4 5 6 7 print (a[[0 , 1 , 2 ], [2 , 3 , 4 ]])print (a[4 , [2 , 3 , 4 ]])print (a[4 :, [2 , 3 , 4 ]])i = np.array([True , False , True , False , False , True ]) print (a[i])print (a[i, 3 ])
4.1 numpy 与 Python 数学库的时间比较 1 2 3 4 5 6 7 8 9 10 11 12 13 for j in np.logspace(0 , 7 , 10 ): j = int(j) x = np.linspace(0 , 10 , j) start = time.clock() y = np.sin(x) t1 = time.clock() - start x = x.tolist() start = time.clock() for i, t in enumerate(x): x[i] = math.sin(t) t2 = time.clock() - start print (j, ": " , t1, t2, t2/t1)
# 4.2 元素去重 4.2.1 直接使用库函数 1 2 3 4 5 6 a = np.array((1 , 2 , 3 , 4 , 5 , 5 , 7 , 3 , 2 , 2 , 8 , 8 )) print ('原始数组:' , a)b = np.unique(a) print ('去重后:' , b)
4.2.2 二维数组的去重,结果会是预期的么? 1 2 3 c = np.array(((1 , 2 ), (3 , 4 ), (5 , 6 ), (1 , 3 ), (3 , 4 ), (7 , 6 ))) print (u'二维数组:\n' , c)print ('去重后:' , np.unique(c))
4.2.3 方案 1:转换为虚数 1 2 3 4 5 6 7 8 x = c[:, 0 ] + c[:, 1 ] * 1j print ('转换成虚数:' , x)print ('虚数去重后:' , np.unique(x))print (np.unique(x, return_index=True )) idx = np.unique(x, return_index=True )[1 ] print ('二维数组去重:\n' , c[idx])
4.2.3 方案 2:利用 set 1 print ('去重方案2:\n' , np.array(list(set([tuple(t) for t in c]))))
4.3 stack and axis 1 2 3 4 5 6 7 8 9 a = np.arange(1 , 10 ).reshape((3 , 3 )) b = np.arange(11 , 20 ).reshape((3 , 3 )) c = np.arange(101 , 110 ).reshape((3 , 3 )) print ('a = \n' , a)print ('b = \n' , b)print ('c = \n' , c)print ('axis = 0 \n' , np.stack((a, b, c), axis=0 ))print ('axis = 1 \n' , np.stack((a, b, c), axis=1 ))print ('axis = 2 \n' , np.stack((a, b, c), axis=2 ))
1 2 3 4 5 6 a = np.arange(1 , 10 ).reshape(3 ,3 ) print (a)b = a + 10 print (b)print (np.dot(a, b)) print (a * b)
1 2 3 4 5 a = np.arange(1 , 10 ) print (a)b = np.arange(20 ,25 ) print (b)print (np.concatenate((a, b)))
5.绘图 5.1 绘制正态分布概率密度函数 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 mpl.rcParams['font.sans-serif' ] = [u'SimHei' ] mpl.rcParams['axes.unicode_minus' ] = False mu = 0 sigma = 1 x = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 51 ) y = np.exp(-(x - mu) ** 2 / (2 * sigma ** 2 )) / (math.sqrt(2 * math.pi) * sigma) print (x.shape)print ('x = \n' , x)print (y.shape)print ('y = \n' , y)plt.figure(facecolor='w' ) plt.plot(x, y, 'r-' , x, y, 'go' , linewidth=2 , markersize=8 ) plt.xlabel('X' , fontsize=15 ) plt.ylabel('Y' , fontsize=15 ) plt.title(u'高斯分布函数' , fontsize=18 ) plt.grid(True ) plt.show()
5.2 损失函数 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 plt.figure(figsize=(10 ,8 ),dpi=100 ) x = np.array(np.linspace(start=-2 , stop=3 , num=1001 , dtype=np.float)) y_logit = np.log(1 + np.exp(-x)) / math.log(2 ) y_boost = np.exp(-x) y_01 = x < 0 y_hinge = 1.0 - x y_hinge[y_hinge < 0 ] = 0 plt.plot(x, y_logit, 'r-' , label='Logistic Loss' , linewidth=2 ) plt.plot(x, y_01, 'g-' , label='0/1 Loss' , linewidth=2 ) plt.plot(x, y_hinge, 'b-' , label='Hinge Loss' , linewidth=2 ) plt.plot(x, y_boost, 'm--' , label='Adaboost Loss' , linewidth=2 ) plt.grid() plt.legend(loc='upper right' ) plt.savefig('1.png' ) plt.show()
5.3 x^x 1 2 3 4 5 6 7 8 9 10 11 12 13 14 def f (x) : y = np.ones_like(x) i = x > 0 y[i] = np.power(x[i], x[i]) i = x < 0 y[i] = np.power(-x[i], -x[i]) return y x = np.linspace(-1.3 , 1.3 , 101 ) y = f(x) plt.plot(x, y, 'g-' , label='x^x' , linewidth=2 ) plt.grid() plt.legend(loc='upper left' ) plt.show()
5.4 胸型线 1 2 3 4 5 6 7 8 x = np.arange(1 , 0 , -0.001 ) y = (-3 * x * np.log(x) + np.exp(-(40 * (x - 1 / np.e)) ** 4 ) / 25 ) / 2 plt.figure(figsize=(5 ,7 ), facecolor='w' ) plt.plot(y, x, 'r-' , linewidth=2 ) plt.grid(True ) plt.title(u'胸型线' , fontsize=20 ) plt.show()
5.5 心形线 1 2 3 4 5 6 t = np.linspace(0 , 2 *np.pi, 100 ) x = 16 * np.sin(t) ** 3 y = 13 * np.cos(t) - 5 * np.cos(2 *t) - 2 * np.cos(3 *t) - np.cos(4 *t) plt.plot(x, y, 'r-' , linewidth=2 ) plt.grid(True ) plt.show()
5.6 渐开线 1 2 3 4 5 6 t = np.linspace(0 , 50 , num=1000 ) x = t*np.sin(t) + np.cos(t) y = np.sin(t) - t*np.cos(t) plt.plot(x, y, 'r-' , linewidth=2 ) plt.grid() plt.show()
5.7Bar 1 2 3 4 5 6 7 8 9 10 x = np.arange(0 , 10 , 0.1 ) y = np.sin(x) plt.bar(x, y, width=0.04 , linewidth=0.2 ) plt.plot(x, y, 'r--' , linewidth=2 ) plt.title(u'Sin曲线' ) plt.xticks(rotation=-60 ) plt.xlabel('X' ) plt.ylabel('Y' ) plt.grid() plt.show()
6. 概率分布 6.1 均匀分布 1 2 3 4 5 6 7 x = np.random.rand(10000 ) t = np.arange(len(x)) plt.plot(t, x, 'g.' , label=u'均匀分布' ) plt.legend(loc='upper left' ) plt.grid() plt.show()
6.2 验证中心极限定理 1 2 3 4 5 6 7 8 9 t = 1000 a = np.zeros(10000 ) for i in range(t): a += np.random.uniform(-5 , 5 , 10000 ) a /= t plt.hist(a, bins=30 , color='g' , alpha=0.5 , normed=True , label=u'均匀分布叠加' ) plt.legend(loc='upper left' ) plt.grid() plt.show()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 lamda = 10 p = stats.poisson(lamda) y = p.rvs(size=1000 ) mx = 30 r = (0 , mx) bins = r[1 ] - r[0 ] plt.figure(figsize=(10 , 8 ), facecolor='w' ) plt.subplot(121 ) plt.hist(y, bins=bins, range=r, color='g' , alpha=0.8 , normed=True ) t = np.arange(0 , mx+1 ) plt.plot(t, p.pmf(t), 'ro-' , lw=2 ) plt.grid(True ) N = 1000 M = 10000 plt.subplot(122 ) a = np.zeros(M, dtype=np.float) p = stats.poisson(lamda) for i in np.arange(N): y = p.rvs(size=M) a += y a /= N plt.hist(a, bins=20 , color='g' , alpha=0.8 , normed=True ) plt.grid(b=True ) plt.show()
6.3 Poisson 分布 1 2 3 4 5 6 7 8 9 x = np.random.poisson(lam=5 , size=10000 ) print (x)pillar = 15 a = plt.hist(x, bins=pillar, normed=True , range=[0 , pillar], color='g' , alpha=0.5 ) plt.grid() print (a)print (a[0 ].sum())
6.4 直方图的使用 1 2 3 4 5 6 7 8 9 mu = 2 sigma = 3 data = mu + sigma * np.random.randn(1000 ) h = plt.hist(data, 30 , normed=1 , color='#a0a0ff' ) x = h[1 ] y = norm.pdf(x, loc=mu, scale=sigma) plt.plot(x, y, 'r--' , x, y, 'ro' , linewidth=2 , markersize=4 ) plt.grid() plt.show()
6.5 插值 1 2 3 4 5 6 7 8 9 10 11 12 13 rv = poisson(5 ) x1 = a[1 ] y1 = rv.pmf(x1) itp = BarycentricInterpolator(x1, y1) x2 = np.linspace(x.min(), x.max(), 50 ) y2 = itp(x2) cs = scipy.interpolate.CubicSpline(x1, y1) plt.plot(x2, cs(x2), 'm--' , linewidth=5 , label='CubicSpine' ) plt.plot(x2, y2, 'g-' , linewidth=3 , label='BarycentricInterpolator' ) plt.plot(x1, y1, 'r-' , linewidth=1 , label='Actural Value' ) plt.legend(loc='upper right' ) plt.grid() plt.show()
7. 绘制三维图像 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 u = np.linspace(-3 , 3 , 101 ) x, y = np.meshgrid(u, u) z = x*y*np.exp(-(x**2 + y**2 )/2 ) / math.sqrt(2 *math.pi) fig = plt.figure() ax = fig.add_subplot(111 , projection='3d' ) ax.plot_surface(x, y, z, rstride=5 , cstride=5 , cmap=cm.Accent, linewidth=0.5 ) plt.show()
欢迎关注我的公众号,阅读更多新手入门资料。