■8.1 統計的特徴量 表8-1 平均値の計算 # 0 1 2 3 4 5 6 7 8 9 ADT=[4.8, 3.7, 4.6, 5.2, 6.4, 4.1, 6.4, 5.0, 5.8, 5.0, #00 6.0, 4.7, 5.7, 6.4, 3.6, 3.9, 6.0, 3.7, 5.7, 4.3, #10 3.9, 4.0, 5.0, 4.9, 6.4, 5.1, 5.0, 4.3, 3.9, 5.5, #20 5.0, 4.6, 5.7, 4.7, 5.6, 5.1, 6.2, 4.2, 6.4, 6.2, #30 5.0, 6.3, 5.0, 4.5, 5.0, 4.3, 5.0, 4.7, 4.7] #40 # 0 1 2 3 4 5 6 7 8 9 BDT=[4.2, 2.0, 7.7, 7.8, 2.2, 4.6, 6.2, 7.4, 7.5, 7.4, #00 2.2, 7.3, 4.0, 7.3, 5.8, 5.9, 5.4, 6.9, 3.5, 6.5, #10 5.6, 2.0, 2.5, 4.7, 2.8, 6.3, 3.7, 2.7, 6.1, 6.0, #20 6.8, 5.8, 2.8, 5.6, 5.1, 3.4, 4.0, 3.0, 5.0, 4.9, #30 3.1, 2.5, 5.8, 5.4, 6.1, 5.3, 4.7, 6.6, 5.3] #40 def average(A): N=len(A); S=0 for i in range(N):S+=A[i] return S/N (上記定義に加え,以下の実行を行うと平均を求めることができます) print("A店の平均", average(ADT)) print("B店の平均", average(BDT)) 表8-2 度数分布表と度数分布図 ---(前略:表8-1を参照)--- def FreqDist(A):#度数分布 Freq=[0 for i in range(11)] for i in range(len(A)): Freq[int(A[i])]+=1 return Freq def dspTable(Title,Freq): print(Title);print(" 階級 頻度"); print("-"*20) for i in range(1,len(Freq)): print("%3d以上%3d未満:%4d" %(i-1,i, Freq[i])) from tkinter import * def drawText(canvas,X,Y,Text, size=8, color="black"): canvas.create_text(X,Y,text=Text, font=('Times',size),fill=color) def histogram(canvas, Title,Freq,XST,YST,SX=20,SY=10,color="red"): XED=XST+SX*10; YED=YST-20*SY drawText(canvas,(XST+XED)/2,YED-15,Title,size=10) for i in range(0,25,5):# Y目盛り描画 Y=YST-i*SY canvas.create_line(XST,Y,XED,Y,fill="gray") drawText(canvas,XST-15,Y,("%3d" % i)) for i in range(11):# X目盛り描画 X=XST+i*SX canvas.create_line(X,YST,X,YED,fill="gray") drawText(canvas,X,YST+15,("%3d" % i)) for i in range(len(Freq)):# グラフ描画 if Freq[i]>0: X1=XST+i*SX+3;X2=X1+15;Y=YST-Freq[i]*SY canvas.create_rectangle(X1,YST,X2,Y,fill=color) (上記に加え,以下を実行すると度数分布表,度数分布図が得られる) root=Tk() canvas=Canvas(root,width=500,height=400,bg="white") canvas.pack() AFreq=FreqDist(ADT); BFreq=FreqDist(BDT) histogram(canvas, "A店の度数分布",AFreq, 40,250,color="red") histogram(canvas, "B店の度数分布",BFreq,280,250,color="blue") dspTable("\n A店の度数分布",AFreq) dspTable("\n B店の度数分布",BFreq) 表8-3 ばらつきの程度を観察するための関数 ---(前略:表8-1,表8-2を参照)--- def sort(DT):#昇順にソート N=len(DT) for i in range(N-1): for j in range(N-1,i,-1): if DT[j-1]>DT[j]: T=DT[j-1];DT[j-1]=DT[j];DT[j]=T def drawUpperA(canvas,X,Y,width=1, color="blue"): canvas.create_line(X,Y,X-2,Y+4,width=width,fill=color) canvas.create_line(X,Y,X+2,Y+4,width=width,fill=color) def drawLowerA(canvas,X,Y,width=1,color="blue"): canvas.create_line(X,Y,X-2,Y-4,width=width,fill=color) canvas.create_line(X,Y,X+2,Y-4,width=width,fill=color) def drawHolizontal(canvas, X1,Y1,X2,width=1, color="blue"): canvas.create_line(X1,Y1,X2,Y1,width=width,fill=color) def drawVertical(canvas, X1,Y1,Y2,width=1, color="blue"): canvas.create_line(X1,Y1,X1,Y2,width=width,fill=color) #■平均,最大,最小 の範囲を描く def differAverage(canvas, Title,DT,XST,YST,SX=4,SY=10,color="red"): N=len(DT); sDT=[] for i in range(N): sDT.append(DT[i])#安全のため複写 sort(sDT)#データソート #タイトル表示 XED=XST+SX*48; YED=YST-10*SY drawText(canvas,(XST+XED)/2,YED-15,Title,size=10) for i in range(0,12,2):# Y目盛り描画 Y=YST-i*SY drawHolizontal(canvas, XST,Y,XED,color="gray") drawText(canvas,XST-15,Y,("%3d" % i)) #左右の縦線 drawVertical(canvas, XST,YST,YED,color="gray") drawVertical(canvas, XED,YST,YED,color="gray") # グラフ描画 for i in range(N):# グラフ描画 X1=XST+i*SX;X2=X1+15; Y=YST-sDT[i]*SY drawVertical(canvas, X1,YST,Y,width=2,color=color) #平均値,最小値,最大値のラインを描く V=average(sDT); Y0=YST-V*SY #平均値のライン(緑色) drawHolizontal(canvas, XST,Y0,XED+10,width=2,color="green") minV=sDT[0]; Y1=YST-minV*SY #最小値のライン(青色) drawHolizontal(canvas, XST,Y1,XED+10) maxV=sDT[N-1]; Y2=YST-maxV*SY #最大値のライン(青色) drawHolizontal(canvas, XST,Y2,XED+10) drawVertical(canvas, X1,YST,Y) #最大最小を結ぶライン(青色) canvas.create_line(XED+5,Y1,XED+5,Y2,width=1,fill="blue") #矢印等 (最大値付近)    (最小値付近) drawUpperA(canvas,XED+5,Y2); drawLowerA(canvas,XED+5,Y1) #   (平均値付近) drawUpperA(canvas,XED+5,Y0); drawLowerA(canvas,XED+5,Y0) (上記に加え,以下を実行すると平均値からのばらつきのグラフが得られる) root=Tk() canvas=Canvas(root,width=500,height=400,bg="white") canvas.pack() differAverage(canvas, "A店のばらつき",ADT, 40,150,color="red") differAverage(canvas, "B店のばらつき",BDT,280,150,color="red") 表8-4 分散と標準偏差 ---(前略:averageについては表8-1参照)--- import math def deviation(DT): N=len(DT); A=average(DT); S=0 for i in range(N): D=DT[i]-A; S+=D*D S/=N return [A, S,math.sqrt(S)] (上記に加え,以下を実行するとそれぞれの分散と標準偏差が得られる) A=deviation(ADT); B=deviation(BDT) print("A店の平均 %8.5f 分散 %8.5f 標準偏差 %8.5f" %(A[0],A[1],A[2])) print("B店の平均 %8.5f 分散 %8.5f 標準偏差 %8.5f" %(B[0],B[1],B[2])) 表8-5 度数分布のスムージング ---(前略:表8-1を参照)--- def FreqDist2(A):#度数分布 F1=[0 for i in range(101)];F2=[0 for i in range(101)] for i in range(len(A)): F1[int(A[i]*10)]+=1 for i in range(len(F1)): for j in range(i-5,i+5): if j>=0 and j<100:F2[i]+=F1[j] return F2 def histogram2(canvas, Title,Freq,XST,YST,SX=20,SY=10,color="red"): XED=XST+SX*10; YED=YST-25*SY drawText(canvas,(XST+XED)/2,YED-15,Title,size=10) for i in range(0,30,5):# Y目盛り描画 Y=YST-i*SY canvas.create_line(XST,Y,XED,Y,fill="gray") drawText(canvas,XST-15,Y,("%3d" % i)) for i in range(11):# X目盛り描画 X=XST+i*SX canvas.create_line(X,YST,X,YED,fill="gray") drawText(canvas,X,YST+15,("%3d" % i)) for i in range(len(Freq)):# グラフ描画 X=XST+i*SX/10;Y=YST-Freq[i]*SY canvas.create_line(X,YST,X,Y,fill=color) (上記に加え,以下を実行します) root=Tk() canvas=Canvas(root,width=500,height=400,bg="white") canvas.pack() AFreq=FreqDist2(ADT); BFreq=FreqDist2(BDT) histogram2(canvas, "A店の度数分布",AFreq, 40,300,color="red") histogram2(canvas, "B店の度数分布",BFreq,280,300,color="blue") ■8.2 回帰曲線 表8-6 例題データと散布図を描く関数 # 身長,ウェスト,体重 PHY=[[161,79,53],[169,76,63],[165,83,65],[176,76,67],[161,78,44],#00 [181,80,71],[176,78,66],[156,73,50],[171,82,61],[161,76,55], [179,84,70],[168,77,65],[162,83,58],[170,80,66],[183,87,74],#10 [169,76,53],[167,76,57],[174,79,66],[178,78,69],[163,75,58], [183,79,67],[182,87,74],[171,85,65],[170,88,71],[169,87,71],#20 [166,78,60],[170,83,63],[163,73,46],[156,76,50],[164,74,58], [167,78,51],[169,79,53],[169,74,56],[169,81,62],[169,74,58],#30 [166,84,60],[171,81,68],[162,83,56],[166,77,63],[174,88,72], [176,84,69],[168,77,63],[178,84,71],[168,72,55],[168,85,62],#40 [171,84,67],[182,84,72],[170,82,63],[179,79,68],[175,78,67], [166,79,60],[168,82,61],[169,83,58],[173,85,73],[175,81,73],#50 [173,77,64],[160,81,59],[172,83,60],[182,85,72],[161,77,56], [168,85,63],[164,76,46],[165,80,67],[162,81,58],[177,81,75],#60 [182,78,63],[169,81,62],[171,84,64],[170,82,64],[183,79,73], [170,79,61],[173,77,67],[170,83,63],[169,84,61],[159,76,52],#70 [175,77,55],[168,84,71],[183,79,73],[168,82,67],[169,84,65], [170,83,57],[164,79,57],[167,83,61],[169,78,57],[164,78,57],#80 [161,82,53],[179,86,67],[159,77,45],[168,83,69],[184,81,72], [169,78,54],[170,83,60],[170,85,64],[167,78,56],[170,82,63],#90 [171,83,75],[168,84,61],[174,84,66],[159,79,59],[158,82,55]] strPHY=["身長","ウェスト","体重"] from tkinter import * root=Tk() canvas=Canvas(root,width=500,height=400,bg="white") canvas.pack() def drawText(canvas,X,Y,Text, size=8, color="black"): canvas.create_text(X,Y,text=Text, font=('Times',size),fill=color) def scaleScatter(canvas, Title,GDT,XST,YST,ScXs, ScXe,ScYs,ScYe, SX=5,SY=4,stepX=10, stepY=10,color="red", xfm="%3d",yfm="%3d"): NY=ScYe-ScYs;NX=ScXe-ScXs;XED=XST+SX*NX; YED=YST-SY*NY; drawText(canvas,(XST+XED)/2,YED-15,Title,size=10) for i in range(0,NY+5,stepY):# Y目盛り描画 Y=YST-i*SY canvas.create_line(XST,Y,XED,Y,fill="gray") drawText(canvas,XST-15,Y,("%3d" % (ScYs+i))) for i in range(0,NX+5,stepX):# X目盛り描画 X=XST+i*SX canvas.create_line(X,YST,X,YED,fill="gray") drawText(canvas,X,YST+15,("%3d" %(ScXs+ i))) N=len(GDT) for i in range(N): X=GDT[i][0]-ScXs; Y=GDT[i][1]-ScYs XX=XST+X*SX; YY=YST-Y*SY canvas.create_rectangle(XX-1,YY-1,XX+1,YY+1, fill=color) def getScatterDT(X,Y): N=len(PHY); R=[] for i in range(N):R.append([PHY[i][X],PHY[i][Y]]) return R def minSC(L,ID): N=len(L); M=L[0][ID] for i in range(1,N): if L[i][ID]M: M=L[i][ID] return int((M+10)/10)*10 def setID(Str): N=len(strPHY) for i in range(N): if Str==strPHY[i]: return i return 0 表8-6の実行 D1="身長"; D2="体重"; X=setID(D1);Y=setID(D2) S1=getScatterDT(X,Y)#体格データから身長と体重を取り出す scaleScatter(canvas, D1+"-"+D2,S1,40,250, minSC(S1,0),maxSC(S1,0),minSC(S1,1),maxSC(S1,1)) 表8-7 最小二乗法による1次式の近似 def leastSQR(S): N=len(S);X=0;Y=0;X2=0;XY=0 for i in range(N): X+=S[i][0]; X2+=S[i][0]*S[i][0] Y+=S[i][1]; XY+=S[i][0]*S[i][1] B=N*X2-X*X A0=(Y*X2-X*XY)/B; A1=(N*XY-X*Y)/B return [A0,A1] def graphLine(canvas,A0,A1,XST,YST,ScXs, ScXe,ScYs,ScYe, SX=5,SY=4, stepY=10,color="red"): X1=ScXs;X2=ScXe Y1=A0+A1*X1; Y2=A0+A1*X2 NY=ScYe-ScYs;NX=ScXe-ScXs;XED=XST+SX*NX; YED=YST-SY*NY YY1=YST-(Y1-ScYs)*SY;YY2=YST-(Y2-ScYs)*SY; canvas.create_line(XST,YY1,XED,YY2, width=2,fill=color) (上記に加えて以下のプログラムを実行すると,散布図に近似直線を追加したグラフが得られる) D1="身長"; D2="体重"; X=setID(D1);Y=setID(D2) S1=getScatterDT(X,Y)#体格データから身長と体重を取り出す Xmin=minSC(S1,0);Xmax=maxSC(S1,0) Ymin=minSC(S1,1);Ymax=maxSC(S1,1) scaleScatter(canvas,D1+"-"+D2,S1,40,250,Xmin,Xmax,Ymin,Ymax) R=leastSQR(S1) graphLine(canvas, R[0],R[1],40,250,Xmin,Xmax,Ymin,Ymax) 表8-8 相関係数等の計算 import math def listAverage(L,k):#■k列目の平均値 N=len(L); A=0 for i in range(N):A+=L[i][k] return A/N def correlationCoefficient(DT):#■相関係数 N=len(DT); XA=listAverage(DT,0);YA=listAverage(DT,1) Sxy=0;Sx=0;Sy=0 for i in range(N): DX=DT[i][0]-XA;DY=DT[i][1]-YA Sxy+=DX*DY; Sx+=DX*DX; Sy+=DY*DY Sxy/=N # xとyの共分散 Sx=math.sqrt(Sx/N) #xの標準偏差 Sy=math.sqrt(Sy/N) #yの標準偏差 return Sxy/(Sx*Sy) def deviationLSQR(DT,A0,A1):#■近似式による標準偏差 N=len(DT); T=0 for i in range(N): Y=A0+A1*DT[i][0];D=Y-DT[i][1];T+=D*D T/=N return [T, math.sqrt(T)] def deviationAverage(DT):#■平均値による標準偏差 N=len(DT); A=listAverage(DT,1);T=0 for i in range(N): D=A-DT[i][1];T+=D*D T/=N return [T, math.sqrt(T)] def evalApp(D,A):return (1-D/A)*100 #■近似性 (上記に加えて以下のプログラムを実行すると,相関係数等がシェル画面に表示される) D1="身長"; D2="体重"; X=setID(D1);Y=setID(D2) S1=getScatterDT(X,Y)#体格データから身長と体重を取り出す R=leastSQR(S1)#最小二乗法 SR=correlationCoefficient(S1);print("相関係数",SR) DS=deviationLSQR(S1,R[0],R[1]) print("近似式による分散 %13.5e, 標準偏差 %13.5e" % (DS[0],DS[1])) DA=deviationAverage(S1); R2 = 1-DS[0]/DA[0];RR=math.sqrt(R2) print("平均値による分散 %13.5e, 標準偏差 %13.5e" % (DA[0],DA[1])) print("近似性 %4.1f% 決定係数 R2=%8.5f R値=%8.5f " % (evalApp(DS[1],DA[1]), R2,RR)) 表8-9 N次の多項式による近似 def array(N1, N2=0):#■配列宣言(表2-1 再掲) if N2!=0: return [[0 for i in range(N2)] for i in range(N1)] return [0 for i in range(N1)] def copyArray(A):#■配列複写(表2-8 再掲) B=[] for i in range(len(A)):B.append(A[i]) return B def GaussianELMPivot(MAT):#■ピボット選択連立方程式(表2-10 再掲) A=copyArray(MAT);N=len(A);N1=N+1;ESP=0.0000001 for k in range(N):# 前進消去 k1 = k + 1;Max = abs(A[k][k]);IR = k #----ピボット入れ替え if k != N : for i in range(k1,N): ab=abs(A[i][k]) if ab>= Max : Max = ab; IR = i if Max < ESP : print("解を求めることができません") return [] if IR != k : D=A[k]; A[k]=A[IR]; A[IR]=D #---ピボット入れ替えここまで for i in range(k + 1,N): ALFA = A[i][k] / A[k][k] for j in range(k + 1,N1):A[i][j]-=ALFA*A[k][j] B=array(N,1) for i in range(N-1, -1, -1):# 後退代入 T = A[i][N] for j in range(i + 1,N):T -=A[i][j] * A[j][N] A[i][N] = T / A[i][i]; B[i][0]=A[i][N] return B def setCoff(DT,n):#■係数設定 num=len(DT); m = n + 1 A=array(n+1,m+1) for i in range(m): A[i][m] = 0; for k in range(num):#右辺項の設定 A[i][m] += math.pow(DT[k][0],i)*DT[k][1] for j in range(m): #左辺行列の設定 A[i][j] = 0 for k in range(num): A[i][j] += math.pow(DT[k][0],(i + j)) return A def leastSQRN(S,n):#■N次式による近似 A=setCoff(S,n) #係数行列の設定 return GaussianELMPivot(A)#連立方程式の解法 def setY(R,X):#■N次式の値(Rは列ベクトルであることに注意) XX=1;Y=0 for i in range(len(R)):Y+=R[i][0]*XX;XX*=X return Y #■N次式のグラフ def graphND(canvas,R,XST,YST,ScXs, ScXe,ScYs,ScYe, SX=5,SY=4, stepY=10,color="red"): X2=ScXs;Xend=ScXe; DX=1; Y2=setY(R,X2) while X2<=Xend: X1=X2; Y1=Y2; X2+=DX; Y2=setY(R,X2) if Y2<=ScYe and Y2>=ScYs: XX1=XST+(X1-ScXs)*SX;XX2=XST+(X2-ScXs)*SX YY1=YST-(Y1-ScYs)*SY;YY2=YST-(Y2-ScYs)*SY (上記に加えて以下のプログラムを実行すると,散布図に近似曲線を追加されたグラフが得られる) S1=[[0,10],[10,30],[20,30],[30,10],[40,10],[50,30]] Xmin=minSC(S1,0);Xmax=maxSC(S1,0) Ymin=minSC(S1,1);Ymax=maxSC(S1,1) scaleScatter(canvas,"N次式用",S1,40,250,Xmin,Xmax,Ymin-10,Ymax) R=leastSQRN(S1,3) graphND(canvas, R,40,250,Xmin, Xmax,Ymin-10,Ymax) 表8-10 例題データと表示用関数 S1=[[2.48,39.11],[1.87,47.88],[2.77,39.97],[0.90,64.09],[1.91,47.68], [3.90,30.58],[0.10,70.24],[3.18,33.80],[3.74,33.13],[1.99,39.09], [2.71,34.40],[4.91,31.37],[3.89,38.17],[4.50,25.69],[0.92,63.16], [4.19,29.38],[0.24,74.06],[2.39,42.71],[3.30,32.42],[2.94,35.04], [0.14,76.50],[1.69,45.56],[4.09,31.52],[4.42,25.86],[2.86,38.03], [0.99,54.73],[0.74,61.27],[0.71,58.89],[2.16,41.06],[3.98,31.41], [1.42,49.52],[0.22,70.70],[3.73,29.07],[1.93,44.01],[2.47,40.74], [0.19,73.67],[2.72,38.03],[1.18,58.40],[0.97,57.05],[4.18,35.62], [1.22,53.31],[0.64,59.93],[2.18,40.38],[4.60,23.77],[0.81,60.38], [4.31,31.16],[2.07,40.54],[0.16,82.13],[4.97,30.40],[3.77,32.04], [4.81,31.68],[3.42,34.00],[1.69,45.32],[0.67,64.04],[0.28,68.71], [3.25,32.84],[0.86,65.94],[4.23,35.90],[1.55,46.84],[2.37,41.42], [1.86,48.77],[3.78,30.27],[2.41,38.51],[4.80,27.95],[1.00,49.77], [1.19,46.52],[4.84,27.34],[3.42,31.34],[4.07,30.05],[3.43,33.19], [1.54,53.38],[2.41,45.21],[3.79,32.27],[0.04,79.20],[3.38,37.79], [4.80,25.13],[2.34,32.60],[0.65,68.24],[1.97,42.33],[4.93,30.37], [3.85,34.48],[2.46,38.00],[2.10,41.03],[0.97,57.59],[1.91,38.22], [2.74,39.26],[4.28,26.86],[2.44,37.46],[2.06,38.42],[1.35,48.38], [2.20,39.96],[2.67,35.33],[4.29,31.40],[3.22,32.81],[0.28,73.05], [1.31,46.14],[0.55,66.58],[0.87,54.10],[1.12,51.18],[1.86,39.07]] from tkinter import * root=Tk() canvas=Canvas(root,width=500,height=400,bg="white") canvas.pack() def drawText(canvas,X,Y,Text, size=8, color="black"): canvas.create_text(X,Y,text=Text, font=('Times',size),fill=color) def scaleSpecFun(canvas, Title,GDT,XST,YST,ScXs, ScXe,ScYs,ScYe, SX=50,SY=2,stepX=1, stepY=1,color="red",GX=50,GY=2, xfm="%3d",yfm="%3d",NstepX=1,NstepY=10,NX=5,NY=90): XED=XST+GX*NX; YED=YST-GY*NY; drawText(canvas,(XST+XED)/2,YED-15,Title,size=10) for i in range(0,NY+1,NstepY):# Y目盛り描画 Y=YST-i*GY canvas.create_line(XST,Y,XED,Y,fill="gray") drawText(canvas,XST-15,Y,(yfm % (ScYs+i*stepY))) for i in range(0,NX+1,NstepX):# X目盛り描画 X=XST+i*GX canvas.create_line(X,YST,X,YED,fill="gray") drawText(canvas,X,YST+15,(xfm %(ScXs+ i*stepX))) N=len(GDT) for i in range(N): X=GDT[i][0]-ScXs; Y=GDT[i][1]-ScYs XX=XST+X*SX; YY=YST-Y*SY canvas.create_rectangle(XX-1,YY-1,XX+1,YY+1, fill=color) ---(後略:本表で未定義の関数については表8-6〜9を参照)---- 表8-11 単純な近似 Xmin=0;Xmax=5;Ymin=10;Ymax=90 scaleSpecFun(canvas,"特殊な式",S1,40,350,Xmin,Xmax,Ymin,Ymax, stepX=1,xfm="%.1f",SX=50, SY=2) R=leastSQR(S1)#最小二乗法 graphLine(canvas, R[0],R[1],40,350,Xmin,Xmax,Ymin,Ymax,SX=50,SY=2) print(R) 表8-12 データの性質を考慮した近似 def specFun(S1):# g(x)=1/(1+exp(x))の計算 L=[] for i in range(len(S1)): L.append([1/(1+math.exp(S1[i][0])),S1[i][1]]) return L SZ=specFun(S1) Xmin=0;Xmax=0.5;Ymin=10;Ymax=90 scaleSpecFun(canvas,"特殊な式",SZ,40,350,Xmin,Xmax,Ymin,Ymax, stepX=0.1,xfm="%.1f",SX=500, SY=2) R=leastSQR(SZ)#最小二乗法 graphLine(canvas, R[0],R[1],40,350,Xmin,Xmax,Ymin,Ymax,SX=500,SY=2) print(R) 表8-13 元データに近似式グラフを重ねる def setExp(R,X): return R[0]+R[1]/(1+math.exp(X)) def graphSpec(canvas,R,XST,YST,ScXs, ScXe,ScYs,ScYe, SX=5,SY=4, stepY=10,color="red"): X2=ScXs;Xend=ScXe; DX=1; Y2=setExp(R,X2) while X2=ScYs: XX1=XST+(X1-ScXs)*SX;XX2=XST+(X2-ScXs)*SX YY1=YST-(Y1-ScYs)*SY;YY2=YST-(Y2-ScYs)*SY canvas.create_line(XX1,YY1,XX2,YY2,width=2,fill=color) def specFun(S1): # g(x)=1/(1+exp(x))の計算 L=[] for i in range(len(S1)): L.append([1/(1+math.exp(S1[i][0])),S1[i][1]]) return L SZ=specFun(S1)# 説明変量をg(x)=1/(1+exp(x))とする Xmin=0;Xmax=5;Ymin=10;Ymax=90 scaleSpecFun(canvas,"特殊な式",S1,40,350,Xmin,Xmax,Ymin,Ymax, stepX=1,xfm="%.1f",SX=50, SY=2) R=leastSQR(SZ)#最小二乗法 graphSpec(canvas, R,40,350,Xmin,Xmax,Ymin,Ymax,SX=50,SY=2) ■8.3 重回帰分析 表8-14 重回帰分析プログラム本体 ---(前略:array, GaussianELMPivotについては表8-9参照) DT=[[5,5,2,55],[ 6,4,3,40],[4,3,4,30],[2,2, 5,25],[5,4, 6,45], [8,5,7,70],[10,6,8,90],[4,8,9,25],[7,2,10,45],[9,3,11,75]] def setMultC(DT):#■係数設定 num=len(DT); m=len(DT[0]);M=m-1 A=array(M,m) for k in range(M): for j in range(M): T = 0 for i in range(num): T+=DT[i][k]*DT[i][j] A[k][j] = T T = 0 for i in range(num): T += DT[i][k]*DT[i][M] A[k][M]= T return A def multipleRegression(DT): return GaussianELMPivot(setMultC(DT)) 以上に加えて,以下のプログラムを実行すると係数行列が得られる。 B=multipleRegression(DT) print(B) 表8-15 標準偏差等の計算 def compApp(B,DT):#近似値の計算(Xの代表値を先頭とする) N=len(DT);M=len(DT[0])-1; R=[] for i in range(N): Y=0 for j in range(M):Y+=B[j][0]*DT[i][j] R.append([DT[i][0],Y]) return R import math def deviateMulti(DT,AP):#近似式の標準偏差 N=len(DT);M=len(DT[0])-1; T=0 for i in range(N):DY=DT[i][M]-AP[i][1]; T+=DY*DY T/=N return [T, math.sqrt(T)] def listAverage(L,k):#■k列目の平均値 N=len(L); A=0 for i in range(N):A+=L[i][k] return A/N def deviateMultiAV(DT):#■平均値による標準偏差 N=len(DT);M=len(DT[0])-1; T=0;AV=listAverage(DT,M) for i in range(N):DY=DT[i][M]-AV; T+=DY*DY T/=N return [T, math.sqrt(T)] 以上に加えて,以下のプログラムを実行すると標準偏差等が得られる。 B=multipleRegression(DT);App=compApp(B,DT) DV=deviateMulti(DT,App) print("近似式による分散", DV[0], "標準偏差",DV[1]) ADV=deviateMultiAV(DT) print("平均値による分散", ADV[0], "標準偏差",ADV[1]) print("近似性",(1-DV[1]/ADV[1])*100,"%") 表8-16 定数項の付加 def insert1(DT):#■定数項の付加 N=len(DT); M=len(DT[0]);R=[] for i in range(N): E=[1] for j in range(M): E.append(DT[i][j]) R.append(E) return R 以上に加えて,以下のプログラムを実行します。 DT= insert1(DT) B=multipleRegression(DT);print(B) App=compApp(B,DT); DV=deviateMulti(DT,App) print("近似式による分散", DV[0], "標準偏差",DV[1]) ADV=deviateMultiAV(DT) print("平均値による分散", ADV[0], "標準偏差",ADV[1]) print("近似性",(1-DV[1]/ADV[1])*100,"%") 表8-17 グラフ用関数 from tkinter import * root=Tk() canvas=Canvas(root,width=800,height=400,bg="white") canvas.pack() def drawText(canvas,X,Y,Text, size=8, color="black"): canvas.create_text(X,Y,text=Text, font=('Times',size),fill=color) def scaleOnly(canvas, Title,XST,YST, ScXs,ScYs, stepX=1, stepY=1,SX=50,SY=2, xfm="%3d",yfm="%3d",NstepX=1,NstepY=10,NX=5,NY=90): XED=XST+SX*NX; YED=YST-SY*NY; drawText(canvas,(XST+XED)/2,YED-15,Title,size=10) for i in range(0,NY+1,NstepY):# Y目盛り描画 Y=YST-i*SY canvas.create_line(XST,Y,XED,Y,fill="gray") drawText(canvas,XST-15,Y,(yfm % (ScYs+i*stepY))) for i in range(0,NX+1,NstepX):# X目盛り描画 X=XST+i*SX canvas.create_line(X,YST,X,YED,fill="gray") drawText(canvas,X,YST+15,(xfm %(ScXs+ i*stepX))) def plotDT(canvas,GDT,XST,YST,ScXs,ScYs, SX=50,SY=2,color="red",size=1): N=len(GDT) for i in range(N): X=GDT[i][0]-ScXs; Y=GDT[i][1]-ScYs XX=XST+X*SX; YY=YST-Y*SY canvas.create_rectangle(XX-size,YY-size, XX+size,YY+size, fill=color) def select2DT(DT,k1,k2): L=[] for i in range(len(DT)): L.append([DT[i][k1],DT[i][k2]]) return L 以上に加えて,以下のプログラムを実行すると散布図が得られます。 B=multipleRegression(DT); A2=compApp(B,DT) scaleOnly(canvas, "X1 - Y",40,250,0, 0, SX=20,SY=2, stepX=1,NX=11,NY=100) M=len(DT[0])-1;A1=select2DT(DT,0,M) plotDT(canvas,A1,40,250,0,0,SX=20,SY=2,color="red" ,size=2) plotDT(canvas,A2,40,250,0,0,SX=20,SY=2,color="blue",size=2) 表8-18 多変数を固定して表示するための関数 def compYM(X,k, B,AVL): N=len(B);T=B[k][0]*X for i in range(N): if i!= k : T+=B[i][0]*AVL[i] return T def drawLine(canvas,XST,YST,B,AVL,k,ScXs,ScXe,ScYs,ScYe, SX=50,SY=2,width=2,color="red"): N=len(DT) X1=ScXs; Y1=compYM(X1,k,B,AVL) X2=ScXe; Y2=compYM(X2,k,B,AVL) XX1=XST+(X1-ScXs)*SX; YY1=YST-(Y1-ScYs)*SY XX2=XST+(X2-ScXs)*SX; YY2=YST-(Y2-ScYs)*SY canvas.create_line(XX1,YY1,XX2,YY2, width=width,fill=color) def listMax(L,k): N=len(L);M=L[0][k] for i in range(1,N): if L[i][k]>M: M=L[i][k] return M def listMin(L,k): N=len(L);M=L[0][k] for i in range(1,N): if L[i][k]