机器学习-Ridge 岭回归
·
前言:
当 alpha 取 0 时,正则项消失,岭回归退化为普通最小二乘法,求解过程与最小二乘法完全相同
优点:
可以防止矩阵不可逆
惩罚某个维度权重系数过高

一 岭回归原理
输入:
$X \in \mathbb{R}^{m \times n}$: m个样本,n个维度
$Y \in \mathbb{R}^{m \times 1}$: 标签值
模型
$\hat{Y} = Xw$
其中 $w$ 为权重系数,形状为 [n,1]
求解:
使损失函数 $L(w) = \lVert Xw - Y \rVert_2^2 + \alpha \lVert w \rVert_2^2$ 最小的 $w$
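算法推导
对 $w$ 求导(采用分母布局),并令导数为 0:
$\frac{\partial L}{\partial w} = 2X^{T}(Xw - Y) + 2\alpha w = 0$
解得:
$w = (X^{T}X + \alpha I)^{-1} X^{T} Y$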
二 CODE
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 8 17:05:06 2021
@author: chengxf2
"""
import numpy as np
from sklearn.metrics import mean_squared_error , r2_score
import matplotlib.pyplot as plt
class RidgeRegression():
    """Closed-form ridge (L2-regularised) linear regression.

    The weights are the solution of ``(X^T X + alpha * I) w = X^T y``.
    With ``alpha == 0`` this is the ordinary least-squares normal equation.

    Attributes:
        m: number of samples read by the last ``LoadData`` call.
        n: number of feature columns read by the last ``LoadData`` call.
        name: path of the whitespace-delimited data file.
    """

    def __init__(self, name="ex0.txt"):
        """Create a model bound to the data file *name*."""
        self.m = 0  # number of samples
        self.n = 0  # number of features
        self.name = name

    def LoadData(self):
        """Load the data set from ``self.name``.

        Every non-blank line holds whitespace-separated numbers: the feature
        values followed by the label in the last column. The column count is
        taken from the first non-blank line.

        Returns:
            A ``(features, labels)`` tuple of ``np.matrix`` objects with
            shapes ``(m, n)`` and ``(m, 1)``.

        Raises:
            ValueError: if the file has no data rows, a row has fewer than
                two columns, or a row is shorter than the first one.
        """
        # The context manager closes the file even if parsing fails.
        with open(self.name, encoding="utf-8") as f:
            # One splitting rule for both the column count and the values,
            # so tab- and space-delimited files behave the same.
            rows = [line.split() for line in f if line.strip()]
        if not rows:
            raise ValueError("no data rows found in %r" % (self.name,))

        n_features = len(rows[0]) - 1
        if n_features < 1:
            raise ValueError(
                "each row needs at least one feature and one label")

        features = []
        labels = []
        for row_no, items in enumerate(rows, start=1):
            if len(items) < n_features + 1:
                raise ValueError(
                    "data row %d has %d columns, expected %d"
                    % (row_no, len(items), n_features + 1))
            features.append([float(v) for v in items[:n_features]])
            labels.append(float(items[-1]))

        self.m = len(rows)
        self.n = n_features
        # np.asmatrix is used because np.mat was removed in NumPy 2.0.
        return np.asmatrix(features), np.asmatrix(labels).T

    def Train(self, dataMat, labelMat, alpha=0.01):
        """Solve the ridge normal equation for the weight vector.

        Args:
            dataMat: feature matrix of shape ``(m, n)``.
            labelMat: labels, ``(m, 1)`` or any array with ``m`` elements.
            alpha: L2 penalty strength added to the diagonal of ``X^T X``.

        Returns:
            The ``(n, 1)`` weight ``np.matrix``, or ``None`` when the system
            matrix is singular and the weights are not uniquely defined.
        """
        X = np.asarray(dataMat, dtype=float)
        y = np.asarray(labelMat, dtype=float).reshape(-1, 1)
        # Size the penalty from the data itself, so training does not depend
        # on LoadData having been called on this instance first.
        A = X.T @ X + alpha * np.eye(X.shape[1])
        B = X.T @ y
        try:
            # Solving the linear system avoids forming an explicit inverse
            # and replaces the fragile ``det(A) == 0.0`` float comparison.
            W = np.linalg.solve(A, B)
        except np.linalg.LinAlgError:
            return None
        return np.asmatrix(W)

    def Predict(self, W, dataMat):
        """Return the predictions ``dataMat * W`` as an ``np.matrix``.

        Args:
            W: ``(n, 1)`` weight vector as returned by ``Train``.
            dataMat: feature matrix of shape ``(m, n)``.

        Raises:
            ValueError: if ``W`` is ``None`` (training failed).
        """
        if W is None:
            raise ValueError("W is None: training did not produce weights")
        y_predict = np.asmatrix(dataMat) * np.asmatrix(W)
        return y_predict

    def Test(self):
        """Plot the training-set R^2 score for alpha in [0.001, 1.2)."""
        # Use this instance, not a module-level global, to load and fit.
        dataMat, labelMat = self.LoadData()
        # Recent scikit-learn releases reject np.matrix inputs, so the
        # metric is given plain 1-D arrays.
        y_true = np.asarray(labelMat).ravel()

        alphas = []
        y_score = []
        for a in np.arange(0.001, 1.2, 0.001):
            w = self.Train(dataMat, labelMat, a)
            if w is None:
                # Singular system for this alpha: nothing to score.
                continue
            y_predict = np.asarray(self.Predict(w, dataMat)).ravel()
            alphas.append(a)
            y_score.append(r2_score(y_true, y_predict))

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(alphas, y_score, 'go-', linewidth=1, label="alpha-score")
        ax.legend()
        # Without show() the figure is never displayed when run as a script.
        plt.show()
if __name__ == "__main__":
    # Script entry point: build the model with its default data file and
    # run the alpha sweep.
    rd = RidgeRegression()
    rd.Test()
DAMO开发者矩阵,由阿里巴巴达摩院和中国互联网协会联合发起,致力于探讨最前沿的技术趋势与应用成果,搭建高质量的交流与分享平台,推动技术创新与产业应用链接,围绕“人工智能与新型计算”构建开放共享的开发者生态。
更多推荐

所有评论(0)