# Read the raw dataset text. The context manager guarantees the file handle
# is closed even if reading fails.
with open('dataset/simulation_data_1d.txt', 'r') as data_file:
    data = data_file.read()

# Data loading: every non-blank line is expected to look like "x,y".
x = []
y = []
for item in data.split('\n'):
    # Skip empty and whitespace-only lines (e.g. a trailing newline or a
    # stray '\r' from Windows line endings) instead of failing in float().
    if not item.strip():
        continue
    fields = item.split(',')
    x.append(float(fields[0]))
    y.append(float(fields[1]))
# Dataset construction
def seperateData(x, y, test_scale):
    """Pair every sample with its target and randomly split into train/test.

    Args:
        x: Sequence of samples. Each sample is either a single scalar
            feature or a list/tuple of feature values.
        y: Sequence of targets aligned with ``x`` by position.
        test_scale: Threshold compared against a uniform random draw in
            [0, 1). A sample whose draw is greater than this value goes to
            the training set; every other sample goes to the test set.

    Returns:
        A ``(train, test)`` tuple of lists. Every row has the layout
        ``[feature_1, ..., feature_n, target]``.

    Raises:
        IndexError: If ``y`` has fewer elements than ``x``.
    """
    # Local import so the function works even if the file has no
    # top-level ``import random``.
    import random

    # Combine x and y into rows. Pairing is done by position (enumerate)
    # rather than by value lookup, so duplicate x values map to their own
    # targets and the pass is linear in the number of samples.
    data = []
    for i, _x in enumerate(x):
        if isinstance(_x, (list, tuple)):
            # Multi-feature sample: copy the feature values themselves.
            row = list(_x)
        else:
            row = [_x]
        row.append(y[i])
        data.append(row)

    # Split the dataset. The fixed seed keeps the split reproducible
    # between runs (note: this reseeds the global ``random`` generator).
    train = []
    test = []
    random.seed(3)
    for row in data:
        if random.random() > test_scale:
            train.append(row)
        else:
            # Includes the draw == test_scale case so no sample is dropped.
            test.append(row)
    return train, test
# Split the loaded samples using a threshold of 0.2 for the test share.
train, test = seperateData(x, y, 0.2)

# Reshape into the structures used for model fitting: for every row, all
# columns except the last are the features and the last column is the target.
X_train = [row[:-1] for row in train]
Y_train = [row[-1] for row in train]
X_test = [row[:-1] for row in test]
Y_test = [row[-1] for row in test]
# Closed-form least-squares fit of the line y = k * x + b on the training
# set, using only the first feature of each sample.
n = len(Y_train)
Xsum = Ysum = XY = X2sum = 0.0

# Accumulate the sums required by the derived formulas: sum(x), sum(y),
# sum(x * y) and sum(x ** 2).
for features, target in zip(X_train, Y_train):
    Xsum += features[0]
    Ysum += target
    XY += features[0] * target
    X2sum += features[0] ** 2

# Slope, then intercept.
slope_numerator = n * XY - Xsum * Ysum
slope_denominator = n * X2sum - Xsum ** 2
k = slope_numerator / slope_denominator
b = (Ysum - k * Xsum) / n
print("拟合的曲线:y =", k, "* x +", b)