# Some important data-statistics functions

# (page metadata) tech 2026-02-06  1

import numpy as np
from scipy.stats import norm
from scipy.stats import pearsonr

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Only the 'distribution' mode draws anything; keep the purely numeric
    # modes usable on machines where matplotlib is not installed.
    plt = None


def _plot_distribution(data):
    """Plot a density histogram of ``data`` with a normal curve on top.

    The figure is written to ``tst.png`` in the current working directory
    and then shown with ``plt.show()``.

    Returns:
        The path of the image file that was written.
    """
    if plt is None:
        raise ImportError("matplotlib is required for type='distribution'")

    out_path = 'tst.png'
    mu = np.nanmean(data)
    sigma = np.nanstd(data)
    # One bin per entry along the first axis, as in the original code.
    num_bins = data.shape[0]
    _, bins, _ = plt.hist(data, bins=num_bins, density=True)
    # Normal density with the sample mean/std, evaluated at the bin edges.
    plt.plot(bins, norm.pdf(bins, mu, sigma), "r--")
    plt.xlabel("data")
    plt.ylabel("value")
    plt.title("Distribution")
    plt.savefig(out_path)
    plt.show()
    return out_path


def data_analysis(data, type):
    """Compute one named statistic over ``data``.

    Args:
        data: Input samples. The 'correlation' and 'p_value' modes read
            ``data[0]`` and ``data[1]``; 'distribution' reads
            ``data.shape[0]``; 'consistency' indexes ``data[:, 0]`` and
            ``data[:, 1]``, so those modes need a NumPy array of a
            suitable shape.
        type: Name of the statistic to compute (the parameter name shadows
            the builtin but is kept so existing keyword callers still
            work). Supported names:

            * ``'correlation'`` - Pearson correlation of data[0] and data[1]
            * ``'p_value'`` - p-value of that same Pearson test
            * ``'baseInfo'`` - NaN-ignoring max, min, mean and sum
            * ``'std'`` - NaN-ignoring standard deviation
            * ``'quartile'`` - 25th/50th/75th percentiles plus their
              75th-minus-25th spread under the key ``'deviation'``
            * ``'percentile'`` - ``np.nanpercentile(data, [100])``
            * ``'distribution'`` - histogram image saved to disk
            * ``'consistency'`` - see Returns

    Returns:
        A dict keyed by statistic name for every mode except
        ``'consistency'``, which returns the bare numeric value (kept
        that way for backward compatibility). For ``'distribution'`` the
        dict value is the path of the image file that was written.

    Raises:
        ValueError: If ``type`` is not one of the supported names.
        ImportError: If ``type`` is ``'distribution'`` and matplotlib is
            not installed.
    """
    # Pearson correlation and its p-value come from the same call.
    if type in ("correlation", "p_value"):
        correlation, p_value = pearsonr(data[0], data[1])
        # Explicit selection instead of eval(type) on a caller-supplied string.
        return {type: correlation if type == "correlation" else p_value}

    # Basic descriptive statistics; NaN entries are ignored.
    if type == "baseInfo":
        return {
            'max': np.nanmax(data),
            'min': np.nanmin(data),
            'mean': np.nanmean(data),
            'sum': np.nansum(data),
        }

    if type == "std":
        return {'std': np.nanstd(data)}

    if type == 'quartile':
        quartile = np.nanpercentile(data, [25, 50, 75])
        # Interquartile range (Q3 - Q1).
        return {'quartile': quartile, 'deviation': quartile[2] - quartile[0]}

    if type == 'percentile':
        # NOTE(review): only the 100th percentile is requested, which is the
        # NaN-ignoring maximum - confirm this is the intended percentile.
        return {'percentile': np.nanpercentile(data, [100])}

    if type == 'distribution':
        # Report the file that was actually written (the original returned
        # './test.jpg', a path it never saved to).
        return {'distribution': _plot_distribution(data)}

    # Labelled an ICC consistency check in the original comments: the first
    # two columns are centred on the overall NaN-ignoring mean, their dot
    # product is divided by (overall variance * number of rows).
    if type == 'consistency':
        centre = np.nanmean(data)
        cross = np.dot(data[:, 0] - centre, data[:, 1] - centre)
        return cross / (np.var(data) * data.shape[0])

    raise ValueError(f"unsupported analysis type: {type!r}")


# Sample data for quick manual experiments.
data1 = np.random.randint(0, 100, 50)  # one-dimensional
data2 = np.random.randint(0, 100, (50, 50))  # two-dimensional

# Supported algorithm names:
#   correlation   Pearson correlation
#   p_value       p-value of the Pearson test
#   baseInfo      basic statistics (max, min, mean, sum)
#   std           standard deviation
#   quartile      quartiles
#   percentile    percentile
#   consistency   consistency
#   distribution  data distribution (histogram)

if __name__ == "__main__":
    print(data_analysis(data2, 'consistency'))
# (page footer) Latest replies (0)