import numpy as np
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
from scipy.stats import norm
def data_analysis(data, type):
    """Run one named statistical analysis on ``data`` and return its result.

    Supported values of ``type``:

    * ``"correlation"`` / ``"p_value"`` -- ``scipy.stats.pearsonr`` applied to
      ``data[0]`` and ``data[1]``; only the requested one of the two values is
      returned, keyed by ``type``.
    * ``"baseInfo"`` -- NaN-ignoring ``max``, ``min``, ``mean`` and ``sum``.
    * ``"std"`` -- NaN-ignoring standard deviation.
    * ``"quartile"`` -- NaN-ignoring 25th/50th/75th percentiles (``quartile``)
      and the interquartile range ``Q3 - Q1`` (``deviation``).
    * ``"percentile"`` -- NaN-ignoring 100th percentile, as a length-1 array.
    * ``"distribution"`` -- draws a density histogram with a normal-PDF
      overlay, saves it to ``tst.png``, shows it, and returns the saved path.
    * ``"consistency"`` -- a single number computed from the first two columns
      of a 2-D ``data`` (see the inline comment for the exact formula).

    :param data: Input data. ``correlation``/``p_value`` index it as
        ``data[0]`` and ``data[1]``; ``distribution`` uses ``shape[0]`` as the
        bin count; ``consistency`` indexes columns 0 and 1.
    :param type: Name of the analysis to run (one of the names listed above).
    :return: A dict of results for every analysis except ``"consistency"``,
        which returns the bare scalar (kept for backward compatibility).
    :raises ValueError: If ``type`` is not one of the supported names.
    """
    # NOTE: ``type`` shadows the builtin, but the name is part of the public
    # signature (callers may pass it by keyword), so it is kept as-is.

    # Pearson correlation coefficient or its p-value.
    if type in ("correlation", "p_value"):
        correlation, p_value = pearsonr(data[0], data[1])
        # Explicit selection instead of eval(type): same result, no eval.
        return {type: correlation if type == "correlation" else p_value}

    # Basic statistics: extremes, mean and total (NaNs ignored).
    if type == "baseInfo":
        return {
            'max': np.nanmax(data),
            'min': np.nanmin(data),
            'mean': np.nanmean(data),
            'sum': np.nansum(data),
        }

    # Standard deviation (NaNs ignored).
    if type == "std":
        return {'std': np.nanstd(data)}

    # Quartiles and interquartile range.
    if type == 'quartile':
        quartile = np.nanpercentile(data, [25, 50, 75])
        return {'quartile': quartile, 'deviation': quartile[2] - quartile[0]}

    # Percentile; the 100th percentile is hard-coded.
    if type == 'percentile':
        return {'percentile': np.nanpercentile(data, [100])}

    # Data distribution: density histogram plus a fitted normal curve.
    if type == 'distribution':
        values = np.asarray(data)
        mu = np.nanmean(values)
        sigma = np.nanstd(values)
        num_bins = values.shape[0]  # one bin per entry along the first axis
        image_path = 'tst.png'
        _, bins, _ = plt.hist(values, bins=num_bins, density=True)
        plt.plot(bins, norm.pdf(bins, mu, sigma), "r--")
        plt.xlabel("data")
        plt.ylabel("value")
        plt.title("Distribution")
        plt.savefig(image_path)
        plt.show()
        # Close the figure so a later call does not draw on top of this one.
        plt.close()
        # Report the file that was actually written (the previous code
        # returned './test.jpg', a path that was never saved).
        return {'distribution': image_path}

    # Consistency check (labelled "ICC" by the original author).
    if type == 'consistency':
        values = np.asarray(data)
        grand_mean = np.nanmean(values)
        # sum((col0 - mean) * (col1 - mean)) / (var(all data) * n_rows)
        # NOTE(review): the mean and variance are taken over *all* of `data`,
        # not only the two columns used, and np.var (unlike nanmean) does not
        # ignore NaN -- confirm this is intended for inputs with more than two
        # columns or with missing values.
        return np.dot(
            values[:, 0] - grand_mean,
            values[:, 1] - grand_mean,
        ) / (np.var(values) * values.shape[0])

    raise ValueError(
        "unknown analysis type %r; expected one of: correlation, p_value, "
        "baseInfo, std, quartile, percentile, distribution, consistency"
        % (type,)
    )
# Sample inputs for manual testing.
data1 = np.random.randint(0, 100, size=50)  # 1-D: 50 random integers
data2 = np.random.randint(0, 100, size=(50, 50))  # 2-D: 50 x 50 random integers

# Analysis names accepted by data_analysis:
#   correlation    Pearson correlation
#   p_value
#   baseInfo       basic statistics (max, min, mean, sum)
#   std            standard deviation
#   quartile
#   percentile     percentile
#   consistency    consistency
#   distribution   data distribution

# Demo run: print the consistency result for the 2-D sample.
print(data_analysis(data2, "consistency"))