# Commit 350b02b2 by Milica Brkic
#
# Initial commit
#
# parents
import numpy as np
from skfusion import fusion as skf
import os
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import mean_absolute_error
from scipy import stats
def rmse(y_true, y_pred):
    """Return the root-mean-square error between two equally shaped inputs.

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted values. Plain sequences (lists, tuples) are
        accepted in addition to NumPy arrays.

    Returns
    -------
    float
        ``sqrt(sum((y_true - y_pred) ** 2) / y_true.size)``. The divisor is
        the total element count of ``y_true``.
    """
    # asanyarray converts sequences but passes ndarray subclasses (such as
    # masked arrays) through unchanged, so existing array callers see the same
    # arithmetic as before.
    y_true = np.asanyarray(y_true)
    y_pred = np.asanyarray(y_pred)
    return np.sqrt(np.sum((y_true - y_pred) ** 2) / y_true.size)
def scale(X, amin, amax):
    """Linearly rescale ``X`` so that its values span ``[amin, amax]``.

    The minimum and maximum are taken over the whole of ``X`` (not per row or
    per column), so a 2-D input is normalised with one global range.

    Parameters
    ----------
    X : array-like
        Values to rescale. Sequences are converted; ndarray subclasses such
        as masked arrays are passed through unchanged.
    amin, amax : number
        Lower and upper bound of the target range.

    Returns
    -------
    array
        ``(X - X.min()) / (X.max() - X.min()) * (amax - amin) + amin``.
        When every value of ``X`` is identical the range is zero; in that
        case every element is mapped to ``amin`` instead of dividing by zero.
    """
    X = np.asanyarray(X)
    lo = X.min()
    span = X.max() - lo
    if span == 0:
        # Constant input: (X - lo) is all zeros, so the result is amin
        # everywhere. Multiplying by 0.0 keeps the float result type that the
        # normal path produces.
        return (X - lo) * 0.0 + amin
    return (X - lo) / span * (amax - amin) + amin
def cum_gain(relevance):
    """Return the cumulative gain: the plain sum of the relevance grades.

    ``None`` or an empty sequence yields ``0.0``.
    """
    has_grades = relevance is not None and len(relevance) >= 1
    if not has_grades:
        return 0.0
    grades = np.asarray(relevance)
    return grades.sum()
def dcg(relevance, alternate=True):
    """Return the discounted cumulative gain of a ranked relevance list.

    Parameters
    ----------
    relevance : sequence of numbers or None
        Relevance grades in ranked order (position 1 first).
    alternate : bool, default True
        ``True``  -> ``sum((2 ** rel_i - 1) / log2(i + 1))`` for i = 1..p.
        ``False`` -> ``rel_1 + sum(rel_i / log2(i))`` for i = 2..p.

    Returns
    -------
    float
        The DCG value; ``0.0`` for ``None`` or an empty list.
    """
    if relevance is None or len(relevance) < 1:
        return 0.0
    # Work in float: with an integer array, 2 ** rel silently wraps around for
    # large grades and raises for negative ones.
    rel = np.asarray(relevance, dtype=float)
    p = len(rel)
    if alternate:
        discounts = np.log2(np.arange(1, p + 1) + 1)
        return ((np.power(2.0, rel) - 1) / discounts).sum()
    # Positions 2..p are discounted by log2(position); position 1 is not.
    discounts = np.log2(np.arange(2, p + 1))
    return rel[0] + (rel[1:] / discounts).sum()
def idcg(relevance, alternate=True):
    """Return the ideal DCG: the DCG of the grades sorted best-first.

    ``None`` or an empty sequence yields ``0.0``. The caller's sequence is
    not reordered.
    """
    if relevance is None or len(relevance) < 1:
        return 0.0
    # np.sort hands back a sorted copy, so no explicit guard copy is needed.
    best_first = np.sort(np.asarray(relevance))[::-1]
    return dcg(best_first, alternate)
def ndcg(relevance, nranks, alternate=True):
    """Return the normalised DCG of a ranking, cut off at ``nranks``.

    Parameters
    ----------
    relevance : sequence of numbers or None
        Relevance grades in ranked order.
    nranks : int
        Cut-off position (the "k" in NDCG@k). Shorter lists are zero-padded
        to this length; longer lists are truncated for the DCG part.
    alternate : bool, default True
        Forwarded to ``dcg`` to select the gain/discount formula.

    Returns
    -------
    float
        ``DCG@nranks / IDCG@nranks``; ``0.0`` when ``relevance`` is ``None``
        or empty, or when the ideal DCG is zero.

    Raises
    ------
    ValueError
        If ``nranks < 1`` (only checked for non-empty ``relevance``).
    """
    if relevance is None or len(relevance) < 1:
        return 0.0
    if nranks < 1:
        # ValueError is still caught by callers using ``except Exception``.
        raise ValueError('nranks < 1')
    rel = np.asarray(relevance)
    pad = max(0, nranks - len(rel))
    # pad may be zero, in which case np.pad is a no-op
    padded = np.pad(rel, (0, pad), 'constant')
    ranked = padded[:nranks]
    # The ideal ordering must be built from ALL grades and only then cut to
    # nranks. Sorting the already truncated list (as before) ignored relevant
    # items ranked below the cut-off and inflated the score. The result is
    # unchanged whenever len(relevance) <= nranks.
    ideal = np.sort(padded)[::-1][:nranks]
    ideal_dcg = dcg(ideal, alternate)
    if ideal_dcg == 0:
        return 0.0
    return dcg(ranked, alternate) / ideal_dcg
# Load the weather-feature table and the yield table, align the weather rows
# with the environments present in the yield data, and drop weather columns
# that carry a single value.
#
# NOTE(review): the working directory is an absolute, user-specific path; the
# script only runs on a machine where this directory exists.
os.chdir("C:\\Users\\Milica\\Desktop\\CropChallengeData\\Dataset")

# Collects min/max values used for rescaling further down the script.
skaliranje = []

# Weather features: the first CSV column becomes the row labels, then is dropped.
v = pd.read_csv('features_28_4seasonsx7features_created_by_me.csv')
v.index = list(v.iloc[:, 0])
v = v.drop(v.columns[[0]], axis=1)

# Yield data, ordered by environment. The first column is discarded
# (presumably a saved row index -- confirm against data.csv).
p = pd.read_csv('data.csv')
p = p.sort_values(by=['ENV_ID'])
p = p.drop(p.columns[[0]], axis=1)

# Keep one weather row per ENV_ID that occurs in the yield data.
v = v.loc[p['ENV_ID'].unique(), :]
weather = v.reset_index()
weather.rename(columns={"index": "ENV_ID"}, inplace=True)

print('Check if exist columns in Weather dataset that contain only one value.')
constant_columns = [col for col in weather.columns
                    if len(weather[col].unique()) == 1]
for col in constant_columns:
    print(col)
print("Removing columns that are constant.")
# Drop exactly the columns detected above instead of the previously
# hard-coded ['13', '14'], so the check and the drop cannot drift apart.
weather = weather.drop(columns=constant_columns)
#######################################################################################################################
# just for one year, needs to be extended for all 10 years
year = 2017
p08 = p[p.YEAR == year]

# Hybrid x environment table of YIELD for the selected year. `orginal` keeps
# the pivoted frame (its index supplies the hybrid labels later on); A12 is a
# copy that is turned into the numeric matrix. The pivot is computed once and
# copied rather than computed twice.
orginal = p08.pivot_table(index=['HYBRID_ID'], columns=['ENV_ID'], values=['YIELD'])
kolone = np.sort(p08['ENV_ID'].unique())
A12 = orginal.copy()
A12.columns = kolone
# Missing hybrid/environment combinations are marked with the sentinel -1,
# which is converted into a mask below.
A12 = A12.fillna(-1)
A12 = A12.values

# Weather features for this year's environments, in `kolone` order.
R24 = v.loc[kolone, :]
R24 = R24.values

# Re-label the rows of p with the values of its current first column so that
# rows can be selected by `kolone` (presumably ENV_ID -- confirm).
p.index = p.iloc[:, 0].values
soil_columns = ['ENV_MG', 'LAT', 'LONG', 'IRRIGATION', 'ELEVATION', 'CLAY',
                'SILT', 'SAND', 'AWC', 'PH', 'OM', 'CEC', 'KSAT']
R23 = p.loc[kolone, soil_columns]
R23 = R23.reset_index()
# Several yield rows share one environment; keep one attribute row each.
R23 = R23.drop_duplicates()
R23 = R23.drop(R23.columns[[0]], axis=1)
R23 = R23.values

# Bring weather and soil matrices to [0, 1].
# NOTE(review): scale() uses the matrix-wide min/max, so features with
# different units share one range -- confirm this is intended.
R24 = scale(R24, 0, 1)
R23 = scale(R23, 0, 1)

# Mask the -1 sentinel cells, remember the unscaled yields and their range,
# then scale the yields to [0, 1].
A12 = np.ma.masked_equal(A12, -1)
org12 = A12.copy()
skaliranje.append(A12.min())
skaliranje.append(A12.max())
A12 = scale(A12, 0, 1)
R12 = A12.copy()

# cross validation 10-fold: every cell gets a reproducible fold id in 0..9
np.random.seed(111)
numbers = np.random.randint(10, size=A12.shape)
########################################################################################################################
# Accumulators for the evaluation metrics; the cross-validation loop appends
# to each of them. Every name is bound to its own, independent empty list.
rmse_dfmf_test, rmse_dfmf_train = [], []
r2_dfmf_test, r2_dfmf_train = [], []
cc_test, cc_train = [], []
mae_test, mae_train = [], []
precision_on_location_level_test = []
std_test, std_train = [], []
sper_train, sper_test = [], []
std_test_org, std_train_org = [], []
mean_test, mean_train = [], []
mean_test_org, mean_train_org = [], []
dcg_metrics, ndcg_metrics = [], []
# 10-fold cross-validation over the observed yield cells.
# For each fold m:
#   1. hide the observed cells whose fold id equals m,
#   2. fuse the yield / soil / weather relations with skf.Dfmf and complete
#      the 'Yield' relation,
#   3. map the prediction back to the original yield units,
#   4. record error, correlation and ranking metrics for the fold.
for m in range(10):
    # Test cells: observed and assigned to fold m. Train cells: observed, other folds.
    hide = np.logical_and(numbers == m, ~A12.mask)
    hide_train = np.logical_and(numbers != m, ~A12.mask)
    R12 = np.ma.masked_where(hide, A12.copy())
    mean_hybrid = np.mean(R12, 1)
    mean_location = np.mean(R12, 0)
    mean_yield = np.mean(R12)
    means = mean_hybrid, mean_location, mean_yield

    # Factorisation rank per object type: 5% of the dimension, with a floor.
    # Named rank_fraction (not `p`) so the yield DataFrame `p` is not rebound.
    rank_fraction = 0.05
    t1 = skf.ObjectType('Hybrid', max(int(rank_fraction * R12.shape[0]), 25))
    t2 = skf.ObjectType('Location', max(int(rank_fraction * R12.shape[1]), 15))
    t3 = skf.ObjectType('Soil', max(int(rank_fraction * R23.shape[1]), 3))
    t4 = skf.ObjectType('Weather', max(int(rank_fraction * R24.shape[1]), 7))
    relations = [
        skf.Relation(R12, t1, t2, name='Yield'),
        skf.Relation(R23, t2, t3, name='Soil'),
        skf.Relation(R24, t2, t4, name='Weather'),
    ]
    graph = skf.FusionGraph(relations)

    min_in_sample = 1000
    # 'R2_train' appeared twice in the original literal; one entry is enough.
    result_full = {
        'R^2:': 0, 'out-sample-error:': 0, 'in-sample-error:': 0,
        'year:': year, 'p=': rank_fraction, 'R2_train': 0,
        't1': max(int(rank_fraction * R12.shape[0]), 0),
        't2': max(int(rank_fraction * R12.shape[1]), 0),
        'Correlation coefficient_train': 0,
        't3': max(int(rank_fraction * R23.shape[1]), 0),
        't4': max(int(rank_fraction * R24.shape[1]), 0),
        'Correlation coefficient_test': 0,
        'mae_test': 0, 'mae_train': 0,
    }

    # Both loops currently run once; they are kept as the scaffold for
    # repeated random restarts (the best run by out-of-sample RMSE is kept).
    for _restart in range(1):
        scores = []
        slika = []
        for _ in range(1):
            dfmc_fuser = skf.Dfmf(max_iter=100, init_type='random')
            dfmc_mod = dfmc_fuser.fuse(graph)
            R12_pred = dfmc_mod.complete(graph['Yield'])
            # Range of the raw completed matrix for THIS fold.
            pred_min = R12_pred.min()
            pred_max = R12_pred.max()
            skaliranje.append(pred_min)
            skaliranje.append(pred_max)
            R12_pred = scale(R12_pred, 0, 1)
            # Add the per-hybrid (row) and per-location (column) means of the
            # training matrix, then rescale to [0, 1] again.
            # NOTE(review): R12 is not centred before fusion in this file --
            # confirm that adding the means here is intended.
            R12_pred += np.tile(mean_hybrid.reshape((A12.shape[0], 1)), (1, A12.shape[1]))
            R12_pred += np.tile(mean_location.reshape((1, A12.shape[1])), (A12.shape[0], 1))
            skaliranje.append(R12_pred.min())
            skaliranje.append(R12_pred.max())
            R12_pred = scale(R12_pred, 0, 1)
            scores.append(rmse(A12[hide], R12_pred[hide]))

        # Back to original yield units, via this fold's raw prediction range.
        # The original indexed skaliranje[2] / skaliranje[3]; because the list
        # only grows, those entries always held fold 0's range, so folds 1-9
        # were rescaled with stale values.
        drugo = R12_pred * (pred_max - pred_min) + pred_min
        rez12 = drugo * (org12.max() - org12.min()) + org12.min()

        greska12 = rmse(org12[hide], rez12[hide])
        print('RMSE(out-sample dfmc) for ', year, 'with soil and small weather attributes: {}'.format(greska12))
        r2 = r2_score(org12[hide], rez12[hide])
        print('R^2 for ', year, ' with soil and small weather attributes: {}'.format(r2))
        gr12 = rmse(org12[hide_train], rez12[hide_train])
        print('RMSE(in-sample dfmc) for', year, 'm=',m,', with soil and small weather attributes: {}'.format(gr12))

        if greska12 < min_in_sample:
            min_in_sample = greska12
            result_full['R^2:'] = r2
            result_full['out-sample-error:'] = greska12
            result_full['in-sample-error:'] = gr12
            result_full['R2_train'] = r2_score(org12[hide_train], rez12[hide_train])
            result_full['Correlation coefficient_test'] = np.corrcoef(org12[hide], rez12[hide])[0,1]
            result_full['Correlation coefficient_train'] = np.corrcoef(org12[hide_train], rez12[hide_train])[0,1]
            result_full['mae_test'] = mean_absolute_error(org12[hide], rez12[hide])
            result_full['mae_train'] = mean_absolute_error(org12[hide_train], rez12[hide_train])

    # Ranking quality per location (column): compare the three best held-out
    # hybrids by observed yield with the three best by predicted yield.
    pogodjeno = []
    dcg_for_each_location = []
    ndcg_for_each_location = []
    # getdata works for masked arrays and plain ndarrays alike.
    org_values = np.ma.getdata(org12)
    pred_values = np.ma.getdata(rez12)
    for loc in range(hide.shape[1]):
        held_out = np.flatnonzero(hide[:, loc])
        # Only locations with more than three held-out hybrids are scored.
        if len(held_out) <= 3:
            continue
        org_prinos = org_values[held_out, loc]
        pred_prinos = pred_values[held_out, loc]
        hibrid_zasejan = orginal.index[held_out]

        true_top = [hibrid_zasejan[t] for t in np.argsort(org_prinos)[::-1][:3]]
        pred_top = [hibrid_zasejan[t] for t in np.argsort(pred_prinos)[::-1][:3]]
        # Relevance grades 3, 2, 1 for the observed best, second and third.
        real = {hybrid: 3 - rank for rank, hybrid in enumerate(true_top)}
        scores_alg = [real.get(hybrid, 0) for hybrid in pred_top]

        dcg_for_each_location.append(dcg(np.array(scores_alg)))
        ndcg_for_each_location.append(ndcg(np.array(scores_alg),3))
        # Number of predicted top-3 hybrids that are in the observed top 3.
        pogodjeno.append(sum(hybrid in real for hybrid in pred_top))

    dcg_metrics.append(np.mean(dcg_for_each_location))
    ndcg_metrics.append(np.mean(ndcg_for_each_location))
    print(dcg_metrics)
    print(ndcg_metrics)

    # Store this fold's metrics.
    rmse_dfmf_test.append(result_full['out-sample-error:'])
    rmse_dfmf_train.append(result_full['in-sample-error:'])
    r2_dfmf_test.append(result_full['R^2:'])
    cc_test.append(result_full['Correlation coefficient_test'])
    cc_train.append(result_full['Correlation coefficient_train'])
    r2_dfmf_train.append(result_full['R2_train'])
    mae_test.append(result_full['mae_test'])
    mae_train.append(result_full['mae_train'])
    std_test.append(np.std(rez12[hide]))
    std_train.append(np.std(rez12[hide_train]))
    std_test_org.append(np.std(org12[hide]))
    std_train_org.append(np.std(org12[hide_train]))
    sper_test.append(stats.spearmanr(org12[hide], rez12[hide])[0])
    sper_train.append(stats.spearmanr(org12[hide_train], rez12[hide_train])[0])
    mean_test.append(np.mean(rez12[hide]))
    mean_train.append(np.mean(rez12[hide_train]))
    mean_test_org.append(np.mean(org12[hide]))
    mean_train_org.append(np.mean(org12[hide_train]))
    precision_on_location_level_test.append(np.mean(np.array(pogodjeno)/3))
# Final results table: one row per cross-validation fold, one column per
# metric. Building it from a name -> list mapping ties every column name to
# its data directly, instead of filling a hard-coded 10 x 21 zero matrix by
# column position; the row count now follows the number of recorded folds.
krajnje = pd.DataFrame({
    'RMSE_DFMF_test': rmse_dfmf_test,
    'RMSE_DFMF_train': rmse_dfmf_train,
    'R2_test': r2_dfmf_test,
    'R2_train': r2_dfmf_train,
    'cc_test': cc_test,
    'cc_train': cc_train,
    'MAE_test': mae_test,
    'MAE_train': mae_train,
    'Precision_on_location_level_test': precision_on_location_level_test,
    'Sperman_test': sper_test,
    'Sperman_train': sper_train,
    'std_test': std_test,
    'std_train': std_train,
    'std_test_org': std_test_org,
    'std_train_org': std_train_org,
    'mean_test': mean_test,
    'mean_train': mean_train,
    'mean_test_org': mean_test_org,
    'mean_train_org': mean_train_org,
    'DCG': dcg_metrics,
    'NDCG': ndcg_metrics,
})
# Markdown is supported
# 0% or
# You are about to add 0 people to the discussion. Proceed with caution.
# Finish editing this message first!
# Please register or sign in to comment