Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
maize_hybrid_evaluation
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Milica Brkic
maize_hybrid_evaluation
Commits
350b02b2
Commit
350b02b2
authored
Jul 13, 2024
by
Milica Brkic
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Initial commit
parents
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
310 additions
and
0 deletions
+310
-0
maize_evaluation.py
+310
-0
No files found.
maize_evaluation.py
0 → 100644
View file @
350b02b2
import
numpy
as
np
from
skfusion
import
fusion
as
skf
import
os
import
pandas
as
pd
from
sklearn.metrics
import
mean_squared_error
,
r2_score
from
sklearn.metrics
import
mean_absolute_error
from
scipy
import
stats
def rmse(y_true, y_pred):
    """Return the root-mean-square error between ``y_true`` and ``y_pred``.

    The squared element-wise differences are summed and divided by
    ``y_true.size`` before the square root is taken, so ``y_true`` must
    expose a ``size`` attribute and support subtraction with ``y_pred``.
    """
    residual = y_true - y_pred
    mean_squared = np.sum(residual ** 2) / y_true.size
    return np.sqrt(mean_squared)
def scale(X, amin, amax):
    """Min-max rescale ``X`` linearly so that its values span ``[amin, amax]``.

    The minimum and maximum are taken over the whole of ``X`` (no axis is
    passed), so a 2-D input is rescaled as a single population rather than
    column by column. There is no guard for ``X.max() == X.min()``.
    """
    lowest = X.min()
    span = X.max() - lowest
    return (X - lowest) / span * (amax - amin) + amin
def cum_gain(relevance):
    """Return the cumulative gain, i.e. the plain sum of the relevance scores.

    ``None`` or an empty sequence yields ``0.0``.
    """
    has_scores = relevance is not None and len(relevance) >= 1
    return np.asarray(relevance).sum() if has_scores else 0.0
def dcg(relevance, alternate=True):
    """Return the discounted cumulative gain of a ranked relevance list.

    Parameters:
        relevance: relevance scores in ranked order (position 1 first).
        alternate: when true, use the exponential form
            ``sum((2**rel_i - 1) / log2(i + 1))``; otherwise use
            ``rel_1 + sum(rel_i / log2(i))`` over positions ``i >= 2``.

    Returns:
        The DCG value, or ``0.0`` for ``None`` or an empty input.
    """
    if relevance is None or len(relevance) < 1:
        return 0.0

    rel = np.asarray(relevance)
    n_items = len(rel)

    if not alternate:
        # Classic form: the first item is not discounted, the rest are
        # divided by log2 of their 1-based rank.
        discounts = np.log2(range(2, n_items + 1))
        return rel[0] + (rel[1:] / discounts).sum()

    # Exponential form: every item, the first included, is discounted by
    # log2(rank + 1).
    discounts = np.log2(np.arange(1, n_items + 1) + 1)
    gains = np.power(2, rel) - 1
    return (gains / discounts).sum()
def idcg(relevance, alternate=True):
    """Return the ideal DCG: the DCG of the scores sorted in descending order.

    ``None`` or an empty input yields ``0.0``. The caller's data is not
    modified because the sort works on a copy.
    """
    if relevance is None or len(relevance) < 1:
        return 0.0

    # np.sort returns a sorted copy; reversing it puts the best score first.
    best_first = np.sort(np.asarray(relevance))[::-1]
    return dcg(best_first, alternate)
def ndcg(relevance, nranks, alternate=True):
    """Return the normalized DCG of ``relevance`` evaluated at ``nranks``.

    Parameters:
        relevance: relevance scores in ranked order.
        nranks: number of leading positions to evaluate; shorter inputs are
            zero-padded on the right, longer ones are truncated.
        alternate: forwarded to ``dcg`` / ``idcg`` to select the DCG formula.

    Returns:
        ``dcg / idcg`` of the padded/truncated scores, or ``0.0`` when the
        input is ``None``/empty or the ideal DCG is zero.

    Raises:
        Exception: if ``nranks`` is smaller than 1 (checked only after the
            empty-input shortcut).
    """
    if relevance is None or len(relevance) < 1:
        return 0.0
    if nranks < 1:
        raise Exception('nranks < 1')

    rel = np.asarray(relevance)

    # Right-pad with zeros up to nranks (a zero-width pad changes nothing),
    # then keep only the first nranks positions.
    shortfall = max(0, nranks - len(rel))
    rel = np.pad(rel, (0, shortfall), 'constant')[:nranks]

    ideal_dcg = idcg(rel, alternate)
    if ideal_dcg == 0:
        return 0.0
    return dcg(rel, alternate) / ideal_dcg
# ---------------------------------------------------------------------------
# Data loading. All CSV paths below are relative to this hard-coded folder.
# ---------------------------------------------------------------------------
os.chdir("C:\\Users\\Milica\\Desktop\\CropChallengeData\\Dataset")

# Min/max values recorded before each rescaling step, in append order.
skaliranje = []

# Weather features: the first CSV column becomes the row index and is then
# removed from the feature columns.
v = pd.read_csv('features_28_4seasonsx7features_created_by_me.csv')
v.index = list(v.iloc[:, 0])
v = v.drop(columns=v.columns[[0]])

# Main data table, ordered by environment; its first column is discarded.
p = pd.read_csv('data.csv').sort_values(by=['ENV_ID'])
p = p.drop(columns=p.columns[[0]])

# Keep only the weather rows whose index label occurs as an ENV_ID in the
# data table.
v = v.loc[p['ENV_ID'].unique(), :]

weather = v.reset_index().rename(columns={"index": "ENV_ID"})

print('Check if exist columns in Weather dataset that contain only one value.')
for column_name in weather.columns:
    if weather[column_name].unique().size == 1:
        print(column_name)

print("Removing columns that are constant.")
# The dropped labels are hard-coded rather than taken from the check above.
# NOTE(review): `weather` is not read again in the visible code, and the
# weather matrix is later built from `v`, so this drop does not appear to
# reach the model -- confirm the intent.
weather = weather.drop(columns=['13', '14'])
#######################################################################################################################
# Only a single year is evaluated here; the analysis still needs to be
# extended to all 10 years.
year = 2017
p08 = p[p.YEAR == year]
#########################################################################

# Sorted environment identifiers that occur in the selected year.
kolone = np.sort(p08['ENV_ID'].unique())

# Hybrid x environment yield table. `orginal` keeps the pandas labels
# (its index is used later to look up hybrid ids); A12 becomes a bare matrix.
orginal = p08.pivot_table(index=['HYBRID_ID'], columns=['ENV_ID'], values=['YIELD'])
A12 = orginal.copy()
A12.columns = kolone
# Missing combinations are temporarily encoded as -1 and masked further down.
A12 = A12.fillna(-1).values

# Weather features, one row per environment in `kolone` order.
R24 = v.loc[kolone, :].values

# Re-index the data table by its first column so rows can be selected by
# the labels in `kolone`.
# NOTE(review): presumably that column is ENV_ID -- confirm against data.csv.
p.index = p.iloc[:, 0].values
soil_columns = ['ENV_MG', 'LAT', 'LONG', 'IRRIGATION', 'ELEVATION', 'CLAY',
                'SILT', 'SAND', 'AWC', 'PH', 'OM', 'CEC', 'KSAT']
R23 = p.loc[kolone, soil_columns].reset_index().drop_duplicates()
# Drop the label column introduced by reset_index; only features remain.
R23 = R23.drop(columns=R23.columns[[0]]).values

# Rescale the weather and soil matrices to [0, 1]. scale() uses the global
# min/max of each matrix, not per-column values.
R24 = scale(R24, 0, 1)
R23 = scale(R23, 0, 1)

# Mask the -1 placeholders, keep an unscaled copy, remember the original
# yield range, then rescale the yields to [0, 1].
A12 = np.ma.masked_equal(A12, -1)
org12 = A12.copy()
skaliranje.extend([A12.min(), A12.max()])
A12 = scale(A12, 0, 1)
R12 = A12.copy()

# 10-fold cross validation: every cell gets a random fold label in 0..9.
np.random.seed(111)
numbers = np.random.randint(10, size=A12.shape)
########################################################################################################################
# Per-fold metric accumulators: every list gains exactly one entry per
# cross-validation fold in the loop below.
rmse_dfmf_test = []
rmse_dfmf_train = []
r2_dfmf_test = []
cc_test = []
cc_train = []
r2_dfmf_train = []
mae_test = []
mae_train = []
precision_on_location_level_test = []
std_test = []
std_train = []
sper_train = []
sper_test = []
std_test_org = []
std_train_org = []
mean_test = []
mean_train = []
mean_test_org = []
mean_train_org = []
dcg_metrics = []
ndcg_metrics = []

# Unmasked view of the original (unscaled) yields, used for per-location
# ranking. It does not change between folds.
true_yield = np.ma.getdata(org12)

for m in range(10):
    # Observed cells labelled with fold m form the test set; all other
    # observed cells form the training set.
    hide = np.logical_and(numbers == m, ~A12.mask)
    hide_train = np.logical_and(numbers != m, ~A12.mask)

    # Mask the test cells so the factorization never sees them.
    R12 = np.ma.masked_where(hide, A12.copy())

    # Row (hybrid) and column (location) means of the training cells; they
    # are added back onto the completed matrix below.
    mean_hybrid = np.mean(R12, 1)
    mean_location = np.mean(R12, 0)

    # Factorization rank of each object type: a fraction of the matrix
    # dimension, with a fixed lower bound. A dedicated name is used so the
    # module-level DataFrame `p` is not overwritten.
    rank_fraction = 0.05
    t1 = skf.ObjectType('Hybrid', max(int(rank_fraction * R12.shape[0]), 25))
    t2 = skf.ObjectType('Location', max(int(rank_fraction * R12.shape[1]), 15))
    t3 = skf.ObjectType('Soil', max(int(rank_fraction * R23.shape[1]), 3))
    t4 = skf.ObjectType('Weather', max(int(rank_fraction * R24.shape[1]), 7))
    relations = [
        skf.Relation(R12, t1, t2, name='Yield'),
        skf.Relation(R23, t2, t3, name='Soil'),
        skf.Relation(R24, t2, t4, name='Weather'),
    ]
    graph = skf.FusionGraph(relations)

    # Results are recorded only when the test RMSE is below this sentinel;
    # otherwise (including a NaN RMSE) the zero placeholders are kept.
    min_in_sample = 1000
    # The 't1'..'t4' entries are recorded without the lower bounds that are
    # applied to the actual ranks above.
    result_full = {
        'R^2:': 0,
        'out-sample-error:': 0,
        'in-sample-error:': 0,
        'year:': year,
        'p=': rank_fraction,
        'R2_train': 0,
        't1': max(int(rank_fraction * R12.shape[0]), 0),
        't2': max(int(rank_fraction * R12.shape[1]), 0),
        'Correlation coefficient_train': 0,
        't3': max(int(rank_fraction * R23.shape[1]), 0),
        't4': max(int(rank_fraction * R24.shape[1]), 0),
        'Correlation coefficient_test': 0,
        'mae_test': 0,
        'mae_train': 0,
    }

    # Fit the data-fusion model and complete the yield matrix.
    dfmc_fuser = skf.Dfmf(max_iter=100, init_type='random')
    dfmc_mod = dfmc_fuser.fuse(graph)
    R12_pred = dfmc_mod.complete(graph['Yield'])

    # Range of the raw completion for THIS fold. It is kept in local names
    # because `skaliranje` grows on every fold: the previous fixed indices
    # skaliranje[2] / skaliranje[3] always pointed at fold 0's range.
    raw_min = R12_pred.min()
    raw_max = R12_pred.max()
    skaliranje.append(raw_min)
    skaliranje.append(raw_max)
    R12_pred = scale(R12_pred, 0, 1)

    # Add the hybrid and location means, then rescale to [0, 1] again.
    R12_pred += np.tile(mean_hybrid.reshape((A12.shape[0], 1)), (1, A12.shape[1]))
    R12_pred += np.tile(mean_location.reshape((1, A12.shape[1])), (A12.shape[0], 1))
    skaliranje.append(R12_pred.min())
    skaliranje.append(R12_pred.max())
    R12_pred = scale(R12_pred, 0, 1)

    # Map back to the raw completion range, then to original yield units.
    drugo = R12_pred * (raw_max - raw_min) + raw_min
    rez12 = drugo * (org12.max() - org12.min()) + org12.min()

    greska12 = rmse(org12[hide], rez12[hide])
    print('RMSE(out-sample dfmc) for ', year,
          'with soil and small weather attributes: {}'.format(greska12))
    r2 = r2_score(org12[hide], rez12[hide])
    print('R^2 for ', year,
          ' with soil and small weather attributes: {}'.format(r2))
    gr12 = rmse(org12[hide_train], rez12[hide_train])
    print('RMSE(in-sample dfmc) for', year, 'm=', m,
          ', with soil and small weather attributes: {}'.format(gr12))

    if greska12 < min_in_sample:
        min_in_sample = greska12
        result_full['R^2:'] = r2
        result_full['out-sample-error:'] = greska12
        result_full['in-sample-error:'] = gr12
        result_full['R2_train'] = r2_score(org12[hide_train], rez12[hide_train])
        result_full['Correlation coefficient_test'] = np.corrcoef(
            org12[hide], rez12[hide])[0, 1]
        result_full['Correlation coefficient_train'] = np.corrcoef(
            org12[hide_train], rez12[hide_train])[0, 1]
        result_full['mae_test'] = mean_absolute_error(org12[hide], rez12[hide])
        result_full['mae_train'] = mean_absolute_error(
            org12[hide_train], rez12[hide_train])

    # ------------------------------------------------------------------
    # Per-location ranking quality on the test cells: how well the three
    # hybrids with the highest predicted yield match the true top three.
    # ------------------------------------------------------------------
    pogodjeno = []
    dcg_for_each_location = []
    ndcg_for_each_location = []
    pred_yield = np.ma.getdata(rez12)

    for location in range(hide.shape[1]):
        # Rows (hybrids) of this location that belong to the test fold.
        test_rows = np.flatnonzero(hide[:, location])
        if len(test_rows) <= 3:
            # Locations with three or fewer test hybrids are skipped.
            continue

        org_prinos = true_yield[test_rows, location]
        pred_prinos = pred_yield[test_rows, location]
        hibrid_zasejan = orginal.index[test_rows]

        real_top3 = [hibrid_zasejan[r] for r in np.argsort(org_prinos)[::-1][:3]]
        pred_top3 = [hibrid_zasejan[r] for r in np.argsort(pred_prinos)[::-1][:3]]

        # Graded relevance: the true best hybrid scores 3, then 2, then 1;
        # every hybrid outside the true top three scores 0.
        real = {hybrid: 3 - rank for rank, hybrid in enumerate(real_top3)}
        scores_alg = [real.get(hybrid, 0) for hybrid in pred_top3]

        dcg_for_each_location.append(dcg(np.array(scores_alg)))
        ndcg_for_each_location.append(ndcg(np.array(scores_alg), 3))

        # Number of predicted top-3 hybrids that are also in the true top 3.
        pogodjeno.append(sum(hybrid in real for hybrid in pred_top3))

    dcg_metrics.append(np.mean(dcg_for_each_location))
    ndcg_metrics.append(np.mean(ndcg_for_each_location))
    print(dcg_metrics)
    print(ndcg_metrics)

    rmse_dfmf_test.append(result_full['out-sample-error:'])
    rmse_dfmf_train.append(result_full['in-sample-error:'])
    r2_dfmf_test.append(result_full['R^2:'])
    cc_test.append(result_full['Correlation coefficient_test'])
    cc_train.append(result_full['Correlation coefficient_train'])
    r2_dfmf_train.append(result_full['R2_train'])
    mae_test.append(result_full['mae_test'])
    mae_train.append(result_full['mae_train'])
    std_test.append(np.std(rez12[hide]))
    std_train.append(np.std(rez12[hide_train]))
    std_test_org.append(np.std(org12[hide]))
    std_train_org.append(np.std(org12[hide_train]))
    sper_test.append(stats.spearmanr(org12[hide], rez12[hide])[0])
    sper_train.append(stats.spearmanr(org12[hide_train], rez12[hide_train])[0])
    mean_test.append(np.mean(rez12[hide]))
    mean_train.append(np.mean(rez12[hide_train]))
    mean_test_org.append(np.mean(org12[hide]))
    mean_train_org.append(np.mean(org12[hide_train]))
    # Fraction of the top three that was hit, averaged over locations.
    precision_on_location_level_test.append(np.mean(np.array(pogodjeno) / 3))
# Summary table: one row per cross-validation fold, one column per metric.
# Each entry pairs a column label with the per-fold list that fills it;
# the order of the entries defines the column order.
fold_metrics = [
    ('RMSE_DFMF_test', rmse_dfmf_test),
    ('RMSE_DFMF_train', rmse_dfmf_train),
    ('R2_test', r2_dfmf_test),
    ('R2_train', r2_dfmf_train),
    ('cc_test', cc_test),
    ('cc_train', cc_train),
    ('MAE_test', mae_test),
    ('MAE_train', mae_train),
    ('Precision_on_location_level_test', precision_on_location_level_test),
    ('Sperman_test', sper_test),
    ('Sperman_train', sper_train),
    ('std_test', std_test),
    ('std_train', std_train),
    ('std_test_org', std_test_org),
    ('std_train_org', std_train_org),
    ('mean_test', mean_test),
    ('mean_train', mean_train),
    ('mean_test_org', mean_test_org),
    ('mean_train_org', mean_train_org),
    ('DCG', dcg_metrics),
    ('NDCG', ndcg_metrics),
]

# Start from a zero-filled 10 x 21 frame and overwrite it column by column.
zero_data = np.zeros(shape=(10, 21))
krajnje = pd.DataFrame(zero_data, columns=[label for label, _ in fold_metrics])
for position, (_, values) in enumerate(fold_metrics):
    krajnje.iloc[:, position] = values
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment