Browse Source

Update model selection function.

v0.2.x
jajupmochi 4 years ago
parent
commit
7f66196251
1 changed file with 17 additions and 16 deletions
  1. +17
    -16
      gklearn/utils/model_selection_precomputed.py

+ 17
- 16
gklearn/utils/model_selection_precomputed.py View File

@@ -30,6 +30,7 @@ def model_selection_for_precomputed_kernel(datafile,
datafile_y=None,
extra_params=None,
ds_name='ds-unknown',
output_dir='outputs/',
n_jobs=1,
read_gm_from_file=False,
verbose=True):
@@ -56,7 +57,7 @@ def model_selection_for_precomputed_kernel(datafile,
model_type : string
Type of the problem, can be 'regression' or 'classification'.
NUM_TRIALS : integer
Number of random trials of outer cv loop. The default is 30.
Number of random trials of the outer CV loop. The default is 30.
datafile_y : string
Path of file storing y data. This parameter is optional depending on
the given dataset file.
@@ -89,9 +90,9 @@ def model_selection_for_precomputed_kernel(datafile,
"""
tqdm.monitor_interval = 0

results_dir = '../notebooks/results/' + estimator.__name__
if not os.path.exists(results_dir):
os.makedirs(results_dir)
output_dir += estimator.__name__
if not os.path.exists(output_dir):
os.makedirs(output_dir)
# a string to save all the results.
str_fw = '###################### log time: ' + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '. ######################\n\n'
str_fw += '# This file contains results of ' + estimator.__name__ + ' on dataset ' + ds_name + ',\n# including gram matrices, serial numbers for gram matrix figures and performance.\n\n'
@@ -209,7 +210,7 @@ def model_selection_for_precomputed_kernel(datafile,
# threshold=np.inf,
# floatmode='unique') + '\n\n'

fig_file_name = results_dir + '/GM[ds]' + ds_name
fig_file_name = output_dir + '/GM[ds]' + ds_name
if params_out != {}:
fig_file_name += '[params]' + str(idx)
plt.imshow(Kmatrix)
@@ -244,7 +245,7 @@ def model_selection_for_precomputed_kernel(datafile,
str_fw += '\nall gram matrices are ignored, no results obtained.\n\n'
else:
# save gram matrices to file.
# np.savez(results_dir + '/' + ds_name + '.gm',
# np.savez(output_dir + '/' + ds_name + '.gm',
# gms=gram_matrices, params=param_list_pre_revised, y=y,
# gmtime=gram_matrix_time)
if verbose:
@@ -450,7 +451,7 @@ def model_selection_for_precomputed_kernel(datafile,
print()
print('2. Reading gram matrices from file...')
str_fw += '\nII. Gram matrices.\n\nGram matrices are read from file, see last log for detail.\n'
gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz')
gmfile = np.load(output_dir + '/' + ds_name + '.gm.npz')
gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed
gram_matrix_time = gmfile['gmtime'] # time used to compute the gram matrices
param_list_pre_revised = gmfile['params'] # list to store param grids precomputed ignoring the useless ones
@@ -603,8 +604,8 @@ def model_selection_for_precomputed_kernel(datafile,
str_fw += 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s\n\n'.format(tt_poster)

# open file to save all results for this dataset.
if not os.path.exists(results_dir):
os.makedirs(results_dir)
if not os.path.exists(output_dir):
os.makedirs(output_dir)
# print out results as table.
str_fw += printResultsInTable(param_list, param_list_pre_revised, average_val_scores,
@@ -613,11 +614,11 @@ def model_selection_for_precomputed_kernel(datafile,
model_type, verbose)
# open file to save all results for this dataset.
if not os.path.exists(results_dir + '/' + ds_name + '.output.txt'):
with open(results_dir + '/' + ds_name + '.output.txt', 'w') as f:
if not os.path.exists(output_dir + '/' + ds_name + '.output.txt'):
with open(output_dir + '/' + ds_name + '.output.txt', 'w') as f:
f.write(str_fw)
else:
with open(results_dir + '/' + ds_name + '.output.txt', 'r+') as f:
with open(output_dir + '/' + ds_name + '.output.txt', 'r+') as f:
content = f.read()
f.seek(0, 0)
f.write(str_fw + '\n\n\n' + content)
@@ -797,7 +798,7 @@ def parallel_trial_do(param_list_pre_revised, param_list, y, model_type, trial):


def compute_gram_matrices(dataset, y, estimator, param_list_precomputed,
results_dir, ds_name,
output_dir, ds_name,
n_jobs=1, str_fw='', verbose=True):
gram_matrices = [
] # a list to store gram matrices for all param_grid_precomputed
@@ -867,7 +868,7 @@ def compute_gram_matrices(dataset, y, estimator, param_list_precomputed,
# threshold=np.inf,
# floatmode='unique') + '\n\n'

fig_file_name = results_dir + '/GM[ds]' + ds_name
fig_file_name = output_dir + '/GM[ds]' + ds_name
if params_out != {}:
fig_file_name += '[params]' + str(idx)
plt.imshow(Kmatrix)
@@ -897,8 +898,8 @@ def compute_gram_matrices(dataset, y, estimator, param_list_precomputed,
return gram_matrices, gram_matrix_time, param_list_pre_revised, y, str_fw


def read_gram_matrices_from_file(results_dir, ds_name):
gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz')
def read_gram_matrices_from_file(output_dir, ds_name):
gmfile = np.load(output_dir + '/' + ds_name + '.gm.npz')
gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed
param_list_pre_revised = gmfile['params'] # list to store param grids precomputed ignoring the useless ones
y = gmfile['y'].tolist()


Loading…
Cancel
Save