Skip to content

Commit

Permalink
eval output + input changes back
Browse files Browse the repository at this point in the history
  • Loading branch information
Ubuntu committed Nov 21, 2022
1 parent 12a3954 commit be2f32b
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 21 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ export PYTHONHASHSEED=31337
After setting the environment variable, reactivate your environment:
```
conda deactivate
conda acivate cherri
conda activate cherri
```

#### Manual installation
Expand Down Expand Up @@ -193,7 +193,9 @@ Input parameters for CheRRI's **eval** mode (`cherri eval`):
#### Output in evaluation mode

At the end of the run the location of the results table is given.
The final results table will have all columns of the input table and an additional prediction column, where you find the predicted class of each RRI (0 or 1).
The final results table will contain the target and query IDs of your input sequences (`target_ID`, `query_ID`), the score of each instance (`instance_score`), and the predicted class of each RRI, 0 or 1 (`predicted_label`). If you are running the validation mode with `-hf on`, the positive or negative label is also given (`true_lable`). Finally, all features of the instance are provided.

The IDs are a summary of `chromosome;strand;start;stop` of the first (target) and the second (query) sequence.

Throughout the program, several output files are generated and stored in the following structure:

Expand Down
26 changes: 20 additions & 6 deletions bin/cherri
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,9 @@ def setup_argument_parser():
p_mrg.add_argument("-fh", "--filter_hybrid",
default="off",
help= "Filter the data for hybrids already detected by ChiRA (set 'on' to filter, default:'off')")
p_mrg.add_argument("-on", "--out_name",
default="non",
help= "Name for the output directory, default 'date_Cherri_evaluating_RRIs' ")



Expand Down Expand Up @@ -502,15 +505,16 @@ def main_train(args):
| ├── test_train_context_50_pos_occ_neg.csv
| ├── test_train_context_50_pos_occ_pos.csv
| ├── feature_files
| ├── feature_filtered_test_eval_context_150_pos.csv
| ├── feature_filtered_test_eval_context_150_neg.csv
| ├── training_data_test_eval_context_150.npz
| ├── feature_filtered_test_train_context_150_pos.csv
| ├── feature_filtered_test_train_context_150_neg.csv
| ├── training_data_test_train_context_150.npz
| ├── model
| ├── features
| ├── test_train_context_50.npz
| ├── optimized
| ├── test_train_context_50.model
| ├── test_train_context_50.csv
| ├── full_test_train_context_50.model
"""

args = parser.parse_args()
Expand All @@ -537,6 +541,8 @@ def main_train(args):
n_jobs = args.n_jobs
mixed = args.mixed
filter_hybrid = args.filter_hybrid
out_name = args.out_name


methods = (f'extra_trees passive_aggressive random_forest sgd '
f'gradient_boosting mlp')
Expand All @@ -552,7 +558,12 @@ def main_train(args):

# define output folder
timestr = time.strftime("%Y%m%d")
out_path = f'{out_path}/{timestr}_Cherri_build_model/'

if out_name == 'non':
out_path = f'{out_path}/{timestr}_Cherri_build_model/'
else:
out_path = f'{out_path}/{out_name}/'

#if set_path == 'off' and mixed == 'off':
# if not os.path.exists(out_path):
# os.mkdir(out_path)
Expand Down Expand Up @@ -650,7 +661,8 @@ def main_train(args):
midel_name = f'{experiment_name}_context_{str(context)}'
X_list = []
y_list = []
for data in replicates:
# args.list_of_replicates
for data in args.list_of_replicates:
feature_path = f'{input_path_RRIs}/{data}/feature_files/'
feature_neg = (f'{feature_path}/feature_filtered_{data}_context_'
f'{str(context)}_pos_occ_neg.csv')
Expand Down Expand Up @@ -719,7 +731,9 @@ def main_train(args):
opt_call = (f'python -W ignore -m biofilm.biofilm-optimize6 {loaddata} '
f'--memoryMBthread {memoryPerThread} --folds 0 '
f'--out {opt_path}{midel_name} --preprocess True '
f'--n_jobs {n_jobs} --time {run_time} --methods {methods}')
f'--n_jobs {n_jobs} --time {run_time} --methods {methods}'
# f' --tmp_folder {opt_path}\autosklearn_temp'
)

print('4a. Optimize model\n')
#print(opt_call)
Expand Down
14 changes: 3 additions & 11 deletions bin/find_occupied_regions.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,16 +196,8 @@ def main():
#### Get RRI data by calling find trusted RRI with a very low overlap th of 5%
### only take uniquely mapped reads

####### Get RRI data
list_rep = []
for rep in replicates:
list_rep.append(input_path_RRIs + '/' + rep)
replicate_string = ' '.join(list_rep)

print(list_rep)


rri_call_param = ('-i ' + 'not_needed' + ' -r ' + replicate_string +
rri_call_param = ('-i ' + input_path_RRIs + ' -r ' + ' '.join(replicates) +
' -o ' + str(overlap_th) +' -n rri_occupied_regions -d ' +
out_path + ' -s ' + str(score_th))
if filter_hybrid == 'on':
Expand All @@ -215,9 +207,9 @@ def main():
rri_file = (out_path + 'rri_occupied_regions_overlap_' +
str(overlap_th) + '.csv')

if len(list_rep) == 1:
if len(replicates) == 1:
print('Info: only one experiment is used to build occupied regions')
in_file = list_rep[0]
in_file = replicates[0]
print(in_file)
# df_replicat = rl.read_chira_data(in_file)
sep = rl.check_file_type(in_file)
Expand Down
4 changes: 2 additions & 2 deletions bin/find_trusted_RRI.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@ def build_replicat_library_to_compare(input_path, list_of_replicates, score_th):
inter_replicat_list = []
rep_size_list = []
for file in list_of_replicates:
# in_file = input_path + '/' + file
in_file = file
in_file = input_path + '/' + file
#in_file = file
df_test = pd.read_table(in_file, sep=',')
#print(df_test.info())
sep = rl.check_file_type(in_file)
Expand Down

0 comments on commit be2f32b

Please sign in to comment.