# Table of (input file, per-file value) pairs used to build the merged dataset.
# The second element of each pair is what ends up at the matching position of
# `lines_per_file` (presumably the number of lines taken from that file --
# confirm against merge_jsonl's definition).
_merge_sources = [
    (oss_instruction, 2000),
    (python_instruction, 2000),
    ('code_bugfix_cleaned_5K.json', 2000),
    ('codeGPT_CN_cleaned_20K.json', 6000),
    ('code_summarization_CN_cleaned_10K.json', 4000),
    ('code_generation_CN_cleaned_5K.json', 3000),
    ('summary.jsonl', 10000),
    ('apis.jsonl', 1000),
    # NOTE(review): "simliar" spelling is kept exactly as in the original path;
    # verify it matches the actual file name on disk before changing it.
    ('simliar-chunk-summary.jsonl', 2000),
]

# Split the table back into the two parallel lists that merge_jsonl receives.
merge_jsonl(
    output_file=merged_file,
    input_files=[source for source, _ in _merge_sources],
    lines_per_file=[quota for _, quota in _merge_sources],
)