#!/bin/bash
VERSION=2.1.8
set -e
set -o pipefail
# User-configurable parameters
# Model configuration
HTRLOGHIMODELHEIGHT=64
# Set to 1 to finetune an existing model, 0 to train a new model
USEBASEMODEL=0
HTRBASEMODEL=PATH_TO_HTR_BASE_MODEL
# VGSL specification of the model to train from scratch
# Only used if USEBASEMODEL is set to 0
# "recommended" selects the recommended model from the model library
HTRNEWMODEL="recommended"
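# Illustrative, unverified sketch of an explicit VGSL string; the exact layer
# syntax accepted by Loghi HTR is defined in its documentation / model library,
# so treat this commented example as a placeholder only:
#HTRNEWMODEL="None,64,None,1 Cr3,3,24 Mp2,2,2,2 Cr3,3,48 Mp2,2,2,2 Rc Bl256 O1s92"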
# Set channels to 1 to process input as grayscale, 3 for color, 4 for color and mask
channels=1
# GPU id to use: set to "-1" for CPU, "0" for the first GPU, "1" for the second, etc.
GPU=0
# Dataset and training configuration
listdir=PATH_TO_LISTDIR
trainlist=$listdir/training_all_train.txt
validationlist=$listdir/training_all_val.txt
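# Note: each list file is expected to contain one sample per line, typically the
# path to a text-line image and its transcription separated by a tab; check the
# Loghi HTR documentation for the exact format expected by your version.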
# If the images are not located under listdir, set datadir to the directory that contains them
# Defaults to /tmp/path_to_training_images
datadir=/tmp/path_to_training_images
# Training configuration
epochs=1
height=$HTRLOGHIMODELHEIGHT
multiply=1
# Set to 1 to replace the final layer when finetuning a base model
# Recommended if your data contains many characters the base model has not been trained on
REPLACEFINALLAYER=0
# Best not to set batch_size lower than 2
batch_size=40
model_name=myfirstmodel
learning_rate=0.0003
tmpdir=$(mktemp -d)
# Set the output directory
outputdir=$tmpdir/output
# Example: use a permanent output directory instead
#outputdir=/home/loghiuser/loghi-model-output
# DO NOT MODIFY BELOW THIS LINE
# ------------------------------
DOCKERLOGHIHTR=loghi/docker.htr:$VERSION
# Set new model as default
MODEL=$HTRNEWMODEL
MODELDIR=""
REPLACEFINAL=""
# DO NOT REMOVE THIS PLACEHOLDER LINE, IT IS USED FOR AUTOMATIC TESTING
#PLACEHOLDER#
mkdir -p $outputdir
# Base model option
if [[ $USEBASEMODEL -eq 1 ]]; then
    MODEL=$HTRBASEMODEL
    MODELDIR="-v $(dirname "${MODEL}"):$(dirname "${MODEL}")"
    echo $MODELDIR
fi
# Replace final layer option
if [[ $REPLACEFINALLAYER -eq 1 ]]; then
    REPLACEFINAL="--replace_final_layer"
    echo $REPLACEFINAL
fi
# GPU options
DOCKERGPUPARAMS=""
if [[ $GPU -gt -1 ]]; then
    DOCKERGPUPARAMS="--gpus device=${GPU}"
    echo "Using GPU ${GPU}"
fi
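# Note: the --gpus option requires the NVIDIA Container Toolkit to be installed on the host.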
# Starting the training
echo "Starting Loghi HTR training with model $MODEL"
docker run $DOCKERGPUPARAMS --rm -u $(id -u ${USER}):$(id -g ${USER}) -m 32000m --shm-size 10240m -ti \
        $MODELDIR \
        -v $tmpdir:$tmpdir \
        -v $listdir:$listdir \
        -v $datadir:$datadir \
        -v $outputdir:$outputdir \
        $DOCKERLOGHIHTR \
        python3 /src/loghi-htr/src/main.py \
        --train_list $trainlist \
        --do_validate \
        --validation_list $validationlist \
        --learning_rate $learning_rate \
        --channels $channels \
        --batch_size $batch_size \
        --epochs $epochs \
        --gpu $GPU \
        --height $height \
        --use_mask \
        --seed 1 \
        --beam_width 1 \
        --model $MODEL \
        --aug_multiply $multiply \
        --model_name $model_name \
        --output $outputdir \
        $REPLACEFINAL
echo "Results can be found at:"
echo $tmpdir
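# $tmpdir is created with mktemp and is not removed automatically; if you want to
# keep the trained model, copy the output to a permanent location of your choice
# (the destination path below is only an example):
#cp -r "$outputdir" /path/to/permanent/model/dir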