generated from LPBeaulieu/Typewriter-OCR-TintypeText
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathprintabook.py
2951 lines (2773 loc) · 197 KB
/
printabook.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import glob
import numpy as np
import os
from PIL import Image, ImageDraw, ImageFont, ImageOps
import re
import sys
import math
#Lookup tables used to recognise numbered headings. Each table holds the
#values 1 to 100 in a given notation, and every "*_dots" variant is the
#same table with a period appended to each entry.

#Arabic numerals ("1" to "100").
numbers = [str(value) for value in range(1, 101)]
numbers_dots = [entry + "." for entry in numbers]


def _to_roman(value):
    """Return the upper-case Roman numeral for an integer from 1 to 100."""
    symbols = []
    #Greedy conversion: take as many of each symbol as fits, from the
    #largest value down to the smallest.
    for amount, symbol in ((100, "C"), (90, "XC"), (50, "L"), (40, "XL"),
                           (10, "X"), (9, "IX"), (5, "V"), (4, "IV"), (1, "I")):
        repeats, value = divmod(value, amount)
        symbols.append(symbol * repeats)
    return "".join(symbols)


#Roman numerals ("I" to "C").
roman_numerals = [_to_roman(value) for value in range(1, 101)]
roman_numerals_dots = [entry + "." for entry in roman_numerals]

#Numbers spelled out in lower-case letters ("one" to "ninety-nine"),
#with the last entry (100) written as "hundred".
_number_words_below_twenty = [
    "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
    "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen",
    "seventeen", "eighteen", "nineteen",
]
_tens_words = [
    "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety",
]
numbers_letters_lower = (
    _number_words_below_twenty
    + [tens if unit == 0 else tens + "-" + _number_words_below_twenty[unit - 1]
       for tens in _tens_words for unit in range(10)]
    + ["hundred"]
)
numbers_letters_lower_dots = [entry + "." for entry in numbers_letters_lower]
def _pick_single_file(candidates, missing_message, multiple_message):
    """Return the only path in "candidates", or None after reporting why not.

    Parameters:
        candidates: list of paths returned by "glob.glob".
        missing_message: text printed when the list is empty.
        multiple_message: text printed when the list has several entries.

    Returns:
        The single path when exactly one file was found, otherwise None.
    """
    if len(candidates) == 1:
        return candidates[0]
    print(multiple_message if candidates else missing_message)
    return None


cwd = os.getcwd()
#The "problem" variable is initialized to "False"
#and will be set to "True" should the code encounter
#any problems, in order to give the user relevant error
#messages along the way.
problem = False

#Exactly one TXT file (the book) must be present in the working folder.
#"txt_file_name" is always bound: it is "None" when no file or several
#files were found, so that later checks can test it safely.
path_txt = os.path.join(cwd, "*.txt")
txt_files = glob.glob(path_txt)
txt_file_name = _pick_single_file(
    txt_files,
    '\nPlease include a TXT file containing the book that you wish to print in the working folder.',
    "\nPlease include only one TXT file containing the book that you wish to print in the working folder.")

#The demonstration image entitled "Floral Pattern Background 843" was taken from the following source
#and is licenced for public domain use ("CC0 Public Domain"):
#https://www.publicdomainpictures.net/en/view-image.php?image=214080&picture=floral-pattern-background-843
#The user can select their own background image as well and the text box fill color on the cover page will
#be determined from the complementary color to one of the darkest pixels in the image. The text color on
#the cover page will be taken from the lightest pixel on the canvas.
#Only files with the ".jpg" extension are searched for.
path_jpeg = os.path.join(cwd, "*.jpg")
jpeg_files = glob.glob(path_jpeg)
background_img = _pick_single_file(
    jpeg_files,
    ('\nPlease include a JPEG file containing the image that you ' +
     'wish to use as a background for the book cover in the working folder. Also, please ' +
     'make sure that the provided background image is in JPEG format, ' +
     "with a resolution of 300 ppi and a canvas size of US Legal dimensions in " +
     "landscape mode (width of 4200 pixels and height of 2550 pixels)."),
    ("\nPlease include only one JPEG file containing the image that you " +
     "wish to use as a background for the book cover in the working folder."))

#Exactly one True Type Font file is needed for the cover page text.
path_ttf = os.path.join(cwd, "*.ttf")
ttf_files = glob.glob(path_ttf)
cover_font = _pick_single_file(
    ttf_files,
    ("\nPlease include a True Type Font (.ttf) file containing " +
     "the font you wish to use on the cover page in the working folder."),
    ("\nPlease include only one True Type Font (.ttf) file containing " +
     "the font you wish to use on the cover page in the working folder."))

#Any missing or ambiguous input file is a problem, including the case
#where several TXT files are present.
if txt_file_name is None or background_img is None or cover_font is None:
    problem = True
#Default settings for the title and the cover. Most of them can be
#overridden through the command line arguments parsed further down.

#The "small_caps" variable determines whether the forward slashes
#will be changed to smallcaps RTF commands (r"\scaps" and r"\scaps0").
#The default setting is "True", meaning that the forward slashes will
#be changed to smallcaps RTF commands. Passing the argument
#"keep_forward_slashes" sets "small_caps" to "False", which
#maintains the forward slashes.
small_caps = True
#The book title, supplied through the "title:" argument.
title = None
#The user has the option to go with the automatic
#splitting of the title, or to manually indicate where the
#line breaks should be by placing at least two successive
#spaces in-between the words that are to be split onto different lines.
custom_title_line_breaks = False
#The author name, supplied through the "author:" argument.
author = None
#If the title needs to be split
#in order to fit in the title page,
#the value of "adjusted_title_rtf"
#will be set to the string containing
#a "\line" linebreak RTF command and
#will be updated in text[title_index].
#A similar approach is taken for the
#title and author name on the cover.
adjusted_title_rtf = None
adjusted_title_cover = None
adjusted_author_rtf = None
adjusted_author_cover = None
#"spine_text" is initialized as "None",
#and a value can be supplied by the user
#(argument "spine_text:") should they want
#to use different text than the abbreviated
#author name, followed by a hyphen and the book title.
spine_text = None
#Should the cover title need to be split,
#the default line spacing in-between title lines
#is initialized at 5 pixels, and may be altered
#by the user.
cover_title_line_spacing = 5
#Presumably the equivalent spacing (in pixels) in-between the lines
#of a split author name on the cover -- TODO confirm against the
#code that draws the cover.
cover_author_line_spacing = 4
#The fill color of the cover text box and the color of the cover text.
#They are "None" unless the user provides the "cover_box_color:" and
#"cover_text_color:" arguments.
cover_box_color = None
cover_text_color = None
#Width of the cover trim in inches (the "cover_trim_width_cm:" argument
#is converted to inches by dividing by 2.54).
cover_trim_width = 0.25
#The "cover_line" variable determines whether
#a dark border will be present on the cover,
#before the white trim. The default setting
#includes such a border. However, users who
#trim their pages with a stack page guillotine
#cutter would likely be left with an uneven
#line after cutting, so they may remove the
#border by passing the argument "no_cover_line"
#when running the Python code.
cover_line = True
#An extra 20 pixels are added to the cover width,
#to account for binding irregularities and the
#thickness of the glue:
cover_extra_pixels = 20
#The "pixels_from_bottom_cover_spine" variable
#determines how many pixels are added to the
#starting "y" coordinate (in the rotated image)
#from the bottom of the spine box to reach the
#point where the spine text will start to be written.
#Negative values will bring the text down.
pixels_from_bottom_cover_spine = 3
#A similar approach is taken with the variable
#"pixels_from_left_cover_spine" to determine how
#many pixels are added to the starting "x" coordinate
#(in the rotated image) from the left edge of the
#spine box to reach the point where the spine
#text will start to be written. Negative values
#will bring the text left.
pixels_from_left_cover_spine = 0
#The "pixels_from_top_cover_title_box" variable
#determines how many pixels are added to the
#starting "y" coordinate (in the unrotated image)
#from the top of the cover title box to reach the
#point where the cover title text will start to be
#written. Negative values will bring the text up.
pixels_from_top_cover_title_box = 10
#A similar approach is taken with the variable
#"pixels_from_left_cover_title_box" to determine how
#many pixels are added to the starting "x" coordinate
#(in the unrotated image) from the left edge of the
#cover title box to reach the point where the cover
#title text will start to be written. Negative
#values will bring the text left.
pixels_from_left_cover_title_box = 0
#The page count and the thickness of a ream of 500 pages are "None"
#unless provided through the "number_of_pages:",
#"inches_per_ream_500_pages:" or "cm_per_ream_500_pages:" arguments.
#Presumably they are used to work out the spine width -- TODO confirm
#against the cover-generation code.
number_of_pages = None
inches_per_ream_500_pages = None
cm_per_ream_500_pages = None
#Set to "True" by the "grayscale" (or "greyscale") argument.
grayscale = False
#The RTF font sizes (r"\fsN") are expressed in
#half-points and not in points. This means that
#a title size of r"\fs112" is really
#in size 56.
title_size = r"\fs112 "
#The "cover_title_font_size" is initialized
#at 125 pixels and the code will determine the largest
#font size that fits within the front cover box.
#The user can specify another starting value for
#"cover_title_font_size".
cover_title_font_size = 125
#"None" unless the user provides the "subtitle_size:" argument.
subtitle_size = None
#The spacing on the cover in-between
#the title and the author name will be
#a certain proportion of the cover title
#height and is set to 20% by default.
cover_spacing_title_height_ratio = 0.20
#The "max_author_title_font_ratio" variable
#determines the maximum ratio between the font size
#of the author name and that of the title,
#to provide a starting font size while automatically
#adjusting the font size to the available space.
#"max_subtitle_title_font_ratio" presumably plays the same
#role for the subtitle -- TODO confirm.
max_author_title_font_ratio = 0.75
max_subtitle_title_font_ratio = 0.75
#The "cover_author_font_size" is initialized
#at 94 pixels and the code will determine the largest
#font size that fits within the front cover box.
#The user can specify another starting
#value for "cover_author_font_size".
cover_author_font_size = 94
#Similarly, the "spine_font_size"
#default value is set at 100 pixels,
#and the code will determine the largest
#font size that fits within the spine.
#The user can specify another starting
#value for "spine_font_size".
spine_font_size = 100
#The font size of the divider
#separating the title from the
#subtitle and author name
#(r"\fs72" is size 36).
divider_size = r"\fs72 "
#To ensure that the spacing between the
#Title, Subtitle and Author is the
#same, "title_page_spacing" is set as
#the font size before "\line" RTF commands.
title_page_spacing = r"\fs36 "
#Body text in size 17, headers in size 16 and chapter
#headings in size 25.5 (all expressed in half-points).
body_font_size = r"\fs34 "
header_font_size = r"\fs32 "
chapter_heading_font_size = r"\fs51 "
#The "max_chapter_heading_body_font_ratio"
#variable determines the maximum ratio between the
#chapter headings font size and that of the body text,
#to provide a starting font size while automatically
#adjusting the font size to the available space.
max_chapter_heading_body_font_ratio = 1.5
#By default, the chapter headings will not be in bold,
#but the user can pass in the argument "bold_chapter_headings"
#to make them bold.
bold_chapter_headings = False
#The default tab width of 360 twips (0.25 inch)
#is said to correspond to the width of four
#spaces written in Baskerville and may be adjusted.
#NOTE(review): the original comment referred to "size 17 (r"\fs28")",
#but r"\fs28" is size 14, whereas the default body text (r"\fs34")
#is size 17 -- confirm which size was meant.
tab_width = r"\deftab360 "
font = "Baskerville"
#"title_page_posy" determines
#the starting vertical distance (in twips), from
#the top left of the page, where the title page
#paragraph starts.
title_page_posy = r"\posy4040 "
#The header offset and the bottom margin
#are set to 0, given that these are
#the lowest possible values when printing 2 pages
#on a sheet of letter paper, due to the
#different proportions of the resulting
#sheets (8.5x11" vs 5.5x8.5" for the
#individual pages). The top margin is set to
#720 twips (0.5 inch).
header_top_margin = r"\headery0 "
top_margin = r"\margt720 "
bottom_margin = r"\margb0 "
#The default value for the left and right
#margins is 1600 twips. At 1440 twips per inch,
#this equates to about 1.11 inches.
#NOTE(review): the original comment stated "about 0.75 inches",
#which would be 1080 twips -- confirm the intended margin.
#According to the following link
#https://kdp.amazon.com/en_US/help/topic/GVBQ3CMEQW3W2VL6
#these specifications would be suitable
#for books up to 700 pages in length,
#which should cover most books.
left_margin = r"\margl1600 "
left_margin_twips = 1600
right_margin = r"\margr1600 "
right_margin_twips = 1600
#The default left and right
#margins on the cover page are set to 0.75 inches
#from the edges of the half-letter page (5.5 inches wide).
#The left margin can be determined by subtracting the space
#in-between the margins (4.75 inches) from the right edge
#pixel count: (4200 - 4.75*4200/14) = 2775 px
left_margin_cover_textbox = 2775
#The right margin is calculated as
#4220-(0.75*4200/14) = 3995 px
#NOTE(review): the canvas is 4200 pixels wide, which would give
#3975 px; confirm whether the extra 20 pixels are intentional.
right_margin_cover_textbox = 3995
#The top margin of the text box on the cover page is
#set to roughly 25% of the vertical pixels, counted from
#the starting y coordinate of 0 (2550/4 = 637.5, rounded up to 640).
top_margin_cover_textbox = 640
#A line spacing of 276 twips (r"\sl276\slmult1")
#is equivalent to 1.15 line spacing. Here is
#a list of common line spacings, but anything
#in-between may be specified:
# 240 twips single spacing by default
# 360 twips (1.5 spacing)
# 480 twips (double spacing)
# 720 twips (triple spacing)
line_spacing = r"\sl276\slmult1 "
#The "points_between_paragraphs" variable holds
#the number of twips after a paragraph (as a string) and will
#be concatenated to the "\saN" RTF command to provide
#the spacing in-between paragraphs. It is set to zero
#by default, as novels typically do not have spacing
#in-between paragraphs, but may be modified by the user,
#who inputs the distance as a number of points
#(as those used to measure font sizes); the value is then
#converted to twips (20 twips per point).
points_between_paragraphs = "0"
#The number of carriage returns before
#the section headings (prologue, chapters,
#epilogue, etc.) is set to 6 and may be
#adjusted.
number_of_lines_above_chapter_headings = 6
def _rtf_font_size(points_text):
    r"""Return the RTF font size command (r"\fsN ") for a size given in points.

    RTF font sizes are expressed in half-points, so the value is multiplied
    by two and rounded (for example "10.5" gives r"\fs21 "). The trailing
    space delimits the control word, as in the default settings.

    Raises ValueError if "points_text" is not a number.
    """
    return r"\fs" + str(round(float(points_text) * 2)) + " "


def _inches_to_twips(inches_text):
    """Convert a length in inches (given as text) to twips (1/1440th of an inch)."""
    return round(float(inches_text) * 1440)


def _cm_to_twips(cm_text):
    """Convert a length in centimeters (given as text) to twips."""
    return round(float(cm_text) / 2.54 * 1440)


def _clean_author_name(author_text):
    """Return the author name without a leading "by " and with capitalized words.

    Only the first letter of every space-separated word is changed to upper
    case. The remaining letters are kept as typed, since "str.capitalize"
    would also lower-case them (turning "McDonald" into "Mcdonald").
    """
    name = author_text.strip()
    if len(name) > 3 and name[:3].lower() == "by ":
        name = name[3:].strip()
    #Splitting with a capturing group retains the spaces, so that the
    #original spacing is restored when the words are joined back together.
    return "".join(word[:1].upper() + word[1:] for word in re.split(r"( )", name))


if len(sys.argv) > 1:
    #Every additional argument is either a flag (such as "no_cover_line")
    #or the name of a setting, followed by a colon and the desired value
    #(such as "title:Your Title Here"). Unknown arguments are ignored.
    #The "try/except" statement will intercept the errors raised when a
    #value cannot be converted to a number and ask the users to correctly
    #enter the desired values directly after the colon separating the
    #variable name from the value.
    try:
        for raw_argument in sys.argv[1:]:
            argument = raw_argument.strip()
            lowered = argument.lower()
            #"option" is the text before the first colon and "value"
            #is everything after it (which may itself contain colons).
            option, separator, value = argument.partition(":")
            option = option.lower()

            #Flags, which do not take any value.
            if lowered.startswith("bold_chapter_headings"):
                bold_chapter_headings = True
            elif lowered in ("grayscale", "greyscale"):
                grayscale = True
            elif lowered.startswith("no_cover_line"):
                cover_line = False
            elif lowered.startswith("keep_forward_slashes"):
                small_caps = False
            elif not separator:
                #Without a colon, the argument cannot be a "name:value" setting.
                continue

            elif option == "title":
                title = value.strip()
            elif option == "author":
                author = _clean_author_name(value)

            #The font sizes are automatically multiplied by two, so that
            #should a user enter "10.5" points, it would be registered as
            #21 half-points by the code. The "round" function rounds the
            #half-point value should it not be an integer.
            elif option == "title_size":
                title_size = _rtf_font_size(value)
            elif option == "subtitle_size":
                subtitle_size = _rtf_font_size(value)
            elif option == "divider_size":
                divider_size = _rtf_font_size(value)
            elif option == "title_page_spacing":
                title_page_spacing = _rtf_font_size(value)
            elif option == "body_font_size":
                body_font_size = _rtf_font_size(value)
            elif option == "header_font_size":
                header_font_size = _rtf_font_size(value)
            elif option == "chapter_heading_font_size":
                chapter_heading_font_size = _rtf_font_size(value)

            #NOTE(review): "max_title_author_font_ratio" is stored under its
            #own name, which differs from "max_author_title_font_ratio";
            #confirm that both are meant to exist.
            elif option == "max_title_author_font_ratio":
                max_title_author_font_ratio = float(value)
            elif option == "max_author_title_font_ratio":
                max_author_title_font_ratio = float(value)
            elif option == "max_subtitle_title_font_ratio":
                max_subtitle_title_font_ratio = float(value)
            elif option == "max_chapter_body_font_ratio":
                max_chapter_heading_body_font_ratio = float(value)

            #The margins are entered by the user in inches (or in
            #centimeters for the "_cm" variants), which are converted
            #into twips (1/1440th of an inch).
            elif option == "tab_width":
                tab_width = r"\deftab" + str(_inches_to_twips(value)) + " "
            elif option == "tab_width_cm":
                tab_width = r"\deftab" + str(_cm_to_twips(value)) + " "
            elif option == "font":
                font = value
            #The RTF command for the vertical position of the title page
            #paragraph is "\posyN".
            elif option == "title_page_top_margin":
                title_page_posy = r"\posy" + str(_inches_to_twips(value)) + " "
            elif option == "title_page_top_margin_cm":
                title_page_posy = r"\posy" + str(_cm_to_twips(value)) + " "
            elif option == "left_margin":
                left_margin_twips = _inches_to_twips(value)
                left_margin = r"\margl" + str(left_margin_twips) + " "
            elif option == "left_margin_cm":
                left_margin_twips = _cm_to_twips(value)
                left_margin = r"\margl" + str(left_margin_twips) + " "
            elif option == "right_margin":
                right_margin_twips = _inches_to_twips(value)
                right_margin = r"\margr" + str(right_margin_twips) + " "
            elif option == "right_margin_cm":
                right_margin_twips = _cm_to_twips(value)
                right_margin = r"\margr" + str(right_margin_twips) + " "
            elif option == "top_margin":
                top_margin = r"\margt" + str(_inches_to_twips(value)) + " "
            elif option == "top_margin_cm":
                top_margin = r"\margt" + str(_cm_to_twips(value)) + " "
            elif option == "bottom_margin":
                bottom_margin = r"\margb" + str(_inches_to_twips(value)) + " "
            elif option == "bottom_margin_cm":
                bottom_margin = r"\margb" + str(_cm_to_twips(value)) + " "
            elif option == "header_top_margin":
                header_top_margin = r"\headery" + str(_inches_to_twips(value)) + " "
            elif option == "header_top_margin_cm":
                header_top_margin = r"\headery" + str(_cm_to_twips(value)) + " "

            #The line spacing is entered as a number of lines
            #(240 twips per line of single spacing).
            elif option == "line_spacing":
                line_spacing = r"\sl" + str(round(float(value) * 240)) + r"\slmult1 "
            #The spacing between paragraphs is entered in points
            #and stored in twips (1440 twips for every 72 points).
            elif option == "points_between_paragraphs":
                points_between_paragraphs = str(round(float(value) * 1440 / 72))
            elif option == "number_of_lines_above_chapter_headings":
                number_of_lines_above_chapter_headings = int(value)

            elif option == "number_of_pages":
                number_of_pages = int(value)
            elif option == "inches_per_ream_500_pages":
                make_cover = True
                inches_per_ream_500_pages = float(value)
            elif option == "cm_per_ream_500_pages":
                make_cover = True
                cm_per_ream_500_pages = float(value)
                inches_per_ream_500_pages = cm_per_ream_500_pages / 2.54

            elif option == "cover_box_color":
                cover_box_color = value.strip().lower()
            elif option == "cover_text_color":
                cover_text_color = value.strip().lower()
            #The cover font sizes are whole numbers of pixels. The text
            #must be converted to a number before it can be rounded.
            elif option == "cover_title_font_size":
                cover_title_font_size = round(float(value))
            elif option == "cover_author_font_size":
                cover_author_font_size = round(float(value))
            elif option == "spine_font_size":
                spine_font_size = round(float(value))
            elif option == "cover_spacing_title_height_ratio":
                cover_spacing_title_height_ratio = float(value)
            #"cover_trim_width" is kept in inches.
            elif option == "cover_trim_width":
                cover_trim_width = float(value)
            elif option == "cover_trim_width_cm":
                cover_trim_width = float(value) / 2.54
            #The cover canvas measures 4200 pixels for 14 inches.
            elif option == "cover_extra_inches":
                cover_extra_pixels = round(float(value) * 4200 / 14)
            elif option == "cover_extra_cm":
                cover_extra_pixels = round(float(value) / 2.54 * 4200 / 14)
            elif option == "pixels_from_bottom_cover_spine":
                pixels_from_bottom_cover_spine = int(value)
            elif option == "pixels_from_left_cover_spine":
                pixels_from_left_cover_spine = int(value)
            elif option == "pixels_from_top_cover_title_box":
                pixels_from_top_cover_title_box = int(value)
            elif option == "pixels_from_left_cover_title_box":
                pixels_from_left_cover_title_box = int(value)
            elif option == "spine_text":
                spine_text = value
    #"ValueError" is raised for text that is not a number, and
    #"OverflowError" when rounding an infinite value.
    except (ValueError, OverflowError):
        problem = True
        print("\nPlease enter the name of the parameter you wish to alter, followed " +
        "by a colon, and the desired setting directly after the colon. For example, " +
        'to set the title, you would enter: "title:Your Title Here" as an additional argument.')
#The code below only runs if the user has at least
#provided a title, author and valid file name.
if (problem == False and title != None and author != None and txt_file_name != None and
txt_file_name[-4:].lower() == ".txt"):
#Some extra pixels ("cover_extra_pixels", 20 by default) are subtracted
#from "left_margin_cover_textbox", as there seems to be up to 3 mm missing
#on both sides of the cover due to binding irregularities and the thickness
#of the glue (3 mm * inch/25.4 mm * 4200 pixels/14 inch = 35 pixels).
#By subtracting some pixels, the cover title box is shifted towards
#the left.
left_margin_cover_textbox -= cover_extra_pixels
#The same applies to the "right_margin_cover_textbox"
right_margin_cover_textbox -= cover_extra_pixels
#The space between the left edge of the textbox
#and the start of the text on the x axis is set to 100 pixels,
#so the text will start drawing at "left_margin_cover_textbox + 100" pixels
left_margin_cover_text = left_margin_cover_textbox + 100
#The space between the right edge of the textbox
#and the end of the text on the x axis is set to 100 pixels,
#so the text will stop drawing at "right_margin_cover_textbox - 100" pixels
right_margin_cover_text = right_margin_cover_textbox - 100
#The space between the top margin of the textbox
#and the top edge of the text on the y axis
#is set to 100 pixels: "top_margin_cover_textbox + 100"
vertical_margin_cover_text = top_margin_cover_textbox + 100
#According to the following source,
#https://www.pacificu.edu/sites/default/files/documents/Individualcharacterlegibility.pdf
#the average width to height ratio of six different serif fonts
#(Rockwell, Georgia, Garamond, Centaur, Bodoni and Baskerville) is 1.10,
#so a width to height ratio of 1.1 is used below to estimate the
#width of a character from its height.
#The title font size in points is determined by dividing the
#floating number extracted from "title_size" (in half-points,
#after the three-character r"\fs" prefix) by two.
title_size_float = float(title_size.strip()[3:])/2
#The character height in twips is determined by using the conversion
#factor of 1440 twips for every 72 points.
title_character_height_twips = round(title_size_float*1440/72)
#The character width in twips is calculated by multiplying
#the "title_character_height_twips" by the average width to height
#ratio for serif fonts of "1.1" mentioned above.
title_character_width_twips = round(1.1*title_character_height_twips)
#The total width of the title (in twips) is determined by multiplying
#the number of characters by the individual character width.
title_width_twips = len(title)*title_character_width_twips
#The available width is the page width in twips (5.5 inches times
#1440 twips per inch) minus the sum of the left and right margins.
#A correction factor is applied to it, as conversion of font width in
#points into twips results in much too large character dimensions
#relative to the space available on the width of the page. The
#"correction_factor" should work similarly with other fonts, assuming
#that their width to height ratio is in line with the value of 1.1.
correction_factor = 4.0
width_threshold = int((5.5*1440 - (left_margin_twips + right_margin_twips)) * correction_factor)
title_string = title
#If the title doesn't contain sequences of at least two consecutive
#spaces (which would indicate that the user wants to manually insert
#line breaks at these locations) and its estimated width is too large
#to fit on one line, the title is split in two halves around its middle
#word and the font size is decremented until both halves fit onto their
#own line or a font size of 27 is reached.
#Only runs of spaces are matched here: a character class such as
#'[" "]' would also match double quotes, so that a quoted word in the
#title would be mistaken for a manual line break.
if re.search(' {2,}', title_string) is None and title_width_twips > width_threshold:
    #The "re.split()" method with retention of spaces (capturing group)
    #is used so that joining the fragments restores the original spacing.
    #Any sequences of two or more successive spaces would be changed to
    #a space followed by a carriage return (" \n").
    title_words = re.split(r"( )", re.sub(' {2,}', " \n", title))
    number_of_title_words = len(title_words)
    #The middle index in the title will be the threshold
    #for including a line break in the title.
    middle_index_in_title = math.ceil(len(title_words)/2)
    first_half_words = title_words[:middle_index_in_title]
    first_half_words_string = "".join(first_half_words)
    second_half_words = title_words[middle_index_in_title:]
    second_half_words_string = "".join(second_half_words)
    #A raw string is used for the RTF "\line" command, as "\l" is
    #not a valid escape sequence in a regular string literal.
    adjusted_title_rtf = first_half_words_string + r"\line " + second_half_words_string
    title_width_set = False
    while title_width_set == False and title_size_float > 27:
        title_character_height_twips = round(title_size_float*1440/72)
        title_character_width_twips = round(1.1*title_character_height_twips)
        title_width_twips = len(title)*title_character_width_twips
        #The width of every element of each half (plus one character
        #for a space, hence the +1) is determined in twips and summed.
        #NOTE(review): the single-space separators retained by
        #"re.split()" are list elements too, so the widths are
        #overestimated; left unchanged, as "correction_factor" was
        #presumably tuned with this estimate.
        first_half_words_width = 0
        for word in first_half_words:
            first_half_words_width += (len(word)+1)*title_character_width_twips
        second_half_words_width = 0
        for word in second_half_words:
            second_half_words_width += (len(word)+1)*title_character_width_twips
        #If the two halves of the title are still too wide to fit into their
        #own lines, the font size is decremented by half a point.
        if first_half_words_width > width_threshold or second_half_words_width > width_threshold:
            title_size_float -= 0.5
        else:
            title_size = r"\fs" + str(round(2*title_size_float))
            title_width_set = True
    #If the "while" loop ended without finding a fitting size (the
    #minimum font size was reached), "title_size" is updated from the
    #final "title_size_float". Testing "title_width_set" instead of
    #"title_size_float == 27" also covers sizes that step over 27.
    if title_width_set == False:
        title_size = r"\fs" + str(round(2*title_size_float))
#Otherwise, the title either fits on one line or contains sequences
#of at least two consecutive spaces. These sequences are changed for
#a space followed by a carriage return (" \n", the space being required
#here in order to prevent merged words on the title page), and the
#length of the longest line is determined by splitting the resulting
#string along the "\n" dividers. The font size of the title is then
#automatically adjusted, such that the longest line may fit within
#the available horizontal space.
else:
    #NOTE(review): "custom_title_line_breaks" is meant to indicate that
    #the user has included sequences of at least two successive spaces
    #within the title, but it is also set when the title simply fits
    #on one line; confirm that the code using it expects this.
    custom_title_line_breaks = True
    title_string = re.sub(' {2,}', " \n", title)
    adjusted_title_rtf = title_string
    length_of_longest_title_line = max([len(line) for line in re.split(r'\n', title_string)])
    title_width_set = False
    while title_width_set == False and title_size_float > 27:
        title_character_height_twips = round(title_size_float*1440/72)
        title_character_width_twips = round(1.1*title_character_height_twips)
        title_width_twips = length_of_longest_title_line*title_character_width_twips
        #The width of the longest line (and not the width of a single
        #character) is compared to the available width. If it is still
        #too wide, the font size is decremented by half a point.
        if title_width_twips > width_threshold:
            title_size_float -= 0.5
        else:
            title_size = r"\fs" + str(round(2*title_size_float))
            title_width_set = True
    #Same fallback as above when the minimum font size was reached.
    if title_width_set == False:
        title_size = r"\fs" + str(round(2*title_size_float))
#A similar automatic font scaling is applied to the author name
#string "author". Its starting font size is the (possibly reduced)
#title font size multiplied by "max_author_title_font_ratio".
author_size_float = title_size_float * max_author_title_font_ratio
author_size = r"\fs" + str(round(2*author_size_float))
#The character dimensions and the total width of the author name are
#estimated in twips (1440 twips for every 72 points, and a width to
#height ratio of 1.1), in the same way as for the title.
author_character_height_twips = round(author_size_float*1440/72)
author_character_width_twips = round(1.1*author_character_height_twips)
author_width_twips = len(author)*author_character_width_twips
#The available width is the page width (5.5 inches times 1440 twips
#per inch) minus the left and right margins, times "correction_factor".
correction_factor = 4.0
width_threshold = int((5.5*1440 - (left_margin_twips + right_margin_twips)) * correction_factor)
#If the author name is too wide to fit on one line, it is split in two
#halves around its middle word and the font size is decremented until
#both halves fit onto their own line or a font size of 27 is reached.
if author_width_twips > width_threshold:
    #The capturing group in "re.split()" retains the spaces, so that
    #joining the fragments restores the original spacing.
    author_words = re.split(r"( )", author)
    number_of_author_words = len(author_words)
    middle_index_in_author = math.ceil(len(author_words)/2)
    first_half_words = author_words[:middle_index_in_author]
    first_half_words_string = "".join(first_half_words)
    second_half_words = author_words[middle_index_in_author:]
    second_half_words_string = "".join(second_half_words)
    #A raw string is used for the RTF "\line" command, as "\l" is
    #not a valid escape sequence in a regular string literal.
    adjusted_author_rtf = first_half_words_string + r"\line " + second_half_words_string
    author_width_set = False
    while author_width_set == False and author_size_float > 27:
        author_character_height_twips = round(author_size_float*1440/72)
        author_character_width_twips = round(1.1*author_character_height_twips)
        author_width_twips = len(author)*author_character_width_twips
        #The width of every element of each half (plus one character
        #for a space, hence the +1) is determined in twips and summed.
        first_half_words_width = 0
        for word in first_half_words:
            first_half_words_width += (len(word)+1)*author_character_width_twips
        second_half_words_width = 0
        for word in second_half_words:
            second_half_words_width += (len(word)+1)*author_character_width_twips
        #If one of the halves is still too wide, the font size is
        #decremented by half a point. Otherwise, as the author name font
        #size should be at most 75% of that of the title, "author_size_float"
        #is scaled down to 75% of "title_size_float" if it is above it.
        #NOTE(review): the 0.75 cap is hard-coded, while the starting size
        #uses "max_author_title_font_ratio"; confirm which one is intended.
        if first_half_words_width > width_threshold or second_half_words_width > width_threshold:
            author_size_float -= 0.5
        elif author_size_float > title_size_float*0.75:
            author_size_float = title_size_float*0.75
            author_size = r"\fs" + str(round(2*author_size_float))
            author_width_set = True
        else:
            author_size = r"\fs" + str(round(2*author_size_float))
            author_width_set = True
    #If the "while" loop ended without finding a fitting size (the
    #minimum font size was reached), "author_size" is updated from the
    #final "author_size_float". As "author_size_float" is the product of
    #the title size and a ratio, the 0.5 point steps may never land on 27
    #exactly (e.g. 27.25 -> 26.75), so "author_width_set" is tested rather
    #than "author_size_float == 27", which would leave "author_size" at
    #its initial, too large value.
    if author_width_set == False:
        author_size = r"\fs" + str(round(2*author_size_float))
#The lines of the TXT file are loaded into the "text" list.
with open(txt_file_name, "r", encoding="utf-8") as f:
    text = f.readlines()
remove_spaces = False
#The author name is split into words once, before the loop. An empty
#author name yields an empty list, in which case no line can be
#recognized as the author line (instead of raising an IndexError).
author_name_words = author.split()
for i in range(len(text)):
    #Instances of three or more successive spaces would typically
    #designate tabs and will be removed. Afterwards, any remaining
    #instances of two successive spaces will be changed for a single space,
    #as there could have been a typo with an additional space.
    #This needs to be done before introducing RTF commands, which are
    #followed by an optional space, the removal of which would result
    #in merged words. Instances of "[Illustration]" are removed and
    #backslashes (if present in the TXT file) are changed to their
    #corresponding RTF escapes, so as to avoid any issues when parsing
    #the RTF code. Finally, instances of "_{", denoting subscript passages,
    #are changed for the subscript RTF command (r"{\sub "). The same goes
    #for superscript passages ("^{"), which are changed to r"{\super ".
    #The successive spaces are only removed after the author name, as the
    #user might want to incorporate some spaces in the title or author name
    #in order for the text to be split differently. The "remove_spaces"
    #variable (initialized to "False") is set to "True" upon reaching
    #the line containing the author name, and successive spaces are
    #removed starting at the next line.
    #A line is considered to be the author line if it contains "author"
    #and the first word of "author" is either its first or second word
    #(in case it is preceded by "by"). Slicing the first two words
    #("[:2]") prevents an IndexError on lines made up of a single word.
    is_author_line = (author_name_words != [] and author in text[i] and
    author_name_words[0] in text[i].split()[:2])
    if is_author_line:
        remove_spaces = True
    elif remove_spaces == True:
        #Only runs of spaces are matched: a character class such as
        #'[" "]' would also match double quotes and delete the quotation
        #marks found in-between two adjacent quotations (" ").
        text[i] = re.sub(' {3,}', "", text[i]).replace(" " * 2, " ")
    #The following substitutions are applied to every line.
    text[i] = (text[i].replace("[Illustration]", "").replace('\\', r"\'5c")
    .replace('_{', r'{\sub ').replace('^{', r'{\super '))
    #If a line still contains a caret symbol, it is likely because there is
    #a single character following it that should be in superscript. The indices
    #of all carets in the line are gathered using the "finditer" method from
    #the re module. These indices are screened in reverse order, which prevents
    #indexing issues given the substitution of multiple characters
    #(r"{\super ") for a single character ("^"). "length_line" is the length
    #of the line before any of these substitutions.
    caret_indices = [match.start() for match in re.finditer(r'\^', text[i])]
    if caret_indices != []:
        length_line = len(text[i])
        for caret_index in reversed(caret_indices):
            #If at least three characters follow the caret
            #(caret_index < length_line-3), the caret is skipped over,
            #the character following it is enclosed in the superscript
            #group and the characters that follow are added back
            #("+ text[i][caret_index+2:]").
            if caret_index < length_line-3:
                text[i] = (text[i][:caret_index] + r"{\super " +
                text[i][caret_index+1] + "}" +
                text[i][caret_index+2:])
            #If exactly two characters follow the caret (the character
            #in superscript and, presumably, the line break), the rest
            #of the line is dropped and a space is added after the closing
            #curly bracket in order to prevent the superscript from merging
            #with the first word on the following line.
            elif caret_index < length_line-2:
                text[i] = (text[i][:caret_index] + r"{\super " +
                text[i][caret_index+1] + "} ")
#This section scans the lines of "text" once in order to locate:
# - the first line matching the title ("title_index"),
# - the table of contents ("contents_index_start" to "contents_index_end"),
#   which is sliced out later on, as its page numbers would not be
#   accurate anymore, given the change in line length, line spacing,
#   font size and margins,
# - the Project Gutenberg opening and closing tags ("first_line_index"
#   and "last_line_index").
#"title_index" (defaulted to None) serves as line reference to locate
#the "contents" section (if present). By default, "first_line_index" and
#"last_line_index" span the whole "text" list.
title_index = None
contents_index_start = None
contents_index_end = None
first_line_index = 0
last_line_index = len(text)-1
for i in range(len(text)):
#The first line that is equal to the title (case-insensitive, after
#stripping the line) is recorded as "title_index".
if title_index == None and text[i].lower().strip() == title.lower():
title_index = i
#A table of contents header is only looked for once the title has been
#found, within the 15 lines following it, and only the first matching
#header is considered.
elif (contents_index_start == None and title_index != None and
i < title_index + 15 and (text[i].lower().strip() in ["content","content.",
"content:", "contents", "contents.", "contents:", "table of contents",
"table of contents.", "table of contents:", "toc", "toc.", "toc:"])):
#The index at which the table of contents starts (the header)
#is stored within "contents_index_start" to allow for slicing
#it out of the "text" list later on in the code.
contents_index_start = i
#The code below will locate the line index at which the table of
#contents ends, insofar as there is at most one empty line in-between
#the elements of the table of contents. However, there can be any
#number of empty lines between the "Contents" header and the first
#element of the table of contents.
#NOTE(review): "contents_text", "contents_text_before" and
#"first_line_empty" are not used anywhere else in this section.
contents_text = None
contents_text_before = None
first_line_empty = None
no_empty_lines_contents = None
contents_index_end = None
#The "index_modifier" is set to one in case there are
#no empty lines between the "Contents" header and the first
#element of the table of contents, in which case the
#first element is found at text[i+1].
index_modifier = 1
for j in range(1,len(text)-i):
#If the line following the "Contents" header is an empty line,
#the offset of the first non-empty line after the header is
#stored in "index_modifier", so that the first element is found
#at the "text[i+index_modifier]" index.
if text[i+1].strip(" ") == "\n" and text[i+j].strip(" ") != "\n":
index_modifier = j
break
#The following "for" loop will cycle through the lines that follow
#the first element of the table of contents and determine the line
#index "contents_index_end", where the table of contents ends.
for j in range(index_modifier+1,len(text)-(i+index_modifier+1)):
if contents_index_end != None:
break
#If the line right after the first element of the table
#of contents is not an empty line, then "no_empty_lines_contents"
#is set to True.
elif j == index_modifier + 1 and text[i+j].strip(" ") != "\n":
no_empty_lines_contents = True
#If the line right after the first element of the table
#of contents is an empty line, then "no_empty_lines_contents"
#is set to False.
elif j == index_modifier + 1 and text[i+j].strip(" ") == "\n":
no_empty_lines_contents = False
#If "no_empty_lines_contents" is "True", meaning that there are
#no empty lines in-between the elements of the table of contents,
#then "contents_index_end" is set as the index of the first empty
#line.
elif j > index_modifier + 1 and no_empty_lines_contents == True:
if text[i+j].strip(" ") == "\n":
contents_index_end = i+j
break
#If "no_empty_lines_contents" is "False", then one cannot assume
#that the table of contents ends upon reaching the next empty
#line. Instead, "contents_index_end" is set as the index of the
#first line of the first pair of consecutive non-empty lines or
#consecutive empty lines, as elements and empty lines should
#normally alternate within such a table of contents.
elif j > index_modifier + 1 and no_empty_lines_contents == False:
if text[i+j].strip(" ") != "\n" and text[i+j+1].strip(" ") != "\n":
contents_index_end = i+j
break
elif text[i+j].strip(" ") == "\n" and text[i+j+1].strip(" ") == "\n":
contents_index_end = i+j
break
#If "contents_index_end" is still not set after a "Contents" header
#was found, the end of the table of contents could not be determined
#(for instance because its line spacing is irregular) and the user
#should remove it by hand. The variable "problem" is set to "True",
#which will prevent the code after this "for" loop from proceeding,
#and the scan of the lines is interrupted.
if contents_index_end == None:
print("\nPlease manually remove the table of contents from the TXT file, " +
"save the file and run the code again.")
problem = True
break
#The line indices at which the opening and closing tags of the manuscript are located
#within the "text" list are stored in "first_line_index" and "last_line_index" and
#will enable to slice out the Project Gutenberg information and license.
#The slices "[:40]" and "[:41]" correspond to the lengths of the two accepted
#spellings of the opening tag ("THE" or "THIS").
elif (text[i].strip(" ") != "\n" and (text[i][:40] == "*** START OF THE PROJECT GUTENBERG EBOOK" or
text[i][:41] == "*** START OF THIS PROJECT GUTENBERG EBOOK")):
#If there are "***" in the line (after the opening stars),
#then the "first_line_index" is set to the next line ("i+1").
if text[i][3:].find("***") != -1:
first_line_index = i+1
#Otherwise, the next lines are screened for the presence of
#the closing stars and the first line encountered that
#contains "***" will be used to determine the index of
#"first_line_index" ("j+1"). The very last line of "text"
#is not screened ("len(text)-1" is excluded from the range).
else:
for j in range(i+1, len(text)-1):
if "***" in text[j]:
first_line_index = j+1
break
#The manuscript ends on the line preceding the closing tag ("i-1").
#The slices "[:38]" and "[:39]" correspond to the lengths of the two
#accepted spellings of the closing tag ("THE" or "THIS").
elif (text[i].strip(" ") != "\n" and (text[i][:38] == "*** END OF THE PROJECT GUTENBERG EBOOK" or
text[i][:39] == "*** END OF THIS PROJECT GUTENBERG EBOOK")):
last_line_index = i-1
#The "text" list is only trimmed if no problem was flagged while
#locating the table of contents.
if problem == False:
    if contents_index_start is None or contents_index_end is None:
        #No complete table of contents was located: only the "Project
        #Gutenberg" information found before "first_line_index" and
        #after "last_line_index" is excluded from the list of lines.
        text = text[first_line_index:last_line_index+1]
    else:
        #Both the "Project Gutenberg" information and the table of
        #contents are removed, as the page numbers wouldn't line up with
        #the original table of contents, and tables of contents aren't
        #strictly required for novels at least. The lines from
        #"contents_index_start" up to (but excluding) "contents_index_end"
        #are skipped over.
        text = (text[first_line_index:contents_index_start] +
        text[contents_index_end:last_line_index+1])
if problem == False:
title_index = None
author_index = None
line_skipping_first_word = None
#The variable "page_break_ok" is initialized to "True" and
#allows for inclusion of a page break and carriage returns before
#the chapter headings. After writing the heading, the variable
#will be set to "False" and only set to "True" again once a
#regular line of text will be encountered.
page_break_ok = True
chapter_indices = []
slash_n_line_indices = []
for i in range(len(text)):
#In case the author name is preceded by "by" (or an equivalent in another language),
#the first word of the line is skipped over and stored in "line_skipping_first_word",
#for comparison with "author" in the first "elif" statement.
line_words = text[i].split()
if len(line_words) > 1:
line_skipping_first_word = (" ".join(line_words[1:]).lower().strip())
#The title page is assembled if the "text" list element at index "i"
#corresponds to the title of the work.
if title_index == None and text[i].lower().strip() == title.lower():
title_index = i
#If the title needs to be split