This repository has been archived by the owner on Dec 21, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2a6ba04
commit a02c784
Showing
24 changed files
with
180,165 additions
and
173,286 deletions.
There are no files selected for viewing
396 changes: 156 additions & 240 deletions
396
static/docs/tools/parallel/.ipynb_checkpoints/kings_ii-checkpoint.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
196 changes: 64 additions & 132 deletions
196
static/docs/tools/parallel/.ipynb_checkpoints/kings_ii_TF-checkpoint.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
6,778 changes: 4,279 additions & 2,499 deletions
6,778
static/docs/tools/parallel/.ipynb_checkpoints/parallels-checkpoint.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
4,738 changes: 2,394 additions & 2,344 deletions
4,738
static/docs/tools/parallel/Isaiah-mt-1QIsaa.html
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
186 changes: 74 additions & 112 deletions
186
static/docs/tools/parallel/kings_ii_TF.html → .../docs/tools/parallel/kings_ii_legacy.html
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -71,7 +71,18 @@ | |
"collapsed": false, | ||
"scrolled": true | ||
}, | ||
"outputs": [], | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
" 0.00s This is LAF-Fabric 4.5.19\n", | ||
"API reference: http://laf-fabric.readthedocs.org/en/latest/texts/API-reference.html\n", | ||
"Feature doc: https://shebanq.ancient-data.org/static/docs/featuredoc/texts/welcome.html\n", | ||
"\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import sys,os, re, pickle\n", | ||
"import collections, difflib\n", | ||
|
@@ -83,7 +94,11 @@ | |
"import matplotlib.pyplot as plt\n", | ||
"%matplotlib inline\n", | ||
"\n", | ||
"from tf.fabric import Fabric" | ||
"import laf\n", | ||
"from laf.fabric import LafFabric\n", | ||
"from etcbc.preprocess import prepare\n", | ||
"from etcbc.lib import Transcription\n", | ||
"fabric = LafFabric()" | ||
] | ||
}, | ||
{ | ||
|
@@ -92,16 +107,11 @@ | |
"source": [ | ||
"## 0.3 Data source\n", | ||
"\n", | ||
"We use the ETCBC database in its version 4c, downloadable from the GitHub repo\n", | ||
"[text-fabric-data](https://github.com/ETCBC/text-fabric-data).\n", | ||
"The format of the data obtained through Github is immediately ready to be used by Text-Fabric,\n", | ||
"and hence by this notebook as well.\n", | ||
"\n", | ||
"A previous version of this notebook was based on version 4b,\n", | ||
"as archived at DANS, downloadable via DOI\n", | ||
"We use the ETCBC database in its version 4b, as archived at DANS, downloadable via DOI\n", | ||
"[10.17026/dans-z6y-skyh](http://dx.doi.org/10.17026/dans-z6y-skyh).\n", | ||
"It is also possible to get this data through Github:\n", | ||
"[etcbc/laf-fabric-data](https://github.com/ETCBC/laf-fabric-data).\n", | ||
"The format of the data obtained through Github is immediately ready to be used by LAF-Fabric, and hence by this notebook as well.\n", | ||
"\n", | ||
"The transcription of 1QIsa<sup>a</sup> is in a file produced by the ETCBC. This file is included \n", | ||
"[here](https://shebanq.ancient-data.org/shebanq/static/docs/tools/parallel/1QIsaa_an.txt)\n", | ||
|
@@ -118,7 +128,7 @@ | |
"outputs": [], | ||
"source": [ | ||
"source = 'etcbc'\n", | ||
"version = '4c'\n", | ||
"version = '4b'\n", | ||
"QISA_FILE = '1QIsaa_an.txt'" | ||
] | ||
}, | ||
|
@@ -139,54 +149,43 @@ | |
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"This is Text-Fabric 1.2.7\n", | ||
"Api reference : https://github.com/ETCBC/text-fabric/wiki/Api\n", | ||
"Tutorial : https://github.com/ETCBC/text-fabric/blob/master/docs/tutorial.ipynb\n", | ||
"Data sources : https://github.com/ETCBC/text-fabric-data\n", | ||
"Data docs : https://etcbc.github.io/text-fabric-data/features/hebrew/etcbc4c/0_overview.html\n", | ||
"Shebanq docs : https://shebanq.ancient-data.org/text\n", | ||
"Slack team : https://shebanq.slack.com/signup\n", | ||
"Questions? Ask [email protected] for an invite to Slack\n", | ||
"107 features found and 0 ignored\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"ETCBC = 'hebrew/{}{}'.format(source, version)\n", | ||
"TF = Fabric( modules=ETCBC )" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": { | ||
"collapsed": false | ||
"collapsed": false, | ||
"scrolled": true | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
" 0.00s loading features ...\n", | ||
" | 0.00s M otext from /Users/dirk/github/text-fabric-data/hebrew/etcbc4c\n", | ||
" | 0.19s B lex_utf8 from /Users/dirk/github/text-fabric-data/hebrew/etcbc4c\n", | ||
" | 0.13s B language from /Users/dirk/github/text-fabric-data/hebrew/etcbc4c\n", | ||
" 4.37s All features loaded/computed - for details use loadLog()\n" | ||
" 0.00s LOADING API: please wait ... \n", | ||
" 0.00s USING main DATA COMPILED AT: 2015-11-02T15-08-56\n", | ||
" 0.00s USING annox DATA COMPILED AT: 2016-01-27T19-01-17\n", | ||
" 3.04s LOGFILE=/Users/dirk/laf-fabric-output/etcbc4b/kings/__log__kings.txt\n", | ||
" 3.04s INFO: LOADING PREPARED data: please wait ... \n", | ||
" 3.04s prep prep: G.node_sort\n", | ||
" 3.18s prep prep: G.node_sort_inv\n", | ||
" 3.73s prep prep: L.node_up\n", | ||
" 7.52s prep prep: L.node_down\n", | ||
" 13s prep prep: V.verses\n", | ||
" 13s prep prep: V.books_la\n", | ||
" 13s ETCBC reference: http://laf-fabric.readthedocs.org/en/latest/texts/ETCBC-reference.html\n", | ||
" 15s INFO: LOADED PREPARED data\n", | ||
" 15s INFO: DATA LOADED FROM SOURCE etcbc4b AND ANNOX lexicon FOR TASK kings AT 2016-03-04T15-50-30\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"api = TF.load('''\n", | ||
" language lex_utf8\n", | ||
"''')\n", | ||
"api.makeAvailableIn(globals())" | ||
"API = fabric.load(source+version, 'lexicon', 'kings', {\n", | ||
" \"xmlids\": {\"node\": False, \"edge\": False},\n", | ||
" \"features\": ('''\n", | ||
" otype\n", | ||
" language lex_utf8\n", | ||
" book chapter verse\n", | ||
" ''',''),\n", | ||
" \"prepare\": prepare,\n", | ||
" \"primary\": False,\n", | ||
"}, verbose='NORMAL')\n", | ||
"exec(fabric.localnames.format(var='fabric'))" | ||
] | ||
}, | ||
{ | ||
|
@@ -211,71 +210,46 @@ | |
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"execution_count": 4, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"{'am': {'language': 'ኣማርኛ', 'languageEnglish': 'amharic'},\n", | ||
" 'ar': {'language': 'العَرَبِية', 'languageEnglish': 'arabic'},\n", | ||
" 'bn': {'language': 'বাংলা', 'languageEnglish': 'bengali'},\n", | ||
" 'da': {'language': 'Dansk', 'languageEnglish': 'danish'},\n", | ||
" 'de': {'language': 'Deutsch', 'languageEnglish': 'german'},\n", | ||
" 'el': {'language': 'Ελληνικά', 'languageEnglish': 'greek'},\n", | ||
" 'en': {'language': 'English', 'languageEnglish': 'english'},\n", | ||
" 'es': {'language': 'Español', 'languageEnglish': 'spanish'},\n", | ||
" 'fa': {'language': 'فارسی', 'languageEnglish': 'farsi'},\n", | ||
" 'fr': {'language': 'Français', 'languageEnglish': 'french'},\n", | ||
" 'he': {'language': 'עברית', 'languageEnglish': 'hebrew'},\n", | ||
" 'hi': {'language': 'हिन्दी', 'languageEnglish': 'hindi'},\n", | ||
" 'id': {'language': 'Bahasa Indonesia', 'languageEnglish': 'indonesian'},\n", | ||
" 'ja': {'language': '日本語', 'languageEnglish': 'japanese'},\n", | ||
" 'ko': {'language': '한국어', 'languageEnglish': 'korean'},\n", | ||
" 'la': {'language': 'Latina', 'languageEnglish': 'latin'},\n", | ||
" 'nl': {'language': 'Nederlands', 'languageEnglish': 'dutch'},\n", | ||
" 'pa': {'language': 'ਪੰਜਾਬੀ', 'languageEnglish': 'punjabi'},\n", | ||
" 'pt': {'language': 'Português', 'languageEnglish': 'portuguese'},\n", | ||
" 'ru': {'language': 'Русский', 'languageEnglish': 'russian'},\n", | ||
" 'sw': {'language': 'Kiswahili', 'languageEnglish': 'swahili'},\n", | ||
" 'syc': {'language': 'ܠܫܢܐ ܣܘܪܝܝܐ', 'languageEnglish': 'syriac'},\n", | ||
" 'tr': {'language': 'Türkçe', 'languageEnglish': 'turkish'},\n", | ||
" 'ur': {'language': 'اُردُو', 'languageEnglish': 'urdu'},\n", | ||
" 'yo': {'language': 'èdè Yorùbá', 'languageEnglish': 'yoruba'},\n", | ||
" 'zh': {'language': '中文', 'languageEnglish': 'chinese'}}" | ||
] | ||
}, | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"ar: arabic (aka العَرَبِية)\n", | ||
"de: german (aka Deutsch)\n", | ||
"el: greek (aka Ελληνικά)\n", | ||
"en: english (aka English)\n", | ||
"es: spanish (aka Español)\n", | ||
"fr: french (aka François)\n", | ||
"he: hebrew (aka עברית)\n", | ||
"id: indonesian (aka Bahasa Indonesia)\n", | ||
"ko: korean (aka 한국어)\n", | ||
"la: latin (aka Latina)\n", | ||
"nl: dutch (aka Nederlands)\n", | ||
"ru: russian (aka Русский)\n", | ||
"sw: swahili (aka Kiswahili)\n", | ||
"tr: turkish (aka Türkçe)\n", | ||
"zh: chinese (aka 中文)\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"T.languages" | ||
"for (ln, (en_name, own_name)) in sorted(T.langs.items()): \n", | ||
" print('{}: {:<10} (aka {})'.format(ln, en_name, own_name))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"execution_count": 5, | ||
"metadata": { | ||
"collapsed": false, | ||
"scrolled": true | ||
}, | ||
"outputs": [ | ||
{ | ||
"ename": "NameError", | ||
"evalue": "name 'API' is not defined", | ||
"output_type": "error", | ||
"traceback": [ | ||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | ||
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", | ||
"\u001b[0;32m<ipython-input-8-63cd9d63b4d9>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mCROSSREF_APP\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'parallel'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;31m# directory of computed intermediary results of parallel.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0mPRECOMP_DIR\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'{}/{}{}/{}/{}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mAPI\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'output_dir'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msource\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mversion\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCROSSREF_APP\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'stored'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 12\u001b[0m \u001b[0;31m# precomputed list of verse chunks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0mCHUNK_GREP\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'{}/chunks/chunk_{}_{}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mPRECOMP_DIR\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'O'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'verse'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | ||
"\u001b[0;31mNameError\u001b[0m: name 'API' is not defined" | ||
] | ||
} | ||
], | ||
"outputs": [], | ||
"source": [ | ||
"# the language of the book names\n", | ||
"LANG = 'en'\n", | ||
|
Binary file not shown.
Oops, something went wrong.