Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use alternate title if not ascii #123

Merged
merged 2 commits into from
Jan 29, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion gitenberg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@

__title__ = 'gitberg'
__appname__ = 'gitberg'
__version__ = '0.3.0'
__version__ = '0.3.1'
__copyright__ = 'Copyright 2012-2016 Seth Woodworth and the Free Ebook Foundation'

32 changes: 20 additions & 12 deletions gitenberg/book.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,19 +106,27 @@ def remove(self):
shutil.rmtree(self.local_path)

def format_title(self):
def asciify(_title):
    """Build an ASCII slug from a title and report whether it was ASCII.

    The title is NFD-normalized, then each character is handled as follows:
    characters in the allowed set (ASCII letters, digits, hyphen, space,
    apostrophe, comma) are kept; any other character whose Unicode category
    starts with "L" (a letter) is replaced by the hex form of its code
    point; carriage return, newline and tab become a hyphen; everything
    else is dropped.  Finally, every run of spaces, apostrophes, commas and
    hyphens is collapsed into a single hyphen.

    Returns a tuple ``(ascii, slug)`` where ``ascii`` is False if at least
    one letter had to be hex-encoded, and True otherwise.
    """
    allowed = u"1234567890qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM- ',"
    pieces = []
    only_ascii = True
    for char in unicodedata.normalize('NFD', unicode(_title)):
        if char in allowed:
            pieces.append(char)
            continue
        if unicodedata.category(char).startswith("L"):
            # A letter outside the allowed set: keep it as its hex code point.
            pieces.append(hex(ord(char)))
            only_ascii = False
            continue
        if char in u'\r\n\t':
            # Line breaks and tabs act as word separators.
            pieces.append(u'-')
    slug = sub("[ ',-]+", '-', "".join(pieces))
    return (only_ascii, slug)

""" Takes a string and sanitizes it for Github's url name format """
_title = unicodedata.normalize('NFD', unicode(self.meta.title))
out = []
ok=u"1234567890qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM- ',"
for ch in _title:
if ch in ok:
out.append(ch)
elif unicodedata.category(ch)[0] == ("L"): #a letter
out.append(hex(ord(ch)))
elif ch in u'\r\n\t':
out.append(u'-')
_title = sub("[ ',-]+", '-', "".join(out))

(ascii, _title) = asciify(self.meta.title)
if not ascii and self.meta.alternative_title:
(ascii, _title2) = asciify(self.meta.alternative_title)
if ascii:
_title = _title2
title_length = 99 - len(str(self.book_id)) - 1
if len(_title) > title_length:
# if the title was shortened, replace the trailing _ with an ellipsis
Expand Down
15 changes: 9 additions & 6 deletions gitenberg/tests/test_book.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import unittest

from mock import patch

import gitenberg
from gitenberg.book import Book


class TestBookPath(unittest.TestCase):
def setUp(self):
with patch('github3.login') as login:
self.login = login
self.book = Book(3456)
def here(appname):
return os.path.join(os.path.dirname(__file__),'test_data')
with patch.object(gitenberg.config.appdirs, 'user_config_dir', here) as path:
with patch('github3.login') as login:
self.login = login
self.book = Book(3456)

def test_remote_path(self):
self.assertEqual(
Expand Down
5 changes: 5 additions & 0 deletions gitenberg/tests/test_data/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
gh_email: gh_email@example.com
gh_password: gh_password
gh_user: gh_user
library_path: .
rdf_library: rdf_library
96 changes: 96 additions & 0 deletions notebooks/new_title_slug.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0x30de0x30eb0x30c10x30f30x30eb0x30bf0x30fc0x306e0x5c0f0x4fe10x4ef00x554f0x7b540x66f8_2592\t---------------_2592\n"
]
}
],
"source": [
"import csv\n",
"import sys\n",
"sys.path.append(\"..\")\n",
"from gitenberg.util.catalog import BookMetadata\n",
"from gitenberg.book import Book\n",
"rdf_library='/Documents/gitenberg/cache/epub'\n",
"exit\n",
"\n",
"with open('../gitenberg/data/GITenberg_repo_list.tsv','r') as f:\n",
" for vals in csv.reader(f,delimiter='\\t', quotechar='\"'):\n",
" (pg_id, repo_name) = vals\n",
" if '-----' in repo_name:\n",
" pg_book=Book(pg_id)\n",
" pg_book.parse_book_metadata()\n",
" print '{}\\t{}'.format(pg_book.meta._repo,repo_name)\n",
" break\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Babylonia-or-the-corruption-of-the-Greek-language-in-various-places-A-Comedy-in-five-acts_31434\n",
"Babylonia; or, the corruption of the Greek language in various places.\r\n",
"A Comedy in five acts\n"
]
}
],
"source": [
"from gitenberg.book import Book\n",
"\n",
"book = Book(31434)\n",
"book.parse_book_metadata()\n",
"print book.meta._repo\n",
"print book.meta.alternative_title"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
__version__ = m.group(1)
break

setup(name='xgitberg',
setup(name='gitberg',
version=__version__,
description="A library and command for interacting with the GITenberg books project",
long_description=open('README.md').read(),
Expand Down