From 68f59a131c17b1d931c92840a0a8906b9ab60e72 Mon Sep 17 00:00:00 2001 From: eric Date: Fri, 27 Jan 2017 12:01:49 -0500 Subject: [PATCH 1/2] use alternate title if not ascii --- gitenberg/book.py | 32 +++++++----- notebooks/new_title_slug.ipynb | 96 ++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 12 deletions(-) create mode 100644 notebooks/new_title_slug.ipynb diff --git a/gitenberg/book.py b/gitenberg/book.py index e8bf88d..12feb09 100644 --- a/gitenberg/book.py +++ b/gitenberg/book.py @@ -106,19 +106,27 @@ def remove(self): shutil.rmtree(self.local_path) def format_title(self): + def asciify(_title): + _title = unicodedata.normalize('NFD', unicode(_title)) + ascii = True + out = [] + ok=u"1234567890qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM- '," + for ch in _title: + if ch in ok: + out.append(ch) + elif unicodedata.category(ch)[0] == ("L"): #a letter + out.append(hex(ord(ch))) + ascii = False + elif ch in u'\r\n\t': + out.append(u'-') + return (ascii, sub("[ ',-]+", '-', "".join(out)) ) + """ Takes a string and sanitizes it for Github's url name format """ - _title = unicodedata.normalize('NFD', unicode(self.meta.title)) - out = [] - ok=u"1234567890qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM- '," - for ch in _title: - if ch in ok: - out.append(ch) - elif unicodedata.category(ch)[0] == ("L"): #a letter - out.append(hex(ord(ch))) - elif ch in u'\r\n\t': - out.append(u'-') - _title = sub("[ ',-]+", '-', "".join(out)) - + (ascii, _title) = asciify(self.meta.title) + if not ascii and self.meta.alternative_title: + (ascii, _title2) = asciify(self.meta.alternative_title) + if ascii: + _title = _title2 title_length = 99 - len(str(self.book_id)) - 1 if len(_title) > title_length: # if the title was shortened, replace the trailing _ with an ellipsis diff --git a/notebooks/new_title_slug.ipynb b/notebooks/new_title_slug.ipynb new file mode 100644 index 0000000..1320e5f --- /dev/null +++ b/notebooks/new_title_slug.ipynb @@ -0,0 +1,96 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0x30de0x30eb0x30c10x30f30x30eb0x30bf0x30fc0x306e0x5c0f0x4fe10x4ef00x554f0x7b540x66f8_2592\t---------------_2592\n" + ] + } + ], + "source": [ + "import csv\n", + "import sys\n", + "sys.path.append(\"..\")\n", + "from gitenberg.util.catalog import BookMetadata\n", + "from gitenberg.book import Book\n", + "rdf_library='/Documents/gitenberg/cache/epub'\n", + "exit\n", + "\n", + "with open('../gitenberg/data/GITenberg_repo_list.tsv','r') as f:\n", + " for vals in csv.reader(f,delimiter='\\t', quotechar='\"'):\n", + " (pg_id, repo_name) = vals\n", + " if '-----' in repo_name:\n", + " pg_book=Book(pg_id)\n", + " pg_book.parse_book_metadata()\n", + " print '{}\\t{}'.format(pg_book.meta._repo,repo_name)\n", + " break\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Babylonia-or-the-corruption-of-the-Greek-language-in-various-places-A-Comedy-in-five-acts_31434\n", + "Babylonia; or, the corruption of the Greek language in various places.\r\n", + "A Comedy in five acts\n" + ] + } + ], + "source": [ + "from gitenberg.book import Book\n", + "\n", + "book = Book(31434)\n", + "book.parse_book_metadata()\n", + "print book.meta._repo\n", + "print book.meta.alternative_title" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.9" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} From 3d6a71091eb328ad23e754e3595b710fb1590af6 Mon Sep 17 00:00:00 2001 From: eric Date: Sat, 28 Jan 2017 17:14:31 -0500 Subject: [PATCH 2/2] fix tests now properly mocking the github login --- gitenberg/__init__.py | 2 +- gitenberg/tests/test_book.py | 15 +++++++++------ gitenberg/tests/test_data/config.yaml | 5 +++++ setup.py | 2 +- 4 files changed, 16 insertions(+), 8 deletions(-) create mode 100644 gitenberg/tests/test_data/config.yaml diff --git a/gitenberg/__init__.py b/gitenberg/__init__.py index e9fe116..462fa2c 100644 --- a/gitenberg/__init__.py +++ b/gitenberg/__init__.py @@ -11,6 +11,6 @@ __title__ = 'gitberg' __appname__ = 'gitberg' -__version__ = '0.3.0' +__version__ = '0.3.1' __copyright__ = 'Copyright 2012-2016 Seth Woodworth and the Free Ebook Foundation' diff --git a/gitenberg/tests/test_book.py b/gitenberg/tests/test_book.py index 90d55bd..9851c6d 100644 --- a/gitenberg/tests/test_book.py +++ b/gitenberg/tests/test_book.py @@ -1,18 +1,21 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- - +import os import unittest from mock import patch - +import gitenberg from gitenberg.book import Book - + class TestBookPath(unittest.TestCase): def setUp(self): - with patch('github3.login') as login: - self.login = login - self.book = Book(3456) + def here(appname): + return os.path.join(os.path.dirname(__file__),'test_data') + with patch.object(gitenberg.config.appdirs, 'user_config_dir', here) as path: + with patch('github3.login') as login: + self.login = login + self.book = Book(3456) def test_remote_path(self): self.assertEqual( diff --git a/gitenberg/tests/test_data/config.yaml b/gitenberg/tests/test_data/config.yaml new file mode 100644 index 0000000..76087e1 --- /dev/null +++ b/gitenberg/tests/test_data/config.yaml @@ -0,0 +1,5 @@ +gh_email: gh_email@example.com +gh_password: gh_password +gh_user: gh_user +library_path: . +rdf_library: rdf_library diff --git a/setup.py b/setup.py index 56f7b21..108514f 100755 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ __version__ = m.group(1) break -setup(name='xgitberg', +setup(name='gitberg', version=__version__, description="A library and command for interacting with the GITenberg books project", long_description=open('README.md').read(),