forked from seatgeek/fuzzywuzzy
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix seatgeek#189
- Loading branch information
Heitor Pascoal de Bittencourt
committed
Jan 8, 2020
1 parent
0cfb2c8
commit 3601277
Showing
12 changed files
with
321 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Minimal makefile for Sphinx documentation | ||
# | ||
|
||
# You can set these variables from the command line, and also | ||
# from the environment for the first two. | ||
SPHINXOPTS ?= | ||
SPHINXBUILD ?= sphinx-build | ||
SOURCEDIR = docs | ||
BUILDDIR = build | ||
|
||
# Put it first so that "make" without argument is like "make help". | ||
help: | ||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) | ||
|
||
.PHONY: help Makefile | ||
|
||
# Catch-all target: route all unknown targets to Sphinx using the new | ||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). | ||
%: Makefile | ||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
API | ||
=== | ||
|
||
.. toctree:: | ||
:maxdepth: 2 | ||
:caption: Contents: | ||
|
||
fuzz | ||
process | ||
StringMatcher | ||
string_processing | ||
utils |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
StringMatcher | ||
~~~~~~~~~~~~~ | ||
|
||
.. automodule:: fuzzywuzzy.StringMatcher | ||
:members: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# Configuration file for the Sphinx documentation builder. | ||
# | ||
# This file only contains a selection of the most common options. For a full | ||
# list see the documentation: | ||
# https://www.sphinx-doc.org/en/master/usage/configuration.html | ||
|
||
# -- Path setup -------------------------------------------------------------- | ||
|
||
# If extensions (or modules to document with autodoc) are in another directory, | ||
# add these directories to sys.path here. If the directory is relative to the | ||
# documentation root, use os.path.abspath to make it absolute, like shown here. | ||
# | ||
import os | ||
import sys | ||
sys.path.insert(0, os.path.abspath('.')) | ||
sys.path.insert(0, os.path.abspath('..')) | ||
sys.path.insert(0, os.path.abspath('../fuzzywuzzy/')) | ||
|
||
|
||
# -- Project information ----------------------------------------------------- | ||
|
||
project = 'FuzzyWuzzy' | ||
copyright = '2020, SeatGeek' | ||
author = 'SeatGeek' | ||
|
||
# The full version, including alpha/beta/rc tags | ||
release = '0.17.0' | ||
|
||
|
||
# -- General configuration --------------------------------------------------- | ||
|
||
# Add any Sphinx extension module names here, as strings. They can be | ||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom | ||
# ones. | ||
extensions = ['sphinx.ext.autodoc', | ||
'sphinx.ext.napoleon', | ||
] | ||
|
||
autodoc_default_options = {'members': True, | ||
'undoc-members': True, | ||
'show-inheritance': True, | ||
} | ||
|
||
# Add any paths that contain templates here, relative to this directory. | ||
templates_path = ['_templates'] | ||
|
||
# List of patterns, relative to source directory, that match files and | ||
# directories to ignore when looking for source files. | ||
# This pattern also affects html_static_path and html_extra_path. | ||
exclude_patterns = [] | ||
|
||
|
||
# -- Options for HTML output ------------------------------------------------- | ||
|
||
on_rtd = os.environ.get('READTHEDOCS', None) == 'True' | ||
|
||
# The theme to use for HTML and HTML Help pages. See the documentation for | ||
# a list of builtin themes. | ||
if not on_rtd: | ||
html_theme = 'sphinx_rtd_theme' | ||
|
||
# Add any paths that contain custom static files (such as style sheets) here, | ||
# relative to this directory. They are copied after the builtin static files, | ||
# so a file named "default.css" will overwrite the builtin "default.css". | ||
#html_static_path = ['_static'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
fuzz | ||
~~~~ | ||
|
||
.. automodule:: fuzzywuzzy.fuzz | ||
:members: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
.. FuzzyWuzzy documentation master file, created by | ||
sphinx-quickstart on Wed Jan 8 15:20:36 2020. | ||
You can adapt this file completely to your liking, but it should at least | ||
contain the root `toctree` directive. | ||
FuzzyWuzzy | ||
========== | ||
|
||
Fuzzy string matching like a boss. It uses `Levenshtein Distance <https://en.wikipedia.org/wiki/Levenshtein_distance>`_ to calculate the differences between sequences in a simple-to-use package. | ||
|
||
|
||
.. toctree:: | ||
:maxdepth: 2 | ||
:caption: Contents: | ||
|
||
installation | ||
usage | ||
API | ||
|
||
|
||
Indices and tables | ||
================== | ||
|
||
* :ref:`genindex` | ||
* :ref:`modindex` | ||
* :ref:`search` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
Installation | ||
============ | ||
|
||
Requirements | ||
~~~~~~~~~~~~ | ||
|
||
- Python 2.7 or higher | ||
- difflib | ||
- `python-Levenshtein <https://github.com/ztane/python-Levenshtein/>`_ (optional, provides a 4-10x speedup in String | ||
Matching, though may result in `differing results for certain cases <https://github.com/seatgeek/fuzzywuzzy/issues/128>`_) | ||
|
||
For testing | ||
----------- | ||
|
||
- pycodestyle | ||
- hypothesis | ||
- pytest | ||
|
||
Using PIP | ||
~~~~~~~~~ | ||
|
||
Via PyPI | ||
-------- | ||
|
||
.. code:: bash | ||
pip install fuzzywuzzy | ||
or the following to install `python-Levenshtein` too | ||
|
||
.. code:: bash | ||
pip install fuzzywuzzy[speedup] | ||
Via Github | ||
---------- | ||
|
||
.. code:: bash | ||
pip install git+git://github.com/seatgeek/fuzzywuzzy.git@0.17.0#egg=fuzzywuzzy | ||
Adding to your ``requirements.txt`` file (run ``pip install -r requirements.txt`` afterwards) | ||
|
||
.. code:: bash | ||
git+ssh://git@github.com/seatgeek/fuzzywuzzy.git@0.17.0#egg=fuzzywuzzy | ||
Manually via GIT | ||
~~~~~~~~~~~~~~~~ | ||
|
||
.. code:: bash | ||
git clone git://github.com/seatgeek/fuzzywuzzy.git fuzzywuzzy | ||
cd fuzzywuzzy | ||
python setup.py install |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
process | ||
~~~~~~~ | ||
|
||
.. automodule:: fuzzywuzzy.process | ||
:members: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
string_processing | ||
~~~~~~~~~~~~~~~~~ | ||
|
||
.. automodule:: fuzzywuzzy.string_processing | ||
:members: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
Usage | ||
===== | ||
|
||
.. code:: python | ||
>>> from fuzzywuzzy import fuzz | ||
>>> from fuzzywuzzy import process | ||
Simple Ratio | ||
~~~~~~~~~~~~ | ||
|
||
.. code:: python | ||
>>> fuzz.ratio("this is a test", "this is a test!") | ||
97 | ||
Partial Ratio | ||
~~~~~~~~~~~~~ | ||
|
||
.. code:: python | ||
>>> fuzz.partial_ratio("this is a test", "this is a test!") | ||
100 | ||
Token Sort Ratio | ||
~~~~~~~~~~~~~~~~ | ||
|
||
.. code:: python | ||
>>> fuzz.ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear") | ||
91 | ||
>>> fuzz.token_sort_ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear") | ||
100 | ||
Token Set Ratio | ||
~~~~~~~~~~~~~~~ | ||
|
||
.. code:: python | ||
>>> fuzz.token_sort_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear") | ||
84 | ||
>>> fuzz.token_set_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear") | ||
100 | ||
Process | ||
~~~~~~~ | ||
|
||
.. code:: python | ||
>>> choices = ["Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys"] | ||
>>> process.extract("new york jets", choices, limit=2) | ||
[('New York Jets', 100), ('New York Giants', 78)] | ||
>>> process.extractOne("cowboys", choices) | ||
('Dallas Cowboys', 90) | ||
You can also pass additional parameters to the ``extractOne`` method to make it use a specific scorer. A typical use case is to match file paths: | ||
|
||
.. code:: python | ||
>>> process.extractOne("System of a down - Hypnotize - Heroin", songs) | ||
('/music/library/good/System of a Down/2005 - Hypnotize/01 - Attack.mp3', 86) | ||
>>> process.extractOne("System of a down - Hypnotize - Heroin", songs, scorer=fuzz.token_sort_ratio) | ||
("/music/library/good/System of a Down/2005 - Hypnotize/10 - She's Like Heroin.mp3", 61) | ||
.. |Build Status| image:: https://api.travis-ci.org/seatgeek/fuzzywuzzy.png?branch=master | ||
:target: https://travis-ci.org/seatgeek/fuzzywuzzy | ||
|
||
Known Ports | ||
============ | ||
|
||
FuzzyWuzzy is being ported to other languages too! Here are a few ports we know about: | ||
|
||
- Java: `xpresso's fuzzywuzzy implementation <https://github.com/WantedTechnologies/xpresso/wiki/Approximate-string-comparison-and-pattern-matching-in-Java>`_ | ||
- Java: `fuzzywuzzy (java port) <https://github.com/xdrop/fuzzywuzzy>`_ | ||
- Rust: `fuzzyrusty (Rust port) <https://github.com/logannc/fuzzyrusty>`_ | ||
- JavaScript: `fuzzball.js (JavaScript port) <https://github.com/nol13/fuzzball.js>`_ | ||
- C++: `Tmplt/fuzzywuzzy <https://github.com/Tmplt/fuzzywuzzy>`_ | ||
- C#: `fuzzysharp (.Net port) <https://github.com/BoomTownRoi/BoomTown.FuzzySharp>`_ | ||
- Go: `go-fuzzywuzzy (Go port) <https://github.com/paul-mannino/go-fuzzywuzzy>`_ | ||
- Free Pascal: `FuzzyWuzzy.pas (Free Pascal port) <https://github.com/DavidMoraisFerreira/FuzzyWuzzy.pas>`_ | ||
- Kotlin multiplatform: `FuzzyWuzzy-Kotlin <https://github.com/willowtreeapps/fuzzywuzzy-kotlin>`_ | ||
- R: `fuzzywuzzyR (R port) <https://github.com/mlampros/fuzzywuzzyR>`_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
utils | ||
~~~~~ | ||
|
||
.. automodule:: fuzzywuzzy.utils | ||
:members: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
@ECHO OFF | ||
|
||
pushd %~dp0 | ||
|
||
REM Command file for Sphinx documentation | ||
|
||
if "%SPHINXBUILD%" == "" ( | ||
set SPHINXBUILD=sphinx-build | ||
) | ||
set SOURCEDIR=docs | ||
set BUILDDIR=build | ||
|
||
if "%1" == "" goto help | ||
|
||
%SPHINXBUILD% >NUL 2>NUL | ||
if errorlevel 9009 ( | ||
echo. | ||
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx | ||
echo.installed, then set the SPHINXBUILD environment variable to point | ||
echo.to the full path of the 'sphinx-build' executable. Alternatively you | ||
echo.may add the Sphinx directory to PATH. | ||
echo. | ||
echo.If you don't have Sphinx installed, grab it from | ||
echo.http://sphinx-doc.org/ | ||
exit /b 1 | ||
) | ||
|
||
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% | ||
goto end | ||
|
||
:help | ||
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% | ||
|
||
:end | ||
popd |