dhuppenkothen · ChristinaB · Jan 11, 2019 · Feb 1, 2019 · May 16, 2019 · May 16, 2019
diff --git a/.travis_dependencies.sh b/.travis_dependencies.sh
@@ -11,7 +11,7 @@ conda_create ()
     conda update -q conda
     conda config --add channels pypi
     conda info -a
-    deps='six nose coverage pip numpy scipy pandas matplotlib'
+    deps='six nose coverage pip numpy scipy pandas matplotlib future'
 
     conda create -q -n $ENV_NAME "python=$TRAVIS_PYTHON_VERSION" $deps
 }

diff --git a/README.md b/README.md
@@ -1,4 +1,5 @@
 # entrofy
+
 Participant selection for workshops and conferences made easy.
 
 Selection of participants for meetings as a discrete optimization problem.
@@ -31,18 +32,29 @@ Note that this is explicitly not the same as a quota: the underlying algorithim
 - Subset of candidates which approximate the target distributions
 - Distribution of each value in subset
 
-## Install Instructions
+## App Installation Instructions
 
 **Note**: The app is under active development and requires updates before running reliably. As of now, it's not really usable. Use the Python library instead (see below).
 
 Download the app/ directory
 
-```
+```bash
 pip install -r requirements.txt
 python server.py
 ```
+
 then go to http://localhost:5000/ in your web browser
 
+## Python Library Installation Instructions
+
+As noted above, `entrofy` is currently best used as a Python package. From the top-level `entrofy` directory, install the package into your local environment:
+
+```bash
+python setup.py install
+```
+
+This command should also install all dependencies required by the Python package.
+
 ## Usage Instructions
 
 Below are some practical considerations.

diff --git a/app/entrofy.py b/app/entrofy.py
@@ -1,25 +1,31 @@
 #!/usr/bin/env python
 from __future__ import print_function
+from __future__ import division
 
+from builtins import zip
+from builtins import range
+from past.utils import old_div
 import numpy as np
 import pandas as pd
 
+
 def obj(p, w, q):
     # Prevent numerical underflow in log
 
     amin = 1e-200
 
-    pbar = 1. - p
-    qbar = 1. - q
+    pbar = 1.0 - p
+    qbar = 1.0 - q
 
-    entropy = (p * (np.log(p + amin) - np.log(q + amin)) +
-               pbar * (np.log(pbar + amin) - np.log(qbar + amin)))
+    entropy = p * (np.log(p + amin) - np.log(q + amin)) + pbar * (
+        np.log(pbar + amin) - np.log(qbar + amin)
+    )
 
-    return - entropy.dot(w)
+    return -entropy.dot(w)
 
 
 def __entrofy(X, k, w=None, q=None, pre_selects=None, quantile=0.01):
-    '''See entrofy() for documentation'''
+    """See entrofy() for documentation"""
 
     n_participants, n_attributes = X.shape
 
@@ -60,7 +66,7 @@ def __entrofy(X, k, w=None, q=None, pre_selects=None, quantile=0.01):
         p[np.isnan(p)] = 0.0
 
         # Compute the candidate distributions
-        p_new = (p * i + X) / (i + 1.0)
+        p_new = old_div((p * i + X), (i + 1.0))
 
         # Wherever X is nan, propagate the old p since we have no new information
         p_new[Xn] = (Xn * p)[Xn]
@@ -73,16 +79,18 @@ def __entrofy(X, k, w=None, q=None, pre_selects=None, quantile=0.01):
 
         # Select the top score.  Break near-ties randomly.
         delta_real = delta[np.isfinite(delta)]
-        target_score = np.percentile(delta_real, 1.0-quantile)
+        target_score = np.percentile(delta_real, 1.0 - quantile)
 
         new_idx = np.random.choice(np.flatnonzero(delta >= target_score))
         y[new_idx] = True
 
     return obj(np.nanmean(X[y], axis=0), w, q), np.flatnonzero(y)
 
 
-def entrofy(X, k, w=None, q=None, pre_selects=None, quantile=0.01, n_samples=15):
-    '''Entrofy your panel.
+def entrofy(
+    X, k, w=None, q=None, pre_selects=None, quantile=0.01, n_samples=15
+):
+    """Entrofy your panel.
 
     Parameters
     ----------
@@ -105,7 +113,7 @@ def entrofy(X, k, w=None, q=None, pre_selects=None, quantile=0.01, n_samples=15)
         Optionally, you may pre-specify a set of rows to be forced into the solution.
 
     quantile : float, values in [0,1]
-        Define the quantile to be used in tie-breaking between top choices at 
+        Define the quantile to be used in tie-breaking between top choices at
         every step; choose e.g. 0.01 for the top 1% quantile
         By default, 0.01
 
@@ -122,13 +130,14 @@ def entrofy(X, k, w=None, q=None, pre_selects=None, quantile=0.01, n_samples=15)
     idx : np.ndarray, shape=(k,)
         Indicies of the selected rows
 
-    '''
+    """
     if pre_selects is not None and len(pre_selects):
         n_samples = 1
 
-    results = [__entrofy(X, k, w=w, q=q, pre_selects=pre_selects,
-                         quantile=quantile)
-               for _ in range(n_samples)]
+    results = [
+        __entrofy(X, k, w=w, q=q, pre_selects=pre_selects, quantile=quantile)
+        for _ in range(n_samples)
+    ]
 
     max_score, best = results[0]
     for score, solution in results[1:]:
@@ -165,13 +174,13 @@ def binarize(df, n_bins=5):
                 continue
 
             z += 1.0
-            new_name = '{}_{}'.format(column, value)
+            new_name = "{}_{}".format(column, value)
             df2[new_name] = new_series
             df2[new_name][pd.isnull(data)] = np.nan
             groupkeys.append(new_name)
 
         for k in groupkeys:
-            targets[k] = 1./z
+            targets[k] = 1.0 / z
 
     return df2, targets
 
@@ -189,25 +198,30 @@ def process_csv(fdesc):
     headers = []
     headers.extend([dict(title=_) for _ in df.columns])
 
-    return df.to_json(orient='values'), headers, targets, len(df), p_all
+    return df.to_json(orient="values"), headers, targets, len(df), p_all
 
 
 def process_table(data, index, columns, k, q, w, pre_selects):
 
-    df = pd.DataFrame(data=data, columns=[_['title'] for _ in columns])
+    df = pd.DataFrame(data=data, columns=[_["title"] for _ in columns])
     # Find the index column
     df = df.set_index(index)
 
     X = df.values.astype(np.float)
-    score, rows = entrofy(X, k, q=np.asarray([float(_) for _ in q]),
-                                w=np.asarray([float(_) for _ in w]),
-                                pre_selects=pre_selects)
+    score, rows = entrofy(
+        X,
+        k,
+        q=np.asarray([float(_) for _ in q]),
+        w=np.asarray([float(_) for _ in w]),
+        pre_selects=pre_selects,
+    )
 
     p_all = compute_p(X)
     p_selected = compute_p(X[rows])
 
     return score, rows, p_all, p_selected
 
+
 def compute_p(X):
 
     return np.nanmean(X, axis=0)
diff --git a/app/requirements.txt b/app/requirements.txt
@@ -2,5 +2,5 @@ argparse
 numpy>=1.7
 pandas>=0.15
 flask
-json
-ConfigParser
+configparser
+future
diff --git a/app/server.py b/app/server.py
@@ -2,10 +2,13 @@
 
 from __future__ import print_function
 
+from future import standard_library
+
+standard_library.install_aliases()
 import argparse
 from flask import Flask, request, redirect, url_for, render_template, Response
 from werkzeug import secure_filename
-import ConfigParser
+import configparser
 import os
 import re
 import sys
@@ -18,8 +21,9 @@
 app = Flask(__name__)
 app.config.from_object(__name__)
 
+
 def load_config(server_ini):
-    P = ConfigParser.RawConfigParser()
+    P = configparser.RawConfigParser()
 
     P.opionxform = str
     P.read(server_ini)
@@ -28,7 +32,7 @@ def load_config(server_ini):
     for section in P.sections():
         CFG[section] = dict(P.items(section))
 
-    for (k, v) in CFG['server'].iteritems():
+    for (k, v) in CFG["server"].items():
         app.config[k] = v
     return CFG
 
@@ -37,86 +41,100 @@ def run(**kwargs):
     app.run(**kwargs)
 
 
-@app.route('/h', methods=['POST'])
+@app.route("/h", methods=["POST"])
 def sample():
 
     data = request.get_json()
-    pre_selects = data['pre_selects']
+    pre_selects = data["pre_selects"]
     if len(pre_selects) == 0:
         pre_selects = None
-    score, rows, p_all, p_selected = entrofy.process_table(data['data'],
-                                 data['index'],
-                                 data['columns'],
-                                 int(data['n_select']),
-                                 data['target'],
-                                 data['weights'],
-                                 pre_selects)
-
-    return json.dumps(dict(selections=list(rows),
-                            p_all=list(p_all),
-                            p_selected=list(p_selected)))
-
-
-@app.route('/p', methods=['POST'])
+    score, rows, p_all, p_selected = entrofy.process_table(
+        data["data"],
+        data["index"],
+        data["columns"],
+        int(data["n_select"]),
+        data["target"],
+        data["weights"],
+        pre_selects,
+    )
+
+    return json.dumps(
+        dict(
+            selections=list(rows),
+            p_all=list(p_all),
+            p_selected=list(p_selected),
+        )
+    )
+
+
+@app.route("/p", methods=["POST"])
 def process():
 
-    fdesc = request.files['csv']
+    fdesc = request.files["csv"]
 
     table, columns, targets, n, raw_stats = entrofy.process_csv(fdesc)
 
-    return render_template('process.html',
-                           table=table,
-                           columns=json.dumps(columns),
-                           kmax=n,
-                           targets=json.dumps(targets),
-                           raw_stats=json.dumps(raw_stats))
+    return render_template(
+        "process.html",
+        table=table,
+        columns=json.dumps(columns),
+        kmax=n,
+        targets=json.dumps(targets),
+        raw_stats=json.dumps(raw_stats),
+    )
 
 
-@app.route('/')
+@app.route("/")
 def index():
-    '''Top-level web page'''
-    return render_template('index.html')
+    """Top-level web page"""
+    return render_template("index.html")
 
 
 # Main block
 def process_arguments(args):
 
-    parser = argparse.ArgumentParser(description='entrofy web server')
-
-    parser.add_argument('-i',
-                        '--ini',
-                        dest='ini',
-                        required=False,
-                        type=str,
-                        default='server.ini',
-                        help='Path to server.ini file')
-
-    parser.add_argument('-p',
-                        '--port',
-                        dest='port',
-                        required=False,
-                        type=int,
-                        default=5000,
-                        help='Port')
-
-    parser.add_argument('--host',
-                        dest='host',
-                        required=False,
-                        type=str,
-                        default='0.0.0.0',
-                        help='host')
+    parser = argparse.ArgumentParser(description="entrofy web server")
+
+    parser.add_argument(
+        "-i",
+        "--ini",
+        dest="ini",
+        required=False,
+        type=str,
+        default="server.ini",
+        help="Path to server.ini file",
+    )
+
+    parser.add_argument(
+        "-p",
+        "--port",
+        dest="port",
+        required=False,
+        type=int,
+        default=5000,
+        help="Port",
+    )
+
+    parser.add_argument(
+        "--host",
+        dest="host",
+        required=False,
+        type=str,
+        default="0.0.0.0",
+        help="host",
+    )
 
     return vars(parser.parse_args(args))
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     parameters = process_arguments(sys.argv[1:])
 
-    CFG = load_config(parameters['ini'])
+    CFG = load_config(parameters["ini"])
 
-    port = parameters['port']
+    port = parameters["port"]
 
-    if os.environ.get('ENV') == 'production':
-        port = int(os.environ.get('PORT'))
+    if os.environ.get("ENV") == "production":
+        port = int(os.environ.get("PORT"))
 
-    run(host=parameters['host'], port=port, debug=DEBUG, processes=3)
+    run(host=parameters["host"], port=port, debug=DEBUG, processes=3)