From b702b125f3d67d57580976542a45d9d883a7e02e Mon Sep 17 00:00:00 2001
From: Dylan Katz
Date: Sun, 25 Oct 2020 23:44:43 -0700
Subject: [PATCH 1/5] Refactor paste site structure, added test

---
 pastehunter-cli                    |  39 ++++++++--
 pastehunter/inputs/base_input.py   |  52 +++++++++++++
 pastehunter/inputs/ixio.py         |   0
 pastehunter/inputs/pastebin.py     | 115 +++++++++++++++++++----------
 pastehunter/inputs/slexy.py        | 100 ++++++++++++-------------
 pastehunter/outputs/http_output.py |   3 +-
 test/test_paste_objects.py         |  76 +++++++++++++++++++
 7 files changed, 286 insertions(+), 99 deletions(-)
 create mode 100644 pastehunter/inputs/base_input.py
 create mode 100644 pastehunter/inputs/ixio.py
 create mode 100644 test/test_paste_objects.py

diff --git a/pastehunter-cli b/pastehunter-cli
index 29bf926..8dfbcd1 100644
--- a/pastehunter-cli
+++ b/pastehunter-cli
@@ -21,6 +21,11 @@ from pastehunter.common import parse_config
 
 VERSION = 1.0
 
+# Decided not to make this configurable as it currently really only applies to pastebin but may change in functionality later.
+# If someone would like this as a config key, please feel free to open an issue or a PR :)
+# TODO: @Plazmaz
+MAX_ITEM_RETRIES = 5
+
 # Setup Default logging
 root = logging.getLogger()
 ch = logging.StreamHandler()
@@ -50,16 +55,18 @@ class timeout:
     def __init__(self, seconds=1, error_message='Timeout'):
         self.seconds = seconds
         self.error_message = error_message
+
     def handle_timeout(self, signum, frame):
         raise TimeoutError("Process timeout: {0}".format(self.error_message))
+
     def __enter__(self):
         signal.signal(signal.SIGALRM, self.handle_timeout)
         signal.alarm(self.seconds)
+
     def __exit__(self, type, value, traceback):
         signal.alarm(0)
 
-
 # Set up the log file
 if "log" in conf and conf["log"]["log_to_file"]:
     if conf["log"]["log_path"] != "":
@@ -192,11 +199,31 @@ def paste_scanner(paste_data, rules_buff):
             if paste_site == 'slexy.org':
                 headers['User-Agent'] = 'PasteHunter'
 
-            req = requests.get(raw_paste_uri, headers=headers)
-            if req.status_code == 200:
-                raw_paste_data = req.text
-            else:
-                logger.error("Request returned unexpected response code {}: {}".format(req.status_code, req.text))
+            attempt_count = 0
+            while attempt_count < MAX_ITEM_RETRIES:
+                attempt_count += 1
+                req = requests.get(raw_paste_uri, headers=headers)
+                if req.status_code == 200:
+                    raw_paste_data = req.text
+                    if attempt_count > 1:
+                        logger.warning('Successfully resolved 429 exception')
+                    break
+
+                # We may want to handle other status codes in the future,
+                # for now 429 is the only code we retry for, just to avoid issues with
+                # rate limiting and hammering sites for 404s or outages
+                elif req.status_code == 429:
+                    logger.warning('Encountered unexpected 429 when requesting item at %s' +
+                                   ' for site "%s". Retrying (attempt %d)...', raw_paste_uri,
+                                   paste_site, attempt_count + 1)
+                    sleep(10)
+                else:
+                    logger.error("Request returned unexpected response code %d: %s", req.status_code,
+                                 req.text)
+
+            if req.status_code == 429:
+                logger.error("Unable to resolve 429 exception after %d retries, giving up on item %s.",
+                             MAX_ITEM_RETRIES, raw_paste_uri)
 
             # Cover fetch site SSLErrors
         except requests.exceptions.SSLError as e:
diff --git a/pastehunter/inputs/base_input.py b/pastehunter/inputs/base_input.py
new file mode 100644
index 0000000..68298ff
--- /dev/null
+++ b/pastehunter/inputs/base_input.py
@@ -0,0 +1,52 @@
+from abc import ABC, abstractmethod
+from typing import Any, Optional, Dict, List, Union
+
+import requests
+
+
+class BasePasteSite(ABC):
+    def make_request(self, url: str, timeout: Optional[int] = 10, headers: Optional[Dict[str, Any]] = None):
+        """
+        Make a request and return the results
+        :param url: The url to request
+        :param timeout: The timeout for the request
+        :param headers: The headers dict
+        :return:
+        """
+        req = requests.get(url, headers=headers, timeout=timeout)
+        return req
+
+    @abstractmethod
+    def remap_raw_item(self, raw_item: Union[str, Dict]) -> Dict[str, Any]:
+        """
+        Takes a raw item and remaps it to a normalized paste dict
+        :param raw_item:
+        :return: The paste dict
+        """
+        pass
+
+    @abstractmethod
+    def get_paste_for_id(self, paste_id: Any) -> str:
+        """
+        Returns a paste for the given paste_id
+        :param paste_id: The paste to retrieve
+        :return: A raw paste object
+        """
+        pass
+
+    @abstractmethod
+    def get_paste_id(self, paste_obj: Dict[str, Any]) -> Union[str, int]:
+        """
+        Returns an id for the given paste object
+        :param paste_obj: The raw paste dict
+        :return: The paste i
+        passd (str or int)
+        """
+
+    @abstractmethod
+    def get_recent_items(self, input_history: List[str]):
+        """
+        Gets recent items
+        :return: a list of recent items
+        """
+        pass
diff --git a/pastehunter/inputs/ixio.py b/pastehunter/inputs/ixio.py
new file mode 100644
index 0000000..e69de29
diff --git a/pastehunter/inputs/pastebin.py b/pastehunter/inputs/pastebin.py
index 23cb0a0..c25273e 100644
--- a/pastehunter/inputs/pastebin.py
+++ b/pastehunter/inputs/pastebin.py
@@ -1,49 +1,88 @@
+from typing import Any, Dict, Union, Optional
+
 import requests
 import logging
 from datetime import datetime
 
+from inputs.base_input import BasePasteSite
+
 logger = logging.getLogger('pastehunter')
 
-def recent_pastes(conf, input_history):
-    # populate vars from config
-    paste_limit = conf['inputs']['pastebin']['paste_limit']
-    api_scrape = conf['inputs']['pastebin']['api_scrape']
-    history = []
-    paste_list = []
-    try:
-        # Create the API uri
-        scrape_uri = '{0}?limit={1}'.format(api_scrape, paste_limit)
-        # Get some pastes and convert to json
-        # Get last 'paste_limit' pastes
-        paste_list_request = requests.get(scrape_uri)
-
-        # Check to see if our IP is whitelisted or not.
+ +class PastebinPasteSite(BasePasteSite): + + def __init__(self, conf): + self.conf = conf + + def remap_raw_item(self, raw_item: Dict) -> Dict[str, Any]: + # Create a new paste dict for us to normalize + pid = self.get_paste_id(raw_item) + paste_data = raw_item + paste_data['filename'] = pid + paste_data['confname'] = 'pastebin' + paste_data['pasteid'] = pid + paste_data['pastesite'] = 'pastebin.com' + # Add a date field that kibana will map + date = datetime.utcfromtimestamp(float(paste_data['date'])).isoformat() + paste_data['@timestamp'] = date + return paste_data + + def make_request(self, url: str, timeout: Optional[int] = 10, headers: Optional[Dict[str, Any]] = None): + paste_list_request = super(PastebinPasteSite, self).make_request(url, timeout, headers) + + # Check to see if our IP is whitelisted or not. if 'DOES NOT HAVE ACCESS' in paste_list_request.text: logger.error("Your IP is not whitelisted visits 'https://pastebin.com/doc_scraping_api'") - return [], [] - paste_list_json = paste_list_request.json() - - for paste in paste_list_json: - # Track paste ids to prevent dupes - history.append(paste['key']) - if paste['key'] in input_history: - continue - - # Create a new paste dict for us to normalize - paste_data = paste - paste_data['filename'] = paste['key'] - paste_data['confname'] = 'pastebin' - paste_data['pasteid'] = paste['key'] - paste_data['pastesite'] = 'pastebin.com' - # Add a date field that kibana will map - date = datetime.utcfromtimestamp(float(paste_data['date'])).isoformat() - paste_data['@timestamp'] = date - paste_list.append(paste_data) - return paste_list, history - - except Exception as e: - logger.error("Unable to parse paste results: {0}".format(e)) - return paste_list, history + return None + return paste_list_request + + def get_paste_for_id(self, paste_id: Any) -> str: + pass + + def get_paste_id(self, paste_obj: Dict[str, Any]) -> Union[str, int]: + return paste_obj['key'] + + def get_recent_items(self, input_history): + paste_limit = self.conf['inputs']['pastebin']['paste_limit'] + api_scrape = self.conf['inputs']['pastebin']['api_scrape'] + + history = [] + paste_list = [] + try: + # Create the API uri + scrape_uri = '{0}?limit={1}'.format(api_scrape, paste_limit) + # Get some pastes and convert to json + # Get last 'paste_limit' pastes + + paste_list_request = self.make_request(scrape_uri) + + # IP not whitelisted + if not paste_list_request: + return [], [] + + paste_list_json = paste_list_request.json() + + for paste in paste_list_json: + pid = self.get_paste_id(paste) + # Track paste ids to prevent dupes + history.append(pid) + if pid in input_history: + continue + + paste_data = self.remap_raw_item(paste) + paste_list.append(paste_data) + + return paste_list, history + + except Exception as e: + logger.error("Unable to parse paste results: {0}".format(e)) + return paste_list, history + + +def recent_pastes(conf, input_history): + site = PastebinPasteSite(conf) + # populate vars from config + return site.get_recent_items(input_history) diff --git a/pastehunter/inputs/slexy.py b/pastehunter/inputs/slexy.py index e762fca..af3013f 100644 --- a/pastehunter/inputs/slexy.py +++ b/pastehunter/inputs/slexy.py @@ -2,33 +2,29 @@ import re from datetime import datetime from time import sleep +from typing import Any, Dict, Optional, List, Union -import requests +from inputs.base_input import BasePasteSite logger = logging.getLogger('pastehunter') -class SlexySite(object): +class SlexyPasteSite(BasePasteSite): def __init__(self): + self.url = None self.site 
= "slexy.org" url_slexy = "https://" + self.site self.url_recent = url_slexy + "/recent" self.url_view = url_slexy + "/view" self.url_raw = url_slexy + "/raw" - self.url = None - - def request_view_link(self, pid): - return self._make_request("%s/%s" % (self.url_view, pid)) - - def raw_link(self, pid, args): - return "%s/%s%s" % (self.url_raw, pid, args) - def _make_request(self, url): - req = requests.get(url, headers={ + def make_request(self, url: str, timeout: Optional[int] = 10, headers: Optional[Dict[str, Any]] = None): + req = super(SlexyPasteSite, self).make_request(url, timeout, { 'Referer': self.url_recent, 'User-Agent': 'PasteHunter' - }, timeout=10) + }) + ratelimit_limit = int(req.headers.get('RateLimit-Limit', 30)) remaining = int(req.headers.get('RateLimit-Remaining', 30)) logger.debug('Remaining Slexy Ratelimit: {0}'.format(remaining)) @@ -36,59 +32,55 @@ def _make_request(self, url): if req.status_code == 429: timeout = req.headers.get('Retry-After', 60) sleep(timeout) - return self._make_request(url) + return self.make_request(url, timeout) # If ratelimit_limit = 60, 60/60 = 1 # If ratelimit_limit = 30, 60/30 = 2 sleep(30 / ratelimit_limit) return req.text - -class SlexyPaste(SlexySite): - def __init__(self, pid): - super(SlexyPaste, self).__init__() - self.pid = pid - self.site = self.site - self.timestamp = None - self.parse() - - def parse(self): - data = self.request_view_link(self.pid) - self.timestamp = self.get_timestamp(data) - self.url = self.get_raw_link(data) - - def get_raw_link(self, data): - pattern = ' Union[str, int]: + return paste_obj.get('pasteid') + + def remap_raw_item(self, raw_item: [str, Dict]) -> Dict[str, Any]: + timestamp = self.get_timestamp(raw_item) + paste_id = self.get_paste_id(raw_item) + raw_url = self.get_raw_link(raw_item, paste_id) + self.get_paste_id(raw_item) + return { + 'confname': 'slexy', + 'scrape_url': raw_url, + 'pasteid': paste_id, + 'pastesite': self.site, + '@timestamp': timestamp + } + + def get_raw_data(self, raw_url): + return self.make_request(raw_url) + + def get_paste_for_id(self, paste_id: Any) -> str: + return self.make_request("%s/%s" % (self.url_view, paste_id)) + + def get_raw_link(self, data, pid): + pattern = '', getdata) + def get_recent_items(self, input_history: List[str]): + data = self.make_request(self.url_recent) + pids = re.findall('', data) return list(set(pids)) def recent_pastes(conf, input_history): history = [] paste_list = [] - my_scraper = SlexyScraper() - recent_pids = my_scraper.get_recents() + my_scraper = SlexyPasteSite() + recent_pids = my_scraper.get_recent_items(input_history) pid_to_process = set() for pid in recent_pids: if pid in input_history: @@ -97,14 +89,14 @@ def recent_pastes(conf, input_history): pid_to_process.add(pid) try: for pid in pid_to_process: - paste = SlexyPaste(pid) - history.append(paste.pid) + paste_data = my_scraper.get_paste_for_id(pid) + raw = my_scraper.get_raw_link(paste_data, pid) paste_data = { 'confname': 'slexy', - 'scrape_url': paste.url, - 'pasteid': paste.pid, - 'pastesite': paste.site, - '@timestamp': paste.timestamp + 'scrape_url': raw, + 'pasteid': pid, + 'pastesite': my_scraper.site, + '@timestamp': my_scraper.get_timestamp(paste_data) } paste_list.append(paste_data) return paste_list, history diff --git a/pastehunter/outputs/http_output.py b/pastehunter/outputs/http_output.py index 430acf4..f90fc5f 100644 --- a/pastehunter/outputs/http_output.py +++ b/pastehunter/outputs/http_output.py @@ -1,6 +1,7 @@ import logging + import requests -import json + 
from pastehunter.common import parse_config logger = logging.getLogger('pastehunter') diff --git a/test/test_paste_objects.py b/test/test_paste_objects.py new file mode 100644 index 0000000..1aab169 --- /dev/null +++ b/test/test_paste_objects.py @@ -0,0 +1,76 @@ +from inputs import slexy +from inputs.pastebin import PastebinPasteSite +from inputs.slexy import SlexyPasteSite + + +pids_found = [] +def mock_get_paste_for_pid(pid): + pids_found.append(pid) + return "pid_is_" + pid + + +class FakeRequestJson(object): + def __init__(self, ret): + self.ret = ret + + def json(self): + return self.ret + + +def test_slexy_site(): + pid_list_fake = [0, 1, 2, 3, 4] + slexy_site = SlexyPasteSite() + slexy_site.get_recent_items = lambda: pid_list_fake + slexy_site.get_paste_for_id = lambda pid: mock_get_paste_for_pid(str(pid)) + slexy_site.remap_raw_item = lambda raw_data, pid: {"pid": 123} + recent_pids = slexy_site.get_recent_items() + assert recent_pids == pid_list_fake + for pid in recent_pids: + paste = slexy_site.get_paste_for_id(pid) + paste_data = slexy_site.remap_raw_item(paste, pid) + assert paste == 'pid_is_' + str(pid) + assert paste_data == {"pid": 123} + +def test_pastebin_site_remap(): + fake_conf = { + 'inputs': { + 'pastebin': { + 'paste_limit': 100, + 'api_scrape': 'https://scrape.pastebin.com/api_scraping.php' + } + } + } + data = { + 'key': 'a', + 'test': 'b', + 'date': '1582595793' + } + pastebin_site = PastebinPasteSite(fake_conf) + out = pastebin_site.remap_raw_item(data) + assert out == {'key': 'a', 'test': 'b', 'date': '1582595793', 'filename': 'a', 'confname': 'pastebin', + 'pasteid': 'a', 'pastesite': 'pastebin.com', '@timestamp': '2020-02-25T01:56:33'} + +def test_pastebin_site(): + fake_conf = { + 'inputs': { + 'pastebin': { + 'paste_limit': 100, + 'api_scrape': 'https://scrape.pastebin.com/api_scraping.php' + } + } + } + pastebin_site = PastebinPasteSite(fake_conf) + pastebin_site.make_request = lambda url: FakeRequestJson([ + { + 'key': 'ab', + 'date': '1582595793' + }, + { + 'key': 'bc', + 'date': '1582595793' + } + ]) + pastes, paste_ids = pastebin_site.get_recent_items([]) + assert paste_ids == ['ab', 'bc'] + assert pastes[0].get('key') == 'ab' + assert pastes[1].get('key') == 'bc' From 583c2078c7ee0b4694b2c0e5f396b78af6b13e95 Mon Sep 17 00:00:00 2001 From: Dylan Katz Date: Sat, 31 Oct 2020 00:07:38 -0700 Subject: [PATCH 2/5] Added support for ixio Resolves #95 Since this is a smallish site I've disabled the input by default. To the owner of ixio, if you'd like us to implement ratelimiting mechanisms please reach out! 
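
A note on the approach: ix.io paste IDs are sequential base62 values, so even
though the /user/ page only lists anonymous pastes, every ID between two
observed IDs can be inferred and fetched. A standalone sketch of that idea,
using the new helpers added to pastehunter/common.py in this patch
(fill_id_gaps is an illustrative name for this message, not a function in the
patch itself):

    from pastehunter.common import base62_decode, base62_encode

    def fill_id_gaps(encoded_ids):
        # Decode the observed IDs to ints, then enumerate the full range;
        # ix.io assigns IDs sequentially, so everything in between exists too.
        ids = sorted(base62_decode(e) for e in encoded_ids)
        return [base62_encode(i) for i in range(ids[0], ids[-1] + 1)]

    # fill_id_gaps(['2BZF', '2BZH']) == ['2BZF', '2BZG', '2BZH']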
---
 pastehunter/common.py            |  26 ++
 pastehunter/inputs/base_input.py |   2 +-
 pastehunter/inputs/ixio.py       | 114 ++++++
 settings.json.sample             |   9 +-
 test/test_base62.py              |  11 +
 test/test_ix.py                  | 607 +++++++++++++++++++++++++++++++
 6 files changed, 766 insertions(+), 3 deletions(-)
 create mode 100644 test/test_base62.py
 create mode 100644 test/test_ix.py

diff --git a/pastehunter/common.py b/pastehunter/common.py
index 54fd4a3..08023df 100644
--- a/pastehunter/common.py
+++ b/pastehunter/common.py
@@ -5,6 +5,10 @@
 logger = logging.getLogger('pastehunter')
 home = os.path.expanduser("~")
 
+BASE62_CHARS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+BASE_LOOKUP = dict((c, i) for i, c in enumerate(BASE62_CHARS))
+BASE_LEN = len(BASE62_CHARS)
+
 # Parse the config file in to a dict
 def parse_config():
     conf = None
@@ -26,3 +30,25 @@
         logger.error("Unable to read config file '~/.config/pastehunter.json'")
 
     return conf
+
+
+# Most of this was pulled from https://stackoverflow.com/a/2549514
+def base62_decode(input: str) -> int:
+    length = len(BASE_LOOKUP)
+    ret = 0
+    for i, c in enumerate(input[::-1]):
+        ret += (length ** i) * BASE_LOOKUP[c]
+
+    return ret
+
+
+def base62_encode(integer) -> str:
+    if integer == 0:
+        return BASE62_CHARS[0]
+
+    ret = ''
+    while integer != 0:
+        ret = BASE62_CHARS[integer % BASE_LEN] + ret
+        integer //= BASE_LEN
+
+    return ret
diff --git a/pastehunter/inputs/base_input.py b/pastehunter/inputs/base_input.py
index 68298ff..f224223 100644
--- a/pastehunter/inputs/base_input.py
+++ b/pastehunter/inputs/base_input.py
@@ -39,7 +39,7 @@ def get_paste_id(self, paste_obj: Dict[str, Any]) -> Union[str, int]:
         """
         Returns an id for the given paste object
         :param paste_obj: The raw paste dict
-        :return: The paste i
+        :return: The paste id
         passd (str or int)
         """
 
diff --git a/pastehunter/inputs/ixio.py b/pastehunter/inputs/ixio.py
index e69de29..bb0a191 100644
--- a/pastehunter/inputs/ixio.py
+++ b/pastehunter/inputs/ixio.py
@@ -0,0 +1,114 @@
+import logging
+import re
+from datetime import datetime
+from time import sleep
+from typing import List, Any, Dict, Union
+
+import requests
+
+from common import base62_decode, base62_encode
+from inputs.base_input import BasePasteSite
+
+logger = logging.getLogger('pastehunter')
+
+
+class IxDotIoSite(BasePasteSite):
+    # Yeah, yeah, I know, no regex for HTML parsing...
+    # If we end up doing a lot more of this, then maybe we'll use beautifulsoup or something.
+    # Capturing groups:
+    # 1. Paste ID
+    # 2. Timestamp
+    _ITEM_ID_RE: re.Pattern = re.compile('<a href="/([a-zA-Z0-9]+)">[\\sa-zA-Z0-9]+'
+                                         '\\[r][^\r\n]+'
+                                         '\\s+@ (.*?)[\r\n]')
+
+    def __init__(self, conf):
+        self.conf = conf
+        self.site = "ix.io"
+        url_main = "http://" + self.site
+        self.url_recent = url_main + "/user/"
+        self.view_pattern = url_main + "/{}/"
+        self.raw_pattern = url_main + "/{}"
+        self.url = None
+
+    def remap_raw_item(self, raw_item: Union[str, Dict]) -> Dict[str, Any]:
+        pid = raw_item['pid']
+        paste_data = {
+            'filename': str(pid),
+            'confname': 'ixio',
+            'pastesite': self.site,
+            'pasteid': pid,
+        }
+        # Timezone is UTC/Zulu
+        date = datetime.strptime(raw_item['date'], '%a %b %d %H:%M:%S %Y').isoformat()
+        paste_data['@timestamp'] = date
+        encoded_pid = self.get_paste_id(paste_data)
+        paste_data['scrape_url'] = self.raw_pattern.format(encoded_pid)
+        return paste_data
+
+    def get_paste_for_id(self, paste_id: Any) -> str:
+        return self.make_request(self.raw_pattern.format(paste_id)).text
+
+    def get_paste_id(self, paste_obj: Dict[str, Any]) -> str:
+        decoded = paste_obj.get('pasteid')
+        return base62_encode(decoded)
+
+    def get_recent_items(self, input_history: List[str]):
+        history = []
+        paste_list = []
+        try:
+            recent_page = self.make_request(self.url_recent)
+            item_data = self.get_data_for_page(recent_page.text)
+
+            for val in item_data:
+                # Track paste ids to prevent dupes
+                pid = val['pid']
+                history.append(pid)
+                if pid in input_history:
+                    continue
+                paste_data = self.remap_raw_item(val)
+                paste_list.append(paste_data)
+
+            return paste_list, history
+
+        except Exception as e:
+            logger.error("Unable to parse ixio items: {0}".format(e))
+            return paste_list, history
+
+    def get_data_for_page(self, page_data: str) -> List[Dict[str, Union[int, str]]]:
+        page: List[Dict[str, Union[int, str]]] = []
+        last_item_id = -1
+        regex_matches = self._ITEM_ID_RE.findall(page_data)
+        # We are going to reverse the order because ix pages are structured newest -> oldest, and this makes it simpler.
+        regex_matches.reverse()
+        for encoded_id, created_at in regex_matches:
+            # Okay so the logic here is a bit tricky. Basically, ix's all user page only returns anonymous pastes
+            # BUT! We can infer the paste ids that aren't present by filling in the blanks, because ix IDs are
+            # incremental. So first, we base62 decode the value so we can use it as an int
+            item_id = base62_decode(encoded_id)
+            # Then, we check if we've seen another value. If this is our first, we can skip a lot of this logic.
+            # (we probably don't want to go back and grab every ix paste historically for most use cases)
+            if last_item_id == -1:
+                page.append({'pid': item_id, 'date': created_at})
+                last_item_id = item_id
+            # If there has been a delta, let's traverse it.
+            elif item_id - last_item_id > 1:
+                # We've already hit last_item_id so we skip that and fill in the delta
+                for i in range(last_item_id + 1, item_id + 1):
+                    # Copy the created date as a best guess
+                    page.append({'pid': i, 'date': created_at})
+                last_item_id = item_id
+            else:
+                # If there's no delta, just add this normally
+                page.append({'pid': item_id, 'date': created_at})
+                last_item_id = item_id
+        return page
+
+
+def recent_pastes(conf, input_history):
+    site = IxDotIoSite(conf)
+
+    # populate vars from config
+    return site.get_recent_items(input_history)
diff --git a/settings.json.sample b/settings.json.sample
index 5a032d8..4a43f5d 100644
--- a/settings.json.sample
+++ b/settings.json.sample
@@ -5,7 +5,12 @@
       "module": "pastehunter.inputs.pastebin",
       "api_scrape": "https://scrape.pastebin.com/api_scraping.php",
       "api_raw": "https://scrape.pastebin.com/api_scrape_item.php?i=",
-      "paste_limit": 200,
+      "paste_limit": 100,
+      "store_all": false
+    },
+    "ixio": {
+      "enabled": false,
+      "module": "pastehunter.inputs.ixio",
       "store_all": false
     },
     "dumpz": {
@@ -21,7 +26,7 @@
       "enabled": true,
       "module": "pastehunter.inputs.gists",
       "api_token": "",
-      "api_limit": 100,
+      "api_limit": 200,
       "store_all": false,
       "user_blacklist": [],
       "file_blacklist": ["grahamcofborg-eval-package-list", "Changed Paths"]
diff --git a/test/test_base62.py b/test/test_base62.py
new file mode 100644
index 0000000..241cae2
--- /dev/null
+++ b/test/test_base62.py
@@ -0,0 +1,11 @@
+from common import base62_decode, base62_encode
+
+def test_b62_encode():
+    assert base62_encode(622708) == '2BZG'
+    assert base62_encode(622707) == '2BZF'
+
+def test_b62_decode():
+    assert base62_decode('1') == 1
+    assert base62_decode('a') == 10
+    assert base62_decode('2BZF') == 622707
+    assert base62_decode('2BZG') == 622708
\ No newline at end of file
diff --git a/test/test_ix.py b/test/test_ix.py
new file mode 100644
index 0000000..8892602
--- /dev/null
+++ b/test/test_ix.py
@@ -0,0 +1,607 @@
+from inputs.ixio import IxDotIoSite
+test_data = '''
+
+
+ +
+2CmF +[r] [h] +@ Thu Oct 29 07:00:14 2020 +
+ +
+2CmE +[r] [h] +@ Thu Oct 29 07:00:13 2020 +
+ +
+2CmD +[r] [h] +@ Thu Oct 29 06:47:22 2020 +
+ +
+2CmC +[r] [h] +@ Thu Oct 29 06:26:48 2020 +
+ +
+2CmB +[r] [h] +@ Thu Oct 29 06:21:48 2020 +
+ +
+2CmA +[r] [h] +@ Thu Oct 29 06:19:33 2020 +
+ +
+2Cmz +[r] [h] +@ Thu Oct 29 06:08:17 2020 +
+ +
+2Cmy +[r] [h] +@ Thu Oct 29 06:00:16 2020 +
+ +
+2Cmx +[r] [h] +@ Thu Oct 29 06:00:14 2020 +
+ +
+2Cmw +[r] [h] +@ Thu Oct 29 05:03:04 2020 +
+ +
+2Cmv +[r] [h] +@ Thu Oct 29 05:03:02 2020 +
+ +
+2Cmu +[r] [h] +@ Thu Oct 29 05:00:09 2020 +
+ +
+2Cmt +[r] [h] +@ Thu Oct 29 04:58:15 2020 +
+ +
+2Cms +[r] [h] +@ Thu Oct 29 04:57:39 2020 +
+ +
+2Cmr +[r] [h] +@ Thu Oct 29 04:57:31 2020 +
+ +
+2Cmq +[r] [h] +@ Thu Oct 29 04:57:24 2020 +
+ +
+2Cmp +[r] [h] +@ Thu Oct 29 04:51:45 2020 +
+ +
+2Cmo +[r] [h] +@ Thu Oct 29 04:10:10 2020 +
+ +
+2Cmn +[r] [h] +@ Thu Oct 29 04:09:34 2020 +
+ +
+2Cmm +[r] [h] +@ Thu Oct 29 04:02:17 2020 +
+ +
+2Cml +[r] [h] +@ Thu Oct 29 04:00:14 2020 +
+ +
+2Cmk +[r] [h] +@ Thu Oct 29 04:00:04 2020 +
+ +
+2Cmj +[r] [h] +@ Thu Oct 29 03:58:55 2020 +
+ +
+2Cmi +[r] [h] +@ Thu Oct 29 03:57:40 2020 +
+ +
+2Cmh +[r] [h] +@ Thu Oct 29 03:50:57 2020 +
+ +
+2Cmg +[r] [h] +@ Thu Oct 29 03:42:28 2020 +
+ +
+2Cmf +[r] [h] +@ Thu Oct 29 03:40:56 2020 +
+ +
+2Cme +[r] [h] +@ Thu Oct 29 03:27:14 2020 +
+ +
+2Cmd +[r] [h] +@ Thu Oct 29 03:26:44 2020 +
+ +
+2Cmc +[r] [h] +@ Thu Oct 29 03:26:29 2020 +
+ +
+2Cmb +[r] [h] +@ Thu Oct 29 03:22:12 2020 +
+ +
+2Cma +[r] [h] +@ Thu Oct 29 03:19:14 2020 +
+ +
+2Cm9 +[r] [h] +@ Thu Oct 29 03:19:00 2020 +
+ +
+2Cm8 +[r] [h] +@ Thu Oct 29 03:18:46 2020 +
+ +
+2Cm7 +[r] [h] +@ Thu Oct 29 03:18:05 2020 +
+ +
+2Cm6 +[r] [h] +@ Thu Oct 29 03:00:16 2020 +
+ +
+2Cm5 +[r] [h] +@ Thu Oct 29 02:59:56 2020 +
+ +
+2Cm4 +[r] [h] +@ Thu Oct 29 02:54:27 2020 +
+ +
+2Cm3 +[r] [h] +@ Thu Oct 29 02:30:04 2020 +
+ +
+2Cm1 +[r] [h] +@ Thu Oct 29 02:09:03 2020 +
+ +
+2Cm0 +[r] [h] +@ Thu Oct 29 02:04:08 2020 +
+ +
+2ClZ +[r] [h] +@ Thu Oct 29 02:02:27 2020 +
+ +
+2ClY +[r] [h] +@ Thu Oct 29 02:00:14 2020 +
+ +
+2ClX +[r] [h] +@ Thu Oct 29 02:00:13 2020 +
+ +
+2ClW +[r] [h] +@ Thu Oct 29 02:00:08 2020 +
+ +
+2ClV +[r] [h] +@ Thu Oct 29 01:56:47 2020 +
+ +
+2ClU +[r] [h] +@ Thu Oct 29 01:41:09 2020 +
+ +
+2ClS +[r] [h] +@ Thu Oct 29 01:30:02 2020 +
+ +
+2ClR +[r] [h] +@ Thu Oct 29 01:19:24 2020 +
+ +
+2ClQ +[r] [h] +@ Thu Oct 29 01:17:03 2020 +
+ +
+2ClP +[r] [h] +@ Thu Oct 29 01:00:13 2020 +
+ +
+2ClO +[r] [h] +@ Thu Oct 29 01:00:09 2020 +
+ +
+2ClN +[r] [h] +@ Thu Oct 29 00:46:53 2020 +
+ +
+2ClM +[r] [h] +@ Thu Oct 29 00:42:01 2020 +
+ +
+2ClL +[r] [h] +@ Thu Oct 29 00:27:03 2020 +
+ +
+2ClK +[r] [h] +@ Thu Oct 29 00:26:44 2020 +
+ +
+2ClJ +[r] [h] +@ Thu Oct 29 00:26:25 2020 +
+ +
+2ClI +[r] [h] +@ Thu Oct 29 00:26:05 2020 +
+ +
+2ClH +[r] [h] +@ Thu Oct 29 00:16:21 2020 +
+ +
+2ClG +[r] [h] +@ Thu Oct 29 00:16:07 2020 +
+ +
+2ClF +[r] [h] +@ Thu Oct 29 00:00:14 2020 +
+ +
+2ClE +[r] [h] +@ Thu Oct 29 00:00:07 2020 +
+ +
+2ClD +[r] [h] +@ Wed Oct 28 23:56:36 2020 +
+ +
+2ClC +[r] [h] +@ Wed Oct 28 23:54:07 2020 +
+2ClB +[r] [h] +@ Wed Oct 28 23:53:07 2020 + +2ClA +[r] [h] +@ Wed Oct 28 23:51:55 2020 + + +
+2Clz +[r] [h] +@ Wed Oct 28 23:50:24 2020 +
+ +
+2Cly +[r] [h] +@ Wed Oct 28 23:44:58 2020 +
+ +
+2Clx +[r] [h] +@ Wed Oct 28 23:40:54 2020 +
+ +
+2Clw +[r] [h] +@ Wed Oct 28 23:40:13 2020 +
+ +
+2Clv +[r] [h] +@ Wed Oct 28 23:38:37 2020 +
+ +
+2Clu +[r] [h] +@ Wed Oct 28 23:37:22 2020 +
+ +
+2Clt +[r] [h] +@ Wed Oct 28 23:31:22 2020 +
+ +
+2Cls +[r] [h] +@ Wed Oct 28 23:30:27 2020 +
+ +
+2Clr +[r] [h] +@ Wed Oct 28 23:25:57 2020 +
+ +
+2Clq +[r] [h] +@ Wed Oct 28 23:25:24 2020 +
+ +
+2Clo +[r] [h] +@ Wed Oct 28 23:07:09 2020 +
+ +
+2Cln +[r] [h] +@ Wed Oct 28 23:05:48 2020 +
+ +
+2Clm +[r] [h] +@ Wed Oct 28 23:02:16 2020 +
+ +
+2Cll +[r] [h] +@ Wed Oct 28 23:00:14 2020 +
+ +
+2Clk +[r] [h] +@ Wed Oct 28 23:00:07 2020 +
+ +
+2Clj +[r] [h] +@ Wed Oct 28 22:35:28 2020 +
+ +
+2Cli +[r] [h] +@ Wed Oct 28 22:32:50 2020 +
+ +
+2Clh +[r] [h] +@ Wed Oct 28 22:27:14 2020 +
+ +
+2Clg +[r] [h] +@ Wed Oct 28 22:16:44 2020 +
+ +
+2Clf +[r] [h] +@ Wed Oct 28 22:15:30 2020 +
+ +
+2Cle +[r] [h] +@ Wed Oct 28 22:14:18 2020 +
+ +
+2Cld +[r] [h] +@ Wed Oct 28 22:13:33 2020 +
+ +
+2Clc +[r] [h] +@ Wed Oct 28 22:11:11 2020 +
+ +
+2Clb +[r] [h] +issue #15767 @ Wed Oct 28 22:09:53 2020 +
+ +
+2Cla +[r] [h] +@ Wed Oct 28 22:08:25 2020 +
+ +
+2Cl9 +[r] [h] +@ Wed Oct 28 22:04:26 2020 +
+ +
+2Cl7 +[r] [h] +@ Wed Oct 28 22:00:23 2020 +
+ +
+2Cl6 +[r] [h] +@ Wed Oct 28 22:00:13 2020 +
+ +
+2Cl5 +[r] [h] +@ Wed Oct 28 22:00:09 2020 +
+ +
+2Cl4 +[r] [h] +@ Wed Oct 28 21:59:27 2020 +
+ +
+2Cl3 +[r] [h] +0001-DTS-sun8i-h2-plus-orangepi-zero-added-audio-codec.patch @ Wed Oct 28 21:58:51 2020 +
+ +
+2Cl2 +[r] [h] +@ Wed Oct 28 21:58:17 2020 +
+ +
+2Cl1 +[r] [h] +@ Wed Oct 28 21:56:42 2020 +
+ +
+ +''' +def test_page_items(): + site = IxDotIoSite(None) + ids = [x['pid'] for x in site.get_data_for_page(test_data)] + assert ids == [i for i in range(624031, 624134)] + +def \ No newline at end of file From 8975c6144da1049d47567293cf0ebdb081d6ed79 Mon Sep 17 00:00:00 2001 From: Dylan Katz Date: Sat, 31 Oct 2020 00:37:45 -0700 Subject: [PATCH 3/5] Whoops, fixed syntax error in test --- test/test_ix.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_ix.py b/test/test_ix.py index 8892602..a82f33d 100644 --- a/test/test_ix.py +++ b/test/test_ix.py @@ -599,9 +599,9 @@
''' + + def test_page_items(): site = IxDotIoSite(None) ids = [x['pid'] for x in site.get_data_for_page(test_data)] assert ids == [i for i in range(624031, 624134)] - -def \ No newline at end of file From 1d21e024364ac07200c33b2fec3ada7d54fef894 Mon Sep 17 00:00:00 2001 From: Dylan Katz Date: Sat, 14 Nov 2020 22:21:02 -0800 Subject: [PATCH 4/5] A few linting and docs tweaks --- .travis.yml | 3 ++- README.md | 4 ---- conftest.py | 2 -- docs/inputs.rst | 6 ++++++ pastehunter/inputs/ixio.py | 7 ++----- test/test_base62.py | 4 +++- test/test_paste_objects.py | 4 ++-- 7 files changed, 15 insertions(+), 15 deletions(-) delete mode 100644 conftest.py diff --git a/.travis.yml b/.travis.yml index 00ef9dc..afe4bdc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,8 +24,9 @@ install: - pip install pytest - pip install -e . script: +- wget https://raw.githubusercontent.com/kevthehermit/PasteHunter/master/settings.json.sample -O ~/.config/pastehunter.json - pastehunter-cli -- pytest +- python -m pytest after_success: - python setup.py sdist deploy: diff --git a/README.md b/README.md index 9198c03..7904126 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,6 @@ PasteHunter is a python3 application that is designed to query a collection of s For all the pastes it finds it scans the raw contents against a series of Yara rules looking for information that can be used by an organisation or a researcher. -## Pastebin API Deprecated - -We are aware that the pastebin scraping API has been deprectated and are reviewing. - ## Setup For setup instructions please see the official documentation https://pastehunter.readthedocs.io/en/latest/installation.html diff --git a/conftest.py b/conftest.py deleted file mode 100644 index c1ef2bd..0000000 --- a/conftest.py +++ /dev/null @@ -1,2 +0,0 @@ -# This is needed for some weird pytest hackery -# See https://stackoverflow.com/a/50610630/1742813 \ No newline at end of file diff --git a/docs/inputs.rst b/docs/inputs.rst index 7e84ff7..7384394 100644 --- a/docs/inputs.rst +++ b/docs/inputs.rst @@ -63,6 +63,12 @@ Slexy has some heavy rate limits (30 requests per 30 seconds), but may still ret - **api_raw**: The URL endpoint for the raw paste. - **api_view**: The URL enpoint to view the paste. +ix.io +--------- + +ix.io is a smaller site used primarily for console/command line pastes. + +- **store_all**: Store all pastes regardless of a rule match. StackExchange ------------- diff --git a/pastehunter/inputs/ixio.py b/pastehunter/inputs/ixio.py index bb0a191..9f220fd 100644 --- a/pastehunter/inputs/ixio.py +++ b/pastehunter/inputs/ixio.py @@ -1,10 +1,7 @@ import logging import re from datetime import datetime -from time import sleep -from typing import List, Any, Dict, Union - -import requests +from typing import List, Any, Dict, Union, Pattern from common import base62_decode, base62_encode from inputs.base_input import BasePasteSite @@ -18,7 +15,7 @@ class IxDotIoSite(BasePasteSite): # Capturing groups: # 1. Paste ID # 2. Timestamp - _ITEM_ID_RE: re.Pattern = re.compile('
<a href="/([a-zA-Z0-9]+)">[\\sa-zA-Z0-9]+'
-                                         '\\[r][^\r\n]+'
-                                         '\\s+@ (.*?)[\r\n]')
+    _ITEM_ID_RE: Pattern = re.compile('<a href="/([a-zA-Z0-9]+)">
[\\sa-zA-Z0-9]+' '\\[r][^\r\n]+' '\\s+@ (.*?)[\r\n]') diff --git a/test/test_base62.py b/test/test_base62.py index 241cae2..9d29df0 100644 --- a/test/test_base62.py +++ b/test/test_base62.py @@ -1,9 +1,11 @@ -from common import base62_decode, base62_encode +from pastehunter.common import base62_decode, base62_encode + def test_b62_encode(): assert base62_encode(622708) == '2BZG' assert base62_encode(622707) == '2BZF' + def test_b62_decode(): assert base62_decode('1') == 1 assert base62_decode('a') == 10 diff --git a/test/test_paste_objects.py b/test/test_paste_objects.py index 1aab169..ce85bed 100644 --- a/test/test_paste_objects.py +++ b/test/test_paste_objects.py @@ -1,9 +1,9 @@ -from inputs import slexy from inputs.pastebin import PastebinPasteSite from inputs.slexy import SlexyPasteSite - pids_found = [] + + def mock_get_paste_for_pid(pid): pids_found.append(pid) return "pid_is_" + pid From 938e1367636723e1a60df0ea035879fbaed34bd1 Mon Sep 17 00:00:00 2001 From: Dylan Katz Date: Sun, 22 Nov 2020 22:42:25 -0800 Subject: [PATCH 5/5] Updated changelog, tweaked a few formats and other values --- .travis.yml | 1 - CHANGELOG.md | 10 ++++++++++ __init__.py | 0 pastehunter-cli | 34 ++++++++++++++++++---------------- pytest.ini | 3 ++- settings.json.sample | 2 +- setup.py | 2 +- test/test_paste_objects.py | 8 +++++--- 8 files changed, 37 insertions(+), 23 deletions(-) create mode 100644 __init__.py diff --git a/.travis.yml b/.travis.yml index afe4bdc..e061b28 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,7 +24,6 @@ install: - pip install pytest - pip install -e . script: -- wget https://raw.githubusercontent.com/kevthehermit/PasteHunter/master/settings.json.sample -O ~/.config/pastehunter.json - pastehunter-cli - python -m pytest after_success: diff --git a/CHANGELOG.md b/CHANGELOG.md index f7197a8..354fbf5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.4.0] - 2020-11-22 +## Changed +- Added some error state checks and retry logic to pastebin scraping (#116) +- Refactored paste inputs to use a base class + +## Added +- Support for ix.io (#95) +- Additional unit tests (pytest still has some issues with import paths on travis) + + ## [1.3.2] - 2020-02-15 ### Changed Minor patch fixing error in email yara regexp diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pastehunter-cli b/pastehunter-cli index 8dfbcd1..8587179 100644 --- a/pastehunter-cli +++ b/pastehunter-cli @@ -19,7 +19,7 @@ import yara import pastehunter from pastehunter.common import parse_config -VERSION = 1.0 +VERSION = '1.4.0' # Decided not to make this configurable as it currently really only applies to pastebin but may change in functionality later. 
# If someone would like this as a config key, please feel free to open an issue or a PR :) @@ -38,7 +38,7 @@ logger = logging.getLogger('pastehunter') logger.setLevel(logging.INFO) # Version info -logger.info("Starting PasteHunter Version: {0}".format(VERSION)) +logger.info("Starting PasteHunter Version: {}".format(VERSION)) # Parse the config file logger.info("Reading Configs") @@ -48,9 +48,11 @@ conf = parse_config() if not conf: sys.exit() + class TimeoutError(Exception): pass + class timeout: def __init__(self, seconds=1, error_message='Timeout'): self.seconds = seconds @@ -72,17 +74,19 @@ if "log" in conf and conf["log"]["log_to_file"]: if conf["log"]["log_path"] != "": logfile = "{0}/{1}.log".format(conf["log"]["log_path"], conf["log"]["log_file"]) # Assure directory exists - try: os.makedirs(conf["log"]["log_path"], exist_ok=True) # Python>3.2 + try: + os.makedirs(conf["log"]["log_path"], exist_ok=True) # Python>3.2 except TypeError: try: os.makedirs(conf["log"]["log_path"]) - except OSError as exc: # Python >2.5 + except OSError as exc: # Python >2.5 if exc.errno == errno.EEXIST and os.path.isdir(conf["log"]["log_path"]): pass - else: logger.error("Can not create log file {0}: {1}".format(conf["log"]["log_path"], exc)) + else: + logger.error("Can not create log file {0}: {1}".format(conf["log"]["log_path"], exc)) else: logfile = "{0}.log".format(conf["log"]["log_file"]) - fileHandler = handlers.RotatingFileHandler(logfile, mode='a+', maxBytes=(1048576*5), backupCount=7) + fileHandler = handlers.RotatingFileHandler(logfile, mode='a+', maxBytes=(1048576 * 5), backupCount=7) if conf["log"]["format"] != "": fileFormatter = logging.Formatter("{0}".format(conf["log"]["format"])) fileHandler.setFormatter(fileFormatter) @@ -118,7 +122,6 @@ for input_type, input_values in conf["inputs"].items(): input_list.append(input_values["module"]) logger.info("Enabled Input: {0}".format(input_type)) - # Configure Outputs logger.info("Configure Outputs") outputs = [] @@ -296,7 +299,6 @@ def paste_scanner(paste_data, rules_buff): # remove the confname key as its not really needed past this point del paste_data['confname'] - # Blacklist Check # If any of the blacklist rules appear then empty the result set blacklisted = False @@ -310,7 +312,6 @@ def paste_scanner(paste_data, rules_buff): return True return False - # Post Process # If post module is enabled and the paste has a matching rule. @@ -322,14 +323,13 @@ def paste_scanner(paste_data, rules_buff): logger.info("Running Post Module {0} on {1}".format(post_values["module"], paste_data["pasteid"])) post_module = importlib.import_module(post_values["module"]) post_results = post_module.run(results, - raw_paste_data, - paste_data - ) + raw_paste_data, + paste_data + ) # Throw everything back to paste_data for ease. paste_data = post_results - # If we have a result add some meta data and send to storage # If results is empty, ie no match, and store_all is True, # then append "no_match" to results. This will then force output. 
@@ -356,6 +356,7 @@ def paste_scanner(paste_data, rules_buff): except Exception as e: logging.error(e) + def main(): logger.info("Compile Yara Rules") try: @@ -364,7 +365,7 @@ def main(): default_rules = os.path.join(pastehunter_path, "YaraRules") else: default_rules = False - + if conf["yara"]["custom_rules"] != "none": custom_rules = conf["yara"]["custom_rules"] else: @@ -376,7 +377,7 @@ def main(): conf['yara']['exclude_rules'], conf['yara']['blacklist'], conf['yara']['test_rules'] - ) + ) rules = yara.compile(filepaths=rule_files, externals={'filename': ''}) @@ -445,5 +446,6 @@ def main(): pool.terminate() pool.join() + if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/pytest.ini b/pytest.ini index 8aa36a7..dd6091b 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,2 +1,3 @@ [pytest] -addopts = test/ \ No newline at end of file +addopts = test/ +norecursedirs = .git build docs logs \ No newline at end of file diff --git a/settings.json.sample b/settings.json.sample index 4a43f5d..d333318 100644 --- a/settings.json.sample +++ b/settings.json.sample @@ -19,7 +19,7 @@ "module": "pastehunter.inputs.dumpz", "api_scrape": "https://dumpz.org/api/recent", "api_raw": "https://dumpz.org/api/dump", - "paste_limit": 200, + "paste_limit": 100, "store_all": false }, "gists": { diff --git a/setup.py b/setup.py index b96fff0..e0d0b8c 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name='pastehunter', - version='1.3.2', + version='1.4.0', author='@kevthehermit @Plazmaz', author_email='info@pastehunter.com', description="Pastehunter", diff --git a/test/test_paste_objects.py b/test/test_paste_objects.py index ce85bed..6534d05 100644 --- a/test/test_paste_objects.py +++ b/test/test_paste_objects.py @@ -31,6 +31,7 @@ def test_slexy_site(): assert paste == 'pid_is_' + str(pid) assert paste_data == {"pid": 123} + def test_pastebin_site_remap(): fake_conf = { 'inputs': { @@ -47,8 +48,9 @@ def test_pastebin_site_remap(): } pastebin_site = PastebinPasteSite(fake_conf) out = pastebin_site.remap_raw_item(data) - assert out == {'key': 'a', 'test': 'b', 'date': '1582595793', 'filename': 'a', 'confname': 'pastebin', - 'pasteid': 'a', 'pastesite': 'pastebin.com', '@timestamp': '2020-02-25T01:56:33'} + assert out == {'key': 'a', 'test': 'b', 'date': '1582595793', 'filename': 'a', 'confname': 'pastebin', + 'pasteid': 'a', 'pastesite': 'pastebin.com', '@timestamp': '2020-02-25T01:56:33'} + def test_pastebin_site(): fake_conf = { @@ -68,7 +70,7 @@ def test_pastebin_site(): { 'key': 'bc', 'date': '1582595793' - } + } ]) pastes, paste_ids = pastebin_site.get_recent_items([]) assert paste_ids == ['ab', 'bc']
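
A closing illustration of the refactor's contract: every input now implements
BasePasteSite from patch 1. The hypothetical input below sketches what a new
site would look like (the site name, URLs, and raw-item field names are
invented for the example; a real input also needs an "inputs" entry in
settings.json.sample and a module-level recent_pastes() wrapper like the
pastebin and ixio inputs have):

    from typing import Any, Dict, List, Union

    from pastehunter.inputs.base_input import BasePasteSite


    class ExamplePasteSite(BasePasteSite):
        """Hypothetical input demonstrating the BasePasteSite contract."""

        def get_paste_id(self, paste_obj: Dict[str, Any]) -> Union[str, int]:
            return paste_obj['id']

        def get_paste_for_id(self, paste_id: Any) -> str:
            # Fetch the raw body of a single paste
            return self.make_request('https://paste.example/raw/{}'.format(paste_id)).text

        def remap_raw_item(self, raw_item: Union[str, Dict]) -> Dict[str, Any]:
            # Normalize the site-specific item into the common paste dict
            return {
                'pasteid': self.get_paste_id(raw_item),
                'pastesite': 'paste.example',
                'confname': 'example',
                'scrape_url': 'https://paste.example/raw/{}'.format(raw_item['id']),
                '@timestamp': raw_item['created_at'],
            }

        def get_recent_items(self, input_history: List[str]):
            # Mirror the pastebin/slexy shape: (new pastes, all seen ids)
            recent = self.make_request('https://paste.example/api/recent').json()
            history = [self.get_paste_id(item) for item in recent]
            paste_list = [self.remap_raw_item(item) for item in recent
                          if self.get_paste_id(item) not in input_history]
            return paste_list, history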