From 95046410ece71d80866acaf74ed5b243d2a8dc2d Mon Sep 17 00:00:00 2001 From: Mark Adams Date: Thu, 19 Dec 2019 17:53:14 -0800 Subject: [PATCH 1/6] Added pydoc and comments --- .gitignore | 2 +- subscribers/cyberprobe/fsm.py | 65 ++++++++++- subscribers/cyberprobe/fsm_extract.py | 151 ++++++++++++++++++++++---- subscribers/cyberprobe/indicators.py | 48 ++++++++ subscribers/cyberprobe/logictree.py | 88 +++++++++++++-- 5 files changed, 315 insertions(+), 39 deletions(-) diff --git a/.gitignore b/.gitignore index b6e0238..aded763 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,7 @@ Makefile.bak src/cybermon src/cyberprobe src/cyberprobe-cli -src/eventstream_service +src/eventstream-service src/etsi-rcvr src/nhis11-rcvr src/stream-rcvr diff --git a/subscribers/cyberprobe/fsm.py b/subscribers/cyberprobe/fsm.py index 111b9f5..f8d25bb 100644 --- a/subscribers/cyberprobe/fsm.py +++ b/subscribers/cyberprobe/fsm.py @@ -1,19 +1,40 @@ +""" +FSM utility class provides: +- Fsm: an FSM representing a single logictree rule. +- FsmState: State of a single FSM. +- FsmCollection: a set of FSMs representing a set of rules +""" + import json import cyberprobe.indicators as ind import cyberprobe.fsm_extract as fsme +# Represents a set of FSMs formed from a set of rules, and the current +# scan state. class FsmCollection: def __init__(self): + """ Constructor """ pass @classmethod def load_from(cls, inds): + """ Loads an FSM collection from a set of Indicator objects """ + + # Convert indiciators to a list of FSMs. fsc = cls() fsc.fsmindicators = inds fsc.fsms = [Fsm.load_from(v) for v in inds.indicators] + # Loop over the list, building a map of activator terms which + # will move beyond an FSMs 'init' term. The rationale here is + # that we only track FSMs which are active, but look out for the + # activator terms which will cause an FSM to come into action. 
+ # This may prove to be a bad strategy if there are many FSMs with + # block terms, because it means we will track many FSMs which do + # nothing. If that's so, more intelligent tracking of FSMs may be + # needed. activators = {} for v in fsc.fsms: a = v.get_activator_terms() @@ -26,30 +47,47 @@ def load_from(cls, inds): return fsc def init_state(self): + """ + Initialises state. This is called at the start of scanning a + new object. + """ self.state = {} def update(self, term): + """ + Updates state based on seeing a term of the form (type, value). + A special form is ('end', '') for the end of scanning. + """ + # See if there is an FSM which would be activated by this term, and + # add to the state list. if term in self.activators: for fsm in self.activators[term]: if fsm not in self.state: self.state[fsm] = fsm.init_state() + # Update all active FSMs, including any just added to the list. for v in self.state: self.state[v].update(term) def get_hits(self): + """ + Return all Indicator hits. + """ return [ v.indicator for v in self.state if self.state[v].state == "hit" ] - + class Fsm: - + """ Represents an FSM (no scanning state) """ + def __init__(self): + """ Constructor """ pass @classmethod def load_from(cls, ind): + """ Initialises an FSM from an Indicator """ fsm = cls() fsm.indicator = ind f = fsme.extract(ind.value) @@ -57,12 +95,17 @@ def load_from(cls, ind): return fsm def dump(self): + """ Dumps out the FSM transitions, for debug """ for k in self.fsm: ins, term = k print("%s -- %s:%s -> %s" % (ins, term[0], term[1], self.fsm[k])) def get_activator_terms(self): + """ + Gets the list of terms which transition from the 'init' state, i.e. + cause the FSM to be active. + """ inits=[] for ins, term in self.fsm: if ins == 'init': @@ -70,24 +113,38 @@ def get_activator_terms(self): return inits def init_state(self): + """ + Initializes a state object to track the FSM state. 
+ """ s = FsmState('init') s.fsm = self return s +# The state of scanning associated with a single FSM. class FsmState: def __init__(self, state): + """ + Constructor. + """ self.state = state def is_hit(self): + """ + True if FSM state has reached 'hit' + """ return self.state == 'hit' def is_fail(self): + """ + True if FSM state has reached 'fail' + """ return self.state == 'fail' def update(self, term): + """ + Advance the FSM state based on a term. + """ key = (self.state, term) if key in self.fsm.fsm: -# print("Term %s takes us %s -> %s" % -# (term, self.state, self.fsm.fsm[key])) self.state = self.fsm.fsm[key] diff --git a/subscribers/cyberprobe/fsm_extract.py b/subscribers/cyberprobe/fsm_extract.py index 112da31..69ed3e8 100644 --- a/subscribers/cyberprobe/fsm_extract.py +++ b/subscribers/cyberprobe/fsm_extract.py @@ -1,12 +1,33 @@ +""" +Converts a logictree into an FSM. The rationale is that an FSM provides a +highly optimized way to evaluate a logic tree. The conversion from +logictree form to FSM is expensive, but only needs to be performed once. +The subsequent FSM can be repeatedly applied. + +Use: + import cyberprobe.fsm_extract as fsme + import cyberprobe.logictree as lt + a = lt.And([lt.Match("ipv4", "10.0.0.1"), lt.Match("ipv4", "192.168.0.1")]) + fsm = fsme.extract(a) + m = fsme.mapify(fsm) + +FIXME: I think there's a bug where Not(Not(...)) doesn't work. +""" + import itertools import cyberprobe.logictree as lt -# A walker function used to find the FSM 'basic' states in a logic tree. -# Use with lt.walk: -# state=set() -# tree.walk(find_states, state) def find_states(e, states): + """ + A walker function used to find the FSM 'basic' states in a logic tree. + Use with lt.walk: + state=set() + tree.walk(find_states, state) + The basic states are the places in the tree where state information can + stored: Children of AND and children of NOT. NOT nodes are never themselves + basic state nodes. 
+ """ if e.par == None: return if type(e) == lt.Not: @@ -16,19 +37,22 @@ def find_states(e, states): if type(e.par) == lt.Not: states.add(e) -# A walker function used to find the FSM match states in a logic tree. -# Use with lt.walk: -# terms=set() -# tree.walk(find_match_terms, terms) def find_match_terms(e, states): - + """ + A walker function used to find the FSM match states in a logic tree. + Use with lt.walk: + terms=set() + tree.walk(find_match_terms, terms) + """ if type(e) == lt.Match: states.add(e) -# Converts a combination state (list of states) into a string representing -# the combination state. -# e.g. ["s4", "s3", "s8"] -> "s3-4-8" def name_combined_state(sts, tree): + """ + Converts a combination state (list of states) into a string representing + the combination state. + e.g. ["s4", "s3", "s8"] -> "s3-4-8" + """ if len(sts) == 0: return "init" if tree.id in sts: return "hit" rval = "s" @@ -40,13 +64,23 @@ def name_combined_state(sts, tree): return rval class FsmExtractor: + """ FSM extractor knows how to convert a logictree to an FSM """ + def __init__(self): + """ Constructor. """ self.tree = None def find_states(self): + """ Studies a logictree and finds the FSM states """ + + # Get the 'basic' states, nodes which represent the state of a + # sub-expression. This is children of AND and NOT operations, + # removing all NOTs. basic_states=set() self.tree.walk(find_states, basic_states) + # The full tree state can be represented as a combination of basic + # states. state_combis = [] for i in range(0, len(basic_states)+1): cs = itertools.combinations(basic_states, i) @@ -57,19 +91,29 @@ def find_states(self): return basic_states, state_combis def find_terms(self): - + """ Finds the match terms in a logic tree """ terms=set() self.tree.walk(find_match_terms, terms) return terms def evaluate_term(self, instate, term): - + """ + Takes a logictree state, and works out what happens when a term node + is triggered. Returns the updated state. 
States is a dict which maps + a node to an ElementState object. Term is a (key, value) tuple. + As a special case, the term can be the string "end" to mark end of + scanning. + Input state is a tuple of active elements. + """ + + # Convert input state tuple to a state map state = {} for elt in instate: state[elt] = lt.ElementState() state[elt].active = True elt.evaluate(state) + # Activate the appropriate term. 'End' is a special case. if term != "end": term.activate(state) term = (term.type, term.value) @@ -77,23 +121,33 @@ def evaluate_term(self, instate, term): self.tree.record_end(state) term = ('end', '') + # Convert the input state to a symbol unique to the state instate = name_combined_state(instate, self.tree) + # If the root node of the state is 'active' we'll give that a + # special state name which is 'hit'. Otherwise get the + # combined state name symbol. if self.tree.is_active(state): outstate = "hit" else: - outstate = set([v for v in state if v in self.basic_states and v.is_active(state)]) + outstate = set([v for v in state + if v in self.basic_states and v.is_active(state)]) outstate = name_combined_state(outstate, self.tree) + # Construct the FSM transition. transition = (instate, [term], outstate) - return transition - def flatten(self, fsm): + """ + Flattens an FSM. Edges of the form (src, [term...] dest) are + coalesced to a single edge containing all terms. + """ + + # Flatten the FSM, so that 2nd elt is a complete list of terms that + # will make the transition happen. - # Flatten the FSM, so that 2nd elt is a list of terms that will make - # the transition happen. + # Convert to dict. fsm2 = {} for v in fsm: key = (v[0], v[2]) @@ -102,6 +156,7 @@ def flatten(self, fsm): for w in v[1]: fsm2[key].append(w) + # Convert back to list. fsm = [] for v in fsm2: fsm.append((v[0], fsm2[v], v[1])) @@ -109,6 +164,12 @@ def flatten(self, fsm): return fsm def make_strategy(self, terms): + """ + Form a strategy for discovering all term invocations. 
The less + optimal approach would be to try all terms for all state combinations. + This function optimises that - all terms which are children of an + OR result in the same outcome when any of them are executed. + """ strategy = {} @@ -139,6 +200,10 @@ def make_strategy(self, terms): return strategy def extract_transitions(self, combis, terms): + """ + Studies a logic tree discovering the FSM state transitions for + all combinations of states and terms. + """ fsm = [] @@ -147,23 +212,32 @@ def extract_transitions(self, combis, terms): # call because the result will be the same in all cases. strategy = self.make_strategy(terms) + # For all combination states... for instate in combis: + # Loop through strategy steps. for line in strategy: # Shouldn't happen. if len(strategy[line]) == 0: continue + # Get the term to try. term = strategy[line][0] + # Work out what transition happens. transition = self.evaluate_term(instate, term) + # Ignore non-state transitions if transition[0] != transition[2]: + # The result will be the same for all terms in this + # strategy line. for term in strategy[line]: fsm.append((transition[0], [(term.type, term.value)], transition[2])) + # Deal with the special 'end' case, and record the transition + # unless it's a noop. transition = self.evaluate_term(instate, "end") if transition[0] != transition[2]: fsm.append(transition) @@ -171,9 +245,17 @@ def extract_transitions(self, combis, terms): return fsm def remove_invalid_transitions(self, fsm): + """ + An FSM optimisation step. Removes all states and transitions which + can't be reached. Also works out nodes from which it is not possible + to get to a 'hit' state. Those can be called 'fail'. + """ # Get a list of states which can lead to 'hit'. All other states are - # fail states, because you can't travel to hit. + # fail states, because you can't travel to hit. Do this by starting + # with a set containing just the 'hit' state, and keep adding states + # which can get there. 
Then re-iterate adding all states that get + # to that state. hitstates = set(["hit"]) while True: nhs = hitstates.copy() @@ -200,6 +282,9 @@ def remove_invalid_transitions(self, fsm): fsm = fsm2 + # Remove transitions which can't be reached from init. There will be + # areas of the FSM which cannot be reached, and these can be chopped + # out from the FSM. while True: # Get a list of navigable states, which is states it is possible @@ -211,43 +296,63 @@ def remove_invalid_transitions(self, fsm): fsm2 = [] + # Remove all transitions which don't go from navigable states. for v in fsm: if v[0] in navstates: fsm2.append(v) + # This won't succeed in a single operation. Keep going until + # chopping out transitions results in no change. if fsm == fsm2: break fsm = fsm2 return fsm - + def extract(self, tree): + """ + Extract an FSM from a tree. + """ + # Store the tree, and get the combination states. self.tree = tree (basic_states, state_combis) = self.find_states() - # pass through to function above. FIXME: + # Store basic states, used by support methods. self.basic_states = basic_states + # Get all match terms. terms = self.find_terms() + # First pass, get the FSM. fsm = self.extract_transitions(state_combis, terms) + # First pass flattening of the tree. This is largely cosmetic, but + # may make subsequent operations quicker. fsm = self.flatten(fsm) + # Remove unnavigable areas of the FSM, and reduce all failure paths + # down to a single 'fail' state. fsm = self.remove_invalid_transitions(fsm) + # Final flattening, the single 'fail' state will open up more + # flattening options fsm = self.flatten(fsm) self.fsm = fsm - + def extract(a): + """ Convert a logictree to an FSM """ fsm = FsmExtractor() fsm.extract(a) return fsm.fsm def mapify(fsm): + """ + Convert an FSM (src, [term, ...], dest) to a dict (src, term) -> dest + which is more convenient for traversing.
+ """ fsm2 = {} diff --git a/subscribers/cyberprobe/indicators.py b/subscribers/cyberprobe/indicators.py index eff2984..c34c5c5 100644 --- a/subscribers/cyberprobe/indicators.py +++ b/subscribers/cyberprobe/indicators.py @@ -1,22 +1,41 @@ +""" +Cyberprobe indicators +""" + import uuid import json import cyberprobe.logictree as lt import cyberprobe.fsm_extract as fsme class Indicators: + """ + Represents a set of indicators. + """ + def __init__(self, description=None, version=None, indicators=[]): + """ Constructor. """ self.description = description self.version = version self.indicators = indicators + def add_indicator(self, i): + """ Adds an indicator """ self.indicators.append(i) + def get(self, id): + """ Gets an indicator by ID. """ for v in self.indicators: if v.id == id: return v raise RuntimeError("No such indicator ID") + def dump(self): + """ + Returns a dict object representing the indicator set which can be + JSON serialized. + """ + rval = {} if self.description: rval["description"] = self.description if self.version: rval["version"] = self.version @@ -24,20 +43,34 @@ def dump(self): v.dump() for v in self.indicators ] return rval + def dumps(self): + """ Dumps an indicator set to JSON string. """ return json.dumps(self.dump(), indent=4) class Descriptor: + """ + A descriptor object represents the information associated with an event + when a detection event occurs. + """ + def __init__(self, description=None, category=None, author=None, source=None, prob=1.0, type=None, value=None): + """ Constructor. """ self.description = description self.category = category self.author = author self.source = source self.type = type self.value = value + def dump(self): + """ + Returns a dict object representing the descriptor which can be JSON + serialized. + """ + rval = {} if self.description is not None: rval["description"] = self.description @@ -52,25 +85,37 @@ def dump(self): return rval class Indicator: + """ Represents an indicator object. 
""" + def __init__(self, descriptor, id=None): + """ Constructor """ if id == None: id = str(uuid.uuid4()) self.id = id self.descriptor = descriptor + def dump(self): + """ + Represents a dict object representing the indicator object + which can be serialized. + """ rval = {} rval["id"] = self.id rval["descriptor"] = self.descriptor.dump() rval.update(self.value.dump()) return rval + def extract_fsm(self): + """ Returns an FSM object from the indicator. """ return fsme.extract(self.value) def loads(data): + """ Loads an indicator set from a JSON string. """ obj = json.loads(data) return load(obj) def load_descriptor(obj): + """ Loads a descriptor from a Python dict object. """ des = Descriptor() if "description" in obj: des.description = obj["description"] if "category" in obj: des.category = obj["category"] @@ -82,12 +127,14 @@ def load_descriptor(obj): return des def load_indicator(obj): + """ Loads an indicator from a Python dict object """ des = load_descriptor(obj["descriptor"]) ii = Indicator(des, id = obj["id"]) ii.value = load_value(obj) return ii def load_value(obj): + """ Loads an value from a Python dict object """ if "type" in obj: return lt.Match(obj["type"], obj["value"]) elif "or" in obj: @@ -100,6 +147,7 @@ def load_value(obj): raise RuntimeError("Can't parse value") def load(obj): + """ Loads an indicator set from a Python dict object """ i = Indicators() if "description" in obj: i.description = obj["description"] if "version" in obj: i.version = obj["version"] diff --git a/subscribers/cyberprobe/logictree.py b/subscribers/cyberprobe/logictree.py index 8c61f42..53f402e 100644 --- a/subscribers/cyberprobe/logictree.py +++ b/subscribers/cyberprobe/logictree.py @@ -1,37 +1,50 @@ + +""" +Represents a decision tree built from boolean logic operators. +""" + import sys class Element: + """ Base class for logic operators. 
""" id = 1 def __init__(self): + """ Constructor """ self.id = "s" + str(Element.id) self.par = None Element.id = Element.id + 1 def is_active(self, state): + """ Returns true if the element is active in provided state """ if not self in state: return False if not state[self].active: return False return True class ElementState: - def __init__(self): - self.active = False + """ Tracks state of an element. """ + def __init__(self): + """ Constructor """ + self.active = False class And(Element): + """ Represents an AND operator """ + def __init__(self, e): + """ Constructor """ self.e = e for e in self.e: e.par = self Element.__init__(self) + def walk(self, fn, state=None): + """ Walks the tree of nodes, depth-first """ for e in self.e: e.walk(fn, state) fn(self, state) - def state_elt(self): - return self - def get_elt(self, id): + """ Walks the tree, hunting for a node with provided ID """ if id == self.id: return self for v in self.e: @@ -41,6 +54,11 @@ def get_elt(self, id): return None def evaluate(self, state): + """ + Causes node evaluation, which means studying the state and + working out if other state transitions should take place higher + in the tree. + """ if not self in state: state[self] = ElementState() if state[self].active: return @@ -60,33 +78,41 @@ def evaluate(self, state): if self.par != None: self.par.evaluate(state) def record_end(self, state): + """ Records the end of scanning, and works out the impact on state. """ for v in self.e: v.record_end(state) def dump_logic_tree(self, indent=0): + """ Dumps out a logic tree in human-readable form """ + for v in range(0, indent): sys.stdout.write(" ") print("%s: and" % self.id) for v in self.e: v.dump_logic_tree(indent+1) def dump(self): + """ Returns a Python dict object representing the state. 
""" return { "and": [ v.dump() for v in self.e ] } class Or(Element): + def __init__(self, e): + """ Constructor """ self.e = e for e in self.e: e.par = self Element.__init__(self) + def walk(self, fn, state=None): + """ Walks the tree of nodes, depth-first """ for e in self.e: e.walk(fn, state) fn(self, state) - def state_elt(self): - return self.par.state_elt() + def get_elt(self, id): + """ Walks the tree, hunting for a node with provided ID """ if id == self.id: return self for v in self.e: @@ -96,6 +122,11 @@ def get_elt(self, id): return None def evaluate(self, state): + """ + Causes node evaluation, which means studying the state and + working out if other state transitions should take place higher + in the tree. + """ if not self in state: state[self] = ElementState() if state[self].active: return @@ -114,31 +145,38 @@ def evaluate(self, state): if self.par != None: self.par.evaluate(state) def record_end(self, state): + """ Records the end of scanning, and works out the impact on state. """ for v in self.e: v.record_end(state) def dump_logic_tree(self, indent=0): + """ Dumps out a logic tree in human-readable form """ for v in range(0, indent): sys.stdout.write(" ") print("%s: or" % self.id) for v in self.e: v.dump_logic_tree(indent+1) def dump(self): + """ Returns a Python dict object representing the state. 
""" return { "or": [ v.dump() for v in self.e ] } class Not(Element): + def __init__(self, e): + """ Constructor """ self.e = e self.e.par = self Element.__init__(self) + def walk(self, fn, state=None): + """ Walks the tree of nodes, depth-first """ self.e.walk(fn, state) fn(self, state) - def state_elt(self): - return self + def get_elt(self, id): + """ Walks the tree, hunting for a node with provided ID """ if id == self.id: return self elt = self.e.get_elt(id) @@ -147,9 +185,15 @@ def get_elt(self, id): return None def evaluate(self, state): + """ + Causes node evaluation, which means studying the state and + working out if other state transitions should take place higher + in the tree. + """ pass def record_end(self, state): + """ Records the end of scanning, and works out the impact on state. """ if not self in state: state[self] = ElementState() if state[self].active: return @@ -164,34 +208,48 @@ def record_end(self, state): if self.par != None: self.par.evaluate(state) def dump_logic_tree(self, indent=0): + """ Dumps out a logic tree in human-readable form """ for v in range(0, indent): sys.stdout.write(" ") print("%s: not" % self.id) self.e.dump_logic_tree(indent+1) def dump(self): + """ Returns a Python dict object representing the state. """ return { "not": self.e.dump() } class Match(Element): + def __init__(self, type, value): + """ Constructor """ self.type = type self.value = value self.par = None Element.__init__(self) + def walk(self, fn, state=None): + """ Walks the tree of nodes, depth-first """ fn(self, state) - def state_elt(self): - return self.par.state_elt() + def get_elt(self, id): + """ Walks the tree, hunting for a node with provided ID """ if id == self.id: return self return None def evaluate(self, state): + """ + Causes node evaluation, which means studying the state and + working out if other state transitions should take place higher + in the tree. 
+ """ pass def activate(self, state): + """ + Activates a node + """ if not self in state: state[self] = ElementState() if state[self].active: return @@ -199,19 +257,26 @@ def activate(self, state): if self.par != None: self.par.evaluate(state) def record_end(self, state): + """ Records the end of scanning, and works out the impact on state. """ pass def dump_logic_tree(self, indent=0): + """ Dumps out a logic tree in human-readable form """ for v in range(0, indent): sys.stdout.write(" ") print("%s: \"%s: %s\"" % (self.id, self.type, self.value)) def dump(self): + """ Returns a Python dict object representing the state. """ return { "type": self.type, "value": self.value } def parse_logic_tree(obj): + """ + Parses an Python dict object representing a logic tree, returning + a logic tree. + """ if type(obj) == str: return Match(obj) @@ -234,5 +299,6 @@ def parse_logic_tree(obj): return Match(obj["value"]) def dump_logic_tree(obj, indent=0): + """ Dumps out a logic tree in human-readable form """ obj.dump_logic_tree() From bf239c5e8242f65e2f273002677ee90021f8117b Mon Sep 17 00:00:00 2001 From: Mark Adams Date: Fri, 20 Dec 2019 08:28:42 -0800 Subject: [PATCH 2/6] Indicator documentation --- docs/Makefile.am | 3 +- docs/cyberprobe.texi | 4 + docs/ref-cybermon-detector.texi | 7 +- docs/ref-indicators.texi | 214 ++++++++++++++++++++++++++++++++ docs/ref-top.texi | 1 + 5 files changed, 227 insertions(+), 2 deletions(-) create mode 100644 docs/ref-indicators.texi diff --git a/docs/Makefile.am b/docs/Makefile.am index 15f92a8..f5dcc13 100644 --- a/docs/Makefile.am +++ b/docs/Makefile.am @@ -14,7 +14,8 @@ cyberprobe_TEXINFOS = architecture.texi aws-mirroring.texi \ qs-threat-indicators.texi qs-top.texi qs-visualisation.texi \ ref-cybermon-alert.texi ref-cybermon-bigquery.texi \ ref-cybermon-cassandra.texi ref-cybermon-configuration.texi \ - ref-cybermon-detector.texi ref-cybermon-dump.texi \ + ref-cybermon-detector.texi ref-indicators.texi \ + ref-cybermon-dump.texi \ 
ref-cybermon-elasticsearch.texi \ ref-cybermon-example-configs.texi ref-cybermon-gaffer.texi \ ref-cybermon-geoip.texi ref-cybermon-invocation.texi \ diff --git a/docs/cyberprobe.texi b/docs/cyberprobe.texi index 53c906b..88f52c8 100644 --- a/docs/cyberprobe.texi +++ b/docs/cyberprobe.texi @@ -186,6 +186,10 @@ Texts. @comment ---------------------------------------------------------------------- +@include ref-indicators.texi + +@comment ---------------------------------------------------------------------- + @include ref-cybermon-dump.texi @comment ---------------------------------------------------------------------- diff --git a/docs/ref-cybermon-detector.texi b/docs/ref-cybermon-detector.texi index e403b55..f356fbe 100644 --- a/docs/ref-cybermon-detector.texi +++ b/docs/ref-cybermon-detector.texi @@ -3,9 +3,9 @@ @section @command{cybermon-detector} invocation @cindex @command{cybermon-detector}, invocation -@cindex STIX @cindex IOC @cindex Indicator of compromise +@cindex Indicator @command{cybermon-detector} subscribes to a RabbitMQ pub/sub queue for @command{cybermon} events, inspects them for IOCs, and adds detection @@ -14,6 +14,11 @@ the elaborated events. This effectively creates a processing chain. The event subscription and publishing events should be different in order to avoid creating an infinite loop. +The indicator file is specified by the @samp{INDICATORS} environment variable; +the default is @file{indicators.json}. The file format is the +@ref{Cyberprobe indicator format}. When the file changes, the indicators are +reloaded without the need to restart @command{cybermon-detector}.
Synopsis: @example diff --git a/docs/ref-indicators.texi b/docs/ref-indicators.texi new file mode 100644 index 0000000..0d6c195 --- /dev/null +++ b/docs/ref-indicators.texi @@ -0,0 +1,214 @@ + +@node Cyberprobe indicator format +@section Cyberprobe indicator format + +@cindex IOC +@cindex Indicator of compromise +@cindex Indicator + +@heading Overview + +The @command{cybermon-detector} subscriber implements detection against a +set of indicators (see @ref{@command{cybermon-detector} invocation}). The +indicators are presented in a file containing JSON describing the +indicators. + +Indicators consist of two parts: + +@itemize @bullet + +@item +A boolean expression which describes what to look for. + +@item +A descriptor which describes changes made to the output event when an +indicator selects data. + +@end itemize + +@heading Overall structure + +The indicators file consists of a simple header containing the +@samp{description} and @samp{version} fields. The @samp{version} field is +intended to record a revision history identifier for the indicators, and +should be changed to a unique value when an update occurs. + +These fields are not used by @command{cybermon-detector}. + +@example +@{ + "description": "Some test data", + "version": "1", + "indicators": [ + ... + ] +@} +@end example + +The @samp{indicators} field consists of a list of indicators. + +@heading Indicator + +An indicator consists of: an ID, a descriptor which is tagged onto events +when they hit indicators, and a definition of what to hit on. The +example below shows ID and descriptor. + +@example +@{ + "id": "6b7aa83f-8c43-4aaa-817f-5039adef19ef", + "descriptor": @{ + "description": "URL of a page serving malware", + "category": "malware", + "author": "someone@@example.com", + "source": "id:3245edd9-e0f3-4982-9406-fbf93b874555", + "type": "url", + "value": "http://malware.org/malware.dat" + @}, + ...
+@} +@end example + +A descriptor contains a type/value pair which is a seed of information that an +investigator would want to search on to find further information. + +The descriptor fields are: + +@table @samp + +@item description +A human-readable description of the purpose of this indicator. + +@item category +A threat category for the indicator, which is used to group threats by +type of threat + +@item author +Email address of indicator author + +@item source +Source of the indicator, should be a URL e.g. a web-page address of a report + +@item type +The type of the @samp{value} field, can be one of @samp{hostname}, +@samp{url}, @samp{ipv4}, @samp{ipv6}, @samp{tcp}, @samp{udp}, @samp{email}. + +@end table + +@heading Indicator value + +The indicator value consists of four constructs: + +@itemize @bullet + +@item +@samp{and} + +@item +@samp{or} + +@item +@samp{not} + +@item +Match term + +@end itemize + +The value construct is included in the Indicator object alongside the +@samp{id} field e.g. + +@example +@{ + "id": "845bcc85-49f5-427c-806c-5fe5984c2c5c", + "descriptor": @{ + ... + @}, + "type": "tcp", + "value": "11111" +@} +@end example + +or + +@example +@{ + "id": "845bcc85-49f5-427c-806c-5fe5984c2c5c", + "descriptor": @{ + ... + @}, + "or": [ + @{ + "type": "tcp", + "value": "11111" + @}, + @{ + "type": "tcp", + "value": "11112" + @} + ] +@} +@end example + +The four value constructs can be combined with multiple layers to any +depth. + +@subheading Match term + +The simple matching form consists of a type and value. The type field +can be one of: @samp{hostname}, @samp{url}, @samp{ipv4}, @samp{ipv4.src}, +@samp{ipv4.dest}, @samp{ipv6}, @samp{ipv6.src}, @samp{ipv6.dest}, +@samp{tcp}, @samp{tcp.src}, @samp{tcp.dest}, @samp{udp}, +@samp{udp.src}, @samp{udp.dest}, @samp{email}. e.g.
+ +@example +@{ + "type": "ipv4.src", + "value": "192.168.0.1" +@} +@end example + +@subheading @samp{and} construct + +The @samp{and} construct is true if all of its children are true. + +@example +"and": [ + @{ + "type": "hostname", + "value": "example.com" + @}, + @{ + "type": "tcp", + "value": "11112" + @} +] +@end example + +@subheading @samp{or} construct + +The @samp{or} construct is true if any of its children are true. + +@example +"or": [ + @{ + "type": "hostname", + "value": "example.com" + @}, + @{ + "type": "hostname", + "value": "example.com" + @} +] +@end example + +@subheading @samp{not} construct + +The @samp{not} construct is true if its child is false. + +@example +"not": @{ + "type": "hostname", + "value": "example.com" +@} +@end example + diff --git a/docs/ref-top.texi b/docs/ref-top.texi index 24f73f8..90d7efc 100644 --- a/docs/ref-top.texi +++ b/docs/ref-top.texi @@ -21,6 +21,7 @@ * @command{cybermon-cassandra} invocation:: * @command{cybermon-geoip} invocation:: * @command{cybermon-detector} invocation:: +* Cyberprobe indicator format:: * @command{cybermon-dump} invocation:: * @command{cybermon-alert} invocation:: * @command{taxii-client} invocation:: From 787282674055b30049cfe7e10ced380a85e0632e Mon Sep 17 00:00:00 2001 From: Mark Adams Date: Fri, 20 Dec 2019 08:30:16 -0800 Subject: [PATCH 3/6] Added src/dest indicator testing --- subscribers/cybermon-detector | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/subscribers/cybermon-detector b/subscribers/cybermon-detector index da76c64..92e9f5d 100755 --- a/subscribers/cybermon-detector +++ b/subscribers/cybermon-detector @@ -60,22 +60,30 @@ def check_addresses(obj): for v in obj["src"]: if v.startswith("ipv4:"): fsc.update(('ipv4', v[5:])) + fsc.update(('ipv4.src', v[5:])) if v.startswith("ipv6:"): fsc.update(('ipv6', v[5:])) + fsc.update(('ipv6.src', v[5:])) if v.startswith("tcp:"): fsc.update(('tcp', v[4:])) + fsc.update(('tcp.src', v[4:])) if v.startswith("udp:"): 
fsc.update(('udp', v[4:])) + fsc.update(('udp.src', v[4:])) for v in obj["dest"]: if v.startswith("ipv4:"): fsc.update(('ipv4', v[5:])) + fsc.update(('ipv4.dest', v[5:])) if v.startswith("ipv6:"): fsc.update(('ipv6', v[5:])) + fsc.update(('ipv6.dest', v[5:])) if v.startswith("tcp:"): fsc.update(('tcp', v[4:])) + fsc.update(('tcp.dest', v[4:])) if v.startswith("udp:"): fsc.update(('udp', v[4:])) + fsc.update(('udp.dest', v[4:])) def check_dns(obj): From d6c404e560d90713eea228e54b57ad31f010cd40 Mon Sep 17 00:00:00 2001 From: Mark Adams Date: Fri, 20 Dec 2019 15:41:21 -0800 Subject: [PATCH 4/6] Minor reorg, indicators stuff in own directory --- Makefile.am | 3 ++- configure.ac | 1 + {subscribers => indicators}/case1.json | 0 {subscribers => indicators}/case2.json | 0 {subscribers => indicators}/case3.json | 0 {subscribers => indicators}/case4.json | 0 {subscribers => indicators}/case5.json | 0 {subscribers => indicators}/case6.json | 0 {subscribers => indicators}/case7.json | 0 {subscribers => indicators}/case8.json | 0 {subscribers => indicators}/cybermon-detector | 0 {subscribers => indicators}/cyberprobe/fsm.py | 0 {subscribers => indicators}/cyberprobe/fsm_extract.py | 0 {subscribers => indicators}/cyberprobe/indicators.py | 0 {subscribers => indicators}/cyberprobe/logictree.py | 0 {subscribers => indicators}/dump-fsm | 0 {subscribers => indicators}/gen-indicators | 0 {subscribers => indicators}/gen-indicators2 | 0 {subscribers => indicators}/graph-fsm | 0 {subscribers => indicators}/indicators.json | 0 {subscribers => indicators}/load-test | 0 subscribers/Makefile.am | 9 +++------ 22 files changed, 6 insertions(+), 7 deletions(-) rename {subscribers => indicators}/case1.json (100%) rename {subscribers => indicators}/case2.json (100%) rename {subscribers => indicators}/case3.json (100%) rename {subscribers => indicators}/case4.json (100%) rename {subscribers => indicators}/case5.json (100%) rename {subscribers => indicators}/case6.json (100%) rename {subscribers
=> indicators}/case7.json (100%) rename {subscribers => indicators}/case8.json (100%) rename {subscribers => indicators}/cybermon-detector (100%) rename {subscribers => indicators}/cyberprobe/fsm.py (100%) rename {subscribers => indicators}/cyberprobe/fsm_extract.py (100%) rename {subscribers => indicators}/cyberprobe/indicators.py (100%) rename {subscribers => indicators}/cyberprobe/logictree.py (100%) rename {subscribers => indicators}/dump-fsm (100%) rename {subscribers => indicators}/gen-indicators (100%) rename {subscribers => indicators}/gen-indicators2 (100%) rename {subscribers => indicators}/graph-fsm (100%) rename {subscribers => indicators}/indicators.json (100%) rename {subscribers => indicators}/load-test (100%) diff --git a/Makefile.am b/Makefile.am index beb8672..6056001 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,5 +1,6 @@ -SUBDIRS = src include config docs stix www tests init subscribers utils +SUBDIRS = src include config docs indicators stix www tests init subscribers \ + utils ACLOCAL_AMFLAGS = -I m4 diff --git a/configure.ac b/configure.ac index f02a1e6..d708ab8 100644 --- a/configure.ac +++ b/configure.ac @@ -147,6 +147,7 @@ AC_CHECK_FUNCS([gethostbyname gettimeofday socket strdup uname]) AC_CONFIG_FILES([Makefile src/Makefile include/Makefile config/Makefile docs/Makefile stix/Makefile www/Makefile tests/Makefile tests/atlocal init/Makefile subscribers/Makefile pkg.mk utils/Makefile + indicators/Makefile cyberprobe.spec]) AC_OUTPUT diff --git a/subscribers/case1.json b/indicators/case1.json similarity index 100% rename from subscribers/case1.json rename to indicators/case1.json diff --git a/subscribers/case2.json b/indicators/case2.json similarity index 100% rename from subscribers/case2.json rename to indicators/case2.json diff --git a/subscribers/case3.json b/indicators/case3.json similarity index 100% rename from subscribers/case3.json rename to indicators/case3.json diff --git a/subscribers/case4.json b/indicators/case4.json 
similarity index 100% rename from subscribers/case4.json rename to indicators/case4.json diff --git a/subscribers/case5.json b/indicators/case5.json similarity index 100% rename from subscribers/case5.json rename to indicators/case5.json diff --git a/subscribers/case6.json b/indicators/case6.json similarity index 100% rename from subscribers/case6.json rename to indicators/case6.json diff --git a/subscribers/case7.json b/indicators/case7.json similarity index 100% rename from subscribers/case7.json rename to indicators/case7.json diff --git a/subscribers/case8.json b/indicators/case8.json similarity index 100% rename from subscribers/case8.json rename to indicators/case8.json diff --git a/subscribers/cybermon-detector b/indicators/cybermon-detector similarity index 100% rename from subscribers/cybermon-detector rename to indicators/cybermon-detector diff --git a/subscribers/cyberprobe/fsm.py b/indicators/cyberprobe/fsm.py similarity index 100% rename from subscribers/cyberprobe/fsm.py rename to indicators/cyberprobe/fsm.py diff --git a/subscribers/cyberprobe/fsm_extract.py b/indicators/cyberprobe/fsm_extract.py similarity index 100% rename from subscribers/cyberprobe/fsm_extract.py rename to indicators/cyberprobe/fsm_extract.py diff --git a/subscribers/cyberprobe/indicators.py b/indicators/cyberprobe/indicators.py similarity index 100% rename from subscribers/cyberprobe/indicators.py rename to indicators/cyberprobe/indicators.py diff --git a/subscribers/cyberprobe/logictree.py b/indicators/cyberprobe/logictree.py similarity index 100% rename from subscribers/cyberprobe/logictree.py rename to indicators/cyberprobe/logictree.py diff --git a/subscribers/dump-fsm b/indicators/dump-fsm similarity index 100% rename from subscribers/dump-fsm rename to indicators/dump-fsm diff --git a/subscribers/gen-indicators b/indicators/gen-indicators similarity index 100% rename from subscribers/gen-indicators rename to indicators/gen-indicators diff --git 
a/subscribers/gen-indicators2 b/indicators/gen-indicators2 similarity index 100% rename from subscribers/gen-indicators2 rename to indicators/gen-indicators2 diff --git a/subscribers/graph-fsm b/indicators/graph-fsm similarity index 100% rename from subscribers/graph-fsm rename to indicators/graph-fsm diff --git a/subscribers/indicators.json b/indicators/indicators.json similarity index 100% rename from subscribers/indicators.json rename to indicators/indicators.json diff --git a/subscribers/load-test b/indicators/load-test similarity index 100% rename from subscribers/load-test rename to indicators/load-test diff --git a/subscribers/Makefile.am b/subscribers/Makefile.am index 28d0377..b57efa2 100644 --- a/subscribers/Makefile.am +++ b/subscribers/Makefile.am @@ -1,13 +1,10 @@ bin_SCRIPTS = cybermon-elasticsearch cybermon-gaffer cybermon-monitor \ - cybermon-bigquery cybermon-cassandra cybermon-dump cybermon-geoip \ - cybermon-detector + cybermon-bigquery cybermon-cassandra cybermon-dump cybermon-geoip EXTRA_DIST = cybermon-elasticsearch cybermon-gaffer cybermon-monitor \ - cybermon-bigquery cybermon-cassandra cybermon-dump cybermon-geoip \ - cybermon-detector + cybermon-bigquery cybermon-cassandra cybermon-dump cybermon-geoip -cyberprobe_PYTHON = cyberprobe/qcomms.py cyberprobe/fsm_extract.py \ - cyberprobe/fsm.py cyberprobe/indicators.py cyberprobe/logictree.py +cyberprobe_PYTHON = cyberprobe/qcomms.py cyberprobedir = $(pythondir)/cyberprobe From d73b7fd7dbf833b7b5e76413cd58df93741be8ba Mon Sep 17 00:00:00 2001 From: Mark Adams Date: Fri, 20 Dec 2019 15:42:01 -0800 Subject: [PATCH 5/6] Added Makefile --- indicators/Makefile.am | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 indicators/Makefile.am diff --git a/indicators/Makefile.am b/indicators/Makefile.am new file mode 100644 index 0000000..dc4a095 --- /dev/null +++ b/indicators/Makefile.am @@ -0,0 +1,9 @@ + +bin_SCRIPTS = cybermon-detector + +EXTRA_DIST = cybermon-detector + +cyberprobe_PYTHON 
= cyberprobe/fsm_extract.py cyberprobe/fsm.py \ + cyberprobe/indicators.py cyberprobe/logictree.py +cyberprobedir = $(pythondir)/cyberprobe + From cfc859ab9e0c0add67d5f57a35fce4503e68c963 Mon Sep 17 00:00:00 2001 From: Mark Adams Date: Mon, 23 Dec 2019 22:24:19 -0800 Subject: [PATCH 6/6] - Fix Python3 API problems in STIX code. - Remove STIX documentation from quickstart, instead talk about native indicators.json. - Update docs to talk about indicator format. --- docs/qs-threat-indicators.texi | 150 ++++++++------------------------ docs/qs-top.texi | 2 +- docs/ref-json-event-format.texi | 9 ++ indicators/Makefile.am | 2 + stix/cyberprobe/stix_store.py | 10 +-- stix/cyberprobe/taxii_client.py | 4 +- stix/cyberprobe/taxii_server.py | 25 +++--- stix/stix-create | 2 +- stix/taxii-sync | 8 +- stix/taxii-sync-json | 6 +- subscribers/cybermon-dump | 2 +- 11 files changed, 75 insertions(+), 145 deletions(-) diff --git a/docs/qs-threat-indicators.texi b/docs/qs-threat-indicators.texi index b78bf15..738ec6c 100644 --- a/docs/qs-threat-indicators.texi +++ b/docs/qs-threat-indicators.texi @@ -1,133 +1,46 @@ -@node Threat indicators using STIX -@section Threat indicators using STIX -@cindex STIX -@cindex TAXII -@cindex STIX indicators -@cindex Threat indicators -@cindex Cyber threat indicators +@node Threat indicators +@section Threat indicators +@cindex IOC +@cindex Indicator of Compromise +@cindex Indicator +@cindex Threat indicator @cindex JSON -We've been experimenting with an open model for describing cyber -threats. STIX is a community-driven effort to standardise a model for cyber -theat information. TAXII defines a set of services for distributing STIX -information. There's some support in @command{Cyberprobe}, but you should -know that this is very prototype at the moment. +Cyberprobe includes a subscriber which applies indicators to events. When +an event matches an indicator, information about the indicator is tagged +into the event.
-This is what we've got so far: +Indicator support is present in the @command{cybermon-detector} subscriber +which reads indicators from a JSON file. -@itemize +@heading Indicator files -@item -There's a simple CSV file format we've created to describe cyber -threats. This is just for convenience. +The installation bundle includes a file containing some +sample indicators, search for @file{indicators.json}, which may be installed +at @file{@value{PREFIX}/share/doc/cyberprobe}. -@cindex @command{stix-create} -@item -A script, @command{stix-create} which reads the above configuration file, -and converts into a STIX document containing Indicator objects. -@cindex @command{taxii-server} -@item -A script, @command{taxii-server} which acts as a very simple TAXII server, -serving up STIX documents. -@cindex @command{taxii-client} -@item -A script, @command{taxii-client} which connects to a TAXII server, gets STIX -documents and dumps some stuff out. -@cindex @command{taxii-sync-json} -@item -A script @command{taxii-sync-json} which connects to a TAXII server, gets -STIX documents, massages the whole lot into a single JSON form, and dumps -that to a file. This is intended to be used with the -@file{cybermon-detector} subscriber. -See @ref{@command{cybermon-detector} invocation}. -@item -A configuration file for @command{cybermon} which reads the JSON threat -information and reports when theats are observed. -@end itemize - -@cindex @code{pyOpenSSL} -@cindex @code{libtaxii} -@cindex @code{stix} -@cindex @code{pip} - -Before taking this any further, you need to have Python installed, along -with various dependencies (@code{pyOpenSSL}, @code{libtaxii} and -@code{stix}). The easiest way to install the dependencies is to install -@code{pip}, and issue this command: - -@example -sudo pip install libtaxii pyOpenSSL stix -@end example - -@heading A STIX document service - -The installation bundle includes a couple of CSV files containing some -fictional cyber theats.
Search for @file{example1.txt} and -@file{example2.txt}. They may be in @file{@value{PREFIX}/share/doc/cyberprobe} -once you've installed everything. You need to create a data area, and -convert these files into STIX ready for serving: - -@example -mkdir /tmp/stix -cd /tmp/stix -mkdir -p data/default -stix-create @value{PREFIX}/share/doc/cyberprobe/example1.txt \ - data/default/1 -i ex:1 -stix-create @value{PREFIX}/share/doc/cyberprobe/example2.txt \ - data/default/2 -i ex:2 -@end example - -Check that you have two new XML files in data/default directory. If they're -there, you're ready to start a STIX server. This will run on port 8080, so -you'll need to use a different port number if you don't like this one. It's -important that this is run from the directory where you just created the -data directory. - -@example -taxii-server --port 8080 -@end example - -If that works, use the test client to communicate: - -@example -taxii-client --port 8080 --poll -@end example - -And you should see some stuff that looks like cyber threat information -dumped on the screen. @heading Deploying theat information to @command{cybermon} -@cindex JSON - -Now, we use @command{taxii-sync-json} to fetch the STIX information in a -JSON form I can easily ingest into the LUA code: - -@example -taxii-sync-json --port 8080 -@end example - -This will create a JSON file called @file{stix-default-combined.json}. - -Finally, run processing. Stop any running @command{cybermon} and -@command{cybermon-elasticsearch} processes. Then run @command{cybermon} -to publish to a queue on RabbitMQ: +To run using your existing processing pipeline, stop any running +@command{cybermon} and @command{cybermon-elasticsearch} processes. Then run +@command{cybermon} to publish to a queue on RabbitMQ: @example cybermon -p 10000 -c @value{SYSCONFDIR}/cyberprobe/amqp-topic.lua @end example -Next run @command{cyberprobe-detector} to apply STIX rules. 
By default, +Next run @command{cybermon-detector} to apply indicator rules. By default, this will subscribe to @samp{cyberprobe} and publish to @samp{ioc}: @example -STIX_INDICATORS=stix-default-combined.json cybermon-detector \ +env INDICATORS=/path/to/indicators.json cybermon-detector \ cyberprobe ioc @end example @@ -148,7 +61,7 @@ cybermon-dump ioc | jq --unbuffered .indicators This activity should trigger a theat: @example -wget -q -O- http://www.malware.com/malware.dat +wget -q -O- http://www.malware.org/malware.dat @end example If this works, you should see the following output: @@ -156,10 +69,12 @@ If this works, you should see the following output: @example [ @{ + "description": "URL of a page serving malware", + "category": "malware", + "author": "someone@@example.com", + "source": "id:3245edd9-e0f3-4982-9406-fbf93b874555", "type": "url", - "id": "example1:7", - "value": "http://www.malware.com/malware.dat", - "description": "URL of a page serving malware" + "value": "http://malware.org/malware.dat" @} ] @end example @@ -167,14 +82,11 @@ If this works, you should see the following output: This hits on a number of theat indicators. The hostname www.malware.com is present in a theat indicator, and it is detected in the HTTP request, and both the DNS query and response. Also, the URL -@code{http://www.malware.com/malware.dat} is in a threat indicator and it is +@code{http://www.malware.org/malware.dat} is in a threat indicator and it is detected in both the HTTP request and response. @command{cybermon-detector} updates its state if -the JSON configuration file has changed. So, you can do a round-trip update -by changing the input files, re-running stix-create, using -@command{taxii-sync-json} to fetch the updates, and all without stopping the -monitoring. +the JSON configuration file has changed.
If you want to load the output of @command{cybermon-detector} into ElasticSearch, you can, but you need to subscribe to @samp{ioc}: @@ -183,6 +95,12 @@ ElasticSearch, you can, but you need to subscribe to @samp{ioc}: cybermon-elasticsearch ioc @end example +This results in indicator hit information being loaded into ES. + +For more information on indicators, see +@ref{@command{cybermon-detector} invocation} and +@ref{Cyberprobe indicator format}. + @heading Conclusion All done, I hope you enjoyed the tutorial! Any comments on the software, or diff --git a/docs/qs-top.texi b/docs/qs-top.texi index 902bd45..842ca56 100644 --- a/docs/qs-top.texi +++ b/docs/qs-top.texi @@ -12,5 +12,5 @@ * Using @command{cybermon}:: * Writing your own configuration file:: * Visualisation:: -* Threat indicators using STIX:: +* Threat indicators:: @end menu diff --git a/docs/ref-json-event-format.texi b/docs/ref-json-event-format.texi index 56ae479..28343c4 100644 --- a/docs/ref-json-event-format.texi +++ b/docs/ref-json-event-format.texi @@ -986,6 +986,15 @@ IOC hit value. @item description Human-readable text describing the IOC. +@item category +Category tag for the indicator. + +@item source +Indicator source, a URL, could be the URL of a web page or report. + +@item author +Email address of the originator. 
+ @end table @end table diff --git a/indicators/Makefile.am b/indicators/Makefile.am index dc4a095..7d382d8 100644 --- a/indicators/Makefile.am +++ b/indicators/Makefile.am @@ -3,6 +3,8 @@ bin_SCRIPTS = cybermon-detector EXTRA_DIST = cybermon-detector +dist_doc_DATA = indicators.json + cyberprobe_PYTHON = cyberprobe/fsm_extract.py cyberprobe/fsm.py \ cyberprobe/indicators.py cyberprobe/logictree.py cyberprobedir = $(pythondir)/cyberprobe diff --git a/stix/cyberprobe/stix_store.py b/stix/cyberprobe/stix_store.py index 8724b96..d7730cc 100644 --- a/stix/cyberprobe/stix_store.py +++ b/stix/cyberprobe/stix_store.py @@ -2,9 +2,9 @@ import uuid import sqlite3 import threading -from StringIO import StringIO +from io import StringIO import time -from urlparse import urlparse +import urllib.parse from lxml import etree import datetime @@ -31,7 +31,7 @@ def stop(s): s.cond.release() def publish(s, content, collection, url): - u = urlparse(url) + u = urllib.parse.urlparse(url) c = TaxiiClient(u.hostname, u.port) c.push(collection=collection, content=content) @@ -185,7 +185,7 @@ def subscribe_impl(s, id, query, collection, url): s.senders_lock.acquire() - if not s.subscriptions.has_key(collection): + if not collection in s.subscriptions: s.subscriptions[collection] = {} s.subscriptions[collection][id] = {} @@ -253,7 +253,7 @@ def store(s, content, collections): for collection in collections: - if not s.subscriptions.has_key(collection): continue + if not collection in s.subscriptions: continue for subs in s.subscriptions[collection]: diff --git a/stix/cyberprobe/taxii_client.py b/stix/cyberprobe/taxii_client.py index 57f90ad..85c390c 100644 --- a/stix/cyberprobe/taxii_client.py +++ b/stix/cyberprobe/taxii_client.py @@ -2,7 +2,7 @@ import sys import argparse import dateutil.parser -import StringIO +from io import StringIO import datetime from stix.core import STIXPackage, STIXHeader @@ -107,7 +107,7 @@ def poll(s, path="/", collection="default", query=None, content = cb.content # Parse the
payload, should be a STIX document. - package = STIXPackage.from_xml(StringIO.StringIO(content)) + package = STIXPackage.from_xml(StringIO(content.decode('utf-8'))) pkgs.append(package) diff --git a/stix/cyberprobe/taxii_server.py b/stix/cyberprobe/taxii_server.py index c710b1d..04124fe 100644 --- a/stix/cyberprobe/taxii_server.py +++ b/stix/cyberprobe/taxii_server.py @@ -1,5 +1,5 @@ -import BaseHTTPServer +import http.server import time import argparse @@ -13,7 +13,7 @@ ############################################################################ # TAXII request handler ############################################################################ -class TAXIIHandler(BaseHTTPServer.BaseHTTPRequestHandler): +class TAXIIHandler(http.server.BaseHTTPRequestHandler): def get_matching(s, collection, begin, end, query, handle): return None @@ -205,20 +205,21 @@ def do_POST(s): ############################################################################ # TAXII Server ############################################################################ -class TAXIIServer(BaseHTTPServer.HTTPServer): +class TAXIIServer(http.server.HTTPServer): - def __init__(s, host, port, handler): - s.host = host - s.port = port - BaseHTTPServer.HTTPServer.__init__(s, (host, port), handler) + def __init__(self, host, port, handler): + self.host = host + self.port = port + http.server.HTTPServer.__init__(self, (host, port), handler) - def run(s): - print(time.asctime(), "Server Starts - %s:%d" % (s.host, s.port)) + def run(self): + print(time.asctime(), "Server Starts - %s:%d" % (self.host, self.port)) # Serve indefinitely. 
try: - s.serve_forever() + self.serve_forever() except KeyboardInterrupt: - s.server_close() - print(time.asctime(), "Server Stops - %s:%d" % (s.host, s.port)) + self.server_close() + print(time.asctime(), "Server Stops - %s:%d" % + (self.host, self.port)) diff --git a/stix/stix-create b/stix/stix-create index 93d4c97..22e1f54 100755 --- a/stix/stix-create +++ b/stix/stix-create @@ -187,7 +187,7 @@ for row in reader: # Parsing complete, write object to output file f = open(output_file, 'w') f.write('') -f.write(package.to_xml()) +f.write(package.to_xml().decode('utf-8')) f.close() # All done, exit diff --git a/stix/taxii-sync b/stix/taxii-sync index 58a0e36..b2fc0de 100755 --- a/stix/taxii-sync +++ b/stix/taxii-sync @@ -13,7 +13,7 @@ from cybox.objects.file_object import File from cybox.objects.port_object import Port from cybox.objects.uri_object import URI from cybox.objects.user_account_object import UserAccount -import StringIO +from io import StringIO import datetime ############################################################################ @@ -95,16 +95,16 @@ for cb in resp.content_blocks: content = cb.content # Hack an XML header on the top?! and add the payload body. - resp = "\n" + content + resp = "\n" + content.decode('utf-8') # Parse the payload, should be a STIX document. 
- package = STIXPackage.from_xml(StringIO.StringIO(resp)) + package = STIXPackage.from_xml(StringIO(resp)) # Dump package ID if package.id_: print("***** Package id: %s" % package.id_) fname = 'stix-%s-%s.xml' % (args.collection, package.id_) f = open(fname, 'w') - f.write(package.to_xml()) + f.write(package.to_xml().decode('utf-8')) f.close() diff --git a/stix/taxii-sync-json b/stix/taxii-sync-json index 4555576..5af17f6 100755 --- a/stix/taxii-sync-json +++ b/stix/taxii-sync-json @@ -13,7 +13,7 @@ from cybox.objects.file_object import File from cybox.objects.port_object import Port from cybox.objects.uri_object import URI from cybox.objects.user_account_object import UserAccount -import StringIO +from io import StringIO import datetime # Timezone class, representing GMT. @@ -119,10 +119,10 @@ for cb in resp.content_blocks: content = cb.content # Hack an XML header on the top?! and add the payload body. - resp = "\n" + content + resp = "\n" + content.decode('utf-8') # Parse the payload, should be a STIX document. - package = STIXPackage.from_xml(StringIO.StringIO(resp)) + package = STIXPackage.from_xml(StringIO(resp)) for ind in package.indicators: combined.add_indicator(ind) diff --git a/subscribers/cybermon-dump b/subscribers/cybermon-dump index 79bafcc..b7777a4 100755 --- a/subscribers/cybermon-dump +++ b/subscribers/cybermon-dump @@ -10,7 +10,7 @@ else: binding = sys.argv[1] def handle(msg, output): - print(msg) + print(msg.decode("utf-8")) try: q.subscribe(binding, handle)