Source code for firepit.woodchipper

import base64
import csv
import datetime
import json
import ntpath
import re
import socket
import sys
import uuid
import zipfile
from collections import OrderedDict
from ipaddress import ip_address

import dateutil.parser

from firepit.props import primary_prop
from firepit.props import ref_type
from firepit.timestamp import KNOWN_TIMESTAMPS
from firepit.timestamp import timefmt


## Code for generating STIX from intermediate format


INTEGER_PROPS = {
    # autonomous-system
    'number',

    # file (and others)
    'size',

    # network-traffic
    'src_port',
    'dst_port',
    'src_byte_count',
    'dst_byte_count',
    'src_packets',
    'dst_packets',

    # process
    'pid',
}


REG_HIVE_MAP = {
    'HKLM': 'HKEY_LOCAL_MACHINE',
    'HKCU': 'HKEY_CURRENT_USER',
    'HKCR': 'HKEY_CLASSES_ROOT',
    'HKCC': 'HKEY_CURRENT_CONFIG',
    'HKPD': 'HKEY_PERFORMANCE_DATA',
    'HKU':  'HKEY_USERS',
    'HKDD': 'HKEY_DYN_DATA',
}


def guess_ref_type(sco_type, prop, val):
    """Get data type for `sco_type`:`prop` reference"""
    rtypes = ref_type(sco_type, prop)  # FIXME: need to parse_prop first
    rtype = rtypes[0] if len(rtypes) > 0 else None  # FIXME
    if rtype == 'ipv4-addr' and ':' in val:
        rtype = 'ipv6-addr'
    if rtype is None:
        # just guess based on value
        if re.match(r'([0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}', val):
            rtype = 'mac-addr'
        elif re.match(r'([0-9]{1,3}\.){3}[0-9]{1,3}', val):
            rtype = 'ipv4-addr'
    return rtype

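# Illustrative checks of the guessing logic above (values are made up, and
# this assumes firepit.props lists 'ipv4-addr' first for network-traffic
# src_ref and has no entry at all for 'x_custom_ref'):
#
#   guess_ref_type('network-traffic', 'src_ref', '10.0.0.1')      # 'ipv4-addr'
#   guess_ref_type('network-traffic', 'src_ref', 'fe80::1')       # 'ipv6-addr'
#   guess_ref_type('x-foo', 'x_custom_ref', '00:11:22:33:44:55')  # 'mac-addr'
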
def recreate_dict(obj, prop, rest, val):
    thing = obj.get(prop, {})
    first, _, rest = rest.partition('.')
    if not rest:
        thing[first.strip("'")] = val
    else:
        recreate_dict(thing, first, rest, val)
    obj[prop.strip("'")] = thing


def format_val(sco_type, prop, val):
    if prop in KNOWN_TIMESTAMPS:
        ts = dateutil.parser.parse(val)
        result = timefmt(ts)
    elif prop in INTEGER_PROPS:
        try:
            result = int(val)
        except ValueError:
            result = int(val, 16)
    elif prop == 'protocols':  # HACKY
        result = [val] if not isinstance(val, list) else val
    elif prop == 'key':
        result = val
        for abbrev, full in REG_HIVE_MAP.items():
            if val.startswith(abbrev):
                result = val.replace(abbrev, full, 1)
    elif sco_type == 'ipv4-addr' and prop == 'value':
        # DNS QueryResults have ; at the end of addr?
        result = val.strip(';')  # TODO: need to check for multiple addrs?
    else:
        result = val
    return result


def set_obs_prop(observable, path, val, scos, key):
    prop, _, rest = path.partition('.')
    if prop.endswith(']'):  # FIXME: not always a ref!
        ref_name, _, idx = prop.rstrip(']').partition('[')
        ref_type = guess_ref_type(observable['type'], ref_name, val)
        if not ref_type:
            pass  # TODO
        ref_key = key + prop
        other = scos.get(ref_key, {'type': ref_type})
        if '.' in rest:
            set_obs_prop(other, rest, val, scos, ref_key + '.')
        else:
            other[rest] = format_val(ref_type, rest, val)
        scos[ref_key] = other
        if ref_name in observable:
            refs = observable[ref_name]
            if ref_key not in refs:
                refs.append(ref_key)
        else:
            refs = [None for i in range(int(idx) + 1)]
            refs[int(idx)] = ref_key
        observable[ref_name] = refs
    elif prop.endswith('_ref') or prop.endswith('_refs'):
        ref_type = guess_ref_type(observable['type'], prop, val)
        ref_key = key + prop
        other = scos.get(ref_key, {'type': ref_type})
        if '.' in rest:
            set_obs_prop(other, rest, val, scos, ref_key + '.')
        else:
            other[rest] = format_val(ref_type, rest, val)
        scos[ref_key] = other
        observable[prop] = ref_key
    elif not rest:
        observable[prop] = format_val(observable['type'], prop, val)
    elif '_refs[*].' in rest:
        # TODO
        # Trying to deal with e.g. extensions.'dns-ext'.resolved_ip_refs[*].value
        pass
    elif '_ref.' in rest:
        # I don't think this is working yet
        # Trying to deal with e.g. extensions.'dns-ext'.question.name_ref.value
        thing = observable.get(prop, {})
        if key.endswith(':') or key.endswith('.'):
            ref_key = key + prop + '.'
        else:
            ref_key = key + prop
        set_obs_prop(thing, rest, val, scos, ref_key)
        observable[prop.strip("'")] = thing
    else:
        recreate_dict(observable, prop, rest, val)


def fixup_hashes(hashes: dict):
    result = []
    for key, val in hashes.items():
        key = key.replace('SHA', 'SHA-')
        if '-' in key:
            key = f"'{key}'"
        result.append(('process:binary_ref.hashes.' + key, val))
    return result

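# Sketch of fixup_hashes in action (digest values are hypothetical): keys
# gain a dash after 'SHA', and dashed algorithm names get quoted to match
# STIX object-path syntax.
#
#   fixup_hashes({'SHA256': 'e3b0c442', 'MD5': 'd41d8cd9'})
#   # [("process:binary_ref.hashes.'SHA-256'", 'e3b0c442'),
#   #  ('process:binary_ref.hashes.MD5', 'd41d8cd9')]
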
def _translate_refs(obj, mapping):
    combos = {}
    for prop, val in obj.items():
        if isinstance(val, dict):
            combos.update(_translate_refs(val, mapping))
        if prop.endswith('_ref') or prop.endswith('_refs'):
            if isinstance(val, list):
                obj[prop] = [mapping[v] for v in val]
            else:
                obj[prop] = mapping[val]
        elif prop.endswith(']'):
            stub, _, _ = prop.partition('[')
            if stub not in combos:
                combos[stub] = []
            combos[stub].append(mapping[val])
    return combos


def dict2observation(creator, row):
    now = timefmt(datetime.datetime.utcnow())
    od = OrderedDict(
        {
            'type': 'observed-data',
            'id': 'observed-data--' + str(uuid.uuid4()),
            'created_by_ref': creator['id'],
            'created': now,
            'modified': now,
            'number_observed': 1,
        }
    )
    scos = {}  # TODO: need a graph/tree instead?
    for key, val in row.items():
        if not val:
            continue
        if '#' in key:
            key, _, sco_name = key.partition('#')
        else:
            sco_name = None
        if ':' not in key:
            # Not a STIX object path -> property of observed-data
            if '.' not in key:
                if key in KNOWN_TIMESTAMPS:
                    ts = dateutil.parser.parse(val)
                    od[key] = timefmt(ts)
                else:
                    od[key] = val
            else:
                prop, _, rest = key.partition('.')
                recreate_dict(od, prop, rest, val)
        else:
            sco_type, _, rest = key.partition(':')
            sco_key = sco_name or sco_type
            observable = scos.get(sco_key, {'type': sco_type})
            if rest.endswith('_ref') and ':' in val:
                # Special case for referencing another value
                observable[rest] = val
            else:
                set_obs_prop(observable, rest, val, scos, sco_type + ':')
            scos[sco_key] = observable
    od['objects'] = {}

    # Create a mapping from ref_key to index num
    mapping = {}
    for key, obj in scos.items():
        idx = len(od['objects'])
        od['objects'][str(idx)] = obj
        mapping[key] = str(idx)

    # Translate references
    repls = {}
    for key, obj in od['objects'].items():
        combos = _translate_refs(obj, mapping)
        # Combine references
        for k, v in combos.items():
            obj[k] = v
        new_obj = {k: v for k, v in obj.items() if not k.endswith(']')}
        repls[key] = new_obj
    for orig, repl in repls.items():
        od['objects'][orig] = repl

    # Walk objects and fix up x-oca-event if present
    refs = {}
    for idx, sco in od['objects'].items():
        sco_type = sco['type']
        if sco_type == 'network-traffic':
            refs[sco_type] = idx
        elif sco_type == 'process':  # 'opened_connection_refs' in sco:
            if 'process' not in refs:
                refs[sco_type] = idx
            if 'parent_ref' in sco:
                refs[sco_type] = idx
                refs['parent_process'] = sco['parent_ref']
        elif sco_type == 'domain-name':
            refs[sco_type] = idx
        elif sco_type == 'file':
            refs[sco_type] = idx
        elif sco_type == 'x-oca-event':
            refs[sco_type] = idx
        elif sco_type == 'x-oca-asset':
            refs[sco_type] = idx
    if 'x-oca-event' in refs:
        event = od['objects'][refs['x-oca-event']]
        for sco_type, idx in refs.items():
            if sco_type == 'network-traffic':
                event['network_ref'] = idx
            elif sco_type in ['process', 'parent_process']:
                event[sco_type + '_ref'] = idx
            elif sco_type == 'file' and is_file_event(event['code']):
                event[sco_type + '_ref'] = idx
            elif sco_type == 'domain-name':
                event['domain_ref'] = idx
            elif sco_type == 'x-oca-asset':
                event['host_ref'] = idx
    return od

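# Rough shape of the output (ids and timestamps vary per run): a row such as
#   {'process:pid': '1234', 'process:binary_ref.name': 'cmd.exe'}
# becomes an observed-data SDO whose 'objects' dict holds a process SCO
# (with pid coerced to int 1234 via format_val) and a file SCO, and the
# process's binary_ref is rewritten from the internal 'process:binary_ref'
# key to the file SCO's numeric index by the translation pass above.
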
## End of STIX generation code


## Code for creating intermediate format

def from_unix_time(ts):
    if isinstance(ts, str):
        ts = float(ts)
    ts = datetime.datetime.fromtimestamp(ts).isoformat().replace('+00:00', 'Z')
    return [('first_observed', ts), ('last_observed', ts)]


def to_action_code(event_id):
    '''Convert a Windows event ID to x-oca-event action and code'''
    event_id = int(event_id)
    return [
        ('x-oca-event:code', event_id),
        ('x-oca-event:action', windows_events.get(event_id)),
    ]


def to_cat_list(category):
    value = category if isinstance(category, list) else [category]
    return [('x-oca-event:category', value)]


def to_payload_bin(value):
    return [
        ('artifact:payload_bin', base64.b64encode(value.encode()).decode('ascii'))
    ]


PROTO_TABLE = {num: name[8:] for name, num in vars(socket).items()
               if name.startswith("IPPROTO")}

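# For instance, socket.IPPROTO_TCP is 6 and socket.IPPROTO_UDP is 17, so
# PROTO_TABLE[6] == 'TCP' and PROTO_TABLE[17] == 'UDP' on typical platforms.
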
def to_protocol(value):
    if value.isdigit():
        try:
            value = PROTO_TABLE[int(value)].lower()
        except KeyError:
            pass
    return [("process:opened_connection_refs[0].protocols", value)]


def is_file_event(event_id):
    return event_id in {6, 7, 9, 11, 15}


def split_hash(hash_string: str, prefix: str, tag: str = ''):
    token_dict = {
        "SHA1=": f"{prefix}hashes.'SHA-1'{tag}",
        "MD5=": f"{prefix}hashes.MD5{tag}",
        "SHA256=": f"{prefix}hashes.'SHA-256'{tag}",
    }
    hashes = []
    for hstr in hash_string.split(','):
        for hash_token, stix_key in token_dict.items():
            if hash_token in hstr:
                hashes.append((stix_key, hstr[len(hash_token):]))
    return hashes

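# Example with a Sysmon-style Hashes field (digest values invented):
#
#   split_hash('MD5=d41d8cd9,SHA256=e3b0c442', 'file:')
#   # [('file:hashes.MD5', 'd41d8cd9'), ("file:hashes.'SHA-256'", 'e3b0c442')]
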
def split_image_hash(hash_string: str):
    return split_hash(hash_string, 'process:binary_ref.')


def split_file_hash(hash_string: str):
    return split_hash(hash_string, 'file:')


def split_loaded_hash(hash_string: str):
    return split_hash(hash_string, 'file:', '#loaded')


def split_image(abs_name: str, prefix='process:'):
    name = ntpath.basename(abs_name)
    path = ntpath.dirname(abs_name)
    return [
        (prefix + 'name', name),
        (prefix + 'binary_ref.name', name),
        (prefix + 'binary_ref.parent_directory_ref.path', path),
    ]


def split_parent_image(abs_name: str):
    return split_image(abs_name, prefix='process:parent_ref.')


def split_image_loaded(abs_name: str):
    name = ntpath.basename(abs_name)
    path = ntpath.dirname(abs_name)
    return [
        ('file:name#loaded', name),
        ('file:parent_directory_ref.path#loaded', path),
    ]


def split_file_path(abs_name: str, prefix='file:'):
    name = ntpath.basename(abs_name)
    path = ntpath.dirname(abs_name)
    return [
        (prefix + 'name', name),
        (prefix + 'parent_directory_ref.path', path),
    ]


def split_reg_key_value(path: str):
    key, _, value = path.rpartition('\\')
    return [
        ('windows-registry-key:key', key),
        ('windows-registry-key:values', [{'name': value}]),
    ]

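# For example (hypothetical registry path):
#
#   split_reg_key_value(r'HKLM\SOFTWARE\Vendor\Setting')
#   # [('windows-registry-key:key', 'HKLM\\SOFTWARE\\Vendor'),
#   #  ('windows-registry-key:values', [{'name': 'Setting'}])]
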
# Do we need this? Or can we extract it from the Message field?
windows_events = {
    1: 'Process Creation',
    2: 'Process Changed a file creation time',
    3: 'Network Connection',
    4: 'Sysmon Service State Change',
    5: 'Process Terminated',
    6: 'Driver Loaded',
    7: 'Image Loaded',
    8: 'Create Remote Thread',
    9: 'Raw File Access Read',
    10: 'Process Access',
    11: 'File Create',
    12: 'Registry Create and Delete',
    13: 'Registry Value Set',
    14: 'Registry Key and Value Rename',
    15: 'File Create Stream Hash',
    16: 'Sysmon Config Change',
    17: 'Pipe Event Created',
    18: 'Pipe Event Connected',
    19: 'WMI EventFilter activity',
    20: 'WMI EventConsumer activity',
    21: 'WMI EventConsumerToFilter activity',
    22: 'DNS Query',
    255: 'Sysmon error',
}


# Specialized mappings per Windows EventID
# We should probably move the shared ones out of here.
# Only needed for properties whose meaning depends on the event ID.
windows_mapping = {
    1: {
        "UtcTime": ["first_observed", "last_observed", "process:created"],
        "Image": split_image,
        "ProcessId": "process:pid",
        "ProcessGuid": "process:x_unique_id",
        "CommandLine": ["process:command_line"],
        "ParentImage": split_parent_image,
        "ParentProcessId": "process:parent_ref.pid",
        "ParentProcessGuid": "process:parent_ref.x_unique_id",
        "ParentCommandLine": "process:parent_ref.command_line",
        #"UserID": "process:creator_user_ref.user_id",
        #"User": "process:creator_user_ref.account_login",
        "User": "process:creator_user_ref.user_id",
        "Hashes": split_image_hash,
    },
    3: {
        "UtcTime": ["first_observed", "last_observed"],
        "Image": split_image,
        "ProcessId": "process:pid",
        "ProcessGuid": "process:x_unique_id",
        "SourceIp": "process:opened_connection_refs[0].src_ref.value",
        "DestinationIp": "process:opened_connection_refs[0].dst_ref.value",
        "Protocol": "process:opened_connection_refs[0].protocols",
        "SourcePort": "process:opened_connection_refs[0].src_port",
        "DestinationPort": "process:opened_connection_refs[0].dst_port",
    },
    5: {
        "UtcTime": ["first_observed", "last_observed"],
        "Image": split_image,
        "ProcessId": "process:pid",
        "ProcessGuid": "process:x_unique_id",
    },
    7: {
        "UtcTime": ["first_observed", "last_observed"],
        "Image": split_image,
        "ImageLoaded": split_image_loaded,
        "ProcessId": "process:pid",
        "ProcessGuid": "process:x_unique_id",
        "CommandLine": ["process:command_line"],
        "Hashes": split_loaded_hash,
    },
    11: {
        "UtcTime": ["first_observed", "last_observed"],
        "Image": split_image,
        "ProcessId": "process:pid",
        "ProcessGuid": "process:x_unique_id",
        "TargetFilename": split_file_path,
    },
    12: {
        "UtcTime": ["first_observed", "last_observed"],
        "Image": split_image,
        "ProcessId": "process:pid",
        "ProcessGuid": "process:x_unique_id",
        "TargetObject": "windows-registry-key:key",  # OR: "process:x_created_key_ref.key"?
    },
    13: {
        "UtcTime": ["first_observed", "last_observed"],
        "Image": split_image,
        "ProcessId": "process:pid",
        "ProcessGuid": "process:x_unique_id",
        "TargetObject": split_reg_key_value,
    },
    3018: {
        "QueryName": "domain-name:value",
        #"QueryType": "domain-name:resolves_to_refs[0].type",
        "QueryResults": "domain-name:resolves_to_refs[0].value",
    },
    4688: {
        "NewProcessName": split_image,
        "NewProcessId": "process:pid",
        "CommandLine": ["process:command_line"],
        "ParentProcessName": split_parent_image,
        "ParentProcessGuid": "process:parent_ref.x_unique_id",
        "ProcessId": "process:parent_ref.pid",
        "ProcessGuid": "process:x_unique_id",
        #"SubjectUserName": "process:creator_user_ref.account_login",
        "SubjectUserName": "process:creator_user_ref.user_id",
    },
    5156: {
        "Application": split_image,
        "TimeCreated": ["first_observed", "last_observed"],
        "ProcessId": "process:pid",
        "SourceAddress": "process:opened_connection_refs[0].src_ref.value",
        "SourcePort": "process:opened_connection_refs[0].src_port",
        "DestAddress": "process:opened_connection_refs[0].dst_ref.value",
        "DestPort": "process:opened_connection_refs[0].dst_port",
        "Protocol": to_protocol,
    },
}

def merge_mappings(common, specific, key=None):
    '''Merge common mapping into specific[key] mapping'''
    if key:
        return {k: {j: {**u, **common} if j == key else u
                    for j, u in v.items()}
                for k, v in specific.items()}
    return {**common, **specific}

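# A minimal sketch of both merge modes (keys are made up):
#
#   merge_mappings({'a': 1}, {'b': 2})
#   # {'a': 1, 'b': 2}
#   merge_mappings({'a': 1}, {'evt': {'inner': {'b': 2}}}, key='inner')
#   # {'evt': {'inner': {'b': 2, 'a': 1}}}
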
def process_mapping(event, mapping):
    tuples = []
    for map_key, map_val in mapping.items():
        if isinstance(map_val, dict):
            for inner_key, inner_val in map_val.items():
                tuples += process_mapping(event[inner_key], inner_val)
        elif isinstance(map_val, list):
            event_val = event.get(map_key)
            if event_val is not None:
                for inner_key in map_val:
                    tuples.append((inner_key, event_val))
        elif isinstance(map_val, str):
            event_val = event.get(map_key)
            if event_val is not None:
                tuples.append((map_val, event_val))
        elif callable(map_val):
            event_val = event.get(map_key)
            if event_val is not None:
                tuples += map_val(event_val)
    return tuples


def process_event(event, mapping, event_id=None):
    if event_id:
        # If we have a Windows event, merge in its specialized mapping
        event_mapping = windows_mapping.get(event_id)
        if event_mapping:
            mapping = merge_mappings(mapping, event_mapping)
    return dict(process_mapping(event, mapping))

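# End-to-end sketch of the intermediate format (event fields invented for
# illustration): a Sysmon EventID 5 record maps to a dict of STIX object
# paths, which dict2observation then turns into an observed-data SDO.
#
#   process_event({'UtcTime': '2021-01-01 00:00:00',
#                  'Image': r'C:\Windows\System32\cmd.exe',
#                  'ProcessId': '1234'}, {}, event_id=5)
#   # {'first_observed': '2021-01-01 00:00:00',
#   #  'last_observed': '2021-01-01 00:00:00',
#   #  'process:name': 'cmd.exe',
#   #  'process:binary_ref.name': 'cmd.exe',
#   #  'process:binary_ref.parent_directory_ref.path': 'C:\\Windows\\System32',
#   #  'process:pid': '1234'}
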
## End of code for creating intermediate format
class Mapper:
    def detect(self, event):
        raise NotImplementedError

    def convert(self, event):
        raise NotImplementedError

## Datasource-specific code


# Security Datasets - https://github.com/OTRF/Security-Datasets

class SdsMapper(Mapper):
    @staticmethod
    def enhanced_action(message):
        results = to_payload_bin(message)
        m = re.search(r'^([^:\.]*)', message)
        if m:
            results.append(('x-oca-event:action', m.group(1)))
        m = re.search(r'EventType: (\w+)', message)
        if m:
            event_type = m.group(1)
            event_id = SdsMapper.event_types.get(event_type)
            if event_id:
                results.append(('x-oca-event:action',
                                windows_events.get(event_id) + ' - ' + event_type))
        m = re.search(r'Details: ([^"]*)', message)
        if m:
            details = m.group(1)
            if details.startswith('DWORD') or details.startswith('QWORD'):
                parts = details.split()
                results.append(('windows-registry-key:values',
                                [{'data': parts[1], 'data_type': parts[0]}]))
        return results

    # TODO: Are these common to all Windows event sources?
    common_mapping = {
        "@timestamp": ["first_observed", "last_observed"],
        "TimeCreated": ["first_observed", "last_observed"],
        "Channel": "x-oca-event:module",
        "SourceName": "x-oca-event:provider",
        "Hostname": "x-oca-asset:hostname",
        "EventID": to_action_code,
        "Category": to_cat_list,  # "x-oca-event:category" is defined to be a list
        "Message": lambda x: SdsMapper.enhanced_action(x),
        #"Message": to_payload_bin,
        "ProcessName": split_image,  # At least some events use this instead of Image
        "ProcessId": "process:pid",
        "ProcessGuid": "process:x_unique_id",
        "Application": split_image,  # At least some events use this instead of Image
    }

    # Mapping of the EventType message field to an event ID
    event_types = {
        'SetValue': 13,
        'DeleteValue': 12,
        'CreateKey': 12,
        'DeleteKey': 12,
        'CreatePipe': 17,
        'ConnectPipe': 18,
    }

    def detect(self, event):
        tags = event.get('tags')
        return ((tags is not None and 'mordorDataset' in tags)
                or ('EventID' in event and 'TimeCreated' in event))  # FIXME: too generic?

    def convert(self, event):
        event_id = event['EventID']
        result = process_event(event, self.common_mapping, event_id)
        #if 'user-account:account_login' not in result:
        if 'user-account:user_id' not in result:
            username = event.get('TargetUserName')
            if not username:
                username = event.get('SubjectUserName')
            if username and username != '-':
                #result['user-account:account_login'] = username
                result['user-account:user_id'] = username
        return result

# Zeek logs
# The problem here is that Zeek logs span multiple files; this only covers conn OR dns.
# TODO: figure out how to merge the different Zeek logs first, then process.

class ZeekCsvMapper(Mapper):
    zeek_mapping = {
        # FIXME: this only covers the conn log
        "ts": from_unix_time,
        "id.orig_h": "network-traffic:src_ref.value",
        "id.orig_p": "network-traffic:src_port",
        "orig_ip_bytes": "network-traffic:src_byte_count",
        "orig_pkts": "network-traffic:src_packets",
        "id.resp_h": "network-traffic:dst_ref.value",
        "id.resp_p": "network-traffic:dst_port",
        "resp_ip_bytes": "network-traffic:dst_byte_count",
        "resp_pkts": "network-traffic:dst_packets",
        "proto": "network-traffic:protocols",
    }

    def detect(self, event):
        return 'id.orig_h' in event

    def convert(self, event):
        return dict(process_event(event, self.zeek_mapping))

class ZeekJsonMapper(Mapper):
    common_mapping = {
        #"@system": "x-oca-asset:hostname",
        "ts": from_unix_time,
        "id_orig_h": "network-traffic:src_ref.value",
        "id_orig_p": "network-traffic:src_port",
        "id_resp_h": "network-traffic:dst_ref.value",
        "id_resp_p": "network-traffic:dst_port",
        "proto": "network-traffic:protocols",
    }

    zeek_mapping = {
        'conn': {
            "orig_ip_bytes": "network-traffic:src_byte_count",
            "resp_ip_bytes": "network-traffic:dst_byte_count",
            "orig_pkts": "network-traffic:src_packets",
            "resp_pkts": "network-traffic:dst_packets",
            "orig_l2_addr": "network-traffic:src_ref.resolves_to_refs[0].value",
            "resp_l2_addr": "network-traffic:dst_ref.resolves_to_refs[0].value",
        },
        'dns': {
            #'query': "network-traffic:extensions.'dns-ext'.question.name_ref.value",
            'query': 'domain-name:value',
            'answers': lambda x: ZeekJsonMapper.process_answers(x),
        },
    }

    @staticmethod
    def process_answers(answers):
        results = []
        i = 0
        for answer in answers:
            try:
                _ = ip_address(answer)
                #results.append(("network-traffic:extensions.'dns-ext'.resolved_ip_refs[*].value", answer))
                results.append((f"domain-name:resolves_to_refs[{i}].value", answer))
                i += 1
            except ValueError:
                pass
        return results

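    # Illustrative: mixed DNS answers keep only the IP-like entries
    # (values are made up):
    #
    #   ZeekJsonMapper.process_answers(['example.com', '93.184.216.34'])
    #   # [('domain-name:resolves_to_refs[0].value', '93.184.216.34')]
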
    def detect(self, event):
        return '@stream' in event

    def convert(self, event):
        stream = event['@stream']
        if stream in self.zeek_mapping:
            mapping = merge_mappings(self.common_mapping, self.zeek_mapping[stream])
        else:
            mapping = self.common_mapping
        return dict(process_event(event, mapping))

# ISC Honeypot: e.g. https://isc.sans.edu/api/#webhoneypotreportsbyurl
class IscHoneypotJsonMapper(Mapper):
    mapping = {
        "url": "url:value",
        "user_agent": "network-traffic:extensions.'http-request-ext'.request_header.'User-Agent'",
        "source": "network-traffic:src_ref.value",
        "ts": ["first_observed", "last_observed"],
        "sport": "network-traffic:src_port",
        "dport": "network-traffic:dst_port",
        "dest": "network-traffic:dst_ref.value",
        "proto": "network-traffic:protocols",
    }

    def detect(self, event):
        return 'url' in event and 'user_agent' in event and 'source' in event

    def convert(self, event):
        # ISC Honeypot doesn't have ports or a dest addr, so make them up
        event['sport'] = 0
        event['dport'] = 80
        event['dest'] = '127.0.0.1'
        event['proto'] = 'tcp'
        event['ts'] = event['date'] + 'T' + event['time'] + '.000Z'
        return dict(process_event(event, self.mapping))

# Generic "flat" JSON mapper
class FlatJsonMapper(Mapper):
    def detect(self, event):
        otype = event.get('type')
        if otype:
            return primary_prop(otype) in event
        return False

    def convert(self, event):
        result = {}
        otype = event.get('type')
        timestamp_key = None
        if otype:
            for key, value in event.items():
                if key in ['first_observed', 'last_observed', 'number_observed']:
                    new_key = key
                else:
                    new_key = f'{otype}:{key}'
                if key in KNOWN_TIMESTAMPS:
                    timestamp_key = key
                result[new_key] = value
            if timestamp_key and 'first_observed' not in result:
                ts = event[timestamp_key]
                result['first_observed'] = ts
                result['last_observed'] = ts
            return result
        return None

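# Sketch of the flat-JSON path (event invented for illustration; assumes
# firepit.props.primary_prop('ipv4-addr') is 'value'):
#
#   event = {'type': 'ipv4-addr', 'value': '198.51.100.7'}
#   FlatJsonMapper().detect(event)   # True
#   FlatJsonMapper().convert(event)
#   # {'ipv4-addr:type': 'ipv4-addr', 'ipv4-addr:value': '198.51.100.7'}
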
# TODO: "register" each data type # File format code
[docs]def process_events(events, mappers, ident): mapper = None results = [] for event in events: if not isinstance(event, dict): continue if not mapper: # Detect data type for m in mappers: if m.detect(event): mapper = m break if mapper: od = mapper.convert(event) if od: results.append(dict2observation(ident, od)) return results
def read_csv(fp, mappers, ident):
    # Currently this knows about the Bro/Zeek CSV format
    # Ideally this would be agnostic to the CSV producer
    quoting = csv.QUOTE_NONE
    sep = '\t'
    linenum = 0
    names = []
    for line in fp:
        line = line.rstrip('\n')
        if line.startswith('#separator'):
            _, _, sep = line.partition(' ')
            sep = sep.encode().decode('unicode_escape')  # e.g. '\\x09' -> '\t'
        elif line.startswith('#fields'):
            names = line[1:].split(sep)[1:]
        elif line.startswith('#types'):
            break
        elif not line.startswith('#') and linenum == 0:
            if sep not in line:
                # If not tab, assume comma.
                sep = ','
            # Determine fieldnames from the header
            # Also try to infer the quoting style
            names = []
            quoting = csv.QUOTE_NONNUMERIC
            for name in line.split(sep):
                if name.startswith('"'):
                    name = name.strip('"')
                    if name.isdigit():
                        quoting = csv.QUOTE_ALL
                        break
                names.append(name)
            break
    reader = csv.DictReader(fp, delimiter=sep, fieldnames=names, quoting=quoting)
    events = []
    for obj in reader:
        if obj.get('ts') == '#close':  # Weird Zeek thing
            break
        events.append(obj)
    return process_events(events, mappers, ident)

def read_json(fp, mappers, ident):
    try:
        data = json.load(fp)
    except json.JSONDecodeError:
        # Not a single JSON document; treat it as JSON lines
        fp.seek(0)
        data = (json.loads(line) for line in fp)
    return process_events(data, mappers, ident)


def read_log(fp, mappers, ident):
    try:
        data = (json.loads(line) for line in fp)
        result = process_events(data, mappers, ident)
    except json.decoder.JSONDecodeError:
        # Not JSON lines; rewind and try Zeek-style CSV instead
        fp.seek(0)
        result = read_csv(fp, mappers, ident)
    return result

def detect_filetype(input_file):
    if input_file.endswith('.csv'):
        read_func = read_csv
    elif input_file.endswith('.json'):
        read_func = read_json
    elif input_file.endswith('.log'):
        read_func = read_log
    else:
        raise NotImplementedError
    return read_func

def convert_to_stix(input_file):
    now = timefmt(datetime.datetime.utcnow())
    id1 = OrderedDict({
        "type": "identity",
        "identity_class": "program",
        "name": "woodchipper",  # TODO: pass this in as an arg
        "id": "identity--" + str(uuid.uuid4()),
        "created": now,
        "modified": now,
    })
    mappers = [
        SdsMapper(),
        ZeekJsonMapper(),
        ZeekCsvMapper(),
        IscHoneypotJsonMapper(),
        FlatJsonMapper(),
    ]
    # TODO: STIX 2.1
    bundle = {
        'type': 'bundle',
        'id': 'bundle--' + str(uuid.uuid4()),
        'spec_version': '2.0',  # TODO: If 2.1, omit this property
        'objects': [],
    }
    objects = [id1]
    fp = None
    try:
        if input_file.endswith('.zip'):
            zf = zipfile.ZipFile(input_file)
            for filename in zf.namelist():
                try:
                    read_func = detect_filetype(filename)
                    input_file = filename
                    break
                except NotImplementedError:
                    pass
            fp = zf.open(input_file, 'r')
        else:
            read_func = detect_filetype(input_file)
            fp = open(input_file, 'r')
        objects += read_func(fp, mappers, id1)
    finally:
        # Ensure the input file is closed whether or not parsing succeeds
        if fp is not None:
            fp.close()
    bundle['objects'] = objects
    return bundle

def convert(input_file, output_file=None):
    bundle = convert_to_stix(input_file)
    if output_file:
        with open(output_file, 'w') as fp:
            json.dump(bundle, fp, indent=4, ensure_ascii=False)
    else:
        print(json.dumps(bundle, indent=4, ensure_ascii=False))

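# Typical usage (file names are hypothetical):
#
#   from firepit.woodchipper import convert, convert_to_stix
#   convert('sysmon_events.json', 'bundle.json')  # write a STIX bundle to disk
#   bundle = convert_to_stix('conn.log')          # or keep it in memory
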
if __name__ == '__main__':
    convert(sys.argv[1])