File carving


I'm quite new to Bro, so please be kind :)

I’m using this script:


# HTTP file-carving configuration: what to look for (magic bytes, MIME types,
# file extensions), which URLs to skip, and the per-request state that the
# event handlers in this script keep on the HTTP::Info record.
module HTTP;

export {
	# NOTE(review): the posted script had an empty, unterminated
	# `redef enum Notice::Type += {` at this point. No notice names are
	# visible and nothing in the visible handlers raises a notice, so it is
	# left commented out; restore it with its members if they exist.
	# redef enum Notice::Type += { ... };

	# File magic bytes to look for: the ZIP local-file-header signature
	# "PK\x03\x04" (JAR files are ZIP archives) followed by six fixed
	# header bytes.
	const file_magic_bytes = /^\x50\x4B\x03\x04\x14\x00\x08\x00\x08\x00/ &redef;

	# MIME types to look for. '/' must be escaped inside a pattern literal.
	const extract_mime_types = /application\/java-archive/ &redef;
	redef HTTP::extract_mime_types += /application\/x-java-archive/;
	redef HTTP::extract_mime_types += /application\/x-msdownload/;
	redef HTTP::extract_mime_types += /application\/exe/;
	redef HTTP::extract_mime_types += /application\/x-exe/;
	redef HTTP::extract_mime_types += /application\/dos-exe/;
	redef HTTP::extract_mime_types += /application\/x-winexe/;
	redef HTTP::extract_mime_types += /application\/msdos-windows/;
	redef HTTP::extract_mime_types += /application\/x-msdos-program/;

	# File extensions to look for. The dot is escaped so that it matches a
	# literal '.' rather than any character.
	const extract_file_extensions = /\.[jJ][aA][rR]$/ &redef;
	redef HTTP::extract_file_extensions += /\.[sS][cC][rR]$/;

	# Size constraints of file to extract (TODO: not enforced anywhere yet)
	const minimum_size = 10240 &redef;   # 10K
	const maximum_size = 8388608 &redef; # 8MB

	# Text log that receives one record per carved file.
	const ufile = open("malware_urls.txt");

	# URL patterns to whitelist.
	# NOTE(review): all three patterns in the posted script were truncated
	# right after the host prefix (`/^http://[^/]`), so the whitelisted
	# domains are unknown. The placeholder below uses the reserved
	# ".invalid" TLD and therefore never matches real traffic.
	# TODO: replace it and re-add the two lost entries, e.g.
	#   redef HTTP::whitelist_url_patterns += /^http:\/\/[^\/]*\.<domain>\//;
	const whitelist_url_patterns = /^http:\/\/[^\/]*\.example\.invalid\// &redef;

	# Most recently seen HTTP header list. Declared `global` rather than
	# `const` because the http_all_headers handler assigns to it.
	global hd: table[count] of mime_header_rec = {};

	# Not referenced by any handler visible in this script.
	const chk = F;

	#type mime_header_list: table[count] of mime_header_rec;

	# Information to track throughout session.
	# NOTE(review): the stock HTTP file-extraction script may already add
	# fields named extraction_file / extract_file to HTTP::Info - confirm
	# there is no clash when both scripts are loaded.
	redef record Info += {
		# Filename prefix recording which test matched (magic/mime/extension).
		extraction_prefix: string &optional;
		# Open handle of the file the entity body is written to.
		extraction_file: file &log &optional;
		# Set to T when the current entity should be carved.
		extract_file: bool &default=F;
		# Running total of the chunk lengths written so far.
		extracted_size: count &default=0;
	};
}

# Remember the header list of the most recent HTTP message so that the
# entity-data handler can write it to the URL log.
#
# c:       connection the headers were seen on (unused here).
# is_orig: T for request headers, F for response headers (unused here).
# hlist:   all headers of the message, indexed by position.
#
# NOTE(review): `hd` is one script-wide variable, not per-connection state, and
# it is overwritten for requests and responses alike, so the logged headers
# presumably can belong to a different message - verify. It must also be
# declared `global` (not `const`) for this assignment to be accepted.
event http_all_headers(c: connection, is_orig: bool, hlist: mime_header_list) &priority=-1
	{
	hd = hlist;
	}
# On the first body chunk of a server response, decide whether the entity
# should be carved (magic bytes, MIME type or URL extension match, and URL not
# whitelisted), open the capture file and log the URL and headers. Every chunk
# that arrives while a capture file is open is appended to it.
#
# c:       connection carrying the HTTP transaction.
# is_orig: T when the data was sent by the client; such data is ignored.
# length:  length of this chunk, added to c$http$extracted_size.
# data:    the bytes of this chunk.
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-5
	{
	# Ignore client communication
	if ( is_orig )
		return;

	# If in first chunk of data
	if ( c$http$first_chunk )
		{
		# Get the URL for whitelisting and extension matching
		local url = build_url_http(c$http);

		# Check for file magic byte matches
		if ( HTTP::file_magic_bytes in data )
			{
			c$http$extraction_prefix = "magic-match";
			c$http$extract_file = T;
			}

		# Check for MIME type matches (mime_type may not be set)
		else if ( c$http?$mime_type &&
		          HTTP::extract_mime_types in c$http$mime_type )
			{
			c$http$extraction_prefix = "mime-match";
			c$http$extract_file = T;
			}

		# Check for file extension matches
		else if ( HTTP::extract_file_extensions in url )
			{
			c$http$extraction_prefix = "extension-match";
			c$http$extract_file = T;
			}

		# Content Disposition HTTP Header String - TODO?

		# If a magic byte, MIME, or Ext match...
		if ( c$http$extract_file )
			{
			# Check against whitelist
			if ( HTTP::whitelist_url_patterns in url )
				c$http$extract_file = F;
			else
				{
				# Open file to capture data
				local suffix = fmt("%s_%d.jar", is_orig ? "orig" : "resp", c$http_state$current_response);
				local fname = generate_extraction_filename(c$http$extraction_prefix, c, suffix);
				c$http$extraction_file = open(fname);
				# Write chunks verbatim: without raw output, `print`
				# appends a newline to every chunk, which corrupts a
				# carved binary.
				enable_raw_output(c$http$extraction_file);
				c$http$extracted_size = 0;

				local output = fmt("%s,%s", c$id$resp_h, c$http$uri);
				#local reshdr= fmt("%s %s %s", c$http$status_code, c$http$status_msg, c$http$info_msg);

				# Record where the file came from, the response status
				# and the last header list stored in `hd`.
				print ufile, "########################";
				print ufile, output;
				print ufile, c$http$status_code, c$http$status_msg;
				for ( j in hd )
					print ufile, hd[j];
				print ufile, "########################";

				# Currently unused; presumably intended for a notice
				# that is not part of the posted script.
				local message = fmt("Storing %s to %s", url, c$http$extraction_file);
				local method = "UNKNOWN";
				if ( c$http?$method )
					method = c$http$method;
				}
			}
		} # End first chunk if

	# Do the capture when a capture file is open
	if ( c$http?$extraction_file )
		{
		print c$http$extraction_file, data;
		c$http$extracted_size += length;
		}
	} # End HTTP entity data

event http_end_entity(c: connection, is_orig: bool)

if ( c$http?$extraction_file )



File extraction in Bro 2.1 has a problem that pops up from time to time. I don't think anyone is entirely clear on what the cause is, and we have completely revamped file handling for 2.2. The video in which I discuss file handling in the upcoming 2.2 release was published today, and there are some accompanying exercises available too.

