File carving

Hello,

I’m quite new to Bro, so please be kind :)

I’m using this script:

#######################################!

module HTTP;

# Configuration and per-session state for carving JAR/executable payloads
# out of HTTP responses. Everything declared here lives in the HTTP module
# namespace; the &redef constants can be tuned from local site policy.
export {
	# NOTICE type raised whenever a response body starts being written to disk.
	redef enum Notice::Type += {
		Exe_File_Capture,
	};

	# File magic bytes to look for: a ZIP local-file header ("PK\x03\x04")
	# followed by the fixed version/flag/compression bytes listed here.
	const file_magic_bytes = /^\x50\x4B\x03\x04\x14\x00\x08\x00\x08\x00/ &redef;

	# MIME types to look for. The "/" inside each type must be escaped,
	# otherwise it terminates the pattern literal.
	const extract_mime_types = /application\/java-archive/ &redef;
	redef HTTP::extract_mime_types += /application\/x-java-archive/;
	redef HTTP::extract_mime_types += /application\/x-msdownload/;
	redef HTTP::extract_mime_types += /application\/exe/;
	redef HTTP::extract_mime_types += /application\/x-exe/;
	redef HTTP::extract_mime_types += /application\/dos-exe/;
	redef HTTP::extract_mime_types += /application\/x-winexe/;
	redef HTTP::extract_mime_types += /application\/msdos-windows/;
	redef HTTP::extract_mime_types += /application\/x-msdos-program/;

	# File extensions to look for (case-insensitive .jar and .scr at the end
	# of the URL). The leading dot is escaped so it matches a literal ".".
	const extract_file_extensions = /\.[jJ][aA][rR]$/ &redef;
	redef HTTP::extract_file_extensions += /\.[sS][cC][rR]$/;

	# Size constraints of file to extract (TODO: not enforced by the handlers
	# in this script yet).
	const minimum_size = 10240 &redef;    # 10K
	const maximum_size = 8388608 &redef;  # 8MB

	# Side log that receives one record (server, URI, status, headers) per
	# extracted file.
	const ufile = open("malware_urls.txt");

	# URL patterns to whitelist. Dots are escaped and the host part is
	# "[^\/]*" so that only real subdomains of the listed sites match.
	const whitelist_url_patterns = /^http:\/\/[^\/]*\.windowsupdate\.com\// &redef;
	redef HTTP::whitelist_url_patterns += /^http:\/\/[^\/]*\.microsoft\.com\//;
	redef HTTP::whitelist_url_patterns += /^http:\/\/[^\/]*\.google\.com\//;

	# Most recently seen HTTP header list; written by http_all_headers and
	# read by http_entity_data. It is assigned at runtime, so it has to be a
	# global rather than a const.
	# NOTE(review): this is one table shared by all connections, so with
	# interleaved traffic the logged headers may belong to another session.
	global hd: table[count] of mime_header_rec = {};

	# Not referenced by the handlers shown in this script.
	const chk = F;

	#type mime_header_list: table[count] of mime_header_rec;

	# Information to track throughout session
	redef record Info += {
		# Label describing which rule matched (magic / MIME / extension).
		extraction_prefix: string &optional;
		# Handle of the on-disk file the body is being written to.
		extraction_file: file &log &optional;
		# Set to T once any rule decided this body should be extracted.
		extract_file: bool &default=F;
		# Running total of body bytes written so far.
		extracted_size: count &default=0;
	};
}

# Remembers the header list of the HTTP message that was just parsed by
# copying it into the module-level table `hd`, so that the entity-data
# handler can dump the headers next to an extracted file.
# `hd` is a single table for the whole script, so each call overwrites
# whatever the previous message (on any connection) stored there.
event http_all_headers(c: connection, is_orig: bool, hlist: mime_header_list) &priority=-1
	{
	hd = hlist;
	}
# Decides, on the first chunk of each server response body, whether the body
# should be carved to disk, and then appends every chunk to the open file.
#
# A body is selected when (in this order) its first bytes match
# HTTP::file_magic_bytes, its MIME type matches HTTP::extract_mime_types, or
# the request URL matches HTTP::extract_file_extensions. A selected body is
# skipped again if the URL matches HTTP::whitelist_url_patterns. For each
# extracted body a record is written to `ufile` and an Exe_File_Capture
# notice is raised.
#
# c:       the connection carrying the HTTP session.
# is_orig: T for client-sent data (ignored here), F for server data.
# length:  number of bytes in this chunk.
# data:    the chunk payload.
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-5
	{
	# Ignore client communication
	if ( is_orig )
		return;

	# If in first chunk of data
	if ( c$http$first_chunk )
		{
		# Get the URL for whitelisting and extension matching
		local url = build_url_http(c$http);

		# Check for file magic byte matches
		if ( HTTP::file_magic_bytes in data )
			{
			c$http$extraction_prefix = "magic-match";
			c$http$extract_file = T;
			}
		# Check for MIME type matches. mime_type is an optional field, so
		# test for its presence first; reading it while unset would abort
		# this handler with a runtime error.
		else if ( c$http?$mime_type &&
		          HTTP::extract_mime_types in c$http$mime_type )
			{
			c$http$extraction_prefix = "mime-match";
			c$http$extract_file = T;
			}
		# Check for file extension matches
		else if ( HTTP::extract_file_extensions in url )
			{
			c$http$extraction_prefix = "extension-match";
			c$http$extract_file = T;
			}

		# Content Disposition HTTP Header String - TODO?

		# If a magic byte, MIME, or Ext match...
		if ( c$http$extract_file )
			{
			# Check against whitelist
			if ( HTTP::whitelist_url_patterns in url )
				{
				c$http$extract_file = F;
				}
			else
				{
				# Open file to capture data. is_orig is always F at this
				# point (see the early return), so the suffix starts
				# with "resp".
				local suffix = fmt("%s_%d.jar", is_orig ? "orig" : "resp", c$http_state$current_response);
				local fname = generate_extraction_filename(c$http$extraction_prefix, c, suffix);
				c$http$extraction_file = open(fname);
				enable_raw_output(c$http$extraction_file);
				c$http$extracted_size = 0;

				local output = fmt("%s,%s", c$id$resp_h, c$http$uri);
				#local reshdr= fmt("%s %s %s", c$http$status_code, c$http$status_msg, c$http$info_msg);

				# Write one delimited record to the URL log: server and URI,
				# response status, then every header currently cached in hd.
				print ufile, "########################";
				print ufile, output;
				print ufile, c$http$status_code, c$http$status_msg;
				for ( j in hd )
					{
					print ufile, hd[j];
					}
				print ufile, "########################";

				local message = fmt("Storing %s to %s", url, c$http$extraction_file);
				local method = "UNKNOWN";
				if ( c$http?$method )
					method = c$http$method;

				# NOTE(review): $method and $URL do not appear to be fields
				# of the stock Notice::Info record in Bro 2.x -- confirm
				# they are added by a redef elsewhere.
				NOTICE([$note=Exe_File_Capture,
				        $msg=message,
				        $conn=c,
				        $method=method,
				        $URL=url]);
				}
			}
		} # End first chunk if

	# Do the capture when a capture file is open
	if ( c$http?$extraction_file )
		{
		print c$http$extraction_file, data;
		c$http$extracted_size += length;
		}
	} # End HTTP entity data

# Runs when an HTTP message body ends: if this session has an extraction
# file attached, close it so the carved data is flushed to disk.
event http_end_entity(c: connection, is_orig: bool)
	{
	# Nothing was being extracted for this session.
	if ( ! c$http?$extraction_file )
		return;

	close(c$http$extraction_file);
	}

There is a problem with file extraction in Bro 2.1 that pops up from time to time. I don't think anyone is totally clear on what the problem is, and we've completely revamped file handling for 2.2. The video where I discuss file handling in the upcoming 2.2 release was released today, and there are some exercises available too.

Video: http://security.ncsa.illinois.edu/BroExchange2013/Hall-File_Analysis-NCSA%20DSS%20H264%201.25Mbps.mp4
Exercises: http://bro.org/bro-exchange-2013/exercises/faf.html

  .Seth