Azinix

url.c

Go to the documentation of this file.
00001 #include "evl.h"
00002 
00003 /* Data structure for storing statistics on URLs*/
00004 
00005 typedef struct Uc_Urls_t {
00006   Hash_t *urls; // key is text string, value is count
00007 } Uc_Urls_t;
00008 
00009 /**AutomaticStart */
00010 
00011 // prototypes of static functions
00012 static int Uc_UpdateUrls( Hash_t *, Pkt_ProcessPkt_t * );
00013 static char* getUrl( char *, int );
00014 
00015 /**AutomaticEnd */
00016 
00017 
/** \brief Return the smaller of two ints (returns b when they are equal). */

static int min( int a, int b )
{
  if ( a < b ) {
    return a;
  }
  return b;
}
00021 
00022 
00023 /* \brief Code for keeping track of URLs */
00024 
00025 int
00026 urls (Evl_Manager_t * mgr, Pkt_ProcessPkt_t * pp, void **mystate, void *argument )
00027 {
00028   Hash_t *urlHash = (Hash_t *) *mystate;
00029 
00030   if ( urlHash == NIL( Hash_t ) ) {
00031     urlHash = Hash_InitTable( strcmp, st_strhash );
00032     *mystate = urlHash;
00033   }
00034   Uc_UpdateUrls( urlHash, pp );
00035 
00036   return 0;
00037 }
00038 
00039 
00040 /** \brief Update urls */
00041 
00042 static int 
00043 Uc_UpdateUrls(
00044   Hash_t *hashPtr,
00045   Pkt_ProcessPkt_t *pp
00046 )
00047 {
00048   Pkt_IpHdr_t *ip = Pkt_EthernetExtractIp( pp->pkt );
00049   Pkt_TcpHdr_t *tcp = Pkt_IpExtractPayload (ip);
00050   char *payload = Pkt_TcpHdrReadPayload (tcp);
00051   int payloadLength = pp->length - ( ( (char *) payload ) - ( (char *) pp->pkt ) );
00052   char *url = getUrl( payload, payloadLength );
00053   if ( url == NIL( char ) ) {
00054     return 0;
00055   }
00056   int count;
00057   char tmpbuf[20];
00058   Pkt_PrintIpAddressDottedGeneral( tmpbuf, ip->destIp );
00059   char *fullUrl = util_strcat( tmpbuf,  url );
00060   free( url );
00061   url = fullUrl;
00062   if ( !Hash_Lookup( hashPtr, url, & count ) ) {
00063     Hash_Insert( hashPtr, url, 1 );
00064     printf("New url:%s\n", url );
00065   }
00066   else {
00067     count++;
00068     Hash_Insert( hashPtr, url, count );
00069     printf("Previously seen url with count %d:\t%s\n", count, url );
00070   }
00071   return 0;
00072 }
00073 
00074 
/** \brief Extract the URL from a buffer holding an HTTP "GET " request.

  The first occurrence of the four bytes "GET " that lies completely inside
  data[0 .. length-1] is located; the URL is the run of non-whitespace bytes
  that follows it.  The buffer does not need to be NUL-terminated.

  \param data    bytes to search; not modified
  \param length  number of valid bytes in data
  \return a newly allocated NUL-terminated string that the caller must
          free(), or a null pointer when data is null, length is too small to
          hold "GET ", no complete "GET " is present, or allocation fails.
          When the URL runs up to the end of data without any terminating
          whitespace, an empty string is returned (existing behaviour, kept
          for compatibility).
*/

static char *getUrl(
  char *data,
  int length
)
{
  static const char method[] = "GET ";
  const int methodLen = (int) ( sizeof method - 1 );

  if ( data == NULL || length < methodLen ) {
    return NULL;
  }

  // Only offsets where the whole marker fits are candidates, so a partial
  // "GE" / "GET" at the very end of the data is never taken for a match.
  int urlStart = -1;
  for ( int i = 0 ; i <= length - methodLen ; i++ ) {
    if ( 0 == memcmp( data + i, method, (size_t) methodLen ) ) {
      urlStart = i + methodLen;
      break;
    }
  }
  if ( urlStart < 0 ) {
    return NULL;
  }

  // Measure the URL.  The cast keeps isspace() defined for bytes >= 0x80 on
  // platforms where plain char is signed.
  int urlLen = 0;
  while ( urlStart + urlLen < length
          && !isspace( (unsigned char) data[ urlStart + urlLen ] ) ) {
    urlLen++;
  }

  // No whitespace before the end of the data: report an empty URL.
  // NOTE(review): this mirrors the original code; confirm that dropping an
  // unterminated URL (rather than returning what was seen) is intended.
  if ( urlStart + urlLen == length ) {
    urlLen = 0;
  }

  // Allocate exactly what is needed instead of going through a fixed-size
  // stack buffer, so a long URL cannot overrun anything.
  char *url = malloc( (size_t) urlLen + 1 );
  if ( url == NULL ) {
    return NULL;
  }
  memcpy( url, data + urlStart, (size_t) urlLen );
  url[ urlLen ] = '\0';
  return url;
}