
/* web_collect.c */

/* Andrew Davison, May 1998 (ad@ratree.psu.ac.th) */

/* Processes the input from tcpdump/tcpshow, tcpmon,
   or tcpmon2. web_collect assumes that data lines begin with a tab
   and that packet information is separated by a line of dashes ('-').

   It only prints the packets which have a data line starting with a GET
   message or with an HTTP response whose status is not 200 ('ok').

   Only the address line and the GET/HTTP line from a packet are
   printed.
*/

#include <stdio.h>
#include <string.h>

/* Size of the line buffers in main(); it is passed to fgets() as the buffer
   size, so one element is taken by the terminating '\0'. */
#define MAXLINE 256        /* max length of a line */


/* Returns 1 if the packet data line ln starts with a GET message or with a
   HTTP response other than 200, and 0 otherwise (defined below). */
int web_printable(char *ln);


static void put_line(const char *s)
/* Write s to stdout, adding a newline if s is non-empty and does not
   already end with one (a chunk cut short by the line buffer, or a last
   input line without a newline), so that whatever is printed next starts
   on a fresh line.
*/
{
  size_t len = strlen(s);

  fputs(s, stdout);
  if (len > 0 && s[len - 1] != '\n')
    fputc('\n', stdout);
}


int main(void)
/* Filter stdin to stdout. Input lines are classified by their first
   character:
     - a tab   : packet data line;
     - a dash  : packet separator, ignored;
     - others  : packet address line, remembered for later printing.
   For each packet, the first data line accepted by web_printable() is
   printed, preceded by the remembered address line and followed by a
   line of dashes. Always returns 0.
*/
{
  char line[MAXLINE];
  char addr_line[MAXLINE] = "";   /* empty until an address line is seen, so
                                     that printing it early is harmless */
  int skipping = 0;            /* used to switch off the examination of
                                  packet data lines after a GET or HTTP
                                  line has been printed */
  int at_line_start = 1;       /* does the next fgets() chunk begin a new
                                  input line? */

  while (fgets(line, MAXLINE, stdin) != NULL) {
    int is_line_start = at_line_start;

    /* fgets() stops after a newline; a chunk without one means that the
       rest of the same input line will arrive in the next chunk(s) */
    at_line_start = (strchr(line, '\n') != NULL);

    if (!is_line_start)
      continue;       /* tail of an over-long line: not a new line, so it
                         must not be classified by its first character */

    if (line[0] == '\t') {                      /* a packet data line */
      /* Data lines are only examined while not skipping. They must never
         reach the address branch below, otherwise a data line would
         replace the stored address and switch skipping off again. */
      if (!skipping && web_printable(&line[1])) {    /* skip the tab */
        put_line(addr_line);
        put_line(&line[1]);
        fputs("-----------------------------------\n", stdout);
        fflush(stdout);
        skipping = 1;
      }
    }
    else if (line[0] != '-') {      /* packet address details */
      strcpy(addr_line, line);      /* both buffers hold MAXLINE chars */
      skipping = 0;
    }
  }
  return 0;
}


int web_printable(char *ln)
/* Decide whether a packet data line should be reported.
   Returns 1 when ln begins with "GET ", or begins with "HTTP/" without
   being a "HTTP/1.0 200" or "HTTP/1.1 200" status line.
   Returns 0 for everything else, including those two 'ok' responses.
*/
{
  static const char get_req[]  = "GET ";
  static const char http_any[] = "HTTP/";
  static const char ok_10[]    = "HTTP/1.0 200";
  static const char ok_11[]    = "HTTP/1.1 200";
  int is_ok;

  /* a request line is always of interest */
  if (strncmp(ln, get_req, sizeof get_req - 1) == 0)
    return 1;

  /* neither a request nor a response */
  if (strncmp(ln, http_any, sizeof http_any - 1) != 0)
    return 0;

  /* a response: report it unless its status is 200 */
  is_ok = strncmp(ln, ok_10, sizeof ok_10 - 1) == 0 ||
          strncmp(ln, ok_11, sizeof ok_11 - 1) == 0;
  return !is_ok;
}



