%{
/*
 *   Copyright (C) 1997, 1998, 1999 Loic Dachary
 *
 *   This program is free software; you can redistribute it and/or modify it
 *   under the terms of the GNU General Public License as published by the
 *   Free Software Foundation; either version 2, or (at your option) any
 *   later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the Free Software
 *   Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
 *
 */
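/*
 * Scanner for robots.txt files: collects the Allow and Disallow paths of
 * the records that apply to this crawler and hands them back as quoted
 * SQL strings (see robots_parse at the end of this file).
 */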

/* Head */
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include <string.h>
#include <ctype.h>
#include <sys/stat.h>

#include <salloc.h>

#include <crawl.h>
#include <crawl_private.h>
#include <robots_parser.h>
#include <sqlutil.h>

/*
 * Accumulated Allow paths: the buffer, the size slot handed to
 * static_alloc() for it, and the number of bytes currently in use.
 */
static char* allow = 0;
static int allow_size = 0;
static int allow_length = 0;
/* Same three fields for the accumulated Disallow paths. */
static char* disallow = 0;
static int disallow_size = 0;
static int disallow_length = 0;

/*
 * Debugging: when non-zero robots_parse() reports completion on stderr.
 * Nothing visible in this file changes it from 0.
 */
static int verbose = 0;

/* Helpers used by the scanner actions and by robots_parse(). */
/* Move (arg, arg_length) past the first ':' and the whitespace after it. */
static void find_arg(char** arg, int* arg_length);
/* Non-zero when a matched User-agent line designates this crawler. */
static int agent_match(char* line, int line_length);
/* Mark both path lists as empty; always returns 1. */
static int reset_tables();
/* Append the value of a matched directive line to one of the path lists. */
static void accumulate(char** tablep, int* table_sizep, int* table_lengthp, char* line, int line_length);
/* Turn a non-empty path list into a quoted SQL string stored in *sql_string. */
static void format_result(char** sql_string, char** tablep, int* table_sizep, int table_length);

%}

%option caseless noyywrap 8bit prefix="rp" outfile="lex.yy.c" nounput

%x ACCEPT_LINES

%%
<INITIAL,ACCEPT_LINES>{
	^[[:blank:]]*user-agent:[[:blank:]]*[^[:blank:]\r\n]* {
	   /*
	    * A User-agent line decides whether the lines that follow are
	    * collected.  On a match the lists gathered so far are emptied
	    * and ACCEPT_LINES is entered; otherwise the scanner goes back
	    * to INITIAL, where allow and disallow lines are not collected.
	    */
	   if(agent_match(rptext, rpleng)) {
	     reset_tables();
	     BEGIN(ACCEPT_LINES);
	   } else {
	     BEGIN(INITIAL);
	   }
	}
	.      ;	/* discard any character no longer rule accounts for */
	\n      ;	/* discard line ends */
}

<ACCEPT_LINES>{
        ^[[:blank:]]*disallow:[[:blank:]]*"/"[^[:blank:]\r\n]* {
	  accumulate(&disallow, &disallow_size, &disallow_length, rptext, rpleng);
	}
        ^[[:blank:]]*allow:[[:blank:]]*"/"[^[:blank:]\r\n]* {
	  accumulate(&allow, &allow_size, &allow_length, rptext, rpleng);
	}
}

<<EOF>> {
  /*
   * Go back to INITIAL before terminating.  yyterminate() returns from
   * the scanner, so a statement placed after it is never executed; with
   * the reset in that position a file ending inside ACCEPT_LINES left the
   * next scan starting in ACCEPT_LINES.
   */
  BEGIN(INITIAL);
  yyterminate();
}

%%

/*
 * Locate the value of a "field: value" line.
 *
 * On entry *arg points to the start of the line and *arg_length holds its
 * length.  On return *arg points to the first non-whitespace character
 * that follows the first ':' and *arg_length holds the number of
 * characters from there to the end of the line.
 *
 * If no ':' occurs within the first *arg_length characters, *arg is set
 * to the end of the line and *arg_length to 0.
 */
static void find_arg(char** arg, int* arg_length)
{
  char* line = *arg;
  int line_length = *arg_length;
  char* end = line_length > 0 ? line + line_length : line;
  /* Bounded search: never look past the characters we were given. */
  char* value = line_length > 0 ? (char*)memchr(line, ':', line_length) : 0;

  if(value == 0) {
    *arg = end;
    *arg_length = 0;
    return;
  }

  value++;
  /*
   * The cast matters: the scanner accepts 8 bit input and isspace() is
   * only defined for EOF and values representable as unsigned char.
   */
  while(value < end && isspace((unsigned char)*value))
    value++;

  *arg = value;
  *arg_length = (int)(end - value);
}

/*
 * Tell whether a matched User-agent line designates this crawler.
 *
 * line/line_length describe the whole matched line; the value following
 * the ':' is extracted with find_arg().  Returns 1 when the value is "*"
 * or when its characters equal the leading characters of
 * CRAWL_USER_AGENT (case sensitive), 0 otherwise.
 */
static int agent_match(char* line, int line_length)
{
  find_arg(&line, &line_length);

  /*
   * An empty value designates nobody.  Without this test strncmp() would
   * be called with a count of 0, which always compares equal, and the
   * record would be taken as if it named "*".
   */
  if(line_length <= 0)
    return 0;

  if(!strncmp("*", line, line_length))
    return 1;
  if(!strncmp(CRAWL_USER_AGENT, line, line_length))
    return 1;
  return 0;
}

/*
 * Forget the paths gathered so far.  Only the in-use lengths are zeroed;
 * the buffers and their sizes are left alone.  Always returns 1.
 */
static int reset_tables()
{
  allow_length = disallow_length = 0;
  return 1;
}

/*
 * Append the value of a matched directive line to a path list.
 *
 * tablep, table_sizep and table_lengthp designate the list buffer, its
 * size slot and its in-use length.  line/line_length describe the whole
 * matched line; the value is extracted with find_arg().  Each value is
 * stored preceded by a single space and no terminating null is written.
 */
static void accumulate(char** tablep, int* table_sizep, int* table_lengthp, char* line, int line_length)
{
  int used;
  char* dest;

  find_arg(&line, &line_length);
  used = *table_lengthp;

  /*
   * Ask for the separator and the value plus one spare byte for the
   * null that is stored later.  Presumably static_alloc() grows the
   * buffer to at least the requested size -- confirm in salloc.
   */
  static_alloc(tablep, table_sizep, used + line_length + 1 + 1);

  dest = *tablep + used;
  dest[0] = ' ';
  strncpy(dest + 1, line, line_length);
  *table_lengthp = used + line_length + 1;
}

/*
 * Turn an accumulated path list into a quoted SQL string.
 *
 * When table_length is zero or negative nothing happens and *sql_string
 * is left untouched.  Otherwise the list is null terminated, run through
 * sql_quote_char(), rewritten in place between single quotes, and
 * *sql_string is pointed at the list buffer (*tablep).
 *
 * NOTE(review): the intermediate quoted buffer is a fresh local on every
 * call and is never released here; if sql_quote_char() allocates it,
 * this leaks on each call -- confirm against sqlutil.
 */
static void format_result(char** sql_string, char** tablep, int* table_sizep, int table_length)
{
  char* quoted = 0;
  int quoted_size = 0;
  int quoted_length;

  if(table_length > 0) {
    /* Terminate the list, then quote SQL reserved characters. */
    (*tablep)[table_length] = '\0';
    sql_quote_char(&quoted, &quoted_size, *tablep);
    quoted_length = strlen(quoted);

    /* Rebuild the list buffer as the SQL insertion string. */
    static_alloc(tablep, table_sizep, quoted_length + 10);
    sprintf(*tablep, "'%s'", quoted);

    *sql_string = *tablep;
  }
}

/*
 * Scan the robots.txt file stored at path.
 *
 * For each of the Allow and Disallow lists that ends up non-empty, the
 * matching output pointer (*allowp, *disallowp) is set to a quoted SQL
 * string; an output pointer whose list is empty is not written.  The
 * strings live in file-level buffers that are written again by the next
 * call.
 *
 * Returns 1 on success, -1 when the file cannot be opened (a message is
 * printed on stderr).
 */
int robots_parse(char* path, char** allowp, char** disallowp)
{
  YY_BUFFER_STATE input;
  FILE* robots = fopen(path, "r");

  if(!robots) {
    fprintf(stderr, "robots_parse: cannot open %s for reading\n", path);
    perror("");
    return -1;
  }

  /* Feed the scanner from this file. */
  input = yy_create_buffer(robots, YY_BUF_SIZE);
  yy_switch_to_buffer(input);

  /* Start from empty lists, then run the scanner to end of file. */
  reset_tables();
  rplex();

  yy_delete_buffer(input);
  fclose(robots);

  format_result(allowp, &allow, &allow_size, allow_length);
  format_result(disallowp, &disallow, &disallow_size, disallow_length);

  if(verbose)
    fprintf(stderr, "robots_parse: done\n");

  return 1;
}

/*
 Local Variables: ***
 mode: C ***
 End: ***
*/
