%option	noyywrap
%option prefix="hp"
%option outfile="html_parser.c"

%{
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <cgi/html_tree.h>

/* YY_MAIN is defined as 0 here; presumably this keeps the flex skeleton
 * from emitting its own main() -- confirm against the flex version in use. */
#define YY_MAIN 0

/* Signature of the generated scanner: the scanner handle plus an
 * out-parameter that receives the tree root. */
#define YY_DECL int hplex(yyscan_t yyscanner, TAG **root)

/* Executed in place of the plain `break` that ends a rule action: the first
 * time a node exists (current_tag is set) and no root has been recorded
 * yet, that node is remembered as the root. */
#define YY_BREAK \
	{ \
		if(current_tag && !*root) \
			*root = current_tag; \
	} \
	break;

/*
 * Wrap an accumulated text buffer in a text node and append it after `where`.
 *
 * where - node to append after (may be NULL when the tree is still empty).
 * text  - heap-allocated buffer, or NULL. Ownership passes to this function:
 *         the buffer is either stored in the new node or freed here.
 *
 * Returns whatever appendSibling() returns for the new node; when there is
 * nothing to add (NULL/empty text) or the node cannot be created, `where`
 * is returned unchanged.
 *
 * Not declared `inline`: a bare `inline` definition provides no external
 * definition under C99/C11 rules, which breaks linking whenever the compiler
 * decides not to inline the call.
 */
TAG *addTextNode(TAG* where, char* text)
{
	TAG *tn;

	if(!text)
		return where;

	/* An empty buffer carries no content; drop it instead of creating a node */
	if(*text == '\0')
	{
		free(text);
		return where;
	}

	tn = createTextNode();
	if(!tn)
	{
		/* Callers forget their pointer after this call, so release it here */
		free(text);
		return where;
	}

	tn->is_changed = 0;
	tn->text = text;

	return appendSibling(where, tn);
}

%}
/* Start conditions used by the rules below:
 *   PROCESSTAG       - re-scanning a matched tag, expecting '<' and the name
 *   PROCESSATTRS     - between attributes, or at the end of the tag
 *   PROCESSATTRNAME  - expecting an attribute name
 *   PROCESSATTREQ    - after an attribute name, expecting '=' or whitespace
 *   PROCESSATTRVAL   - after '=', expecting the attribute value
 *   CLOSETAG         - re-scanning a matched closing tag
 *   COMMENT          - between "<!--" and "-->"
 *   TPLTAG           - inside a "{ ... }" template tag
 * They are declared with %s (inclusive), but every rule except <<EOF>> names
 * its start conditions explicitly.
 */
%s PROCESSTAG PROCESSATTRS PROCESSATTRNAME PROCESSATTREQ PROCESSATTRVAL CLOSETAG COMMENT TPLTAG

/* Basic building blocks.
 *   WORD      - run of letters, digits and underscores (tag/attribute names)
 *   VALUE     - attribute value: a bare word with an optional trailing '%',
 *               a double-quoted string, or a single-quoted string; quoted
 *               strings may span lines and cannot contain their own quote
 *   ATTRIBUTE - a name, optionally followed by '=' and a value
 * The negation in the single-quoted branch must sit inside the brackets
 * ([^']); written outside them, '^' is not a negation and ordinary values
 * such as 'foo' never match.
 */
ALPHANUM		[0-9a-zA-Z]
SPACE			[ \r\n\t]
NEWLINE			(\r\n|\n|\r)
BLANK			[ \t]
WORD			({ALPHANUM}|_)+
VALUE			({WORD}%?|\"([^\"]|\n)*\"|'([^']|\n)*')
ATTRIBUTE		{WORD}(={VALUE})?

/* Pieces of a tag.
 *   TAG_START - optional newline and blanks, then '<'
 *   TAG_INNER - tag name followed by whitespace-separated attributes/words
 *   TAG_END   - '>', then optional blanks and at most one newline
 * Whitespace directly around a tag on its line is therefore part of the
 * tag match rather than of the neighbouring text.
 */
TAG_START		({NEWLINE}?{BLANK}*)?<
TAG_INNER		({SPACE}*{WORD}({SPACE}+({ATTRIBUTE}|{WORD}))*){SPACE}*
TAG_END			>({BLANK}*{NEWLINE}?)?

/* Whole-tag patterns: opening tag, closing tag (</name>) and
 * self-closing tag (<name ... />). */
TAG				{TAG_START}{TAG_INNER}{TAG_END}
CLOSING_TAG		{TAG_START}{SPACE}*\/{SPACE}*{WORD}{SPACE}*{TAG_END}
SELF_CLOSING	{TAG_START}{TAG_INNER}\/{TAG_END}

/* Comment and template-tag delimiters. NOTE(review): the rules visible in
 * this file spell these delimiters out literally and do not reference the
 * four names below. */
COMMENT_START	<!--
COMMENT_END		-->
TPLTAG_START	"{"
TPLTAG_END		"}"

%%
%{
	/* Local state of one hplex() call; declared at the top of the rules
	 * section, so it lives inside the generated scanner function. */

	/* Most recently linked node of the tree being built */
	TAG *current_tag = NULL;
	/* Element under construction while a tag is re-scanned */
	TAG *tag = NULL;
	/* Scratch pointers used while matching a closing tag */
	TAG *children;
	TAG *found;
	/* Attribute whose value is expected next */
	ATTR *current_attr = NULL;
	/* Pending text not yet turned into a text node, and a scratch buffer */
	char *text = NULL;
	char *tmp = NULL;
	/* Scratch string lengths */
	long len;
	long len2;
	/* Nesting depth of "{ ... }" template tags */
	int tag_level = 0;

	*root = NULL;
%}

<INITIAL>{TAG}				{
							/* A complete opening tag was recognised. Flush the
							 * pending text into the tree, start a new element
							 * that remembers the raw tag text, then hand the
							 * whole match back so the PROCESSTAG rules can
							 * re-scan it piece by piece. */
							current_tag = addTextNode(current_tag, text);
							text = NULL;

							if(tag != NULL)
							{
								current_tag = appendSibling(current_tag, tag);
							}

							tag = createElement();
							/* Copy yytext before yyless(0) returns it to the input */
							tag->text = strdup(yytext);

							yyless(0);
							BEGIN(PROCESSTAG);
							}
<PROCESSTAG>{TAG_START}		{
							/* Skip the optional leading whitespace and the '<'
							 * that open the tag being re-scanned; nothing is
							 * recorded for them. */
							}
<CLOSETAG>{TAG_START}{SPACE}*\/{SPACE}*	{
							/* Skip the "</" prefix of a closing tag (with any
							 * surrounding whitespace); the tag name follows. */
							}
<PROCESSTAG>{WORD}			{
							/* The first word after '<' is the element name */
							tag->tag_name = strdup(yytext);
							BEGIN(PROCESSATTRS);
							}
<PROCESSATTRS>{ATTRIBUTE}	{
							/* Give the attribute back and re-scan it in parts:
							 * name, optional '=', optional value */
							yyless(0);
							BEGIN(PROCESSATTRNAME);
							}
<PROCESSATTRNAME>{WORD}		{
							/* New attribute; its value starts out as an empty
							 * string in case no "=value" part follows */
							current_attr = addAttribute(tag);
							current_attr->value = strdup("");
							current_attr->name = strdup(yytext);
							BEGIN(PROCESSATTREQ);
							}
<PROCESSATTREQ>=			{
							/* A value is expected next */
							BEGIN(PROCESSATTRVAL);
							}
<PROCESSATTRVAL>{VALUE}		{
							/* Replace the placeholder value; free(NULL) is a
							 * no-op, so no guard is needed */
							free(current_attr->value);
							current_attr->value = copy_unquoted(yytext);
							/* This attribute is complete; do not touch it again */
							current_attr = NULL;
							BEGIN(PROCESSATTRS);
							}
<PROCESSATTREQ>{SPACE}+		{
							/* Whitespace instead of '=': attribute without a value */
							BEGIN(PROCESSATTRS);
							}
<PROCESSATTRS,PROCESSATTREQ>\/?{TAG_END}	{
							/* End of the tag ('>' or '/>'): link the finished
							 * element into the tree and resume normal scanning */
							current_tag = appendSibling(current_tag, tag);
							tag = NULL;
							BEGIN(INITIAL);
							}
<INITIAL>{SELF_CLOSING}		{
							/* A complete self-closing tag (<name ... />) was
							 * recognised. Flush pending text, create an element
							 * that is marked as already closed, and re-scan the
							 * tag in PROCESSTAG to pick up its name and
							 * attributes. */
							current_tag = addTextNode(current_tag, text);
							text = NULL;
							/* Keep current_tag in step with the tree, exactly as
							 * the opening-tag rule does; discarding the result of
							 * appendSibling() would leave it pointing at a stale
							 * node. */
							if(tag)
								current_tag = appendSibling(current_tag, tag);
							tag = createElement();
							/* Copy yytext before yyless() shortens it */
							tag->text = strdup(yytext);
							tag->is_closed = 1;
							tag->is_closing = 1;
							/* Keep the first matched character consumed and hand
							 * the rest back for re-scanning */
							yyless(1);
							BEGIN(PROCESSTAG);
							}
<INITIAL>{CLOSING_TAG}		{
							/* A complete closing tag (</name>) was recognised.
							 * Flush pending text into the tree first. */
							current_tag = addTextNode(current_tag, text);
							text = NULL;
							/* Keep current_tag in step with the tree, exactly as
							 * the opening-tag rule does; discarding the result of
							 * appendSibling() would leave it pointing at a stale
							 * node. */
							if(tag)
								current_tag = appendSibling(current_tag, tag);
							/* Create the element that represents the closing tag
							 * and remember its raw text (copied before yyless(0)
							 * returns the match to the input) */
							tag = createElement();
							tag->text = strdup(yytext);
							yyless(0);
							BEGIN(CLOSETAG);
							}
<CLOSETAG>{WORD}			{
							/* Name of a closing tag: pair it with the nearest
							 * earlier tag that is still open and turn everything
							 * that followed that tag into its children. */

							/* Look backwards from the current node for a tag
							 * with this name */
							found = findBackPlain(current_tag, yytext);

							/* Skip candidates that are already closed or are
							 * closing tags themselves.
							 * NOTE(review): assumes findPrev() continues the
							 * same by-name search -- confirm. */
							while(found)
							{
								if(!(found->is_closing || found->is_closed))
									break;

								found = findPrev(found);
							}

							/* Name the closing-tag element and mark it as such */
							tag->tag_name = strdup(yytext);
							tag->is_closing = 1;

							if(found)
							{
								/* The closing tag is not linked as a sibling;
								 * it is attached to the tag it closes */
								found->is_closed = 1;
								found->closing_tag = tag;

								/* Move all right siblings of the opening tag
								 * into a new child branch below it */
								children = found->next_sibling;
								if(children)
								{
									found->children = children;
									children->prev_sibling = NULL;
									found->next_sibling = NULL;
									while(children)
									{
										children->parent = found;
										children = children->next_sibling;
									}
									current_tag = found;
								}
							}
							else
							{
								/* No open tag to pair with: nothing references
								 * this element, so release it rather than leak it */
								deleteNode(tag);
							}
							tag = NULL;
							}
<CLOSETAG>{TAG_END}			{
							/* '>' of the closing tag: back to normal scanning */
							BEGIN(INITIAL);
							}
<PROCESSTAG,PROCESSATTRS,PROCESSATTRNAME,PROCESSATTREQ,PROCESSATTRVAL,CLOSETAG>{SPACE}*	/* Whitespace inside a tag is skipped; this rule has no action */
<PROCESSTAG,PROCESSATTRS,PROCESSATTRNAME,PROCESSATTREQ,PROCESSATTRVAL,CLOSETAG>.	{
							/* Unexpected character while re-scanning a tag:
							 * give up on the tag and treat its raw text as
							 * ordinary text instead. */
							if(tag)
							{
								/* Take over the raw text so deleteNode() cannot
								 * free it together with the element */
								text = tag->text;
								tag->text = NULL;
								deleteNode(tag);
								/* Forget the deleted element (and any attribute
								 * of it) so later rules never see a dangling
								 * pointer */
								tag = NULL;
								current_attr = NULL;
							}
							/* tag is already NULL when a closing tag's name has
							 * been handled; then there is nothing to undo */
							BEGIN(INITIAL);
							}
<INITIAL,TPLTAG>"{"			{
							/* Opening brace of a template tag: it is kept as
							 * part of the pending text, the nesting depth grows
							 * by one and scanning continues in TPLTAG. */
							len = text ? (long)strlen(text) : 0;
							len2 = (long)strlen(yytext);
							/* Grow the buffer in place; on allocation failure
							 * the existing text is kept and this chunk dropped */
							tmp = realloc(text, (size_t)(len + len2) + 1);
							if(tmp)
							{
								memcpy(tmp + len, yytext, (size_t)len2 + 1);
								text = tmp;
							}
							tmp = NULL;
							tag_level ++;
							BEGIN(TPLTAG);
							}
<TPLTAG>"}"|\n				{
							/* Closing brace or newline inside a template tag:
							 * the character is kept as pending text. */
							len = text ? (long)strlen(text) : 0;
							len2 = (long)strlen(yytext);
							/* Grow the buffer in place; on allocation failure
							 * the existing text is kept and this chunk dropped */
							tmp = realloc(text, (size_t)(len + len2) + 1);
							if(tmp)
							{
								memcpy(tmp + len, yytext, (size_t)len2 + 1);
								text = tmp;
							}
							tmp = NULL;
							tag_level --;
							/* A newline always ends the template tag; a brace
							 * ends it only when it closes the outermost level */
							if(*yytext == '\n' || !tag_level)
							{
								tag_level = 0;
								BEGIN(INITIAL);
							}
							}
<INITIAL><!--				{
							/* Start of an HTML comment: the delimiter is kept
							 * as pending text and scanning continues in COMMENT. */
							len = text ? (long)strlen(text) : 0;
							len2 = (long)strlen(yytext);
							/* Grow the buffer in place; on allocation failure
							 * the existing text is kept and this chunk dropped */
							tmp = realloc(text, (size_t)(len + len2) + 1);
							if(tmp)
							{
								memcpy(tmp + len, yytext, (size_t)len2 + 1);
								text = tmp;
							}
							tmp = NULL;
							BEGIN(COMMENT);
							}
<COMMENT>-->				{
							/* End of an HTML comment: the delimiter is kept as
							 * pending text and normal scanning resumes. */
							len = text ? (long)strlen(text) : 0;
							len2 = (long)strlen(yytext);
							/* Grow the buffer in place; on allocation failure
							 * the existing text is kept and this chunk dropped */
							tmp = realloc(text, (size_t)(len + len2) + 1);
							if(tmp)
							{
								memcpy(tmp + len, yytext, (size_t)len2 + 1);
								text = tmp;
							}
							tmp = NULL;
							BEGIN(INITIAL);
							}
<COMMENT,TPLTAG>.|\n|\r		{
							/* Any other character inside a comment or template
							 * tag is copied verbatim into the pending text. */
							len = text ? (long)strlen(text) : 0;
							len2 = (long)strlen(yytext);
							/* Grow the buffer in place; on allocation failure
							 * the existing text is kept and this chunk dropped */
							tmp = realloc(text, (size_t)(len + len2) + 1);
							if(tmp)
							{
								memcpy(tmp + len, yytext, (size_t)len2 + 1);
								text = tmp;
							}
							tmp = NULL;
							}
<INITIAL>([^<{]|\n)+|<|"{"	{
							/* Plain text: a run of characters other than '<'
							 * and '{', or a lone '<' that did not start a
							 * recognised tag. It is appended to the pending
							 * text, which becomes a text node once the next tag
							 * or the end of input is reached. */
							len = text ? (long)strlen(text) : 0;
							len2 = (long)strlen(yytext);
							/* Grow the buffer in place; on allocation failure
							 * the existing text is kept and this chunk dropped */
							tmp = realloc(text, (size_t)(len + len2) + 1);
							if(tmp)
							{
								memcpy(tmp + len, yytext, (size_t)len2 + 1);
								text = tmp;
							}
							tmp = NULL;
							}
<<EOF>>						{
							/* End of input: flush the remaining text, make
							 * *root point at the first top-level node, release
							 * the input buffer and stop scanning. */
							current_tag = addTextNode(current_tag, text);
							text = NULL;
							/* YY_BREAK does not run after yyterminate(), so a
							 * document without any tag (its only node is the
							 * one just created) would otherwise be returned as
							 * a NULL root and its node lost */
							if(!*root)
								*root = current_tag;
							/* Rewind to the leftmost sibling */
							if(*root)
								while((*root)->prev_sibling)
								{
									*root = (*root)->prev_sibling;
								}
							yy_delete_buffer(YY_CURRENT_BUFFER, yyscanner);
							yyterminate();
							}
%%

/*
 * Parse HTML from a stream into a tag tree.
 *
 * f - stream to read from; when NULL, stdin is read instead. The stream is
 *     not closed by this function.
 *
 * Returns the root the scanner stored through its out-parameter, or NULL
 * when the scanner could not be initialised or no node was produced.
 * NOTE(review): the caller presumably owns the returned tree -- confirm
 * against the html_tree API.
 */
TAG *html_parse_file(FILE *f)
{
	TAG *root = NULL;
	yyscan_t scanner;

	/* Without a valid scanner there is nothing to run or destroy */
	if(hplex_init(&scanner) != 0)
		return NULL;

	yyset_in(f ? f : stdin, scanner);

	hplex(scanner, &root);
	hplex_destroy(scanner);

	return root;
}
