/* 
 *  Copyright (C) 1999-2000 Bernd Gehrmann
 *                          bernd@physik.hu-berlin.de
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the "Artistic License" which comes with this Kit. 
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * Artistic License for more details.
 *
 * You should have received a copy of the Artistic License with 
 * this Kit, in the file named "Artistic".  If not, I'll be glad
 * to provide one.
 */


#include <stdlib.h>
#include <qregexp.h>
#include <kstddirs.h>

#include "misc.h"
#include "site.h"
#include "websearch.h"


/**
 * Site subclass used by the web search code in this file.
 *
 * In addition to whatever Site provides, it carries the two marker
 * strings that WebSearch::parseItem() uses to locate the relevance
 * figure inside a result item. Both are filled in by
 * WebSiteFactory::interpret().
 */
class WebSite: public Site
{
public:
    // Text searched for in a result item; the relevance figure is
    // looked for after it.
    QString relevanceStart;
    // Text that terminates the span in which the relevance figure is
    // looked for.
    QString relevanceEnd;

    WebSite()
        : Site()
    {}

    ~WebSite()
    {}
};


/**
 * SiteFactory that produces WebSite objects and fills in their
 * relevance markers from the describing DOM element.
 */
class WebSiteFactory : public SiteFactory
{
public:
    // Allocates a new WebSite and returns it as a Site pointer.
    virtual Site *newSite();

    // Runs the base class interpretation, then reads the
    // "relevanceStart" and "relevanceEnd" attributes of el into site.
    virtual void interpret(Site *site, const QDomElement &el);
};


/**
 * Allocates a fresh WebSite on the heap.
 *
 * @return the new object, typed as its Site base.
 */
Site *WebSiteFactory::newSite()
{
    Site *created = new WebSite();
    return created;
}


/**
 * Fills a site from its DOM element.
 *
 * The base class gets to interpret the element first; afterwards the
 * "relevanceStart" and "relevanceEnd" attributes are copied into the
 * WebSite.
 *
 * @param site  object to fill; it is downcast to WebSite without a
 *              runtime check, so it must really be a WebSite (such as
 *              one returned by newSite()).
 * @param el    element whose attributes are read.
 */
void WebSiteFactory::interpret(Site *site, const QDomElement &el)
{
    SiteFactory::interpret(site, el);

    const QString startMarker = el.attribute("relevanceStart");
    const QString endMarker = el.attribute("relevanceEnd");

    WebSite *webSite = static_cast<WebSite*>(site);
    webSite->relevanceStart = startMarker;
    webSite->relevanceEnd = endMarker;
}


/**
 * Creates the underlying SiteManager for the "web-sites" key, handing
 * it a WebSiteFactory.
 */
WebSiteManager::WebSiteManager()
{
    // NOTE(review): the factory lives on the stack; presumably
    // SiteManager only uses it while being constructed -- confirm
    // that it does not keep a reference to it.
    WebSiteFactory siteFactory;
    d = new SiteManager("web-sites", siteFactory);
}


/**
 * Destroys the SiteManager allocated in the constructor.
 */
WebSiteManager::~WebSiteManager()
{
    delete d;
}


/**
 * Looks up a site by name in the manager's dictionary.
 *
 * @param name  dictionary key of the wanted site.
 * @return the dictionary's result for name, downcast to WebSite
 *         without a runtime check.
 */
WebSite *WebSiteManager::find(const QString &name)
{
    Site *entry = d->dict.find(name);
    return static_cast<WebSite*>(entry);
}


/**
 * Collects the keys of all entries in the manager's dictionary.
 *
 * @return the keys, in the order the dictionary iterator visits them.
 */
QStringList WebSiteManager::siteList()
{
    QStringList names;

    QDictIterator<Site> entry(d->dict);
    while (entry.current()) {
        names.append(entry.currentKey());
        ++entry;
    }

    return names;
}


/**
 * Creates a search job; both arguments are passed straight on to
 * SearchJob.
 *
 * @param site        site the job is created for.
 * @param searchtext  text handed to the SearchJob base.
 */
WebSearch::WebSearch(WebSite *site, const QString &searchtext)
    : SearchJob(site, searchtext)
{
}


void WebSearch::parseItem(const QString &item)
{
  WebSite *wsite = static_cast<WebSite*>(m_site);
  
  int pos1, pos2, pos3;
  QString url, title, relevstr;
  int relevance;

#if 0
  qDebug( "-----------------------------------------------------------" );
  qDebug( "Parsing Item: " + item + "|" );
  qDebug( "-----------------------------------------------------------" );
#endif
    
  // This parser sucks.
  pos1 = item.find(QRegExp("<a href=", false));
  if (pos1 == -1)
    return;
    
  pos2 = item.find(">", pos1+8);
  if (pos2 == -1)
    return;
    
  pos3 = item.find(QRegExp("</a>", false), pos2+1);
  if (pos3 == -1)
    return;

  url = item.mid(pos1+8, pos2-(pos1+8)).stripWhiteSpace();

  if (url.left(1) == "\"") {
    url.remove(0, 1);
  }
  if (url.right(1) == "\"") {
    url.remove(url.length()-1, 1);
  }

  qDebug( "Url: %s", url.latin1() );
  qDebug( "Rawtitle: %s", item.mid(pos2+1, pos3-(pos2+1)).latin1() );
    
  title = removeHTML(item.mid(pos2+1, pos3-(pos2+1))).stripWhiteSpace();
  if ( (pos3 = title.find('\n')) != -1) {
    title.truncate(pos3);
  }

  qDebug( "Title: " + title );

  relevance = -1;
  if (!wsite->relevanceStart.isEmpty() &&
      !wsite->relevanceEnd.isEmpty()) {
    pos1 = item.find(wsite->relevanceStart);
    if (pos1 != -1) {
      pos2 = item.find(wsite->relevanceEnd, pos1+1);
      if (pos2 != -1) {
	relevstr = item.mid(pos1+wsite->relevanceStart.length(),
			    pos2-(pos1+wsite->relevanceStart.length()));
	pos3 = relevstr.find(QRegExp("[0-9]"));
	if (pos3 != -1) {
	  relevstr.remove(0, pos3);
	  relevance = atoi(relevstr);

	  if (relevance > 100) {
	    relevance = -1;
	  }
	}
      }
    }
  }

  qDebug( "Relevance: %d", relevance );
  emit foundItem(m_site->name, title, url, relevance, item);
}
