package ie.ucd.sixth.core.cyber.utils.html;


import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;

import com.gargoylesoftware.htmlunit.html.HtmlDivision;
import com.gargoylesoftware.htmlunit.html.HtmlTable;

/*
 * Extends the basic scraper with functionality for scraping web page data
 * and for using the HTML -> XML data modelling utility.
 */
public class HTMLScraper extends BasicHtmlScraper{
	/** Shared logger; static and final so it is looked up once rather than once per scraper instance. */
	private static final Logger LOGGER = Logger.getLogger(Logger.GLOBAL_LOGGER_NAME);

	/** Response returned when the supplied template cannot be a valid template. */
	private static final String INVALID_TEMPLATE_RESPONSE = "<SIXTH_response>invalid template</SIXTH_response>";

	/** Response returned when nothing was scraped. */
	private static final String EMPTY_RESPONSE = "";

	/** XPath expression selecting the tables whose XML is returned by {@link #listTableXML()}. */
	private static final String TABLE_XPATH = "//table[@class='sortable IRLWNtable']";

	/**
	 * Creates a new scraper for the given url.
	 * The original author notes that the url can be reset without creating a
	 * different scraper by using the setUrlString method (not defined in this
	 * class; presumably inherited from the superclass).
	 *
	 * @param urlString the url string, passed unchanged to the superclass constructor
	 */
	public HTMLScraper(String urlString){
		super(urlString);
	}

	/**
	 * Collects the XML of every element of the current page matching the
	 * XPath expression {@code //table[@class='sortable IRLWNtable']}.
	 * The text of each matched table is also logged at INFO level.
	 *
	 * @return the XML of each matched table, in the order the page returned them;
	 *         empty when nothing matches
	 */
	public ArrayList<String> listTableXML(){
		List<?> tables = page.getByXPath(TABLE_XPATH);
		ArrayList<String> xmlList = new ArrayList<String>(tables.size());
		for (Object element : tables) {
			HtmlTable table = (HtmlTable) element;
			xmlList.add(table.asXml());
			LOGGER.info(table.asText());
		}
		return xmlList;
	}

	/**
	 * Uses the html scraping and data modelling utilities.
	 * Retrieves the html elements using the xpathExpression; if anything was
	 * retrieved, the type of the first element decides which utility scrapes the data:
	 * <ul>
	 * <li>a table with a template containing {@code <table} is treated as a nested table;</li>
	 * <li>a table with a template starting with {@code <row} is parsed as a plain table;</li>
	 * <li>a div is handed to the division scraper.</li>
	 * </ul>
	 *
	 * @param xpathExpression the XPath expression selecting the elements to scrape
	 * @param template the template for the response; must start with {@code <}
	 * @return the scraped response; the invalid-template response when the template is
	 *         {@code null} or does not start with {@code <}; an empty string when nothing
	 *         matches or no utility applies to the first matched element
	 */
	public String scrape(String xpathExpression, String template){
		if(!isValidTemplate(template)){
			return INVALID_TEMPLATE_RESPONSE;
		}
		Object first = firstMatchingElement(xpathExpression);
		if(first instanceof HtmlTable){
			if(template.contains("<table")){ //then we know this is a nested table
				String nestedResponse = new HtmlTableScraper().scrapeNestedTables(page, xpathExpression, template);
				return new HtmlTableScraper().scrapeAllTables(page, xpathExpression, nestedResponse);
			}
			if(template.startsWith("<row")){ //not a nested table, so it can be parsed as normal
				return new HtmlTableScraper().scrapeAllTables(page, xpathExpression, template);
			}
			return EMPTY_RESPONSE; //a table, but the template matches neither table form
		}
		if(first instanceof HtmlDivision){
			return new HtmlDivisionScraper().scrapeAllDiv(page, xpathExpression, template);
		}
		return EMPTY_RESPONSE;
	}

	/**
	 * Uses the html scraping and data modelling utilities.
	 * Works like {@link #scrape(String, String)} but hands the contentString to the
	 * "containing string" variants of the table and division utilities.
	 * Specifying the contentString is useful when the html element does not have
	 * an id or class (for example) to identify it.
	 *
	 * TODO: nested tables are not supported here; every table is passed to
	 * scrapeAllTablesContainingString regardless of the template.
	 *
	 * @param xpathExpression the XPath expression selecting the candidate elements
	 * @param template the template for the response; must start with {@code <}
	 * @param contentString the string handed to the table/division utility
	 * @return the scraped response; the invalid-template response when the template is
	 *         {@code null} or does not start with {@code <}; an empty string when nothing
	 *         matches or the first matched element is neither a table nor a div
	 */
	public String scrapeByContent(String xpathExpression, String template, String contentString){
		if(!isValidTemplate(template)){
			return INVALID_TEMPLATE_RESPONSE;
		}
		Object first = firstMatchingElement(xpathExpression);
		if(first instanceof HtmlTable){
			return new HtmlTableScraper().scrapeAllTablesContainingString(page, xpathExpression, template, contentString);
		}
		if(first instanceof HtmlDivision){
			return new HtmlDivisionScraper().scrapeAllDivContainingString(page, xpathExpression, template, contentString);
		}
		return EMPTY_RESPONSE;
	}

	/**
	 * Tells whether the template can be a valid template.
	 * A {@code null} template is treated as invalid instead of causing a NullPointerException.
	 */
	private static boolean isValidTemplate(String template){
		return template != null && template.startsWith("<");
	}

	/**
	 * Evaluates the xpathExpression against the current page and returns the first
	 * result, logging it at INFO level; returns {@code null} when there are no results.
	 */
	private Object firstMatchingElement(String xpathExpression){
		List<?> matches = page.getByXPath(xpathExpression);
		if(matches.isEmpty()){
			return null;
		}
		Object first = matches.get(0);
		LOGGER.info(String.valueOf(first));
		return first;
	}

	/**
	 * Just for testing: prints the result of a basic scrape of a fixed url to standard error.
	 * An equivalent templated call would be
	 * s.scrape("//div[@class='mainTrafficItem']", "<attribute=class>divcontent</attribute=class>").
	 */
	public static void main(String[] args){
		HTMLScraper s = new HTMLScraper("http://www.aaireland.ie/AA/AA-Roadwatch.aspx");
		System.err.println( s.basicScrape("//div[@class='mainTrafficItem']", "text"));
	}
}
