package ie.ucd.sixth.core.cyber.utils.html;

import ie.ucd.sixth.core.cyber.utils.TextParser;

import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.WaitingRefreshHandler;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlDivision;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlTable;
import com.gargoylesoftware.htmlunit.html.HtmlTableCell;
import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
import com.gargoylesoftware.htmlunit.html.HtmlUnorderedList;

/*
 * HTML scraper providing the basic tools for scraping data from a web page using HtmlUnit.
 */
public class BasicHtmlScraper {

	/** Output format selecting plain text; any other value is treated as xml. */
	private static final String FORMAT_TEXT = "text";
	/** Default output format used when the requested format is not "text". */
	private static final String FORMAT_XML = "xml";

	private final Logger logger = Logger.getLogger(Logger.GLOBAL_LOGGER_NAME);
	private String urlString = "";
	// Currently loaded page, or null when the last load failed.
	// NOTE(review): left package-private as in the original in case sibling classes read it - confirm.
	HtmlPage page = null;
	private WebClient webClient;
	private final TextParser textParser;


	/**
	 * Creates a scraper and immediately tries to load the given url.
	 * The url can be changed later, without creating a different scraper,
	 * by using {@link #setUrlString(String)}.
	 *
	 * If the page cannot be loaded the failure is logged and the scraper is
	 * left without a page; the scrape methods then return an empty string.
	 *
	 * @param urlString address of the page to load
	 */
	public BasicHtmlScraper(String urlString){
		logger.info("creating html scraper");
		this.textParser = new TextParser();
		this.urlString = urlString;
		accessPage(urlString);
		// The page dump is debug output only: it goes to the logger and is skipped when the load failed.
		if(page != null && logger.isLoggable(Level.FINE)){
			logger.fine(page.asXml());
		}
	}

	/**
	 * Scrapes the elements matched by an xpath using the manual (template-free) scrapers.
	 * The type of the first matched element decides which scraper handles the
	 * whole xpath: table, div or unordered list.
	 *
	 * Since no template is defined:
	 *
	 * 1: nested tables will not be scraped individually for formatting
	 * but the nested table content will be returned as part of the main tables content
	 * which is returned as plain text or raw xml as specified
	 *
	 * 2: div content will be returned as plain text without any structure or raw xml as specified
	 *
	 * @param xpathExpression xpath selecting the elements to scrape
	 * @param format "text" (case-insensitive) for plain text; anything else,
	 *               including null, defaults to xml
	 * @return the scraped content, or an empty string when no page is loaded,
	 *         nothing matches, or the first match is not a table, div or list
	 */
	public String basicScrape(String xpathExpression, String format){
		if(page == null){
			logger.warning("BasicHtmlScraper: no page loaded, nothing to scrape");
			return "";
		}

		List<?> list = page.getByXPath(xpathExpression);
		logger.info("number of elements found by this xpath: " +list.size());
		if(list.isEmpty()){
			return "";
		}

		Object first = list.get(0);
		logger.info(String.valueOf(first));

		// Constant-first comparison keeps a null format from throwing; it simply falls back to xml.
		String outputFormat = FORMAT_TEXT.equalsIgnoreCase(format) ? FORMAT_TEXT : FORMAT_XML;

		if(first instanceof HtmlTable){
			return new HtmlTableScraper().scrapeAllTables(page, xpathExpression, outputFormat);
		}
		if(first instanceof HtmlDivision){
			logger.info("getting div");
			return new HtmlDivisionScraper().scrapeAllDiv(page, xpathExpression, outputFormat);
		}
		if(first instanceof HtmlUnorderedList){
			logger.info("getting list");
			return new HtmlListScrapet().scrapeAllLists(page, xpathExpression, outputFormat);
		}
		return "";
	}

	/**
	 * An example of how to manually model table data as xml without using the
	 * html->xml modelling utility. Every table matched by the xpath is converted
	 * with {@link #convertTable_firstColumnTag(HtmlTable)} and wrapped in a
	 * tableData element.
	 *
	 * @param xpathExpression xpath selecting the tables to convert
	 * @return the concatenated tableData elements, or an empty string when no
	 *         page is loaded or no table matches
	 */
	public String scrapeAllTablesESB(String xpathExpression){

		logger.info("finding tables...");
		if(page == null){
			logger.warning("BasicHtmlScraper: no page loaded, nothing to scrape");
			return "";
		}

		List<?> list = page.getByXPath(xpathExpression);
		logger.info("size of list: " +list.size());

		StringBuilder tablexml = new StringBuilder();
		for (Object object : list) {
			// The xpath may match elements that are not tables; skip them instead of failing on the cast.
			if(!(object instanceof HtmlTable)){
				logger.warning("skipping non-table element matched by xpath: " + object);
				continue;
			}
			HtmlTable table = (HtmlTable)object;
			logger.info("table: " +table.getTextContent());
			tablexml.append("<tableData>");
			tablexml.append(convertTable_firstColumnTag(table));
			tablexml.append("</tableData>");
		}
		return tablexml.toString();
	}


	/**
	 * Sample method for how to model data retrieved from a HtmlTable object.
	 * This should be used as a guideline for modelling the data from tables with differing structures.
	 *
	 * Converts a table that is structured so that the first row holds the main xml tags
	 * and the first column consists of tags that have the values in the following columns
	 * on the same row for each tag.
	 *
	 * cell (1,1) holds the root xml node (A)
	 * cell (1,x) [row 1, column x] holds the root's child nodes (B)
	 *
	 * where cell (x,1) has content it becomes the child node (C) to all (B) nodes
	 * and has the content in cell (x,y) as a value
	 * where x= the row the tag was created and all rows following until a new tag is created
	 * and y= the column number associated to its parent (B) node
	 *
	 * eg: see table at: https://www.esbelectricireland.ie/switchchange/allPricePlansIE.htm#one
	 *
	 * Rows without cells are skipped, a first-column cell that is empty or starts with
	 * whitespace keeps the previous (C) tag, and cells beyond the number of header
	 * columns are ignored.
	 *
	 * NOTE(review): cell values are inserted as-is and are not XML-escaped - confirm
	 * whether consumers expect escaped content.
	 *
	 * @param table the table to convert
	 * @return the xml string, or an empty string when the table is null, has no rows,
	 *         or its first row has no cells
	 */
	public String convertTable_firstColumnTag(HtmlTable table){

		if(table == null){
			return "";
		}
		List<HtmlTableRow> rowList = table.getRows();
		if(rowList == null || rowList.isEmpty()){
			return "";
		}
		List<HtmlTableCell> headerCells = rowList.get(0).getCells();
		if(headerCells == null || headerCells.isEmpty()){
			return "";
		}

		// root node (A) comes from the first cell of the first row
		String rootName = textParser.makeXmlTagSafe(headerCells.get(0).asText());

		// one growing buffer and one end tag per child (B) node, in column order
		List<StringBuilder> columnXml = new ArrayList<StringBuilder>();
		List<String> columnEndTags = new ArrayList<String>();
		for(int i = 1; i < headerCells.size(); i++){
			String bNode = textParser.makeXmlTagSafe(headerCells.get(i).asText());
			columnXml.add(new StringBuilder("<").append(bNode).append(">"));
			columnEndTags.add("</"+bNode+">");
		}

		// the current (C) tag carries over to following rows until a new one is found
		String tag = null;
		String endtag = null;
		for(int i = 1; i < rowList.size(); i++){
			List<HtmlTableCell> cellList = rowList.get(i).getCells();
			if(cellList == null || cellList.isEmpty()){
				continue;
			}

			String firstCellText = cellList.get(0).asText();
			if(startsWithContent(firstCellText)){ //cell (x,1) has content so it becomes a new node (C)
				String cNode = textParser.makeXmlTagSafe(firstCellText);
				tag = "<"+cNode+">";
				endtag = "</"+cNode+">";
			}

			// never index past the header columns, even if this row is wider than the first row
			int lastColumn = Math.min(cellList.size() - 1, columnXml.size());
			for(int j = 1; j <= lastColumn; j++){
				StringBuilder column = columnXml.get(j-1);
				String value = cellList.get(j).asText();
				if(tag != null){
					column.append(tag).append(value).append(endtag);
				}else{
					column.append(value);
				}
			}
		}

		// assemble: root tag, every (B) node with its content and end tag, closing root tag
		StringBuilder xmlTable = new StringBuilder();
		xmlTable.append("<").append(rootName).append(">");
		for(int i = 0; i < columnXml.size(); i++){
			xmlTable.append(columnXml.get(i)).append(columnEndTags.get(i));
		}
		xmlTable.append("</").append(rootName).append(">");
		return xmlTable.toString();
	}

	/*
	 * true when the text is non-empty and its first character is not whitespace
	 */
	private static boolean startsWithContent(String text){
		return text != null && text.length() > 0 && !Character.isWhitespace(text.charAt(0));
	}


	/**
	 * Loads a new url into this scraper; equivalent to {@link #setUrlString(String)}.
	 *
	 * @param url address of the page to load
	 */
	public void setNewUrl(String url){
		setUrlString(url);
	}

	/*
	 * Loads the given url with a fresh WebClient (applets and javascript disabled).
	 * Any previous client is closed first, and the current page is cleared so that
	 * a failed load never leaves the page of an earlier url in place.
	 * Failures are logged; afterwards page is null.
	 */
	private void accessPage(String url){
		closeClient();
		this.page = null;

		webClient = new WebClient();
		try {
			webClient.setAppletEnabled(false);
			webClient.setJavaScriptEnabled(false);
			webClient.setRefreshHandler(new WaitingRefreshHandler());

			// The url may resolve to something that is not html, so check before casting.
			Page loaded = webClient.getPage(url);
			if(loaded instanceof HtmlPage){
				this.page = (HtmlPage)loaded;
				logger.info("BasicHtmlScraper: have page...");
			}else{
				logger.severe("BasicHtmlScraper: url did not return an html page: " + url);
			}
		} catch (FailingHttpStatusCodeException e) {
			logger.log(Level.SEVERE, "BasicHtmlScraper: catching exception in HTMLUNIT", e);
		} catch (MalformedURLException e) {
			logger.log(Level.SEVERE, "BasicHtmlScraper: malformed url: " + url, e);
		} catch (IOException e) {
			logger.log(Level.SEVERE, "BasicHtmlScraper: could not read page: " + url, e);
		}
	}

	/*
	 * closes the windows of the current client, if there is one, and forgets the client
	 */
	private void closeClient(){
		if(webClient != null){
			webClient.closeAllWindows();
			webClient = null;
		}
	}

	/**
	 * Points this scraper at a new url and loads it.
	 *
	 * @param url address of the page to load
	 */
	public void setUrlString(String url){
		this.urlString = url;
		accessPage(urlString);
	}

	/**
	 * Closes all windows held by the underlying web client.
	 * Calling it more than once is harmless.
	 */
	public void shutDown() {
		closeClient();
	}


	/*
	 * just for testing
	 */
	public static void main(String[] args){
		BasicHtmlScraper test = new BasicHtmlScraper("http://www.bom.gov.au/tas/observations/tasall.shtml");
		try {
			if(test.page != null){
				System.out.println(test.page.asXml());
			}
		} finally {
			test.shutDown();
		}
	}

}
