package ie.ucd.sixth.core.cyber.utils.html;

import java.util.List;
import java.util.logging.Logger;

import com.gargoylesoftware.htmlunit.html.HtmlDivision;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

/**
 * Extracts {@code div} elements from an HtmlUnit page by XPath and renders them as strings.
 *
 * <p>The list-based methods wrap every rendered division in a
 * {@code <divisiondata>...</divisiondata>} element and concatenate the results. All methods
 * report an unusable (null) page by returning a fixed error message rather than throwing.
 */
public class HtmlDivisionScraper {
	/** Returned by every scrape method when no page was supplied. */
	private static final String INVALID_PAGE_MESSAGE = "invalid url or was unable to parse HTML (e.g: html may be poorly formed)";
	private static final String WRAPPER_OPEN = "<divisiondata>";
	private static final String WRAPPER_CLOSE = "</divisiondata>";

	private final Logger logger = Logger.getLogger(Logger.GLOBAL_LOGGER_NAME);

	public HtmlDivisionScraper(){

	}

	/**
	 * Retrieves the division elements selected by an XPath expression and renders each one
	 * according to {@code format}.
	 *
	 * @param page the page to query; may be null
	 * @param xpathExpression XPath expression evaluated against the page
	 * @param format a template starting with {@code <} (handed to the html-to-xml modelling
	 *        utility), {@code "text"} (case-insensitive) for the text content, or anything else
	 *        (including null) for the raw xml of the division
	 * @return the wrapped, concatenated divisions; an empty string when nothing matches; or an
	 *         error message when {@code page} is null
	 */
	public String scrapeAllDiv(HtmlPage page, String xpathExpression, String format){
		if(page == null){ //we could not access a webPage using the provided urlString
			return INVALID_PAGE_MESSAGE;
		}
		List<?> matches = page.getByXPath(xpathExpression);
		logMatchCount(matches);
		return renderDivisions(matches, format, null);
	}

	/**
	 * Same as {@link #scrapeAllDiv(HtmlPage, String, String)}, but only divisions whose text
	 * content contains {@code contentString} are included in the result.
	 *
	 * @param page the page to query; may be null
	 * @param xpathExpression XPath expression evaluated against the page
	 * @param format see {@link #scrapeAllDiv(HtmlPage, String, String)}
	 * @param contentString text a division must contain to be included; null disables the filter
	 * @return the wrapped, concatenated matching divisions; an empty string when nothing
	 *         matches; or an error message when {@code page} is null
	 */
	public String scrapeAllDivContainingString(HtmlPage page, String xpathExpression, String format, String contentString){
		if(page == null){ //we could not access a webPage using the provided urlString
			return INVALID_PAGE_MESSAGE;
		}
		List<?> matches = page.getByXPath(xpathExpression);
		logMatchCount(matches);
		return renderDivisions(matches, format, contentString);
	}

	/**
	 * Retrieves the division elements selected by an XPath expression and returns each one in
	 * its raw xml form, wrapped in {@code <divisiondata>}.
	 *
	 * @param page the page to query; may be null
	 * @param xpathExpression XPath expression evaluated against the page
	 * @return the wrapped, concatenated divisions; an empty string when nothing matches; or an
	 *         error message when {@code page} is null
	 */
	public String scrapeByDiv(HtmlPage page, String xpathExpression){
		if(page == null){ //we could not access a webPage using the provided urlString
			return INVALID_PAGE_MESSAGE;
		}
		// a null format selects the default (raw xml) rendering
		return renderDivisions(page.getByXPath(xpathExpression), null, null);
	}

	/**
	 * Retrieves the raw xml of the first division matching a div id and/or a div class.
	 * Pass null for whichever of the two is not required; at least one must be given.
	 *
	 * <p>NOTE(review): a depth of 1 produces an XPath starting with {@code /div[}, which only
	 * selects a div at the document root. For an ordinary HTML page (root {@code <html>}) this
	 * presumably never matches - confirm the intended meaning of depth 1.
	 *
	 * @param page the page to query; may be null
	 * @param divDepth 2 searches the whole document ({@code //div}); any other value uses the
	 *        default depth of 1 ({@code /div})
	 * @param divID value of the {@code id} attribute to match, or null
	 * @param divClass value of the {@code class} attribute to match, or null
	 * @return the raw xml of the first matching division; an empty string when nothing
	 *         matches; or an error message when {@code page} is null or neither an id nor a
	 *         class was specified
	 */
	public String scrapeByDiv(HtmlPage page, int divDepth, String divID, String divClass){
		if(page == null){ //we could not access a webPage using the provided urlString
			return INVALID_PAGE_MESSAGE;
		}
		if(divID == null && divClass == null){ //Neither have been specified
			return "invalid html request: scrapeByDiv(divDepth: " +divDepth+", divID: " +divID+", divClass:" +divClass+") - at least one valid id or class must be specified";
		}

		//a requested depth of 2 is honoured, anything else keeps the default depth of 1
		StringBuilder xpathExpression = new StringBuilder(divDepth == 2 ? "//div[" : "/div[");
		if(divID != null){
			xpathExpression.append("@id=").append(toXPathLiteral(divID));
		}
		if(divID != null && divClass != null){
			xpathExpression.append(" and ");
		}
		if(divClass != null){
			xpathExpression.append("@class=").append(toXPathLiteral(divClass));
		}
		xpathExpression.append(']');

		List<?> matches = page.getByXPath(xpathExpression.toString());
		//no match is an ordinary outcome, not an error: report it like the list-based methods do
		if(matches == null || matches.isEmpty() || !(matches.get(0) instanceof HtmlDivision)){
			return "";
		}
		return ((HtmlDivision) matches.get(0)).asXml();
	}

	/*
	 * just for local testing
	 */
	public static void main(String[] args){

	}

	/** Logs how many nodes an XPath query returned, tolerating a null result list. */
	private void logMatchCount(List<?> matches){
		logger.info("size of list: " + (matches == null ? 0 : matches.size()));
	}

	/**
	 * Renders every division in {@code matches}, each wrapped in {@code <divisiondata>}.
	 * Matches that are not divisions are skipped (the XPath expression is caller-supplied and
	 * may select other node types). When {@code requiredText} is non-null, divisions whose
	 * text content does not contain it are skipped as well.
	 */
	private String renderDivisions(List<?> matches, String format, String requiredText){
		if(matches == null || matches.isEmpty()){
			return "";
		}
		StringBuilder response = new StringBuilder();
		for(Object match : matches){
			if(!(match instanceof HtmlDivision)){
				logger.warning("skipping XPath match that is not a div: "
						+ (match == null ? "null" : match.getClass().getName()));
				continue;
			}
			HtmlDivision div = (HtmlDivision) match;
			if(requiredText != null && !div.asText().contains(requiredText)){
				continue;
			}
			response.append(WRAPPER_OPEN).append(renderDivision(div, format)).append(WRAPPER_CLOSE);
		}
		return response.toString();
	}

	/** Renders a single division as a template conversion, its text content, or its raw xml. */
	private String renderDivision(HtmlDivision div, String format){
		if(format != null && format.startsWith("<")){
			HtmlDataToXmlConverter c = new HtmlDataToXmlConverter();
			//String.valueOf mirrors string concatenation: a null result still renders as "null"
			return String.valueOf(c.convertDivToHTML(div, format));
		}
		if("text".equalsIgnoreCase(format)){
			return div.asText();
		}
		//either xml or an invalid (or missing) format has been specified so we default to xml
		return div.asXml();
	}

	/**
	 * Quotes a value as an XPath 1.0 string literal. XPath 1.0 has no escape character, so a
	 * value containing both quote kinds is assembled with {@code concat()}.
	 */
	private static String toXPathLiteral(String value){
		if(value.indexOf('\'') < 0){
			return "'" + value + "'";
		}
		if(value.indexOf('"') < 0){
			return "\"" + value + "\"";
		}
		//the value holds at least one single quote here, so concat() gets at least two arguments
		String[] parts = value.split("'", -1);
		StringBuilder literal = new StringBuilder("concat(");
		for(int i = 0; i < parts.length; i++){
			if(i > 0){
				literal.append(", \"'\", ");
			}
			literal.append('\'').append(parts[i]).append('\'');
		}
		return literal.append(')').toString();
	}
}
