package ie.ucd.sixth.core.cyber.utils.html;

import java.util.List;
import java.util.logging.Logger;

import com.gargoylesoftware.htmlunit.html.DomNodeList;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlTable;

/**
 * Provides the functionality for extracting data from HTML table elements of an
 * HtmlUnit page.
 *
 * <p>Tables are located with a caller-supplied XPath expression. XPath matches that
 * are not {@link HtmlTable} elements are skipped with a warning instead of failing
 * the whole scrape. Except for {@link #scrapeNestedTables}, every rendered table is
 * wrapped in a {@code <tableData>} element and the wrapped tables are concatenated
 * in match order.
 */
public class HtmlTableScraper {
	private static final Logger logger = Logger.getLogger(HtmlTableScraper.class.getName());

	/** Opening tag of the wrapper placed around each rendered table. */
	private static final String TABLE_DATA_OPEN = "<tableData>";
	/** Closing tag of the wrapper placed around each rendered table. */
	private static final String TABLE_DATA_CLOSE = "</tableData>";
	/** Format keyword (matched case-insensitively) selecting the table's text content. */
	private static final String TEXT_FORMAT = "text";

	/**
	 * Renders every table matched by the XPath expression in the requested format.
	 *
	 * <ul>
	 * <li>a format starting with {@code "<"} is assumed to be a properly structured
	 * template and is handed to {@code HtmlDataToXmlConverter.convertFromTemplate};</li>
	 * <li>{@code "text"} (any case) returns the table's text content;</li>
	 * <li>anything else, including {@code "xml"}, an invalid template or {@code null},
	 * returns the raw xml of the table.</li>
	 * </ul>
	 *
	 * @param page            page to search
	 * @param xpathExpression XPath expression selecting the tables
	 * @param format          a template, {@code "text"} or {@code "xml"}; may be {@code null}
	 * @return the concatenated {@code <tableData>} elements, or an empty string when
	 *         no table matched
	 */
	public String scrapeAllTables(HtmlPage page, String xpathExpression, String format){
		logger.info("scraping all tables... ");
		return renderAllTables(page, xpathExpression, format);
	}

	/**
	 * No format specified, so returns every matched table in raw xml form.
	 *
	 * @param page            page to search
	 * @param xpathExpression XPath expression selecting the tables
	 * @return the concatenated {@code <tableData>} elements, or an empty string when
	 *         no table matched
	 */
	public String scrapeAllTables(HtmlPage page, String xpathExpression){
		// a null format selects the raw-xml fallback of the shared renderer
		return renderAllTables(page, xpathExpression, null);
	}

	/**
	 * Useful for distinguishing between tables based on content: only tables whose
	 * text (as reported by {@code asText()}) contains {@code contentString} are
	 * converted.
	 *
	 * @param page            page to search
	 * @param xpathExpression XPath expression selecting the candidate tables
	 * @param tableTemplate   template passed to
	 *                        {@code HtmlDataToXmlConverter.convertFromTemplate}
	 * @param contentString   text a table must contain to be included; must not be
	 *                        {@code null}
	 * @return the concatenated {@code <tableData>} elements of the tables that
	 *         contain the string, or an empty string when there are none
	 */
	public String scrapeAllTablesContainingString(HtmlPage page,String xpathExpression, String tableTemplate, String contentString){
		StringBuilder tablexml = new StringBuilder();
		for (Object match : page.getByXPath(xpathExpression)) {
			HtmlTable table = asTable(match, xpathExpression);
			if(table == null || !table.asText().contains(contentString)){
				continue;
			}
			// a fresh converter per table, as before; whether the converter keeps state is not visible here
			HtmlDataToXmlConverter converter = new HtmlDataToXmlConverter();
			tablexml.append(TABLE_DATA_OPEN)
					.append(converter.convertFromTemplate(table, tableTemplate))
					.append(TABLE_DATA_CLOSE);
		}
		return tablexml.toString();
	}

	/**
	 * Converts only the first XPath match, using the given template so that the
	 * content can be parsed to xml.
	 *
	 * @param page            page to search
	 * @param xpathExpression XPath expression selecting the table
	 * @param tableTemplate   template passed to
	 *                        {@code HtmlDataToXmlConverter.convertFromTemplate}
	 * @return the converted table wrapped in {@code <tableData>}, or an empty string
	 *         when nothing matched or the first match is not a table
	 */
	public String scrapeFirstTable(HtmlPage page, String xpathExpression, String tableTemplate){
		List<?> matches = page.getByXPath(xpathExpression);
		if(matches == null || matches.isEmpty()){
			return "";
		}
		HtmlTable table = asTable(matches.get(0), xpathExpression);
		if(table == null){
			return "";
		}
		HtmlDataToXmlConverter converter = new HtmlDataToXmlConverter();
		return TABLE_DATA_OPEN + converter.convertFromTemplate(table, tableTemplate) + TABLE_DATA_CLOSE;
	}

	/**
	 * Fills a template from every matched table and from the tables nested inside it.
	 *
	 * <p>For each matched table, each descendant {@code table} element is passed with
	 * its index to {@code HtmlDataToXmlConverter.convertNestedTableFromTemplate}, then
	 * the matched table itself is passed to {@code convertFromTemplate}. The output
	 * of each call is the template given to the next one, so results accumulate
	 * across nested tables and across matches. Unlike the other methods, the result
	 * is not wrapped in {@code <tableData>}.
	 *
	 * @param page            page to search
	 * @param xpathExpression XPath expression selecting the outer tables
	 * @param template        initial template
	 * @return the template after all conversions, or {@code template} unchanged when
	 *         no table matched
	 */
	public String scrapeNestedTables(HtmlPage page, String xpathExpression, String template){
		HtmlDataToXmlConverter convert = new HtmlDataToXmlConverter();
		String result = template;
		for (Object match : page.getByXPath(xpathExpression)) {
			HtmlTable table = asTable(match, xpathExpression);
			if(table == null){
				continue;
			}
			DomNodeList<HtmlElement> nestedTables = table.getElementsByTagName("table");
			for(int i=0; i<nestedTables.getLength(); i++){
				HtmlTable nested = (HtmlTable)nestedTables.get(i);
				result = convert.convertNestedTableFromTemplate(nested, i, result); //update template from each nested table
			}
			result = convert.convertFromTemplate(table, result); //update template from main table
		}
		return result;
	}

	/**
	 * Shared implementation of both {@code scrapeAllTables} overloads; see the
	 * three-argument overload for the meaning of {@code format}.
	 */
	private String renderAllTables(HtmlPage page, String xpathExpression, String format){
		StringBuilder tablexml = new StringBuilder();
		for (Object match : page.getByXPath(xpathExpression)) {
			HtmlTable table = asTable(match, xpathExpression);
			if(table == null){
				continue;
			}
			tablexml.append(TABLE_DATA_OPEN);
			if(format != null && format.startsWith("<")){ //we will assume this is a properly structured template
				HtmlDataToXmlConverter converter = new HtmlDataToXmlConverter();
				tablexml.append(converter.convertFromTemplate(table, format));
			}
			else if(TEXT_FORMAT.equalsIgnoreCase(format)){
				tablexml.append(table.getTextContent());
			}
			else{ //xml, an invalid template or no format: respond with xml by default
				tablexml.append(table.asXml());
			}
			tablexml.append(TABLE_DATA_CLOSE);
		}
		return tablexml.toString();
	}

	/**
	 * Returns the XPath match as a table, or {@code null} (after logging a warning)
	 * when the expression selected something that is not an {@link HtmlTable}.
	 */
	private static HtmlTable asTable(Object match, String xpathExpression){
		if(match instanceof HtmlTable){
			return (HtmlTable)match;
		}
		String matchType = (match == null) ? "null" : match.getClass().getName();
		logger.warning("skipping non-table match of type " + matchType + " for xpath: " + xpathExpression);
		return null;
	}

}
