<?php
// Start a session when none is active yet, then flag the session as logged in.
// NOTE(review): the flag is set unconditionally on every request to this
// script - confirm that no authentication is expected before this point.
if(!isset($_SESSION)) session_start();
$_SESSION["loggedIn"] = "yes";
// Project-local dependencies. The code below calls analayzeFile(),
// unqualifyName(), buildBasicSparql(), executeSparqlOnCmService() and
// sqlCreateConnection(), none of which are defined in this file; presumably
// they come from these includes - verify against the included files.
include 'analayzeFile.php';
include 'sparql.php';
require_once("demosettings.php");

/**
 * Cuts a string at its first " (" (space followed by an opening bracket).
 *
 * The input is returned untouched when the marker is absent, and also when
 * the marker sits at the very start of the string (so the result is never
 * reduced to an empty prefix).
 *
 * @param string $str Text that may carry a bracketed suffix.
 * @return string The text before the first " (", or $str itself.
 */
function removeBrackets($str)
{
	$cutAt = strpos($str, " (");

	// Both "not found" (false) and "found at offset 0" leave the input as is.
	$hasPrefix = ($cutAt !== false && $cutAt !== 0);

	return $hasPrefix ? substr($str, 0, $cutAt) : $str;
}

/**
 * Appends a value to a delimiter-separated string list, in place.
 *
 * An empty list (null, "", ... - anything loosely equal to null) is simply
 * replaced by the value. Otherwise the value is appended after the delimiter,
 * unless $isUnique is set and the list already contains exactly that value.
 *
 * Membership is decided by strict string comparison. The previous loose "=="
 * treated numeric-looking strings such as "1000" and "1e3", or "007" and "7",
 * as the same entry and silently dropped the new value.
 *
 * @param string|null $list      List to extend; modified by reference.
 * @param mixed       $value     Value to add (compared by its string form).
 * @param string      $delimiter Separator between list entries.
 * @param bool        $isUnique  When true, an already-present value is not added again.
 * @return void
 */
function appendValue(&$list, $value, $delimiter, $isUnique)
{
	// Loose comparison is intentional: any "empty" list starts over with the value.
	if ($list == null)
	{
		$list = $value;
		return;
	}

	if ($isUnique && in_array((string) $value, explode($delimiter, $list), true))
		return;

	$list .= $delimiter . $value;
}

/**
 * Checks whether a string parses as XML.
 *
 * Returns true when libxml reports no fatal error (warnings and recoverable
 * errors are tolerated). Otherwise returns a human-readable message holding
 * the first fatal error, its line number and the HTML-escaped offending line.
 *
 * Fixes over the previous version:
 *  - the source is split on line breaks (it used to be split on the letter
 *    "r"), so the reported "Bad XML" snippet is the real offending line;
 *  - libxml's error buffer is cleared before and after parsing, so a failure
 *    in one call no longer makes later calls on valid input fail;
 *  - every collected error is scanned for a fatal one, not just the first;
 *  - empty input is reported as invalid instead of reaching loadXML().
 *
 * Side effect (unchanged): libxml internal error handling stays enabled
 * after the call.
 *
 * @param string $xml Candidate XML source.
 * @return true|string True when parseable, otherwise an error description.
 */
function isXML($xml){
   libxml_use_internal_errors(true);
   // Drop errors left behind by earlier parsing so they are not blamed on $xml.
   libxml_clear_errors();

   $xml = (string) $xml;
   if ($xml === '') {
       return 'Empty XML document';
   }

   $doc = new DOMDocument('1.0', 'utf-8');
   $doc->loadXML($xml);

   $errors = libxml_get_errors();
   libxml_clear_errors();

   // Only fatal errors make the document invalid.
   $fatal = null;
   foreach ($errors as $candidate) {
       if ($candidate->level >= LIBXML_ERR_FATAL) {
           $fatal = $candidate;
           break;
       }
   }

   if ($fatal === null) {
       return true;
   }

   // libxml line numbers are 1-based; strip a trailing CR from CRLF input.
   $lines = explode("\n", $xml);
   $lineIndex = $fatal->line - 1;
   $badxml = isset($lines[$lineIndex]) ? rtrim($lines[$lineIndex], "\r") : '';

   return $fatal->message . ' at line ' . $fatal->line . '. Bad XML: ' . htmlentities($badxml);
}

/**
 * Emits a chunk of output immediately.
 *
 * The text is echoed and then pushed towards the client: first out of PHP's
 * innermost output buffer (if one is active), then out of the system buffers.
 * ob_flush() is only called when a buffer exists, because calling it without
 * one raises a notice.
 *
 * @param mixed  $handle Unused; kept so existing callers that pass a file
 *                       handle keep working.
 * @param string $string Text to send.
 * @return void
 */
function writeResult($handle, $string)
{
	echo $string;

	if (ob_get_level() > 0)
		ob_flush();

	flush();
}

/**
 * Looks up the parent classes of the given instances and merges them into
 * $parentsArray (instance name => list of parent names, without duplicates).
 *
 * One SPARQL query is built that UNIONs an rdfs:label match per instance and
 * selects, for every match, the label of the instance (?x) and the label of
 * each super-class of its type (?y). The query is run through
 * executeSparqlOnCmService() against "ont".
 *
 * Fixes over the previous version:
 *  - result keys are quoted ("results", "bindings", "x", "y", "value");
 *    bare-word keys are an error on PHP 8;
 *  - an empty $instances list returns straight away instead of sending a
 *    malformed query;
 *  - double quotes and backslashes in labels are escaped before being placed
 *    inside the SPARQL string literal;
 *  - a response without result bindings leaves $parentsArray untouched.
 *
 * @param array $instances    Instance names (qualified or not).
 * @param array $parentsArray Map to extend; modified by reference.
 * @return array The updated map (same content as $parentsArray).
 */
function fillInstancesParents($instances, &$parentsArray)
{
	if (empty($instances))
		return $parentsArray;

	// One "{ ?v rdfs:label "<name>"} " pattern per instance, UNIONed together.
	$triples = array();
	foreach ($instances as $inst)
	{
		$label = addcslashes(unqualifyName($inst), "\"\\");
		$triples[] = "{ ?v rdfs:label \"" . $label . "\"} ";
	}

	$sparql = buildBasicSparql(
		"SELECT distinct ?x ?y WHERE { {" . implode(" UNION ", $triples)
		. "} . ?v rdfs:label ?x . ?v rdf:type ?u  . ?u rdfs:subClassOf ?w . ?w rdfs:label ?y}");

	$json = executeSparqlOnCmService($sparql, "ont");
	if (!is_array($json) || !isset($json["results"]["bindings"]) || !is_array($json["results"]["bindings"]))
		return $parentsArray;

	foreach ($json["results"]["bindings"] as $value)
	{
		if (!isset($value["x"]["value"]) || !isset($value["y"]["value"]))
			continue;

		$instName = unqualifyName($value["x"]["value"]);
		$parentName = unqualifyName($value["y"]["value"]);

		if (!array_key_exists($instName, $parentsArray))
			$parentsArray[$instName] = array();
		if (!in_array($parentName, $parentsArray[$instName]))
			array_push($parentsArray[$instName], $parentName);
	}

	return $parentsArray;
}

/**
 * Analyzes one document and streams its financial relations as TSV rows.
 *
 * Steps:
 *  1. When $isFirst is set, write the TSV header row.
 *  2. Run analayzeFile() on the file name plus content and load the returned
 *     "msg" XML.
 *  3. Read the AggregatedRDF and DigestRDF payloads; if either is missing,
 *     print "<file> has empty digest" and return.
 *  4. For every Financial-entity-relationship description in the digest,
 *     collect per slot the values found, keyed by their iv:main-span index.
 *  5. Complete the parent-class map through fillInstancesParents().
 *  6. For every commercial-entity-attribute of every relation write one row;
 *     each slot column takes the slot value whose span index is closest to
 *     the attribute's index.
 *
 * Fixes over the previous version:
 *  - each() (removed in PHP 8) replaced with foreach;
 *  - bare-word array key $arr[msg] quoted;
 *  - slot values are converted to strings as soon as they are read, so the
 *    duplicate check on the instances list compares values instead of
 *    XML node objects;
 *  - undefined-index / foreach-over-null paths are guarded (missing alias,
 *    missing rdf:type, missing slot, missing "inversed"/"naics"/"sic",
 *    missing parents entry);
 *  - ob_flush()/ob_clean() only run when an output buffer is active.
 *
 * NOTE(review): values are interpolated into XPath string literals without
 * escaping; a value containing a double quote would break the expression.
 * NOTE(review): relations sharing the same first span index overwrite each
 * other in the collected map - confirm this is intended.
 *
 * @param mixed  $mysqli   Connection handed through to analayzeFile().
 * @param string $content  Document text.
 * @param string $fileName Document name; also written to the first column.
 * @param mixed  $outFile  Handed to writeResult(), which ignores it and echoes.
 * @param bool   $isFirst  True when the header row still has to be written.
 * @return void
 */
function ExtractFinancialRelations($mysqli, $content, $fileName, $outFile, $isFirst)
{
	$slots = array("commercial-entity-attribute", "commercial-entity-attribute-modifier", "date", "year", "quarter", "percentage", "currency-type", "national-identity", "region",  "mode-expectation-fulfillment", "conditional", "object-entity", "subject-entity");

	$tab = "\t";
	if ($isFirst)
	{
		// Header: two columns per slot, plus the extra columns some slots carry.
		writeResult($outFile, "file name" . $tab . "text" . $tab);
		foreach ($slots as $slot)
		{
			writeResult($outFile, $slot . $tab . $slot . " Caption" . $tab);
			if ($slot == "commercial-entity-attribute-modifier")
				writeResult($outFile, "is-anti" . $tab);
			if ($slot == "object-entity")
				writeResult($outFile, "naic" . $tab . "sic" . $tab);
		}

		writeResult($outFile, "\r\n");
	}

	// Push out what has been written so far before the analysis starts.
	if (ob_get_level() > 0)
		ob_flush();

	// The file name is put in front of the content, as the name may contain
	// the full company name while some contents do not.
	$arr = analayzeFile($mysqli, $fileName . "\r\n\r\n" . $content, $fileName, "");

	// Drop whatever reached the output buffer during the analysis call.
	if (ob_get_level() > 0)
		ob_clean();

	if ($arr == null)
	{
		die($fileName . " could not be analyzed.");
	}

	$msg = isset($arr["msg"]) ? $arr["msg"] : "";
	try
	{
		$xml = new DOMDocument();
		$result = $xml->loadXML('<?xml version="1.0" encoding="iso-8859-1"?>' . $msg);
		if ($result === FALSE)
			die("failed to load xml". $msg);

		$docXpath = new DOMXPath($xml);
	}
	catch (Exception $e) {
		die($e);
	}

	$aggRDF = $docXpath->query('/IVEnvelope/Notification/AnalyzeFile/OutputParameters/FileTriples/AggregatedRDF');
	$digestRDF = $docXpath->query('/IVEnvelope/Notification/AnalyzeFile/OutputParameters/FileTriples/DigestRDF');

	if ($aggRDF->length == 0 || $digestRDF->length == 0)
	{
		echo $fileName . " has empty digest";
		return;
	}

	// The aggregated RDF is parsed but not read afterwards; the parse is kept
	// because it throws on a malformed payload, as it did before.
	$aggRDFStr = $aggRDF->item(0)->nodeValue;
	$xml = new SimpleXMLElement($aggRDFStr);

	$digestRDFStr = $digestRDF->item(0)->nodeValue;
	$digestXml = new SimpleXMLElement($digestRDFStr);

	//relations
	$relations = $digestXml->xpath('/rdf:RDF/rdf:Description[rdf:type/@rdf:resource="http://www.intuview.com/ontology#Financial-entity-relationship"]');
	if (!is_array($relations))
		$relations = array();

	$relationsSlots = array();      // first span index => collected slot data of a relation
	$instancesToInquire = array();  // distinct instance names to look parents up for
	$parents = array();             // unqualified instance name => list of parent names

	foreach ($relations as $node)
	{
		$texts = $node->xpath('iv:text');

		$slotIndices = array();
		$slotIndices["sentenceText"] = isset($texts[0]) ? (string) $texts[0] : "";
		$firstIndex = 0;

		foreach ($slots as $slot)
		{
			// A slot either holds a sequence of resources or a single resource.
			$slotPaths = array('iv:' . $slot . '/rdf:Seq/*/@rdf:resource', 'iv:' . $slot . '/@rdf:resource');
			foreach ($slotPaths as $slotPath)
			{
				$slotVals = $node->xpath($slotPath);
				if (!is_array($slotVals))
					continue;

				foreach ($slotVals as $slotVal)
				{
					// Work with the plain attribute value from here on.
					$slotVal = (string) $slotVal;

					$slotValRoot = $digestXml->xpath('/rdf:RDF/rdf:Description[@rdf:about="' . $slotVal . '"]');
					if (empty($slotValRoot))
					{
						// No description is "about" this value: fall back to the
						// description that names it as its iv:instance.
						$aliases = $digestXml->xpath('/rdf:RDF/rdf:Description[iv:instance/@rdf:resource="' . $slotVal . '"]/@rdf:about');
						if (empty($aliases))
							echo "Empty slotVal " . $slotVal;
						else
							$slotVal = (string) $aliases[0];
					}

					$aboutPath = '/rdf:RDF/rdf:Description[@rdf:about="' . $slotVal . '"]';

					// Prefer the English caption, otherwise take any caption.
					$valCaptions = $digestXml->xpath($aboutPath . '/iv:caption[@xml:lang="en"]');
					if (empty($valCaptions))
					{
						$valCaptions = $digestXml->xpath($aboutPath . '/iv:caption');
					}

					$valIndex = $digestXml->xpath($aboutPath . '/iv:main-span');
					$vInd = empty($valIndex) ? 0 : (int) $valIndex[0];

					// The first non-zero span index seen identifies the relation.
					if ($firstIndex == 0)
						$firstIndex = $vInd;

					$instanceRefs = $digestXml->xpath($aboutPath . '/iv:instance/@rdf:resource');
					$slotValStr = empty($instanceRefs) ? $slotVal : (string) $instanceRefs[0];
					$slotValShort = unqualifyName($slotValStr);
					$caption = empty($valCaptions) ? $slotValShort : (string) end($valCaptions);

					if (!in_array($slotValStr, $instancesToInquire, true))
						array_push($instancesToInquire, $slotValStr);

					$slotIndices[$slot][$vInd]["str"] = $slotValStr;
					$slotIndices[$slot][$vInd]["caption"] = $caption;

					$inversed = $digestXml->xpath($aboutPath . '/iv:movement-sentiment-inversed');
					if (!empty($inversed) && (string) $inversed[0] === "true")
						$slotIndices[$slot][$vInd]["inversed"] = true;

					// The rdf:type of the value counts as one of its parents.
					$rdfTypes = $digestXml->xpath($aboutPath . '/rdf:type/@rdf:resource');
					$rdfType = unqualifyName(empty($rdfTypes) ? "" : (string) $rdfTypes[0]);
					if (!array_key_exists($slotValShort, $parents))
						$parents[$slotValShort] = array();
					if (!in_array($rdfType, $parents[$slotValShort]))
						array_push($parents[$slotValShort], $rdfType);

					//NAICS/SIC
					foreach (array("naics", "sic") as $ecoName)
					{
						$ecoVal = $digestXml->xpath($aboutPath . '/iv:' . $ecoName . '-economics/@rdf:resource');
						if (empty($ecoVal))
							continue;

						// NOTE(review): the attribute node is indexed by its local
						// name to read its value; kept exactly as before.
						$ecoVal = (string) $ecoVal[0]["resource"];
						// The code is the part after the first "-" of the resource.
						$ecoParts = explode("-", $ecoVal);
						$slotIndices[$slot][$vInd][$ecoName] = isset($ecoParts[1]) ? $ecoParts[1] : null;
					}
				}
			}
		}

		if ($firstIndex > 0)
			$relationsSlots[$firstIndex] = $slotIndices;
		else die("makorepo");
	}
	// Emit relations in the order of their first span index.
	ksort($relationsSlots);
	$parents = fillInstancesParents($instancesToInquire, $parents);

	foreach ($relationsSlots as $slotIndices1)
	{
		if (empty($slotIndices1["commercial-entity-attribute"]))
			continue;

		// One output row per commercial-entity-attribute of the relation.
		foreach ($slotIndices1["commercial-entity-attribute"] as $index => $attribute)
		{
			writeResult($outFile,
				'"' . str_replace('"', '""', $fileName) . '"' . $tab . //filename
				'"' . str_replace('"', '""', $slotIndices1["sentenceText"]) . '"' . $tab); //text

			$inversed = !empty($attribute["inversed"]);
			foreach ($slots as $slot)
			{
				// Pick the value of this slot whose span index is nearest to the attribute.
				$minDiff = 100000;
				$minDiffArr = null;
				$candidates = isset($slotIndices1[$slot]) ? $slotIndices1[$slot] : array();
				foreach ($candidates as $sIndex => $sArr)
				{
					$indDiff = abs($index - $sIndex);
					if ($indDiff < $minDiff)
					{
						$minDiff = $indDiff;
						$minDiffArr = $sArr;
					}
				}

				$slotStr = "";
				$slotCaption = "";
				$sentiment = "";
				$naics = null;
				$sic = null;
				if ($minDiffArr != null)
				{
					$slotStr = $minDiffArr["str"];
					$slotCaption = $minDiffArr["caption"];
					$sentiment = null;
					$naics = isset($minDiffArr["naics"]) ? $minDiffArr["naics"] : null;
					$sic = isset($minDiffArr["sic"]) ? $minDiffArr["sic"] : null;

					$parentKey = unqualifyName($slotStr);
					$slotParents = isset($parents[$parentKey]) ? $parents[$parentKey] : array();

					//if should inverse sentiment - replace negative with positive and vice versa
					if ($inversed && in_array("Positive-movement-momentum", $slotParents))
						$sentiment = "negative financial";
					else if ($inversed && in_array("Negative-movement-momentum", $slotParents))
						$sentiment = "positive financial";
					else
					{
						// First matching parent class, in this order, decides the label.
						$sentimentByParent = array(
							"Financial-market-negative-sentiment-indicator" => "negative financial",
							"Financial-market-positive-sentiment-indicator" => "positive financial",
							"Financial-market-neutral-sentiment-indicator" => "neutral",
							"Anti" => "negative",
							"Pro" => "positive");
						foreach ($sentimentByParent as $key => $val)
						{
							if (in_array($key, $slotParents))
							{
								$sentiment = $val;
								break;
							}
						}
					}
				}

				writeResult($outFile, '"' . str_replace('"','""', $slotStr) . '"' . $tab . '"' . str_replace('"', '""', $slotCaption) . '"' . $tab);
				if ($slot == "commercial-entity-attribute-modifier")
					writeResult($outFile, '"' . $sentiment . '"' . $tab);
				else if ($slot == "object-entity")
				{
					writeResult($outFile, '"' . $naics . '"' . $tab . '"' . $sic . '"' . $tab);
				}
			}

			writeResult($outFile, "\r\n");
		}
	}
}

/**
 * Runs ExtractFinancialRelations() on every file below a folder, recursing
 * into sub-folders.
 *
 * Entries whose name contains ")." are skipped: names like "filename(1).pdf"
 * are duplicates in the sample data.
 *
 * Fixes over the previous version:
 *  - the "header still pending" flag is only cleared once a file has really
 *    been processed. Previously an empty (or fully skipped) sub-folder
 *    visited first cleared it, so the TSV header was never written;
 *  - a folder that cannot be listed is skipped instead of being iterated.
 *
 * @param mixed  $mysqli      Connection handed through to ExtractFinancialRelations().
 * @param string $inputFolder Folder to scan.
 * @param mixed  $outFile     Handed through to ExtractFinancialRelations().
 * @param bool   $isFirst     True when the header row still has to be written.
 * @return bool True when the header is still pending after this folder
 *              (earlier versions returned nothing).
 */
function FolderExtractFinancialRelations($mysqli, $inputFolder, $outFile, $isFirst)
{
	$files = scandir($inputFolder);
	if ($files === false)
		return $isFirst; // scandir() has already raised a warning

	foreach ($files as $file)
	{
		if ($file == "." || $file == ".." || strpos($file, ").") !== false) //currently prevent "filename(1).pdf" - as they are duplicates in our examples
			continue;

		$inputPath = $inputFolder . "/" . $file;
		if (!file_exists($inputPath))
		{
			echo "file doesn't exist: " . $inputPath;
			continue;
		}

		if (is_dir($inputPath))
		{
			// The sub-folder reports whether it consumed the header.
			$isFirst = (bool) FolderExtractFinancialRelations($mysqli, $inputPath, $outFile, $isFirst);
		}
		else
		{
			ExtractFinancialRelations($mysqli, file_get_contents($inputPath), $file, $outFile, $isFirst);
			// The header is written at the start of the first processed file.
			$isFirst = false;
		}
	}

	return $isFirst;
}



// Request entry point.
//  - Without an uploaded file: show the upload form.
//  - With a non-zip upload: analyze that single file and stream the TSV.
//  - With a zip upload: extract it into a fresh numbered folder below
//    $zipFolder and analyze every file in it.
//
// Fixes over the previous version:
//  - the database connection is created for both paths (the zip path used an
//    undefined $mysqli);
//  - $outFile is defined before use (writeResult() echoes and ignores it);
//  - $_FILES["file"] is only read when present;
//  - the extension check is case-insensitive (".ZIP" is a zip);
//  - a failed extraction, or running out of folder numbers, stops the request
//    instead of analyzing a stale or missing folder.
try{
	$upload = isset($_FILES["file"]) ? $_FILES["file"] : null;

	if ($upload !== null && $upload["error"] > 0)
		die("Upload error: " . $upload["error"]);

	if ($upload === null || empty($upload["tmp_name"]))
	{
		echo '<html>
		<body>

		<form action="ExtractFinancialRelationShips.php" method="post" target="_blank"
		enctype="multipart/form-data">
   		<img src="images/logo.jpg" width="200"/>
				<br/><br/><br/>
		<label for="file">Please choose the file you want to analyze:</label>
				<br/>
		<input type="file" name="file" id="file">
				<br/>
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<input type="submit" name="submit" value="Submit">
		</form>
		</body>
		</html>';
		return;
	}

	$tmpName = $upload["tmp_name"];
	if (!file_exists($tmpName))
		die("Uploaded file doesn't exist");

	$fileName = $upload["name"];
	$ext = strtolower(pathinfo($fileName, PATHINFO_EXTENSION));

	header("Content-Type: text/plain");

	// Needed by both the single-file and the zip path.
	$mysqli = sqlCreateConnection("extractFinanacialRelationship");
	$outFile = NULL;

	if ($ext != "zip")
	{
		ExtractFinancialRelations($mysqli, file_get_contents($tmpName), $fileName, $outFile, true);
		die;
	}

	//unzip file
	$zip = new ZipArchive;
	$res = $zip->open($tmpName);
	if ($res !== TRUE)
		die('Could not open zip file: ' . $tmpName);

	$zipFolder = "C:/tmp/UploadedFinancialZips";
	if (!is_dir($zipFolder) && !mkdir($zipFolder, 0777, true))
		die("Failed to create folder: " . $zipFolder);

	// Use the first numbered sub-folder that does not exist yet.
	$zipSubFolder = null;
	for ($zInd = 0; $zInd < 200000; $zInd++)
	{
		$candidate = $zipFolder . "/" . $zInd;
		if (is_dir($candidate))
			continue;

		$zipSubFolder = $candidate;
		break;
	}
	if ($zipSubFolder === null)
	{
		$zip->close();
		die("Exceeded number of allowed zips");
	}

	$extracted = $zip->extractTo($zipSubFolder);
	$zip->close();
	if (!$extracted)
		die("Failed to extract zip file: " . $tmpName);

	FolderExtractFinancialRelations($mysqli, $zipSubFolder, $outFile, true);
}
catch (Exception $e) {
	die($e);
}