<?php
require_once("demosettings.php");
include 'sparql.php';

/**
 * Sends a document to the analysis service over TCP and returns its reply envelope.
 *
 * The request is an <IVEnvelope> XML message carrying the base64-encoded contents
 * and the file path; the reply is read until a "Completed" marker followed by a
 * closing </IVEnvelope> is seen, a "Failed" marker is seen, the peer closes the
 * connection, or 180 seconds elapse.
 *
 * The service host comes from the global $intuscanHost; the port comes from the
 * "intuscanPort" request parameter when present, otherwise from the global
 * $intuscanPort.
 *
 * @param string $contents raw document contents (base64-encoded before sending)
 * @param string $filePath value placed (HTML-escaped) in the request's <FilePath> element
 * @return array on success array('msg' => text from the last "<IVEnvelope>" onward,
 *               'docId' => timestamp id sent with the request);
 *               on any failure array('probelm' => true) (key spelling kept for callers)
 */
function analayzeFile($contents,$filePath)
{
	global $intuscanHost;
	global $intuscanPort;

	// Quoted keys: the bare-word form (probelm, msg, docId) is a fatal error on PHP 8.
	$failure = array('probelm' => true);

	$socket = socket_create(AF_INET, SOCK_STREAM, SOL_TCP);
	if ($socket === false)
		return $failure;

	$port = isset($_REQUEST["intuscanPort"]) ? $_REQUEST["intuscanPort"] : null;
	if (empty($port))
		$port = $intuscanPort;

	if (socket_connect($socket, $intuscanHost, (int) $port) === false) {
		socket_close($socket);
		return $failure;
	}

	$base64Contents = base64_encode($contents);
	$docId = date('YmdHis');

	$out = "<IVEnvelope><Request id=\"1\"><AnalyzeFile><InputParameters><FileMetaParameters><structured><slot name=\"TerminalSystemId\"><integer>" . $docId . "</integer></slot></structured></FileMetaParameters><FilePath>" . htmlspecialchars($filePath) . "</FilePath><FileContent><Base64>" . $base64Contents . "</Base64>";
	$out .= "</FileContent></InputParameters><OutputParameters><FileText/><FileReport/><FileMetaParameters/><FileTriples/></OutputParameters></AnalyzeFile></Request></IVEnvelope>";

	// socket_write() may accept only part of the buffer, so keep writing until
	// the whole request has been handed to the socket.
	$total = strlen($out);
	$sent = 0;
	while ($sent < $total) {
		$written = socket_write($socket, substr($out, $sent), $total - $sent);
		if ($written === false) {
			socket_close($socket);
			return $failure;
		}
		$sent += $written;
	}

	// Each read blocks for at most 3 seconds; the loop as a whole gives up after 180.
	socket_set_option($socket,SOL_SOCKET, SO_RCVTIMEO, array("sec"=>3, "usec"=>0));
	$output = "";
	$deadline = time() + 180;
	$completed = false;

	while (true) {
		$chunk = socket_read($socket, 1024);
		if ($chunk === "")
			break; // peer closed the connection: nothing more will arrive
		if ($chunk !== false)
			$output .= $chunk; // false = timeout/error for this read; retry until the deadline

		// NOTE(review): the markers are searched in the whole reply, which also
		// carries the requested <FileText/>; a document containing the word
		// "Failed" would be reported as a failure - confirm with the service protocol.
		if (strpos($output, "Failed") !== false)
			break;

		$pos = strpos($output, "Completed");
		if ($pos !== false && strpos($output, "</IVEnvelope>", $pos) !== false) {
			$completed = true;
			break;
		}

		if (time() > $deadline)
			break;
	}

	socket_close($socket);

	if (!$completed)
		return $failure;

	// Keep only the last envelope of the stream (or everything when none is found).
	$lastEnvPosition = strrpos($output, "<IVEnvelope>");
	if ($lastEnvPosition === false)
		$lastEnvPosition = 0;

	return array(
		'msg' => substr($output, $lastEnvPosition),
		'docId' => $docId
	);
}

/**
 * Cuts a string at the first occurrence of " (" (space + opening bracket).
 *
 * @param string $str text that may carry a bracketed suffix
 * @return string the part before " (", or $str unchanged when the marker is
 *                absent or sits at the very beginning of the string
 */
function removeBrackets($str)
{
	$cutAt = strpos($str, " (");

	// Both "not found" (false) and "found at offset 0" fail the > 0 test.
	return ($cutAt > 0) ? substr($str, 0, $cutAt) : $str;
}

/**
 * Appends a value to a delimiter-separated string, in place.
 *
 * @param mixed  $list      the delimited string, modified by reference; when it
 *                          loosely equals null it is replaced by $value
 * @param string $value     the value to add
 * @param string $delimiter separator placed between values
 * @param bool   $isUnique  when true, $value is skipped if an item of the
 *                          exploded list already loosely equals it
 */
function appendValue(&$list, $value, $delimiter, $isUnique)
{
	// Nothing stored yet: the value becomes the whole list.
	if ($list == null)
	{
		$list = $value;
		return;
	}

	// The duplicate scan only happens when uniqueness was requested.
	$alreadyListed = $isUnique && in_array($value, explode($delimiter, $list));
	if ($alreadyListed)
		return;

	$list .= $delimiter . $value;
}

/**
 * Checks whether a string parses as XML.
 *
 * Switches libxml to internal error collection (left enabled afterwards, as
 * before) and parses the input with DOMDocument.
 *
 * @param string $xml the text to check
 * @return true|string true when libxml reports no fatal error; otherwise a
 *                     message with the first fatal error, its line number and
 *                     the HTML-escaped offending source line
 */
function isXML($xml){
   // An empty document is not XML; loadXML() also rejects it outright.
   if ((string) $xml === '') {
       return 'Empty XML input';
   }

   libxml_use_internal_errors(true);
   // The libxml error buffer is process-wide: drop anything left by earlier
   // parses so it cannot be mistaken for an error in this document.
   libxml_clear_errors();

   $doc = new DOMDocument('1.0', 'utf-8');
   $doc->loadXML($xml);

   $errors = libxml_get_errors();
   libxml_clear_errors();

   // Warnings and recoverable errors are tolerated; look for the first fatal
   // one rather than only inspecting the first entry.
   $fatal = null;
   foreach ($errors as $candidate) {
       if ($candidate->level >= LIBXML_ERR_FATAL) {
           $fatal = $candidate;
           break;
       }
   }

   if ($fatal === null) {
       return true;
   }

   // Split on line feeds so the index matches the line number libxml reports.
   $sourceLines = explode("\n", $xml);
   $lineIndex = $fatal->line - 1;
   $badxml = isset($sourceLines[$lineIndex]) ? rtrim($sourceLines[$lineIndex], "\r") : '';

   return $fatal->message . ' at line ' . $fatal->line . '. Bad XML: ' . htmlentities($badxml);
}

/**
 * Appends text to the result file.
 *
 * @param string $handle despite its name this is handed to file_put_contents()
 *                       as the target file name
 * @param string $string the text to append
 */
function writeResult($handle, $string)
{
	$appendMode = FILE_APPEND;
	file_put_contents($handle, $string, $appendMode);
}

/**
 * Looks up parent class labels for a set of instances and merges them into a map.
 *
 * Builds one SPARQL query that UNIONs an rdfs:label match per instance, runs it
 * through executeSparqlOnCmService() against "ont", and for every result binding
 * adds the unqualified ?y value to the list stored under the unqualified ?x value.
 *
 * @param array $instances    instance names; each is passed through unqualifyName()
 *                            to obtain the label that is searched for
 * @param array $parentsArray map name => list of parent names, extended in place
 * @return array the same (extended) map
 */
function fillInstancesParents($instances, &$parentsArray)
{
	// Without instances there is nothing to ask; the query would be malformed.
	if (empty($instances))
		return $parentsArray;

	$triples = array();
	foreach ($instances as $inst)
	{
		// Escape backslashes and double quotes so the label cannot terminate the
		// SPARQL string literal it is embedded in.
		// NOTE(review): assumes unqualifyName() does not already escape - confirm.
		$label = str_replace(array("\\", "\""), array("\\\\", "\\\""), unqualifyName($inst));
		$triples[] = "{ ?v rdfs:label \"" . $label . "\"} ";
	}

	//create the query by unioning the triples
	$sparql = "SELECT distinct ?x ?y WHERE { {" . implode(" UNION ", $triples)
		. "} . ?v rdfs:label ?x . ?v rdf:type ?u  . ?u rdfs:subClassOf ?w . ?w rdfs:label ?y}";
	$json = executeSparqlOnCmService(buildBasicSparql($sparql), "ont");

	// Quoted keys: the bare-word form is a fatal error on PHP 8. A missing or
	// empty result set simply leaves the map untouched.
	if (empty($json['results']['bindings']))
		return $parentsArray;

	foreach ($json['results']['bindings'] as $value)
	{
		if (!isset($value['x']['value']) || !isset($value['y']['value']))
			continue;

		$instName = unqualifyName($value['x']['value']);
		$parentName = unqualifyName($value['y']['value']);
		if (!array_key_exists($instName, $parentsArray))
			$parentsArray[$instName] = array();
		if (!in_array($parentName, $parentsArray[$instName]))
			array_push($parentsArray[$instName], $parentName);
	}

	return $parentsArray;
}

/**
 * Analyzes one document and appends its financial relations to the TSV output file.
 *
 * Steps, in order:
 *  1. When $isFirst is set: refuse to overwrite an existing output file unless the
 *     "Appendfile" request parameter is non-empty, write the header row if the file
 *     does not exist yet, then write a line break.
 *  2. Send file name + contents to analayzeFile() and parse the returned envelope.
 *  3. Read the AggregatedRDF and DigestRDF payloads and collect, per
 *     "Financial-entity-relationship" description in the digest, the values found
 *     for every slot, indexed by their iv:main-span value.
 *  4. Resolve parent classes via fillInstancesParents() and write one row per
 *     "commercial-entity-attribute" value, choosing for every slot the value whose
 *     span index is closest to that attribute.
 *  5. Delete the input file.
 *
 * On any failure the input file is moved to the problem folder, a message is
 * echoed and the function returns without writing rows.
 *
 * @param string $content   document contents
 * @param string $fileName  file name; used for reporting, as first output column,
 *                          and prepended to the contents sent for analysis
 * @param string $outFile   path of the TSV file to append to
 * @param bool   $isFirst   true for the first file of a run (header handling)
 * @param string $inputPath full path of the input file (moved or deleted at the end)
 * @return void
 */
function ExtractFinancialRelations($content, $fileName, $outFile, $isFirst,$inputPath)
{
	$slots = array("commercial-entity-attribute", "commercial-entity-attribute-modifier", "date", "year", "quarter", "percentage", "currency-type", "national-identity", "region",  "mode-expectation-fulfillment", "conditional", "object-entity", "subject-entity");

	// Parent class name => sentiment label, checked in this order.
	$sentimentByParent = array(
		"Financial-market-negative-sentiment-indicator" => "negative financial",
		"Financial-market-positive-sentiment-indicator" => "positive financial",
		"Financial-market-neutral-sentiment-indicator" => "neutral",
		"Anti" => "negative",
		"Pro" => "positive"
	);

	$tab = "\t";

	if ($isFirst)
	{
		$appendRequested = isset($_REQUEST["Appendfile"]) ? $_REQUEST["Appendfile"] : "";
		if (strlen($appendRequested) == 0 && file_exists($outFile))
		{
			die("The output file name exsist on the system, select the option of extending the file or select a different file name");
		}
		if (!file_exists($outFile))
		{
			writeResult($outFile, "file name" . $tab . "text" . $tab);
			foreach ($slots as $slot)
			{
				writeResult($outFile, $slot . $tab . $slot . " Caption" . $tab);
				if ($slot == "commercial-entity-attribute-modifier")
					writeResult($outFile, "is-anti" . $tab);
				if ($slot == "object-entity")
					writeResult($outFile, "naic" . $tab . "sic" . $tab);
			}
		}

		writeResult($outFile, "\r\n");
	}

	// ob_flush() raises a notice when no output buffer is active.
	if (ob_get_level() > 0)
		ob_flush();
	echo "<br>analyzing file: " . $fileName;

	//analyse file while appending file name before content - as the file name might contain the company name, and some contents don't
	$analysis = analayzeFile($fileName . "\r\n\r\n" . $content, $fileName);

	// analayzeFile() reports failure as array('probelm' => true), never as null,
	// so the failure marker has to be tested explicitly.
	if (empty($analysis) || !empty($analysis['probelm']) || !isset($analysis['msg']))
	{
		moveToProblemFolder($inputPath, $fileName);
		echo "<br>$fileName could not be analyzed.";
		return;
	}

	try
	{
		$xml = new DOMDocument();
		$result = $xml->loadXML('<?xml version="1.0" encoding="iso-8859-1"?>' . $analysis['msg']);
		if ($result === FALSE)
		{
			moveToProblemFolder($inputPath, $fileName);
			echo "<br>failed to load xml ". $fileName;
			return;
		}

		$docXpath = new DOMXPath($xml);
	}
	catch (Exception $e)
	{
		moveToProblemFolder($inputPath, $fileName);
		echo "<br>Exception file name - $fileName - ". $e;
		return;
	}

	$aggRDF = $docXpath->query('/IVEnvelope/Notification/AnalyzeFile/OutputParameters/FileTriples/AggregatedRDF');
	$digestRDF = $docXpath->query('/IVEnvelope/Notification/AnalyzeFile/OutputParameters/FileTriples/DigestRDF');

	if ($aggRDF->length == 0 || $digestRDF->length == 0)
	{
		moveToProblemFolder($inputPath, $fileName);
		echo $fileName . " has empty digest";
		return;
	}

	// The aggregated RDF is parsed but not used further; the constructor still
	// throws on malformed XML, which callers see as before.
	new SimpleXMLElement($aggRDF->item(0)->nodeValue);

	$digestXml = new SimpleXMLElement($digestRDF->item(0)->nodeValue);

	//relations
	$relations = $digestXml->xpath('/rdf:RDF/rdf:Description[rdf:type/@rdf:resource="http://www.intuview.com/ontology#Financial-entity-relationship"]');
	if (empty($relations))
		$relations = array(); // xpath() may return false/null on failure

	$relationsSlots = array();
	$instancesToInquire = array();
	$parents = array();

	// each() was removed in PHP 8; foreach walks the same list in the same order.
	foreach ($relations as $node)
	{
		$texts = $node->xpath('iv:text');

		$slotIndices = array();
		$slotIndices["sentenceText"] = empty($texts) ? "" : (string) $texts[0];
		$firstIndex = 0;

		foreach ($slots as $slot)
		{
			$slotPaths = array('iv:' . $slot . '/rdf:Seq/*/@rdf:resource', 'iv:' . $slot . '/@rdf:resource');
			foreach ($slotPaths as $slotPath)
			{
				$slotVals = $node->xpath($slotPath);
				if (empty($slotVals))
					continue;

				foreach ($slotVals as $slotValNode)
				{
					// Work with plain strings from here on: SimpleXMLElement objects
					// compare by node identity, which defeats the in_array() checks below.
					$slotVal = (string) $slotValNode;

					$slotValRoot = $digestXml->xpath('/rdf:RDF/rdf:Description[@rdf:about="' . $slotVal . '"]');
					if (empty($slotValRoot))
					{
						// No description is "about" this value: fall back to the
						// description that lists it as its iv:instance.
						$referrers = $digestXml->xpath('/rdf:RDF/rdf:Description[iv:instance/@rdf:resource="' . $slotVal . '"]/@rdf:about');
						$referrer = empty($referrers) ? "" : (string) $referrers[0];
						if ($referrer !== "")
						{
							$slotVal = $referrer;
							$slotValRoot = $digestXml->xpath('/rdf:RDF/rdf:Description[@rdf:about="' . $slotVal . '"]');
						}
					}

					// The value must resolve to exactly one description in the digest.
					if (!is_array($slotValRoot) || count($slotValRoot) != 1)
						continue;
					$slotValRoot = $slotValRoot[0];

					// Prefer the English caption, fall back to any caption.
					$valCaptions = $slotValRoot->xpath('iv:caption[@xml:lang="en"]');
					if (empty($valCaptions))
						$valCaptions = $slotValRoot->xpath('iv:caption');

					$valIndex = $slotValRoot->xpath('iv:main-span');
					$vInd = empty($valIndex) ? 0 : (int) $valIndex[0];
					if ($firstIndex == 0)
						$firstIndex = $vInd;

					$instanceRefs = $slotValRoot->xpath('iv:instance/@rdf:resource');
					$slotValStr = empty($instanceRefs) ? $slotVal : (string) $instanceRefs[0];
					$slotValShort = unqualifyName($slotValStr);
					$caption = empty($valCaptions) ? $slotValShort : (string) end($valCaptions);

					if (!in_array($slotValStr, $instancesToInquire, true))
						$instancesToInquire[] = $slotValStr;

					$slotIndices[$slot][$vInd]["str"] = $slotValStr;
					$slotIndices[$slot][$vInd]["caption"] = $caption;

					$inversedFlag = $slotValRoot->xpath('iv:movement-sentiment-inversed');
					if (!empty($inversedFlag) && (string) $inversedFlag[0] === "true")
						$slotIndices[$slot][$vInd]["inversed"] = true;

					$rdfTypes = $slotValRoot->xpath('rdf:type/@rdf:resource');
					$rdfType = unqualifyName(empty($rdfTypes) ? "" : (string) $rdfTypes[0]);
					if (!array_key_exists($slotValShort, $parents))
						$parents[$slotValShort] = array();
					if (!in_array($rdfType, $parents[$slotValShort]))
						array_push($parents[$slotValShort], $rdfType);

					//NAICS/SIC
					foreach (array("naics", "sic") as $ecoName)
					{
						$ecoRefs = $slotValRoot->xpath('iv:' . $ecoName . '-economics/@rdf:resource');
						if (empty($ecoRefs))
							continue;

						// The xpath result already is the rdf:resource attribute, so its
						// string value is the resource itself (an attribute node has no
						// "resource" attribute of its own to index into).
						// NOTE(review): the code is taken as the part after the first "-"
						// of the resource, as before - confirm against real digest data.
						$ecoParts = explode("-", (string) $ecoRefs[0]);
						if (isset($ecoParts[1]))
							$slotIndices[$slot][$vInd][$ecoName] = $ecoParts[1];
					}
				}
			}
		}

		if ($firstIndex > 0)
			$relationsSlots[$firstIndex] = $slotIndices;
		else
		{
			moveToProblemFolder($inputPath, $fileName);
			echo "<br>makorepo ". $fileName;
			return;
		}
	}
	ksort($relationsSlots);

	if (!empty($instancesToInquire))
		$parents = fillInstancesParents($instancesToInquire, $parents);

	foreach ($relationsSlots as $relationSlots)
	{
		// A relation without any commercial-entity-attribute produces no rows.
		if (empty($relationSlots["commercial-entity-attribute"]))
			continue;

		foreach ($relationSlots["commercial-entity-attribute"] as $index => $attribute)
		{
			writeResult($outFile,
				'"' . str_replace('"', '""', $fileName) . '"' . $tab . //filename
				'"' . str_replace('"', '""', $relationSlots["sentenceText"]) . '"' . $tab); //text

			$inversed = !empty($attribute["inversed"]);
			foreach ($slots as $slot)
			{
				// Pick the slot value whose span index is nearest to this attribute.
				$minDiff = 100000;
				$minDiffArr = null;
				$candidates = isset($relationSlots[$slot]) ? $relationSlots[$slot] : array();
				foreach ($candidates as $sIndex => $sArr)
				{
					$indDiff = abs($index - $sIndex);
					if ($indDiff < $minDiff)
					{
						$minDiff = $indDiff;
						$minDiffArr = $sArr;
					}
				}

				$slotStr = "";
				$slotCaption = "";
				$sentiment = "";
				$naics = null;
				$sic = null;
				if (!empty($minDiffArr))
				{
					$slotStr = $minDiffArr["str"];
					$slotCaption = $minDiffArr["caption"];
					$naics = isset($minDiffArr["naics"]) ? $minDiffArr["naics"] : null;
					$sic = isset($minDiffArr["sic"]) ? $minDiffArr["sic"] : null;

					$parentKey = unqualifyName($slotStr);
					$slotParents = isset($parents[$parentKey]) ? $parents[$parentKey] : array();

					//if should inverse sentiment - replace negative with positive and vice versa
					if ($inversed && in_array("Positive-movement-momentum", $slotParents))
						$sentiment = "negative financial";
					else if ($inversed && in_array("Negative-movement-momentum", $slotParents))
						$sentiment = "positive financial";
					else
					{
						foreach ($sentimentByParent as $parentName => $label)
						{
							if (in_array($parentName, $slotParents))
							{
								$sentiment = $label;
								break;
							}
						}
					}
				}

				writeResult($outFile, '"' . str_replace('"','""', $slotStr) . '"' . $tab . '"' . str_replace('"', '""', $slotCaption) . '"' . $tab);
				if ($slot == "commercial-entity-attribute-modifier")
					writeResult($outFile, '"' . $sentiment . '"' . $tab);
				else if ($slot == "object-entity")
					writeResult($outFile, '"' . $naics . '"' . $tab . '"' . $sic . '"' . $tab);
			}

			writeResult($outFile, "\r\n");
		}
	}
	unlink($inputPath);
}

/**
 * Moves an input file that could not be processed into the problem folder.
 *
 * @param string $inputPath current full path of the file
 * @param string $fileName  name the file gets inside the problem folder
 * @return bool result of rename()
 */
function moveToProblemFolder($inputPath, $fileName)
{
	return rename($inputPath, "c:\\tmp\\intuview\\problem\\" . $fileName);
}

/**
 * Runs ExtractFinancialRelations() on every file below a folder, recursively.
 *
 * Entries "." and "..", and any name containing ")." (e.g. "filename(1).pdf",
 * treated as duplicates), are skipped. $isFirst is forwarded to the first
 * processed entry only; every later entry gets false.
 *
 * @param string $inputFolder folder to scan
 * @param string $outFile     path of the TSV output file
 * @param bool   $isFirst     true when no entry has been processed yet in this run
 * @return void
 */
function FolderExtractFinancialRelations($inputFolder, $outFile, $isFirst)
{
	// scandir() returns false (plus a warning) for a missing or unreadable
	// folder; report that instead of iterating over a non-array.
	if (!is_string($inputFolder) || $inputFolder === '' || !is_dir($inputFolder))
	{
		echo "folder doesn't exist: " . (is_string($inputFolder) ? $inputFolder : '');
		return;
	}

	$files = scandir($inputFolder);
	if ($files === false)
	{
		echo "folder can't be read: " . $inputFolder;
		return;
	}

	foreach ($files as $file)
	{
		if ($file == "." || $file == ".." || strpos($file, ").") !== false) //currently prevent "filename(1).pdf" - as they are duplicates in our examples
			continue;

		$inputPath = $inputFolder . "/" . $file;
		if (!file_exists($inputPath))
		{
			echo "file doesn't exist: " . $inputPath;
			continue;
		}

		if (is_dir($inputPath))
			FolderExtractFinancialRelations($inputPath, $outFile, $isFirst);
		else
			ExtractFinancialRelations(file_get_contents($inputPath), $file, $outFile, $isFirst,$inputPath);

		$isFirst = false;
	}
}

// Entry point: reads "outputFile", "intuscanPort" and "folderPath" from the
// request, processes every file below the folder and reports the result file.
try{
	$output = isset($_REQUEST["outputFile"]) ? $_REQUEST["outputFile"] : "";
	if (empty($output))
	{
		// Only digits of the requested port go into the generated file name, so
		// the request cannot inject path separators into it.
		$portTag = isset($_REQUEST["intuscanPort"]) ? preg_replace('/[^0-9]/', '', (string) $_REQUEST["intuscanPort"]) : "";
		$output = "c:\\tmp\\output" . $portTag . "_" . date('YmdHis') . ".tsv";
	}

	$folderPath = isset($_REQUEST["folderPath"]) ? $_REQUEST["folderPath"] : "";
	if (!is_string($folderPath) || $folderPath === "")
		die("folderPath parameter is missing");

	// NOTE(review): folderPath is taken verbatim from the request and every
	// processed file below it is deleted or moved - restrict it to an allowed
	// base directory if this script is reachable by untrusted users.
	FolderExtractFinancialRelations($folderPath, $output, true);

	// The path may come from the request; escape it before echoing into HTML.
	echo "result file locate on : " . htmlspecialchars($output, ENT_QUOTES);
}
catch (Exception $e) {
	die($e);
}