<?php
if(!isset($_SESSION)) session_start();
require_once("demosettings.php");
require_once("EmailAddressParser.php");
require_once("tags.php");

//for parsing email addresses
/*
 * //phpclasses library - not used
require_once('Extensions/rfc822_addresses.php');
 * //PEAR library - currently unused
require_once 'Extensions/Mail.php';
require_once 'Extensions/Mail/RFC822.php';
*/

// Bypass authentication when the script runs from a batch file:
// a command-line run with arguments is marked as logged in before
// loginCheckPoint.php is included below.
// NOTE(review): IS_CLI is not defined in this file - presumably it comes from
// one of the includes above; confirm.
if (IS_CLI && isset($argv) && count($argv)) {
    $_SESSION["loggedIn"] = "yes";
   
}
// Database helpers and the connection shared by the rest of this script.
require_once 'sqlUtils.php';
$mysqli = sqlCreateConnection("SendDocEngAjx");

// Included after the CLI bypass above so that batch runs are already flagged as logged in.
require_once("loginCheckPoint.php");
include_once 'write_log.php';

require_once 'sparql.php';
require_once 'parseUtils.php';

include 'analayzeFile.php';
// At file scope these "global" statements only matter if this file is itself
// included from inside a function; otherwise the variables are already global.
global $databaseHost;
global $databaseUser;
global $databasePass;
global $databaseName;
// Default IntuScan host from the system settings (branch D replaces it with argv[3]).
$intuscanHost = getSystemSettingsProp($mysqli, "intuscanHost");

//Write session data and end session
session_write_close();

// Handles of background processes started by the senddoc loop;
// closeBackgrounds() removes the finished ones.
$backgroundProcs = array();
/**
 * Reaps finished background processes from the global $backgroundProcs list.
 *
 * Entries that are not process resources are dropped with a warning; entries
 * whose process is no longer running are logged, closed and dropped.
 *
 * @param int $max pool size to compare against
 * @return bool true when at least $max entries are still in the list
 */
function closeBackgrounds($max)
{
	global $backgroundProcs;

	// Walk backwards so array_splice() never shifts an entry that is still to be visited.
	$idx = count($backgroundProcs);
	while (--$idx >= 0)
	{
		$proc = $backgroundProcs[$idx];

		if (!is_resource($proc))
		{
			write_to_log("WARNING", "process is not a resource");
			array_splice($backgroundProcs, $idx, 1);
			continue;
		}

		$info = proc_get_status($proc);
		if ($info["running"])
			continue; // still working - keep it in the list

		// A non-zero exit code is reported as an error, anything else as a trace line.
		if ($info["exitcode"])
			write_to_log("ERROR", "process exit abnormally: " . print_r($info, true));
		else
			write_to_log("TRACE", "closing process: " . print_r($info, true));

		proc_close($proc);
		array_splice($backgroundProcs, $idx, 1);
	}

	return count($backgroundProcs) >= $max;
}

/**
 * Blocks until closeBackgrounds($max) reports that fewer than $max background
 * processes remain, polling every 100ms.
 *
 * Writes a TRACE line with the number of polls when it had to wait at all.
 *
 * @param int $max pool size passed through to closeBackgrounds()
 */
function waitForBackgrounds($max)
{
	global $backgroundProcs;

	for ($sleepCount = 0; closeBackgrounds($max); $sleepCount++)
	{
		usleep(100000); //sleep 100ms
	}

	if ($sleepCount <= 0)
		return;

	write_to_log("TRACE", "Had to sleep " . $sleepCount . " times before number of background processes reduced below " . $max);
}

/**
 * Stamps one row of `files` whose timestamp is still NULL with $ts.
 *
 * Each attempt first checks that an un-stamped row exists, then runs the
 * UPDATE. An UPDATE that succeeds but touches no row is retried (up to 100
 * attempts). A failed query, or running out of attempts without stamping
 * anything, is logged as an ERROR.
 *
 * @param object $connection connection accepted by sqlQuery(); its affected_rows property is read
 * @param string $ts         value written to files.timeStamp
 *                           NOTE(review): interpolated into the SQL text as-is - callers must pass a trusted value
 */
function stampTime($connection, $ts)
{
	$guard=100; //should be big enough to recover from collisions
	$innerSql="SELECT id FROM files WHERE TIMESTAMP IS NULL LIMIT 1";
	$sql="UPDATE files SET timeStamp='$ts' WHERE TIMESTAMP IS NULL AND id=($innerSql)";
	$res = null; //stays null when there was nothing to stamp
	do {
		$resinner = sqlQuery($connection, $innerSql);
		if (!$resinner) {
			//the lookup itself failed - report it instead of dereferencing a non-object
			$res = FALSE;
			break;
		}
		if($resinner->num_rows != 1)
			break;
		$res = sqlQuery($connection, $sql);
	} while((--$guard > 0) && ($connection->affected_rows == 0) && ($res === TRUE));

	//out of attempts only counts as a failure when the last attempt stamped nothing
	$exhausted = ($guard == 0) && ($connection->affected_rows == 0);
	if($exhausted || $res === FALSE) {
		write_to_log("ERROR", "senddocEngAjx Update timeStamp failed");
	}
}

// function insertFile($mysqli, $contents,$filePath,$file_name,$intuscanHost,$intuscanPort,$type,$username, $dest_folder_id = null){	
//     global $CURRENT_SOURCE_FOLDER,$PHP_FOLDER, $isLangOverride;	
// 	$contents = appendHackingWords($contents, $filePath);
// 	write_to_log("INFO", "senddocEngAjx - analyze type - $type file - " . $filePath . DIRECTORY_SEPARATOR . $file_name);
// 	$arr = analayzeFile($mysqli, $contents,$file_name,"" , $intuscanHost, $intuscanPort, $type, null, null, $dest_folder_id);	
// 	dumpDbg("analayzeFile_result", $arr);
// 	$getmetadata = false;//$_REQUEST["getmetadata"] ?? false;  - no need for this in folder analyzing!
	
// 	if ($arr === false || empty($arr) || $arr === "timeout" || !is_array($arr) || !array_key_exists("docId",$arr)){		
// 		write_to_log("ERROR", "senddocEngAjx - problem analyze type - $type file - " . $arr);
// 		return;
// 	}elseif (0 > ($arr["docId"])){
// 		write_to_log("ERROR", "senddocEngAjx - problem analyze type - $type file - docId");
// 		return;
// 	}
// 	else{	
// 	    $pBacks = asyncRequst::getInstance()->backgroundAsync($arr,$getmetadata); 
// 	   write_to_log("INFO", "senddocEngAjx started backgroundAsync: " . print_r($pBacks, true));
// 	}	
// 	$sql = "UPDATE files SET results=\"\""; 			
// 	$timeStamp = microtime();
// 	$sql = "UPDATE files
// 	    SET timeStamp = '$timeStamp _ $intuscanHost'   
// 	    WHERE id=" . $arr["docId"];
	   
// 	$res = sqlQuery($mysqli, $sql);
// 	if($res != TRUE){
// 		sleep(1);
// 		write_to_log("WARNING", "senddocEngAjx Update timeStamp failed");
// 		$res = sqlQuery($mysqli, $sql);
// 		if($res != TRUE){
// 			write_to_log("ERROR", "senddocEngAjx Update timeStamp failed");			
// 		}
// 	}			
// 	$defaultLanguage = null;
// 	addMoreData($mysqli,$arr["docId"],null,"NULL",$file_name,$username, $defaultLanguage);
	
	

// 	//if $dest_folder_id defined return him
// 	if (!empty($_REQUEST["Angular"]) || !empty($_REQUEST["v2"])){
// 	    $response["fid"] =  $dest_folder_id ?? null;
// 	    $response["dcid"] = $response["docId"] =  $arr["docId"];
// 	    ob_clean();
// 	    echo json_encode($response);
// 	}else{
// 	    header("Location: PageLoader.php?PageName=document&dcid=" . $arr[docId] );
// 	}
	    
// }

/**
 * Records the file type and file format of a document and applies duplicate handling.
 *
 * Inserts one row into file_format and one into file_type for $id, then calls
 * updateSHA1(). When duplicates are skipped ($skipDuplicate === true) and
 * updateSHA1() reports a duplicate, the existing unique entry is updated and
 * false is returned; otherwise the document is registered as unique (only
 * when $skipDuplicate === true) and true is returned.
 *
 * @param mysqli     $mysqli          open connection (real_escape_string() is called on it)
 * @param int|string $id              document id
 * @param mixed      $dateValue       date passed on to insertUniqueDocToDB()
 * @param mixed      $emailid         the string "NULL" marks a plain file; any other value is treated as email content
 * @param string     $line            file name; its extension determines the format
 * @param string     $username        passed on to insertUniqueDocToDB()
 * @param mixed      $defaultLanguage by-reference, not modified here
 * @return bool true when the document should be analysed, false when it was skipped as a duplicate
 */
function addMoreData($mysqli,$id,$dateValue,$emailid,$line,$username, &$defaultLanguage){
	global $skipDuplicate, $supported_formats;

	//Add file type and file format (extention)
	//PATHINFO_EXTENSION yields "" for names without an extension (no undefined-key warning)
	$extension = strtolower(pathinfo((string)$line, PATHINFO_EXTENSION));
	$file_format = null;

	//Strict comparison: only the "NULL" marker means "not an email". A loose ==
	//makes integer 0 match "NULL" on PHP 7 but not on PHP 8.
	if ($emailid === "NULL"){
		$file_type = "file";
		$file_format = in_array($extension, (array)$supported_formats, true) ? $extension : "unsupported format";
	}else if (isFileNameEmailBody($line)){
		//NOTE(review): "Body" file names are labelled "Message" and vice versa - looks swapped, confirm before changing
		$file_type = "Message";
	}else if (isFileNameEmailMessage($line)){
		$file_type = "Body";
	}else{
		$file_type = "Attachment";
		$file_format = $extension;
	}
	if (empty($file_format)) {
		$file_format = "txt";
	}

	//The extension comes from a file name (e.g. an email attachment) and may contain quotes
	$formatSql = $mysqli->real_escape_string($file_format);

	$query = "INSERT INTO file_format (docId,value)
			  VALUES (\"$id\",\"$formatSql\")";
	sqlQuery($mysqli, $query);
	$query = "INSERT INTO file_type (docId,value)
			  VALUES (\"$id\",\"$file_type\")";
	sqlQuery($mysqli, $query);

	//update duplicate
	$isDuplicate = updateSHA1($mysqli, $id, $emailid);

	if ($isDuplicate != false && $skipDuplicate === true){
		write_to_log("INFO", "senddocEngAjx - DUPLICATE  file - $line");
		//update Unique files
		updateUniqueDocToDB($mysqli, $isDuplicate);
		return false;
	}

	//insert new one
	if ($skipDuplicate === true) {
		insertUniqueDocToDB($mysqli, $id, $dateValue, $username);
	}
	return true;
}

// Inputs that select the mode of operation below:
// AnalyseText -> branch A, AnalyseURL -> branch B, uploaded "file" -> branch C,
// none of them -> branch D (senddoc batch loop).
$username = $_SESSION['username'] ?? null;
$analyse_text_ref = $_REQUEST["AnalyseText"] ?? null;
$analyse_url_ref = $_REQUEST["AnalyseURL"] ?? null;
$files_ref_bool = isset($_FILES["file"]) && isset($_FILES["file"]["tmp_name"]);
$timeoutSec = $_REQUEST["timeoutSec"] ?? null;

// Make sure the uploads folder exists. Missing parent folders are created too,
// and the second is_dir() tolerates another request creating it at the same time.
if (!is_dir($UPLOADS_PATH) && !mkdir($UPLOADS_PATH, 0777, true) && !is_dir($UPLOADS_PATH)){
    write_to_log("ERROR", "senddocEngAjx - failed to create uploads folder: " . $UPLOADS_PATH);
}

// NOTE(review): branches A-C assign $port but pass $intuscanPort to insertFile();
// $intuscanPort is not assigned anywhere in this file before branch D - confirm
// that one of the included files defines it.

//A - TEXT
// Stores the submitted text as a timestamp-named .txt file and analyses it.
if (isset($analyse_text_ref)) {
	$port = "2020";
	$file_path = $UPLOADS_PATH;

	$file_name = date("YmdHis") . ".txt";
	//save the file with UTF8 BOM - for giving IntuScan a hint of the encoding
	$content = UTF8_BOM . $analyse_text_ref;
	if (file_put_contents($file_path. DIRECTORY_SEPARATOR . $file_name, $content) === false) {
		//the text itself is still analysed below; only the saved copy is missing
		write_to_log("WARNING", "senddocEngAjx - failed to save $file_name");
	}
	write_to_log("INFO", "senddocEngAjx line 209 - analyze file - $file_name");
	insertFile($mysqli, $content,$file_path,$file_name,$intuscanHost,$intuscanPort,"textBox",$username);
}

//B - URL
// Downloads the page behind the submitted URL, stores it as .html and analyses it.
else if (isset($analyse_url_ref)) {
	$port = "2020";
	$file_path = $UPLOADS_PATH;
	$file_name = date("YmdHis") . ".html";

	$url = $analyse_url_ref;

	// The URL is user input: accept only http/https so that stream wrappers such as
	// file:// or php:// cannot be used to read local files through this endpoint.
	$scheme = is_string($url) ? strtolower((string)parse_url($url, PHP_URL_SCHEME)) : "";
	$data = ($scheme === "http" || $scheme === "https") ? file_get_contents($url) : false;

	if ($data === false) {
		write_to_log("ERROR", "senddocEngAjx - cannot fetch URL for analysis (only http/https URLs are accepted)");
	} else {
		file_put_contents($file_path. DIRECTORY_SEPARATOR . $file_name, $data);
		insertFile($mysqli, $data,$file_path,$file_name,$intuscanHost,$intuscanPort,"url",$username);
	}

//C - single files
// Copies one uploaded file into the uploads folder and analyses it.
}else if ($files_ref_bool)
{
	$port = "2020";
	$upload = $_FILES["file"];
	$tmpPath = $upload["tmp_name"];

	// Read the temp file only for a completed, genuine single-file upload
	// (tmp_name is empty on a failed upload and an array for "file[]" uploads).
	$uploadOk = is_string($tmpPath)
		&& ($upload["error"] ?? UPLOAD_ERR_NO_FILE) === UPLOAD_ERR_OK
		&& is_uploaded_file($tmpPath);
	$contents = $uploadOk ? file_get_contents($tmpPath) : false;

	if ($contents === false) {
		write_to_log("ERROR", "senddocEngAjx - uploaded file is missing or could not be read");
	} else {
		// The client-supplied name must never contribute directory components.
		$file_name = basename(str_replace("\\", "/", (string)$upload["name"]));
		$filePath = $file_name;

		// NOTE(review): the original extension is kept - make sure the uploads
		// folder is not served as executable content by the web server.
		$ext = pathinfo($file_name, PATHINFO_EXTENSION);
		$file_path = $UPLOADS_PATH;
		$full_file_name = $UPLOADS_PATH . DIRECTORY_SEPARATOR . $file_name;

		// Never overwrite an existing upload.
		if (file_exists($full_file_name)) {
			$success = false;
		}
		else {
			$success = file_put_contents($full_file_name, $contents);
		}

		//if failed retry using date as filename (can happen if file already exists, or other failure such as filename contains unicode characters)
		if ($success === false)
		{
			$file_name = date("YmdHis") . ".$ext";
			$full_file_name = $UPLOADS_PATH . DIRECTORY_SEPARATOR . $file_name;
			if (file_put_contents($full_file_name, $contents) === false) {
				write_to_log("WARNING", "senddocEngAjx - failed to save upload as $file_name");
			}
		}

		insertFile($mysqli, $contents,$file_path,$file_name,$intuscanHost,$intuscanPort,"uploadedFiles",$username);
	}
}

//D - senddoc
// Batch worker: repeatedly claims rows of `files` that have no timeStamp yet,
// analyses each claimed file and hands the result to a background process,
// until no work is left.
else {
	// When $argv is empty the arguments are taken from a comma-separated "argv" query parameter.
	if (empty($argv)) {
		$argvstr = filter_input(INPUT_GET, "argv");
		$argv = explode(",", (string)$argvstr);
	}
	if (count($argv)<6)
		die("missing arguments");

	// argv layout as read here: [1] sendDoc index, [2] username, [3] IntuScan host,
	// [4] IntuScan port, [5] file id, [6] optional skip-background flag, [7] optional email flag.
	$username = $argv[2];
	$sendDocIndex = $argv[1];
	$intuscanHost = $argv[3];
	$intuscanPort = $argv[4];//getSystemSettingsProp($mysqli, "intuscanPort");
	$file_id = $argv[5];
	$finishedAddingFiles = false;
	// Only 6 entries are guaranteed above, so the 7th must be optional.
	$skipBG = filter_var($argv[6] ?? false, FILTER_VALIDATE_BOOLEAN);
	$isEmail = (!empty($argv[7]))? $argv[7] : null;
	write_to_log("INFO", "senddocEngAjx - file_id=$file_id intuscanHost=$intuscanHost intuscanPort=$intuscanPort");
	$port = "2022";
	$part = 0;
	$count_until_restart = 0; // wait 5 document until restart
	$sum_file_size = 0;
	$bulkSize = 1;
	$bulkIds = [];
	// Counts analysed documents while backgrounds are skipped (see the throttle below).
	$iterationNum = $iterationNum ?? 0;

	$running = true;
	$nextDocs = array();
	$checkUnscannedTimestamps = false; //set to true if needs to re-analyze docs which failed during sendDoc

	// The username may come from the query string: escape it once for the SQL text below.
	$usernameSql = $mysqli->real_escape_string((string)$username);

	//for tags of files not analysed
	$tag_class = new Tags($mysqli, $username);

	while ($running){
		$mysqli->ping();

		// ---- refill the work queue -------------------------------------------------
		if (empty($nextDocs))
		{
			include "sendDocRuntimeOptionalInclude.php"; //if exists - may change variables (e.g. $logLevel) at runtime
			$timeStamp = microtime();
			$timeStampSuffix = " _ $intuscanHost _ $intuscanPort _ $sendDocIndex";
			$timeStampSuffix .= " _ " . gethostbyname(gethostname()); //for uniqueness when running from different machines on shared DB
			$timeStamp .= $timeStampSuffix;
			// Host, port and index may come from the query string: escape the SQL literal.
			// The value stored in the table is unchanged by the escaping.
			$timeStampSql = $mysqli->real_escape_string($timeStamp);
			$idsCondition = "";

			global $databaseName;
			$dbName = connectDetails($databaseName)["db"];

			// Claim the next id range by advancing senddoc_file_id, then stamp the
			// un-stamped files in that range. Repeat until at least one file was stamped.
			do {
				sqlQuery($mysqli, "UPDATE senddoc_file_id SET id=LAST_INSERT_ID(id+$bulkSize)");

				$lastId = $mysqli->insert_id;
				if (empty($lastId) || $mysqli->affected_rows < 1)
				{
					//in memSql insert_id return "0", but a SELECT on it works
					$res = sqlQuery($mysqli, "SELECT LAST_INSERT_ID() as lastId");
					if ($res && ($row = $res->fetch_object()))
						$lastId = $row->lastId;

					if (empty($lastId))
					{
						write_to_log("ERROR", "senddoc_file_id wasn't incremented (lastId=$lastId, #affectedrows={$mysqli->affected_rows}). Probably reached max files id");
						break;
					}
				}
				//senddoc_file_id is initialized to "1". adding bulk of 30 sets it to 31. we need the range from 1 to 30, i.e. 31-30 to 31-1 (then from 31 to 60...)
				$timeStampIds = range($lastId - $bulkSize, $lastId -1);
				$idsCondition = "id IN(" . join(",", $timeStampIds) . ") AND ";
				//updated timeStamp of auto-incremented id
				sqlQuery($mysqli, "UPDATE files SET timeStamp = '$timeStampSql' WHERE $idsCondition timeStamp IS NULL" );
				$affectedFiles = $mysqli->affected_rows;
				if (!$affectedFiles)
				{
					if ($mysqli->error)
						write_to_log("ERROR", "Failed to update timeStamp: " . $mysqli->error);
					else
					{
						// Nothing stamped: stop when the counter has passed the highest file id
						// (and pull the counter back to that id).
						$res = sqlQuery($mysqli, "SELECT MAX(files.id) AS MaxId,  ANY_VALUE(senddoc_file_id.id) AS SedDocId FROM files, senddoc_file_id");
						if ($res && ($row = $res->fetch_object()))
						{
							if (empty($row->MaxId))
							{
								write_to_log("ERROR", "MaxId is empty");
								break;
							}
							else if ($row->MaxId < $row->SedDocId)
							{
								sqlQuery($mysqli, "UPDATE senddoc_file_id SET id=" . $row->MaxId);
								break;
							}
						}
					}
				}
			}
			while ($affectedFiles < 1);

			// Load the files stamped by this pass that were not scanned yet.
			$sql = "SELECT id,filename, shortFileName, origFolder, parentFolderId,
                                    file_last_modified_date, file_creation_date, file_last_accessed_date
                                    from files WHERE $idsCondition timeStamp = '$timeStampSql' AND scan_time IS NULL";
			$res = sqlQuery($mysqli, $sql);
			while ($res && ($row = $res->fetch_object())){
				$nextDocs[] = $row;
			}
		}

		// ---- take the next document ------------------------------------------------
		$row = array_shift($nextDocs);
		if ($row)
		{
			$id = $row->id;
			$line = $row->filename;
			$shortFileName = $row->shortFileName;
			$folderPath = uploadedFileAbsolutPath($row->origFolder);
			$parentFolderId = $row->parentFolderId;
			$full_path = $folderPath . DIRECTORY_SEPARATOR . $line;
		}
		else $id = NULL;

		if(!empty($id)){
			try {
				$bulkIds[] = $id;
				$contents = file_get_contents($full_path);
				if ($contents === FALSE)
				{
					// Retry through the short (8.3) file name, copied to a temp file.
					if (empty($shortFileName))
					{
						write_to_log("ERROR", "Short file name is missing for: $line. Consider executing 'fsutil 8dot3name set...' in command line");
						continue;
					}
					$tmpFileName = copyFileToTemp("$folderPath".DIRECTORY_SEPARATOR."$shortFileName", $id);
					$contents = file_get_contents($tmpFileName);
				}
				$contents = appendHackingWords($contents, $row->filename);

				$parentFolder = basename($folderPath);
				global $isEmailIndependent;
				// $emailid: 0 for independent emails, the parent folder id for emails,
				// the string "NULL" for everything else.
				if ($isEmailIndependent)
				{
					$emailid = 0;
					$emailCondition = "id = $id";
				}
				else if ($isEmail)
				{
					$emailid = $parentFolderId;
					$emailCondition = "parentFolderId =  $parentFolderId";
				}
				else $emailid = "NULL";

				write_to_log("TRACE", "is email: ($isEmail) of parent folder '$parentFolder' of folder path '$folderPath' and parentFolderId: '$parentFolderId' emailid is: $emailid");

				//email headers
				if ($emailid !== "NULL" && $isEmail)
				{
					$emailData = fetchEmailData($mysqli, $id, $contents, $line, $emailid, $parentFolderId);
				}
				else {
					$emailData = null;
				}

				$email_date = isset($emailData["headers"]["date"]) ?  parseDate($emailData["headers"]["date"]) : null;
				$modified_date = isset($row->file_last_modified_date) ? $row->file_last_modified_date : null;
				$creation = isset($row->file_creation_date) ? $row->file_creation_date : null;
				$access = isset($row->file_last_accessed_date) ? $row->file_last_accessed_date : null;

				// Leaves $dateValue at the first non-empty candidate, in this priority order
				// (or at the last, empty, candidate when none is set).
				//the actual UPDATE has moved to background.php, yet we need the $dateValue in addMoreData() to be inserted to unique_files
				//TODO: $dateValue should consider IntuScan's detected date (requires moving the unique_files update to background.php)
				foreach (array(
				         "email date" => $email_date,
				         "modified date" => $modified_date,
				         "creation date" => $creation,
				         "access date" => $access) as $name => $dateValue)
				{
					if (empty($dateValue)) continue;
					break;
				}

				$defaultLanguage = null;
				// addMoreData() returns false for a skipped duplicate - such files are not analysed.
				if (addMoreData($mysqli,$id,$dateValue,$emailid,$line,$username, $defaultLanguage)){

					write_to_log("INFO", "senddocEngAjx - analyze file - " . $folderPath. DIRECTORY_SEPARATOR . $line ." intuscanHost=$intuscanHost");
					$arr = analayzeFile($mysqli, $contents,$line,$username, $intuscanHost, $intuscanPort, $id, NULL, $defaultLanguage, $timeoutSec, $folderPath);
					$continue = true;

					// "down": retry once after 5 seconds, then tag the file as failed.
					if ($arr === "down"){
						sleep(5);
						write_to_log("WARNING", "senddocEngAjx line 100 - sleep(1)");
						$arr = analayzeFile($mysqli, $contents,$line,$username, $intuscanHost, $intuscanPort,$id, NULL, $defaultLanguage, $timeoutSec, $folderPath);
						if ($arr === "down"){
							write_to_log("ERROR", "senddocEngAjx line 98 - down ". $row->filename);
							$continue = false;
							$response = $tag_class->insertTag($row->id, "error_send_doc", "system_tags");
						}
					}

					// "sql": same retry-once policy.
					if ($arr === "sql"){
						sleep(5);
						$arr = analayzeFile($mysqli, $contents,$line,$username, $intuscanHost, $intuscanPort,$id, NULL, $defaultLanguage, $timeoutSec, $folderPath);
						write_to_log("WARNING", "senddocEngAjx line 106 - sleep(2) - sql");
						if ($arr === "sql"){
							$continue = false;
							write_to_log("ERROR", "senddocEngAjx line 116 - arr === sql on ". $row->filename);
							$response = $tag_class->insertTag($row->id, "error_send_doc", "system_tags");
						}
					}

					if($arr === "contents error"){
						$continue = false;
						write_to_log("ERROR", "contents error");
					}

					if ($continue){
						// Check the result before dereferencing it: $arr may be a non-array
						// value here, and empty() is safe on any type.
						if (empty($arr["docId"])) {
							write_to_log("ERROR", "empty doc id");
							continue;
						}
						write_to_log("INFO", "senddocEngAjx waits for backgounds for doc " . $arr["docId"]);
						if (!$skipBG) {
							// Keep at most 4 background processes alive, then start the next one.
							waitForBackgrounds(4);
							$backgroundProcs[] = asyncRequst::getInstance()->backgroundAsync($arr,$_REQUEST["getmetadata"] ?? false);
						}
						else if (++$iterationNum > 5000)
						{
							//sleep if tmp_results contains too many res files (let backgrounds keep up)
							global $xml_results_path;
							if (iterator_count(new FilesystemIterator($xml_results_path, FilesystemIterator::SKIP_DOTS)) > 10000)
							{
								write_to_log("ERROR", "Too many res.xml files in tmp_results folder. sleeping a minute before next senddoc");
								sleep(60);
								$iterationNum -= 100;//check again within 100 docs
							}
							else $iterationNum = 0;
						}
					}
				}

				// Flush the ids collected so far once a full bulk was processed.
				if (count($bulkIds) >= $bulkSize)
				{
					postBulkInsertionCallback($mysqli, $bulkIds);
					$bulkIds = [];
				}
			}
			catch (Exception $e) {
				write_to_log("ERROR", "senddocEngAjx Exception - $e");
				return;
			}
		}else{
			// ---- nothing left to analyse ---------------------------------------------
			// With the folder service enabled, keep polling while user_status.isRunning is 2,
			// then make one more pass over the queue before stopping.
			$isFolderService = getSystemSettingsProp($mysqli, "isFolderService");

			if($isFolderService){
				if($finishedAddingFiles){
					$running = false;
				}else{
					$sql = "SELECT isRunning AS r FROM user_status WHERE username='$usernameSql'";
					$res_run = sqlQuery($mysqli, $sql);
					if(!empty($res_run)){
						$row = $res_run->fetch_object();
						$row_int = $row->r;
						$status = (int)$row_int;
						if($status == 2){
							sleep(3);
						}else{
							$finishedAddingFiles = true;
						}
					}
				}
			}else{
				$running = false;
			}
		}
	}
	//final post-bulk insertion
	postBulkInsertionCallback($mysqli, $bulkIds);

	//wait for all opened background pipes
	if(!$skipBG)waitForBackgrounds(1);

	if(!$isFolderService){
		$sql = "UPDATE user_status SET isRunning=0 WHERE username=\"$usernameSql\"";
		$res = sqlQuery($mysqli, $sql);
		updateUsersLastModifiedDate($mysqli);
	}

	//header("Location: listFilesPage.php" );
}
sqlClose($mysqli, 'sendDocEngAjx', true);
write_to_log("TRACE", "finished senddocEndAdj");

/*$xml = new DOMDocument();
$loadRes = $xml->loadXml(utf8_encode($msg));

$file = 'c:\tmp\msg.txt';

file_put_contents($file, $msg);

// insert aggregated rdf to model
$tripples = $xml->getElementsByTagName("FileTriples")->item(0);
$aggr = $tripples->getElementsByTagName("AggregatedRDF")->item(0);

$content = $aggr->firstChild;
insertRDF($content);

$digest = $tripples->getElementsByTagName("DigestRDF")->item(0);
$content = $digest->firstChild;
insertRDF($content);

header("Location: document.php?dcid=" . $docId );*/
/*
	echo "Upload: " . $_FILES["file"]["name"] . "<br />";
	echo "Type: " . $_FILES["file"]["type"] . "<br />";
	echo "Size: " . ($_FILES["file"]["size"] / 1024) . " Kb<br />";
	echo "Stored in: " . $_FILES["file"]["tmp_name"];
*/




?>