<?php

/**
 * Copies files out of every "Twitter" directory found under $sourcePath.
 *
 * For each Twitter directory, every file below it (recursively) is copied
 * flat into "$outputPath/<name of the Twitter directory's parent folder>".
 * A target subdirectory that already exists is treated as already handled
 * and the whole Twitter directory is skipped.
 *
 * NOTE(review): files are flattened by base name, so two files with the same
 * name in different nested folders overwrite each other in the target.
 *
 * @param string $sourcePath Root directory searched for "Twitter" directories.
 * @param string $outputPath Destination root; created when missing.
 * @param int    $maxFiles   Maximum number of copy attempts per Twitter
 *                           directory; 0 or less copies nothing.
 *
 * @return void
 */
function copyFilesFromTwitterSubdirectories($sourcePath, $outputPath, $maxFiles = 1000) {
    // Create the output path if it doesn't exist; without it nothing can be copied.
    if (!is_dir($outputPath) && !mkdir($outputPath, 0777, true) && !is_dir($outputPath)) {
        return;
    }

    foreach (findTwitterSubdirectories($sourcePath) as $twitterDir) {
        // Files are grouped under the name of the folder that contains "Twitter".
        $parentFolderName = basename(dirname($twitterDir));
        $outputSubdirectory = $outputPath . DIRECTORY_SEPARATOR . $parentFolderName;

        // An existing target means this folder was already exported: skip it.
        if (is_dir($outputSubdirectory)) {
            continue;
        }
        if (!mkdir($outputSubdirectory, 0777, true) && !is_dir($outputSubdirectory)) {
            continue;
        }

        $fileIterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($twitterDir, RecursiveDirectoryIterator::SKIP_DOTS),
            RecursiveIteratorIterator::LEAVES_ONLY
        );

        $fileCount = 0;
        foreach ($fileIterator as $file) {
            // Check the limit before copying so that $maxFiles <= 0 copies nothing.
            if ($fileCount >= $maxFiles) {
                break;
            }
            if (!$file->isFile()) {
                continue;
            }

            $outputFile = $outputSubdirectory . DIRECTORY_SEPARATOR . $file->getFilename();
            copy($file->getPathname(), $outputFile);
            $fileCount++;
        }
    }
}

/**
 * Recursively collects the paths of all directories named exactly "Twitter"
 * below $path.
 *
 * A matching directory is reported but not descended into; every other
 * directory is searched recursively.
 *
 * @param string $path Directory to search.
 *
 * @return array List of path names of the "Twitter" directories found.
 */
function findTwitterSubdirectories($path) {
    $matches = [];

    foreach (new DirectoryIterator($path) as $entry) {
        // Only real sub-directories are of interest ("." and ".." excluded).
        if ($entry->isDot() || !$entry->isDir()) {
            continue;
        }

        $entryPath = $entry->getPathname();

        if ($entry->getFilename() === 'Twitter') {
            $matches[] = $entryPath;
            continue;
        }

        // Any other directory: append whatever is found further down.
        foreach (findTwitterSubdirectories($entryPath) as $nestedMatch) {
            $matches[] = $nestedMatch;
        }
    }

    return $matches;
}

/**
 * Runs Arabic OCR over every PNG in $imageDirectory and writes the combined
 * text as a minimal HTML document.
 *
 * The PDF-to-image conversion step is currently disabled (see the commented
 * command below), so the PNG pages must already exist in $imageDirectory.
 *
 * @param string $pdfFilePath    Source PDF; only used by the disabled conversion step.
 * @param string $htmlFilePath   File the resulting HTML is written to.
 * @param string $imageDirectory Directory holding the page images (*.png).
 *
 * @return void
 */
function OCR($pdfFilePath, $htmlFilePath, $imageDirectory){
    // Convert PDF to images
    //     exec("gswin64c -dNOPAUSE -sDEVICE=png16m -r300 -sOutputFile=\"{$imageDirectory}/page%d.png\" \"{$pdfFilePath}\" -dBATCH");

    $imageFiles = glob($imageDirectory . '/*.png');
    if ($imageFiles === false) {
        $imageFiles = [];
    }
    // glob() sorts alphabetically (page1, page10, page2, ...); use natural
    // order so the pages are concatenated in reading order.
    natsort($imageFiles);

    // range('ء', 'ي') only looks at the first BYTE of each string and yields
    // invalid UTF-8 fragments, so build U+0621..U+064A from code points.
    $arabicLetters = [];
    for ($codePoint = 0x0621; $codePoint <= 0x064A; $codePoint++) {
        $arabicLetters[] = mb_chr($codePoint, 'UTF-8');
    }

    // Perform OCR on each image
    $ocrText = '';
    foreach ($imageFiles as $imageFile) {
        try {
            $tesseract = new TesseractOCR($imageFile);
            $tesseract->lang('ara'); // Specify the Arabic language
            $tesseract->allowlist($arabicLetters); // Restrict recognition to Arabic letters
            $ocrText .= $tesseract->run();
        } catch (\Exception $e) {
            // Best effort: a page that fails is reported and skipped.
            echo 'Error occurred while processing image: ' . $e->getMessage() . PHP_EOL;
        }
    }

    // The OCR result is plain text: escape it before embedding it in HTML.
    $escapedText = htmlspecialchars($ocrText, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
    $html = '<html><body>' . nl2br($escapedText) . '</body></html>';
    if (file_put_contents($htmlFilePath, $html) === false) {
        echo 'Failed to write HTML output: ' . $htmlFilePath . PHP_EOL;
        return;
    }

    echo 'PDF converted to HTML with OCR successfully.';
}
/**
 * Adds the places listed in ./files/lebanon_cities.csv (tab separated:
 * name, first coordinate, second coordinate) to the map loaded from
 * ./files/coordinates.json and writes the result to
 * ./files/coordinates_new.json. Names already present are left untouched.
 *
 * @return void
 */
function lebanon(){
    $raw = file_get_contents("./files/coordinates.json");
    $coords = $raw === false ? null : json_decode($raw, true);
    if (!is_array($coords)) {
        $coords = [];
    }

    $file = fopen('./files/lebanon_cities.csv', 'r');
    if ($file === false) {
        die(print_r(error_get_last(), true));
    }

    while (($row = fgetcsv($file, 1024, "\t")) !== false) {
        // Read the columns by position. The previous array_filter() call
        // dropped "0" values and left key gaps, which produced undefined
        // offsets and an entry with an empty name.
        $ont = isset($row[0]) ? $row[0] : '';
        if ($ont === '' || !isset($row[1], $row[2])) {
            continue;
        }
        $lon = $row[1];
        $lat = $row[2];

        if(!array_key_exists($ont, $coords)){
            $coords[$ont] = [doubleval($lon), doubleval($lat)];
        }
    }
    fclose($file);

    // json_encode()'s second argument is a flag bitmask; the former `true`
    // was coerced to 1 (JSON_HEX_TAG). Spelled out to keep the output identical.
    file_put_contents("./files/coordinates_new.json", json_encode($coords, JSON_HEX_TAG));
}


// $pdftool = new PDFTools();
// ini_set('memory_limit', '-1');
// $path = "C:\\ivProjects\\galgalanda\\docs\\text\\Impact Docs\\";
// if ($handle = opendir($path)) {

//     while (false !== ($entry = readdir($handle))) {
//         $i= 0;
//         if ($entry != "." && $entry != "..") {

//             $chunkSize = 1024 * 1024; // 1MB

//             $handle2= fopen($path.DIRECTORY_SEPARATOR.$entry, 'rb');
//             $text = [];
//             while (!feof($handle2)) {
//                 $chunk = fread($handle2, $chunkSize);
//                 $text[] = $pdftool->getPdfTextNew($path.DIRECTORY_SEPARATOR.$entry);
//             }

//             $text = implode("\n", $text);

//             $filename = pathinfo($entry, PATHINFO_FILENAME);

//             file_put_contents($path.DIRECTORY_SEPARATOR."output".DIRECTORY_SEPARATOR.$filename."_$i.txt", $text);
//             $i++;


//         }
//     }

//     closedir($handle);
// }

/**
 * Extracts, for every <concept> of a knowledge-base XML file, its slot names
 * and super-concept names, lets each concept inherit the slots of its direct
 * super-concepts, and writes the result as JSON.
 *
 * The XML is converted to arrays via json_encode/json_decode, so a single
 * child element appears as an associative array while repeated children
 * appear as a list; the branches below deal with both shapes.
 *
 * @param string $filename   Path of the XML file to read.
 * @param string $outputPath File the JSON result is written to.
 *
 * @return array Map: concept name => ['slots' => string[], 'superconcepts' => string[]].
 */
function getXml($filename, $outputPath = "C:\\Users\\galar\\Downloads\\output_slots.json"){
    $myXMLData = file_get_contents($filename);
    $xml = simplexml_load_string($myXMLData) or die("Error: Cannot create object");
    $json = json_encode($xml);
    $array = json_decode($json,TRUE);

    $concepts = isset($array["concept"]) ? $array["concept"] : [];
    // A document with exactly one <concept> decodes to the concept itself
    // instead of a list of concepts.
    if (isset($concepts["@attributes"])) {
        $concepts = [$concepts];
    }

    $final_results = [];
    foreach($concepts as $concept)
    {
        $name = $concept["@attributes"]["name"];
        $final_results[$name]["slots"]  = [];

        if(!empty($concept["slots"]["slot"])){
            foreach($concept["slots"]["slot"] as $slot){
                if(array_key_exists("value", $slot)){
                    continue;
                }
                if(count($slot) > 1 && !array_key_exists("name", $slot) && !array_key_exists("values", $slot)){
                    // $slot is itself a list of slot elements.
                    foreach($slot as $slot_element){
                        if(array_key_exists("@attributes", $slot_element)){
                            $final_results[$name]["slots"][] = $slot_element["@attributes"]["name"];
                        }
                        else{
                            $final_results[$name]["slots"][] = $slot_element["name"];
                        }
                    }
                }
                else{
                    // Single slot: either the element (with "@attributes") or,
                    // when the concept has just one slot, its attribute map.
                    if(array_key_exists("@attributes", $slot)){
                        $final_results[$name]["slots"][] = $slot["@attributes"]["name"];
                    }
                    else{
                        $final_results[$name]["slots"][] = $slot["name"];
                    }
                }
            }
        }

        $final_results[$name]["superconcepts"] = [];
        if(!empty($concept['superconcepts'] )){
            foreach($concept['superconcepts'] as $sc){
                if(count($sc) > 1){
                    foreach($sc as $sc_element){
                        $final_results[$name]["superconcepts"][] = $sc_element["@attributes"]["name"];
                    }
                }
                else{
                    $final_results[$name]["superconcepts"][] = $sc["@attributes"]["name"];
                }
            }
        }
    }

    // Inherit the slots of each direct super-concept.
    foreach(array_keys($final_results) as $conceptName){
        foreach($final_results[$conceptName]['superconcepts'] as $sc){
            if(empty($final_results[$sc]['slots'])){
                continue;
            }
            // array_unique() keeps the original keys; re-index so the slots are
            // always encoded as a JSON list and never as an object with gaps.
            $final_results[$conceptName]['slots'] = array_values(array_unique(
                array_merge($final_results[$conceptName]['slots'], $final_results[$sc]['slots'])
            ));
        }
    }

    // The former second argument `true` was coerced to flag 1 (JSON_HEX_TAG).
    file_put_contents($outputPath, json_encode($final_results, JSON_HEX_TAG));

    return $final_results;
}

/**
 * Exports every row of `raw_stream_data` (database "{$db}_crawler") to
 * ./raw_output/$db/<n>.json, enriched with an "intuview_info" section built
 * from the concept tables of database $db.
 *
 * NOTE(review): $files is keyed by the file name without ".txt.json" and is
 * looked up with raw_stream_data.element_id - presumably both hold the same
 * identifier; confirm against the schema.
 *
 * @param string $db Name of the main database; the crawler database is "{$db}_crawler".
 *
 * @return void
 */
function getRawStream($db){
    $mysqli = sqlCreateDBConnection($db);

    // file name (without ".txt.json") => files.id
    $files = [];
    $sql = "select id,REPLACE(filename,'.txt.json','') as n from files";
    $res = sqlQuery($mysqli, $sql);
    while($obj = $res->fetch_object()){
        $files[$obj->n] = $obj->id;
    }

    $mysqlic = sqlCreateDBConnection($db."_crawler");

    // Concept names of the "israeli-politics" flavor.
    $sql = "SELECT data FROM `allconceptsarr` WHERE flavor='israeli-politics'";
    $res = sqlQuery($mysqli, $sql);
    $row = $res->fetch_object();
    $decoded = $row ? json_decode($row->data, true) : null;
    $concepts = is_array($decoded) ? array_keys($decoded) : [];

    $outputDir = "./raw_output/$db";
    if (!is_dir($outputDir)) {
        mkdir($outputDir, 0777, true);
    }

    $sql = "SELECT element_id,data from raw_stream_data";
    $res = sqlQuery($mysqlic, $sql);
    $i = 0;
    while($row = $res->fetch_object()){
        $i++; // numbering counts skipped rows too, as before
        if($row->element_id == 0){
            continue;
        }

        $final_data = json_decode($row->data, true);
        if (!is_array($final_data)) {
            $final_data = [];
        }

        // Without a matching document there is nothing to look up; querying
        // with an empty docid would only produce invalid SQL.
        $intuview_info = [];
        if (isset($files[$row->element_id])) {
            $intuview_info = getIntuviewInfo($mysqli, $concepts, $files[$row->element_id]);
        }
        $final_data["intuview_info"] = $intuview_info ?? [];

        // The former flag argument `true` equals JSON_HEX_TAG (1).
        file_put_contents("./raw_output/$db/"."$i.json", json_encode($final_data, JSON_HEX_TAG));
    }
}

/**
 * Collects, for one document, the non-empty `name` values stored in each
 * concept table.
 *
 * The table name is derived from the concept name: lower-cased, with "-"
 * replaced by "_".
 *
 * @param mixed    $mysqli   Connection handle accepted by sqlQuery().
 * @param string[] $concepts Concept names.
 * @param int      $docid    Document id to look up.
 *
 * @return array Map: table name => list of names; concepts without hits are omitted.
 */
function getIntuviewInfo($mysqli, $concepts, $docid){
    $intuview_info = [];
    $docid = (int)$docid; // interpolated into SQL below, so force an integer

    foreach($concepts as $concept){
        $concept_arr = [];
        $concept = strtolower(str_replace('-','_',$concept));

        // Identifiers cannot be bound as parameters: quote the table name.
        // The alias pins the result field name to "name" regardless of how
        // the server reports the column's case.
        $table = '`' . str_replace('`', '``', $concept) . '`';
        $sql = "SELECT NAME AS name from $table WHERE docid=$docid";

        $res = sqlQuery($mysqli, $sql);
        if (!is_object($res)) {
            continue; // query failed, e.g. no table for this concept
        }
        while($row = $res->fetch_object()){
            if(!empty($row->name)){
                $concept_arr[] = $row->name;
            }
        }
        if(!empty($concept_arr)){
            $intuview_info[$concept] = $concept_arr;
        }
    }

    return $intuview_info;
}


/**
 * Recreates the `allconceptscaptions` table and fills it from
 * ./files/concepts_caption.csv (tab separated: concept, caption).
 *
 * Rows that do not contain exactly two non-empty cells are logged and
 * skipped. A concept that occurs more than once keeps its first caption.
 *
 * @param mysqli $mysqli Open connection (assumed to be a mysqli instance, as
 *                       real_escape_string() is used - TODO confirm).
 *
 * @return void
 */
function createAllConceptsCaptions($mysqli){
    sqlQuery($mysqli, "DROP TABLE IF EXISTS allconceptscaptions");

    $sql = "CREATE TABLE IF NOT EXISTS allconceptscaptions(
    concept varchar(150),
    caption varchar(150),
    PRIMARY KEY (concept))";
    sqlQuery($mysqli, $sql);

    $file = fopen('./files/concepts_caption.csv', 'r');
    if ($file === false) {
        write_to_log("ERROR", "cannot open ./files/concepts_caption.csv");
        return;
    }

    while (($row = fgetcsv($file, 1024, "\t")) !== false) {
        // Drop empty cells and re-index so positions 0 and 1 always exist
        // for a two-cell row.
        $row = array_values(array_filter($row, function ($cell) {
            return $cell !== null && $cell !== '';
        }));
        if(count($row) != 2){
            write_to_log("ERROR", json_encode($row, JSON_HEX_TAG));
            continue; // previously the malformed row was still inserted
        }

        // Escape the values: a quote inside a caption would break the statement.
        $concept = $mysqli->real_escape_string($row[0]);
        $caption = $mysqli->real_escape_string($row[1]);
        $sql = "INSERT INTO allconceptscaptions (concept, caption) VALUES(\"$concept\", \"$caption\")  ON DUPLICATE KEY UPDATE concept=concept";
        sqlQuery($mysqli, $sql);
    }
    fclose($file);
}


/**
 * Converts every JSON file in $path to the target field layout and writes
 * the result, under the same file name, to "$path/output".
 *
 * Conversion rules: array values are joined with ","; "website_name",
 * "article_url" and "published" are renamed to "user_screen_name",
 * "post_url" and "created_at"; when "content" is present, a "text" field
 * (title, newline, content lines) is added.
 *
 * @param string $path Directory holding the source JSON files.
 *
 * @return void
 */
function changeJsons($path = "C:\\Users\\galar\\Downloads\\NEW\\lenanon24_news\\jsons"){
    $renames = [
        "website_name" => "user_screen_name",
        "article_url"  => "post_url",
        "published"    => "created_at",
    ];

    $outputDir = $path . DIRECTORY_SEPARATOR . "output";
    if (!is_dir($outputDir)) {
        mkdir($outputDir, 0777, true);
    }

    $files = array_diff(scandir($path), array('.', '..'));

    foreach ($files as $filename) {
        $source = $path . DIRECTORY_SEPARATOR . $filename;
        // Skip the "output" directory itself and anything else that is not a file.
        if (!is_file($source)) {
            continue;
        }
        echo $path."\\".$filename."\n";

        $data = json_decode(file_get_contents($source), true);
        if (!is_array($data)) {
            continue; // not a JSON document
        }

        $data_new = [];
        foreach ($data as $key => $value) {
            if (is_array($value)) {
                $value = implode(",", array_values($value));
            }
            $target = isset($renames[$key]) ? $renames[$key] : $key;
            $data_new[$target] = $value;
        }

        // Built from the untouched source values, after the loop, so the
        // array is no longer modified while it is being iterated.
        if (array_key_exists("content", $data)) {
            $title = isset($data["title"]) ? $data["title"] : "";
            $data_new["text"] = $title . "\n" . implode("\n", (array)$data["content"]);
        }

        // The former flag argument `true` equals JSON_HEX_TAG (1).
        file_put_contents($outputDir . DIRECTORY_SEPARATOR . $filename, json_encode($data_new, JSON_HEX_TAG));
    }
}
// Unconditional stop: running this file ends here, so none of the top-level
// statements further down are executed while this exit() is in place.
exit();

// Unreachable because of the exit() above. Manual experiment: decode one
// hard-coded JSON file and pass it to saveJsonToMeta() (presumably declared
// in parseTextAndMetadata.php - TODO confirm).
require_once 'parseTextAndMetadata.php';
$data = file_get_contents("C:\\Users\\galar\\Desktop\\1627694248941301761.txt.json", true);
$json = json_decode($data, true);
saveJsonToMeta($json, null);
/**
 * Merges a tab separated town list (name, first coordinate, second
 * coordinate; first line is a header) into an existing coordinates JSON map
 * and writes the merged map to a new file. Towns already present are
 * overwritten by the values from the list.
 *
 * @param string $inputPath  Tab separated town list.
 * @param string $coordPath  Existing coordinates JSON (name => [a, b]).
 * @param string $outputPath File the merged JSON is written to.
 *
 * @return void
 */
function addCoord(
    $inputPath = "C:\\Users\\galar\\Downloads\\Malaysian Towns for upload with Coordinates.txt",
    $coordPath = "C:\\Users\\galar\\Downloads\\coordinates.json",
    $outputPath = "C:\\Users\\galar\\Downloads\\output.json"
){
    if (($handle = fopen($inputPath, 'r')) === false) {
        die(print_r(error_get_last(),true));
    }

    $raw = file_get_contents($coordPath, true);
    $coord_json = $raw === false ? null : json_decode($raw, true);
    if (!is_array($coord_json)) {
        $coord_json = [];
    }

    $first = true;
    while (($row = fgetcsv($handle, 1024, "\t")) !== false) {
        if($first){
            $first = false; // header line
            continue;
        }
        if (!isset($row[0], $row[1], $row[2]) || $row[0] === '') {
            continue; // blank or incomplete line
        }
        $coord_json[$row[0]] = [doubleval($row[1]),doubleval($row[2])];
    }
    fclose($handle);

    // Written in one go; the separate fopen(..., "w") handle on the same
    // file was never used nor closed and has been removed.
    // The former flag argument `true` equals JSON_HEX_TAG (1).
    file_put_contents($outputPath, json_encode($coord_json, JSON_HEX_TAG));
}
// Second unconditional stop. It is itself never reached while the earlier
// top-level exit() in this file is in place.
exit();

/**
 * Recomputes the score column of final_count_comparator.csv.
 *
 * Rows (semicolon separated; first line is a header) are grouped by
 * "from" (col 1), source constraint (col 4) and target constraint (col 5).
 * Within each group, column 6 (the count) is replaced by the value returned
 * by calcNewScore() for that row and the group's total count. Every
 * rewritten row is appended to output.csv (the header is not written).
 *
 * @return array Map: group key => list of rows with the new value in column 6.
 */
function calcScore(){
    if (($handle = fopen("C:\\Users\\galar\\Downloads\\final_count_comparator.csv", 'r')) === false) {
        die(print_r(error_get_last(),true));
    }
    if (($file_input = fopen("C:\\Users\\galar\\Downloads\\output.csv","w")) === false) {
        fclose($handle);
        die(print_r(error_get_last(),true));
    }

    $final_ret = [];
    $first = true;
    while (($row = fgetcsv($handle, 1024, ";")) !== false) {
        if($first){
            $first = false; // header line
            continue;
        }
        if (count($row) < 7) {
            continue; // blank or truncated line
        }
        $from = $row[1];
        $src_cons = $row[4];
        $trg_cons = $row[5];

        $key="from:".$from."&src:".$src_cons."&trg:".$trg_cons;
        $final_ret[$key][] = $row;
    }
    fclose($handle);

    foreach($final_ret as $key=>&$arr){
        $total_per_key = 0;
        foreach($arr as $temp_element){
            $total_per_key += $temp_element[6];
        }
        foreach($arr as &$element){
            $element[6] = calcNewScore($element,$total_per_key);
            fputcsv($file_input, $element);
        }
        unset($element); // do not leave a reference to the last row behind
    }
    unset($arr);
    fclose($file_input);

    return $final_ret;
}

/**
 * Converts a row's count (column 6) into its share of the group total on a
 * rounded 0-10 scale: round(count / total * 100 / 10).
 *
 * NOTE(review): the example below speaks of percentages (8, 0.2, 91) while
 * the formula divides by a further 10 and rounds - confirm which scale is wanted.
 *
 * @param array     $element CSV row; index 6 holds the count.
 * @param int|float $total   Sum of the counts of the row's group.
 *
 * @return float Rounded score; 0.0 when the group total is zero.
 */
function calcNewScore($element, $total){
    //     177; dh ;D ;2; word-start;; 1
    //     178; dh ;* ;9; word-start;; 332
    //     179; dh ; Z;2; word-start;; 28
    //     We have a total of 361 cases of dh at the beginning of words. 8% are Z, 0.2% are D and 91% are * - The scores then would be 8, 0.2 and 91 respectively.
    if ($total == 0) {
        // An all-zero group would otherwise divide by zero.
        return 0.0;
    }

    return round($element[6]/($total/100)/10); // 12135 / (209429/100) / 10
}

/**
 * Finds surnames that rank among the top names (rank below 50) in many
 * countries.
 *
 * Reads surnames.tsv (tab separated: instance, country, occurrences, rank),
 * collects per instance the distinct countries with a rank below 50, and
 * writes ./getFreqFN.json with the instances found in more than 30 ("30")
 * or more than 20 ("20") countries.
 *
 * @return array Map: instance => list of distinct countries with rank < 50.
 */
function getFreq(){
    if (($handle = fopen("I:\\apps\\intuscan\\data\\nlp\\nameAnalyzer\\surnames.tsv", 'r')) === false) {
        die(print_r(error_get_last(),true));
    }

    $final_ret = [];
    $seen = []; // instance => country => true, for constant-time de-duplication
    while (($row = fgetcsv($handle, 1024, "\t")) !== false) {
        if (count($row) < 4) {
            continue; // blank or truncated line
        }
        $instance = $row[0];
        $country = $row[1];
        $rank = $row[3];

        // Require a numeric rank: a header cell such as "rank" must not be
        // compared as if it were 0.
        if(!empty($rank) && is_numeric($rank) && $rank < 50){
            write_to_log("TRACE", $instance . " ". $country);
            if(!isset($seen[$instance][$country])){
                $seen[$instance][$country] = true;
                $final_ret[$instance][] = $country;
            }
        }
    }
    fclose($handle);

    $final = [];
    foreach($final_ret as $instance=>$countries_arr){
        $country_count = count($countries_arr);
        write_to_log("TRACE", $instance . " ". $country_count);
        if($country_count > 30){
            $final["30"][] = $instance;
        }
        elseif($country_count > 20){
            $final["20"][] = $instance;
        }
    }
    // The former flag argument `true` equals JSON_HEX_TAG (1).
    $json = json_encode($final, JSON_HEX_TAG);
    file_put_contents("./getFreqFN.json", $json);

    return $final_ret;
}

/**
 * Builds a map from literal (column 0 of literals.txt) to the ";"-joined
 * list of languages of the language groups named in column 4, and writes it
 * to ./output.json.
 *
 * Rows with fewer than 8 space-separated cells, rows whose first cell starts
 * with "/", rows with an empty column 3 and rows of type "ascii" whose first
 * group is "ALL_LATIN" are ignored. Literals that end up without any
 * language are left out of the result.
 *
 * @return string The JSON document that was written.
 */
function literals(){

    if (($handle = fopen("C:\\Users\\galar\\OneDrive\\Desktop\\literals hack\\literals.txt", 'r')) === false) {
        die(print_r(error_get_last(),true));
    }

    $lang_group_arr = parseLangGroup();
    if (!is_array($lang_group_arr)) {
        $lang_group_arr = []; // parseLangGroup() returns false on failure
    }

    // literal => list of languages; kept as arrays until the very end so a
    // literal occurring on several rows can keep accumulating (imploding
    // inside the loop made the next array_merge() receive a string).
    $collected = [];
    while (($row = fgetcsv($handle, 1024, " ")) !== false) {
        if(count($row) < 8){
            continue;
        }
        $unicode = $row[0];
        $type = $row[1];

        if(mb_substr($unicode, 0, 1) == "/"){
            continue;
        }
        $group = explode(";", $row[4]);
        $letter = $row[3];
        if(empty($letter)){
            continue;
        }
        if($type == 'ascii' && $group[0] == 'ALL_LATIN'){
            continue;
        }

        if(!isset($collected[$unicode])){
            $collected[$unicode] = [];
        }
        foreach(array_map('strtoupper', $group) as $gr){
            $eth_arr = isset($lang_group_arr[$gr]) ? $lang_group_arr[$gr] : [];
            $collected[$unicode] = array_merge($collected[$unicode], $eth_arr);
        }
    }
    fclose($handle);

    $ret = [];
    foreach ($collected as $unicode => $languages) {
        if (!empty($languages)) {
            $ret[$unicode] = implode(";", $languages);
        }
    }

    // The former flag argument `true` equals JSON_HEX_TAG (1).
    $json = json_encode($ret, JSON_HEX_TAG);
    file_put_contents("./output.json", $json);
    return $json;

}



/**
 * Parses language_groups.xml into a map of group name => language names.
 *
 * @return array|false Map: group name => list of language names, or false
 *                     when the file cannot be read or parsed.
 */
function parseLangGroup(){
    $file_res = file_get_contents("C:\\Users\\galar\\OneDrive\\Desktop\\literals hack\\language_groups.xml");
    if ($file_res === false) {
        return false;
    }

    $xml=simplexml_load_string($file_res);
    if(empty($xml)){
        return false;
    }
    $json = json_encode($xml);
    $group_array = json_decode($json,TRUE);

    $groups = isset($group_array["group"]) ? $group_array["group"] : [];
    // A file with a single <group> decodes to that group instead of a list.
    if (isset($groups["@attributes"])) {
        $groups = [$groups];
    }

    $ret = [];
    foreach($groups as $element){
        if (!isset($element["@attributes"]["name"])) {
            continue;
        }
        $group_name = $element["@attributes"]["name"];
        // A group without <language> children simply has no languages.
        $languages = isset($element["language"]) ? $element["language"] : [];
        $ret[$group_name] = getlanguagues($languages);
    }
    return $ret;
}

/**
 * Extracts the language name from each element of $arr.
 *
 * An element either carries the name under ["@attributes"]["name"] or
 * directly under ["name"].
 *
 * @param array $arr Language elements.
 *
 * @return array List of language names, in input order.
 */
function getlanguagues($arr){
    $names = [];
    foreach ($arr as $language) {
        if (isset($language["@attributes"]["name"])) {
            $names[] = $language["@attributes"]["name"];
        } else {
            $names[] = $language["name"];
        }
    }
    return $names;
}





/**
 * Reports the instances that exist in the lexicon files of $old_dir but in
 * none of the lexicon files of $new_dir.
 *
 * All files returned by scanAllDir() are parsed with getInstanceLexicon()
 * and merged per directory. When instances are missing they are written to
 * "$output_dir/output.csv", one "<instance> \t <lexicon>" cell per line.
 *
 * @param string $new_dir    Directory with the current lexicon files.
 * @param string $old_dir    Directory with the previous lexicon files.
 * @param string $output_dir Directory receiving output.csv.
 *
 * @return void
 */
function compareDirs($new_dir,$old_dir, $output_dir){
    $old_files = scanAllDir($old_dir);
    $new_files = scanAllDir($new_dir);

    // instance => lexical entry. The maps are combined by KEY. The former
    // array_unique(array_merge(...)) de-duplicated by VALUE, silently dropping
    // every instance whose lexical entry equalled an earlier one, and it
    // renumbered numeric instance names.
    $new_final_arr = [];
    foreach ($new_files as $file){
        $lexicon = getInstanceLexicon($new_dir.DIRECTORY_SEPARATOR.$file);
        if (is_array($lexicon)) {
            $new_final_arr = array_replace($new_final_arr, $lexicon);
        }
    }
    $old_final_arr = [];
    foreach ($old_files as $file){
        $lexicon = getInstanceLexicon($old_dir.DIRECTORY_SEPARATOR.$file);
        if (is_array($lexicon)) {
            $old_final_arr = array_replace($old_final_arr, $lexicon);
        }
    }

    $diff_arr = array_diff_key($old_final_arr, $new_final_arr);
    if (empty($diff_arr)) {
        return;
    }

    $file_input = fopen($output_dir.DIRECTORY_SEPARATOR."output.csv","w");
    if ($file_input === false) {
        return;
    }
    foreach ($diff_arr as $instance=>$lexicon) {
        fputcsv($file_input, ["$instance \t $lexicon"]);
    }
    fclose($file_input);
}

/**
 * Recursively lists the files below $dir whose file name starts with "names".
 *
 * Entries starting with "." (hidden files, "." and "..") are ignored.
 *
 * @param string $dir Directory to scan.
 *
 * @return array Paths relative to $dir, using "/" as separator.
 */
function scanAllDir($dir) {
    $result = [];
    $entries = scandir($dir);
    if ($entries === false) {
        return $result;
    }

    foreach ($entries as $filename) {
        if ($filename[0] === '.') continue;
        $filePath = $dir . '/' . $filename;
        if (is_dir($filePath)) {
            // The recursive call already returns only "names*" files. The
            // prefix was formerly re-checked on the relative path, which
            // discarded everything nested more than one level deep
            // (e.g. "b/names.xml" does not start with "names").
            foreach (scanAllDir($filePath) as $childFilename) {
                $result[] = $filename . '/' . $childFilename;
            }
        } elseif (substr($filename, 0, 5) === "names") {
            $result[] = $filename;
        }
    }
    return $result;
}

/**
 * Returns the instances present in the old lexicon file but missing from
 * the new one.
 *
 * @param string $new_path Path of the current lexicon XML.
 * @param string $old_path Path of the previous lexicon XML.
 *
 * @return array Map: instance => lexical entry, taken from the old file.
 */
function compareXml($new_path,$old_path){
    $currentLexicon = getInstanceLexicon($new_path);
    $previousLexicon = getInstanceLexicon($old_path);

    $missing = [];
    foreach ($previousLexicon as $name => $entry) {
        if (array_key_exists($name, $currentLexicon)) {
            continue; // still there, nothing to report
        }
        $missing[$name] = $entry;
    }

    return $missing;
}

/**
 * Reads a lexicon XML file and maps every instance name found under
 * <lentry>/<semantic> to the <description> of its lexical entry.
 *
 * The XML is converted to arrays via json_encode/json_decode, so a single
 * child element appears as an associative array and repeated children as a
 * list; both shapes are handled for entries, semantic children and instances.
 * Entries without a description or without semantic data are skipped.
 *
 * Side effects: lifts the memory limit and the execution time limit.
 *
 * @param string $file Path of the lexicon XML file.
 *
 * @return array Map: instance name => description; empty when nothing is found
 *               or the file cannot be read or parsed.
 */
function getInstanceLexicon($file){
    ini_set('memory_limit', '-1');
    set_time_limit(0);

    $new_arr = []; // was never initialised, so empty inputs returned an undefined variable

    $file_res = file_get_contents($file);
    if ($file_res === false) {
        return $new_arr;
    }
    $file_xml = simplexml_load_string($file_res, null, LIBXML_NOCDATA);
    if ($file_xml === false) {
        return $new_arr;
    }
    $file_array = json_decode(json_encode($file_xml), TRUE);
    if (!is_array($file_array) || !isset($file_array['lentry']) || !is_array($file_array['lentry'])) {
        return $new_arr;
    }

    // A single <lentry> decodes to the entry itself rather than a list.
    $entries = $file_array['lentry'];
    if (!isset($entries[0])) {
        $entries = [$entries];
    }

    foreach ($entries as $arr) {
        if (!is_array($arr) || !array_key_exists('description', $arr)) {
            continue;
        }
        if (empty($arr['semantic']) || !is_array($arr['semantic'])) {
            continue;
        }
        $lexical_entry = $arr['description'];

        foreach ($arr['semantic'] as $semantic_elements) {
            if (!is_array($semantic_elements)) {
                continue;
            }
            // Either one element (has "@attributes") or a list of elements.
            $elements = array_key_exists('@attributes', $semantic_elements)
                ? [$semantic_elements]
                : $semantic_elements;

            foreach ($elements as $semantic_element) {
                if (!is_array($semantic_element) || !isset($semantic_element['instance'])
                    || !is_array($semantic_element['instance'])) {
                    continue;
                }
                // Same single-vs-list normalisation for <instance>.
                $instances = $semantic_element['instance'];
                if (isset($instances['@attributes'])) {
                    $instances = [$instances];
                }
                foreach ($instances as $instance_element) {
                    if (isset($instance_element['@attributes']['name'])) {
                        $new_arr[$instance_element['@attributes']['name']] = $lexical_entry;
                    }
                }
            }
        }
    }
    return $new_arr;
}


/**
 * Copies all 'arabeezi' documents, together with their annotations, from the
 * `ivannotator` database into `ivannotator_arabeezi`.
 *
 * Each copied document gets a fresh ID (continuing after the current maximum
 * in the target table) and its annotations are re-pointed to that new ID.
 *
 * NOTE(review): host and credentials are hard-coded; move them to
 * configuration. Annotation rows are copied with all their original column
 * values - if the annotations table has its own primary key, the inserts may
 * collide with existing rows; verify against the schema.
 *
 * @return void
 */
function mergeDB(){
    $lang = 'arabeezi';
    $db_name = "ivannotator_$lang";
    $connection = new mysqli("192.168.10.77", "root", "intuview", $db_name);
    if ($connection->connect_errno) {
        return;
    }

    // Builds an INSERT with quoted identifiers and escaped values; NULL stays
    // NULL instead of turning into an empty string.
    $buildInsert = function ($table, array $record) use ($connection) {
        $cols = [];
        $values = [];
        foreach ($record as $col => $value) {
            $cols[] = '`' . str_replace('`', '``', $col) . '`';
            $values[] = $value === null
                ? 'NULL'
                : "'" . $connection->real_escape_string((string)$value) . "'";
        }
        return "INSERT INTO $table (" . implode(',', $cols) . ") VALUES(" . implode(',', $values) . ")";
    };

    $res = $connection->query("SELECT MAX(ID) AS id FROM $db_name.documents");
    if (!$res) {
        return;
    }
    $row = $res->fetch_object();
    $document_id = $row->id+1;

    $res = $connection->query("SELECT * FROM ivannotator.documents WHERE uploadLang='$lang'");
    if (!$res) {
        return;
    }

    while ($row = $res->fetch_object()){
        $arr = (array)$row;
        $curr_id = (int)$arr["ID"];

        //get annotations data first
        $res_annotation = $connection->query("SELECT * FROM ivannotator.annotations WHERE docid=$curr_id");
        $anon_obj = $res_annotation ? $res_annotation->fetch_all(MYSQLI_ASSOC) : [];

        foreach($anon_obj as &$obj){
            // Overwrite the existing document-id column whatever its case is;
            // adding a second key ("docId" next to "docid") would name the
            // same column twice in the INSERT.
            $doc_key = "docId";
            foreach (array_keys($obj) as $column) {
                if (strcasecmp($column, "docid") === 0) {
                    $doc_key = $column;
                    break;
                }
            }
            $obj[$doc_key] = $document_id;
        }
        unset($obj);

        $arr["ID"] = $document_id;
        $document_id++;

        //insert documents & assos
        $connection->query($buildInsert("$db_name.documents", $arr));
        foreach($anon_obj as $obj){
            $connection->query($buildInsert("$db_name.annotations", $obj));
        }
    }
}




require_once 'analayzeFile.php';
// $res = getSubClasses("Concrete-relations");

/**
 * Returns the names of all direct and indirect sub-concepts of $concept, as
 * reported by the knowledge-base query service (runKBQuery()).
 *
 * The decoded reply is echoed as JSON for every queried concept.
 *
 * NOTE(review): there is no protection against cyclic concept hierarchies;
 * a cycle in the reply data would recurse without end.
 *
 * @param string $concept Concept name to expand.
 *
 * @return array List of sub-concept names (depth first); empty when the
 *               query yields no usable reply.
 */
function getSubClasses($concept){
    $final = [];
    $res = runKBQuery($concept);
    // Anything but a non-empty string (failed or empty reply) cannot be parsed.
    if (!is_string($res) || $res === '') {
        return $final;
    }
    $xml=simplexml_load_string($res);
    if(empty($xml)){
        return $final;
    }
    $json = json_encode($xml);
    $array = json_decode($json,TRUE);
    echo $json;

    if (!isset($array["Reply"]["KBQuery"]["Answer"]["SubConcepts"]["Concept"])) {
        return $final; // no sub-concepts in the reply
    }
    $arr = $array["Reply"]["KBQuery"]["Answer"]["SubConcepts"]["Concept"];
    // A single <Concept> decodes to the element itself instead of a list.
    if (isset($arr["@attributes"])) {
        $arr = [$arr];
    }

    foreach ($arr as $e){
        if (!isset($e["@attributes"]["name"])) {
            continue;
        }
        $val = $e["@attributes"]["name"];
        $final[] = $val;
        $final = array_merge($final, getSubClasses($val));
    }
    return $final;
}

// Manual test call. It is never executed while the unconditional top-level
// exit() earlier in this file is in place.
runKBQuery("Person-object");

/**
 * Sends a KBQuery request for $concept to the IntuScan word service and
 * waits for the reply.
 *
 * Host and port are read from the system settings. The reply is read until
 * a "<Status>Completed</Status>" marker followed by "</IVEnvelope>" arrives.
 * The last envelope of the reply is logged, echoed and returned.
 *
 * NOTE(review): the request is sent exactly as before - its <Type> and
 * <Argument> elements are not closed and $concept is not XML-escaped;
 * confirm what the service expects before changing it.
 *
 * @param string $concept Concept name to query.
 *
 * @return string|null The last <IVEnvelope> of the reply; "" when no
 *                     connection could be made; "timeout" after 900 seconds;
 *                     null when reading fails, the peer closes early or the
 *                     service reports a failure.
 */
function runKBQuery($concept){
    $mysqli = sqlCreateConnection("getKbInstanceSlots");
    $intuscanWordHost = getSystemSettingsProp($mysqli, "intuscanWordHost");
    $intuscanWordPort = getSystemSettingsProp($mysqli, "intuscanWordPort");
    sqlClose($mysqli, "getKbInstanceSlots System stats");

    $socket = socket_create(AF_INET, SOCK_STREAM, SOL_TCP);
    if ($socket === false) {
        return "";
    }

    $result = socket_connect($socket, $intuscanWordHost, $intuscanWordPort);
    if ($result === false) {
        write_to_log("ERROR", "socket_connect() failed. host: $intuscanWordHost and port: $intuscanWordPort");
        socket_close($socket); // was leaked on this path
        return "";
    }

    $out = "<IVEnvelope><Request id=\"1\"><KBQuery><Type name=\"\">
    <Argument name=\"$concept\"></KBQuery></Request></IVEnvelope>";

    write_to_log("ERROR", $out);

    socket_write($socket, $out, strlen($out));

    write_to_log("TRACE", "document was sent to intuscan $intuscanWordHost:$intuscanWordPort");
    //allowing 3 sendDocEngAjx.php access IntuScan simultaneously might return the 3rd file after 18 minutes (6 minutes max per file), so we try to wait TWICE 600 sec (10mins+10mins)
    socket_set_option($socket, SOL_SOCKET, SO_RCVTIMEO, array("sec" => 600, "usec" => 0));
    $output = "";
    $begin_time = time();
    while (true) {
        $read = socket_read($socket, 1024);
        if ($read === false) {
            write_to_log("ERROR", "analyze_file - read = false ");
            socket_close($socket);
            return;
        }
        if ($read === "") {
            // The peer closed the connection: nothing more will arrive, so
            // stop instead of spinning until the 900 second limit.
            write_to_log("ERROR", "runKBQuery - connection closed before a complete reply was received");
            socket_close($socket);
            return;
        }
        $output = $output . $read;

        if (strpos($output, "<Status>Failed</Status>") !== false) {
            // The former message referenced an undefined $docId.
            write_to_log("ERROR", "runKBQuery - intuscan reported a failure: " . $output);
            socket_close($socket);
            return;
        }

        $pos = strpos($output, "<Status>Completed</Status>");
        if ($pos !== false && strpos($output, "</IVEnvelope>", $pos) !== false) {
            break; // complete reply received
        }

        if ((time() - $begin_time) > 900) {
            socket_close($socket);
            return "timeout";
        }
    }

    socket_close($socket);
    $lastEnvPosition = strrpos($output, "<IVEnvelope>");
    $msg = substr($output, $lastEnvPosition === false ? 0 : $lastEnvPosition);
    write_to_log("ERROR", $msg);
    echo $msg;

    // Callers such as getSubClasses() parse the reply; previously nothing
    // was returned on success.
    return $msg;
}
/**
 * Evaluates Text_LanguageDetect against a labelled sample
 * (twitter_lang.csv; tab separated: language code, text).
 *
 * For every non-empty text the best detected language is compared with the
 * labelled code. Counters are logged every 10 processed texts and once at
 * the end.
 *
 * @param array $codes     Map: language code (as used in the file) => language name.
 * @param array $supported List of supported language names.
 *
 * @return array Counters: match, no_match, too_low_score, not_supported, detect_null.
 */
function detectLang($codes, $supported){
    $match = $no_match = $not_supported= $too_low_score = $detect_null = 0;
    $path = "C:\\Users\\galar\\Downloads\\twitter_lang.csv";

    if (($handle = fopen($path, 'r')) === false) {
        die(print_r(error_get_last(),true));
    }
    $not_supported_arr = [];
    $index = 1;
    $ld = new Text_LanguageDetect(); // one detector for all rows
    while (($row = fgetcsv($handle, 1024, "\t")) !== false) {
        $file_lang = trim((string)(isset($row[0]) ? $row[0] : ""));
        $text = trim((string)(isset($row[1]) ? $row[1] : ""));

        if(empty($text)){
            continue;
        }

        $index++;

        $detect_lang = $ld->detect($text, 1);
        $lang = is_array($detect_lang) ? key($detect_lang) : null;
        if(empty($lang)){
            $detect_null++;
            continue;
        }
        // Read the score under the detected key BEFORE remapping the language.
        $score = $detect_lang[$lang];
        // Text_LanguageDetect names this language "pidgin"; the former check
        // for 'pidigin' could never match.
        if($lang=='pidigin' || $lang=='pidgin'){
            $lang = "english";
        }
        if($score < 0.1){
            $too_low_score++;
            continue;
        }
        $mapped = lang_mapper($lang);
        $detect_lang_final = $mapped !== null ? $mapped : $lang;
        $iso_code = array_search($detect_lang_final, $codes, true);

        if($iso_code !== false && (string)$iso_code === $file_lang){
            $match++;
        }
        else{
            $curr = isset($codes[$file_lang]) ? $codes[$file_lang] : null;
            if($curr !== null && in_array($curr, $supported, true)){
                $no_match++;
            }
            else{
                if(!in_array($lang, $not_supported_arr, true)){
                    $not_supported_arr[] = $lang;
                    write_to_log("ERROR", "Not supported: " .$lang);
                }
                $not_supported++;
            }
        }
        if($index%10==0){
            write_to_log("TRACE", "match: $match, no_match: $no_match, too_low: $too_low_score, not_supported: $not_supported, detect_null: $detect_null");
        }
    }
    fclose($handle);

    write_to_log("TRACE", "match: $match, no_match: $no_match, too_low: $too_low_score, not_supported: $not_supported, detect_null: $detect_null");

    return [
        'match' => $match,
        'no_match' => $no_match,
        'too_low_score' => $too_low_score,
        'not_supported' => $not_supported,
        'detect_null' => $detect_null,
    ];
}



/**
 * Archives the files of one day and removes the originals.
 *
 * Every regular file directly inside $dir whose name starts with $curr_date
 * is added to "$dir/compressed/$curr_date.7z". The originals are deleted
 * only after the archive has been written successfully.
 *
 * NOTE(review): the archive is created with ZipArchive, i.e. it is a ZIP
 * file despite its ".7z" extension.
 *
 * @param string $dir       Directory holding the day files.
 * @param string $curr_date File name prefix selecting the files to archive.
 *
 * @return void
 */
function compressDayFiles($dir, $curr_date){
    $compressed_dir = $dir.DIRECTORY_SEPARATOR."compressed";
    if(!is_dir($compressed_dir)){
        mkdir_full($compressed_dir, 0777);
    }
    $zip_path = $compressed_dir.DIRECTORY_SEPARATOR."$curr_date.7z";
    $zip = new ZipArchive();
    $res = $zip->open($zip_path, ZipArchive::CREATE | ZipArchive::OVERWRITE);
    if ($res !== TRUE) {
        die ("An error occurred creating your ZIP file.");
    }

    $archived = [];
    foreach(scandir($dir) as $f){
        $full_path = $dir.DIRECTORY_SEPARATOR.$f;

        // is_dir() must look at the entry inside $dir, not relative to the
        // current working directory.
        if($f == '.' || $f == '..' || is_dir($full_path) || endsWith($f, '.7z')){
            continue;
        }
        if(strpos($f, $curr_date) === 0 && $zip->addFile($full_path, $f)){
            $archived[] = $full_path;
        }
    }

    // addFile() only registers the file; its content is read when the archive
    // is closed. The sources must therefore still exist here and may only be
    // removed once close() has succeeded.
    if (!$zip->close()) {
        trigger_error("compressDayFiles: could not write $zip_path; source files were kept", E_USER_WARNING);
        return;
    }
    foreach ($archived as $full_path) {
        unlink($full_path);
    }
}

/**
 * Tells whether $haystack begins with $needle (byte-wise, case-sensitive).
 *
 * @param string $haystack String to inspect.
 * @param string $needle   Expected prefix.
 *
 * @return bool True when the leading strlen($needle) bytes of $haystack are identical to $needle.
 */
function startsWith( $haystack, $needle ) {
    $prefix = substr( $haystack, 0, strlen( $needle ) );

    return $prefix === $needle;
}

/**
 * Extracts columns 1 and 2 of a tab separated file (joined by ",") and
 * writes them to output_1.txt, output_2.txt, ... in the current directory,
 * starting a new file each time the collected text exceeds 5000 bytes.
 *
 * NOTE(review): consecutive rows are concatenated without any separator
 * ("a,b" followed by "c,d" becomes "a,bc,d"). This is kept as it was;
 * confirm whether a "," or a line break between rows is intended.
 *
 * @param string $csv_ukrain Path of the tab separated input file.
 *
 * @return void
 */
function removecol($csv_ukrain = 'C:\Users\galar\Downloads\keywords.txt'){
    if (($handle = fopen($csv_ukrain, 'r')) === false) {
        die(print_r(error_get_last(),true));
    }
    $new_ukrain ="";
    $index = 1;
    while (($row = fgetcsv($handle, 1024, "\t")) !== false) {
        // Missing columns count as empty instead of raising undefined offsets.
        $second = isset($row[1]) ? $row[1] : "";
        $third = isset($row[2]) ? $row[2] : "";
        $new_ukrain.= $second.",".$third;
        if(strlen($new_ukrain) > 5000){
            file_put_contents("output_$index.txt", $new_ukrain);
            $new_ukrain = "";
            $index++ ;
        }
    }
    fclose($handle);

    // Flush the remainder; compare with "" so that a lone "0" is not lost.
    if($new_ukrain !== ""){
        file_put_contents("output_$index.txt", $new_ukrain);
    }
}

/**
 * Adds the places of a tab separated list (name, first coordinate, second
 * coordinate; first line is a header) to an existing coordinates JSON map
 * and writes the merged map, sorted by name, to a new file. Names already
 * present keep their existing coordinates.
 *
 * @param string $csv_new Tab separated list with the new places.
 * @param string $current Existing coordinates JSON (name => [a, b]).
 * @param string $output  File the merged JSON is written to.
 *
 * @return void
 */
function mergeCoord(
    $csv_new = "C:\\Users\\galar\\Downloads\\coord.txt",
    $current = "C:\\xampp\\htdocs\\galgalanda\\allInOneWeb\\files\\coordinates.json",
    $output = "C:\\xampp\\htdocs\\galgalanda\\allInOneWeb\\files\\coordinates_updated.json"
){
    $raw = file_get_contents($current,true);
    $current_data = $raw === false ? null : json_decode($raw, true);
    if (!is_array($current_data)) {
        // An unreadable or invalid file would otherwise hand null to array_key_exists().
        $current_data = [];
    }

    if (($handle = fopen($csv_new, 'r')) === false) {
        die(print_r(error_get_last(),true));
    }

    $first = true;
    while (($row = fgetcsv($handle, 1024, "\t")) !== false) {
        if($first){
            $first = false; // header line
            continue;
        }
        if (!isset($row[0], $row[1], $row[2]) || $row[0] === '') {
            continue; // blank or incomplete line
        }
        if(array_key_exists($row[0], $current_data)){
            continue;
        }
        $current_data[$row[0]] = [doubleval($row[1]),doubleval($row[2])];
    }
    fclose($handle);
    ksort($current_data);

    // The former flag argument `true` equals JSON_HEX_TAG (1).
    file_put_contents($output, json_encode($current_data, JSON_HEX_TAG));
}

// icl();

/**
 * Build a cleaned-up copy of one source row and append it to $final_res.
 *
 * The row's columns are first copied as-is, then selected columns are
 * replaced by a language-filtered version:
 *   - "Latin" filtering keeps only ASCII letters, digits, hyphens and spaces.
 *   - "Hebrew" filtering keeps the first run of characters in U+0590-U+05FF.
 * Finally every value is passed through trimthat().
 *
 * Column legend (carried over from the original notes):
 *   Ref             = Original DB
 *   Matnr           = SAP Mat. Nr.
 *   MPN_new         = ONLY the English without numbers that are separate
 *   MPN_no          = ONLY the SEPARATED numbers
 *   MPN_heb         = ONLY with Hebrew and with associated numbers
 *   Manufacture_new = all the values in Manufacture (they are all catalog
 *                     numbers) except the Hebrew
 *   ShortHeb_new    = ONLY Hebrew and the associated English
 *   LongHeb_new     = ONLY Hebrew and the associated English
 *   ShortEng_new    = ONLY without Hebrew
 *   LongEng_new     = ONLY without Hebrew
 *   Possible_Match  = (Boolean) - currently always written as ""
 *
 * NOTE(review): separated numbers are stored under the key 'MPN_num' while the
 * legend and the copied column use 'MPN_no' - confirm which name consumers read.
 *
 * @param object|array $row       One result row (cast to array internally).
 * @param array        $final_res Accumulator, passed by reference; one entry is appended.
 * @return void
 */
function getNewObj($row, &$final_res){
    $row_arr = (array)$row;

    // Every single character that is an ASCII letter, digit, hyphen or space.
    $latinChars = static function ($text) {
        $found = [];
        preg_match_all('/[a-zA-Z-0-9 ]/', $text, $found);
        return $found[0] ?? [];
    };
    // Every run of characters from the Hebrew block.
    $hebrewRuns = static function ($text) {
        $found = [];
        preg_match_all("/[\\x{0590}-\\x{05FF}]+/u", $text, $found);
        return $found[0] ?? [];
    };

    // Straight copies. Missing or NULL columns become "" - which is what
    // trimthat() turns them into at the end anyway.
    $new_arr = ['ref' => $row_arr['Ref'] ?? ''];
    $copied = ['Matnr', 'MPN_org', 'MPN_no', 'MPN_heb', 'Manufacture_new',
        'LongHeb_new', 'ShortEng_new', 'LongEng_new'];
    foreach ($copied as $column) {
        $new_arr[$column] = $row_arr[$column] ?? '';
    }
    $new_arr['Possible_Match'] = "";//$row_arr['Possible_Match'];

    /**
     * MPN: split the Latin part into words (MPN_new) and long stand-alone
     * numbers (MPN_num); fall back to the first Hebrew run (MPN_heb).
     */
    $MPN_org = $row_arr['MPN_org'] ?? '';
    if (!empty($MPN_org)) {
        $eng = $latinChars($MPN_org);
        $heb = $hebrewRuns($MPN_org);

        // The original tested !empty($eng[0][0]), which is false when the
        // first matched character is "0"; test for "any match" instead.
        if (!empty($eng)) {
            $new_mpn = $new_mpn_num = [];
            foreach (explode(' ', implode('', $eng)) as $mpn_element) {
                $digits_only = !preg_match('/[A-Za-z]/', $mpn_element)
                    && preg_match('/[0-9]/', $mpn_element);
                if ($digits_only) {
                    // NOTE(review): digit-only tokens of 4 characters or fewer
                    // are dropped entirely, as in the original - confirm intent.
                    if (strlen($mpn_element) > 4) {
                        $new_mpn_num[] = $mpn_element;
                    }
                } else {
                    $new_mpn[] = $mpn_element;
                }
            }
            if (!empty($new_mpn)) {
                $new_arr['MPN_new'] = implode(' ', $new_mpn);
            }
            if (!empty($new_mpn_num)) {
                $new_arr['MPN_num'] = implode(' ', $new_mpn_num);
            }
        } elseif (!empty($heb)) {
            $new_arr['MPN_heb'] = $heb[0];
        }
    }

    /**
     * Manufacture_new: Latin part of Manufacture_org.
     */
    $Manufacture_org = $row_arr['Manufacture_org'] ?? '';
    if (!empty($Manufacture_org)) {
        $eng = $latinChars($Manufacture_org);
        if (!empty($eng)) {
            $new_arr['Manufacture_new'] = implode('', $eng);
        }
    }

    /**
     * ShortHeb_new: first Hebrew run of the source ShortHeb_new column.
     */
    $ShortHeb_org = $row_arr['ShortHeb_new'] ?? '';
    if (!empty($ShortHeb_org)) {
        $heb = $hebrewRuns($ShortHeb_org);
        if (!empty($heb)) {
            $new_arr['ShortHeb_new'] = $heb[0];
        }
    }

    /**
     * ShortEng_new: Latin part only.
     */
    $ShortEng_new = $row_arr['ShortEng_new'] ?? '';
    if (!empty($ShortEng_new)) {
        $eng = $latinChars($ShortEng_new);
        if (!empty($eng)) {
            $new_arr['ShortEng_new'] = implode('', $eng);
        }
    }

    /**
     * LongEng_new: Latin part only.
     */
    $LongEng_new = $row_arr['LongEng_new'] ?? '';
    if (!empty($LongEng_new)) {
        $eng = $latinChars($LongEng_new);
        if (!empty($eng)) {
            $new_arr['LongEng_new'] = implode('', $eng);
        }
    }

    /**
     * LongHeb_new: first Hebrew run only.
     */
    $LongHeb_new = $row_arr['LongHeb_new'] ?? '';
    if (!empty($LongHeb_new)) {
        $heb = $hebrewRuns($LongHeb_new);
        if (!empty($heb)) {
            $new_arr['LongHeb_new'] = $heb[0];
        }
    }

    $new_arr = array_map('trimthat', $new_arr);

    $final_res[] = $new_arr;
}

/**
 * Strip surrounding whitespace, then surrounding asterisks and spaces.
 *
 * @param mixed $element Scalar or null; it is cast to string first so that
 *                       null (e.g. an SQL NULL column) yields "" without the
 *                       PHP 8.1+ "passing null to trim()" deprecation.
 * @return string The trimmed text.
 */
function trimthat($element){
    $text = trim((string)$element);

    // Second pass removes only '*' and ' ' from both ends.
    return trim($text, '* ');
}

/**
 * Return a copy of $arr with trim() applied to every element.
 *
 * Keys and element order are preserved; the caller's array is not modified.
 *
 * @param array $arr Array of strings.
 * @return array The trimmed copy.
 */
function cleanArr($arr){
    foreach ($arr as $key => $value) {
        $arr[$key] = trim($value);
    }

    return $arr;
}

/**
 * Return the common leading part of two strings, trimmed of whitespace.
 *
 * Characters are compared position by position (byte-wise) until the first
 * mismatch or the end of the shorter string. A common prefix that trims down
 * to a lone "*" is reported as an empty string.
 *
 * @param string $a First string.
 * @param string $b Second string.
 * @return string Trimmed common prefix, or "" when there is none.
 */
function intersection_str($a, $b)
{
    $limit = min(strlen($a), strlen($b));

    $matched = 0;
    while ($matched < $limit && substr($a, $matched, 1) == substr($b, $matched, 1)) {
        $matched++;
    }

    $common = trim(substr($a, 0, $matched));

    return $common == "*" ? "" : $common;
}




/**
 * Load every row of 150222_final_table and normalise it with getNewObj().
 *
 * Terminates the script via die() when the connection or the query fails.
 *
 * NOTE(review): host, user and password are hard-coded here; consider moving
 * them to configuration.
 *
 * @return array List of normalised rows in the order the query returned them
 *               (empty when the table is empty).
 */
function icl(){
    $connection = new mysqli("127.0.0.1", "root", "intuview", "icl");
    if ($connection->connect_error) {
        die($connection->connect_error);
    }

    $res = $connection->query("SELECT * FROM 150222_final_table");
    if ($res === false) {
        die($connection->error);
    }

    $final_res = [];
    while ($row = $res->fetch_object()) {
        // getNewObj() appends to $final_res by reference and returns nothing.
        getNewObj($row, $final_res);
    }

    // The original built an unused column list from $final_res[0], which
    // failed on an empty table; it has been removed. A commented-out draft
    // that re-inserted each row into 270222_final_table was dropped as well.
    $res->free();
    $connection->close();

    return $final_res;
}

/**
 * Heuristic: does a single byte value dominate the input?
 *
 * Counts how often each byte occurs and reports true when the most frequent
 * one makes up at least 33% of the input's length.
 *
 * @param string $input Text to examine.
 * @return bool True when one byte accounts for >= 33% of $input; false
 *              otherwise, including for empty input (the original hit
 *              max([]) and a division by zero there).
 */
function contains_gibberish( $input )
{
    $input = (string) $input;
    $length = strlen( $input );
    if ( $length === 0 ) {
        return false;
    }

    // Mode 1: only byte values that actually occur, mapped to their counts.
    $frequencies = count_chars( $input, 1 );

    return max( $frequencies ) / $length * 100 >= 33;
}



/**
 * Split six text columns of a row by script and reconcile Hebrew/English pairs.
 *
 * For each of MPN, Manufacture, ShortHebDesc, ShortEngDesc, HEB_LNG_TXT and
 * ENG_LNG_TXT four entries are produced:
 *   <col>_org - the raw value ("" when the column is missing or null)
 *   <col>_num - all ASCII digits, concatenated
 *   <col>_heb - all runs of Hebrew characters and spaces, concatenated
 *   <col>_eng - all ASCII letters and spaces, concatenated
 * Note that _heb and _eng both keep spaces, so a value containing only a
 * space is "non-empty" for the checks below.
 *
 * The short and long description pairs are then reconciled (dropping or
 * moving the *_org entry that sits in the wrong language column), their common
 * prefix is stored as shortDesc_mutual / longText_mutual, and empty MPN /
 * Manufacture language parts are removed. The finished row is appended to
 * $final_res. Decisions are traced through write_to_log().
 *
 * @param object $row       Row object exposing ref, Matnr and the six columns above.
 * @param array  $final_res Accumulator, passed by reference; one entry is appended.
 * @return void
 */
function getNewObjObs($row, &$final_res){
    $res = ["ref"=>$row->ref, "Matnr" => $row->Matnr];

    $arr = ["MPN",
        "Manufacture",
        "ShortHebDesc",
        "ShortEngDesc",
        "HEB_LNG_TXT",
        "ENG_LNG_TXT"];

    foreach($arr as $col){
        $val = $row->$col ?? "";
        $heb = []; $eng = []; $num = [];
        preg_match_all('/[0-9]/', $val, $num );
        preg_match_all( "/[\\x{0590}-\\x{05FF} ]+/u", $val, $heb );
        preg_match_all('/[a-zA-Z ]/', $val, $eng);

        $res = array_merge($res,[$col."_org"=>$val, $col."_num"=>implode('', $num[0]), $col."_heb"=>implode('', $heb[0]), $col."_eng"=>implode('', $eng[0])]);
    }

    //SHORTS

    $ShortHebDesc = $res["ShortHebDesc_org"];
    $ShortEngDesc = $res["ShortEngDesc_org"];

    // NOTE(review): these four aliases are crossed - the $ShortEngDesc_* names
    // hold the ShortHebDesc parts and vice versa. The symmetric checks below
    // are unaffected, but the asymmetric ones (the first `if` and the `else`
    // branch) depend on which column is which. Left exactly as found because
    // the intended pairing cannot be determined from this function alone.
    $ShortEngDesc_eng = $res["ShortHebDesc_eng"];
    $ShortEngDesc_heb = $res["ShortHebDesc_heb"];
    $ShortHebDesc_eng = $res["ShortEngDesc_eng"];
    $ShortHebDesc_heb = $res["ShortEngDesc_heb"];

    if($ShortHebDesc_eng==$ShortEngDesc ){
        unset($res["ShortEngDesc_eng"]);
    }

    if($ShortHebDesc == $ShortEngDesc){
        //if no hebrew in heb_col, and not hebrew in eng_col , and there is english in one of this cols:
        //unset the hebrew column!
        if((empty($ShortEngDesc_heb) && empty($ShortHebDesc_heb)) && !(empty($ShortEngDesc_eng) && empty($ShortHebDesc_eng))){
            write_to_log("TRACE", "unset ShortHebDesc_org. ref: $res[ref] & matnr: $res[Matnr]");
            unset($res["ShortHebDesc_org"]);
        }
        //if no english in heb_col, and not english in eng_col , and there is hebrew in one of this cols:
        //unset the english column!
        if(!(empty($ShortEngDesc_heb) && empty($ShortHebDesc_heb)) && (empty($ShortEngDesc_eng) && empty($ShortHebDesc_eng))){

            write_to_log("TRACE", "unset ShortEngDesc_org.  ref: $res[ref] & matnr: $res[Matnr]");

            unset($res["ShortEngDesc_org"]);
        }
    }
    else{
        //When ShortEnglish is empty and ShortHebrew has only English - to move to ShortEnglish
        if(empty($ShortEngDesc) && empty($ShortHebDesc_heb) && !empty($ShortHebDesc_eng)){
            $res["ShortEngDesc_org"] = $res["ShortHebDesc_org"];
            unset($res["ShortHebDesc_org"]);
        }
        //When ShortHebrew is empty and ShortEnglish has Hebrew (at all) to move to ShortHebrew
        if(empty($ShortHebDesc) && !empty($ShortEngDesc_heb)){
            $res["ShortHebDesc_org"] = $res["ShortEngDesc_org"];
            unset($res["ShortEngDesc_org"]);
        }
    }

    // Common prefix of the two raw short descriptions (captured before any unset).
    $intersect = intersection_str($ShortEngDesc,$ShortHebDesc);

    $res["shortDesc_mutual"] = $intersect;


    //LONG

    $longHebDesc = $res["HEB_LNG_TXT_org"];
    $longEngDesc = $res["ENG_LNG_TXT_org"];

    $longHebDesc_eng = $res["HEB_LNG_TXT_eng"];
    $longHebDesc_heb = $res["HEB_LNG_TXT_heb"];
    $longEngDesc_eng = $res["ENG_LNG_TXT_eng"];
    $longEngDesc_heb = $res["ENG_LNG_TXT_heb"];

    if($longHebDesc == $longEngDesc){
        //if no hebrew in heb_col, and not hebrew in eng_col , and there is english in one of this cols:
        //unset the hebrew column!
        if((empty($longHebDesc_heb) && empty($longEngDesc_heb)) && !(empty($longHebDesc_eng) && empty($longEngDesc_eng))){
            // Fixed: the original unset the non-existent key "ENG_HEB_TXT_org",
            // so the Hebrew long-text column was never actually removed.
            write_to_log("TRACE", "unset HEB_LNG_TXT_org.  ref: $res[ref] & matnr: $res[Matnr]");

            unset($res["HEB_LNG_TXT_org"]);
        }
        if(!(empty($longHebDesc_heb) && empty($longEngDesc_heb)) && (empty($longHebDesc_eng) && empty($longEngDesc_eng))){
            //if no english in heb_col, and not english in eng_col , and there is hebrew in one of this cols:
            //unset the english column!
            write_to_log("TRACE", "unset ENG_LNG_TXT_org.  ref: $res[ref] & matnr: $res[Matnr]");

            unset($res["ENG_LNG_TXT_org"]);
        }

    }
    else{
        //When LongEnglish is empty and LongHebrew has only English - to move to LongEnglish
        if(empty($longEngDesc) && empty($longHebDesc_heb) && !empty($longHebDesc_eng)){
            $res["ENG_LNG_TXT_org"] = $res["HEB_LNG_TXT_org"];
            unset($res["HEB_LNG_TXT_org"]);
        }
        //When LongHebrew is empty and LongEnglish has Hebrew (at all) to move to LongHebrew
        if(empty($longHebDesc) && !empty($longEngDesc_heb)){
            $res["HEB_LNG_TXT_org"] = $res["ENG_LNG_TXT_org"];
            unset($res["ENG_LNG_TXT_org"]);
        }
    }

    $intersect = intersection_str($longEngDesc,$longHebDesc);
    $res["longText_mutual"] = $intersect;

    //MPNS
    $MPN_eng = $res["MPN_eng"];
    $MPN_heb = $res["MPN_heb"];

    // Drop whichever language part is empty while the other one has content.
    // Fixed: the original repeated the first condition for the second unset,
    // so a Hebrew-only MPN lost its Hebrew part as well.
    if(empty($MPN_eng) && !empty($MPN_heb)){
        write_to_log("TRACE", "unset MPN_eng.  ref: $res[ref] & matnr: $res[Matnr]");

        unset($res["MPN_eng"]);
    }
    if(!empty($MPN_eng) && empty($MPN_heb)){
        write_to_log("TRACE", "unset MPN_heb.  ref: $res[ref] & matnr: $res[Matnr]");

        unset($res["MPN_heb"]);
    }


    //Manufacture
    $Manufacture_eng = $res["Manufacture_eng"];
    $Manufacture_heb = $res["Manufacture_heb"];

    // Same rule, and the same duplicated-condition fix, as for MPN above.
    if(empty($Manufacture_eng) && !empty($Manufacture_heb)){
        write_to_log("TRACE", "unset Manufacture_eng");

        unset($res["Manufacture_eng"]);
    }
    if(!empty($Manufacture_eng) && empty($Manufacture_heb)){
        write_to_log("TRACE", "unset Manufacture_heb");

        unset($res["Manufacture_heb"]);
    }

    $final_res[] = $res;

}


// calc("C:\\Users\\galar\\OneDrive\\Desktop\\Names Task\\given-names.tsv");

/**
 * Compute the population standard deviation of the rank for every name.
 *
 * Input is a tab-separated file whose first line is a header; column 1 is the
 * name and column 3 the rank. For each name with ranks r_1..r_N and mean AR:
 *
 *     SD = sqrt( (1/N) * ((r_1 - AR)^2 + ... + (r_N - AR)^2) )
 *
 * The file is read twice (means first, then deviations) so that only per-name
 * aggregates are held in memory. The result, sorted by ascending SD, is also
 * written as JSON to the hard-coded output.json path.
 *
 * Fixes over the original: it computed 2^(r - AR) instead of (r - AR)^2 and
 * sqrt(1 / (sum * N)) instead of sqrt(sum / N); accumulators were used before
 * being initialised; the second file handle was never closed.
 *
 * Terminates the script via die() when the file cannot be opened.
 *
 * @param string $path Path to the TSV file.
 * @return array List of ["name" => string, "value" => float], ascending by value.
 */
function calc($path){
    if (($handle = fopen($path, 'r')) === false) {
        die(print_r(error_get_last(),true));
    }

    // Pass 1: rank total and observation count per name.
    $total_arr = [];
    $first = true;
    while (($row = fgetcsv($handle, 1024, "\t")) !== false) {
        if ($first) {
            // Header line.
            $first = false;
            continue;
        }
        if (!isset($row[0], $row[2])) {
            // Blank or short line.
            continue;
        }
        $name = $row[0];
        if (!isset($total_arr[$name])) {
            $total_arr[$name] = ["total" => 0.0, "count" => 0];
        }
        $total_arr[$name]["total"] += (float) $row[2];
        $total_arr[$name]["count"]++;
    }

    // Average rank (AR) per name.
    $AR_arr = [];
    foreach ($total_arr as $name => $element) {
        $AR_arr[$name] = $element["total"] / $element["count"];
    }

    // Pass 2: sum of squared deviations from the mean, per name.
    $temp_results = array_fill_keys(array_keys($total_arr), 0.0);
    rewind($handle);
    $first = true;
    while (($row = fgetcsv($handle, 1024, "\t")) !== false) {
        if ($first) {
            $first = false;
            continue;
        }
        if (!isset($row[0], $row[2])) {
            continue;
        }
        $name = $row[0];
        $deviation = (float) $row[2] - $AR_arr[$name];
        $temp_results[$name] += $deviation * $deviation;
    }
    fclose($handle);

    $final_res = [];
    foreach ($temp_results as $name => $sum_of_squares) {
        $count = $total_arr[$name]["count"];
        // Cast: purely numeric names come back from array keys as integers.
        $final_res[] = ["name" => (string) $name, "value" => sqrt($sum_of_squares / $count)];
    }

    usort($final_res, function ($item1, $item2) {
        return $item1["value"] <=> $item2["value"];
    });

    $json_data = json_encode($final_res);
    file_put_contents("C:\\Users\\galar\\OneDrive\\Desktop\\Names Task\\output.json", $json_data);

    return $final_res;
}














/**
 * Write the distinct first-column values of a TSV file to filtered.csv.
 *
 * Reads $path row by row and keeps the first occurrence of every value in
 * column 1. Repeated values are echoed as they are encountered. The distinct
 * values are then written to "filtered.csv" next to the input file with a
 * single fputcsv() call that uses "\n" as the delimiter, i.e. one value per
 * line (values containing whitespace or quotes get CSV-quoted by fputcsv).
 * Finally "DONE!" and the input path are echoed.
 *
 * Changes over the original: the unused (and never closed) database
 * connection is no longer opened; duplicates are detected with an exact,
 * constant-time key lookup instead of a loose, linear in_array(); the input
 * handle is closed; failure to open the output file is reported.
 *
 * Terminates the script via die() when either file cannot be opened.
 *
 * @param string $path Path to the tab-separated input file.
 * @return void
 */
function filterList($path){
    if (($handle = fopen($path, 'r')) === false) {
        die(print_r(error_get_last(),true));
    }
    $output = dirname($path).DIRECTORY_SEPARATOR."filtered.csv";
    if (($fp = fopen($output, 'wb')) === false) {
        fclose($handle);
        die(print_r(error_get_last(),true));
    }

    // name => true, in first-seen order.
    $seen = [];
    while (($row = fgetcsv($handle, 1024, "\t")) !== false) {
        $name = (string) ($row[0] ?? '');
        if (isset($seen[$name])) {
            echo $name;
            continue;
        }
        $seen[$name] = true;
    }
    fclose($handle);

    // Numeric-looking names become integer keys; turn them back into strings.
    $names = array_map('strval', array_keys($seen));

    // Enclosure and escape are the historical defaults, spelled out explicitly.
    fputcsv($fp, $names, "\n", '"', "\\");
    fclose($fp);

    echo "DONE!";
    echo $path;
}

/**
 * Tag every instance listed in a TSV file as a "Family-name" via the update service.
 *
 * For each row, column 1 is treated as an instance label. The label is looked
 * up in the `instances` table of the "rdfstore" connection; when a row is
 * found, its `instance` value is URL-encoded and sent to updateEng.php
 * (queryType=insertnewtype, typeLabel=Family-name) with an HTTP GET. Progress
 * is echoed per row, followed by "DONE!" and the input path.
 *
 * Terminates the script via die() when the file cannot be opened.
 *
 * @param string $path Path to the tab-separated file of labels.
 * @return void
 */
function applyNames($path){
    if (($handle = fopen($path, 'r')) === false) {
        die(print_r(error_get_last(),true));
    }

    $mysqli = sqlCreateDBConnection("rdfstore");
    $i = 1;
    while (($row = fgetcsv($handle, 1024, "\t")) !== false) {
        $instance = (string) ($row[0] ?? '');
        echo "Running on $instance - number $i\n";
        $i++;

        // The label comes straight from the file, so escape it before it is
        // embedded in SQL. NOTE(review): sqlCreateDBConnection() is assumed to
        // return a mysqli object; addslashes() is only a fallback if it does not.
        $label = $mysqli instanceof mysqli
            ? $mysqli->real_escape_string($instance)
            : addslashes($instance);

        $res = sqlQuery($mysqli, "SELECT instance FROM instances WHERE label=\"$label\" limit 1");
        // A failed query must not be dereferenced.
        $match = is_object($res) ? $res->fetch_object() : null;

        if (!empty($match)) {
            $qname = urlencode($match->instance);
            //             $url = "http://localhost/ivcontentm/updateEng.php?queryType=insertnewtype&typeLabel=Given-NAME&qname=$qname";
            $url = "http://192.168.10.110/ivcontentm/updateEng.php?queryType=insertnewtype&typeLabel=Family-name&qname=$qname";
            // Fire the request; the response body is not used.
            file_get_contents($url);
        }
    }
    fclose($handle);

    echo "DONE!";
    echo $path;
}

/**
 * Copy Facebook posts and comments from the `facebook` table into raw_stream_data.
 *
 * Streams every row of `facebook` (unbuffered) from the "na" connection. Each
 * row describes one comment together with the post it belongs to:
 *   - the first time a post (competitor_name + date) is seen, a post record
 *     is queued under the row's own id;
 *   - a comment record is always queued, with a synthetic id counting up from
 *     11100000 and t_parent_id pointing at the post's id.
 * Queued records are serialised by create_vals() (which tags them with source
 * id 2) and inserted through the crawler connection in batches.
 *
 * Fixes over the original: the batch buffer is initialised; the flush
 * threshold is ">= 1000" instead of "% 1000 == 0" - because one row can queue
 * two values, the old test could be stepped over and the buffer could keep
 * growing until the end of the run.
 *
 * @return void
 */
function extractFB(){
    $mysqli_c = null;
    createCrawlerConnection($mysqli_c);
    $mysqli = sqlCreateConnection("na");

    //     $sql = "SELECT * FROM facebook limit $start, 100000";
    $uresult = $mysqli->query("SELECT * FROM facebook", MYSQLI_USE_RESULT);

    // Sends one batch of "(id, source, 'json')" tuples to raw_stream_data.
    $flush = function (array $values) use ($mysqli_c) {
        $bulk_values_json = join(' , ', $values);
        $query = "INSERT into raw_stream_data(element_id, source, data) VALUES $bulk_values_json";
        sqlQueryUnique($mysqli_c, $query,"Duplicate"); //will insert and prevent looping with this extra flag - the error of duplicated
    };

    $posts_tracker = array();
    $bulk_values = [];
    $first = true;
    $comments_counter = 11100000;
    while ($row = $uresult->fetch_assoc()) {
        // NOTE(review): the first result row is skipped. That looks like a
        // leftover from the CSV version (header line); confirm it is still
        // wanted now that rows come from the database.
        if($first){
            $first = false;
            continue;
        }

        // Only rows with exactly 17 columns are processed.
        if(17 !== count($row)){
            continue;
        }
        $data = array_change_key_case($row);

        //check if the main post in the post tracker - if not add it first
        $unique_key = $data["competitor_name"].$data['date'];
        if(!array_key_exists($unique_key, $posts_tracker)){
            $posts_tracker[$unique_key] = $data["id"];
            $res = array("t_id" => $data["id"],
                "user_name" => $data["competitor_name"],
                "facebook_page" => $data["facebook_page"],
                "no_comment" => $data["no_comment"],
                "like" => $data["like"],
                "love" => $data["love"],
                "haha" => $data["haha"],
                "wow" => $data["wow"],
                "sad" => $data["sad"],
                "angry" => $data["angry"],
                "number_of_shared" => $data["number_of_shared"],
                "text" => $data["post"]
            );
            $bulk_values[] = create_vals($mysqli, $res);
        }

        $res_comment = array("t_id" =>$comments_counter++,
            "user_name" => $data["commenter_name"],
            "date" => $data["comment_date"],
            "text" => $data["comments"],
            "facebook_page" => $data["facebook_page"],
            "t_parent_id" => $posts_tracker[$unique_key]
        );
        $bulk_values[] = create_vals($mysqli, $res_comment, $posts_tracker[$unique_key]);

        if(count($bulk_values) >= 1000){
            $flush($bulk_values);
            $bulk_values = [];
        }
    }

    // Whatever is left after the last full batch.
    if(!empty($bulk_values)){
        $flush($bulk_values);
    }
    $uresult->close();
}

/**
 * Render one record as a "(id, source, 'json')" tuple for a bulk INSERT.
 *
 * The record is JSON-encoded (Unicode left unescaped) and escaped with the
 * connection's real_escape_string(). The source column is always the literal 2.
 *
 * @param object     $mysqli      Connection object providing real_escape_string().
 * @param array      $data        Record to serialise; its 't_id' becomes the first tuple member.
 * @param mixed|null $parent_data Accepted but not used.
 * @return string SQL VALUES tuple, e.g. "(5, 2, '{...}')".
 */
function create_vals($mysqli, $data, $parent_data = null){
    $escaped_json = $mysqli->real_escape_string(json_encode($data, JSON_UNESCAPED_UNICODE));

    return '(' . $data['t_id'] . ", 2, '" . $escaped_json . "')";
}

/**
 * Group site host names by state from a comma-separated file.
 *
 * The first line is a header and is skipped. For every other row, column 1 is
 * the state (lower-cased) and column 3 a URL; the URL's host, with every
 * "www." removed, is appended to that state's list. Rows where both the
 * state and the host are empty are skipped.
 *
 * NOTE(review): a row with a state but no parsable host still adds an empty
 * string to that state's list (unchanged from the original, which tested
 * "state AND host empty"); confirm whether "OR" was intended.
 *
 * Terminates the script via die() when the file cannot be opened.
 *
 * @param string $sites_csv Path to the CSV file.
 * @return array Map of lower-cased state => list of host names.
 */
function fetchSitesStates($sites_csv){
    if (($handle = fopen($sites_csv, 'r')) === false) {
        die('Error opening file');
    }

    // Consume the header line.
    fgetcsv($handle, 1024, ',');

    $final_res = [];
    while (($row = fgetcsv($handle, 1024, ',')) !== false) {
        $state = strtolower((string) ($row[0] ?? ''));

        // parse_url() yields null or false when there is no host (for example
        // a URL without a scheme); treat that as an empty host, not a warning.
        $host = parse_url((string) ($row[2] ?? ''), PHP_URL_HOST);
        $host = is_string($host) ? str_replace('www.', '', $host) : '';

        if (empty($state) && empty($host)) {
            continue;
        }
        $final_res[$state][] = $host;
    }
    // The original never released the handle.
    fclose($handle);

    return $final_res;
}

/**
 * Explode ./files/ZIN.csv into one JSON file per data row.
 *
 * The first line supplies the field names (control characters and bytes
 * 0x80-0xFF are stripped from them). Every following row is combined with
 * those names, its "date" field is reformatted as d-m-Y, and the result is
 * written to ./files/ZIN/report_<n>.json, where n counts written rows from 1.
 * Finally an empty JSON array ("[]") is echoed, as before.
 *
 * Fixes over the original: rows whose column count differs from the header
 * are skipped instead of making array_combine() fail (the same guard fb()
 * uses); a date that strtotime() cannot parse is left untouched instead of
 * silently becoming 01-01-1970; no dangling foreach reference is left behind.
 *
 * Terminates the script via die() when the CSV cannot be opened.
 *
 * @return void
 */
function parseICL(){
    $path = './files/ZIN';
    $file = './files/ZIN.csv';
    if (($handle = fopen($file, 'r')) === false) {
        die('Error opening file');
    }

    $headers = fgetcsv($handle, 1024, ',');
    if (!is_array($headers)) {
        // Empty file: nothing to combine against.
        $headers = [];
    }
    $headers = array_map(function ($head) {
        return preg_replace('/[\x00-\x1F\x80-\xFF]/', '', (string) $head);
    }, $headers);

    $i = 0;
    while (($row = fgetcsv($handle, 1024, ',')) !== false) {
        if (count($row) !== count($headers)) {
            continue;
        }
        $data = array_combine($headers, $row);

        $timestamp = strtotime((string) ($data["date"] ?? ''));
        if ($timestamp !== false) {
            $data["date"] = date("d-m-Y", $timestamp);
        }

        $i++;
        file_put_contents($path.DIRECTORY_SEPARATOR."report_$i.json", json_encode($data));
    }

    fclose($handle);

    // Nothing is ever collected for the response; the output is always "[]".
    echo json_encode([]);
}

// createJsonsBLeumi();
/**
 * Generate 1000 randomised JSON fixtures from the b_leumi.json template.
 *
 * Loads ./files/b_leumi/b_leumi.json and, for i = 0..999, writes a copy to
 * ./files/b_leumi/<i>_bank.json in which the name fields are replaced by
 * random picks from a fixed list and the numeric/date fields by random
 * integers (dates are random Unix timestamps between 1 and now).
 *
 * Terminates the script via die() when the template cannot be read or is not
 * a JSON object/array - the original had an empty error branch and would go on
 * to write 1000 files containing only the random fields.
 *
 * @return void
 */
function createJsonsBLeumi(){
    $list_names = array(
        "Eitan Ginsberg",
        "Jore Breuer",
        "Gershom Filipowski",
        "Yaphet Pollack",
        "Amiel Worms",
        "Iosep Jung",
        "Gedalyahu Hefetz",
        "Seanan Kantorowitsch",
        "Michel Grossman",
        "Gurion Munk"
    );
    $last_name_index = count($list_names) - 1;

    $path = './files/b_leumi';
    $org_file = $path . DIRECTORY_SEPARATOR . "b_leumi.json";

    $string = file_get_contents($org_file);
    if ($string === false) {
        die(print_r(error_get_last(),true));
    }

    $json_a = json_decode($string, true);
    if (!is_array($json_a)) {
        die("Invalid JSON in $org_file: " . json_last_error_msg());
    }

    for($i = 0; $i < 1000; $i++){
        $temp_json = $json_a;

        $temp_json["referent name"] = $list_names[rand(0, $last_name_index)];
        $temp_json["client name"] = $list_names[rand(0, $last_name_index)];
        $temp_json["banker in charge"] = $list_names[rand(0, $last_name_index)];

        $temp_json["request number"] = rand(100, 10000);
        $temp_json["branch id"] = rand(1, 1000);
        $temp_json["account"] = rand(1, 11111);
        $temp_json["client id"] = rand(1000000, 9999999);
        $temp_json["referent code"] = rand(1, 20);
        $temp_json["date of request"] = mt_rand(1, time());
        $temp_json["date of update"] = mt_rand(1, time());
        $temp_json["date of closing"] = mt_rand(1, time());

        file_put_contents($path.DIRECTORY_SEPARATOR.$i.'_bank.json', json_encode($temp_json));
    }
}

/**
 * Read C:/tmp/hamas.txt and hand its contents to analyzeFileMadeSimple()
 * together with the fixed arguments "ivlab7" and 6000.
 *
 * @return void
 */
function runAnalyzeFile()
{
    analyzeFileMadeSimple(file_get_contents("C:/tmp/hamas.txt"), "ivlab7", 6000);
}

/**
 * Dump the `results` column of every row in `files` to an .xml file.
 *
 * For each id in the `files` table the full row is fetched and its `results`
 * value is written to c:\tmp\Yandex_text\xml\<name>.xml, where <name> is the
 * part of `filename` before the first ".txt".
 *
 * Rows are fetched one id at a time, as in the original; presumably this
 * keeps only one result blob in memory at a time - TODO confirm before
 * collapsing it into a single query.
 *
 * NOTE(review): the connection settings are expected to be defined as local
 * variables by demosettings.php. With require_once they are only defined if
 * this call is the first inclusion of that file in the request - verify.
 *
 * Terminates the script via die() when the id query fails; an id whose row
 * cannot be fetched is skipped.
 *
 * @return void
 */
function createXmlFiles()
{
    require_once("demosettings.php");

    $path = "c:\\tmp\\Yandex_text\\xml\\";

    // loading
    $mysqli = new mysqli($databaseHost, $databaseUser, $databasePass, $databaseName);
    $res = $mysqli->query("SELECT id FROM files");
    if ($res === false) {
        die($mysqli->error);
    }

    while ($row = $res->fetch_object()) {
        $res_id = $mysqli->query("SELECT * FROM files WHERE id=" . $row->id);
        $row_id = $res_id ? $res_id->fetch_object() : null;
        if (!$row_id) {
            // Query failed or the row disappeared in the meantime.
            continue;
        }

        $filename = explode(".txt", $row_id->filename);
        file_put_contents($path . $filename[0] . ".xml", $row_id->results);

        $res_id->free();
        //insertRDF($content);
    }
    $res->free();
}

/**
 * Concatenate the RDF payloads of every stored result into one text file.
 *
 * For each row of `files`, the `results` blob is parsed as XML. Inside its
 * first <FileTriples> element, the text of <AggregatedRDF> and then of
 * <DigestRDF> is taken, the part between the fixed "<rdf:RDF ...>" opening
 * tag and "</rdf:RDF>" is extracted, and that part is appended to
 * c:\tmp\Yandex_text\result.txt.
 *
 * Changes over the original: blobs that fail to parse, or that lack the
 * expected elements or the RDF opening tag, are skipped instead of causing a
 * null dereference or undefined-offset error; the duplicated extraction code
 * is shared; the unused $_REQUEST["dcid"] read and unused DOM objects are gone.
 *
 * NOTE(review): utf8_encode() is deprecated as of PHP 8.2. It is kept because
 * the blobs' source encoding (presumably ISO-8859-1) cannot be confirmed here.
 * NOTE(review): the connection settings are expected to be defined as local
 * variables by demosettings.php; with require_once that only happens on the
 * file's first inclusion in the request - verify.
 *
 * Terminates the script via die() when the id query fails.
 *
 * @return void
 */
function createOneRdf()
{
    require_once("demosettings.php");

    $path = "c:\\tmp\\Yandex_text\\";
    $filename = "result.txt";
    $rdf_open = "<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\" xmlns:iv=\"http://www.intuview.com/ontology#\">";

    // Appends the inner RDF of the first <$tag> below $container to the output file.
    $append_inner_rdf = function ($container, $tag) use ($path, $filename, $rdf_open) {
        $node = $container->getElementsByTagName($tag)->item(0);
        if ($node === null || !($node->firstChild instanceof DOMText)) {
            return;
        }
        $temp = explode($rdf_open, $node->firstChild->wholeText);
        if (!isset($temp[1])) {
            // Opening tag not present in this payload.
            return;
        }
        $t = explode("</rdf:RDF>", $temp[1]);
        file_put_contents($path . $filename, $t[0], FILE_APPEND);
    };

    // loading
    $mysqli = new mysqli($databaseHost, $databaseUser, $databasePass, $databaseName);
    $res = $mysqli->query("SELECT id FROM files");
    if ($res === false) {
        die($mysqli->error);
    }

    while ($row = $res->fetch_object()) {
        $res_id = $mysqli->query("SELECT results FROM files WHERE id=" . $row->id);
        $row_id = $res_id ? $res_id->fetch_object() : null;
        if (!$row_id) {
            continue;
        }

        $xml = new DOMDocument();
        if (!$xml->loadXml(utf8_encode($row_id->results))) {
            continue;
        }

        $tripples = $xml->getElementsByTagName("FileTriples")->item(0);
        if ($tripples === null) {
            continue;
        }

        $append_inner_rdf($tripples, "AggregatedRDF");
        //insertRDF($content);
        $append_inner_rdf($tripples, "DigestRDF");
        //insertRDF($content);
    }
}

//phpinfo();
/**
 * Split c:\tmp\Yandex\texts.txt into one .txt file per document.
 *
 * The input is processed as a repetition of two phases:
 *   1. metadata: lines are read and discarded up to and including the first
 *      line containing "CHARSET=";
 *   2. body: lines are appended to "<current name>.txt" until a line
 *      containing "~~~~~~" is met; the text after the first "~~~~~~" on that
 *      line becomes the name used for the following document.
 * The first document is written to "Document 0.txt". Processing stops when
 * the end of the file is reached during the body phase.
 *
 * NOTE(review): both inner loops call fgets() before testing $stop, so the
 * line that follows a "CHARSET=" line and the line that follows a "~~~~~~"
 * line are read and dropped. This is unchanged from the original because the
 * input format cannot be confirmed here.
 *
 * Fix: the failure branch lacked braces, so `echo "error"` ran on every call,
 * including successful ones.
 *
 * @return void
 */
function splitFiles()
{
    $path = "c:\\tmp\\Yandex\\";
    $handle = fopen($path . "texts.txt", "r");
    $filename = "Document 0";
    if ($handle) {
        $continue = true;
        while ($continue) {
            $stop = false;
            //read meta data
            while ((($buffer = fgets($handle, 4096)) !== false) && ($stop == false)) {
                $check = explode("CHARSET=", $buffer);
                if (sizeof($check) > 1) {
                    $stop = true;
                }
            }
            $stop = false;
            // read the document body up to the next separator line
            while ((($buffer = fgets($handle, 4096)) !== false) && ($stop == false)) {
                $check = explode("~~~~~~", $buffer);
                if (sizeof($check) > 1) {
                    $stop = true;
                    $filename = $check[1];
                }else{
                    file_put_contents($path . $filename . ".txt", $buffer, FILE_APPEND);
                }
            }
            // No separator seen: the body loop ended because the input ran out.
            if ($stop != true){
                $continue = false;
            }
        }
        if (!feof($handle)) {
            echo "Error: unexpected fgets() fail\n";
        }
        fclose($handle);
    } else {
        print_r($handle);
        echo "error";
    }
}


/**
 * Convert fb6.csv into one JSON file per row under Downloads\output6.
 *
 * The header line provides the field names: the first one is replaced by
 * "id" and all are lower-cased. Each following row with the same number of
 * columns is turned into an associative array, its "post" field is renamed
 * to "text", "t_type" => "facebook" is added, and the result is written to
 * <n>.json, where n counts written rows from 1. For every row with a
 * different column count, the number of rows skipped so far is echoed.
 *
 * Fixes over the original: a failed fopen() is reported instead of passing
 * false to fgetcsv(); the handle is closed; the 1024-byte line limit is lifted
 * (fgetcsv() splits longer lines into chunks, which made long posts fail the
 * column-count check); no dangling foreach reference is left behind.
 *
 * Terminates the script via die() when the CSV cannot be opened.
 *
 * @return void
 */
function fb(){
    $file = fopen('C:\\Users\\galar\\Downloads\\fb6.csv', 'r');
    if ($file === false) {
        die(print_r(error_get_last(),true));
    }

    $first = true;
    $cols = [];
    $i = 0;
    $missed = 0;
    // Length 0 = no limit on the line length.
    while (($row = fgetcsv($file, 0, ",")) !== false) {
        if($first){
            $first = false;
            // Header: force the first column name to "id", lower-case the rest.
            $cols = array_map('strtolower', array_merge(['id'], array_slice($row, 1)));
            continue;
        }
        if(count($row) !== count($cols)){
            echo $missed."\n";
            $missed++;
            continue;
        }
        $row = array_combine($cols, $row);

        $row['text'] = $row['post'] ?? null;
        unset($row['post']);
        $row['t_type'] = 'facebook';
        $i++;

        // The original passed `true` as the flags argument, which PHP coerces
        // to 1 === JSON_HEX_TAG; spelled out so the output stays identical.
        file_put_contents("C:\\Users\\galar\\Downloads\\output6\\$i.json", json_encode($row, JSON_HEX_TAG));
    }
    fclose($file);
}
?>
