<?php

include_once("demosettings.php");
include_once ("sqlTable.php");
include_once 'write_log.php';

/**
 * PredictiveProcessClass - orchestrates one predictive-search run.
 *
 * search_process() builds the feature list, derives a model from the seed
 * documents, scores candidate files against it and stores the scores in
 * `savedsearchid` under the saved search id. filter_limit() can afterwards
 * trim the stored rows to the closest percentage.
 */
class PredictiveProcessClass{
    private $mysqli;
    private $username;
    private $isAdmin;
    private $type;
    private $total_docs;
    private $total_predictive_docs;
    private $saved_search_id;
    private $predictive_search_docid;
    private $selectedLeafs;
    private $mand_fields;
    private $predictiveSaveSearch;
    private $selectedFolder;
    
    /**
     * @param mixed  $mysqli                  connection handle passed on to sqlQuery()
     * @param mixed  $type                    stored as-is; not read inside this class
     * @param mixed  $saved_search_id         id of the rows in `savedsearchid` that hold the corpus / results
     * @param mixed  $predictive_search_docid optional single seed document id
     * @param mixed  $extradata_pred          optional array; recognised keys: selectedLeafs, mandatoryLeafs,
     *                                        selectedSavedSearch, selectedFolder
     * @param string $username                user whose files are searched when not admin
     * @param bool   $isAdmin                 admins are not restricted by username
     */
    function __construct($mysqli, $type, $saved_search_id, $predictive_search_docid, $extradata_pred, $username, $isAdmin){ 
        $this->mysqli = $mysqli;
        $this->username = $username;
        $this->isAdmin = $isAdmin;
        $this->type = $type;
        $this->saved_search_id = $saved_search_id;    
        $this->predictive_search_docid = $predictive_search_docid;
        //row count for the saved search, captured before any scoring rewrites the rows
        $this->total_docs = $this->getNumDocsFromSavedSearch($this->saved_search_id);

        //array_key_exists() needs a real array, so anything else is treated as "no extra data"
        if(!empty($extradata_pred) && is_array($extradata_pred)){
            if(array_key_exists('selectedLeafs', $extradata_pred)){
                $this->selectedLeafs = $extradata_pred['selectedLeafs'];
            }
            if(array_key_exists('mandatoryLeafs', $extradata_pred)){
                $this->mand_fields = checkMandFields($this->mysqli, $extradata_pred['mandatoryLeafs']);
            }
            if(array_key_exists('selectedSavedSearch', $extradata_pred)){
                $this->predictiveSaveSearch = $extradata_pred['selectedSavedSearch'];
            }
            if(array_key_exists('selectedFolder', $extradata_pred)){
                $this->selectedFolder = $extradata_pred['selectedFolder'];
            }
        }
    }
    
    
    /**
     * getNumDocsFromSavedSearch - counts the rows stored in `savedsearchid` for the given id.
     *
     * @param mixed $id saved search id; cast to an integer before it is placed in the SQL text
     * @return int number of rows, 0 when the query yields no result object or no row
     */
    function getNumDocsFromSavedSearch($id) : int{
        $sql = "SELECT count(docid) as c FROM savedsearchid WHERE id=" . intval($id);
        $res = sqlQuery($this->mysqli, $sql);
        if(!is_object($res)){
            return 0;
        }
        $row = $res->fetch_object();
        return $row ? intval($row->c) : 0;
    }
    
    /**
     * search_process - the main function: feature list -> model -> scoring of documents.
     *
     * @param mixed $mysqli      connection handle handed to the helper classes
     * @param mixed $total_files number of files in the whole corpus (used for the idf calculation)
     */
    function search_process($mysqli, $total_files){
        //1 - get the list of features (all concepts/instances on site, or only the selected leafs)
        $listClass = new GetListOfFeaturesAll($mysqli, $this->isAdmin, $this->username);
        $feature_list = $listClass->getFeaturesList($this->selectedLeafs);
        
        //2 - get the model for the seed documents
        $modelClass = new GetIdfModelClass($mysqli, $this->saved_search_id, $this->predictive_search_docid, $feature_list,  $total_files);
        $model = $modelClass->getModelFromCorupus($this->selectedLeafs);
        
        //3 - decide the score/distance class
        $score_class = new EuclideanDistance();
        
        //4 - score the documents against the model
        //NOTE(review): GetDocsClass receives $this->total_docs (the saved-search row count taken in the
        //constructor) as its $total_files argument, which it uses as the paging bound - confirm this is intended
        $docsClass = new GetDocsClass($mysqli, $this->username, $this->isAdmin, $model, $modelClass, $this->saved_search_id, $feature_list, $this->total_docs, $score_class);
        
        $docsClass->getDocsInRange($this->mand_fields, $this->predictiveSaveSearch, $this->selectedFolder);
        $this->total_predictive_docs = $this->getNumDocsFromSavedSearch($this->saved_search_id);
    }
    
    /**
     * @return mixed the saved search id given to the constructor
     */
    function getPredictive_search_id(){
        return $this->saved_search_id;
    }
    
    /**
     * @return int|null row count measured at the end of search_process(), null before it has run
     */
    function getTotalPredictiveDocs(){
        return $this->total_predictive_docs;
    }
    
    /**
     * filter_limit($precent) - limits the results to the % closest to the model
     * by deleting the rows with a higher score from `savedsearchid`.
     *
     * @param mixed $precent percentage of rows to keep; empty or 100 means "keep everything"
     * @return bool|null true when no filtering was requested, false when $precent is not numeric,
     *                   null after the filtering branch ran
     */
    function filter_limit($precent){
        if(empty($precent) || $precent == "100"){
            return true;
        }
        //a non-numeric percent must never reach the arithmetic below
        if(!is_numeric($precent)){
            write_to_log("ERROR", "filter_limit - percent is not numeric");
            return false;
        }
        
        $search_id = intval($this->saved_search_id);
        
        //first count the docid in the predictive coding (now it's all documents but might change)
        $count = $this->getNumDocsFromSavedSearch($this->saved_search_id);
        
        //calculate the index of the row that carries the limit score
        $limit_index = $count <= 20 ? 20 : max(0, (int) ceil(($count/100)*$precent));
        
        //ordering is done in the same SELECT as the LIMIT: an ORDER BY placed inside a derived table
        //is not guaranteed to be honoured by the outer query
        $sql = "SELECT score FROM savedsearchid WHERE id = $search_id ORDER BY score ASC LIMIT $limit_index,1";
        $res = sqlQuery($this->mysqli, $sql);
        $row = is_object($res) ? $res->fetch_object() : null;
        
        //no row at that offset (fewer rows than the index) or no usable score: nothing to trim
        if(!$row || !is_numeric($row->score)){
            return;
        }
        
        //the score is used exactly as returned by the database; a score of 0 is a valid limit
        $limit_score = $row->score;
        
        //delete the elements with a higher score than limit_score
        $sql = "DELETE FROM savedsearchid WHERE id=$search_id AND score > $limit_score";
        sqlQuery($this->mysqli, $sql);
    }

}

//**********************************************************************************************************************************//
//**********************************************************GetListOfFeatures interface*********************************************//
//**********************************************************************************************************************************//
/**
 * GetListOfFeatures - contract for classes that produce the feature list
 * (implemented in this file by GetListOfFeaturesAll).
 */
interface GetListOfFeatures
{
    /**
     * @param mixed $selectedLeafs optional selection that narrows the list; defaults to null.
     *                             In GetListOfFeaturesAll this is an array or a JSON string of
     *                             elements with 'ont' and 'concept' keys.
     * @return mixed the feature list; GetListOfFeaturesAll returns an array keyed by feature name
     *               whose entries hold 'occ' and 'idf' integers
     */
    public function getFeaturesList($selectedLeafs = null);
}

/**
 * getListTrait - builds the SQL that lists every feature in `concept`
 * together with its document count and total occurrences.
 */
trait getListTrait{

    /**
     * getSql - returns the aggregate query over `concept`, grouped by feature name.
     *
     * The username is escaped before it is placed inside the quoted literal. When the
     * using class keeps a mysqli connection in $this->mysqli it is used for the escaping,
     * otherwise addslashes() is the fallback.
     *
     * @param string $username restricts the rows to this user when $isAdmin is false
     * @param bool   $isAdmin  when true no user restriction is added
     * @param bool   $binary   when true the feature name is selected as BINARY
     * @return string the SQL text
     */
    function getSql($username, $isAdmin, $binary) {
        if($isAdmin){
            $user_sql = " TRUE";
        }
        else{
            $raw_name = (string) $username;
            $has_connection = isset($this) && isset($this->mysqli) && $this->mysqli instanceof mysqli;
            $safe_name = $has_connection ? $this->mysqli->real_escape_string($raw_name) : addslashes($raw_name);
            $user_sql = " username='$safe_name'";
        }

        //the two variants differ only in how the feature name column is selected
        $name_column = $binary ? "BINARY onturl" : "onturl";

        return "SELECT $name_column as n, type as t, COUNT(docid) as d, SUM(occurrences) as o FROM concept WHERE $user_sql GROUP BY n";
    }
}

/**
 * GetListOfFeaturesAll - feature list taken either from every row of `concept`
 * visible to the user, or from an explicit selection of leafs.
 */
class GetListOfFeaturesAll implements GetListOfFeatures
{
    use getListTrait;
    private $mysqli;
    private $isAdmin;
    private $username;
    
    /**
     * @param mixed  $mysqli   connection handle passed on to sqlQuery()
     * @param bool   $isAdmin  admins are not restricted by username
     * @param string $username user whose concepts are listed when not admin
     */
    function __construct($mysqli, $isAdmin, $username){
        $this->mysqli = $mysqli;
        $this->isAdmin = $isAdmin;
        $this->username = $username;
    }
    
    /**
     * getFeaturesList - returns [feature => ['occ' => total occurrences, 'idf' => number of docs]].
     *
     * @param mixed $selectedLeafs when empty every concept/instance is listed,
     *                             otherwise only the selected leafs (see getLeafsVector)
     * @return array
     */
    public function getFeaturesList($selectedLeafs = null)
    {
        if(!empty($selectedLeafs)){
            return $this->getLeafsVector($selectedLeafs);
        }
        
        //without selected leafs - fetch the whole concepts/instances list
        $vector = array();
        $sql = $this->getSql($this->username, $this->isAdmin, true);
        $res = sqlQuery($this->mysqli, $sql);
        while (($row = $res->fetch_object()) != false){
            $vector[$row->n] = array(
                'occ' => intval($row->o),
                'idf' => intval($row->d),
            );
        }
       
        return $vector;
    }
    
    /**
     * getLeafsVector - builds the feature list for an explicit selection.
     *
     * @param mixed $selectedLeafs array, or JSON string, of elements shaped like
     *                             ['ont' => instance url, 'concept' => type or list of types]
     * @return array [ont => ['occ' => int, 'idf' => int]]; leafs without occurrences are logged and left out
     */
    public function getLeafsVector($selectedLeafs){
        $vector = [];
        $leafs = is_array($selectedLeafs) ? $selectedLeafs : json_decode($selectedLeafs,true);
        
        //json_decode() gives null on malformed input; there is nothing to iterate in that case
        if(!is_array($leafs)){
            write_to_log("ERROR", "getLeafsVector: selected leafs could not be decoded");
            return $vector;
        }
             
        //iterate over the selected optional leafs of instances/concepts
        foreach($leafs as $element){
            if(!is_array($element) || !isset($element['ont'], $element['concept']) || !is_scalar($element['ont'])){
                continue;
            }
            $ont = $element['ont'];
            
            //strip the ontology prefix from one concept or from each concept of a list
            $concepts = (array) str_replace("http://www.intuview.com/ontology#", '', $element['concept']);
            //NOTE(review): a list of concepts is matched as ONE comma-joined type value (as before);
            //confirm whether "type IN (...)" was intended
            $concept_str = implode(',', $concepts);
            
            //the leafs originate outside this class, so both values are escaped before use
            $sql =  "SELECT COUNT(docid) as d, SUM(occurrences) as o FROM concept WHERE onturl='" . $this->escapeSql($ont) . "' AND type='" . $this->escapeSql($concept_str) . "'";
            $res = sqlQuery($this->mysqli, $sql);
            $row = is_object($res) ? $res->fetch_object() : null;
            if(!$row || intval($row->o) == 0){
                write_to_log("ERROR", "getLeafsVector: $ont no occ");
                continue;
            }
            $vector[$ont] = array(
                'occ' => intval($row->o),
                'idf' => intval($row->d),
            );
        }
        return $vector;
    }
    
    /**
     * Escapes a value for use inside a single-quoted SQL literal. Uses the connection
     * when it is a mysqli instance, addslashes() otherwise.
     */
    private function escapeSql($value){
        $value = (string) $value;
        if($this->mysqli instanceof mysqli){
            return $this->mysqli->real_escape_string($value);
        }
        return addslashes($value);
    }

}
//**********************************************************************************************************************************//
//**********************************************************END GetListOfFeatures interface*****************************************//
//**********************************************************************************************************************************//

//**********************************************************************************************************************************//
//**********************************************************FetchModel interface****************************************************//
//**********************************************************************************************************************************//
/**
 * FetchModel - contract for classes that turn concept occurrences into a model vector.
 * Parameter names are kept as declared; in GetIdfModelClass getSingleDocModel() receives
 * an occurrences vector and accumlateVector() receives a list of document ids.
 */
interface FetchModel
{
    public function getModelFromCorupus($selectedLeafs);
    public function getSingleDocModel($id);
    public function accumlateVector($id, $selectedLeafs);
}

/**
 * GetIdfModelClass - tf-idf model over the feature list.
 */
class GetIdfModelClass implements FetchModel
{
    private $mysqli;
    private $features_vector;
    private $savedSearchId;
    private $predictive_search_docid;
    private $total_files;
    
    /**
     * @param mixed $mysqli                  connection handle passed on to sqlQuery()
     * @param mixed $saved_search_id         id of the seed rows in `savedsearchid`
     * @param mixed $predictive_search_docid optional single seed document id (takes precedence when not empty)
     * @param mixed $features_vector         [feature => ['occ' => int, 'idf' => number of docs containing it]];
     *                                       anything that is not an array is treated as an empty list
     * @param mixed $total_files             count of all files
     */
    function __construct($mysqli, $saved_search_id, $predictive_search_docid, $features_vector, $total_files){
        $this->mysqli = $mysqli;
        $this->features_vector = is_array($features_vector) ? $features_vector : [];
        $this->savedSearchId = $saved_search_id;
        $this->predictive_search_docid = $predictive_search_docid;
        $this->total_files = $total_files; //count of all files
    }
    
    /**
     * getModelFromCorupus - tf-idf model of the seed documents.
     *
     * @param mixed $selectedLeafs optional leaf selection (see getLeafsVector)
     * @return array [feature => tf-idf weight]
     */
    public function getModelFromCorupus($selectedLeafs = null){
        
        $vector = $this->accumlateVector(null, $selectedLeafs); //sum up to a vector all the occurrences
        
        //average the occurrences over the file count; skipped when the count is not a positive number
        if(is_numeric($this->total_files) && $this->total_files > 0){
            foreach($vector as $feature => $occurrences){
                $vector[$feature] = $occurrences/$this->total_files;
            }
        }
        
        return $this->getSingleDocModel($vector);
    }
    
    
    /**
     * getSingleDocModel - converts an occurrences vector into tf-idf weights.
     *
     * Features that are not in the feature list, that have no document count, or whose
     * idf is 0 are left out of the result.
     *
     * @param array $vector [feature => occurrences]
     * @return array [feature => tf * idf]; empty when the total file count is not a positive number
     * @see FetchModel::getSingleDocModel()
     */
    public function getSingleDocModel($vector){
        
        $tf_idf = array();
        
        //without a positive corpus size the idf is undefined
        if(!is_numeric($this->total_files) || $this->total_files <= 0){
            return $tf_idf;
        }
        
        $total_sum = array_sum($vector); //sum of all instances of the vector (could be narrowed to selected leafs and not all instances under concept)
        
        foreach($vector as $k=>$v){
            if(!array_key_exists($k, $this->features_vector)){
                continue;
            }
            
            //number of docs that contain $k; 0 would mean a division by zero below
            $doc_freq = $this->features_vector[$k]['idf'] ?? 0;
            if($doc_freq <= 0){
                continue;
            }
            
            //idf - inverse document frequency: total_files is the number of ALL files in the corpus
            $curr_idf = log($this->total_files/$doc_freq);
            if($curr_idf==0){
                continue;
            }
            
            //tf - frequency of $k in the current vector; loose comparison so a float 0.0 sum is caught as well
            $curr_tf = ($total_sum != 0) ? ($v/$total_sum) : 0;
            
            $tf_idf[$k] = $curr_tf*$curr_idf;
        }
        return $tf_idf;
    }
    
    /**
     * accumlateVector - sums the occurrences of every listed feature.
     *
     * @param array|null $ids           when given, only these documents are read and the result is
     *                                  [docid => [feature => occurrences]]; otherwise the seed documents
     *                                  are read and the result is one [feature => occurrences] vector
     * @param mixed      $selectedLeafs when not empty the sums are taken per selected leaf instead
     *                                  (see getLeafsVector) and one vector is returned
     * @return array
     */
    public function accumlateVector($ids = null, $selectedLeafs = null){
        $vector = array_fill_keys(array_keys($this->features_vector), 0);
        $per_document = !empty($ids);
        
        //ids are cast to integers before they become part of the SQL text
        if($per_document){
            $doc_filter = "IN(".implode(',', array_map('intval', array_values($ids))).")";
        }
        elseif(!empty($this->predictive_search_docid)){
            $doc_filter = "=".intval($this->predictive_search_docid);
        }
        else{
            $doc_filter = "IN(SELECT docid FROM savedsearchid WHERE id=".intval($this->savedSearchId).")";
        }
        
        if(!empty($selectedLeafs)){
            return $this->getLeafsVector($selectedLeafs, $doc_filter);
        }
        
        $total_vector = array(); //only filled when ids were given
        $sql = "SELECT onturl as n, docid as d, occurrences as o FROM concept WHERE docid $doc_filter";
        $res = sqlQuery($this->mysqli, $sql);
        while (($row = $res->fetch_object()) != false){
            //rows for names outside the feature list are ignored
            if(!isset($vector[$row->n])){
                continue;
            }
            if($per_document){
                if(!isset($total_vector[$row->d])){
                    $total_vector[$row->d] = $vector; //start each document from the all-zero vector
                }
                $total_vector[$row->d][$row->n] += intval($row->o); 
            }else{
                $vector[$row->n] += intval($row->o);
            }
        }
        
        if($per_document){
            return $total_vector;
        }
        ksort($vector);
        return $vector;
    }
    
    /**
     * getLeafsVector - sums the occurrences of each selected leaf inside the given documents.
     *
     * @param mixed  $selectedLeafs    array, or JSON string, of elements shaped like
     *                                 ['ont' => instance url, 'concept' => type or list of types]
     * @param string $savedSearchIdStr SQL fragment that follows "docid" (e.g. "=5" or "IN(...)")
     * @return array [ont => occurrences]
     */
    public function getLeafsVector($selectedLeafs, $savedSearchIdStr){
        $vector = [];
        $leafs = is_array($selectedLeafs) ? $selectedLeafs : json_decode($selectedLeafs,true);
        
        //json_decode() gives null on malformed input; there is nothing to iterate in that case
        if(!is_array($leafs)){
            return $vector;
        }
        
        foreach($leafs as $element){
            if(!is_array($element) || !isset($element['ont'], $element['concept']) || !is_scalar($element['ont'])){
                continue;
            }
            $ont = $element['ont'];
            
            //a list of concepts is joined with commas, the same way GetListOfFeaturesAll::getLeafsVector does
            $concept = implode(',', (array) str_replace("http://www.intuview.com/ontology#", '', $element['concept']));
            
            //the leafs originate outside this class, so both values are escaped before use
            $sql =  "SELECT COUNT(docid) as d, SUM(occurrences) as o FROM concept WHERE onturl='" . $this->escapeSql($ont) . "' AND type='" . $this->escapeSql($concept) . "' AND docid  $savedSearchIdStr";
            $res = sqlQuery($this->mysqli, $sql);
            $row = is_object($res) ? $res->fetch_object() : null;
            if(!array_key_exists($ont, $vector)){
                $vector[$ont] = 0;
            }
            if($row){
                $vector[$ont] += intval($row->o);
            }
        }
        return $vector;
    }
    
    /**
     * Escapes a value for use inside a single-quoted SQL literal. Uses the connection
     * when it is a mysqli instance, addslashes() otherwise.
     */
    private function escapeSql($value){
        $value = (string) $value;
        if($this->mysqli instanceof mysqli){
            return $this->mysqli->real_escape_string($value);
        }
        return addslashes($value);
    }
}

//**********************************************************************************************************************************//
//**********************************************************END FetchModel interface************************************************//
//**********************************************************************************************************************************//

//**********************************************************************************************************************************//
//**********************************************************FetchDocs interface*****************************************************//
//**********************************************************************************************************************************//
/**
 * FetchDocs - contract for classes that score documents against a model.
 */
interface FetchDocs
{
    public function getDocsInRange($mand_fields);
}

/**
 * GetDocsClass - scores the candidate files against the model and stores
 * (id, docid, score) rows in `savedsearchid`.
 */
class GetDocsClass implements FetchDocs
{
    
    private $mysqli;
    private $features_vector;
    private $savedSearchId;
    private $count_docs; //not used inside this class
    private $model;
    private $username;
    private $isAdmin;
    private $model_class;
    //declared explicitly: they were previously created on the fly by the constructor
    private $score_class;
    private $total_files;
  
    
    /**
     * @param mixed  $mysqli          connection handle passed on to sqlQuery()
     * @param string $username        user whose files are scored when not admin
     * @param bool   $isAdmin         admins are not restricted by username
     * @param array  $model           model vector the documents are compared with
     * @param mixed  $model_class     object providing accumlateVector() and getSingleDocModel()
     * @param mixed  $saved_search_id id under which the scores are stored
     * @param mixed  $features_vector feature list (stored, not read inside this class)
     * @param mixed  $total_files     upper bound for the batch offset in getDocsInRange()
     * @param mixed  $score_class     object providing getDistance()
     */
    function __construct($mysqli, $username, $isAdmin, $model, $model_class, $saved_search_id, $features_vector, $total_files, $score_class){
        $this->mysqli = $mysqli;
        $this->username = $username;
        $this->isAdmin = $isAdmin;
        $this->model = $model;
        $this->model_class = $model_class;
        $this->features_vector = $features_vector;
        $this->savedSearchId = $saved_search_id;
        $this->score_class = $score_class;
        $this->total_files = $total_files;
    }


    /**
     * getDocsInRange() - scores every candidate file and stores the score.
     *
     * The existing rows of the saved search are deleted first. Candidate files are then read
     * in batches of 1000; for every document that yields a vector, its distance to the model
     * is inserted into `savedsearchid`. Nothing is returned.
     *
     * @param mixed $mand_fields    optional mandatory fields, see prepareMandSql()
     * @param mixed $savedSearch    optional saved search id that restricts the candidate files
     * @param mixed $selectedFolder optional origfolder restriction
     */
    public function getDocsInRange($mand_fields = null, $savedSearch = null, $selectedFolder = null){   
        $batch_limit = 1000;
        $search_id = intval($this->savedSearchId);
        
        //clear the previous results of this saved search
        sqlQuery($this->mysqli, "DELETE FROM savedsearchid WHERE id=$search_id");
        
        $mand_sql = $this->prepareMandSql($mand_fields);
        
        $savedSearchSql = !empty($savedSearch) ? " id IN (select docid from savedsearchid WHERE id=" . intval($savedSearch) . ")" : " TRUE ";
        //NOTE(review): $selectedFolder is placed unescaped inside IN('...'); it may be a pre-quoted list
        //built by the caller, so it is left untouched - confirm where it comes from
        $folderSql = !empty($selectedFolder) ? "id IN (SELECT id FROM files WHERE origfolder IN('$selectedFolder'))" : " TRUE ";
        //TODO: change this SQL to search --> outputs sql that fetch user's all files
        $user_sql = (!$this->isAdmin) ? " username='" . $this->escapeSql($this->username) . "'" : " TRUE";
        
        $sql_all_files = "SELECT distinct id FROM files WHERE $user_sql AND $mand_sql AND $savedSearchSql AND $folderSql";
        
        for($offset = 0; $offset < $this->total_files; $offset += $batch_limit){
            //a fixed order keeps the LIMIT windows from overlapping or skipping rows
            $sql_batch = "SELECT id FROM ($sql_all_files) as t1 ORDER BY id LIMIT $offset, $batch_limit";
            $res = sqlQuery($this->mysqli, $sql_batch);
            $rows = is_object($res) ? $res->fetch_all() : [];
            
            //no rows at this offset: every candidate file has been read
            if(empty($rows)){
                write_to_log("ERROR", "getDocsInRange - no values for sql");
                break;
            }
            
            $doc_ids = array_column($rows, 0);
            $total_vector = $this->model_class->accumlateVector($doc_ids);
            
            $values = [];
            foreach($total_vector as $docid=>$curr_vector){
                write_to_log("INFO", "calculated document num: $docid");
                
                $curr_model = $this->model_class->getSingleDocModel($curr_vector);
                $diff = $this->score_class->getDistance($curr_model, $this->model); //distance between the doc model and the search model
                
                //a non-finite score cannot be written as an SQL number and would fail the whole INSERT
                if(!is_numeric($diff) || !is_finite((float) $diff)){
                    write_to_log("ERROR", "getDocsInRange - score is not a finite number for document: $docid");
                    continue;
                }
                
                //row for savedsearchid: (id,docid,score)
                $values[] = "($search_id,$docid,$diff)";
            }
            
            //this batch produced nothing to store, later batches still may
            if(empty($values)){
                write_to_log("ERROR", "getDocsInRange - no values for sql");
                continue;
            }
            
            sqlQuery($this->mysqli, "INSERT INTO savedsearchid (id,docid,score) VALUES " . implode(',', $values) . " ON DUPLICATE KEY UPDATE id=id");
        }
    }
    
    
    /**
     * prepare mandatory fields as an sql condition on the file id
     *
     * Every concept contributes one "id IN (...)" condition that requires all of its
     * instances; the conditions are joined with AND. Concepts without instances are skipped.
     *
     * @param mixed $mand_fields [concept => list of instance urls]
     *                           (presumably already validated by the caller - TODO confirm)
     * @return string the condition, or " TRUE " when there is nothing to require
     */
    function prepareMandSql($mand_fields){
        if(empty($mand_fields)){
            return " TRUE ";
        }
        $sqls = [];
        foreach($mand_fields as $concept=>$instances){
            if(empty($instances)){
                continue;
            }
            $table = TablesInformation::getTableName($concept);
            
            //collected per concept, so conditions of one concept are not repeated in the next
            $conditions = [];
            foreach ((array) $instances as $instance){
                $conditions[] = " docId IN (SELECT DISTINCT docid FROM concept WHERE  ontUrl = '$instance')";
            }
            $sqls[] = "id IN (SELECT DISTINCT docid FROM $table WHERE " . implode(" AND ", $conditions) . ")";
        }
        if(empty($sqls)){
            return " TRUE ";
        }
        return implode(" AND ", $sqls);
    }
    
    /**
     * Escapes a value for use inside a single-quoted SQL literal. Uses the connection
     * when it is a mysqli instance, addslashes() otherwise.
     */
    private function escapeSql($value){
        $value = (string) $value;
        if($this->mysqli instanceof mysqli){
            return $this->mysqli->real_escape_string($value);
        }
        return addslashes($value);
    }

}
//**********************************************************************************************************************************//
//**********************************************************END FetchDocs interface*************************************************//
//**********************************************************************************************************************************//


//**********************************************************************************************************************************//
//**************************************************************Distance interface**************************************************//
//**********************************************************************************************************************************//
/**
 * Distance - contract for the score between two feature vectors.
 */
interface Distance
{
    public function getDistance(array $vector1, array $vector2) : float;
}

/**
 * EuclideanDistance - straight-line distance between two keyed vectors.
 */
class EuclideanDistance implements Distance
{
     /**
     * getDistance($vector1, $vector2) - distance between 2 vectors
     *
     * The vectors are matched by key. A key that exists in only one of them counts
     * as 0 in the other, so the result does not depend on the argument order.
     *
     * @param  array $vector1 [feature => weight]
     * @param  array $vector2 [feature => weight]
     * @return float distance between vector 1 and 2 (0.0 for two empty vectors)
     */
    public function getDistance(array $vector1, array $vector2) : float
    {
        $sum = 0.0;
        
        //the array union supplies every key of both vectors exactly once
        foreach (array_keys($vector1 + $vector2) as $key) {
            $delta = ($vector1[$key] ?? 0) - ($vector2[$key] ?? 0);
            $sum += $delta * $delta;
        }
        
        return sqrt($sum);
    }
}

//**********************************************************************************************************************************//
//**********************************************************END Distance interface**************************************************//
//**********************************************************************************************************************************//