<?php

/*
 * This class manages an array of n-grams.
 */

/**
 * ngramEng collects and counts the n-grams found in document text.
 *
 * @author hilit
 */

include 'ngram.php';
require_once 'sqlUtils.php';

/**
 * Static registry that counts the unigrams and bigrams found in document
 * text and turns the most frequent ones into value tuples for the
 * "phrases" table.
 *
 * NOTE(review): the registry is static and is never cleared, so counts
 * accumulate over every call to setNgramsFromDoc() made in one request.
 * If callers process several documents per request they probably need a
 * reset between documents - confirm against the callers.
 */
class ngramEng {
    /**
     * Upper bound on the number of phrase rows produced per document. Less
     * frequent phrases are assumed to be less indicative, and capping the
     * rows avoids exhausting the DB.
     */
    const MAX_PHRASES_PER_DOC = 100;

    /**
     * Ngram objects keyed by their text. The keys must survive sorting,
     * because addNgram() uses them to find an already-counted phrase.
     */
    private static $ngrams = [];

    /**
     * Tokenizes the text, counts every non-numeric token (unigram) and every
     * pair of adjacent alphanumeric tokens (bigram), and returns all n-grams
     * counted so far, most frequent first.
     *
     * Does nothing and returns an empty array unless the global
     * $shouldInsertPhrases flag is truthy.
     *
     * @param string $textNoBOM Document text (the name suggests the BOM has
     *                          already been stripped by the caller).
     * @return array List of Ngram objects ordered by descending occurrences.
     */
    public static function setNgramsFromDoc($textNoBOM)
    {
        global $shouldInsertPhrases;
        if (!$shouldInsertPhrases) {
            return [];
        }

        $tokens = preg_split("/[\s[:punct:]]/u", $textNoBOM);
        if ($tokens === false) {
            // preg_split() fails on e.g. malformed UTF-8; treat that as "no tokens"
            // instead of handing false to foreach.
            $tokens = [];
        }

        $prevToken = "";
        foreach ($tokens as $token) {
            if (!preg_match("/^[[:alnum:]]+$/u", $token)) {
                // An empty or non-alphanumeric token breaks the bigram chain.
                $prevToken = "";
                continue;
            }
            if (!is_numeric($token)) {
                self::addNgram($token);
            }
            // Compare against "" explicitly: the token "0" is falsy in PHP but is
            // still a valid first word of a bigram.
            if ($prevToken !== "") {
                self::addNgram("$prevToken $token");
            }
            $prevToken = $token;
        }

        // uasort() keeps the text keys (usort() would renumber them and later
        // addNgram() calls would create duplicates instead of incrementing).
        uasort(self::$ngrams, [__CLASS__, 'cmp']);

        // Callers receive a plain list, as before.
        return array_values(self::$ngrams);
    }

    /**
     * Registers one occurrence of an n-gram: increments the counter of an
     * already known n-gram or creates a new Ngram with a count of 1.
     *
     * @param string $name Text of the n-gram.
     */
    public static function addNgram($name)
    {
        if (array_key_exists($name, self::$ngrams)) {
            $known = self::$ngrams[$name];
            $known->set_occurrences($known->get_occurrences() + 1);
            return;
        }
        self::$ngrams[$name] = new Ngram($name, 1);
    }

    /**
     * Sort callback: orders Ngram objects by occurrences, highest first.
     *
     * @param Ngram $a1
     * @param Ngram $a2
     * @return int -1, 0 or 1 as expected by the u*sort() family.
     */
    public static function cmp($a1, $a2)
    {
        $a = $a1->get_occurrences();
        $b = $a2->get_occurrences();
        if ($a == $b) {
            return 0;
        }
        return ($a > $b) ? -1 : 1;
    }

    /**
     * Builds the value tuples for inserting the counted n-grams of a document
     * into the "phrases" table. Only the first MAX_PHRASES_PER_DOC n-grams of
     * the registry are used; the registry is in descending-occurrence order
     * after setNgramsFromDoc().
     *
     * NOTE(review): $dcid is interpolated into the tuple as-is and the phrase
     * text is whatever Ngram::checkNgram() returns - presumably escaped there;
     * verify before feeding untrusted values.
     *
     * @param mixed $mysqli DB handle passed through to TablesInformation and Ngram::checkNgram().
     * @param mixed $dcid   Document id written as the first field of every tuple.
     * @return array Empty when nothing was counted, otherwise
     *               [columnList => ["values" => [tuple, ...]]].
     */
    public static function addPhraseToTableToValue($mysqli, $dcid)
    {
        // Look the column up once and derive both the column list and the tuple layout from it.
        $columnExists = TablesInformation::isColumnExists($mysqli, "phrases", "doc_occurrences");
        $ngramsColumns = self::columnList($columnExists);
        $table2valuesPhrases = array();

        $topNgrams = array_slice(self::$ngrams, 0, self::MAX_PHRASES_PER_DOC);
        foreach ($topNgrams as $ngram) {
            $sqlNgram = $ngram->checkNgram($mysqli);
            $occurrences = $ngram->get_occurrences();
            $doc_occurrences = $columnExists ? ",$occurrences" : "";
            $table2valuesPhrases[$ngramsColumns]["values"][] = "($dcid,'$sqlNgram' $doc_occurrences)";
        }
        return $table2valuesPhrases;
    }

    /**
     * Returns the column list of the "phrases" table to insert into; it
     * includes doc_occurrences only when that column exists.
     *
     * @param mixed $mysqli DB handle passed through to TablesInformation.
     * @return string "(docId,value,doc_occurrences)" or "(docId,value)".
     */
    public static function getTableColumns($mysqli)
    {
        $columnExists = TablesInformation::isColumnExists($mysqli, "phrases", "doc_occurrences");
        return self::columnList($columnExists);
    }

    /**
     * Maps the "doc_occurrences column exists" flag to the column list.
     */
    private static function columnList($hasDocOccurrences)
    {
        return $hasDocOccurrences ? "(docId,value,doc_occurrences)" : "(docId,value)";
    }

}
