<?php

/**
 * Crawls Facebook group pages through the ProxyCrawl API, stores every post
 * as a JSON file under a per-run working folder, and later converts those
 * files into post/comment JSON files plus rows in the `raw_stream_data`
 * table of the crawler database.
 */
class ProxyCrawl
{
    private $normalToken = "";
    private $javascriptToken = "";
    /** Per-run working folder that receives the crawled group folders. */
    private $path = "";
    /** Running element id handed out to posts and comments (starts at 0 per instance). */
    private $id = 0;
    private $crawlerMysql = null;
    private $mysqli = null;

    /**
     * Stores the API tokens, creates the per-run working folder and opens
     * both database connections.
     *
     * NOTE(review): host, credentials and the output root are hard-coded
     * here; consider moving them to configuration.
     *
     * @param string $normalToken     ProxyCrawl token (stored; not used by the methods of this class).
     * @param string $javascriptToken ProxyCrawl token passed to the API client in crawlFacebookGroup().
     */
    public function __construct($normalToken, $javascriptToken)
    {
        $this->normalToken = $normalToken;
        $this->javascriptToken = $javascriptToken;

        // Temporary place to save the output, one folder per run.
        $the_time = date('m_d_Y_H_i_s');
        $this->path = "C:/new_posts/$the_time";
        if (!is_dir($this->path)) {
            // Recursive so a missing C:/new_posts root does not make the whole run fail.
            mkdir($this->path, 0777, true);
        }

        // This connection is not used for queries by this class any more; it is
        // still opened so constructing the object behaves as it did before.
        $this->mysqli = new mysqli('localhost', 'root', 'intuview', 'galgalanda_db', 3306);
        $this->mysqli->set_charset('utf8mb4');

        $this->crawlerMysql = new mysqli('localhost', 'root', 'intuview', 'galgalanda_db_crawler', 3306);
        $this->crawlerMysql->set_charset('utf8mb4');
    }

    /**
     * Crawls every Facebook group URL listed (one per line) in a text file.
     *
     * Lines that do not contain "/groups/" are ignored. Groups whose crawl
     * fails are echoed and appended to "not_supported.txt" inside the
     * working folder; that file is emptied at the start of every call.
     *
     * @param string $path Path of the text file holding the group URLs.
     * @return void
     */
    public function crawlFBList($path)
    {
        $ns_path = $this->path . DIRECTORY_SEPARATOR . "not_supported.txt";
        // Create/truncate the report without leaving a file handle open.
        file_put_contents($ns_path, '');

        $fh = fopen($path, 'r');
        if ($fh === false) {
            // fopen() has already raised a warning describing the failure.
            return;
        }

        while (($line = fgets($fh)) !== false) {
            // Drop the trailing newline so it is not sent to the API as part of the URL.
            $url = trim($line);
            if ($url === '' || strpos($url, "/groups/") === false) {
                continue;
            }

            // Only an explicit false means the crawl failed.
            if ($this->crawlFacebookGroup($url) === false) {
                echo "group $url is not supported" . PHP_EOL;
                file_put_contents($ns_path, $url . PHP_EOL, FILE_APPEND);
            }
        }
        fclose($fh);
    }

    /**
     * Crawls one Facebook group and writes each post that has text to
     * "<working folder>/<group name>/fb_<n>.json", where <n> is the 1-based
     * position of the post in the API response.
     *
     * Comments without text are removed from a post before it is saved.
     *
     * @param string $group URL of the group.
     * @return bool True when the API answered with status 200, false otherwise.
     */
    public function crawlFacebookGroup($group)
    {
        require_once('proxycrawl-api.php');

        $group = trim($group);
        $group_name = str_replace(' ', '_', basename($group));

        $api = new ProxyCrawlAPI(['token' => $this->javascriptToken]);
        $response = $api->get($group, [
            'format' => 'json',
            'scraper' => 'facebook-group',
            'css_click_selector' => '._4sxc._42ft',
        ]);

        if ($response->statusCode !== 200) {
            return false;
        }

        // Create the group folder only once the response is known to be good.
        $group_path = $this->path . DIRECTORY_SEPARATOR . $group_name;
        if (!is_dir($group_path)) {
            mkdir($group_path);
        }

        $res = json_decode($response->body, true);
        $feeds = (isset($res["body"]["feeds"]) && is_array($res["body"]["feeds"]))
            ? $res["body"]["feeds"]
            : [];

        $counter = 0;
        foreach ($feeds as $feed) {
            // Counted before the skip so file numbers follow the response order.
            $counter++;

            // Skip posts with no text.
            if (empty($feed['text'])) {
                continue;
            }

            // Remove comments with no text; re-index so the JSON stays a list.
            if (isset($feed['comments']) && is_array($feed['comments'])) {
                $feed['comments'] = array_values(array_filter(
                    $feed['comments'],
                    function ($comment) {
                        return !empty($comment['text']);
                    }
                ));
            }

            $json = json_encode($feed, JSON_UNESCAPED_UNICODE);
            $file = $group_path . DIRECTORY_SEPARATOR . "fb_$counter.json";
            file_put_contents($file, $json);
        }

        return true;
    }

    /**
     * Recursively processes a folder of crawled post files.
     *
     * An "output" sub-folder is created in every visited folder; each file is
     * handed to parseOutPutFile() with "<folder>/output/<file name>_output"
     * as its destination.
     *
     * @param string $path           Folder to process.
     * @param int    $limit_comments Minimum number of comments a post needs (-1 disables the check).
     * @return void
     */
    public function parseOutPutFolder($path, $limit_comments = -1)
    {
        $outputRoot = $path . DIRECTORY_SEPARATOR . 'output';
        if (!is_dir($outputRoot)) {
            mkdir($outputRoot);
        }

        // Snapshot the entry names first: parseOutPutFile() deletes files, and
        // the directory should not change underneath a live iterator.
        $entries = [];
        foreach (new DirectoryIterator($path) as $fileinfo) {
            if ($fileinfo->isDot()) {
                continue;
            }
            $fileName = $fileinfo->getFilename();
            if ($fileName === 'output') {
                continue;
            }
            $entries[] = $fileName;
        }

        foreach ($entries as $fileName) {
            $full_name = $path . DIRECTORY_SEPARATOR . $fileName;
            if (is_dir($full_name)) {
                $this->parseOutPutFolder($full_name, $limit_comments);
            } else {
                $outputPath = $outputRoot . DIRECTORY_SEPARATOR . $fileName;
                $this->parseOutPutFile($full_name, $outputPath . '_output', $limit_comments);
            }
        }
    }

    /**
     * Converts one crawled post file into "post.json" and
     * "comment_<n>.json" files inside $outputPath and inserts the post and
     * each comment into `raw_stream_data`.
     *
     * The keys "userName" and "date" are renamed to "user_screen_name" and
     * "date_field". The source file is deleted after it has been processed,
     * and also when it has fewer comments than $limit. Files that cannot be
     * decoded, or whose post has no text, are left untouched.
     *
     * @param string $filePath   Crawled JSON file to convert.
     * @param string $outputPath Folder that receives the generated files.
     * @param int    $limit      Minimum number of comments required (-1 disables the check).
     * @return void
     */
    public function parseOutPutFile($filePath, $outputPath, $limit = -1)
    {
        $jsonFile = file_get_contents($filePath);
        $arrayFile = ($jsonFile === false) ? null : json_decode($jsonFile, true);
        if (!is_array($arrayFile)) {
            // Not a decodable post file (e.g. a stray text file); nothing to convert.
            return;
        }

        $comments = (isset($arrayFile["comments"]) && is_array($arrayFile["comments"]))
            ? $arrayFile["comments"]
            : [];
        if (count($comments) < $limit) {
            echo "not enough comments - skip";
            unlink($filePath);
            return;
        }

        // Skip posts without text before creating anything on disk.
        if (empty($arrayFile['text'])) {
            return;
        }

        if (!is_dir($outputPath)) {
            mkdir($outputPath);
        }

        unset($arrayFile["comments"]);
        $post = $this->renameStreamKeys($arrayFile);
        $post["t_id"] = $this->id;

        $this->insertRawStreamRow($this->id, json_encode($post, JSON_UNESCAPED_UNICODE));
        $this->id++;

        // Write the post to its JSON file.
        file_put_contents($outputPath . DIRECTORY_SEPARATOR . "post.json", json_encode($post));

        // Write each comment to the database and to its own JSON file.
        $comments_counter = 1;
        foreach ($comments as $comment) {
            if (!is_array($comment)) {
                continue;
            }

            $new_comment = $this->renameStreamKeys($comment);
            $new_comment["t_id"] = $this->id;

            $this->insertRawStreamRow($this->id, json_encode($new_comment, JSON_UNESCAPED_UNICODE));

            // NOTE(review): the parent id is added after the insert, so it is
            // present in the file but not in the database row - confirm this
            // is intended.
            $new_comment["t_parent_id"] = $post["t_id"];
            $this->id++;

            file_put_contents(
                $outputPath . DIRECTORY_SEPARATOR . "comment_$comments_counter.json",
                json_encode($new_comment)
            );
            $comments_counter++;
        }

        unlink($filePath);
    }

    /**
     * Removes $path when it contains nothing but (recursively) empty folders.
     *
     * Every sub-folder is visited, so empty sub-folders are removed even when
     * $path itself has to stay. Entries are listed with glob("*").
     *
     * @param string $path Folder to clean up.
     * @return bool True when $path itself was removed.
     */
    public function RemoveEmptySubFolders($path)
    {
        $entries = glob($path . DIRECTORY_SEPARATOR . "*");
        if ($entries === false) {
            $entries = [];
        }

        $empty = true;
        foreach ($entries as $file) {
            // Evaluated for every entry (no short-circuit on $empty) so that
            // all empty sub-folders get removed.
            $removed = is_dir($file) && $this->RemoveEmptySubFolders($file);
            $empty = $empty && $removed;
        }

        return $empty && rmdir($path);
    }

    /**
     * Returns a copy of $item with "userName" renamed to "user_screen_name"
     * and "date" renamed to "date_field"; all other keys are kept.
     *
     * @param array $item Decoded post or comment.
     * @return array
     */
    private function renameStreamKeys(array $item)
    {
        $renamed = [];
        foreach ($item as $key => $value) {
            if ($key === 'userName') {
                $key = "user_screen_name";
            } elseif ($key === 'date') {
                $key = "date_field";
            }
            $renamed[$key] = $value;
        }
        return $renamed;
    }

    /**
     * Inserts one row (source 2) into `raw_stream_data` on the crawler
     * connection using a prepared statement.
     *
     * @param int    $elementId Value for the element_id column.
     * @param string $json      JSON document for the data column.
     * @return bool True when the row was inserted.
     */
    private function insertRawStreamRow($elementId, $json)
    {
        $stmt = $this->crawlerMysql->prepare(
            'INSERT INTO raw_stream_data (element_id, source, data) VALUES (?, 2, ?)'
        );
        if ($stmt === false) {
            error_log('ProxyCrawl: cannot prepare raw_stream_data insert: ' . $this->crawlerMysql->error);
            return false;
        }

        $stmt->bind_param('is', $elementId, $json);
        $ok = $stmt->execute();
        if (!$ok) {
            error_log('ProxyCrawl: raw_stream_data insert failed: ' . $stmt->error);
        }
        $stmt->close();

        return $ok;
    }
}




