<?php
use setasign\Fpdi\Fpdi;
use Monolog\Logger;
use Monolog\Handler\NullHandler;
use XPDF\PdfToText;
use Smalot\PdfParser\Parser;

/**
 * PDF utility helpers: office-to-PDF conversion, text extraction, page
 * splitting, PDF version downgrade and page-image rendering.
 *
 * Relies on project-level helpers defined elsewhere (write_to_log,
 * getDocMetadataFolder, executeCommand, executeTika, mkdir_full,
 * getFileImagesPrefix, OSSpecific) and on Composer packages
 * (setasign/fpdi, smalot/pdfparser).
 */
class PDFTools
{
    /**
     * Makes sure the Composer autoloader is loaded.
     *
     * require_once avoids re-executing the autoloader on every instantiation.
     */
    public function __construct()
    {
        require_once 'vendor/autoload.php';
    }

    /**
     * Tells whether a file should be converted to PDF, based on its extension.
     *
     * Looks the extension up in the global $ext2fam map (extension => family).
     * Files without an extension, or with an extension missing from the map,
     * are not converted.
     *
     * @param string $fullPath Path of the candidate file.
     * @return bool True when the extension maps to a non-empty family other than 'json'.
     */
    public function shouldConvertToPdf($fullPath)
    {
        global $ext2fam;

        // PATHINFO_EXTENSION yields '' instead of an undefined index when there is no extension.
        $ext = pathinfo($fullPath, PATHINFO_EXTENSION);
        $extFam = isset($ext2fam[$ext]) ? $ext2fam[$ext] : null;

        // JSON should not be converted to PDF since it holds tweets for now.
        return !empty($extFam) && $extFam !== 'json';
    }

    /**
     * Placeholder: not implemented, always returns null.
     *
     * @param string $full_path Path of the PDF file (currently unused).
     * @return null
     */
    public function fetchTextFromPdf($full_path)
    {

    }

    /**
     * Extracts the plain text of a PDF file.
     *
     * Kept as a separate entry point for existing callers; the actual parsing
     * is shared with getPdfTextNew().
     *
     * @param string $full_path Path of the PDF file.
     * @return string Text returned by the parser.
     */
    public function fetchSimpleTextFromPdf($full_path)
    {
        include_once 'vendor/autoload.php';

        return $this->getPdfTextNew($full_path);
    }

    /**
     * Converts a document to PDF and stores it as "<id>.pdf" in the document's
     * metadata folder. An already existing target is returned untouched.
     *
     * @param string $fullPath Path of the source document.
     * @param mixed  $id       Document id; used for the folder lookup and the target file name.
     * @return string|false Path of the PDF, or false when the source is missing
     *                      or the conversion did not produce a file.
     */
    public function convertToPdf($fullPath, $id)
    {
        $directory = getDocMetadataFolder($id);
        $pdfFile = $directory . DIRECTORY_SEPARATOR . "$id.pdf";

        if (file_exists($pdfFile)) {
            return $pdfFile;
        }

        if (!file_exists($fullPath)) {
            write_to_log("ERROR", "convertToPdf: fullPath doesn't exist: $fullPath");
            return false;
        }

        $fullPath = realpath($fullPath);
        $sofficePath = OSSpecific::getInstance()->getOfficePath();
        $sofficePath_suffix = basename($sofficePath);

        // NOTE(review): paths are only wrapped in double quotes; a path containing
        // '"' or shell metacharacters would break the command. escapeshellarg() is
        // locale-sensitive for non-ASCII names, so confirm before switching.
        executeCommand(dirname($sofficePath), "$sofficePath_suffix --headless --convert-to pdf --outdir \"$directory\" \"$fullPath\"");

        // The converter names its output after the source file; move it to "<id>.pdf".
        $convertedFile = "$directory/" . pathinfo($fullPath, PATHINFO_FILENAME) . ".pdf";
        if (!file_exists($convertedFile)) {
            write_to_log("ERROR", "convertToPdf: conversion produced no PDF for: $fullPath");
            return false;
        }

        if (!rename($convertedFile, $pdfFile)) {
            write_to_log("ERROR", "convertToPdf: could not move $convertedFile to $pdfFile");
            return false;
        }

        return $pdfFile;
    }

    /**
     * Splits a PDF into one single-page PDF per page.
     *
     * Pages are written to "<filename>_SPLIT/<basename>_<page>.pdf". Files whose
     * header reports a version above 1.4 are first passed through
     * convertPDFFilesVersion().
     *
     * Exceptions thrown while opening or importing the source propagate to the
     * caller; failures while writing a page are logged and the loop continues.
     *
     * @param string $filename Path of the PDF to split.
     * @return void
     */
    public function split_pdf($filename)
    {
        // File actually handed to FPDI; differs from $filename only if the
        // temporary space-free copy could not be moved back.
        $sourceFile = $filename;

        // Version validation. Compare the full version so that e.g. 2.0 is also downgraded.
        $version = $this->pdfVersion($filename);
        if (version_compare((string) $version, '1.4', '>')) {
            write_to_log("INFO", "File version is $version, requires version <= 1.4: $filename");

            // The conversion runs on a path without spaces.
            $noSpacePath = str_replace(' ', '_', $filename);
            $moved = false;
            if ($noSpacePath !== $filename) {
                // Never overwrite an unrelated file that already has the space-free name.
                if (!file_exists($noSpacePath) && rename($filename, $noSpacePath)) {
                    $moved = true;
                } else {
                    write_to_log("ERROR", "split_pdf: could not move $filename to $noSpacePath, converting in place");
                }
            }

            $workPath = $moved ? $noSpacePath : $filename;
            $this->convertPDFFilesVersion($workPath, $workPath);

            // Restore the original name: the rest of this method and callers expect the file there.
            if ($moved && !rename($noSpacePath, $filename)) {
                write_to_log("ERROR", "split_pdf: could not move $noSpacePath back to $filename");
                $sourceFile = $noSpacePath;
            }
        }

        // Create output path.
        $path = $filename . '_SPLIT';
        if (!is_dir($path) && !mkdir($path, 0777, true) && !is_dir($path)) {
            write_to_log("ERROR", "split_pdf: could not create output folder: $path");
            return;
        }

        $pdf = new Fpdi();
        $pagecount = $pdf->setSourceFile($sourceFile); // How many pages?

        $name = pathinfo(basename($filename), PATHINFO_FILENAME);

        // Split each page into a new PDF.
        for ($i = 1; $i <= $pagecount; $i++) {
            $new_pdf = new Fpdi();
            $new_pdf->setSourceFile($sourceFile);

            $tplIdx = $new_pdf->importPage($i);
            $new_pdf->AddPage();
            // Last argument: resize the new page to the imported page.
            $new_pdf->useTemplate($tplIdx, null, null, null, null, true);

            try {
                $new_filename = $path . DIRECTORY_SEPARATOR . $name . '_' . $i . ".pdf";
                $new_pdf->Output($new_filename, "F");
            } catch (Exception $e) {
                echo 'Caught exception: ',  $e->getMessage(), "\n";
                write_to_log("ERROR", "Issue with split PDF: " . $e->getMessage());
            }
        }
    }

    /**
     * Reads the PDF version from the first 20 bytes of a file.
     *
     * @param string $filename Path of the file to inspect.
     * @return string|int Version such as "1.4", or 0 when the file cannot be
     *                    opened or no "digit.digit" pattern is found.
     */
    public function pdfVersion($filename)
    {
        // Best effort: an unreadable file simply yields 0.
        $fp = @fopen($filename, 'rb');
        if (!$fp) {
            return 0;
        }

        // A freshly opened handle is already at offset 0.
        $header = fread($fp, 20);
        fclose($fp);

        if ($header === false || !preg_match('/\d\.\d/', $header, $match)) {
            return 0;
        }

        return $match[0];
    }

    /**
     * Runs the platform-specific PDF conversion command and replaces
     * $outputPath with the "<outputPath>_new" file it is expected to produce.
     *
     * NOTE(review): the command comes from OSSpecific::getCMDPdfConvert() and is
     * built without $path/$outputPath; presumably OSSpecific knows which file
     * to convert - confirm.
     *
     * @param string $path       Source PDF (not used when building the command).
     * @param string $outputPath Final location of the converted PDF.
     * @return bool True when the converted file was moved into place.
     */
    public function convertPDFFilesVersion($path, $outputPath)
    {
        $cmd = OSSpecific::getInstance()->getCMDPdfConvert();
        shell_exec($cmd);

        $converted = $outputPath . "_new";
        if (!file_exists($converted)) {
            write_to_log("ERROR", "convertPDFFilesVersion: expected output not found: $converted");
            return false;
        }

        if (!rename($converted, $outputPath)) {
            write_to_log("ERROR", "convertPDFFilesVersion: could not move $converted to $outputPath");
            return false;
        }

        return true;
    }

    /**
     * Runs Ghostscript (gswin64, CompatibilityLevel 1.4) on every file below
     * $path, writing each result under the same name below $outputPath.
     *
     * Sub-directories are processed recursively and mirrored under $outputPath;
     * directories named "OUTPUT" are skipped.
     *
     * @param string $path       Directory to read from.
     * @param string $outputPath Directory to write converted files to.
     * @return void
     */
    public function convertPDFDirVersion($path, $outputPath)
    {
        $handle = opendir($path);
        if ($handle === false) {
            write_to_log("ERROR", "convertPDFDirVersion: cannot open directory: $path");
            return;
        }

        while (false !== ($entry = readdir($handle))) {
            if ($entry === '.' || $entry === '..') {
                continue;
            }

            $source = $path . DIRECTORY_SEPARATOR . $entry;
            $target = $outputPath . DIRECTORY_SEPARATOR . $entry;

            if (is_dir($source)) {
                // Skip OUTPUT folders entirely; a directory must never reach Ghostscript.
                if ($entry === 'OUTPUT') {
                    continue;
                }
                if (!is_dir($target) && !mkdir($target, 0777, true) && !is_dir($target)) {
                    write_to_log("ERROR", "convertPDFDirVersion: cannot create directory: $target");
                    continue;
                }
                $this->convertPDFDirVersion($source, $target);
                continue;
            }

            // Quote both paths so names containing spaces survive the shell.
            $cmd = "gswin64 -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET -dBATCH -sOutputFile=\"$target\" \"$source\"";
            write_to_log("INFO", $cmd);
            shell_exec($cmd);
        }

        closedir($handle);
    }

    /**
     * Splits every "*.pdf" file below $folderPath into single pages and then
     * renames the source file to "<name>.original".
     *
     * The file list is collected before any file is touched, so pages written
     * by split_pdf() during this run are not picked up and split again.
     *
     * @param mixed  $mysqli     Unused; kept so existing callers keep working.
     * @param string $folderPath Root folder to scan recursively.
     * @return void
     */
    public function splitProcess($mysqli, $folderPath)
    {
        $pdfPaths = [];
        $iterator = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($folderPath));
        foreach ($iterator as $fileInfo) {
            // File names are used for filesystem access only, so no SQL escaping here.
            $fileName = $fileInfo->getFilename();
            if ($fileInfo->isFile() && pathinfo($fileName, PATHINFO_EXTENSION) == 'pdf') {
                $pdfPaths[] = $fileInfo->getPath() . DIRECTORY_SEPARATOR . $fileName;
            }
        }

        foreach ($pdfPaths as $pdfPath) {
            $this->split_pdf($pdfPath);
            if (!rename($pdfPath, $pdfPath . ".original")) {
                write_to_log("ERROR", "splitProcess: could not rename $pdfPath to $pdfPath.original");
            }
        }
    }

    /**
     * Recursively copies the contents of directory $src into directory $dst,
     * creating $dst (and missing parents) when needed.
     *
     * @param string $src Existing source directory.
     * @param string $dst Destination directory.
     * @return void
     */
    public function recurse_copy($src, $dst)
    {
        if (!is_dir($src) || ($dir = opendir($src)) === false) {
            write_to_log("ERROR", "recurse_copy: cannot open source directory: $src");
            return;
        }

        if (!is_dir($dst)) {
            @mkdir($dst, 0777, true);
        }

        while (false !== ($file = readdir($dir))) {
            if ($file === '.' || $file === '..') {
                continue;
            }

            $from = $src . '/' . $file;
            $to = $dst . '/' . $file;

            if (is_dir($from)) {
                $this->recurse_copy($from, $to);
            } elseif (!copy($from, $to)) {
                write_to_log("ERROR", "recurse_copy: could not copy $from to $to");
            }
        }

        closedir($dir);
    }

    /**
     * Renders the pages of a PDF to PNG images unless the image folder for the
     * document already exists, then reports how many images are present.
     *
     * @param string $fileName Path of the PDF to render.
     * @param mixed  $dcid     Document id used to derive the image file prefix.
     * @return int Number of "<prefix>*.png" files found.
     */
    public function pdfToImage($fileName, $dcid)
    {
        $prefix = getFileImagesPrefix($dcid);
        $folderName = dirname($prefix);

        // An existing folder is taken to mean the images were generated already.
        if (!file_exists($folderName)) {
            mkdir_full($folderName, "777");
            $partialCommandLine = "org.apache.pdfbox.PDFToImage -imageType png -outputPrefix \"$prefix\" \"$fileName\"";
            executeTika($partialCommandLine);
        }

        // glob() returns false on error, which count() must not receive.
        $images = glob($prefix . "*.png");

        return is_array($images) ? count($images) : 0;
    }

    /**
     * Parses a PDF file and returns its text content.
     *
     * @param string $pdfFile Path of the PDF file.
     * @return string Text returned by the parser.
     */
    public function getPdfTextNew($pdfFile)
    {
        $parser = new Parser();
        $pdf = $parser->parseFile($pdfFile);

        return $pdf->getText();
    }
}