﻿<?php

require __DIR__ . '/fb_api_interface.php';

/**
 * Identity accessor: a data item is used directly as its own element ID.
 *
 * @param mixed $data_item item taken from a data set
 * @return mixed the very same value, unchanged
 */
function get_element_id($data_item){
    $element_id = $data_item;
    return $element_id;
}

// Initialised here; not read by any code visible in this section.
$convert_to_list = [];

// Converter for data sets that already are PHP arrays: hands the input back untouched.
// (Idea kept from an earlier draft: load CSV rows via str_getcsv() over file('data.csv').)
$convert_to_array["array"] = function($data_set){
    return $data_set;
};

/**
 * Converter for JSON data sets: decodes a document shaped like
 * {"data": [{"id": ...}, ...]} and returns the IDs found under "data".
 *
 * The keys of the "data" collection are preserved in the result. Invalid JSON,
 * a missing or non-array "data" member and entries without an "id" produce no
 * entries instead of raising errors.
 *
 * @param string $data_set raw JSON text
 * @return array element IDs, keyed like the entries of "data"
 */
$convert_to_array["json"] = function($data_set){
    $list = [];
    // decode to associative arrays: the lookups below use array syntax, which is a
    // fatal error on the stdClass objects json_decode() returns by default
    $data_from_json = json_decode($data_set, true);
    if (!is_array($data_from_json) || empty($data_from_json["data"]) || !is_array($data_from_json["data"])){
        return $list;
    }
    foreach($data_from_json["data"] as $index => $element){
        // skip malformed entries rather than emitting undefined-index notices
        if (is_array($element) && isset($element["id"])){
            $list[$index] = $element["id"];
        }
    }
    return $list;
};

/**
 * Request the data of one element through get_data() and wrap its child records.
 *
 * @param string $input_id ID of the element to query
 * @param string $api_fn_name name of the API function, handed on to get_data()
 * @param string $children_type key of the child collection, handed on to extract_data()
 * @return array empty array when the request throws or the response is empty; otherwise an
 *               array with the keys "data", "format" and "parent_id"
 */
function get_element_data($input_id, $api_fn_name, $children_type){
    try {
        $response = get_data($input_id, $api_fn_name);
    } catch(Exception $error) {
        // a failed request is reported on stdout and treated like "no data"
        echo 'Message: ' . $error->getMessage() . "\n";
        return [];
    }

    if (empty($response)){
        return [];
    }

    return [
        "data" => extract_data($response, $children_type, "data"),
        "format" => FB_DEFAULTS["extradata_format"],
        "parent_id" => $input_id // not currently used
    ];
}

/**
 * Make a nested path, creating directories down the path (recursively)
 * source: http://www.binarytides.com/php-create-nested-directories-for-a-given-path/
 * make_path('/var/www/a/b/c/d/abc.zip')
 *
 * Only the directory part of the path is created; the last path component is
 * treated as a file name and left alone. Every directory created by this call
 * is chmod'ed to 0777.
 *
 * @param string $path_to_file path of the file whose parent directories are needed
 * @return bool true when the parent directory exists afterwards, false otherwise
 */
function make_path($path_to_file){
    $path_to_file = (string) $path_to_file;
    $dir = pathinfo($path_to_file, PATHINFO_DIRNAME);
    if (is_dir($dir)){
        return true;
    }
    // the directory name has stopped shrinking ('' for an empty path, or a root that
    // does not exist): recursing any further would never terminate
    if ($dir === '' || $dir === $path_to_file){
        return false;
    }
    // parents first
    if (!make_path($dir)){
        return false;
    }
    if (mkdir($dir)){
        chmod($dir, 0777);
        return true;
    }
    // mkdir() also fails when somebody else created the directory in the meantime;
    // that still counts as success
    return is_dir($dir);
}


/**
 * Write one element (post, comment, ...) to disk as two files: its message text
 * and its JSON-encoded metadata.
 *
 * Both files are placed below "$sharedFolder/docs/" (global $sharedFolder), inside the
 * directories named by FB_DEFAULTS["doc_text_dir"] and FB_DEFAULTS["doc_extradata_dir"].
 *
 * @param string $folderName sub-folder, relative to those directories, that receives the files
 * @param $data_set_name not used by this function
 * @param string $element_id ID of the form "<parent id>_<own id>"
 * @param array $element_obj element record; "created_time", "message" and "from" are read when present
 * @param string $element_type "post" files are named after the part of the ID behind the underscore,
 *                             every other type after the complete ID
 * @param $grandparent_id - ID of parent element of parent element (page/user/group/event)
 * @return bool false when the ID is malformed or one of the files could not be written, true otherwise
 */
function save_data($folderName, $data_set_name, $element_id, $element_obj, $element_type, $grandparent_id){
    global $sharedFolder;

    // 'real' ID of element starts only after the underscore ("_")
    // this is what we use for the filename of a post
    $ids_arr = explode("_", $element_id);
    if (count($ids_arr) != 2){
        // report the offending ID; $element_obj is an array and cannot be interpolated
        echo "Problem extracting ids from '$element_id'/\n";
        return false;
    }

    // a missing "created_time" results in an empty date rather than an undefined-index notice
    $created_time = isset($element_obj["created_time"]) ? (string) $element_obj["created_time"] : "";
    $date_short = (string) substr($created_time, 0, FB_DEFAULTS["date_length"]); // remove datetime offset
    unset($element_obj["created_time"]); // remove unneeded property (we will be using "date_field" instead)
    $element_obj["date_field"] = str_replace("T", " ", $date_short); // remove 'T' delimiter

    $element_obj["t_parent_id"] = $ids_arr[0];
    $element_obj["t_id"] = $ids_arr[1];

    // posts are filed under their own ID, comments etc. under the complete ID
    $file_id = ($element_type == "post") ? $ids_arr[1] : $element_id;

    $dir_path = "$sharedFolder/docs/";
    $file_path = $folderName . "/" . $element_type . $file_id . "." . FB_DEFAULTS["message_format"];

    // deal with message
    $message = isset($element_obj["message"]) ? $element_obj["message"] : "";
    $message_path = $dir_path . "/" . FB_DEFAULTS["doc_text_dir"] . "/" . $file_path; // put it all together
    make_path($message_path); // create directories as required
    // print_r() needs its second argument to RETURN the text instead of printing it
    echo("\nWrite MESSAGE to file: " . print_r($message, true) . " -> '$message_path'.\n");
    $message_written = file_put_contents($message_path, $message) !== false;

    // deal with meta data
    // label as type "Facebook"
    $element_obj["#Type"] = FB_DEFAULTS["source_type"];
    // add author name
    if (!empty($element_obj["from"]) && !empty($element_obj["from"]["name"])){
        $element_obj["file_author"] = $element_obj["from"]["name"];
        $element_obj["user_screen_name"] = isset($element_obj["from"]["id"]) ? $element_obj["from"]["id"] : null;
    }
    if (!empty($grandparent_id)){
        $element_obj["grandparent_id"] = $grandparent_id;
    }
    $meta_data_path = $dir_path . "/" . FB_DEFAULTS["doc_extradata_dir"] . "/" . $file_path . "." . FB_DEFAULTS["extradata_format"]; // put it all together
    make_path($meta_data_path); // create directories as required
    echo("\nWrite EXTRADATA to file: " . print_r($element_obj, true) . " -> '$meta_data_path'.\n");
    $meta_written = file_put_contents($meta_data_path, json_encode($element_obj)) !== false;

    // both files are always attempted; the result tells the caller whether both made it to disk
    return $message_written && $meta_written;
}

/**
 * Decide whether a walk from $startingPoint down to $endPoint is possible in $hierarchy.
 *
 * All of the following must hold:
 *  - both names are non-empty strings with a non-empty entry in $hierarchy,
 *  - both entries carry a non-empty "hierarchy_level",
 *  - the start level is strictly smaller than the end level (end point not reached yet),
 *  - the start entry lists at least one child (there is a lower level to go to).
 *
 * NOTE(review): a "hierarchy_level" of 0 counts as empty and is therefore rejected -
 * confirm that levels start at 1.
 *
 * @param array $hierarchy entity type name => description with "hierarchy_level" and "children"
 * @param string $startingPoint entity type the walk starts at
 * @param string $endPoint entity type the walk should stop at
 * @return bool
 */
function is_start_end_valid($hierarchy, $startingPoint, $endPoint){
    if (empty($startingPoint) || empty($endPoint) || !is_string($startingPoint) || !is_string($endPoint)){
        return false;
    }
    if (empty($hierarchy[$startingPoint]) || empty($hierarchy[$endPoint])){
        return false;
    }
    $start = $hierarchy[$startingPoint];
    $end = $hierarchy[$endPoint];
    if (empty($start["hierarchy_level"]) || empty($end["hierarchy_level"])){
        return false;
    }
    // have we reached our end point already
    if (!($start["hierarchy_level"] < $end["hierarchy_level"])){
        return false;
    }
    // a missing "children" entry means "no lower level"; calling count() on it
    // would raise a warning or a TypeError depending on the PHP version
    if (empty($start["children"])){
        return false;
    }
    $children = $start["children"];
    return (is_array($children) || $children instanceof Countable) && count($children) > 0;
}

/**
 * Pull the value stored under $key out of an API response.
 *
 * When the response holds a non-empty array under $children_type, the value is taken
 * from inside that array; otherwise a non-empty top-level $response[$key] is used.
 * If neither lookup succeeds a diagnostic line is echoed and null is returned.
 *
 * @param array $response decoded API response
 * @param string $children_type key of the child collection (e.g. "posts"); may be empty
 * @param string $key key to read (e.g. "data")
 * @return mixed the value found, or null when nothing could be extracted
 */
function extract_data($response, $children_type, $key){
    if (!empty($children_type) && !empty($response[$children_type]) && is_array($response[$children_type])){
        // the child collection exists, but it may still lack the requested key
        if (isset($response[$children_type][$key])){
            return $response[$children_type][$key];
        }
    } elseif (!empty($response[$key])) {
        return $response[$key];
    }
    // print_r() needs its second argument to RETURN the dump; without it the dump is
    // printed ahead of the message and "1" ends up in the text
    echo "Unable to extract data by children type '$children_type' and key '$key' from '" . print_r($response, true) . "'\n";
    return null;
}

/**
 * Reduce a list of element records to the list of their IDs.
 *
 * @param array $input_data element records, each carrying an "id" entry
 * @return array the "id" of every record, under the same key as the record
 */
function extract_children($input_data){
    $pick_id = function ($child){
        return $child["id"];
    };
    return array_map($pick_id, $input_data);
}

/**
 * process 1 item from the list at a time:
 * #1 - get data for given entity (e.g. page)
 * #2 - save data, child element by child element (e.g. post by post) in different files
 * #3 - re-run this process on the child entities (e.g. the posts belonging to the page)
 *
 * @param string $item_id - the ID of the element we are interested in!
 * @param $api_function - name of function used to process request response
 * @param $first_child - type name of the children; "s" is appended to form the response key
 * @param $end_entity_type - e.g. (we keep going until we get to) "post" / "comment" level
 * @param $connection - handed to tryInsertUnique() and on to fb_process_list()
 * @param $element_type - page/post etc - type of element we are processing
 * @param $parent_id - ID of the parent of $item_id, stored with each child as its grandparent
 * @param $project_name - only passed through to fb_process_list()
 * @param $data_set_name - passed through to save_data() and fb_process_list()
 * @param $folderName - passed through to save_data() and fb_process_list()
 * @return bool false when the element was skipped, had no data or an exception was caught; true otherwise
 */
function process_list_item($item_id, $api_function, $first_child, $end_entity_type, $connection, $element_type, $parent_id, $project_name, $data_set_name,$folderName){
    $children_type = $first_child . "s";

    // currently duplicate posts are skipped
    if ($element_type == "post"){
        // check element id against list of existing elements (in database table)
        // if new entry, add it to the list of existing entries and continue
        // if entry already exists, do not process it again - return false
        // we are currently checking by unique posts (before we even get to their comments)
        // i.e. each post is only parsed once
        // this means more recent comments will not be picked up
        try {
            $db_elem_type = "fb_" . $element_type;
            // result is treated as the number of newly inserted rows
            $new_entry_rows = tryInsertUnique($connection, $db_elem_type, $item_id);
            if ($new_entry_rows < 1){ // have we handled this element (post) yet
                echo "We have already processed this element (id: $item_id).\n";
                return false;
            }
        } catch(Exception $exception) {
            echo "Message: " . $exception->getMessage() . "\n";
            return false;
        }
    }
    try {
        $data_obj = get_element_data($item_id, $api_function, $children_type); // #1
        if (empty($data_obj) || empty($data_obj["data"])){
            echo "empty response\n";
            return false;
        }
        $data_arr = extract_data($data_obj, $children_type, "data");
        foreach ($data_arr as $element_obj){
            save_data($folderName, $data_set_name, $element_obj["id"], $element_obj, $first_child, $parent_id); // #2
        }
        // recurse ONCE over the complete child list: fb_process_list() already visits every
        // child, so calling it from inside the loop above would repeat the whole list per child
        fb_process_list(extract_children($data_arr), $first_child, $end_entity_type, $connection, $item_id, $project_name, $data_set_name,$folderName); // #3
        echo "List item processing completed (id: $item_id).\n";
        return true;
    } catch(Exception $exception) {
        echo "Message: " . $exception->getMessage() . "\n";
        return false;
    }
}

/**
 * Run process_list_item() for every ID of a list, provided the hierarchy still
 * allows descending from $start_entity_type towards $end_entity_type.
 *
 * @param $input_arr - non-associative array of IDs to search for
 * @param $start_entity_type - type of the elements named in $input_arr
 * @param $end_entity_type - type at which the descent stops
 * @param $connection - connection to DB, default: null
 * @param $parent_id - id of parent element
 * @param $project_name
 * @param $data_set_name
 * @param $folderName
 * @return bool false when start/end are not a valid combination, true once the list has been walked
 */
function fb_process_list($input_arr, $start_entity_type, $end_entity_type, $connection, $parent_id, $project_name, $data_set_name,$folderName){
    $can_descend = is_start_end_valid(DATA_HIERARCHY, $start_entity_type, $end_entity_type);
    if (!$can_descend){
        return false;
    }

    // 'skeleton' description of the generic element type we start from; it also names
    // the starting point of the next recursive step
    $element_structure = DATA_HIERARCHY[$start_entity_type];

    // the data has to be split: e.g. one "/posts" edge API call yields several posts,
    // each of which is handled and saved separately
    foreach ($input_arr as $item_id){
        // for the time being only the first child type of the generic structure is followed
        $first_child = $element_structure["children"][0];
        process_list_item(
            $item_id,
            $element_structure["api_function"],
            $first_child,
            $end_entity_type,
            $connection,
            $start_entity_type,
            $parent_id,
            $project_name,
            $data_set_name,
            $folderName
        );
    }

    return true;
}

/**
 * Reduce each URL to its entity name, i.e. the first segment of the URL path
 * ("https://www.facebook.com/somepage/posts" => "somepage").
 *
 * Surrounding whitespace (such as the line ending kept by file()) is ignored.
 * Entries that are not strings, cannot be parsed or have no path segment map to null.
 *
 * @param array $url_list URLs to reduce
 * @return array entity name (or null) per URL, under the same keys as $url_list
 */
function strip_entities_from_urls($url_list){
    return array_map(function ($url_single){
        if (!is_string($url_single)){
            return null;
        }
        // strip the whitespace first so a trailing newline cannot leak into the name
        $url_parsed = parse_url(trim($url_single));
        if (!is_array($url_parsed) || !isset($url_parsed['path'])){
            return null;
        }
        // the path starts with "/", so index 0 is empty and index 1 holds the name
        $path_sections = explode("/", $url_parsed['path']);
        return isset($path_sections[1]) ? $path_sections[1] : null;
    }, $url_list);
}

/**
 * Read a data set from "<this directory>/data_sets/<project>/<data set>.<extension>".
 *
 * The path is echoed before anything is read. Only the "file" format is read, via
 * file(); every other format produces an empty array.
 *
 * @param $project_name directory below data_sets/
 * @param $data_set_name file name without extension
 * @param $file_extension extension without the leading dot
 * @param $file_format "file" to read the file
 * @return array|bool result of file() for the "file" format, otherwise an empty array
 */
function fb_load_data_set($project_name, $data_set_name, $file_extension, $file_format){
    $file_path = __DIR__ . "/data_sets/{$project_name}/{$data_set_name}.{$file_extension}";
    echo "Read from '$file_path'\n";

    if ($file_format != "file"){
        return array();
    }
    return file($file_path);
}

/**
 * Choose the entity type to work with: an explicitly given type wins, otherwise the
 * default registered for $target_name in FB_DEFAULTS["entity_type"] is used.
 *
 * @param $entity_type explicitly requested type; may be empty
 * @param $data_set not used by this function
 * @param $target_name key into FB_DEFAULTS["entity_type"], e.g. "start"
 * @return mixed
 */
function fb_determine_entity_type($entity_type, $data_set, $target_name){
    if (empty($entity_type)){
        // the same default lookup applies to every target name, "start" included
        return FB_DEFAULTS["entity_type"][$target_name];
    }
    return $entity_type;
}

