/**
 * Copyright 2020 (C) Hailo Technologies Ltd.
 * All rights reserved.
 *
 * Hailo Technologies Ltd. ("Hailo") disclaims any warranties, including, but not limited to,
 * the implied warranties of merchantability and fitness for a particular purpose.
 * This software is provided on an "AS IS" basis, and Hailo has no obligation to provide maintenance,
 * support, updates, enhancements, or modifications.
 *
 * You may use this software in the development of any project.
 * You shall not reproduce, modify or distribute this software without prior written permission.
 **/
/**
 * @file example.c
 * This example demonstrates the basic data-path on HailoRT.
 * The program scans for Hailo-8 devices connected to a provided Ethernet interface, generates a random dataset,
 * and runs it through the device.
 **/
#include <net/if.h>
#include <pthread.h>
#include <unistd.h>
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <linux/limits.h>
#include <time.h>

#include <hailo/hailort.h>
#include "example_utils.h"

/* Capacity of the stream tables in this file: all_stream_infos holds NOF_STREAMS entries,
 * while the per-output tables hold NOF_STREAMS-1 entries. */
#define NOF_STREAMS (6)
/* Nanoseconds per second; used to convert timespec.tv_nsec to seconds. */
#define NSEC_IN_SEC (1e+9)
/* Maximum number of send/receive timestamps recorded per thread. */
#define LATENCY_MEASUREMENTS (100)

/* Timestamps captured by the send thread, one per sampled frame. */
struct timespec sent_clock_t[LATENCY_MEASUREMENTS];
/* Timestamps captured by the receive threads, indexed as [thread id][sample]. */
struct timespec recv_clock_t[NOF_STREAMS-1][LATENCY_MEASUREMENTS];
/* Number of latency samples recorded; written by the receive side and read by calc_latency(). */
unsigned int actual_measurments;

/**
 * Sender thread entry point.
 *
 * Allocates one host frame, fills it with random bytes and writes the same frame to the
 * input stream num_images times. Every (num_images / 100)-th frame (every frame when
 * num_images < 100), and at most LATENCY_MEASUREMENTS times, the send time is printed and
 * stored in sent_clock_t[] for the latency calculation.
 *
 * @param args  Pointer to a write_thread_args_t. Its status field receives the final status.
 * @return Always NULL; the result is reported through write_thread_args_t.status.
 */
void* _send_thread(void *args) {
    hailo_status status = HAILO_SUCCESS;
    write_thread_args_t *write_args = (write_thread_args_t*)args;
    uint8_t* src_data = (uint8_t*)malloc(write_args->host_frame_size);
    unsigned lat_counter = 0;
    uint32_t flag_100 = 0;
    uint32_t total_images = 0;
    struct timespec ts;

    if (NULL == src_data) {
        printf("-E- Failed to allocate buffers\n");
        status = HAILO_OUT_OF_HOST_MEMORY;
        goto l_release_buffers;
    }
    for(size_t i = 0; i < write_args->host_frame_size; i++) {
        src_data[i] = (uint8_t)(rand() % 256);
    }

    /* A non-positive image count means "send nothing"; casting a negative value to
     * uint32_t would otherwise turn it into a huge loop bound. */
    if (write_args->num_images > 0) {
        total_images = (uint32_t)write_args->num_images;
    }
    /* Sampling interval: aim for about 100 samples over the whole run. */
    flag_100 = total_images / 100;
    if (flag_100==0) {
        flag_100 = 1;
    }

    for (uint32_t i = 1; i <= total_images; i++) {
        if ((i % flag_100==0) && (lat_counter < LATENCY_MEASUREMENTS)) {
            clock_gettime(CLOCK_REALTIME, &ts);
            /* Milliseconds are zero-padded so that e.g. 5 ms is not shown as ".5". */
            printf("-I- [%10ld.%03ld s] Send frame [%3u/%3d] 0x%02x%02x%02x%02x\n", (long)ts.tv_sec, (long)(ts.tv_nsec/1000000),
                (unsigned)i, write_args->num_images, src_data[0], src_data[1], src_data[2], src_data[3]);
            sent_clock_t[lat_counter++] = ts;
        }
        status = hailo_stream_sync_write_all_raw_buffer(write_args->input_stream, src_data, 0, write_args->host_frame_size);
        if (status != HAILO_SUCCESS) {
            printf("-E- hailo_stream_sync_write_all_raw_buffer failed: %d\n",status);
            break;
        }
    }

l_release_buffers:
    FREE(src_data);
    write_args->status = status;
    return NULL;
}

void* _recv_thread(void *args) {
    hailo_status status = HAILO_SUCCESS;
    recv_thread_args_t *recv_args = (recv_thread_args_t *)args;
    struct timespec ts;
    FILE *fp;
    uint8_t *dst_data = (uint8_t*)malloc(recv_args->host_frame_size);
    unsigned lat_counter = 0;
    uint32_t flag_100 = 0;

    printf("-I- Recv thread %d started\n", recv_args->tid);
    if (recv_args->write_log==1) {
        char *log_name = (char*)malloc(20 * sizeof(char));
        sprintf(log_name, "tid_%d.log", recv_args->tid);
        fp = fopen(log_name, "w");
        free(log_name);
    }
    flag_100 = (uint32_t)recv_args->num_images / 100;
    if (flag_100==0)
        flag_100 = 1;
    
    actual_measurments = 0;

    for (uint32_t j = 1; j <= (uint32_t)recv_args->num_images; j++) {
        status = hailo_stream_sync_read_all_raw_buffer(recv_args->output_stream, dst_data, 0, recv_args->host_frame_size);
        REQUIRE_SUCCESS(status, l_exit, "hailo_stream_sync_read_all_raw_buffer failed");
        if ((j % flag_100==0) && (lat_counter < LATENCY_MEASUREMENTS)) {
            clock_gettime(CLOCK_REALTIME, &ts);
            printf("-I- [%10ld.%ld s] TID:%d Recv [%3d/%3d] 0x%x%x%x%x\n", (long)ts.tv_sec, ts.tv_nsec/1000000, recv_args->tid, j, recv_args->num_images, dst_data[0], dst_data[1], dst_data[2], dst_data[3]);
            recv_clock_t[recv_args->tid][lat_counter++] = ts;
            actual_measurments++;
        }
        if (recv_args->write_log==1)
            fprintf(fp, "%d Recv [%10ld.%ld s] %3d/%3d %x\n", recv_args->tid, (long)ts.tv_sec, ts.tv_nsec/1000000, j, recv_args->num_images, *dst_data);
    }
    if (recv_args->write_log==1)
        fclose(fp);
    
l_exit:
    free(dst_data);
    recv_args->status = status;
    return NULL;
}

/**
 * Runs one inference session: starts one receive thread per output stream and one send
 * thread for the input stream, then waits for all of them.
 *
 * @param input_stream         Input stream the send thread writes to.
 * @param input_stream_info    Stream info of the input stream.
 * @param output_streams       Array of output_stream_cnt output streams.
 * @param output_streams_info  Array of output_stream_cnt stream infos, parallel to output_streams.
 * @param output_stream_cnt    Number of output streams; must be in [0, NOF_STREAMS-1].
 * @param num_images           Number of frames to send and to receive on every output.
 * @param write_log            When 1, every receive thread writes a log file.
 * @param params               Array of output stream parameters, parallel to output_streams.
 * @param in_param             Input stream parameters.
 * @return HAILO_SUCCESS when every thread started and finished successfully,
 *         HAILO_INTERNAL_FAILURE otherwise.
 */
hailo_status infer(
    hailo_input_stream input_stream, hailo_stream_info_t *input_stream_info,
    hailo_output_stream *output_streams, hailo_stream_info_t *output_streams_info,
    int output_stream_cnt, int num_images, int write_log, hailo_eth_output_stream_params_t* params, hailo_eth_input_stream_params_t in_param)
{
    hailo_status status = HAILO_SUCCESS;
    pthread_t write_thread;
    int write_started = 0;
    recv_thread_args_t recv_args[NOF_STREAMS-1];
    pthread_t recv_threads[NOF_STREAMS-1];
    int recv_started[NOF_STREAMS-1] = {0};

    /* The per-output tables above hold NOF_STREAMS-1 entries; refuse anything larger. */
    if ((output_stream_cnt < 0) || (output_stream_cnt > NOF_STREAMS-1)) {
        printf("-E- Unsupported number of output streams: %d\n", output_stream_cnt);
        return HAILO_INTERNAL_FAILURE;
    }

    write_thread_args_t write_args = {
        .input_stream_info = input_stream_info,
        .input_stream = input_stream,
        .status = HAILO_UNINITIALIZED,
        .output_streams_cnt = output_stream_cnt,
        .num_images = num_images,
        .host_frame_size = hailo_get_host_frame_size(input_stream_info, &in_param.base_params)
    };

    /* Zero the argument structs so that fields not set below have a defined value. */
    memset(recv_args, 0, sizeof(recv_args));
    for (int ii=0; ii<output_stream_cnt; ii++) {
        recv_args[ii].output_stream_info = &output_streams_info[ii];
        recv_args[ii].output_stream = output_streams[ii];
        recv_args[ii].tid = ii;
        recv_args[ii].status = HAILO_UNINITIALIZED;
        recv_args[ii].num_images = num_images;
        recv_args[ii].write_log = write_log;
        recv_args[ii].host_frame_size = hailo_get_host_frame_size(&output_streams_info[ii], &params[ii].base_params);
        if (0 == pthread_create(&recv_threads[ii], NULL, _recv_thread, &recv_args[ii])) {
            recv_started[ii] = 1;
        } else {
            printf("-E- Failed to start Recv thread %d\n", ii);
        }
    }

    /* The sender is started even if a receiver failed to start, so that the receivers
     * that are running get data and can finish; the failure is reported below. */
    if (0 == pthread_create(&write_thread, NULL, _send_thread, &write_args)) {
        write_started = 1;
        pthread_join(write_thread, NULL);
    } else {
        /* NOTE(review): the receivers are still joined below; this relies on the stream
         * read eventually failing when nothing is sent - confirm the read timeout. */
        printf("-E- Failed to start write_thread\n");
    }

    /* A thread that never ran leaves its status at HAILO_UNINITIALIZED, which is reported
     * as a failure by the checks below. */
    if (HAILO_SUCCESS != write_args.status) {
        printf("-E- write_thread failed\n");
        status = HAILO_INTERNAL_FAILURE;
    }
    for (int ii=0; ii<output_stream_cnt; ii++) {
        if (recv_started[ii]) {
            /* Only join threads that were actually created. */
            pthread_join(recv_threads[ii], NULL);
        }
        printf("-I- Closing Recv thread #%d: %d\n", recv_args[ii].tid, recv_args[ii].status);
        if (HAILO_SUCCESS != recv_args[ii].status) {
            printf("-E- Recv thread %d failed\n", recv_args[ii].tid);
            status = HAILO_INTERNAL_FAILURE;
        }
    }
    (void)write_started;
    return status;
}

/**
 * Prints a banner with the name and shape (height, width, features) of the input stream
 * and of every output stream.
 *
 * @param all_stream_infos  Stream infos; entry 0 is printed as the input and entries
 *                          1..cnt are printed as outputs 0..cnt-1.
 * @param cnt               Number of output entries that follow the input entry.
 */
void print_net_banner(hailo_stream_info_t *all_stream_infos, int cnt) {
    const hailo_stream_info_t *input_info = &all_stream_infos[0];

    printf(BOLDCYAN);
    printf("-I-----------------------------------------------\n");
    printf("-I- Input: %s (%d, %d, %d)\n", input_info->name, input_info->shape.height, input_info->shape.width, 
        input_info->shape.features);

    /* Outputs are numbered from zero but live one slot after the input entry. */
    for (int out_idx = 0; out_idx < cnt; out_idx++) {
        const hailo_stream_info_t *out_info = &all_stream_infos[out_idx + 1];

        printf("-I- Output[%d]: %s (%d, %d, %d)\n", out_idx, out_info->name, out_info->shape.height, out_info->shape.width, 
            out_info->shape.features);
    }

    printf("-I-----------------------------------------------\n");
    printf(RESET);
}

/**
 * Converts a timespec to milliseconds.
 *
 * The seconds are converted to double before scaling, so the multiplication cannot
 * overflow on platforms where time_t is a 32-bit integer.
 *
 * @param ts  Timestamp to convert.
 * @return The timestamp in milliseconds, including the fractional part from tv_nsec.
 */
double get_time_from_ts(struct timespec ts) {
    return (double)ts.tv_sec * 1000.0 + (double)ts.tv_nsec / 1000000.0;
}

double calc_latency(int count) {
    double result = 0;
    double cur_rcv;
    double cur_snd;

    for (int j=0; j<actual_measurments; j++) {
        cur_snd = get_time_from_ts(sent_clock_t[j]);
        cur_rcv = get_time_from_ts(recv_clock_t[0][j]);
        for (int ii=1; ii<count; ii++) {
            if (get_time_from_ts(recv_clock_t[ii][j]) > cur_rcv) {
                cur_rcv = get_time_from_ts(recv_clock_t[ii][j]);
            }
        }
        result += (cur_rcv - cur_snd);
    }
    return result/actual_measurments;
}

/* Returns amount / seconds, or 0 when no time has elapsed (avoids dividing by zero). */
static double stats_rate_per_second(double amount, double seconds) {
    return (seconds > 0.0) ? (amount / seconds) : 0.0;
}

/**
 * Prints the summary of an inference run: total time, average FPS, average latency and
 * the data rate of the input stream and of every output stream.
 *
 * @param start_time         Time taken just before the inference started.
 * @param end_time           Time taken just after the inference finished.
 * @param all_stream_infos   Stream infos; entry 0 is used for the send rate and entries
 *                           1..output_stream_cnt for the receive rates.
 * @param output_stream_cnt  Number of output streams.
 * @param num_images         Number of frames that were processed.
 * @param latency_readout    Firmware latency readout; printed only when FW_LAT is defined.
 */
void print_inference_stats(struct timespec start_time, struct timespec end_time, hailo_stream_info_t *all_stream_infos, int output_stream_cnt, int num_images, uint32_t latency_readout)
{
    double start_time_secs = (double)start_time.tv_sec + ((double)start_time.tv_nsec / NSEC_IN_SEC);
    double end_time_secs = (double)end_time.tv_sec + ((double)end_time.tv_nsec / NSEC_IN_SEC);
    double infer_time_secs = end_time_secs - start_time_secs;
    static float mbit_per_byte = 8.0f / 1024.0f / 1024.0f;
    uint32_t send_frame_size = all_stream_infos[0].hw_frame_size;
    uint32_t recv_frame_size = 0;

    #ifndef FW_LAT
    (void)latency_readout; /* only used by the FW latency line below */
    #endif

    printf("-I- print stats\n");
    printf(BOLDGREEN);
    printf("-I-----------------------------------------------\n");
    printf("-I- Total time:        %4.2lf sec\n", infer_time_secs);
    printf("-I- Average FPS:       %4.2lf\n", stats_rate_per_second((double)num_images, infer_time_secs));
    printf("-I- Average Latency:   %3.2lf ms\n", calc_latency(output_stream_cnt));
    #ifdef FW_LAT
    printf("-I- FW latency (Beta): %3.2f ms\n", latency_readout/1000000.0f);
    #endif
    printf("-I- Send data rate:    %-4.2lf Mbit/s\n",
        stats_rate_per_second((double)(num_images) * send_frame_size * mbit_per_byte, infer_time_secs));
    for (int i=1; i<=output_stream_cnt; i++) {
        recv_frame_size = all_stream_infos[i].hw_frame_size;
        /* Outputs are numbered from zero, matching print_net_banner() and the thread ids. */
        printf("-I- Recv[%d] data rate: %-4.2lf Mbit/s\n", i-1,
            stats_rate_per_second((double)(num_images) * recv_frame_size * mbit_per_byte, infer_time_secs));
    }
    /* Close the banner once, also when there are no output streams. */
    printf("-I-----------------------------------------------\n");
    printf(RESET);
}

/**
 * Collects the paths of all non-hidden entries of a directory.
 *
 * Entries whose name starts with '.' are skipped. Each path is built as
 * "<dir_name>/<entry>"; the '/' is added only when dir_name does not already end with one.
 *
 * The returned table and the strings it points to are static storage: they are overwritten
 * by the next call and must not be freed.
 *
 * @param dir_name                     Directory to scan.
 * @param actual_number_of_jlfs_files  Receives the number of paths returned (0 on failure).
 * @return The table of paths, or NULL when an argument is NULL, the directory cannot be
 *         opened, a path does not fit in PATH_MAX, or the directory holds more entries than
 *         the table can store.
 */
const char** get_jlf_files_form_path(const char *dir_name, uint8_t *actual_number_of_jlfs_files) {
    static char jlf_files[HAILO_MAX_NUMBER_OF_JLFS][PATH_MAX];
    static const char *res_jlf_files[HAILO_MAX_NUMBER_OF_JLFS];
    DIR *dir = NULL;
    struct dirent *entry = NULL;
    const char *separator = "";
    size_t dir_len = 0;
    size_t count = 0;
    int written = 0;

    if ((NULL == dir_name) || (NULL == actual_number_of_jlfs_files)) {
        return NULL;
    }
    *actual_number_of_jlfs_files = 0;

    dir = opendir(dir_name);
    if (NULL == dir) {
        return NULL;
    }

    dir_len = strlen(dir_name);
    if ((dir_len > 0) && ('/' != dir_name[dir_len - 1])) {
        separator = "/";
    }

    while (NULL != (entry = readdir(dir))) {
        if ('.' == entry->d_name[0]) {
            continue;
        }
        /* The tables hold HAILO_MAX_NUMBER_OF_JLFS entries and the count is reported
         * through a uint8_t; fail instead of writing out of bounds or wrapping around. */
        if ((count >= (size_t)HAILO_MAX_NUMBER_OF_JLFS) || (count >= UINT8_MAX)) {
            printf("-E- Too many files in %s\n", dir_name);
            (void) closedir(dir);
            return NULL;
        }
        written = snprintf(jlf_files[count], sizeof(jlf_files[count]), "%s%s%s", dir_name, separator, entry->d_name);
        if ((written < 0) || ((size_t)written >= sizeof(jlf_files[count]))) {
            /* A truncated path would point at the wrong file. */
            printf("-E- Path too long: %s%s%s\n", dir_name, separator, entry->d_name);
            (void) closedir(dir);
            return NULL;
        }
        res_jlf_files[count] = jlf_files[count];
        count++;
    }
    (void) closedir(dir);
    *actual_number_of_jlfs_files = (uint8_t)count;
    return res_jlf_files;
}

/**
 * Maps a stream direction to a printable name.
 *
 * @param dir  Stream direction.
 * @return "Input" for HAILO_H2D_STREAM, "Output" for HAILO_D2H_STREAM and "Wrong" for
 *         any other value.
 */
const char* get_direction_name(hailo_stream_direction_t dir) {
    if (HAILO_H2D_STREAM == dir) {
        return "Input";
    }
    if (HAILO_D2H_STREAM == dir) {
        return "Output";
    }
    /* HAILO_STREAM_DIRECTION_MAX_ENUM and any unexpected value. */
    return "Wrong";
}

/* Reads one 4-byte debug register from the device and prints it as a hex value. */
static hailo_status print_debug_register(hailo_device device, const char *name, uint32_t address) {
    uint8_t raw[4] = {0};
    uint32_t value = 0;
    hailo_status status = hailo_read_memory(device, address, raw, (uint32_t)sizeof(raw));

    if (HAILO_SUCCESS != status) {
        printf("-E- Failed to read %s (0x%08x): %d\n", name, (unsigned int)address, status);
        return status;
    }
    /* NOTE(review): the 4 bytes are interpreted in host byte order - confirm that this
     * matches the device register layout. */
    memcpy(&value, raw, sizeof(value));
    printf("-D- %s: 0x%08x\n", name, (unsigned int)value);
    return status;
}

/**
 * Reads and prints the RX_JABBERS and FCS_ERRORS debug registers of the device.
 *
 * Both registers are always attempted, so one failed read does not hide the other value.
 *
 * @param device  Device to read from.
 * @return HAILO_SUCCESS when both reads succeeded, otherwise the status of the first
 *         read that failed.
 */
hailo_status print_debug_stats(hailo_device device) {
    hailo_status status = HAILO_SUCCESS;
    hailo_status fcs_status = HAILO_SUCCESS;

    printf(CYAN);
    printf("-I-----------------------------------------------\n");
    status = print_debug_register(device, "RX_JABBERS", 0x0010918C);
    fcs_status = print_debug_register(device, "FCS_ERRORS", 0x00109190);
    printf("-I-----------------------------------------------\n");
    printf(RESET);

    if (HAILO_SUCCESS == status) {
        status = fcs_status;
    }
    return status;
}

/**
 * Fills an Ethernet output stream parameter struct with this example's defaults:
 * quantized UINT8 data in NHWC order using the copy transform, device port 0, a host
 * address of INADDR_ANY with port 0, and sync enabled.
 *
 * @param stream  Parameter struct to fill in.
 */
void set_default_output_stream_params(hailo_eth_output_stream_params_t* stream) {
    /* Host-side socket address. */
    memset(&stream->host_address.sin_zero, 0, sizeof(stream->host_address.sin_zero));
    stream->host_address.sin_addr.s_addr = INADDR_ANY;
    stream->host_address.sin_port = 0;
    stream->host_address.sin_family = AF_INET;

    /* Device-side port and synchronization. */
    stream->is_sync_enabled = true;
    stream->device_port = 0;

    /* Layout of the buffers handed to the user. */
    stream->base_params.user_buffer_format.type = HAILO_FORMAT_TYPE_UINT8;
    stream->base_params.user_buffer_format.flags = HAILO_FORMAT_FLAGS_QUANTIZE;
    stream->base_params.user_buffer_format.order = HAILO_FORMAT_ORDER_NHWC;
    stream->base_params.transform_mode = HAILO_STREAM_TRANSFORM_COPY;
}
#ifdef FW_LAT
/**
 * Configures the firmware latency measurement (compiled only when FW_LAT is defined).
 *
 * Forwards to hailo_latency_measurement_config() with, in order: the device, the constant 1,
 * (image_index-1) * input hw_shape.height, output hw_shape.height * image_index, the input
 * stream index and the output stream index.
 * NOTE(review): the two products look like start/end row counters of the measured frame -
 * confirm against the HailoRT documentation.
 * NOTE(review): image_index == 0 makes (image_index-1) negative; main() passes num_img/2,
 * which is 0 when num_img < 2 - confirm this case is handled.
 *
 * @param device              Device to configure.
 * @param image_index         Index of the image whose latency is measured.
 * @param input_stream_info   Stream info of the input stream (passed by value).
 * @param output_stream_info  Stream info of the output stream (passed by value).
 * @return The status returned by hailo_latency_measurement_config().
 */
hailo_status set_latency_measurement(hailo_device device, int image_index, hailo_stream_info_t input_stream_info, hailo_stream_info_t output_stream_info)
{
    return hailo_latency_measurement_config(device, 1, (image_index-1)*input_stream_info.hw_shape.height,
            output_stream_info.hw_shape.height*image_index, input_stream_info.index, output_stream_info.index);
}
#endif
int main(int argc, char **argv)
{
    hailo_status status = HAILO_UNINITIALIZED;
    size_t number_of_devices = 0;
    hailo_eth_device_info_t device_info = {0};
    hailo_device device = NULL;
    hailo_jlf jlf = NULL;
    uint8_t jlf_buffer[48*1024];
    uint8_t actual_number_of_jlfs_files = 0;
    hailo_input_stream input_stream = NULL;
    hailo_output_stream output_streams[NOF_STREAMS];
    hailo_eth_input_stream_params_t input_stream_params = HAILO_ETH_INPUT_STREAM_PARAMS_DEFAULT;
    hailo_eth_output_stream_params_t output_streams_params[NOF_STREAMS-1];
    hailo_stream_info_t all_stream_infos[NOF_STREAMS];
    hailo_stream_info_t output_streams_info[NOF_STREAMS-1];
    const char **jlf_files = NULL;
    struct timespec start_time = {0};
    struct timespec end_time = {0};
    size_t number_of_streams = 0;
    uint8_t input_fifo_index = 0;
    int output_stream_cnt = 0;
    uint8_t output_fifo_indexes[NOF_STREAMS-1];
    char *jlf_dir = "./JLFs/";
    int opt;
    char *iface = "eno2";
    int num_img = 100;
    int debug = 0;
    int write_log = 0;
    uint32_t latency_readout = 0;

    while ((opt = getopt(argc, argv, "i:j:n:dl")) != -1) {
        switch (opt) {
            case 'j': jlf_dir = optarg; break;
            case 'i': iface =   optarg; break;
            case 'n': num_img = atoi(optarg); break;
            case 'd': debug = 1; break;
            case 'l': write_log = 1; break;
            case '?': fprintf (stderr, "Option -%c requires an argument.\n", optopt);
            default:
              fprintf(stderr, "Usage: %s -i INTERFACE -j JLF-DIR [-n NUM-IMAGES]\n\n", argv[0]);
              fprintf(stderr, "     -i INTERFACE      The Ethernet interface, defaults to \'eno2\'\n");
              fprintf(stderr, "     -j JLF-DIR        The JLFs directory, defaults to \'./JLFs/\'\n");
              fprintf(stderr, "     -n NUM-IMAGES     The number of images to process, defaults to 100\n");
              fprintf(stderr, "     -d                Read and print debug registers from FW\n");
              fprintf(stderr, "     -l                Each receive thread will write a log file\n");
              exit(EXIT_FAILURE);
        }
    }
    printf(BOLDCYAN);
    printf("-I- Running on interface: %s %d images\n", iface, num_img);
    printf("-I- Reading JLFs from: %s\n", jlf_dir);
    printf(RESET);
    status = hailo_scan_ethernet_devices(iface, &device_info, 1, &number_of_devices, HAILO_DEFAULT_ETH_SCAN_TIMEOUT_MS);
    REQUIRE_SUCCESS(status, l_exit, "Failed to scan for eth_devices");
    if (0 == number_of_devices) {
        printf("-E- No device found on the given interface\n");
        status = HAILO_INTERNAL_FAILURE;
        goto l_exit;
    }

    status = hailo_create_ethernet_device(&device_info, &device);
    REQUIRE_SUCCESS(status, l_exit, "Failed to create eth_device");

    jlf_files = get_jlf_files_form_path(jlf_dir, &actual_number_of_jlfs_files);
    if (NULL == jlf_files) {
        printf("-E- Failed to get jlf files from path\n");
        status = HAILO_INTERNAL_FAILURE;
        goto l_release_device;
    }

    status = hailo_create_jlf_files(jlf_files, actual_number_of_jlfs_files, jlf_buffer, sizeof(jlf_buffer), &jlf);
    REQUIRE_SUCCESS(status, l_release_device, "Failed to create jlf files");

    status = hailo_configure_device_from_jlf(device, jlf, jlf_buffer, sizeof(jlf_buffer));
    REQUIRE_SUCCESS(status, l_release_jlf, "Failed to configure device from jlf");
    
    // NEDEN
    status = hailo_jlf_get_all_stream_infos(jlf, all_stream_infos, NOF_STREAMS, &number_of_streams);
    REQUIRE_SUCCESS(status, l_release_jlf, "Failed to get all stream info");
    
    #ifdef FW_LAT 
    status = set_latency_measurement(device, num_img/2 /*Image index*/, all_stream_infos[0], all_stream_infos[1]);
    #endif
    REQUIRE_SUCCESS(status, l_release_jlf, "Failed to activate latecny measurement in FW");

    for (size_t i=0;i<number_of_streams;i++) {
        printf("-D- [%d] direction:%-7s index=%d output_index=%d\n",(int)i, get_direction_name(all_stream_infos[i].direction), all_stream_infos[i].index, output_stream_cnt);
        if (all_stream_infos[i].direction==HAILO_H2D_STREAM) {
            input_fifo_index = all_stream_infos[i].index;
        } else {
            output_fifo_indexes[output_stream_cnt++] = all_stream_infos[i].index;
        }
    }

    if (all_stream_infos[0].format.order == HAILO_FORMAT_ORDER_NC ||
        all_stream_infos[0].format.order == HAILO_FORMAT_ORDER_NHW) {
            input_stream_params.base_params.user_buffer_format.order = all_stream_infos[0].format.order;
    }
    
    // input_stream_params.base_params.user_buffer_format.type = HAILO_FORMAT_TYPE_FLOAT32;
    // input_stream_params.base_params.user_buffer_format.flags = HAILO_FORMAT_FLAGS_NONE;
    // input_stream_params.base_params.transform_mode = HAILO_STREAM_TRANSFORM_INPLACE;

    status = hailo_create_eth_input_stream_from_jlf_by_index(device, jlf, input_fifo_index, &input_stream_params, &input_stream);
    REQUIRE_SUCCESS(status, l_release_jlf, "Failed to create eth_input_stream");

    status = hailo_activate_input_stream(device, input_stream);
    REQUIRE_SUCCESS(status, l_release_output_stream, "Failed to activate input stream");

    // NEDEN - get all outputs
    for (int i=0;i<output_stream_cnt;i++) {
        set_default_output_stream_params(&output_streams_params[i]);
        
        status = hailo_jlf_get_stream_info_by_index(jlf, output_fifo_indexes[i], HAILO_D2H_STREAM, &output_streams_info[i]);
        REQUIRE_SUCCESS(status, l_release_output_stream, "-E- Failed to get eth_output_stream");

        if (output_streams_info[i].format.order == HAILO_FORMAT_ORDER_NC ||
            output_streams_info[i].format.order == HAILO_FORMAT_ORDER_NHW ||
            output_streams_info[i].format.order == HAILO_FORMAT_ORDER_HAILO_NMS) {
                output_streams_params[i].base_params.user_buffer_format.order = output_streams_info[i].format.order;
        }
        status = hailo_create_eth_output_stream_from_jlf_by_index(device, jlf, output_fifo_indexes[i], &output_streams_params[i], &output_streams[i]);
        REQUIRE_SUCCESS(status, l_release_input_stream, "Failed to create eth_output_stream");

        status = hailo_activate_output_stream(device, output_streams[i]);
        REQUIRE_SUCCESS(status, l_release_output_stream, "Failed to activate output stream");
    }
    //NEDEN: Assuming the input is always on index #0
    print_net_banner(all_stream_infos, output_stream_cnt);

    // Run inference and compare results
    (void) clock_gettime(CLOCK_MONOTONIC, &start_time);    
    status = infer(input_stream, &all_stream_infos[0], output_streams, output_streams_info, output_stream_cnt, num_img, write_log, output_streams_params, input_stream_params);
    
    #ifdef FW_LAT
    status = hailo_latency_measurement_read(device, &latency_readout);
    #endif
    REQUIRE_SUCCESS(status, l_release_output_stream, "Inference failure");
    (void) clock_gettime(CLOCK_MONOTONIC, &end_time);

    print_inference_stats(start_time, end_time, all_stream_infos, output_stream_cnt, num_img, latency_readout);
    status = HAILO_SUCCESS;

l_release_output_stream:
    for (int i=0;i<output_stream_cnt;i++) {
        (void) hailo_release_output_stream(device, output_streams[i]);
    }
    if (debug==1) {
        print_debug_stats(device);
        REQUIRE_SUCCESS(status, l_release_input_stream, "Failed to read debug registers");
    }
l_release_input_stream:
    (void) hailo_release_input_stream(device, input_stream);
l_release_jlf:
    (void) hailo_release_jlf(jlf);
l_release_device:
    (void) hailo_release_device(device);
l_exit:
    return status;
}
