#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <thread>
#include <vector>
#include <regex>
#include <chrono>
#include <future>
#include <fstream>
#include <memory>
#include <example_device.hpp>
#include <example_utils.hpp>
#include <hailo/hailort.h>
#include <yolov5_processing.hpp>
#ifdef DEBUG
#include "npy.hpp"
#endif

using namespace std;

/**
 * Build an example_device and store the run configuration.
 *
 * @param iface      Interface name; run-time setup compares it against "pcie".
 * @param hef_file   Path of the HEF file to load.
 * @param num_imgs   Number of frames to process.
 * @param batch      Batch size later placed into the configure params.
 * @param write_log  Forwarded to the receive threads' arguments.
 * @param debug      When 1, debug counters are printed on stream release.
 * @param yolo_post  Enables the YOLOv5 post-processing thread scheme.
 * @param source     Image directory / video path used in YOLOv5 mode.
 */
example_device::example_device(std::string& iface, std::string& hef_file, unsigned int num_imgs, unsigned int batch, int write_log, int debug, bool yolo_post, std::string source) {
    cout << CYAN
         << "-I- Running on interface: " << iface << " " << num_imgs << " images" << endl
         << RESET;

    // Most parameters shadow a member of the same name, hence the explicit this->.
    this->iface = iface;
    this->hef_file = hef_file;
    this->source_path = source;
    this->num_imgs = num_imgs;
    this->batch_size = batch;
    this->write_log = write_log;
    this->debug = debug;
    this->yolo_post = yolo_post;

    // Stream counts start at their maximum / default values; they are
    // overwritten once the stream params are queried from the network group.
    this->output_stream_cnt = NOF_STREAMS;
    this->input_stream_cnt = 1;
    this->active_net_g = NULL;

    if (!yolo_post) {
        return;
    }
    // YOLOv5 mode: clear the quantization parameters and use one receive thread.
    qp_zp_scale = {0};
    use_one_rx_thrd = true;
}

/**
 * Copy the quantization parameters (qp_scale / qp_zp) of stream-info entries
 * 1, 2 and 3 into qp_zp_scale. Entry 0 is the one used as the input stream by
 * setup_device_for_inference(), so these are the output entries.
 */
void example_device::init_qp_zp_struct() {
    const auto& quant_1 = all_stream_infos[1].quant_info;
    const auto& quant_2 = all_stream_infos[2].quant_info;
    const auto& quant_3 = all_stream_infos[3].quant_info;

    qp_zp_scale.qp_scale_1 = quant_1.qp_scale;
    qp_zp_scale.qp_zp_1    = quant_1.qp_zp;

    qp_zp_scale.qp_scale_2 = quant_2.qp_scale;
    qp_zp_scale.qp_zp_2    = quant_2.qp_zp;

    qp_zp_scale.qp_scale_3 = quant_3.qp_scale;
    qp_zp_scale.qp_zp_3    = quant_3.qp_zp;
}

// Nothing is released here: the release_* helpers are invoked explicitly from
// run_inference().
example_device::~example_device() = default;

/**
 * Print one line per stream (direction, index, name and shape) for the first
 * input_stream_cnt + output_stream_cnt entries of all_stream_infos.
 */
void example_device::print_net_banner() {
    const size_t total_streams = output_stream_cnt + input_stream_cnt;

    printf(GREEN);
    printf("-I-----------------------------------------------\n");
    for (size_t ii = 0; ii < total_streams; ii++) {
        const auto& info = all_stream_infos[ii];
        // %zu is the portable conversion for size_t; the shape fields are cast
        // so the arguments match %u whatever their exact integer type is.
        printf("-I- %s[%zu]: %s (%u, %u, %u)\n",
               get_direction_name(info.direction), ii, info.name,
               static_cast<unsigned int>(info.shape.height),
               static_cast<unsigned int>(info.shape.width),
               static_cast<unsigned int>(info.shape.features));
    }
    printf("-I-----------------------------------------------\n");
    printf(RESET);
}

/**
 * Convert a timespec to milliseconds.
 *
 * The arithmetic is done in floating point so the sub-millisecond part of
 * tv_nsec is kept and tv_sec * 1000 cannot overflow a narrow time_t.
 *
 * @param ts Timestamp to convert.
 * @return Milliseconds represented by ts, including the fractional part.
 */
double example_device::get_time_from_ts(struct timespec ts) {
    return static_cast<double>(ts.tv_sec) * 1000.0 + static_cast<double>(ts.tv_nsec) / 1.0e6;
}

/**
 * Scan the configured interface for one Ethernet device and open it.
 *
 * The scan status is checked before the device count so that a scan failure
 * is reported with its own status instead of being masked as "no device".
 *
 * @return HAILO_SUCCESS when the device was created, the failing call's status
 *         otherwise, or HAILO_INTERNAL_FAILURE when no device was found or an
 *         exception was caught.
 */
hailo_status example_device::create_eth_device() {
    size_t number_of_devices = 0;

    try {
        hailo_status status = hailo_scan_ethernet_devices(iface.c_str(), &device_info, 1, &number_of_devices, HAILO_DEFAULT_ETH_SCAN_TIMEOUT_MS);
        if (status != HAILO_SUCCESS) {
            cout << "-E- Failed to scan interface:" << iface << " status: " << status << endl;
            return status;
        }
        if (0 == number_of_devices) {
            cout << "-E- No device found on the given interface:" << iface << endl;
            return HAILO_INTERNAL_FAILURE;
        }

        return hailo_create_ethernet_device(&device_info, &device);
    } catch (std::exception const& e) {
        std::cout << "-E- create device failed" << e.what() << std::endl;
        return HAILO_INTERNAL_FAILURE;
    }
}

/**
 * Scan for one PCIe device and open it.
 *
 * The scan status is checked before the device count so that a scan failure
 * is reported with its own status instead of being masked as "no device".
 *
 * @return HAILO_SUCCESS when the device was created, the failing call's status
 *         otherwise, or HAILO_INTERNAL_FAILURE when no device was found or an
 *         exception was caught.
 */
hailo_status example_device::create_pcie_device() {
    size_t number_of_devices = 0;

    try {
        hailo_status status = hailo_scan_pcie_devices(&pcie_device_info, 1, &number_of_devices);
        if (status != HAILO_SUCCESS) {
            cout << "-E- Failed to scan for PCIe devices, status: " << status << endl;
            return status;
        }
        if (0 == number_of_devices) {
            cout << "-E- No device found on the given interface:" << iface << endl;
            return HAILO_INTERNAL_FAILURE;
        }

        return hailo_create_pcie_device(&pcie_device_info, &device);
    } catch (std::exception const& e) {
        std::cout << "-E- create device failed" << e.what() << std::endl;
        return HAILO_INTERNAL_FAILURE;
    }
}

/**
 * Read two 32-bit debug counters from device memory and print them in hex.
 *
 * The raw bytes are assembled into an integer before printing; streaming the
 * uint8_t buffer pointer directly would be treated as a C string and read past
 * the 4-byte buffer.
 *
 * @return The status of the first failing read, or HAILO_SUCCESS.
 */
hailo_status example_device::print_debug_stats() {
    const uint32_t rx_jabbers_addr = 0x0010918C;
    const uint32_t fcs_errors_addr = 0x00109190;
    const uint32_t size = 4;
    std::vector<uint8_t> data(size);
    hailo_status result = HAILO_SUCCESS;

    // NOTE(review): bytes are combined as little-endian - confirm this matches
    // the byte order hailo_read_memory returns for these registers.
    auto as_u32 = [](const std::vector<uint8_t>& bytes) {
        return static_cast<uint32_t>(bytes[0])
             | (static_cast<uint32_t>(bytes[1]) << 8)
             | (static_cast<uint32_t>(bytes[2]) << 16)
             | (static_cast<uint32_t>(bytes[3]) << 24);
    };

    hailo_status status = hailo_read_memory(device, rx_jabbers_addr, data.data(), size);
    cout << CYAN
         << "-I-----------------------------------------------" << endl;
    if (status == HAILO_SUCCESS) {
        cout << "-D- RX_JABBERS: 0x" << std::hex << as_u32(data) << std::dec << endl;
    } else {
        cout << "-E- Failed to read RX_JABBERS, status: " << status << endl;
        result = status;
    }

    status = hailo_read_memory(device, fcs_errors_addr, data.data(), size);
    if (status == HAILO_SUCCESS) {
        cout << "-D- FCS_ERRORS: 0x" << std::hex << as_u32(data) << std::dec << endl;
    } else {
        cout << "-E- Failed to read FCS_ERRORS, status: " << status << endl;
        if (result == HAILO_SUCCESS) {
            result = status;
        }
    }
    cout << "-I-----------------------------------------------" << endl
         << RESET;

    return result;
}

/**
 * Map a stream direction to a printable label.
 *
 * @param dir Stream direction.
 * @return "Input" for HAILO_H2D_STREAM, "Output" for HAILO_D2H_STREAM and
 *         "Wrong" for any other value.
 */
const char* example_device::get_direction_name(hailo_stream_direction_t dir) {
    if (dir == HAILO_H2D_STREAM) {
        return "Input";
    }
    if (dir == HAILO_D2H_STREAM) {
        return "Output";
    }
    return "Wrong";
}

/**
 * Average, over LATENCY_MEASUREMENTS samples, the gap between the send
 * timestamp and the latest receive timestamp across the first `count`
 * receive streams.
 *
 * @param count Number of receive streams to consider (stream 0 is always read).
 * @return Mean latency in the unit returned by get_time_from_ts().
 */
double example_device::calc_latency(int count) {
    double total = 0;

    for (int sample = 0; sample < LATENCY_MEASUREMENTS; ++sample) {
        const double sent_at = get_time_from_ts(sent_clock_t[sample]);

        // The frame is complete only once the slowest stream has delivered it.
        double last_recv = get_time_from_ts(recv_clock_t[0][sample]);
        for (int stream = 1; stream < count; ++stream) {
            const double recv_at = get_time_from_ts(recv_clock_t[stream][sample]);
            last_recv = (recv_at > last_recv) ? recv_at : last_recv;
        }

        total += (last_recv - sent_at);
    }
    return total / LATENCY_MEASUREMENTS;
}

/**
 * Print total run time, average FPS and per-stream data rates computed from
 * start_time / end_time and the host frame sizes.
 */
void example_device::print_inference_stats() {
    const double start_time_secs = (double)start_time.tv_sec + ((double)start_time.tv_nsec / NSEC_IN_SEC);
    const double end_time_secs = (double)end_time.tv_sec + ((double)end_time.tv_nsec / NSEC_IN_SEC);
    const double infer_time_secs = end_time_secs - start_time_secs;
    // Bytes -> Mbit conversion factor (8 bits per byte, 1024*1024 bits per Mbit).
    constexpr double mbit_per_byte = 8.0 / 1024.0 / 1024.0;

    cout << GREEN
         << "-I-----------------------------------------------" << endl
         << "-I- Total time:      " << infer_time_secs << endl
         << "-I- Average FPS:     " << (num_imgs * input_stream_cnt)/ infer_time_secs << endl
         << "-I- Send data rate:  " << (double)(num_imgs) * host_input_frame_size * mbit_per_byte / infer_time_secs << " Mbit/s" << endl;
    for (size_t i = 0; i < output_stream_cnt; i++) {
        // %zu is the portable conversion for size_t (was %ld).
        printf("-I- Recv[%zu] data rate: %-4.2lf Mbit/s\n", i,
            (double)(num_imgs) * host_output_frame_size[i] * mbit_per_byte / infer_time_secs);
        printf("-I-----------------------------------------------\n");
    }
    printf(RESET);
}

/**
 * Sender worker for the synthetic benchmark.
 *
 * Fills one frame with pseudo-random values and writes it num_images times to
 * the input stream. Every (num_images / 100)-th frame, up to
 * LATENCY_MEASUREMENTS times, the send time is logged and stored in
 * sent_clock_t. The final status is stored in the argument structure.
 *
 * @param args Pointer to a write_thread_args_t.
 */
template<class T>
void example_device::_send_thread(void *args) {
    hailo_status status = HAILO_SUCCESS;
    write_thread_args_t *write_args = (write_thread_args_t*)args;
    std::vector<T> src_data;
    unsigned lat_counter = 0;
    uint32_t flag_100 = 0;
    struct timespec ts;

    src_data.resize(write_args->host_input_frame_size);
    if (src_data.empty()) {
        cout << "-E- Failed to allocate buffers" << endl;
        status = HAILO_OUT_OF_HOST_MEMORY;
    } else {
        for (size_t i = 0; i < write_args->host_input_frame_size; i++) {
            src_data[i] = (T)(rand() % 256);
        }
        // Sample roughly one frame in a hundred; never let the stride be zero.
        flag_100 = (uint32_t)write_args->num_images / 100;
        if (flag_100 == 0)
            flag_100 = 1;
        for (uint32_t i = 1; i <= (uint32_t)write_args->num_images; i++) {
            if ((i % flag_100 == 0) && (lat_counter < LATENCY_MEASUREMENTS)) {
                clock_gettime(CLOCK_REALTIME, &ts);
                // Milliseconds are zero-padded to three digits so that e.g. 5 ms
                // prints as ".005" rather than ".5"; arguments are cast to match
                // the conversions exactly.
                printf("-I- [%10ld.%03ld s] TID:%d Send frame [%3u/%3u]\n",
                       static_cast<long>(ts.tv_sec), static_cast<long>(ts.tv_nsec / 1000000),
                       static_cast<int>(write_args->tid), static_cast<unsigned int>(i),
                       static_cast<unsigned int>(write_args->num_images));
                sent_clock_t[lat_counter++] = ts;
            }
            status = hailo_stream_sync_write_all_raw_buffer(write_args->input_stream,
                                                            src_data.data(),
                                                            0,
                                                            write_args->host_input_frame_size);
            if (status != HAILO_SUCCESS) {
                cout << "-E- hailo_stream_sync_write_all_raw_buffer failed" << endl;
                break;
            }
        }
    }
    write_args->status = status;
}

/**
 * Receiver worker for the synthetic benchmark.
 *
 * Reads num_images frames from one output stream. Every (num_images / 100)-th
 * frame, up to LATENCY_MEASUREMENTS times, the receive time is logged and
 * stored in recv_clock_t[tid]. The final status is stored in the argument
 * structure.
 *
 * @param args Pointer to a recv_thread_args_t.
 */
template<class T>
void example_device::_recv_thread(void *args) {
    hailo_status status = HAILO_SUCCESS;
    recv_thread_args_t *recv_args = (recv_thread_args_t *)args;
    struct timespec ts;
    ofstream outFile;
    std::vector<T> recv_array;
    unsigned lat_counter = 0;
    uint32_t flag_100 = 0;

    cout << CYAN << "-I- Recv thread " << recv_args->tid << " started" << RESET << endl;
    if (yolo_post)
        cout << CYAN << "-I- Recv thread: Doing YOLOv5 post-processing" << RESET << endl;

    if (recv_args->write_log==1) {
        // NOTE(review): the log is opened with a fixed name for every thread
        // and nothing is written to it in this function.
        string log_name = "rx_tid_0.log";
        outFile = ofstream(log_name);
    }
    // Sample roughly one frame in a hundred; never let the stride be zero.
    flag_100 = (uint32_t)recv_args->num_images / 100;
    if (flag_100 == 0)
        flag_100 = 1;
    recv_array.resize(recv_args->host_output_frame_size);
    for (uint32_t j = 1; j <= (uint32_t)recv_args->num_images; j++) {
        status = hailo_stream_sync_read_all_raw_buffer(recv_args->output_stream, recv_array.data(), 0, recv_args->host_output_frame_size);
        REQUIRE_SUCCESS(status, l_exit, "Failed at hailo_stream_sync_read_all_raw_buffer");

        if ((j % flag_100 == 0) && (lat_counter < LATENCY_MEASUREMENTS)) {
            clock_gettime(CLOCK_REALTIME, &ts);
            // Milliseconds are zero-padded to three digits so that e.g. 5 ms
            // prints as ".005" rather than ".5"; arguments are cast to match
            // the conversions exactly.
            printf("-I- [%10ld.%03ld s] TID:%d Recv [%3u/%3u] \n",
                   static_cast<long>(ts.tv_sec), static_cast<long>(ts.tv_nsec / 1000000),
                   static_cast<int>(recv_args->tid), static_cast<unsigned int>(j),
                   static_cast<unsigned int>(recv_args->num_images));
            recv_clock_t[recv_args->tid][lat_counter++] = ts;
        }
    }
l_exit:
    recv_args->status = status;
}

/**
 * Sender worker for YOLOv5 mode.
 *
 * Pops frames from input_image_queue and writes them to the input stream until
 * a frame whose first four bytes are 17, 71, 17, 71 (the stop pattern pushed
 * by the source threads) arrives or a write fails. The final status and the
 * number of frames taken from the queue are stored in the argument structure.
 *
 * @param args Pointer to a write_thread_args_t.
 * @return Always NULL.
 */
void* example_device::_send_from_source_thread(void *args) {
    hailo_status status = HAILO_SUCCESS;
    write_thread_args_t *write_args = (write_thread_args_t*)args;
    struct timespec ts;
    // cv::Mat takes (rows, cols): height first, then width.
    cv::Mat curr_img(all_stream_infos->shape.height, all_stream_infos->shape.width, CV_8UC3);
    int counter = 0;
    vector<uint8_t> array;

    array.resize(host_input_frame_size);

    while (true) {
        // The emptiness check is done under the same mutex as the pop: the
        // producer thread pushes concurrently and the queue is not thread-safe.
        pthread_mutex_lock(&input_image_queue_m);
        if (input_image_queue.empty()) {
            pthread_mutex_unlock(&input_image_queue_m);
            cout << "-I- Waiting for the image queue to fill" << endl;
            sleep(1);
            continue;
        }
        curr_img = input_image_queue.front();
        input_image_queue.pop();
        pthread_mutex_unlock(&input_image_queue_m);

        if (curr_img.empty()) {
            // Nothing to inspect or send; the header bytes below would be a
            // null dereference.
            cout << "-E- [TX-Thread] Skipping empty frame" << endl;
            continue;
        }

        if (counter % 500==0) {
            clock_gettime(CLOCK_REALTIME, &ts);
            // Zero-padded milliseconds and bytes keep the fields unambiguous.
            printf("-I- [%10ld.%03ld s] Send frame [%3d] 0x%02x%02x\n",
                   static_cast<long>(ts.tv_sec), static_cast<long>(ts.tv_nsec / 1000000),
                   counter, curr_img.data[0], curr_img.data[1]);
        }
        counter++;
        if (curr_img.data[0] == 17 && curr_img.data[1] == 71 && curr_img.data[2] == 17 && curr_img.data[3] == 71) {
            cout << "-I- [TX-Thread] Received stop pattern - " << counter << endl;
            sleep (1);
            break;
        }

        if (curr_img.isContinuous()) {
            array.assign(curr_img.datastart, curr_img.dataend);
        } else {
            // Start from an empty buffer: appending to the previous frame's
            // bytes would send stale data and grow the buffer on every frame.
            const int rowsz = CV_ELEM_SIZE(curr_img.type()) * curr_img.cols;
            array.clear();
            for (int i = 0; i < curr_img.rows; ++i) {
                array.insert(array.end(), curr_img.ptr<uint8_t>(i), curr_img.ptr<uint8_t>(i) + rowsz);
            }
        }
        if (array.size() < host_input_frame_size) {
            // The write below always reads host_input_frame_size bytes; pad
            // with zeros rather than letting it read past the buffer.
            array.resize(host_input_frame_size);
        }
        status = hailo_stream_sync_write_all_raw_buffer(input_stream, array.data(), 0, host_input_frame_size);
        if (status != HAILO_SUCCESS) {
            printf("-E- hailo_stream_sync_write_all_raw_buffer failed");
            break;
        }
    }
    write_args->status = status;
    write_args->num_images = counter;
    return NULL;
}

/**
 * Single receiver worker for YOLOv5 mode.
 *
 * In an endless loop, reads one frame from every output stream, runs
 * get_detections() on the first three buffers and prints the detected class
 * names. The loop is left only when a read does not succeed.
 *
 * NOTE(review): futureObj is never queried, so the exit signal raised by
 * infer() has no effect on this loop.
 *
 * @param futureObj Exit-signal future (currently unused).
 */
template<class T>
void example_device::_recv_thread1(std::future<void> &futureObj) {
    hailo_status status = HAILO_SUCCESS;
    std::vector< std::vector<T> > recv_array;
    size_t counter = 0;
    if (yolo_post)
        cout << CYAN << "-I- Recv thread: Doing YOLOv5 post-processing" << RESET << endl;

    // One receive buffer per output stream, sized to that stream's host frame.
    recv_array.resize(output_stream_cnt);
    for (size_t i=0; i<output_stream_cnt;++i)
        recv_array[i].resize(host_output_frame_size[i]);
    
    while (true) {        
        for (size_t i=0; i<output_stream_cnt;++i) {
            status = hailo_stream_sync_read_all_raw_buffer(output_streams[i], recv_array[i].data(), 0, host_output_frame_size[i]);
            // This status leaves the loop without the REQUIRE_SUCCESS report.
            if (status==HAILO_PCIE_DRIVER_FAIL) goto l_exit;
            REQUIRE_SUCCESS(status, l_exit, "Failed at hailo_stream_sync_read_all_raw_buffer");
        }
        #ifdef DEBUG
        size_t shape2 [] = {20, 20, 255};
        cout << "-I- Saving logit ouput2 to cpp-output-2.npy" << endl;
        npy::SaveArrayAsNumpy("cpp-output-2.npy", false, 3, shape2, recv_array[2]);
        size_t shape1 [] = {40, 40, 255};
        cout << "-I- Saving logit ouput1 to cpp-output-1.npy" << endl;
        npy::SaveArrayAsNumpy("cpp-output-1.npy", false, 3, shape1, recv_array[1]);
        size_t shape0 [] = {80, 80, 255};
        cout << "-I- Saving logit ouput0 to cpp-output-0.npy" << endl;
        npy::SaveArrayAsNumpy("cpp-output-0.npy", false, 3, shape0, recv_array[0]);
        #endif
        vector<float32_t> detections;
        auto num_dets = get_detections(recv_array[0], recv_array[1], recv_array[2], qp_zp_scale, 0.2, detections);
        if (num_dets > 0) {
            // Was "#ifdef DBEUG": the misspelled guard meant this dump was
            // never compiled, even in DEBUG builds.
            #ifdef DEBUG
            cout << "-I- Saving the detections to cpp_detections.npy: " << endl;
            size_t shape [] = {4, 1};
            npy::SaveArrayAsNumpy("cpp_detections.npy", false, 2, shape, detections);
            #endif
            cout << "-I- Num detections: " << num_dets << " Classes: ["; 
            // The class id is read at offset 4 of each 6-value detection record.
            for (size_t i = 0; i < num_dets;i++) 
                cout << get_coco_name_from_int(detections[i*6+4]) << " ";
            cout << "]" << endl;
        }
        counter++;
        if (counter%100 == 0)
            cout << CYAN << "-I- Received frame #"<< counter << RESET << endl;

    }
l_exit:    
    cout << CYAN << "-I- Exit Recv thread code:"<< status << RESET << endl;
}

/**
 * Tell whether a source path names a video file.
 *
 * The path must end with a dot followed by one of the known extensions
 * (mkv, avi, webm, mp4). Requiring the dot avoids treating names that merely
 * end in those letters (e.g. "/data/navi") as videos, and a plain suffix
 * comparison avoids building a regex per extension.
 *
 * @param path Source path to inspect.
 * @return true when the path ends with a known video extension.
 */
bool example_device::source_is_video(string& path) {
    const vector<string> extensions = {".mkv", ".avi", ".webm", ".mp4"};
    for (const string& ext : extensions) {
        if (path.size() >= ext.size() &&
            path.compare(path.size() - ext.size(), ext.size(), ext) == 0)
            return true;
    }
    return false;
}

void* example_device::_frame_lib_thread(void *args) {
    video_thread_args_t *v_args = (video_thread_args_t*)args;
    cv::Mat org_frame;
    vector< cv::String > file_names;
    cv::Mat pp_frame(v_args->input_stream_info->hw_shape.height, v_args->input_stream_info->hw_shape.height, CV_8UC3);
    unsigned int idx=0;
    cv::Size input_size;

    cout << BOLDCYAN << "-I-----------------------------------------------" << endl
                     << "-I- [Frames-thread] Source: " << v_args->video_path << endl
                     << "-I- [Frames-thread] Net input shape (" << v_args->input_stream_info->hw_shape.height << 
                        ", " << v_args->input_stream_info->hw_shape.height << 
                        ", " << v_args->input_stream_info->hw_shape.features << ")" << endl
                     << "-I-----------------------------------------------" 
         << RESET    << endl;

    cv::glob(v_args->video_path, file_names, false);
    input_size.width = v_args->input_stream_info->hw_shape.width;
    input_size.height = v_args->input_stream_info->hw_shape.height;
    for (std::string file : file_names) {
        if (not(file.ends_with(".jpg") || file.ends_with(".png"))) {
            continue;
        }
        org_frame = cv::imread(file);
        if (org_frame.channels() == 3) {
            cv::cvtColor(org_frame, org_frame, cv::COLOR_BGR2RGB);
        }
        cout << "-I- [Frames-thread] Getting frame #"<< idx++ << ":" << file << endl;

        // size_t shape [] = {640, 640, 3};
        pp_frame = yolov5_pre_process(org_frame);
        #ifdef DEBUG
        cout << "-I- Saving the preprocessed image to cpp-prep-image.npy" << endl;
        size_t shape [] = {640, 640, 3};
        std::vector<float> image;
        int totalsz = pp_frame.dataend-pp_frame.datastart;
        image.assign(pp_frame.datastart, pp_frame.datastart + totalsz);
        npy::SaveArrayAsNumpy("cpp-prep-image.npy", false, 3, shape, image);
        #endif
        pthread_mutex_lock(&input_image_queue_m); 
        input_image_queue.push(pp_frame.clone());
        pthread_mutex_unlock(&input_image_queue_m); 
    }
    // Mark the final frame
    pp_frame.data[0] = 17;
    pp_frame.data[1] = 71;
    pp_frame.data[2] = 17;
    pp_frame.data[3] = 71;
    idx++;
    pthread_mutex_lock(&input_image_queue_m); 
    input_image_queue.push(pp_frame.clone());
    pthread_mutex_unlock(&input_image_queue_m); 
    cout << BOLDCYAN << "-I-----------------------------------------------" << endl
                     << "-I- [Frames-thread] FINISHED READING DIR - #" << idx-1 << endl
                     << "-I-----------------------------------------------" 
         << RESET    << endl;
    
    return NULL;
}

/**
 * Producer for YOLOv5 mode when the source is a video.
 *
 * Opens video_path, resizes every captured frame to the network input size and
 * pushes a copy to input_image_queue. When the stream ends (or could not be
 * opened) a frame whose first four bytes are 17, 71, 17, 71 is pushed as the
 * stop marker for the sender thread.
 *
 * @param args Pointer to a video_thread_args_t.
 * @return The final hailo_status converted to a pointer value.
 */
void* example_device::_video_source_thread(void *args) {
    hailo_status status = HAILO_SUCCESS;
    video_thread_args_t *v_args = (video_thread_args_t*)args;
    cv::Mat org_frame;
    // cv::Mat takes (rows, cols): height first, then width. Using the height
    // twice made every frame square and broke non-square network inputs,
    // because the resize below targets pp_frame.size().
    cv::Mat pp_frame(v_args->input_stream_info->hw_shape.height, v_args->input_stream_info->hw_shape.width, CV_8UC3);
    unsigned int idx=0;
    cv::VideoCapture cap(v_args->video_path);

    if (!cap.isOpened()) {
        cout << "-E- Unable to open video stream" << endl;
        status = HAILO_INTERNAL_FAILURE;
    }

    cout << BOLDCYAN << "-I-----------------------------------------------" << endl;
    cout << "-I- [Video-thread] Source: " << v_args->video_path << endl;
    cout << "-I- [Video-thread] Net input shape (" << v_args->input_stream_info->hw_shape.height << 
            ", " << v_args->input_stream_info->hw_shape.width << 
            ", " << v_args->input_stream_info->hw_shape.features << ")" << endl;   
    cout << "-I-----------------------------------------------" << RESET << endl;

    // Skipped entirely when the capture could not be opened.
    while (status == HAILO_SUCCESS) {
        if (++idx%500==0) 
            cout << "-I- [Video-thread] Getting frame: "<< idx << endl;
        cap >> org_frame;
        if (org_frame.empty())
            break;
        
        resize(org_frame, pp_frame, pp_frame.size());

        // Queue a deep copy: pp_frame is overwritten by the next resize.
        pthread_mutex_lock(&input_image_queue_m); 
        input_image_queue.push(pp_frame.clone());
        pthread_mutex_unlock(&input_image_queue_m); 
    }
    // Mark the final frame
    pp_frame.data[0] = 17;
    pp_frame.data[1] = 71;
    pp_frame.data[2] = 17;
    pp_frame.data[3] = 71;
    idx++;
    pthread_mutex_lock(&input_image_queue_m); 
    input_image_queue.push(pp_frame);
    pthread_mutex_unlock(&input_image_queue_m); 
    cout << BOLDCYAN << "-I-----------------------------------------------" << endl;
    cout             << "-I- [Video-thread] FINISHED MOVIE STREAM - " << idx-1 << endl;
    cout             << "-I-----------------------------------------------" << RESET << endl;
    cap.release();

    // Go through an integer of pointer width instead of casting the enum directly.
    return reinterpret_cast<void *>(static_cast<size_t>(status));
}

/**
 * Run the inference threads and wait for them.
 *
 * In YOLOv5 mode one source thread, one sender and one detached receiver are
 * started. Otherwise one sender per input stream and one receiver per output
 * stream are started. start_time / end_time bracket the whole run, including
 * thread start-up, so the reported statistics cover all transferred frames.
 *
 * @return HAILO_SUCCESS, or HAILO_INTERNAL_FAILURE when any sender or
 *         (non-YOLOv5) receiver reported a failure.
 */
template<class T>
hailo_status example_device::infer() {
    hailo_status status = HAILO_SUCCESS;
    std::vector<std::thread> recv_threads;
    std::vector<std::thread> write_threads;
    std::thread source_thread;
    std::vector<recv_thread_args_t> recv_args;
    std::vector<write_thread_args_t> write_args;
    video_thread_args_t source_args;
    std::promise<void> exitSignal;
    std::future<void> futureObj = exitSignal.get_future();

    // Argument vectors are sized before any thread starts so the element
    // addresses handed to the threads stay valid.
    write_threads.resize(input_stream_cnt);
    write_args.resize(input_stream_cnt);
    
    source_args.input_stream_info = &all_stream_infos[0];
    source_args.video_path = source_path.c_str();

    // Take the start timestamp before launching: the workers begin
    // transferring as soon as they are created.
    (void) clock_gettime(CLOCK_MONOTONIC, &start_time);

    if (yolo_post) {
        cout << CYAN << "-I- Creating YOLOv5 threads scheme (1\\1\\1)" << RESET << endl;
        if (source_is_video(source_path))
            source_thread = std::thread(&example_device::_video_source_thread, this, &source_args);
        else
            source_thread = std::thread(&example_device::_frame_lib_thread, this, &source_args);
        write_threads[0] = std::thread(&example_device::_send_from_source_thread, this, &write_args[0]);
        recv_threads.resize(1);
        recv_threads[0] = std::thread(&example_device::_recv_thread1<T>, this, std::ref(futureObj));
        // NOTE(review): the receiver loops until a read fails, so it is
        // detached rather than joined; it may still run after this returns.
        recv_threads[0].detach();
    } else {
        for (size_t s=0;s<input_stream_cnt;s++) {
            write_args[s].tid                = s;
            write_args[s].input_stream       = input_stream;
            write_args[s].status             = HAILO_SUCCESS;
            write_args[s].output_streams_cnt = output_stream_cnt;
            write_args[s].num_images         = num_imgs;
            write_args[s].stream_info        = all_stream_infos[s];
            write_args[s].host_input_frame_size = host_input_frame_size;
            write_threads[s] = std::thread(&example_device::_send_thread<T>, this, &write_args[s]);
        }

        recv_args.resize(output_stream_cnt);
        recv_threads.resize(output_stream_cnt);
        for (size_t s=0;s<output_stream_cnt;s++) {
            cout << CYAN << "-I- Creating RECV Thread #" << s << RESET << endl;
            recv_args[s].output_stream = output_streams[s];
            recv_args[s].tid           = s;
            recv_args[s].status        = HAILO_SUCCESS;
            recv_args[s].num_images    = num_imgs;
            recv_args[s].write_log     = write_log;
            recv_args[s].stream_info   = all_stream_infos[s+1];
            recv_args[s].host_output_frame_size = host_output_frame_size[s];
            recv_threads[s] = std::thread(&example_device::_recv_thread<T>, this, &recv_args[s]);
        }
    }

    if (yolo_post)
        source_thread.join();
    // In YOLOv5 mode only write_threads[0] is started; joining a thread that
    // was never started would throw.
    for (auto& t: write_threads) if (t.joinable()) t.join();
    cout << CYAN << "-I- Finished 2 all threads" << RESET << endl;
    if (yolo_post)
        exitSignal.set_value();
    for (auto& t: recv_threads) if (t.joinable()) t.join();
    cout << CYAN << "-I- Finished all threads" << RESET << endl;

    (void) clock_gettime(CLOCK_MONOTONIC, &end_time);

    for (auto& a: write_args) {
        if (HAILO_SUCCESS != a.status) {
            cout << "-E- write_thread failed" << endl;
            status = HAILO_INTERNAL_FAILURE;
        }
    }
    // Receiver failures must not be silently dropped (empty in YOLOv5 mode).
    for (auto& a: recv_args) {
        if (HAILO_SUCCESS != a.status) {
            cout << "-E- recv_thread failed" << endl;
            status = HAILO_INTERNAL_FAILURE;
        }
    }
    return status;
}

/**
 * Create the HEF object from hef_file and configure the device with it,
 * using batch_size for the first network group.
 *
 * @return HAILO_SUCCESS, the status of the failing call, or
 *         HAILO_UNINITIALIZED when more than one network group was returned.
 */
hailo_status example_device::create_and_load_hef()
{
    // All locals are declared up front: REQUIRE_SUCCESS jumps to the exit label.
    hailo_configure_params_t cfg = {};
    size_t group_count = 1;
    hailo_status status = hailo_create_hef_file(&hef, hef_file.c_str());
    REQUIRE_SUCCESS(status, done, "Failed to create hef file");

    cout << CYAN << "-I- Loading HEF file from " << hef_file << RESET << endl;
    cfg.network_group_params[0].batch_size = batch_size;
    status = hailo_configure_device(device, hef, &cfg, &network_group, &group_count);
    REQUIRE_SUCCESS(status, done, "Failed to configure device from hef");

    // Only a single network group is supported.
    if (group_count > 1) {
        status = HAILO_UNINITIALIZED;
        cout << "-E- Got network_group=" <<group_count << endl;
        REQUIRE_SUCCESS(status, done, "Failed to extract network group, larger than 1");
    }

done:
    return status;
}
/**
 * Query the stream infos from the HEF and build the input and output stream
 * parameters for the configured network group.
 *
 * The input format is FLOAT32 when T is float32_t and UINT8 otherwise; the
 * output format is always UINT8 with quantized=true.
 *
 * @param input_stream_params  Receives the input stream parameters.
 * @param output_stream_params Receives the output stream parameters.
 * @return HAILO_SUCCESS or the status of the failing call.
 */
template<class T>
hailo_status example_device::set_stream_infos(hailo_input_stream_params_by_name_t *input_stream_params, hailo_output_stream_params_by_name_t *output_stream_params)
{
    hailo_status status = HAILO_SUCCESS;
    size_t number_of_streams = 0;
    bool quantized = false;
    hailo_stream_transform_mode_t transform = HAILO_STREAM_TRANSFORM_COPY;
    hailo_format_type_t format = HAILO_FORMAT_TYPE_UINT8;

    if (std::is_same<T, float32_t>::value)
        format = HAILO_FORMAT_TYPE_FLOAT32;

    status = hailo_hef_get_all_stream_infos(hef, NULL, all_stream_infos, NOF_STREAMS, &number_of_streams);
    REQUIRE_SUCCESS(status, l_exit, "Failed to get pcie_stream_infos");

    cout << CYAN << "-I- Activating the input streams with transform=" << transform << ", quantized=" << quantized << RESET << endl;
    status = hailo_make_input_stream_params(network_group, transform, quantized, format, input_stream_params, &input_stream_cnt);
    REQUIRE_SUCCESS(status, l_exit, "Failed to get input stream params");

    // Passing quantized=true here is specific to YOLOv5: the post-processing
    // function scales the output itself, and only for the boxes that are above
    // the IOU threshold.
    format = HAILO_FORMAT_TYPE_UINT8;
    quantized = true;
    cout << CYAN << "-I- Activating the output streams with transform=" << transform << ", quantized=" << quantized << RESET << endl;
    status = hailo_make_output_stream_params(network_group, transform, quantized, format, output_stream_params, &output_stream_cnt);
    // This message used to say "input", which pointed at the wrong call.
    REQUIRE_SUCCESS(status, l_exit, "Failed to get output stream params");

l_exit:
    return status;
}

/**
 * Activate the configured network group with the given stream parameters and
 * store the resulting handle in active_net_g.
 *
 * @param input_stream_params  Input stream parameters (input_stream_cnt entries).
 * @param output_stream_params Output stream parameters (output_stream_cnt entries).
 * @return HAILO_SUCCESS or the status returned by hailo_activate_network_group.
 */
hailo_status example_device::activate_network_group(hailo_input_stream_params_by_name_t *input_stream_params, hailo_output_stream_params_by_name_t *output_stream_params)
{
    hailo_status status = HAILO_SUCCESS;
    // Value-initialized: the structure used to be passed with indeterminate
    // contents.
    // NOTE(review): zero is assumed to be an acceptable default for every
    // field - confirm against the HailoRT version in use.
    hailo_network_group_params_t network_group_params = {};
    
    // network_group_params.context_switch_batch_size = 1;
    // for (int i = 0; i < output_stream_cnt;++i) 
    //     output_stream_params[i].params.eth_params.base_params.buffers_threshold = 1;
    // input_stream_params->params.eth_params.base_params.buffers_threshold = 1;
    status = hailo_activate_network_group(network_group, 
                                          &network_group_params, 
                                          input_stream_params,  // Input params
                                          input_stream_cnt,     // number of inputs
                                          output_stream_params, // Output params
                                          output_stream_cnt,    // number of outputs
                                          &active_net_g);
    REQUIRE_SUCCESS(status, l_exit, "Failed to activate network group");    

l_exit:
    return status;
}

/**
 * Prepare the device for inference: open it (PCIe when iface is "pcie",
 * Ethernet otherwise), load the HEF, build the stream parameters, activate
 * the network group and look up the stream handles and host frame sizes.
 *
 * Stream-info entry 0 is used as the input stream and entries 1.. as the
 * output streams.
 *
 * @return HAILO_SUCCESS or the status of the first failing step.
 */
template<class T>
hailo_status example_device::setup_device_for_inference() {
    hailo_status status = HAILO_SUCCESS;
    hailo_input_stream_params_by_name_t input_stream_params = {0};
    // Zero-initialized like the input params, so no entry is left indeterminate.
    hailo_output_stream_params_by_name_t output_stream_params[NOF_STREAMS] = {};

    if (iface.compare("pcie") == 0) {
        status = create_pcie_device();
    } else {
        status = create_eth_device();
    }
    if (status!=HAILO_SUCCESS) return status;

    status = create_and_load_hef();
    if (status != HAILO_SUCCESS) {
        // The message used to mention stream infos, which is a later step.
        cout << "-E- Failed to create and load the HEF" << endl;
        release_hef();
        return status;
    }
    status = set_stream_infos<T>(&input_stream_params, output_stream_params);
    REQUIRE_SUCCESS(status, l_exit, "Failed get_stream_infos");
    status = activate_network_group(&input_stream_params, output_stream_params);    
    REQUIRE_SUCCESS(status, l_exit, "Failed activate_network_group");
    status = hailo_get_input_stream_by_name(active_net_g, all_stream_infos[0].name, &input_stream);
    REQUIRE_SUCCESS(status, l_exit, "Failed get_input_stream_by_name");
    host_input_frame_size = hailo_get_input_stream_frame_size(input_stream);
    for (size_t i = 0; i < output_stream_cnt; ++i) {
        // Output stream i is described by stream-info entry i + 1.
        status = hailo_get_output_stream_by_name(active_net_g, all_stream_infos[i+1].name, &output_streams[i]);
        REQUIRE_SUCCESS(status, l_exit, "Failed get_output_stream_by_name");
        host_output_frame_size[i] = hailo_get_output_stream_frame_size(output_streams[i]);
    }
    print_net_banner();
l_exit:
    return status;
}

void example_device::release_output_streams() {
    // for (size_t i = 0; i < output_stream_cnt; ++i)
    //    (void) hailo_release_output_stream(device, output_streams[i]);
    if (example_device::debug==1) {
        print_debug_stats();
    }
}

// Input-stream teardown hook. The release calls below are commented out, so
// this function currently does nothing.
void example_device::release_input_stream() {
    // for (const auto &s: input_streams) (void) hailo_release_input_stream(device, s.GetStream());
    //( void) hailo_release_input_stream(device, input_stream);
}

void example_device::release_hef() {
    (void) hailo_release_hef(example_device::hef);
}

void example_device::release_device() {
    (void) hailo_release_device(example_device::device);
}

void example_device::run_inference() {
    hailo_status status = HAILO_SUCCESS;
    
    status = setup_device_for_inference<uint8_t>();
    REQUIRE_SUCCESS(status, l_exit, "setup_device_for_inference failed");
    if (yolo_post)
        init_qp_zp_struct();

    status = infer<uint8_t>();
    
    example_device::print_inference_stats();
    
    example_device::release_output_streams();
    example_device::release_input_stream();
    example_device::release_hef();
    example_device::release_device();
l_exit:
    cout << GREEN << "-I- Finished Gracefully" << RESET << endl;
}
