Program Listing for File OpenclBackend.hpp

Return to documentation for file (include\util\function\stft\OpenclBackend.hpp)

#pragma once

#include "STFT_Parallel.hpp"
#include <CL/cl.h>
#include <CL/opencl.hpp>
#include <cmrc/cmrc.hpp>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <optional>
#include <stdexcept>
#include <string_view>
#include <utility>

CMRC_DECLARE(pdje_okl);

namespace PDJE_PARALLEL {

using namespace cl;
using REAL_VEC = std::vector<float>;
using IMAG_VEC = std::vector<float>;

class OPENCL_STFT final : public IStftBackend {

  private:
    static constexpr uint32_t kMelBins           = 80;
    static constexpr int      kDefaultSampleRate = 48000;

    uint32_t prev_origin_size                = 0;
    uint32_t prev_overlap_fullsize           = 0;
    uint32_t prev_overlap_subbuffer_fullsize = 0;
    uint32_t prev_bin_fullsize               = 0;
    uint32_t prev_mel_fullsize               = 0;
    int      prev_fft_size                   = 0;
    Program  opencl_kernel_code;
    struct {
        std::optional<Kernel> EXP6STFT;
        std::optional<Kernel> EXP7STFT;
        std::optional<Kernel> EXP8STFT;
        std::optional<Kernel> EXP9STFT;
        std::optional<Kernel> EXP10STFT;
        std::optional<Kernel> EXP11STFT;

        std::optional<Kernel> EXPCommon;
        std::optional<Kernel> Overlap;

        std::optional<Kernel> DCRemove;
        std::optional<Kernel> Hanning;
        std::optional<Kernel> Hamming;
        std::optional<Kernel> Blackman;
        std::optional<Kernel> Nuttall;
        std::optional<Kernel> Blackman_Nuttall;
        std::optional<Kernel> Blackman_Harris;
        std::optional<Kernel> FlatTop;
        std::optional<Kernel> Gaussian;
        std::optional<Kernel> toBinOnly;
        std::optional<Kernel> BinPowerChain;
        std::optional<Kernel> toPower;
        std::optional<Kernel> MelScale;
        std::optional<Kernel> MelDBChain;
        std::optional<Kernel> toDB;
    } built_kernels;

    struct {
        std::optional<Buffer> origin;
        std::optional<Buffer> real;
        std::optional<Buffer> imag;
        std::optional<Buffer> subreal;
        std::optional<Buffer> subimag;
        std::optional<Buffer> bin_real;
        std::optional<Buffer> bin_imag;
        std::optional<Buffer> power;
        std::optional<Buffer> mel;
        std::optional<Buffer> mel_filter_bank;
    } memories;

    std::optional<cl::Device>       gpu;
    std::optional<cl::CommandQueue> CQ;
    std::optional<cl::Context>      gpu_ctxt;
    std::optional<cl::Program>      gpu_codes;
    std::vector<float>              mel_filter_bank_host;

    bool
    SetMemory(const uint32_t      origin_cpu_memory_sz,
              const StftArgs     &args,
              const POST_PROCESS &post_process,
              const bool          needSubBuffer);

    void
    EnsureMelFilterBank(int windowSize);

    bool
    GetResult()
    {
        if (CQ->flush() != CL_SUCCESS) {
            return false;
        }
        if (CQ->finish() != CL_SUCCESS) {
            return false;
        }
        return true;
    }

  public:
    StftResult
    Execute(REAL_VEC       &origin_cpu_memory,
            WINDOW_LIST     window,
            POST_PROCESS    post_process,
            unsigned int    win_expsz,
            const StftArgs &args) override;
    OPENCL_STFT()
    {

        std::vector<cl::Platform> platforms;

        int device_power_score = 0;
        cl::Platform::get(&platforms);
        for (auto &i : platforms) {
            std::vector<cl::Device> calc_devs;
            i.getDevices(CL_DEVICE_TYPE_ALL, &calc_devs);
            for (auto target_dev : calc_devs) {
                int local_power_score = 0;
                target_dev.getInfo(CL_DEVICE_MAX_COMPUTE_UNITS,
                                   &local_power_score);
                if (local_power_score > device_power_score) {

                    gpu                = std::move(target_dev);
                    device_power_score = local_power_score;
                }
            }
        }
        if (device_power_score == 0 || !gpu.has_value()) {
            throw std::runtime_error("failed to load opencl device.");
        } else {
            gpu_ctxt = cl::Context(gpu.value());
        }
        auto fs   = cmrc::pdje_okl::get_filesystem();
        auto file = fs.open("STFT_MAIN.cl");

        std::string cl_codes(file.begin(), file.end());
        gpu_codes.emplace(gpu_ctxt.value(), cl_codes);
        if (gpu_codes->build(gpu.value()) != CL_SUCCESS) {
            throw std::runtime_error("failed to build cl kernel codes.");
        }
        CQ.emplace(gpu_ctxt.value(), gpu.value());
    }
    ~OPENCL_STFT() override;
};

} // namespace PDJE_PARALLEL