// Copyright © 2023-2025 Advanced Micro Devices, Inc.
// SPDX-License-Identifier: MIT

// clang-format off
#include "shim.bwd_preprocess_varlen.h"
#include <aotriton/util.h>
#include <tuple>
#include "iface.op_attn_bwd.h"

namespace AOTRITON_NS::v3::flash {

#if 1
using AOTRITON_NS::v3::flash::OpAttnBwdParams;
#endif

// Drops constness from an object's address and yields it as a plain void*,
// which is the element type of the kernel-argument vectors used in this file.
#define CAST(x) const_cast<void*>(static_cast<const void*>(x))

// Common signature of the argument-preparation routines: given the operator
// parameters and the address of a scratch device pointer, produce the list of
// kernel-argument addresses.
using PP_FUNC = std::vector<void*>(*)(const OpAttnBwdParams& params, hipDeviceptr_t* global_scratch);

namespace {
// Forward declaration only; the table is defined further down in this file,
// after the routine it points to.
extern PP_FUNC prepare_arguments[ 1 ];
}

/// Encodes the functional configuration held in `params` as one mixed-radix
/// integer; lookup_optimal() uses it as the second index into autotune_table.
///
/// Digits, most significant first:
///   - dtype of Out  : fp16 -> 0, bf16 -> 1, fp32 -> 2   (weight 24)
///   - BLOCK_DMODEL  : 16, 32, 48, ..., 512 -> 0..11     (weight 2)
///   - PADDED_HEAD   : false -> 0, true -> 1             (weight 1)
///
/// A value that matches none of the listed choices contributes digit 0.
int64_t BwdPreprocessVarlenContext::godel_number() const
{
    const auto& p = *params;

    // Element type of the Out tensor.
    int64_t dtype_digit = 0;
    const auto out_dtype = p.Out->dtype();
    if (out_dtype == DType::kBFloat16) {
        dtype_digit = 1;
    } else if (out_dtype == DType::kFloat32) {
        dtype_digit = 2;
    }

    // Head-dimension block size.
    int64_t dmodel_digit = 0;
    switch (p.BLOCK_DMODEL) {
        case 16:  dmodel_digit = 0;  break;
        case 32:  dmodel_digit = 1;  break;
        case 48:  dmodel_digit = 2;  break;
        case 64:  dmodel_digit = 3;  break;
        case 80:  dmodel_digit = 4;  break;
        case 96:  dmodel_digit = 5;  break;
        case 128: dmodel_digit = 6;  break;
        case 160: dmodel_digit = 7;  break;
        case 192: dmodel_digit = 8;  break;
        case 224: dmodel_digit = 9;  break;
        case 256: dmodel_digit = 10; break;
        case 512: dmodel_digit = 11; break;
        default:  break;
    }

    // Padded-head flag.
    const int64_t padded_digit = (p.PADDED_HEAD == true) ? 1 : 0;

    return dtype_digit * 24 + dmodel_digit * 2 + padded_digit * 1;
}

/// Selects the kernel to run on `gpu` for the current parameters.
///
/// Returns:
///   - hipErrorNoBinaryForGpu             when `gpu` maps to no architecture index;
///   - hipErrorProfilerNotInitialized     when the autotune table slot is empty;
///   - hipErrorSharedObjectSymbolNotFound when the tuning function ran but left
///                                        kernel_on_device unset;
///   - hipSuccess                         otherwise.
hipError_t
BwdPreprocessVarlenContext::lookup_optimal(Gpu gpu) {
    const auto archmod = get_archmod_number(gpu);
    const int arch_idx = std::get<0>(archmod);
    const int mod_idx = std::get<1>(archmod);
    if (arch_idx < 0) {
        return hipErrorNoBinaryForGpu;
    }

    // Reset first so that a tuning function which selects nothing is detectable.
    kernel_on_device = nullptr;

    if (auto selector = autotune_table[arch_idx][godel_number()]; selector) {
        selector(*this, mod_idx);
        return kernel_on_device ? hipSuccess : hipErrorSharedObjectSymbolNotFound;
    }
    return hipErrorProfilerNotInitialized;
}

/// Launches the selected kernel on `stream` and returns the status reported by
/// the kernel object's invoke().
///
/// kernel_on_device is dereferenced without a check here; it is presumably
/// populated by the tuning function that lookup_optimal() runs — callers should
/// confirm lookup_optimal() succeeded first.
hipError_t
BwdPreprocessVarlenContext::launch(hipStream_t stream) const {
    constexpr std::string_view triton_kernel_name { "bwd_preprocess_varlen" };

    // The argument list stores the address of this local, so it has to stay
    // alive until invoke() has returned.
    hipDeviceptr_t global_scratch = 0;
    auto kernel_args = prepare_arguments[pp_args_index](*params, &global_scratch);

    // A user-supplied grid calculator, when present, overrides the built-in one.
    dim3 grid;
    if (!custom_grid_calculator) {
        grid = grid_calculator();
    } else {
        grid = custom_grid_calculator(*this);
    }

    // Tuning builds pass one extra argument (peek_kernel_image) to invoke().
    return kernel_on_device->invoke(triton_kernel_name,
                                    package_path,
                                    func_name,
                                    arch_name,
                                    grid,
                                    kernel_args,
#if AOTRITON_BUILD_FOR_TUNING
                                    peek_kernel_image,
#endif
                                    stream);
}

/// Translates a Gpu identifier into the pair (architecture index, mod index).
/// Every GPU listed here uses mod index 0; an unlisted GPU yields (-1, 0).
std::tuple<int, int>
BwdPreprocessVarlenContext::get_archmod_number(Gpu gpu) {
    int arch_number = -1;
    if (gpu == GPU_AMD_ARCH_GFX950_MOD0) {
        arch_number = 0;
    } else if (gpu == GPU_AMD_ARCH_GFX1151_MOD0) {
        arch_number = 1;
    } else if (gpu == GPU_AMD_ARCH_GFX1150_MOD0) {
        arch_number = 2;
    } else if (gpu == GPU_AMD_ARCH_GFX1201_MOD0) {
        arch_number = 3;
    } else if (gpu == GPU_AMD_ARCH_GFX1200_MOD0) {
        arch_number = 4;
    }
    // TODO: print a warning when tuning for this GPU mod has not been built.
    // Note: if a mod has no tuning info in the database at all,
    //       getGpuFromStream should not return that mod in the first place.
    return std::make_tuple(arch_number, 0);
}


// Collects the kernel-argument addresses for bwd_preprocess_varlen, in the
// order given by the trailing comments. The returned pointers refer to
// `params` and to `*global_scratch`, so both must outlive every use of the
// returned vector.
static std::vector<void*>
bwd_preprocess_varlen_pp_args_0(const OpAttnBwdParams& params,
                                hipDeviceptr_t* global_scratch) {
  std::vector<void*> kargs;
  kargs.reserve(13);

  // Tensor data pointers.
  kargs.push_back(params.Out->kparam_data_ptr());          // Out
  kargs.push_back(params.DO->kparam_data_ptr());           // DO
  kargs.push_back(params.D->kparam_data_ptr());            // D

  // Strides of Out.
  kargs.push_back(params.Out->kparam_stride(0));           // stride_oz
  kargs.push_back(params.Out->kparam_stride(1));           // stride_oh
  kargs.push_back(params.Out->kparam_stride(2));           // stride_om

  // Strides of DO.
  kargs.push_back(params.DO->kparam_stride(0));            // stride_doz
  kargs.push_back(params.DO->kparam_stride(1));            // stride_doh
  kargs.push_back(params.DO->kparam_stride(2));            // stride_dom

  // Remaining arguments.
  kargs.push_back(params.cu_seqlens_q->kparam_data_ptr()); // cu_seqlens_q
  kargs.push_back(CAST(&params.max_seqlen_q));             // max_seqlen_q
  kargs.push_back(CAST(&params.head_dim));                 // head_dim

  // The scratch pointer slot always comes last.
  kargs.push_back(CAST(global_scratch));
  return kargs;
}

namespace {
// Argument-preparation routines; launch() picks one via pp_args_index.
// This kernel has a single routine.
PP_FUNC prepare_arguments[ 1 ] = {
  &bwd_preprocess_varlen_pp_args_0,
};
}


/// Returns the type strings enumerated for the `Out` argument. The list is
/// built once on first call and shared by all later calls.
const std::vector<std::string>& BwdPreprocessVarlenMetadata::get_Out_choices()
{
    static const std::vector<std::string> kOutChoices {
        "*fp16:16",
        "*bf16:16",
        "*fp32:16",
    };
    return kOutChoices;
}

/// Returns the type strings enumerated for the `D` argument. The list is
/// built once on first call and shared by all later calls.
const std::vector<std::string>& BwdPreprocessVarlenMetadata::get_D_choices()
{
    static const std::vector<std::string> kDChoices { "*fp32:16" };
    return kDChoices;
}

/// Returns the type strings enumerated for the `cu_seqlens_q` argument. The
/// list is built once on first call and shared by all later calls.
const std::vector<std::string>& BwdPreprocessVarlenMetadata::get_cu_seqlens_q_choices()
{
    static const std::vector<std::string> kCuSeqlensQChoices { "*i32:16" };
    return kCuSeqlensQChoices;
}

/// Returns the type strings enumerated for the `max_seqlen_q` argument. The
/// list is built once on first call and shared by all later calls.
const std::vector<std::string>& BwdPreprocessVarlenMetadata::get_max_seqlen_q_choices()
{
    static const std::vector<std::string> kMaxSeqlenQChoices { "i32" };
    return kMaxSeqlenQChoices;
}

/// Returns the type strings enumerated for the `head_dim` argument. The list
/// is built once on first call and shared by all later calls.
const std::vector<std::string>& BwdPreprocessVarlenMetadata::get_head_dim_choices()
{
    static const std::vector<std::string> kHeadDimChoices { "i32" };
    return kHeadDimChoices;
}

/// Returns the values enumerated for `BLOCK_DMODEL`, in ascending order. The
/// list is built once on first call and shared by all later calls.
const std::vector<int>& BwdPreprocessVarlenMetadata::get_BLOCK_DMODEL_choices()
{
    static const std::vector<int> kBlockDmodelChoices {
        16, 32, 48, 64, 80, 96,
        128, 160, 192, 224, 256, 512,
    };
    return kBlockDmodelChoices;
}

/// Returns the values enumerated for `PADDED_HEAD`: false, then true. The
/// list is built once on first call and shared by all later calls.
const std::vector<bool>& BwdPreprocessVarlenMetadata::get_PADDED_HEAD_choices()
{
    static const std::vector<bool> kPaddedHeadChoices { false, true };
    return kPaddedHeadChoices;
}

namespace autotune {

// NUL-separated string pool holding "128" followed by "wave2_warp4_stg1".
const char bwd_preprocess_varlen_packed_string[] =
"128\0"
"wave2_warp4_stg1\0";

// Reads the look-up table entry for the given GPU mod. Each row has a single
// column, so the operator parameters take no part in the selection.
int bwd_preprocess_varlen__lut_lambda__0 (const OpAttnBwdParams& /*params*/, int mod_number, int8_t lut[1][1]) {
    const int8_t* mod_row = lut[mod_number];
    return mod_row[0];
}

} // namespace autotune

// Dispatch table of tuning functions, read by lookup_optimal() as
// autotune_table[arch_number][godel_number()].
//   - First index : architecture index returned by get_archmod_number()
//                   (0..4, one row per listed GPU).
//   - Second index: functional index returned by godel_number()
//                   (0..71 = 3 Out dtypes x 12 BLOCK_DMODEL sizes x 2 PADDED_HEAD values).
// lookup_optimal() treats a null entry as "no tuning information available".
BwdPreprocessVarlenContext::AutoTuneTableEntry
BwdPreprocessVarlenContext::autotune_table[][ 72 ] = {
    // Row 0: GPU_AMD_ARCH_GFX950_MOD0
    {
        &autotune::Autotune_bwd_preprocess_varlen__A0__F0,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F1,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F2,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F3,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F4,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F5,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F6,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F7,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F8,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F9,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F10,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F11,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F12,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F13,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F14,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F15,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F16,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F17,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F18,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F19,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F20,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F21,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F22,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F23,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F24,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F25,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F26,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F27,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F28,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F29,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F30,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F31,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F32,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F33,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F34,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F35,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F36,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F37,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F38,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F39,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F40,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F41,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F42,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F43,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F44,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F45,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F46,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F47,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F48,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F49,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F50,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F51,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F52,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F53,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F54,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F55,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F56,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F57,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F58,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F59,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F60,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F61,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F62,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F63,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F64,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F65,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F66,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F67,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F68,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F69,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F70,
        &autotune::Autotune_bwd_preprocess_varlen__A0__F71,
    },
    // Row 1: GPU_AMD_ARCH_GFX1151_MOD0
    {
        &autotune::Autotune_bwd_preprocess_varlen__A1__F0,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F1,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F2,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F3,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F4,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F5,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F6,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F7,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F8,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F9,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F10,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F11,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F12,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F13,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F14,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F15,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F16,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F17,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F18,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F19,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F20,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F21,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F22,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F23,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F24,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F25,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F26,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F27,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F28,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F29,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F30,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F31,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F32,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F33,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F34,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F35,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F36,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F37,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F38,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F39,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F40,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F41,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F42,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F43,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F44,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F45,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F46,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F47,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F48,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F49,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F50,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F51,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F52,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F53,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F54,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F55,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F56,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F57,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F58,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F59,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F60,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F61,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F62,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F63,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F64,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F65,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F66,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F67,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F68,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F69,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F70,
        &autotune::Autotune_bwd_preprocess_varlen__A1__F71,
    },
    // Row 2: GPU_AMD_ARCH_GFX1150_MOD0
    {
        &autotune::Autotune_bwd_preprocess_varlen__A2__F0,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F1,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F2,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F3,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F4,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F5,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F6,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F7,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F8,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F9,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F10,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F11,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F12,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F13,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F14,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F15,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F16,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F17,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F18,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F19,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F20,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F21,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F22,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F23,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F24,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F25,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F26,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F27,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F28,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F29,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F30,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F31,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F32,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F33,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F34,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F35,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F36,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F37,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F38,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F39,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F40,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F41,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F42,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F43,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F44,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F45,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F46,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F47,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F48,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F49,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F50,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F51,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F52,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F53,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F54,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F55,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F56,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F57,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F58,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F59,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F60,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F61,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F62,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F63,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F64,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F65,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F66,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F67,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F68,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F69,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F70,
        &autotune::Autotune_bwd_preprocess_varlen__A2__F71,
    },
    // Row 3: GPU_AMD_ARCH_GFX1201_MOD0
    {
        &autotune::Autotune_bwd_preprocess_varlen__A3__F0,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F1,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F2,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F3,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F4,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F5,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F6,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F7,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F8,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F9,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F10,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F11,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F12,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F13,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F14,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F15,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F16,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F17,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F18,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F19,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F20,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F21,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F22,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F23,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F24,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F25,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F26,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F27,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F28,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F29,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F30,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F31,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F32,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F33,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F34,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F35,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F36,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F37,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F38,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F39,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F40,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F41,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F42,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F43,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F44,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F45,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F46,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F47,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F48,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F49,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F50,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F51,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F52,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F53,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F54,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F55,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F56,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F57,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F58,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F59,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F60,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F61,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F62,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F63,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F64,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F65,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F66,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F67,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F68,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F69,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F70,
        &autotune::Autotune_bwd_preprocess_varlen__A3__F71,
    },
    // Row 4: GPU_AMD_ARCH_GFX1200_MOD0
    {
        &autotune::Autotune_bwd_preprocess_varlen__A4__F0,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F1,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F2,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F3,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F4,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F5,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F6,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F7,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F8,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F9,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F10,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F11,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F12,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F13,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F14,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F15,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F16,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F17,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F18,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F19,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F20,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F21,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F22,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F23,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F24,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F25,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F26,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F27,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F28,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F29,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F30,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F31,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F32,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F33,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F34,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F35,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F36,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F37,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F38,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F39,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F40,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F41,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F42,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F43,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F44,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F45,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F46,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F47,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F48,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F49,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F50,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F51,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F52,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F53,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F54,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F55,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F56,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F57,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F58,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F59,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F60,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F61,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F62,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F63,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F64,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F65,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F66,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F67,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F68,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F69,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F70,
        &autotune::Autotune_bwd_preprocess_varlen__A4__F71,
    },
};

}

// vim: set fileencoding=utf-8

