Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
/out/
/tmp/
/autogen-*.md
/common/build-info.cpp

# Deprecated

Expand Down
23 changes: 16 additions & 7 deletions ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -240,21 +240,30 @@ void load_a_to_shmem(const uint pos_a, const uint row, const uint col, const uin
const uint idx = pos_a + col * p.stride_a / LOAD_VEC_A + row;
const uint buf_idx = col * SHMEM_STRIDE + row * LOAD_VEC_A / 2;

const uint ib = idx / 128; // 2 values per idx
const uint iqs = idx % 128; // 0..127
const uint ib = idx / 32; // 8 values per idx
const uint iqs = (idx % 32) * 4; // 0,4,8..124

const uint n = iqs / 64; // 0,1
const uint b = (iqs % 64) / 32; // 0,1
const uint b = ((iqs % 64) / 32) * 4; // 0,4
const uint is_b = (iqs % 16) / 8; // 0,1
const uint qhshift = ((iqs % 64) / 16) * 2; // 0,2,4,6
const uint is = 8 * n + qhshift + is_b; // 0..15
const uint qsi = n * 64 + (iqs % 32) * 2; // 0,2,4..126
const uint qhi = n * 32 + (iqs % 16) * 2; // 0,2,4..62
const uint qsi = n * 32 + (iqs % 32); // 0,4,8..60
const uint qhi = n * 16 + (iqs % 16); // 0,4,8..28

const float dscale = float(data_a[ib].d) * float(data_a[ib].scales[is]);

buf_a[buf_idx] = FLOAT_TYPE_VEC2(dscale * float(int8_t(((data_a[ib].ql[qsi ] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi ] >> qhshift) & 3) << 4)) - 32),
dscale * float(int8_t(((data_a[ib].ql[qsi + 1] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi + 1] >> qhshift) & 3) << 4)) - 32));
const uint ql_0 = (bitfieldInsert(uint(data_a_packed16[ib].ql[qsi]), uint(data_a_packed16[ib].ql[qsi + 1]), 16, 16) >> b) & 0x0F0F0F0F;
const uint ql_1 = (bitfieldInsert(uint(data_a_packed16[ib].ql[qsi + 2]), uint(data_a_packed16[ib].ql[qsi + 3]), 16, 16) >> b) & 0x0F0F0F0F;
const uint qh_0 = (bitfieldInsert(uint(data_a_packed16[ib].qh[qhi]), uint(data_a_packed16[ib].qh[qhi + 1]), 16, 16) >> qhshift) & 0x03030303;
const uint qh_1 = (bitfieldInsert(uint(data_a_packed16[ib].qh[qhi + 2]), uint(data_a_packed16[ib].qh[qhi + 3]), 16, 16) >> qhshift) & 0x03030303;
const vec4 q_0 = (vec4(unpack8(ql_0 | (qh_0 << 4))) - 32) * dscale;
const vec4 q_1 = (vec4(unpack8(ql_1 | (qh_1 << 4))) - 32) * dscale;

buf_a[buf_idx] = FLOAT_TYPE_VEC2(q_0.x, q_0.y);
buf_a[buf_idx + 1] = FLOAT_TYPE_VEC2(q_0.z, q_0.w);
buf_a[buf_idx + 2] = FLOAT_TYPE_VEC2(q_1.x, q_1.y);
buf_a[buf_idx + 3] = FLOAT_TYPE_VEC2(q_1.z, q_1.w);
#elif defined(DATA_A_IQ1_S)
const uint idx = pos_a + col * p.stride_a / LOAD_VEC_A + row;
const uint buf_idx = col * SHMEM_STRIDE + row * LOAD_VEC_A / 2;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,7 @@ void matmul_shaders(bool fp16, MatMulIdType matmul_id_type, bool coopmat, bool c

for (const auto& tname : type_names) {
std::string load_vec_quant = "2";
if ((tname == "q4_0") || (tname == "q4_1") || (tname == "iq1_s") || (tname == "iq1_m") || (tname == "iq2_xxs") || (tname == "iq2_xs") || (tname == "iq2_s"))
if ((tname == "q4_0") || (tname == "q4_1") || (tname == "q6_k") || (tname == "iq1_s") || (tname == "iq1_m") || (tname == "iq2_xxs") || (tname == "iq2_xs") || (tname == "iq2_s"))
load_vec_quant = "8";
else if ((tname == "q5_0") || (tname == "q5_1") || (tname == "q8_0") || (tname == "iq3_xxs") || (tname == "iq3_s") || (tname == "iq4_nl") || (tname == "mxfp4"))
load_vec_quant = "4";
Expand Down
Loading