From 66a489706d5d7d0fb92b39576a30311cd124faf6 Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 19 Oct 2023 10:14:48 +0800 Subject: [PATCH] [LLM] Support bloom prefix (#2248) * support bloom prefix * support_bloom_prefix * support bloom prefix * Update code for bloom prefix * update code * support bloom prefix --- llm/fastdeploy_llm/engine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llm/fastdeploy_llm/engine.py b/llm/fastdeploy_llm/engine.py index 6ac73acf2e..8a96f8f7de 100644 --- a/llm/fastdeploy_llm/engine.py +++ b/llm/fastdeploy_llm/engine.py @@ -368,11 +368,11 @@ def get_alibi_slopes(num_heads): inputs["attention_mask"] = ( alibi_encoder + (1 - inputs["attention_mask"] ) * paddle.finfo(inputs["attention_mask"].dtype).min) - attention_mask = inputs["attention_mask"] - tgt_generation_mask = inputs["tgt_generation_mask"] inputs["tgt_generation_mask"] = ( alibi_decoder + (1 - inputs["tgt_generation_mask"]) * paddle.finfo(inputs["tgt_generation_mask"].dtype).min) + attention_mask = inputs["attention_mask"] + tgt_generation_mask = inputs["tgt_generation_mask"] def dy_input_preprocess(inputs):