Method 1: Inspect the linear layers inside the attention modules
import os
import torch
from transformers import AutoModel, BitsAndBytesConfig

os.environ['CUDA_VISIBLE_DEVICES'] = '1,2'
os.environ["TOKENIZERS_PARALLELISM"] = "false"

model_path = "/home/jovyan/codes/llms/Qwen2.5-14B-Instruct"
base_model = AutoModel.from_pretrained(model_path, device_map='cuda:0', trust_remote_code=True)
Print the names of the attention layers:
for name, module in base_model.named_modules():
    if 'attn' in name or 'attention' in name:  # common attention module names
        print(name)
        for sub_name, sub_module in module.named_modules():  # check sub-modules within attention
            print(f" - {sub_name}")
Method 2: Find the linear layers via bitsandbytes quantization
import bitsandbytes as bnb

def find_all_linear_names(model):
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, bnb.nn.Linear4bit):
            names = name.split(".")
            # model-specific: keep only the last path component, e.g. 'q_proj'
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if "lm_head" in lora_module_names:  # needed for 16-bit
        lora_module_names.remove("lm_head")
    return list(lora_module_names)
Load the model in 4-bit:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
base_model = AutoModel.from_pretrained(
    model_path,
    quantization_config=bnb_config,
    device_map="auto",
)
Find the LoRA target layers:
find_all_linear_names(base_model)
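On a LLaMA-style model such as Qwen2.5 this typically returns the seven projection layers of each block: q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj and down_proj. The list can be fed straight into peft's LoraConfig; a minimal sketch, with placeholder hyperparameters and task_type assuming a causal-LM use case:

from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=8,                  # placeholder rank, tune for your task
    lora_alpha=16,        # placeholder scaling factor
    lora_dropout=0.05,
    target_modules=find_all_linear_names(base_model),
    task_type="CAUSAL_LM",
)
peft_model = get_peft_model(base_model, lora_config)
peft_model.print_trainable_parameters()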
Here is another function built on the same principle; matching on the string form of the module type avoids a hard dependency on the bitsandbytes import:
def find_target_modules(model):
    # Initialize a set to store unique layer names
    unique_layers = set()
    # Iterate over all named modules in the model
    for name, module in model.named_modules():
        # Check if the module type contains 'Linear4bit'
        if "Linear4bit" in str(type(module)):
            # Extract the short name of the layer
            layer_type = name.split('.')[-1]
            # Add it to the set of unique layers
            unique_layers.add(layer_type)
    # Return the set of unique layers converted to a list
    return list(unique_layers)
find_target_modules(base_model)
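Both helpers assume the model was loaded in 4-bit first. For an unquantized checkpoint the same idea works by matching torch.nn.Linear instead; a small sketch of my own, not from the original post:

import torch.nn as nn

def find_linear_names_fp(model):
    # Same principle as above, but for full/half-precision models:
    # match torch.nn.Linear rather than bnb.nn.Linear4bit.
    names = {name.split('.')[-1] for name, module in model.named_modules()
             if isinstance(module, nn.Linear)}
    names.discard('lm_head')  # usually excluded from LoRA targets
    return list(names)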
Method 3: Read the source code of an open-source framework (swift)
The code below is taken from the ms-swift source:
from dataclasses import dataclass, field
from typing import List, Union

@dataclass
class ModelKeys:
    model_type: str = None
    module_list: str = None
    embedding: str = None
    mlp: str = None
    down_proj: str = None
    attention: str = None
    o_proj: str = None
    q_proj: str = None
    k_proj: str = None
    v_proj: str = None
    qkv_proj: str = None
    qk_proj: str = None
    qa_proj: str = None
    qb_proj: str = None
    kva_proj: str = None
    kvb_proj: str = None
    output: str = None

@dataclass
class MultiModelKeys(ModelKeys):
    language_model: Union[List[str], str] = field(default_factory=list)
    connector: Union[List[str], str] = field(default_factory=list)
    vision_tower: Union[List[str], str] = field(default_factory=list)
    generator: Union[List[str], str] = field(default_factory=list)

    def __post_init__(self):
        # compat: normalize each field to a list
        for key in ['language_model', 'connector', 'vision_tower', 'generator']:
            v = getattr(self, key)
            if isinstance(v, str):
                setattr(self, key, [v])
            if v is None:
                setattr(self, key, [])
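The __post_init__ hook normalizes every multimodal field to a list, so a plain string and a one-element list end up equivalent; an illustrative call of my own, with made-up paths:

mm = MultiModelKeys(language_model='model.language_model',
                    vision_tower=['model.vision_tower'])
print(mm.language_model)  # ['model.language_model']
print(mm.vision_tower)    # ['model.vision_tower']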
LLAMA_KEYS = ModelKeys(
    module_list='model.layers',
    mlp='model.layers.{}.mlp',
    down_proj='model.layers.{}.mlp.down_proj',
    attention='model.layers.{}.self_attn',
    o_proj='model.layers.{}.self_attn.o_proj',
    q_proj='model.layers.{}.self_attn.q_proj',
    k_proj='model.layers.{}.self_attn.k_proj',
    v_proj='model.layers.{}.self_attn.v_proj',
    embedding='model.embed_tokens',
    output='lm_head',
)

INTERNLM2_KEYS = ModelKeys(
    module_list='model.layers',
    mlp='model.layers.{}.feed_forward',
    down_proj='model.layers.{}.feed_forward.w2',
    attention='model.layers.{}.attention',
    o_proj='model.layers.{}.attention.wo',
    qkv_proj='model.layers.{}.attention.wqkv',
    embedding='model.tok_embeddings',
    output='output',
)

CHATGLM_KEYS = ModelKeys(
    module_list='transformer.encoder.layers',
    mlp='transformer.encoder.layers.{}.mlp',
    down_proj='transformer.encoder.layers.{}.mlp.dense_4h_to_h',
    attention='transformer.encoder.layers.{}.self_attention',
    o_proj='transformer.encoder.layers.{}.self_attention.dense',
    qkv_proj='transformer.encoder.layers.{}.self_attention.query_key_value',
    embedding='transformer.embedding',
    output='transformer.output_layer',
)

BAICHUAN_KEYS = ModelKeys(
    module_list='model.layers',
    mlp='model.layers.{}.mlp',
    down_proj='model.layers.{}.mlp.down_proj',
    attention='model.layers.{}.self_attn',
    qkv_proj='model.layers.{}.self_attn.W_pack',
    embedding='model.embed_tokens',
    output='lm_head',
)

YUAN_KEYS = ModelKeys(
    module_list='model.layers',
    mlp='model.layers.{}.mlp',
    down_proj='model.layers.{}.mlp.down_proj',
    attention='model.layers.{}.self_attn',
    qk_proj='model.layers.{}.self_attn.qk_proj',
    o_proj='model.layers.{}.self_attn.o_proj',
    q_proj='model.layers.{}.self_attn.q_proj',
    k_proj='model.layers.{}.self_attn.k_proj',
    v_proj='model.layers.{}.self_attn.v_proj',
    embedding='model.embed_tokens',
    output='lm_head',
)

CODEFUSE_KEYS = ModelKeys(
    module_list='gpt_neox.layers',
    mlp='gpt_neox.layers.{}.mlp',
    down_proj='gpt_neox.layers.{}.mlp.dense_4h_to_h',
    attention='gpt_neox.layers.{}.attention',
    o_proj='gpt_neox.layers.{}.attention.dense',
    qkv_proj='gpt_neox.layers.{}.attention.query_key_value',
    embedding='gpt_neox.embed_in',
    output='gpt_neox.embed_out',
)

PHI2_KEYS = ModelKeys(
    module_list='transformer.h',
    mlp='transformer.h.{}.mlp',
    down_proj='transformer.h.{}.mlp.c_proj',
    attention='transformer.h.{}.mixer',
    o_proj='transformer.h.{}.mixer.out_proj',
    qkv_proj='transformer.h.{}.mixer.Wqkv',
    embedding='transformer.embd',
    output='lm_head',
)

QWEN_KEYS = ModelKeys(
    module_list='transformer.h',
    mlp='transformer.h.{}.mlp',
    down_proj='transformer.h.{}.mlp.c_proj',
    attention='transformer.h.{}.attn',
    o_proj='transformer.h.{}.attn.c_proj',
    qkv_proj='transformer.h.{}.attn.c_attn',
    embedding='transformer.wte',
    output='lm_head',
)

PHI3_KEYS = ModelKeys(
    module_list='model.layers',
    mlp='model.layers.{}.mlp',
    down_proj='model.layers.{}.mlp.down_proj',
    attention='model.layers.{}.self_attn',
    o_proj='model.layers.{}.self_attn.o_proj',
    qkv_proj='model.layers.{}.self_attn.qkv_proj',
    embedding='model.embed_tokens',
    output='lm_head',
)

PHI3_SMALL_KEYS = ModelKeys(
    module_list='model.layers',
    mlp='model.layers.{}.mlp',
    down_proj='model.layers.{}.mlp.down_proj',
    attention='model.layers.{}.self_attn',
    o_proj='model.layers.{}.self_attn.dense',
    qkv_proj='model.layers.{}.self_attn.query_key_value',
    embedding='model.embed_tokens',
    output='lm_head',
)

DEEPSEEK_V2_KEYS = ModelKeys(
    module_list='model.layers',
    mlp='model.layers.{}.mlp',
    down_proj='model.layers.{}.mlp.down_proj',
    attention='model.layers.{}.self_attn',
    o_proj='model.layers.{}.self_attn.o_proj',
    qa_proj='model.layers.{}.self_attn.q_a_proj',
    qb_proj='model.layers.{}.self_attn.q_b_proj',
    kva_proj='model.layers.{}.self_attn.kv_a_proj_with_mqa',
    kvb_proj='model.layers.{}.self_attn.kv_b_proj',
    embedding='model.embed_tokens',
    output='lm_head',
)
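Each '{}' in these templates is a placeholder for the layer index, so LLAMA_KEYS.q_proj.format(3) resolves to 'model.layers.3.self_attn.q_proj'. A quick way to tell which key set fits a given checkpoint is to test whether the templated attention path names a real module; a minimal sketch of my own, assuming base_model is still loaded:

def keys_match(model, keys: ModelKeys, layer_idx: int = 0) -> bool:
    # The templates assume the full causal-LM wrapper; a bare backbone loaded
    # via AutoModel lacks the leading 'model.' prefix, so also match on suffix.
    target = keys.attention.format(layer_idx)
    return any(name == target or target.endswith('.' + name)
               for name, _ in model.named_modules())

print(keys_match(base_model, LLAMA_KEYS))  # True: Qwen2.5 follows the LLaMA layout
print(keys_match(base_model, QWEN_KEYS))   # False: 'transformer.h' is the old Qwen1 layout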