# Tiktok-Talent-Info/pipeline_setup.py
#
# Listing metadata from the repository web view: 25 lines, 658 B, Python.
# 2025-01-26 20:42:56 +08:00
"""Module-level lmdeploy pipeline setup for ``OpenGVLab/InternVL2-26B-AWQ``.

Importing this module sets ``CUDA_VISIBLE_DEVICES`` to ``"0,1"`` and then
constructs ``pipe`` via ``lmdeploy.pipeline`` with a TurboMind backend
configuration. ``IMAGE_TOKEN``, ``model`` and ``pipe`` are the names defined
here; ``GenerationConfig`` is imported but not used in this module.
"""

import os

# Choose the GPUs before lmdeploy is imported so the device mask is already in
# the environment when any CUDA-aware library gets loaded.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
# os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"

from lmdeploy import pipeline, TurbomindEngineConfig, GenerationConfig  # noqa: E402,F401

# NOTE(review): lmdeploy's own image placeholder constant appears to be
# "<IMAGE_TOKEN>" (angle brackets) -- confirm the square-bracket form below is
# what the prompt-building code elsewhere expects.
IMAGE_TOKEN = "[IMAGE_TOKEN]"

# Model initialization
model = "OpenGVLab/InternVL2-26B-AWQ"
pipe = pipeline(
    model,
    backend_config=TurbomindEngineConfig(
        model_format="awq",
        tp=2,  # matches the two GPU ids listed in CUDA_VISIBLE_DEVICES above
        # tp=4,
        session_len=16384,  # 4096, 8192, 16384, 32768
        max_batch_size=1,
        cache_max_entry_count=0.2,  # 0.05
        # NOTE(review): lmdeploy documents this as the number of tokens per
        # k/v cache block with a default of 64; 16384 equals session_len here
        # -- confirm that such a large block size is intended.
        cache_block_seq_len=16384,  # 8192, 16384, 32768
        # quant_policy=8,
        # precision="fp16",
    ),
    # log_level='DEBUG'
)