# NOTE(review): stray revision timestamp left over from the original paste: 2025-01-23 21:50:55 +08:00
from lmdeploy import pipeline, TurbomindEngineConfig, GenerationConfig
# Constants
# Literal marker string for an image slot. Its consumers are outside this
# view -- presumably it is substituted into prompts; TODO confirm.
IMAGE_TOKEN = "[IMAGE_TOKEN]"

# Model initialization
# Creates the module-level LMDeploy pipeline `pipe` for the model named in
# `model`, using a TurboMind engine configuration. This runs at import time.
model = "OpenGVLab/InternVL2-26B-AWQ"
pipe = pipeline(
    model,
    backend_config=TurbomindEngineConfig(
        # Matches the "-AWQ" checkpoint named in `model`.
        model_format="awq",
        # A tp=2 / device_ids=[0, 1] variant was left commented out in the
        # original; tp=4 is the active setting.
        tp=4,
        session_len=12864,
        max_batch_size=1,
        # NOTE(review): presumably the fraction of GPU memory reserved for
        # the k/v cache (per LMDeploy docs) -- confirm 0.05 is intended.
        cache_max_entry_count=0.05,
        # NOTE(review): this exceeds session_len (12864); confirm that a
        # k/v block this large is deliberate.
        cache_block_seq_len=32768,
        # NOTE(review): presumably selects 4-bit k/v cache quantization
        # (per LMDeploy docs) -- confirm.
        quant_policy=4,
    ),
)