20 lines
471 B
Python
20 lines
471 B
Python
from lmdeploy import pipeline, TurbomindEngineConfig, GenerationConfig

# Constants
# Placeholder string for an image slot. It is not referenced anywhere in the
# visible part of this script -- presumably it is substituted into prompts
# elsewhere (TODO confirm).
# NOTE(review): lmdeploy's own image placeholder is believed to be spelled
# with angle brackets rather than square brackets; confirm this spelling is
# intentional before relying on lmdeploy to expand it.
IMAGE_TOKEN = "[IMAGE_TOKEN]"

# Model initialization
# Identifier of the checkpoint handed to `pipeline(...)` below.
model = "OpenGVLab/InternVL2-26B-AWQ"

# Build the inference pipeline for `model` on the TurboMind backend.
# This runs at import time, so importing this module triggers model loading.
pipe = pipeline(
    model,
    backend_config=TurbomindEngineConfig(
        # Weight format of the checkpoint (the model name carries an "-AWQ" suffix).
        model_format="awq",
        # Tensor-parallel degree: the model is split across two GPUs.
        tp=2,
        # NOTE(review): confirm the installed lmdeploy version's
        # TurbomindEngineConfig actually accepts `device_ids`; an unknown
        # keyword would raise TypeError when the config is constructed.
        device_ids=[0, 1],
        # Maximum session (context) length in tokens.
        session_len=12864,
        # Only one request is batched at a time.
        max_batch_size=1,
        # Share of GPU memory reserved for the k/v cache
        # (presumably a fraction, i.e. 5% -- verify against lmdeploy docs).
        cache_max_entry_count=0.05,
        # NOTE(review): this is the number of tokens per k/v cache block.
        # 32768 is larger than session_len (12864), and lmdeploy's documented
        # default is 64 -- confirm this value is intended and not a mix-up
        # with a context-length setting.
        cache_block_seq_len=32768,
        # k/v cache quantization policy (4 presumably selects 4-bit k/v
        # cache -- verify against lmdeploy docs).
        quant_policy=4,
    ),
)