20 lines
443 B
Python
20 lines
443 B
Python
|
from lmdeploy import pipeline, TurbomindEngineConfig, GenerationConfig
|
||
|
|
||
|
# Constants
|
||
|
# Literal placeholder string kept as a module-level constant.
# NOTE(review): presumably marks where an image is spliced into a prompt;
# no use of it is visible in this part of the file -- confirm against callers.
IMAGE_TOKEN: str = "[IMAGE_TOKEN]"
|
||
|
|
||
|
# Model initialization
#
# The engine settings are assembled first and then handed to the pipeline
# factory, so the individual knobs are easy to read and adjust in one place.
model = "OpenGVLab/InternVL2-26B-AWQ"

# Keyword meanings below follow the lmdeploy TurbomindEngineConfig docs;
# confirm them against the installed lmdeploy version before tuning.
_engine_config = TurbomindEngineConfig(
    model_format="awq",  # matches the AWQ-quantized checkpoint named above
    tp=4,
    session_len=12864,
    max_batch_size=1,
    cache_max_entry_count=0.05,
    # NOTE(review): cache_block_seq_len (32768) is larger than session_len
    # (12864); confirm this is intentional.
    cache_block_seq_len=32768,
    quant_policy=4,
)

# Module-level pipeline object built from the model id and the engine config.
pipe = pipeline(model, backend_config=_engine_config)
|