from lmdeploy import pipeline, TurbomindEngineConfig, GenerationConfig

# Constants
IMAGE_TOKEN = "[IMAGE_TOKEN]"

# Model initialization
model = "OpenGVLab/InternVL2-26B-AWQ"

# Build the inference pipeline once at import time on the TurboMind backend.
# NOTE(review): the meaning of each knob below follows the LMDeploy
# TurbomindEngineConfig documentation -- confirm against the installed version.
pipe = pipeline(
    model,
    backend_config=TurbomindEngineConfig(
        # The checkpoint named above is the AWQ variant, so load it as AWQ.
        model_format="awq",
        # Tensor-parallel degree. A 2-GPU variant (tp=2 with
        # device_ids=[0, 1]) was previously left here disabled.
        tp=4,
        session_len=12864,
        max_batch_size=1,
        # Presumably the fraction of GPU memory reserved for the k/v cache.
        cache_max_entry_count=0.05,
        # NOTE(review): this is larger than session_len (12864), so a single
        # k/v block would span more tokens than one session can hold; the
        # documented default is much smaller. Value kept as-is -- confirm
        # that it is intentional.
        cache_block_seq_len=32768,
        # Presumably selects 4-bit k/v cache quantization -- verify.
        quant_policy=4,
    ),
)