vllm.envs

CMAKE_BUILD_TYPE module-attribute

CMAKE_BUILD_TYPE: (
    Literal["Debug", "Release", "RelWithDebInfo"] | None
) = None

CUDA_VISIBLE_DEVICES module-attribute

CUDA_VISIBLE_DEVICES: str | None = None

K_SCALE_CONSTANT module-attribute

K_SCALE_CONSTANT: int = 200

LD_LIBRARY_PATH module-attribute

LD_LIBRARY_PATH: str | None = None

LOCAL_RANK module-attribute

LOCAL_RANK: int = 0

MAX_JOBS module-attribute

MAX_JOBS: str | None = None

NO_COLOR module-attribute

NO_COLOR: bool = False

NVCC_THREADS module-attribute

NVCC_THREADS: str | None = None

Q_SCALE_CONSTANT module-attribute

Q_SCALE_CONSTANT: int = 200

S3_ACCESS_KEY_ID module-attribute

S3_ACCESS_KEY_ID: str | None = None

S3_ENDPOINT_URL module-attribute

S3_ENDPOINT_URL: str | None = None

S3_SECRET_ACCESS_KEY module-attribute

S3_SECRET_ACCESS_KEY: str | None = None

VERBOSE module-attribute

VERBOSE: bool = False

VLLM_ALL2ALL_BACKEND module-attribute

VLLM_ALL2ALL_BACKEND: Literal[
    "naive",
    "pplx",
    "deepep_high_throughput",
    "deepep_low_latency",
    "allgather_reducescatter",
    "flashinfer_all2allv",
] = "allgather_reducescatter"

VLLM_ALLOW_CHUNKED_LOCAL_ATTN_WITH_HYBRID_KV_CACHE module-attribute

VLLM_ALLOW_CHUNKED_LOCAL_ATTN_WITH_HYBRID_KV_CACHE: bool = (
    False
)

VLLM_ALLOW_INSECURE_SERIALIZATION module-attribute

VLLM_ALLOW_INSECURE_SERIALIZATION: bool = False

VLLM_ALLOW_LONG_MAX_MODEL_LEN module-attribute

VLLM_ALLOW_LONG_MAX_MODEL_LEN: bool = False

VLLM_ALLOW_RUNTIME_LORA_UPDATING module-attribute

VLLM_ALLOW_RUNTIME_LORA_UPDATING: bool = False

VLLM_ALLREDUCE_USE_SYMM_MEM module-attribute

VLLM_ALLREDUCE_USE_SYMM_MEM: bool = True

VLLM_API_KEY module-attribute

VLLM_API_KEY: str | None = None

VLLM_ASSETS_CACHE module-attribute

VLLM_ASSETS_CACHE: str = join(VLLM_CACHE_ROOT, 'assets')

VLLM_ASSETS_CACHE_MODEL_CLEAN module-attribute

VLLM_ASSETS_CACHE_MODEL_CLEAN: bool = False

VLLM_ATTENTION_BACKEND module-attribute

VLLM_ATTENTION_BACKEND: str | None = None

VLLM_AUDIO_FETCH_TIMEOUT module-attribute

VLLM_AUDIO_FETCH_TIMEOUT: int = 10

VLLM_CACHE_ROOT module-attribute

VLLM_CACHE_ROOT: str = expanduser('~/.cache/vllm')

VLLM_COMPILE_CACHE_SAVE_FORMAT module-attribute

VLLM_COMPILE_CACHE_SAVE_FORMAT: Literal[
    "binary", "unpacked"
] = "binary"

VLLM_COMPUTE_NANS_IN_LOGITS module-attribute

VLLM_COMPUTE_NANS_IN_LOGITS: bool = False

VLLM_CONFIGURE_LOGGING module-attribute

VLLM_CONFIGURE_LOGGING: int = 1

VLLM_CONFIG_ROOT module-attribute

VLLM_CONFIG_ROOT: str = expanduser('~/.config/vllm')

VLLM_CPU_KVCACHE_SPACE module-attribute

VLLM_CPU_KVCACHE_SPACE: int | None = 0

VLLM_CPU_NUM_OF_RESERVED_CPU module-attribute

VLLM_CPU_NUM_OF_RESERVED_CPU: int | None = None

VLLM_CPU_OMP_THREADS_BIND module-attribute

VLLM_CPU_OMP_THREADS_BIND: str = ''

VLLM_CPU_SGL_KERNEL module-attribute

VLLM_CPU_SGL_KERNEL: bool = False

VLLM_CUDART_SO_PATH module-attribute

VLLM_CUDART_SO_PATH: str | None = None

VLLM_CUSTOM_SCOPES_FOR_PROFILING module-attribute

VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False

VLLM_DBO_COMM_SMS module-attribute

VLLM_DBO_COMM_SMS: int = 20

VLLM_DEBUG_DUMP_PATH module-attribute

VLLM_DEBUG_DUMP_PATH: str | None = None

VLLM_DEBUG_LOG_API_SERVER_RESPONSE module-attribute

VLLM_DEBUG_LOG_API_SERVER_RESPONSE: bool = False

VLLM_DEEPEP_BUFFER_SIZE_MB module-attribute

VLLM_DEEPEP_BUFFER_SIZE_MB: int = 1024

VLLM_DEEPEP_HIGH_THROUGHPUT_FORCE_INTRA_NODE module-attribute

VLLM_DEEPEP_HIGH_THROUGHPUT_FORCE_INTRA_NODE: bool = False

VLLM_DEEPEP_LOW_LATENCY_USE_MNNVL module-attribute

VLLM_DEEPEP_LOW_LATENCY_USE_MNNVL: bool = False

VLLM_DEEP_GEMM_WARMUP module-attribute

VLLM_DEEP_GEMM_WARMUP: Literal["skip", "full", "relax"] = (
    "relax"
)

VLLM_DISABLED_KERNELS module-attribute

VLLM_DISABLED_KERNELS: list[str] = []

VLLM_DISABLE_COMPILE_CACHE module-attribute

VLLM_DISABLE_COMPILE_CACHE: bool = False

VLLM_DISABLE_FLASHINFER_PREFILL module-attribute

VLLM_DISABLE_FLASHINFER_PREFILL: bool = False

VLLM_DISABLE_PYNCCL module-attribute

VLLM_DISABLE_PYNCCL: bool = False

VLLM_DISABLE_SHARED_EXPERTS_STREAM module-attribute

VLLM_DISABLE_SHARED_EXPERTS_STREAM: bool = False

VLLM_DOCKER_BUILD_CONTEXT module-attribute

VLLM_DOCKER_BUILD_CONTEXT: bool = False

VLLM_DO_NOT_TRACK module-attribute

VLLM_DO_NOT_TRACK: bool = False

VLLM_DP_MASTER_IP module-attribute

VLLM_DP_MASTER_IP: str = ''

VLLM_DP_MASTER_PORT module-attribute

VLLM_DP_MASTER_PORT: int = 0

VLLM_DP_RANK module-attribute

VLLM_DP_RANK: int = 0

VLLM_DP_RANK_LOCAL module-attribute

VLLM_DP_RANK_LOCAL: int = -1

VLLM_DP_SIZE module-attribute

VLLM_DP_SIZE: int = 1

VLLM_ENABLE_CUDAGRAPH_GC module-attribute

VLLM_ENABLE_CUDAGRAPH_GC: bool = False

VLLM_ENABLE_FUSED_MOE_ACTIVATION_CHUNKING module-attribute

VLLM_ENABLE_FUSED_MOE_ACTIVATION_CHUNKING: bool = True

VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING module-attribute

VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING: bool = True

VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE module-attribute

VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE: bool = True

VLLM_ENABLE_RESPONSES_API_STORE module-attribute

VLLM_ENABLE_RESPONSES_API_STORE: bool = False

VLLM_ENABLE_V1_MULTIPROCESSING module-attribute

VLLM_ENABLE_V1_MULTIPROCESSING: bool = True

VLLM_ENGINE_ITERATION_TIMEOUT_S module-attribute

VLLM_ENGINE_ITERATION_TIMEOUT_S: int = 60

VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS module-attribute

VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS: int = 300

VLLM_FLASHINFER_DISABLE_Q_QUANTIZATION module-attribute

VLLM_FLASHINFER_DISABLE_Q_QUANTIZATION: bool = False

VLLM_FLASHINFER_MOE_BACKEND module-attribute

VLLM_FLASHINFER_MOE_BACKEND: Literal[
    "throughput", "latency", "masked_gemm"
] = "latency"

VLLM_FLASHINFER_WORKSPACE_BUFFER_SIZE module-attribute

VLLM_FLASHINFER_WORKSPACE_BUFFER_SIZE: int = (
    394 * 1024 * 1024
)

VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH module-attribute

VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH: int = 32

VLLM_FLASH_ATTN_VERSION module-attribute

VLLM_FLASH_ATTN_VERSION: int | None = None

VLLM_FORCE_AOT_LOAD module-attribute

VLLM_FORCE_AOT_LOAD: bool = False

VLLM_FUSED_MOE_CHUNK_SIZE module-attribute

VLLM_FUSED_MOE_CHUNK_SIZE: int = 64 * 1024

VLLM_GC_DEBUG module-attribute

VLLM_GC_DEBUG: str = ''

VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS module-attribute

VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False

VLLM_GPT_OSS_SYSTEM_TOOL_MCP_LABELS module-attribute

VLLM_GPT_OSS_SYSTEM_TOOL_MCP_LABELS: set[str] = set()

VLLM_HAS_FLASHINFER_CUBIN module-attribute

VLLM_HAS_FLASHINFER_CUBIN: bool = False

VLLM_HOST_IP module-attribute

VLLM_HOST_IP: str = ''

VLLM_HTTP_TIMEOUT_KEEP_ALIVE module-attribute

VLLM_HTTP_TIMEOUT_KEEP_ALIVE: int = 5

VLLM_IMAGE_FETCH_TIMEOUT module-attribute

VLLM_IMAGE_FETCH_TIMEOUT: int = 5

VLLM_KEEP_ALIVE_ON_ENGINE_DEATH module-attribute

VLLM_KEEP_ALIVE_ON_ENGINE_DEATH: bool = False

VLLM_KV_CACHE_LAYOUT module-attribute

VLLM_KV_CACHE_LAYOUT: Literal['NHD', 'HND'] | None = None

VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES module-attribute

VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True

VLLM_LOGGING_COLOR module-attribute

VLLM_LOGGING_COLOR: str = 'auto'

VLLM_LOGGING_CONFIG_PATH module-attribute

VLLM_LOGGING_CONFIG_PATH: str | None = None

VLLM_LOGGING_LEVEL module-attribute

VLLM_LOGGING_LEVEL: str = 'INFO'

VLLM_LOGGING_PREFIX module-attribute

VLLM_LOGGING_PREFIX: str = ''

VLLM_LOGGING_STREAM module-attribute

VLLM_LOGGING_STREAM: str = 'ext://sys.stdout'

VLLM_LOG_BATCHSIZE_INTERVAL module-attribute

VLLM_LOG_BATCHSIZE_INTERVAL: float = -1

VLLM_LOG_STATS_INTERVAL module-attribute

VLLM_LOG_STATS_INTERVAL: float = 10.0

VLLM_LOOPBACK_IP module-attribute

VLLM_LOOPBACK_IP: str = ''

VLLM_LORA_RESOLVER_CACHE_DIR module-attribute

VLLM_LORA_RESOLVER_CACHE_DIR: str | None = None

VLLM_MAIN_CUDA_VERSION module-attribute

VLLM_MAIN_CUDA_VERSION: str = '12.8'

VLLM_MARLIN_USE_ATOMIC_ADD module-attribute

VLLM_MARLIN_USE_ATOMIC_ADD: bool = False

VLLM_MAX_AUDIO_CLIP_FILESIZE_MB module-attribute

VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25

VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE module-attribute

VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE: int = 163840

VLLM_MEDIA_CONNECTOR module-attribute

VLLM_MEDIA_CONNECTOR: str = 'http'

VLLM_MEDIA_LOADING_THREAD_COUNT module-attribute

VLLM_MEDIA_LOADING_THREAD_COUNT: int = 8

VLLM_MEDIA_URL_ALLOW_REDIRECTS module-attribute

VLLM_MEDIA_URL_ALLOW_REDIRECTS: bool = True

VLLM_MLA_DISABLE module-attribute

VLLM_MLA_DISABLE: bool = False

VLLM_MM_INPUT_CACHE_GIB module-attribute

VLLM_MM_INPUT_CACHE_GIB: int = 4

VLLM_MODEL_REDIRECT_PATH module-attribute

VLLM_MODEL_REDIRECT_PATH: str | None = None

VLLM_MOE_DP_CHUNK_SIZE module-attribute

VLLM_MOE_DP_CHUNK_SIZE: int = 256

VLLM_MOE_USE_DEEP_GEMM module-attribute

VLLM_MOE_USE_DEEP_GEMM: bool = True

VLLM_MQ_MAX_CHUNK_BYTES_MB module-attribute

VLLM_MQ_MAX_CHUNK_BYTES_MB: int = 16

VLLM_MSGPACK_ZERO_COPY_THRESHOLD module-attribute

VLLM_MSGPACK_ZERO_COPY_THRESHOLD: int = 256

VLLM_MXFP4_USE_MARLIN module-attribute

VLLM_MXFP4_USE_MARLIN: bool | None = None

VLLM_NCCL_INCLUDE_PATH module-attribute

VLLM_NCCL_INCLUDE_PATH: str | None = None

VLLM_NCCL_SO_PATH module-attribute

VLLM_NCCL_SO_PATH: str | None = None

VLLM_NIXL_ABORT_REQUEST_TIMEOUT module-attribute

VLLM_NIXL_ABORT_REQUEST_TIMEOUT: int = 480

VLLM_NIXL_SIDE_CHANNEL_HOST module-attribute

VLLM_NIXL_SIDE_CHANNEL_HOST: str = 'localhost'

VLLM_NIXL_SIDE_CHANNEL_PORT module-attribute

VLLM_NIXL_SIDE_CHANNEL_PORT: int = 5600

VLLM_NO_USAGE_STATS module-attribute

VLLM_NO_USAGE_STATS: bool = False

VLLM_NVFP4_GEMM_BACKEND module-attribute

VLLM_NVFP4_GEMM_BACKEND: str | None = None

VLLM_NVTX_SCOPES_FOR_PROFILING module-attribute

VLLM_NVTX_SCOPES_FOR_PROFILING: bool = False

VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME module-attribute

VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME: str = (
    "VLLM_OBJECT_STORAGE_SHM_BUFFER"
)

VLLM_PATTERN_MATCH_DEBUG module-attribute

VLLM_PATTERN_MATCH_DEBUG: str | None = None

VLLM_PLUGINS module-attribute

VLLM_PLUGINS: list[str] | None = None

VLLM_PORT module-attribute

VLLM_PORT: int | None = None

VLLM_PP_LAYER_PARTITION module-attribute

VLLM_PP_LAYER_PARTITION: str | None = None

VLLM_PROFILER_DELAY_ITERS module-attribute

VLLM_PROFILER_DELAY_ITERS: int = 0

VLLM_PROFILER_MAX_ITERS module-attribute

VLLM_PROFILER_MAX_ITERS: int = 0

VLLM_RANDOMIZE_DP_DUMMY_INPUTS module-attribute

VLLM_RANDOMIZE_DP_DUMMY_INPUTS: bool = False

VLLM_RAY_BUNDLE_INDICES module-attribute

VLLM_RAY_BUNDLE_INDICES: str = ''

VLLM_RAY_DP_PACK_STRATEGY module-attribute

VLLM_RAY_DP_PACK_STRATEGY: Literal[
    "strict", "fill", "span"
] = "strict"

VLLM_RAY_PER_WORKER_GPUS module-attribute

VLLM_RAY_PER_WORKER_GPUS: float = 1.0

VLLM_RINGBUFFER_WARNING_INTERVAL module-attribute

VLLM_RINGBUFFER_WARNING_INTERVAL: int = 60

VLLM_ROCM_CUSTOM_PAGED_ATTN module-attribute

VLLM_ROCM_CUSTOM_PAGED_ATTN: bool = True

VLLM_ROCM_FP8_MFMA_PAGE_ATTN module-attribute

VLLM_ROCM_FP8_MFMA_PAGE_ATTN: bool = False

VLLM_ROCM_FP8_PADDING module-attribute

VLLM_ROCM_FP8_PADDING: bool = True

VLLM_ROCM_MOE_PADDING module-attribute

VLLM_ROCM_MOE_PADDING: bool = True

VLLM_ROCM_QUICK_REDUCE_CAST_BF16_TO_FP16 module-attribute

VLLM_ROCM_QUICK_REDUCE_CAST_BF16_TO_FP16: bool = True

VLLM_ROCM_QUICK_REDUCE_MAX_SIZE_BYTES_MB module-attribute

VLLM_ROCM_QUICK_REDUCE_MAX_SIZE_BYTES_MB: int | None = None

VLLM_ROCM_QUICK_REDUCE_QUANTIZATION module-attribute

VLLM_ROCM_QUICK_REDUCE_QUANTIZATION: Literal[
    "FP", "INT8", "INT6", "INT4", "NONE"
] = "NONE"

VLLM_ROCM_SLEEP_MEM_CHUNK_SIZE module-attribute

VLLM_ROCM_SLEEP_MEM_CHUNK_SIZE: int = 256

VLLM_ROCM_USE_AITER module-attribute

VLLM_ROCM_USE_AITER: bool = False

VLLM_ROCM_USE_AITER_FP4_ASM_GEMM module-attribute

VLLM_ROCM_USE_AITER_FP4_ASM_GEMM: bool = False

VLLM_ROCM_USE_AITER_FP8BMM module-attribute

VLLM_ROCM_USE_AITER_FP8BMM: bool = True

VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS module-attribute

VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS: bool = True

VLLM_ROCM_USE_AITER_LINEAR module-attribute

VLLM_ROCM_USE_AITER_LINEAR: bool = True

VLLM_ROCM_USE_AITER_MHA module-attribute

VLLM_ROCM_USE_AITER_MHA: bool = True

VLLM_ROCM_USE_AITER_MLA module-attribute

VLLM_ROCM_USE_AITER_MLA: bool = True

VLLM_ROCM_USE_AITER_MOE module-attribute

VLLM_ROCM_USE_AITER_MOE: bool = True

VLLM_ROCM_USE_AITER_PAGED_ATTN module-attribute

VLLM_ROCM_USE_AITER_PAGED_ATTN: bool = False

VLLM_ROCM_USE_AITER_RMSNORM module-attribute

VLLM_ROCM_USE_AITER_RMSNORM: bool = True

VLLM_ROCM_USE_AITER_TRITON_GEMM module-attribute

VLLM_ROCM_USE_AITER_TRITON_GEMM: bool = True

VLLM_ROCM_USE_AITER_TRITON_ROPE module-attribute

VLLM_ROCM_USE_AITER_TRITON_ROPE: bool = False

VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION module-attribute

VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION: bool = False

VLLM_ROCM_USE_SKINNY_GEMM module-attribute

VLLM_ROCM_USE_SKINNY_GEMM: bool = True

VLLM_RPC_BASE_PATH module-attribute

VLLM_RPC_BASE_PATH: str = gettempdir()

VLLM_RPC_TIMEOUT module-attribute

VLLM_RPC_TIMEOUT: int = 10000

VLLM_SERVER_DEV_MODE module-attribute

VLLM_SERVER_DEV_MODE: bool = False

VLLM_SHARED_EXPERTS_STREAM_TOKEN_THRESHOLD module-attribute

VLLM_SHARED_EXPERTS_STREAM_TOKEN_THRESHOLD: int = 256

VLLM_SKIP_P2P_CHECK module-attribute

VLLM_SKIP_P2P_CHECK: bool = False

VLLM_SLEEP_WHEN_IDLE module-attribute

VLLM_SLEEP_WHEN_IDLE: bool = False

VLLM_TARGET_DEVICE module-attribute

VLLM_TARGET_DEVICE: str = 'cuda'

VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL module-attribute

VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL: bool = False

VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY module-attribute

VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY: bool = False

VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS module-attribute

VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS: int = 1

VLLM_TORCH_CUDA_PROFILE module-attribute

VLLM_TORCH_CUDA_PROFILE: bool = False

VLLM_TORCH_PROFILER_DIR module-attribute

VLLM_TORCH_PROFILER_DIR: str | None = None

VLLM_TORCH_PROFILER_DISABLE_ASYNC_LLM module-attribute

VLLM_TORCH_PROFILER_DISABLE_ASYNC_LLM: bool = False

VLLM_TORCH_PROFILER_RECORD_SHAPES module-attribute

VLLM_TORCH_PROFILER_RECORD_SHAPES: bool = False

VLLM_TORCH_PROFILER_WITH_FLOPS module-attribute

VLLM_TORCH_PROFILER_WITH_FLOPS: bool = False

VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY module-attribute

VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY: bool = False

VLLM_TORCH_PROFILER_WITH_STACK module-attribute

VLLM_TORCH_PROFILER_WITH_STACK: bool = True

VLLM_TPU_BUCKET_PADDING_GAP module-attribute

VLLM_TPU_BUCKET_PADDING_GAP: int = 0

VLLM_TPU_MOST_MODEL_LEN module-attribute

VLLM_TPU_MOST_MODEL_LEN: int | None = None

VLLM_TPU_USING_PATHWAYS module-attribute

VLLM_TPU_USING_PATHWAYS: bool = False

VLLM_TRACE_FUNCTION module-attribute

VLLM_TRACE_FUNCTION: int = 0

VLLM_TUNED_CONFIG_FOLDER module-attribute

VLLM_TUNED_CONFIG_FOLDER: str | None = None

VLLM_USAGE_SOURCE module-attribute

VLLM_USAGE_SOURCE: str = ''

VLLM_USAGE_STATS_SERVER module-attribute

VLLM_USAGE_STATS_SERVER: str = 'https://stats.vllm.ai'

VLLM_USE_AOT_COMPILE module-attribute

VLLM_USE_AOT_COMPILE: bool = False

VLLM_USE_BYTECODE_HOOK module-attribute

VLLM_USE_BYTECODE_HOOK: bool = False

VLLM_USE_CUDNN_PREFILL module-attribute

VLLM_USE_CUDNN_PREFILL: bool = False

VLLM_USE_DEEP_GEMM module-attribute

VLLM_USE_DEEP_GEMM: bool = True

VLLM_USE_DEEP_GEMM_E8M0 module-attribute

VLLM_USE_DEEP_GEMM_E8M0: bool = True

VLLM_USE_FBGEMM module-attribute

VLLM_USE_FBGEMM: bool = False

VLLM_USE_FLASHINFER_MOE_FP16 module-attribute

VLLM_USE_FLASHINFER_MOE_FP16: bool = False

VLLM_USE_FLASHINFER_MOE_FP4 module-attribute

VLLM_USE_FLASHINFER_MOE_FP4: bool = False

VLLM_USE_FLASHINFER_MOE_FP8 module-attribute

VLLM_USE_FLASHINFER_MOE_FP8: bool = False

VLLM_USE_FLASHINFER_MOE_MXFP4_BF16 module-attribute

VLLM_USE_FLASHINFER_MOE_MXFP4_BF16: bool = False

VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8 module-attribute

VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8: bool = False

VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS module-attribute

VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS: bool = False

VLLM_USE_FLASHINFER_SAMPLER module-attribute

VLLM_USE_FLASHINFER_SAMPLER: bool | None = None

VLLM_USE_FUSED_MOE_GROUPED_TOPK module-attribute

VLLM_USE_FUSED_MOE_GROUPED_TOPK: bool = True

VLLM_USE_MODELSCOPE module-attribute

VLLM_USE_MODELSCOPE: bool = False

VLLM_USE_NCCL_SYMM_MEM module-attribute

VLLM_USE_NCCL_SYMM_MEM: bool = False

VLLM_USE_NVFP4_CT_EMULATIONS module-attribute

VLLM_USE_NVFP4_CT_EMULATIONS: bool = False

VLLM_USE_PRECOMPILED module-attribute

VLLM_USE_PRECOMPILED: bool = False

VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE module-attribute

VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE: Literal[
    "auto", "nccl", "shm"
] = "auto"

VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM module-attribute

VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM: bool = False

VLLM_USE_RAY_WRAPPED_PP_COMM module-attribute

VLLM_USE_RAY_WRAPPED_PP_COMM: bool = True

VLLM_USE_STANDALONE_COMPILE module-attribute

VLLM_USE_STANDALONE_COMPILE: bool = True

VLLM_USE_TRITON_AWQ module-attribute

VLLM_USE_TRITON_AWQ: bool = False

VLLM_USE_TRTLLM_ATTENTION module-attribute

VLLM_USE_TRTLLM_ATTENTION: str | None = None

VLLM_USE_TRTLLM_RAGGED_DEEPSEEK_PREFILL module-attribute

VLLM_USE_TRTLLM_RAGGED_DEEPSEEK_PREFILL: bool = False

VLLM_V1_OUTPUT_PROC_CHUNK_SIZE module-attribute

VLLM_V1_OUTPUT_PROC_CHUNK_SIZE: int = 128

VLLM_V1_USE_OUTLINES_CACHE module-attribute

VLLM_V1_USE_OUTLINES_CACHE: bool = False

VLLM_V1_USE_PREFILL_DECODE_ATTENTION module-attribute

VLLM_V1_USE_PREFILL_DECODE_ATTENTION: bool = False

VLLM_VIDEO_FETCH_TIMEOUT module-attribute

VLLM_VIDEO_FETCH_TIMEOUT: int = 30

VLLM_VIDEO_LOADER_BACKEND module-attribute

VLLM_VIDEO_LOADER_BACKEND: str = 'opencv'

VLLM_WORKER_MULTIPROC_METHOD module-attribute

VLLM_WORKER_MULTIPROC_METHOD: Literal["fork", "spawn"] = (
    "fork"
)

VLLM_XGRAMMAR_CACHE_MB module-attribute

VLLM_XGRAMMAR_CACHE_MB: int = 0

VLLM_XLA_CACHE_PATH module-attribute

VLLM_XLA_CACHE_PATH: str = join(
    VLLM_CACHE_ROOT, "xla_cache"
)

VLLM_XLA_CHECK_RECOMPILATION module-attribute

VLLM_XLA_CHECK_RECOMPILATION: bool = False

VLLM_XLA_USE_SPMD module-attribute

VLLM_XLA_USE_SPMD: bool = False

V_SCALE_CONSTANT module-attribute

V_SCALE_CONSTANT: int = 100

environment_variables module-attribute

environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_TARGET_DEVICE": lambda: os.getenv("VLLM_TARGET_DEVICE", "cuda").lower(),
    "VLLM_MAIN_CUDA_VERSION": lambda: os.getenv("VLLM_MAIN_CUDA_VERSION", "").lower() or "12.8",
    "MAX_JOBS": lambda: os.getenv("MAX_JOBS", None),
    "NVCC_THREADS": lambda: os.getenv("NVCC_THREADS", None),
    "VLLM_USE_PRECOMPILED": lambda: os.environ.get("VLLM_USE_PRECOMPILED", "").lower() in ("1", "true")
    or bool(os.environ.get("VLLM_PRECOMPILED_WHEEL_LOCATION")),
    "VLLM_DOCKER_BUILD_CONTEXT": lambda: os.environ.get("VLLM_DOCKER_BUILD_CONTEXT", "").lower()
    in ("1", "true"),
    "VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL": lambda: bool(
        int(os.getenv("VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL", "0"))
    ),
    "CMAKE_BUILD_TYPE": env_with_choices(
        "CMAKE_BUILD_TYPE", None, ["Debug", "Release", "RelWithDebInfo"]
    ),
    "VERBOSE": lambda: bool(int(os.getenv("VERBOSE", "0"))),
    "VLLM_CONFIG_ROOT": lambda: os.path.expanduser(
        os.getenv("VLLM_CONFIG_ROOT", os.path.join(get_default_config_root(), "vllm"))
    ),
    "VLLM_CACHE_ROOT": lambda: os.path.expanduser(
        os.getenv("VLLM_CACHE_ROOT", os.path.join(get_default_cache_root(), "vllm"))
    ),
    "VLLM_HOST_IP": lambda: os.getenv("VLLM_HOST_IP", ""),
    "VLLM_PORT": get_vllm_port,
    "VLLM_RPC_BASE_PATH": lambda: os.getenv("VLLM_RPC_BASE_PATH", tempfile.gettempdir()),
    "VLLM_USE_MODELSCOPE": lambda: os.environ.get("VLLM_USE_MODELSCOPE", "False").lower() == "true",
    "VLLM_RINGBUFFER_WARNING_INTERVAL": lambda: int(
        os.environ.get("VLLM_RINGBUFFER_WARNING_INTERVAL", "60")
    ),
    "CUDA_HOME": lambda: os.environ.get("CUDA_HOME", None),
    "VLLM_NCCL_SO_PATH": lambda: os.environ.get("VLLM_NCCL_SO_PATH", None),
    "LD_LIBRARY_PATH": lambda: os.environ.get("LD_LIBRARY_PATH", None),
    "VLLM_ROCM_SLEEP_MEM_CHUNK_SIZE": lambda: int(
        os.environ.get("VLLM_ROCM_SLEEP_MEM_CHUNK_SIZE", "256")
    ),
    "VLLM_V1_USE_PREFILL_DECODE_ATTENTION": lambda: os.environ.get(
        "VLLM_V1_USE_PREFILL_DECODE_ATTENTION", "False"
    ).lower() in ("true", "1"),
    "VLLM_FLASH_ATTN_VERSION": lambda: maybe_convert_int(
        os.environ.get("VLLM_FLASH_ATTN_VERSION", None)
    ),
    "VLLM_USE_STANDALONE_COMPILE": lambda: os.environ.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1",
    "VLLM_PATTERN_MATCH_DEBUG": lambda: os.environ.get("VLLM_PATTERN_MATCH_DEBUG", None),
    "VLLM_DEBUG_DUMP_PATH": lambda: os.environ.get("VLLM_DEBUG_DUMP_PATH", None),
    "VLLM_USE_AOT_COMPILE": use_aot_compile,
    "VLLM_USE_BYTECODE_HOOK": lambda: bool(int(os.environ.get("VLLM_USE_BYTECODE_HOOK", "1"))),
    "VLLM_FORCE_AOT_LOAD": lambda: os.environ.get("VLLM_FORCE_AOT_LOAD", "0") == "1",
    "LOCAL_RANK": lambda: int(os.environ.get("LOCAL_RANK", "0")),
    "CUDA_VISIBLE_DEVICES": lambda: os.environ.get("CUDA_VISIBLE_DEVICES", None),
    "VLLM_ENGINE_ITERATION_TIMEOUT_S": lambda: int(
        os.environ.get("VLLM_ENGINE_ITERATION_TIMEOUT_S", "60")
    ),
    "VLLM_API_KEY": lambda: os.environ.get("VLLM_API_KEY", None),
    "VLLM_DEBUG_LOG_API_SERVER_RESPONSE": lambda: os.environ.get(
        "VLLM_DEBUG_LOG_API_SERVER_RESPONSE", "False"
    ).lower() == "true",
    "S3_ACCESS_KEY_ID": lambda: os.environ.get("S3_ACCESS_KEY_ID", None),
    "S3_SECRET_ACCESS_KEY": lambda: os.environ.get("S3_SECRET_ACCESS_KEY", None),
    "S3_ENDPOINT_URL": lambda: os.environ.get("S3_ENDPOINT_URL", None),
    "VLLM_USAGE_STATS_SERVER": lambda: os.environ.get(
        "VLLM_USAGE_STATS_SERVER", "https://stats.vllm.ai"
    ),
    "VLLM_NO_USAGE_STATS": lambda: os.environ.get("VLLM_NO_USAGE_STATS", "0") == "1",
    "VLLM_DISABLE_FLASHINFER_PREFILL": lambda: os.environ.get(
        "VLLM_DISABLE_FLASHINFER_PREFILL", "0"
    ) == "1",
    "VLLM_DO_NOT_TRACK": lambda: (
        os.environ.get("VLLM_DO_NOT_TRACK", None) or os.environ.get("DO_NOT_TRACK", None) or "0"
    ) == "1",
    "VLLM_USAGE_SOURCE": lambda: os.environ.get("VLLM_USAGE_SOURCE", "production"),
    "VLLM_CONFIGURE_LOGGING": lambda: int(os.getenv("VLLM_CONFIGURE_LOGGING", "1")),
    "VLLM_LOGGING_CONFIG_PATH": lambda: os.getenv("VLLM_LOGGING_CONFIG_PATH"),
    "VLLM_LOGGING_LEVEL": lambda: os.getenv("VLLM_LOGGING_LEVEL", "INFO").upper(),
    "VLLM_LOGGING_STREAM": lambda: os.getenv("VLLM_LOGGING_STREAM", "ext://sys.stdout"),
    "VLLM_LOGGING_PREFIX": lambda: os.getenv("VLLM_LOGGING_PREFIX", ""),
    "VLLM_LOGGING_COLOR": lambda: os.getenv("VLLM_LOGGING_COLOR", "auto"),
    "NO_COLOR": lambda: os.getenv("NO_COLOR", "0") != "0",
    "VLLM_LOG_STATS_INTERVAL": lambda: val
    if (val := float(os.getenv("VLLM_LOG_STATS_INTERVAL", "10."))) > 0.0
    else 10.0,
    "VLLM_TRACE_FUNCTION": lambda: int(os.getenv("VLLM_TRACE_FUNCTION", "0")),
    "VLLM_ATTENTION_BACKEND": env_with_choices(
        "VLLM_ATTENTION_BACKEND", None, lambda: list(keys())  # choices callable; receiver of keys() elided in the rendered source
    ),
    "VLLM_USE_FLASHINFER_SAMPLER": lambda: bool(int(os.environ["VLLM_USE_FLASHINFER_SAMPLER"]))
    if "VLLM_USE_FLASHINFER_SAMPLER" in os.environ
    else None,
    "VLLM_PP_LAYER_PARTITION": lambda: os.getenv("VLLM_PP_LAYER_PARTITION", None),
    "VLLM_CPU_KVCACHE_SPACE": lambda: int(os.getenv("VLLM_CPU_KVCACHE_SPACE", "0"))
    if "VLLM_CPU_KVCACHE_SPACE" in os.environ
    else None,
    "VLLM_CPU_OMP_THREADS_BIND": lambda: os.getenv("VLLM_CPU_OMP_THREADS_BIND", "auto"),
    "VLLM_CPU_NUM_OF_RESERVED_CPU": lambda: int(os.getenv("VLLM_CPU_NUM_OF_RESERVED_CPU", "0"))
    if "VLLM_CPU_NUM_OF_RESERVED_CPU" in os.environ
    else None,
    "VLLM_CPU_SGL_KERNEL": lambda: bool(int(os.getenv("VLLM_CPU_SGL_KERNEL", "0"))),
    "VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE": env_with_choices(
        "VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE", "auto", ["auto", "nccl", "shm"]
    ),
    "VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM": lambda: bool(
        int(os.getenv("VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM", "0"))
    ),
    "VLLM_USE_RAY_WRAPPED_PP_COMM": lambda: bool(
        int(os.getenv("VLLM_USE_RAY_WRAPPED_PP_COMM", "1"))
    ),
    "VLLM_WORKER_MULTIPROC_METHOD": env_with_choices(
        "VLLM_WORKER_MULTIPROC_METHOD", "fork", ["spawn", "fork"]
    ),
    "VLLM_ASSETS_CACHE": lambda: os.path.expanduser(
        os.getenv("VLLM_ASSETS_CACHE", os.path.join(get_default_cache_root(), "vllm", "assets"))
    ),
    "VLLM_ASSETS_CACHE_MODEL_CLEAN": lambda: bool(
        int(os.getenv("VLLM_ASSETS_CACHE_MODEL_CLEAN", "0"))
    ),
    "VLLM_IMAGE_FETCH_TIMEOUT": lambda: int(os.getenv("VLLM_IMAGE_FETCH_TIMEOUT", "5")),
    "VLLM_VIDEO_FETCH_TIMEOUT": lambda: int(os.getenv("VLLM_VIDEO_FETCH_TIMEOUT", "30")),
    "VLLM_AUDIO_FETCH_TIMEOUT": lambda: int(os.getenv("VLLM_AUDIO_FETCH_TIMEOUT", "10")),
    "VLLM_MEDIA_URL_ALLOW_REDIRECTS": lambda: bool(
        int(os.getenv("VLLM_MEDIA_URL_ALLOW_REDIRECTS", "1"))
    ),
    "VLLM_MEDIA_LOADING_THREAD_COUNT": lambda: int(
        os.getenv("VLLM_MEDIA_LOADING_THREAD_COUNT", "8")
    ),
    "VLLM_MAX_AUDIO_CLIP_FILESIZE_MB": lambda: int(
        os.getenv("VLLM_MAX_AUDIO_CLIP_FILESIZE_MB", "25")
    ),
    "VLLM_VIDEO_LOADER_BACKEND": lambda: os.getenv("VLLM_VIDEO_LOADER_BACKEND", "opencv"),
    "VLLM_MEDIA_CONNECTOR": lambda: os.getenv("VLLM_MEDIA_CONNECTOR", "http"),
    "VLLM_MM_INPUT_CACHE_GIB": lambda: int(os.getenv("VLLM_MM_INPUT_CACHE_GIB", "4")),
    "VLLM_XLA_CACHE_PATH": lambda: os.path.expanduser(
        os.getenv(
            "VLLM_XLA_CACHE_PATH",
            os.path.join(get_default_cache_root(), "vllm", "xla_cache"),
        )
    ),
    "VLLM_XLA_CHECK_RECOMPILATION": lambda: bool(
        int(os.getenv("VLLM_XLA_CHECK_RECOMPILATION", "0"))
    ),
    "VLLM_XLA_USE_SPMD": lambda: bool(int(os.getenv("VLLM_XLA_USE_SPMD", "0"))),
    "VLLM_FUSED_MOE_CHUNK_SIZE": lambda: int(os.getenv("VLLM_FUSED_MOE_CHUNK_SIZE", "32768")),
    "VLLM_ENABLE_FUSED_MOE_ACTIVATION_CHUNKING": lambda: bool(
        int(os.getenv("VLLM_ENABLE_FUSED_MOE_ACTIVATION_CHUNKING", "1"))
    ),
    "VLLM_KEEP_ALIVE_ON_ENGINE_DEATH": lambda: bool(
        int(os.getenv("VLLM_KEEP_ALIVE_ON_ENGINE_DEATH", "0"))
    ),
    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": lambda: os.environ.get(
        "VLLM_ALLOW_LONG_MAX_MODEL_LEN", "0"
    ).lower() in ("1", "true"),
    "VLLM_TEST_FORCE_FP8_MARLIN": lambda: os.environ.get(
        "VLLM_TEST_FORCE_FP8_MARLIN", "0"
    ).lower() in ("1", "true"),
    "VLLM_TEST_FORCE_LOAD_FORMAT": lambda: os.getenv("VLLM_TEST_FORCE_LOAD_FORMAT", "dummy"),
    "VLLM_RPC_TIMEOUT": lambda: int(os.getenv("VLLM_RPC_TIMEOUT", "10000")),
    "VLLM_HTTP_TIMEOUT_KEEP_ALIVE": lambda: int(
        os.environ.get("VLLM_HTTP_TIMEOUT_KEEP_ALIVE", "5")
    ),
    "VLLM_PLUGINS": lambda: None
    if "VLLM_PLUGINS" not in os.environ
    else os.environ["VLLM_PLUGINS"].split(","),
    "VLLM_LORA_RESOLVER_CACHE_DIR": lambda: os.getenv("VLLM_LORA_RESOLVER_CACHE_DIR", None),
    "VLLM_TORCH_CUDA_PROFILE": lambda: bool(os.getenv("VLLM_TORCH_CUDA_PROFILE", "0") != "0"),
    "VLLM_TORCH_PROFILER_DIR": lambda: None
    if (val := os.getenv("VLLM_TORCH_PROFILER_DIR")) is None
    else val
    if val.startswith("gs://") and val[5:] and val[5] != "/"
    else os.path.abspath(os.path.expanduser(val)),
    "VLLM_TORCH_PROFILER_RECORD_SHAPES": lambda: bool(
        os.getenv("VLLM_TORCH_PROFILER_RECORD_SHAPES", "0") != "0"
    ),
    "VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY": lambda: bool(
        os.getenv("VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY", "0") != "0"
    ),
    "VLLM_TORCH_PROFILER_WITH_STACK": lambda: bool(
        os.getenv("VLLM_TORCH_PROFILER_WITH_STACK", "1") != "0"
    ),
    "VLLM_TORCH_PROFILER_WITH_FLOPS": lambda: bool(
        os.getenv("VLLM_TORCH_PROFILER_WITH_FLOPS", "0") != "0"
    ),
    "VLLM_TORCH_PROFILER_DISABLE_ASYNC_LLM": lambda: bool(
        os.getenv("VLLM_TORCH_PROFILER_DISABLE_ASYNC_LLM", "0") != "0"
    ),
    "VLLM_PROFILER_DELAY_ITERS": lambda: int(os.getenv("VLLM_PROFILER_DELAY_ITERS", "0")),
    "VLLM_PROFILER_MAX_ITERS": lambda: int(os.getenv("VLLM_PROFILER_MAX_ITERS", "0")),
    "VLLM_USE_TRITON_AWQ": lambda: bool(int(os.getenv("VLLM_USE_TRITON_AWQ", "0"))),
    "VLLM_ALLOW_RUNTIME_LORA_UPDATING": lambda: os.environ.get(
        "VLLM_ALLOW_RUNTIME_LORA_UPDATING", "0"
    ).lower() in ("1", "true"),
    "VLLM_SKIP_P2P_CHECK": lambda: os.getenv("VLLM_SKIP_P2P_CHECK", "1") == "1",
    "VLLM_DISABLED_KERNELS": lambda: []
    if "VLLM_DISABLED_KERNELS" not in os.environ
    else os.environ["VLLM_DISABLED_KERNELS"].split(","),
    "VLLM_DISABLE_PYNCCL": lambda: os.getenv("VLLM_DISABLE_PYNCCL", "False").lower()
    in ("true", "1"),
    "VLLM_ROCM_USE_AITER": lambda: os.getenv("VLLM_ROCM_USE_AITER", "False").lower()
    in ("true", "1"),
    "VLLM_ROCM_USE_AITER_PAGED_ATTN": lambda: os.getenv(
        "VLLM_ROCM_USE_AITER_PAGED_ATTN", "False"
    ).lower() in ("true", "1"),
    "VLLM_ROCM_USE_AITER_LINEAR": lambda: os.getenv(
        "VLLM_ROCM_USE_AITER_LINEAR", "True"
    ).lower() in ("true", "1"),
    "VLLM_ROCM_USE_AITER_MOE": lambda: os.getenv("VLLM_ROCM_USE_AITER_MOE", "True").lower()
    in ("true", "1"),
    "VLLM_ROCM_USE_AITER_RMSNORM": lambda: os.getenv(
        "VLLM_ROCM_USE_AITER_RMSNORM", "True"
    ).lower() in ("true", "1"),
    "VLLM_ROCM_USE_AITER_MLA": lambda: os.getenv("VLLM_ROCM_USE_AITER_MLA", "True").lower()
    in ("true", "1"),
    "VLLM_ROCM_USE_AITER_MHA": lambda: os.getenv("VLLM_ROCM_USE_AITER_MHA", "True").lower()
    in ("true", "1"),
    "VLLM_ROCM_USE_AITER_FP4_ASM_GEMM": lambda: os.getenv(
        "VLLM_ROCM_USE_AITER_FP4_ASM_GEMM", "False"
    ).lower() in ("true", "1"),
    "VLLM_ROCM_USE_AITER_TRITON_ROPE": lambda: os.getenv(
        "VLLM_ROCM_USE_AITER_TRITON_ROPE", "False"
    ).lower() in ("true", "1"),
    "VLLM_ROCM_USE_AITER_FP8BMM": lambda: os.getenv(
        "VLLM_ROCM_USE_AITER_FP8BMM", "True"
    ).lower() in ("true", "1"),
    "VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION": lambda: os.getenv(
        "VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION", "False"
    ).lower() in ("true", "1"),
    "VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS": lambda: os.getenv(
        "VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS", "True"
    ).lower() in ("true", "1"),
    "VLLM_ROCM_USE_AITER_TRITON_GEMM": lambda: os.getenv(
        "VLLM_ROCM_USE_AITER_TRITON_GEMM", "True"
    ).lower() in ("true", "1"),
    "VLLM_ROCM_USE_SKINNY_GEMM": lambda: os.getenv(
        "VLLM_ROCM_USE_SKINNY_GEMM", "True"
    ).lower() in ("true", "1"),
    "VLLM_ROCM_FP8_PADDING": lambda: bool(int(os.getenv("VLLM_ROCM_FP8_PADDING", "1"))),
    "VLLM_ROCM_MOE_PADDING": lambda: bool(int(os.getenv("VLLM_ROCM_MOE_PADDING", "1"))),
    "VLLM_ROCM_CUSTOM_PAGED_ATTN": lambda: os.getenv(
        "VLLM_ROCM_CUSTOM_PAGED_ATTN", "True"
    ).lower() in ("true", "1"),
    "VLLM_ROCM_QUICK_REDUCE_QUANTIZATION": env_with_choices(
        "VLLM_ROCM_QUICK_REDUCE_QUANTIZATION", "NONE", ["FP", "INT8", "INT6", "INT4", "NONE"]
    ),
    "VLLM_ROCM_QUICK_REDUCE_CAST_BF16_TO_FP16": lambda: os.getenv(
        "VLLM_ROCM_QUICK_REDUCE_CAST_BF16_TO_FP16", "True"
    ).lower() in ("true", "1"),
    "VLLM_ROCM_QUICK_REDUCE_MAX_SIZE_BYTES_MB": lambda: maybe_convert_int(
        os.environ.get("VLLM_ROCM_QUICK_REDUCE_MAX_SIZE_BYTES_MB", None)
    ),
    "Q_SCALE_CONSTANT": lambda: int(os.getenv("Q_SCALE_CONSTANT", "200")),
    "K_SCALE_CONSTANT": lambda: int(os.getenv("K_SCALE_CONSTANT", "200")),
    "V_SCALE_CONSTANT": lambda: int(os.getenv("V_SCALE_CONSTANT", "100")),
    "VLLM_ENABLE_V1_MULTIPROCESSING": lambda: bool(
        int(os.getenv("VLLM_ENABLE_V1_MULTIPROCESSING", "1"))
    ),
    "VLLM_LOG_BATCHSIZE_INTERVAL": lambda: float(
        os.getenv("VLLM_LOG_BATCHSIZE_INTERVAL", "-1")
    ),
    "VLLM_DISABLE_COMPILE_CACHE": disable_compile_cache,
    "VLLM_SERVER_DEV_MODE": lambda: bool(int(os.getenv("VLLM_SERVER_DEV_MODE", "0"))),
    "VLLM_V1_OUTPUT_PROC_CHUNK_SIZE": lambda: int(
        os.getenv("VLLM_V1_OUTPUT_PROC_CHUNK_SIZE", "128")
    ),
    "VLLM_MLA_DISABLE": lambda: bool(int(os.getenv("VLLM_MLA_DISABLE", "0"))),
    "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": lambda: int(
        os.getenv("VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH", "32")
    ),
    "VLLM_RAY_PER_WORKER_GPUS": lambda: float(os.getenv("VLLM_RAY_PER_WORKER_GPUS", "1.0")),
    "VLLM_RAY_BUNDLE_INDICES": lambda: os.getenv("VLLM_RAY_BUNDLE_INDICES", ""),
    "VLLM_CUDART_SO_PATH": lambda: os.getenv("VLLM_CUDART_SO_PATH", None),
    "VLLM_DP_RANK": lambda: int(os.getenv("VLLM_DP_RANK", "0")),
    "VLLM_DP_RANK_LOCAL": lambda: int(os.getenv("VLLM_DP_RANK_LOCAL", VLLM_DP_RANK)),
    "VLLM_DP_SIZE": lambda: int(os.getenv("VLLM_DP_SIZE", "1")),
    "VLLM_DP_MASTER_IP": lambda: os.getenv("VLLM_DP_MASTER_IP", "127.0.0.1"),
    "VLLM_DP_MASTER_PORT": lambda: int(os.getenv("VLLM_DP_MASTER_PORT", "0")),
    "VLLM_MOE_DP_CHUNK_SIZE": lambda: int(os.getenv("VLLM_MOE_DP_CHUNK_SIZE", "256")),
    "VLLM_RANDOMIZE_DP_DUMMY_INPUTS": lambda: os.environ.get(
        "VLLM_RANDOMIZE_DP_DUMMY_INPUTS", "0"
    ) == "1",
    "VLLM_RAY_DP_PACK_STRATEGY": lambda: os.getenv("VLLM_RAY_DP_PACK_STRATEGY", "strict"),
    "VLLM_CI_USE_S3": lambda: os.environ.get("VLLM_CI_USE_S3", "0") == "1",
    "VLLM_MODEL_REDIRECT_PATH": lambda: os.environ.get("VLLM_MODEL_REDIRECT_PATH", None),
    "VLLM_MARLIN_USE_ATOMIC_ADD": lambda: os.environ.get(
        "VLLM_MARLIN_USE_ATOMIC_ADD", "0"
    ) == "1",
    "VLLM_MXFP4_USE_MARLIN": lambda: maybe_convert_bool(
        os.environ.get("VLLM_MXFP4_USE_MARLIN", None)
    ),
    "VLLM_V1_USE_OUTLINES_CACHE": lambda: os.environ.get(
        "VLLM_V1_USE_OUTLINES_CACHE", "0"
    ) == "1",
    "VLLM_TPU_BUCKET_PADDING_GAP": lambda: int(os.environ["VLLM_TPU_BUCKET_PADDING_GAP"])
    if "VLLM_TPU_BUCKET_PADDING_GAP" in os.environ
    else 0,
    "VLLM_TPU_MOST_MODEL_LEN": lambda: maybe_convert_int(
        os.environ.get("VLLM_TPU_MOST_MODEL_LEN", None)
    ),
    "VLLM_TPU_USING_PATHWAYS": lambda: bool("proxy" in os.getenv("JAX_PLATFORMS", "").lower()),
    "VLLM_USE_DEEP_GEMM": lambda: bool(int(os.getenv("VLLM_USE_DEEP_GEMM", "1"))),
    "VLLM_MOE_USE_DEEP_GEMM": lambda: bool(int(os.getenv("VLLM_MOE_USE_DEEP_GEMM", "1"))),
    "VLLM_USE_DEEP_GEMM_E8M0": lambda: bool(int(os.getenv("VLLM_USE_DEEP_GEMM_E8M0", "1"))),
    "VLLM_DEEP_GEMM_WARMUP": env_with_choices(
        "VLLM_DEEP_GEMM_WARMUP", "relax", ["skip", "full", "relax"]
    ),
    "VLLM_USE_FUSED_MOE_GROUPED_TOPK": lambda: bool(
        int(os.getenv("VLLM_USE_FUSED_MOE_GROUPED_TOPK", "1"))
    ),
    "VLLM_USE_FLASHINFER_MOE_FP16": lambda: bool(
        int(os.getenv("VLLM_USE_FLASHINFER_MOE_FP16", "0"))
    ),
    "VLLM_USE_FLASHINFER_MOE_FP8": lambda: bool(
        int(os.getenv("VLLM_USE_FLASHINFER_MOE_FP8", "0"))
    ),
    "VLLM_USE_FLASHINFER_MOE_FP4": lambda: bool(
        int(os.getenv("VLLM_USE_FLASHINFER_MOE_FP4", "0"))
    ),
    "VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8": lambda: bool(
        int(os.getenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8", "0"))
    ),
    "VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS": lambda: bool(
        int(os.getenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS", "0"))
    ),
    "VLLM_USE_FLASHINFER_MOE_MXFP4_BF16": lambda: bool(
        int(os.getenv("VLLM_USE_FLASHINFER_MOE_MXFP4_BF16", "0"))
    ),
    "VLLM_XGRAMMAR_CACHE_MB": lambda: int(os.getenv("VLLM_XGRAMMAR_CACHE_MB", "512")),
    "VLLM_MSGPACK_ZERO_COPY_THRESHOLD": lambda: int(
        os.getenv("VLLM_MSGPACK_ZERO_COPY_THRESHOLD", "256")
    ),
    "VLLM_ALLOW_INSECURE_SERIALIZATION": lambda: bool(
        int(os.getenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "0"))
    ),
    "VLLM_NIXL_SIDE_CHANNEL_HOST": lambda: os.getenv(
        "VLLM_NIXL_SIDE_CHANNEL_HOST", "localhost"
    ),
    "VLLM_NIXL_SIDE_CHANNEL_PORT": lambda: int(
        os.getenv("VLLM_NIXL_SIDE_CHANNEL_PORT", "5600")
    ),
    "VLLM_ALL2ALL_BACKEND": env_with_choices(
        "VLLM_ALL2ALL_BACKEND",
        "allgather_reducescatter",
        [
            "naive",
            "pplx",
            "deepep_high_throughput",
            "deepep_low_latency",
            "allgather_reducescatter",
            "flashinfer_all2allv",
        ],
    ),
    "VLLM_FLASHINFER_MOE_BACKEND": env_with_choices(
        "VLLM_FLASHINFER_MOE_BACKEND", "latency", ["throughput", "latency", "masked_gemm"]
    ),
    "VLLM_FLASHINFER_WORKSPACE_BUFFER_SIZE": lambda: int(
        os.getenv("VLLM_FLASHINFER_WORKSPACE_BUFFER_SIZE", str(394 * 1024 * 1024))
    ),
    "VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE": lambda: int(
        os.getenv("VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE", "163840")
    ),
    "VLLM_FLASHINFER_ALLREDUCE_FUSION_THRESHOLDS_MB": lambda: json.loads(
        os.getenv("VLLM_FLASHINFER_ALLREDUCE_FUSION_THRESHOLDS_MB", "{}")
    ),
    "VLLM_MOE_ROUTING_SIMULATION_STRATEGY": lambda: os.environ.get(
        "VLLM_MOE_ROUTING_SIMULATION_STRATEGY", ""
    ).lower(),
    "VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS": lambda: int(
        os.getenv("VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS", "1")
    ),
    "VLLM_SLEEP_WHEN_IDLE": lambda: bool(int(os.getenv("VLLM_SLEEP_WHEN_IDLE", "0"))),
    "VLLM_MQ_MAX_CHUNK_BYTES_MB": lambda: int(os.getenv("VLLM_MQ_MAX_CHUNK_BYTES_MB", "16")),
    "VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS": lambda: int(
        os.getenv("VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS", "300")
    ),
    "VLLM_KV_CACHE_LAYOUT": env_with_choices("VLLM_KV_CACHE_LAYOUT", None, ["NHD", "HND"]),
    "VLLM_COMPUTE_NANS_IN_LOGITS": lambda: bool(
        int(os.getenv("VLLM_COMPUTE_NANS_IN_LOGITS", "0"))
    ),
    "VLLM_USE_NVFP4_CT_EMULATIONS": lambda: bool(
        int(os.getenv("VLLM_USE_NVFP4_CT_EMULATIONS", "0"))
    ),
    "VLLM_NIXL_ABORT_REQUEST_TIMEOUT": lambda: int(
        os.getenv("VLLM_NIXL_ABORT_REQUEST_TIMEOUT", "480")
    ),
    "VLLM_USE_CUDNN_PREFILL": lambda: bool(int(os.getenv("VLLM_USE_CUDNN_PREFILL", "0"))),
    "VLLM_USE_TRTLLM_RAGGED_DEEPSEEK_PREFILL": lambda: bool(
        int(os.getenv("VLLM_USE_TRTLLM_RAGGED_DEEPSEEK_PREFILL", "0"))
    ),
    "VLLM_USE_TRTLLM_ATTENTION": lambda: None
    if "VLLM_USE_TRTLLM_ATTENTION" not in os.environ
    else os.environ["VLLM_USE_TRTLLM_ATTENTION"].lower() in ("1", "true"),
    "VLLM_FLASHINFER_DISABLE_Q_QUANTIZATION": lambda: bool(
        int(os.getenv("VLLM_FLASHINFER_DISABLE_Q_QUANTIZATION", "0"))
    ),
    "VLLM_HAS_FLASHINFER_CUBIN": lambda: bool(int(os.getenv("VLLM_HAS_FLASHINFER_CUBIN", "0"))),
    "VLLM_NVFP4_GEMM_BACKEND": env_with_choices(
        "VLLM_NVFP4_GEMM_BACKEND",
        None,
        ["flashinfer-cudnn", "flashinfer-trtllm", "flashinfer-cutlass", "cutlass"],
    ),
    "VLLM_ENABLE_CUDAGRAPH_GC": lambda: bool(int(os.getenv("VLLM_ENABLE_CUDAGRAPH_GC", "0"))),
    "VLLM_LOOPBACK_IP": lambda: os.getenv("VLLM_LOOPBACK_IP", ""),
    "VLLM_PROCESS_NAME_PREFIX": lambda: os.getenv("VLLM_PROCESS_NAME_PREFIX", "VLLM"),
    "VLLM_ALLOW_CHUNKED_LOCAL_ATTN_WITH_HYBRID_KV_CACHE": lambda: bool(
        int(os.getenv("VLLM_ALLOW_CHUNKED_LOCAL_ATTN_WITH_HYBRID_KV_CACHE", "0"))
    ),
    "VLLM_ENABLE_RESPONSES_API_STORE": lambda: bool(
        int(os.getenv("VLLM_ENABLE_RESPONSES_API_STORE", "0"))
    ),
    "VLLM_ROCM_FP8_MFMA_PAGE_ATTN": lambda: bool(
        int(os.getenv("VLLM_ROCM_FP8_MFMA_PAGE_ATTN", "0"))
    ),
    "VLLM_ALLREDUCE_USE_SYMM_MEM": lambda: bool(
        int(os.getenv("VLLM_ALLREDUCE_USE_SYMM_MEM", "1"))
    ),
    "VLLM_TUNED_CONFIG_FOLDER": lambda: os.getenv("VLLM_TUNED_CONFIG_FOLDER", None),
    "VLLM_GPT_OSS_SYSTEM_TOOL_MCP_LABELS": env_set_with_choices(
        "VLLM_GPT_OSS_SYSTEM_TOOL_MCP_LABELS",
        default=[],
        choices=["container", "code_interpreter", "web_search_preview"],
    ),
    "VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS": lambda: bool(
        int(os.getenv("VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS", "0"))
    ),
    "VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY": lambda: bool(
        int(os.getenv("VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY", "0"))
    ),
    "VLLM_CUSTOM_SCOPES_FOR_PROFILING": lambda: bool(
        int(os.getenv("VLLM_CUSTOM_SCOPES_FOR_PROFILING", "0"))
    ),
    "VLLM_NVTX_SCOPES_FOR_PROFILING": lambda: bool(
        int(os.getenv("VLLM_NVTX_SCOPES_FOR_PROFILING", "0"))
    ),
    "VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES": lambda: bool(
        int(os.getenv("VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES", "1"))
    ),
    "VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME": lambda: os.getenv(
        "VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME", "VLLM_OBJECT_STORAGE_SHM_BUFFER"
    ),
    "VLLM_DEEPEP_BUFFER_SIZE_MB": lambda: int(os.getenv("VLLM_DEEPEP_BUFFER_SIZE_MB", "1024")),
    "VLLM_DEEPEP_HIGH_THROUGHPUT_FORCE_INTRA_NODE": lambda: bool(
        int(os.getenv("VLLM_DEEPEP_HIGH_THROUGHPUT_FORCE_INTRA_NODE", "0"))
    ),
    "VLLM_DEEPEP_LOW_LATENCY_USE_MNNVL": lambda: bool(
        int(os.getenv("VLLM_DEEPEP_LOW_LATENCY_USE_MNNVL", "0"))
    ),
    "VLLM_DBO_COMM_SMS": lambda: int(os.getenv("VLLM_DBO_COMM_SMS", "20")),
    "VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE": lambda: bool(
        int(os.getenv("VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE", "1"))
    ),
    "VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING": lambda: bool(
        int(os.getenv("VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING", "1"))
    ),
    "VLLM_USE_NCCL_SYMM_MEM": lambda: bool(int(os.getenv("VLLM_USE_NCCL_SYMM_MEM", "0"))),
    "VLLM_NCCL_INCLUDE_PATH": lambda: os.environ.get("VLLM_NCCL_INCLUDE_PATH", None),
    "VLLM_USE_FBGEMM": lambda: bool(int(os.getenv("VLLM_USE_FBGEMM", "0"))),
    "VLLM_GC_DEBUG": lambda: os.getenv("VLLM_GC_DEBUG", ""),
    "VLLM_DISABLE_SHARED_EXPERTS_STREAM": lambda: bool(
        int(os.getenv("VLLM_DISABLE_SHARED_EXPERTS_STREAM", "0"))
    ),
    "VLLM_SHARED_EXPERTS_STREAM_TOKEN_THRESHOLD": lambda: int(
        os.getenv("VLLM_SHARED_EXPERTS_STREAM_TOKEN_THRESHOLD", "256")
    ),
    "VLLM_COMPILE_CACHE_SAVE_FORMAT": env_with_choices(
        "VLLM_COMPILE_CACHE_SAVE_FORMAT", "binary", ["binary", "unpacked"]
    ),
}
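
Every value in this dict is a zero-argument getter, which is what makes lazy, per-access evaluation possible. A minimal sketch of reading an entry directly (attribute access on the module, described under __getattr__ below, is the usual route):

import vllm.envs as envs

# Each entry maps a variable name to a zero-argument callable.
getter = envs.environment_variables["VLLM_RPC_TIMEOUT"]
print(getter())  # 10000 unless VLLM_RPC_TIMEOUT is set

# Equivalent module-attribute access; this goes through __getattr__.
print(envs.VLLM_RPC_TIMEOUT)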

logger module-attribute

logger = logging.getLogger(__name__)

__dir__

__dir__()
Source code in vllm/envs.py
def __dir__():
    return list(environment_variables.keys())

__getattr__

__getattr__(name: str)

Gets environment variables lazily.

NOTE: After enable_envs_cache() is invoked (which happens after service initialization), all environment variables are cached.

Source code in vllm/envs.py
def __getattr__(name: str):
    """
    Gets environment variables lazily.

    NOTE: After enable_envs_cache() is invoked (which happens after service
    initialization), all environment variables are cached.
    """
    if name in environment_variables:
        return environment_variables[name]()
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
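
Because resolution happens per access, edits to os.environ are picked up on the next read (until enable_envs_cache() pins the values). A short sketch:

import os
import vllm.envs as envs

os.environ["VLLM_LOGGING_LEVEL"] = "debug"
print(envs.VLLM_LOGGING_LEVEL)  # 'DEBUG' -- read and upper-cased at access time

os.environ["VLLM_LOGGING_LEVEL"] = "warning"
print(envs.VLLM_LOGGING_LEVEL)  # 'WARNING' -- re-evaluated on this access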

compile_factors

compile_factors() -> dict[str, object]

Return env vars used for torch.compile cache keys.

Start with every known vLLM env var; drop entries in ignored_factors; hash everything else. This keeps the cache key aligned across workers.

Source code in vllm/envs.py
def compile_factors() -> dict[str, object]:
    """Return env vars used for torch.compile cache keys.

    Start with every known vLLM env var; drop entries in `ignored_factors`;
    hash everything else. This keeps the cache key aligned across workers."""

    ignored_factors: set[str] = {
        "MAX_JOBS",
        "VLLM_RPC_BASE_PATH",
        "VLLM_USE_MODELSCOPE",
        "VLLM_RINGBUFFER_WARNING_INTERVAL",
        "VLLM_DEBUG_DUMP_PATH",
        "VLLM_PORT",
        "VLLM_CACHE_ROOT",
        "LD_LIBRARY_PATH",
        "VLLM_SERVER_DEV_MODE",
        "VLLM_DP_MASTER_IP",
        "VLLM_DP_MASTER_PORT",
        "VLLM_RANDOMIZE_DP_DUMMY_INPUTS",
        "VLLM_CI_USE_S3",
        "VLLM_MODEL_REDIRECT_PATH",
        "VLLM_HOST_IP",
        "S3_ACCESS_KEY_ID",
        "S3_SECRET_ACCESS_KEY",
        "S3_ENDPOINT_URL",
        "VLLM_USAGE_STATS_SERVER",
        "VLLM_NO_USAGE_STATS",
        "VLLM_DO_NOT_TRACK",
        "VLLM_LOGGING_LEVEL",
        "VLLM_LOGGING_PREFIX",
        "VLLM_LOGGING_STREAM",
        "VLLM_LOGGING_CONFIG_PATH",
        "VLLM_LOGGING_COLOR",
        "VLLM_LOG_STATS_INTERVAL",
        "VLLM_DEBUG_LOG_API_SERVER_RESPONSE",
        "VLLM_TUNED_CONFIG_FOLDER",
        "VLLM_ENGINE_ITERATION_TIMEOUT_S",
        "VLLM_HTTP_TIMEOUT_KEEP_ALIVE",
        "VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS",
        "VLLM_KEEP_ALIVE_ON_ENGINE_DEATH",
        "VLLM_SLEEP_WHEN_IDLE",
        "VLLM_IMAGE_FETCH_TIMEOUT",
        "VLLM_VIDEO_FETCH_TIMEOUT",
        "VLLM_AUDIO_FETCH_TIMEOUT",
        "VLLM_MEDIA_URL_ALLOW_REDIRECTS",
        "VLLM_MEDIA_LOADING_THREAD_COUNT",
        "VLLM_MAX_AUDIO_CLIP_FILESIZE_MB",
        "VLLM_VIDEO_LOADER_BACKEND",
        "VLLM_MEDIA_CONNECTOR",
        "VLLM_ASSETS_CACHE",
        "VLLM_ASSETS_CACHE_MODEL_CLEAN",
        "VLLM_MM_INPUT_CACHE_GIB",
        "VLLM_WORKER_MULTIPROC_METHOD",
        "VLLM_ENABLE_V1_MULTIPROCESSING",
        "VLLM_V1_OUTPUT_PROC_CHUNK_SIZE",
        "VLLM_CPU_KVCACHE_SPACE",
        "VLLM_CPU_OMP_THREADS_BIND",
        "VLLM_CPU_NUM_OF_RESERVED_CPU",
        "VLLM_CPU_MOE_PREPACK",
        "VLLM_CPU_SGL_KERNEL",
        "VLLM_TEST_FORCE_LOAD_FORMAT",
        "LOCAL_RANK",
        "CUDA_VISIBLE_DEVICES",
        "NO_COLOR",
    }

    from vllm.config.utils import normalize_value

    factors: dict[str, object] = {}
    for factor, getter in environment_variables.items():
        if factor in ignored_factors:
            continue

        try:
            raw = getter()
        except Exception as exc:  # pragma: no cover - defensive logging
            logger.warning(
                "Skipping environment variable %s while hashing compile factors: %s",
                factor,
                exc,
            )
            continue

        factors[factor] = normalize_value(raw)

    ray_noset_env_vars = [
        # Refer to
        # https://github.com/ray-project/ray/blob/c584b1ea97b00793d1def71eaf81537d70efba42/python/ray/_private/accelerators/nvidia_gpu.py#L11
        # https://github.com/ray-project/ray/blob/c584b1ea97b00793d1def71eaf81537d70efba42/python/ray/_private/accelerators/amd_gpu.py#L11
        # https://github.com/ray-project/ray/blob/b97d21dab233c2bd8ed7db749a82a1e594222b5c/python/ray/_private/accelerators/amd_gpu.py#L10
        # https://github.com/ray-project/ray/blob/c584b1ea97b00793d1def71eaf81537d70efba42/python/ray/_private/accelerators/npu.py#L12
        # https://github.com/ray-project/ray/blob/c584b1ea97b00793d1def71eaf81537d70efba42/python/ray/_private/accelerators/hpu.py#L12
        # https://github.com/ray-project/ray/blob/c584b1ea97b00793d1def71eaf81537d70efba42/python/ray/_private/accelerators/neuron.py#L14
        # https://github.com/ray-project/ray/blob/c584b1ea97b00793d1def71eaf81537d70efba42/python/ray/_private/accelerators/tpu.py#L38
        # https://github.com/ray-project/ray/blob/c584b1ea97b00793d1def71eaf81537d70efba42/python/ray/_private/accelerators/intel_gpu.py#L10
        # https://github.com/ray-project/ray/blob/c584b1ea97b00793d1def71eaf81537d70efba42/python/ray/_private/accelerators/rbln.py#L10
        "RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES",
        "RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES",
        "RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES",
        "RAY_EXPERIMENTAL_NOSET_ASCEND_RT_VISIBLE_DEVICES",
        "RAY_EXPERIMENTAL_NOSET_HABANA_VISIBLE_MODULES",
        "RAY_EXPERIMENTAL_NOSET_NEURON_RT_VISIBLE_CORES",
        "RAY_EXPERIMENTAL_NOSET_TPU_VISIBLE_CHIPS",
        "RAY_EXPERIMENTAL_NOSET_ONEAPI_DEVICE_SELECTOR",
        "RAY_EXPERIMENTAL_NOSET_RBLN_RT_VISIBLE_DEVICES",
    ]

    for var in ray_noset_env_vars:
        factors[var] = normalize_value(os.getenv(var))

    return factors
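
A sketch of how the returned factors could be folded into a single cache key; the hashing scheme below is illustrative, not necessarily the one vLLM applies:

import hashlib
import json

from vllm.envs import compile_factors

factors = compile_factors()
# Serialize deterministically so every worker derives the same digest.
digest = hashlib.sha256(
    json.dumps(factors, sort_keys=True, default=str).encode()
).hexdigest()
print(digest[:16])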

disable_compile_cache

disable_compile_cache() -> bool
Source code in vllm/envs.py
def disable_compile_cache() -> bool:
    return bool(int(os.getenv("VLLM_DISABLE_COMPILE_CACHE", "0")))

enable_envs_cache

enable_envs_cache() -> None

Enables caching of environment variables. This is useful for performance reasons, as it avoids the need to re-evaluate environment variables on every call.

NOTE: Currently, it's invoked after service initialization to reduce runtime overhead. This also means that environment variables should NOT be updated after the service is initialized.

Source code in vllm/envs.py
def enable_envs_cache() -> None:
    """
    Enables caching of environment variables. This is useful for performance
    reasons, as it avoids the need to re-evaluate environment variables on
    every call.

    NOTE: Currently, it's invoked after service initialization to reduce
    runtime overhead. This also means that environment variables should NOT
    be updated after the service is initialized.
    """
    # Tag __getattr__ with functools.cache
    global __getattr__
    __getattr__ = functools.cache(__getattr__)

    # Cache all environment variables
    for key in environment_variables:
        __getattr__(key)
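
Once the cache is enabled, each variable is pinned to the value it had at that moment, which is why the docstring warns against mutating the environment afterwards:

import os
import vllm.envs as envs

os.environ["VLLM_LOGGING_LEVEL"] = "INFO"
envs.enable_envs_cache()

os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"
print(envs.VLLM_LOGGING_LEVEL)  # still 'INFO': values were cached at enable time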

env_list_with_choices

env_list_with_choices(
    env_name: str,
    default: list[str],
    choices: list[str] | Callable[[], list[str]],
    case_sensitive: bool = True,
) -> Callable[[], list[str]]

Create a lambda that validates an environment variable containing comma-separated values against allowed choices.

Parameters:

    env_name (str, required): Name of the environment variable
    default (list[str], required): Default list of values if not set
    choices (list[str] | Callable[[], list[str]], required): List of valid string options, or a callable that returns the list
    case_sensitive (bool, default True): Whether validation should be case sensitive

Returns:

    Callable[[], list[str]]: Lambda function for the environment_variables dict that returns a list of strings

Source code in vllm/envs.py
def env_list_with_choices(
    env_name: str,
    default: list[str],
    choices: list[str] | Callable[[], list[str]],
    case_sensitive: bool = True,
) -> Callable[[], list[str]]:
    """
    Create a lambda that validates an environment variable
    containing comma-separated values against allowed choices

    Args:
        env_name: Name of the environment variable
        default: Default list of values if not set
        choices: List of valid string options or callable that returns list
        case_sensitive: Whether validation should be case sensitive

    Returns:
        Lambda function for environment_variables
        dict that returns list of strings
    """

    def _get_validated_env_list() -> list[str]:
        value = os.getenv(env_name)
        if value is None:
            return default

        # Split comma-separated values and strip whitespace
        values = [v.strip() for v in value.split(",") if v.strip()]

        if not values:
            return default

        # Resolve choices if it's a callable (for lazy loading)
        actual_choices = choices() if callable(choices) else choices

        # Validate each value
        for val in values:
            if not case_sensitive:
                check_value = val.lower()
                check_choices = [choice.lower() for choice in actual_choices]
            else:
                check_value = val
                check_choices = actual_choices

            if check_value not in check_choices:
                raise ValueError(
                    f"Invalid value '{val}' in {env_name}. "
                    f"Valid options: {actual_choices}."
                )

        return values

    return _get_validated_env_list
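
A usage sketch; MY_FEATURES is a made-up variable name, not a real vLLM setting:

import os

from vllm.envs import env_list_with_choices

getter = env_list_with_choices("MY_FEATURES", default=[], choices=["a", "b", "c"])

os.environ["MY_FEATURES"] = "a, c"
print(getter())  # ['a', 'c'] -- values are split on commas and stripped

os.environ["MY_FEATURES"] = "a,d"
getter()  # raises ValueError: 'd' is not among the valid options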

env_set_with_choices

env_set_with_choices(
    env_name: str,
    default: list[str],
    choices: list[str] | Callable[[], list[str]],
    case_sensitive: bool = True,
) -> Callable[[], set[str]]

Creates a lambda that validates an environment variable containing comma-separated values against allowed choices and returns the validated values as a set.

Source code in vllm/envs.py
def env_set_with_choices(
    env_name: str,
    default: list[str],
    choices: list[str] | Callable[[], list[str]],
    case_sensitive: bool = True,
) -> Callable[[], set[str]]:
    """
    Creates a lambda that validates an environment variable containing
    comma-separated values against allowed choices and returns the
    validated values as a set.
    """

    def _get_validated_env_set() -> set[str]:
        return set(env_list_with_choices(env_name, default, choices, case_sensitive)())

    return _get_validated_env_set
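
The same example as above, except the parsed values come back de-duplicated as a set (again with a hypothetical variable name):

import os

from vllm.envs import env_set_with_choices

getter = env_set_with_choices("MY_FEATURES", default=[], choices=["a", "b", "c"])
os.environ["MY_FEATURES"] = "a,c,a"
print(getter())  # {'a', 'c'}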

env_with_choices

env_with_choices(
    env_name: str,
    default: str | None,
    choices: list[str] | Callable[[], list[str]],
    case_sensitive: bool = True,
) -> Callable[[], str | None]

Create a lambda that validates an environment variable against allowed choices.

Parameters:

    env_name (str, required): Name of the environment variable
    default (str | None, required): Default value if not set (can be None)
    choices (list[str] | Callable[[], list[str]], required): List of valid string options, or a callable that returns the list
    case_sensitive (bool, default True): Whether validation should be case sensitive

Returns:

    Callable[[], str | None]: Lambda function for the environment_variables dict

Source code in vllm/envs.py
def env_with_choices(
    env_name: str,
    default: str | None,
    choices: list[str] | Callable[[], list[str]],
    case_sensitive: bool = True,
) -> Callable[[], str | None]:
    """
    Create a lambda that validates an environment variable against allowed choices

    Args:
        env_name: Name of the environment variable
        default: Default value if not set (can be None)
        choices: List of valid string options or callable that returns list
        case_sensitive: Whether validation should be case sensitive

    Returns:
        Lambda function for environment_variables dict
    """

    def _get_validated_env() -> str | None:
        value = os.getenv(env_name)
        if value is None:
            return default

        # Resolve choices if it's a callable (for lazy loading)
        actual_choices = choices() if callable(choices) else choices

        if not case_sensitive:
            check_value = value.lower()
            check_choices = [choice.lower() for choice in actual_choices]
        else:
            check_value = value
            check_choices = actual_choices

        if check_value not in check_choices:
            raise ValueError(
                f"Invalid value '{value}' for {env_name}. "
                f"Valid options: {actual_choices}."
            )

        return value

    return _get_validated_env
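
This helper backs validated entries such as CMAKE_BUILD_TYPE and VLLM_ALL2ALL_BACKEND in environment_variables. A usage sketch:

import os

from vllm.envs import env_with_choices

getter = env_with_choices("CMAKE_BUILD_TYPE", None, ["Debug", "Release", "RelWithDebInfo"])

print(getter())  # None while the variable is unset

os.environ["CMAKE_BUILD_TYPE"] = "Release"
print(getter())  # 'Release'

os.environ["CMAKE_BUILD_TYPE"] = "Fast"
getter()  # raises ValueError listing the valid options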

get_default_cache_root

get_default_cache_root()
Source code in vllm/envs.py
def get_default_cache_root():
    return os.getenv(
        "XDG_CACHE_HOME",
        os.path.join(os.path.expanduser("~"), ".cache"),
    )

get_default_config_root

get_default_config_root()
Source code in vllm/envs.py
def get_default_config_root():
    return os.getenv(
        "XDG_CONFIG_HOME",
        os.path.join(os.path.expanduser("~"), ".config"),
    )
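
Both helpers honor the XDG base-directory variables and fall back to the conventional dot-directories; VLLM_CACHE_ROOT and VLLM_CONFIG_ROOT append a "vllm" component on top. For example:

import os

from vllm.envs import get_default_cache_root

# With XDG_CACHE_HOME unset, this returns ~/.cache,
# so VLLM_CACHE_ROOT resolves to ~/.cache/vllm.
os.environ["XDG_CACHE_HOME"] = "/tmp/xdg-cache"
print(get_default_cache_root())  # '/tmp/xdg-cache'
# VLLM_CACHE_ROOT then resolves to '/tmp/xdg-cache/vllm'.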

get_vllm_port

get_vllm_port() -> int | None

Get the port from VLLM_PORT environment variable.

Returns:

    int | None: The port number as an integer if VLLM_PORT is set, None otherwise.

Raises:

    ValueError: If VLLM_PORT is set to a URI rather than an integer, which suggests a Kubernetes service discovery issue.

Source code in vllm/envs.py
def get_vllm_port() -> int | None:
    """Get the port from VLLM_PORT environment variable.

    Returns:
        The port number as an integer if VLLM_PORT is set, None otherwise.

    Raises:
        ValueError: If VLLM_PORT is a URI, which suggests a k8s service discovery issue.
    """
    if "VLLM_PORT" not in os.environ:
        return None

    port = os.getenv("VLLM_PORT", "0")

    try:
        return int(port)
    except ValueError as err:
        from urllib.parse import urlparse

        parsed = urlparse(port)
        if parsed.scheme:
            raise ValueError(
                f"VLLM_PORT '{port}' appears to be a URI. "
                "This may be caused by a Kubernetes service discovery issue,"
                "check the warning in: https://docs.vllm.ai/en/stable/serving/env_vars.html"
            ) from None
        raise ValueError(f"VLLM_PORT '{port}' must be a valid integer") from err
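
The URI check targets a classic Kubernetes pitfall: a Service named "vllm" causes kubelet to inject VLLM_PORT=tcp://<cluster-ip>:<port> into the pod, which is not a plain integer. A sketch of both paths:

import os

from vllm.envs import get_vllm_port

os.environ["VLLM_PORT"] = "8000"
print(get_vllm_port())  # 8000

# The kind of value a Kubernetes Service named "vllm" injects:
os.environ["VLLM_PORT"] = "tcp://10.0.0.1:8000"
get_vllm_port()  # raises ValueError pointing at service discovery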

is_set

is_set(name: str)

Check if an environment variable is explicitly set.

Source code in vllm/envs.py
def is_set(name: str):
    """Check if an environment variable is explicitly set."""
    if name in environment_variables:
        return name in os.environ
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
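
is_set distinguishes "explicitly set in the environment" from "falling back to the default", which plain attribute access cannot do:

import os
import vllm.envs as envs

os.environ.pop("VLLM_LOGGING_LEVEL", None)
print(envs.VLLM_LOGGING_LEVEL)            # 'INFO' -- the default
print(envs.is_set("VLLM_LOGGING_LEVEL"))  # False: not explicitly set

os.environ["VLLM_LOGGING_LEVEL"] = "INFO"
print(envs.is_set("VLLM_LOGGING_LEVEL"))  # True, even though the value equals the default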

maybe_convert_bool

maybe_convert_bool(value: str | None) -> bool | None
Source code in vllm/envs.py
def maybe_convert_bool(value: str | None) -> bool | None:
    if value is None:
        return None
    return bool(int(value))

maybe_convert_int

maybe_convert_int(value: str | None) -> int | None
Source code in vllm/envs.py
def maybe_convert_int(value: str | None) -> int | None:
    if value is None:
        return None
    return int(value)
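
Both converters map an unset variable (None) through to None instead of raising, while still rejecting malformed values:

from vllm.envs import maybe_convert_bool, maybe_convert_int

print(maybe_convert_int(None))   # None -- variable not set
print(maybe_convert_int("480"))  # 480
print(maybe_convert_bool(None))  # None
print(maybe_convert_bool("1"))   # True
print(maybe_convert_bool("0"))   # False
# maybe_convert_int("8000/tcp") would raise ValueError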

use_aot_compile

use_aot_compile() -> bool
Source code in vllm/envs.py
def use_aot_compile() -> bool:
    from vllm.model_executor.layers.batch_invariant import (
        vllm_is_batch_invariant,
    )
    from vllm.utils.torch_utils import is_torch_equal_or_newer

    default_value = (
        "1"
        if is_torch_equal_or_newer("2.10.0.dev") and not disable_compile_cache()
        else "0"
    )

    return (
        not vllm_is_batch_invariant()
        and os.environ.get("VLLM_USE_AOT_COMPILE", default_value) == "1"
    )
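
So the computed default is "1" only on a sufficiently new torch with the compile cache enabled; setting the variable explicitly overrides that default, though batch-invariant mode still forces the result to False:

import os
import vllm.envs as envs

# Force AOT compile off regardless of the torch version:
os.environ["VLLM_USE_AOT_COMPILE"] = "0"
print(envs.VLLM_USE_AOT_COMPILE)  # False

# Force it on (still gated on batch-invariant mode being disabled):
os.environ["VLLM_USE_AOT_COMPILE"] = "1"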