⚡ Fastcheck (Public): Fast CI for PRs
Build: Fix embeds
Status: Passed in 2h 19m and blocked

Jobs:
bootstrap
Neuron Test
Documentation Build
Basic Correctness Test
Core Test
Entrypoints Test
Async Engine, Inputs, Utils, Worker Test
Python-only Installation Test
Chunked Prefill Test
Distributed Tests (4 GPUs)
Metrics, Tracing Test
Regression Test
Engine Test
V1 Test
Examples Test
Prefix Caching Test
Samplers Test
LogitsProcessor Test
Speculative decoding tests
LoRA Test %N
PyTorch Fullgraph Smoke Test
PyTorch Fullgraph Test
Kernels Test %N
Tensorizer Test
Benchmarks
Quantization Test
LM Eval Small Models
OpenAI API correctness
Encoder Decoder tests
OpenAI-Compatible Tool Use
Basic Models Test
Language Models Test (Standard)
Language Models Test (Extended)
Multi-Modal Models Test (Standard)
Multi-Modal Models Test (Extended) 1
Multi-Modal Models Test (Extended) 2
Custom Models Test
Distributed Comm Ops Test
Distributed Tests (2 GPUs)
Plugin Tests (2 GPUs)
Multi-step Tests (4 GPUs)
Pipeline Parallelism Test
LoRA TP Test (Distributed)
Weight Loading Multiple GPU Test
2 Node Tests (4 GPUs in total)
A100 tests:
    Weight Loading Multiple GPU Test - Large Models
    Distributed Tests (A100)
    LM Eval Large Models
TPU V0 Test
TPU V1 Test
GH200 Test

Job commands:

bootstrap:
    curl -sSL https://raw.githubusercontent.com/vllm-project/buildkite-ci/main/scripts/ci_aws_bootstrap.sh | bash
    Waited 32s, ran in 33s.
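
The bootstrap step pipes a remote script straight into bash. As a minimal sketch (same URL as above, everything else illustrative), a more debuggable equivalent downloads to a file first, so an HTTP error page is never executed and a failed run leaves an artifact to inspect:

    # Sketch of a safer equivalent to `curl ... | bash`; only the URL comes from the pipeline.
    set -euo pipefail
    url="https://raw.githubusercontent.com/vllm-project/buildkite-ci/main/scripts/ci_aws_bootstrap.sh"
    tmp="$(mktemp)"
    curl -sSL --fail "$url" -o "$tmp"   # --fail: abort on HTTP errors instead of piping them into bash
    bash "$tmp"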

Neuron Test:
    bash .buildkite/run-neuron-test.sh
    Waited 28s, ran in 34m 8s.

2 Node Tests (4 GPUs in total):
    ./.buildkite/run-multi-node-test.sh /vllm-workspace/tests 2 2 public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:7f0307aad002922d7c16087b90b7eb32078fde95 "VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep 'Same node test passed' && VLLM_MULTI_NODE=1 pytest -v -s distributed/test_multi_node_assignment.py && VLLM_MULTI_NODE=1 pytest -v -s distributed/test_pipeline_parallel.py" "VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep 'Same node test passed'"
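
run-multi-node-test.sh receives one quoted command string per node, and both strings point torchrun at the same c10d rendezvous endpoint so the two 2-GPU nodes assemble into a single 4-rank job. A minimal sketch of that rendezvous pattern, with a hypothetical entrypoint:

    # Run the same launch command on each of the two nodes.
    # 192.168.10.10 is the rendezvous address from the pipeline; the port and
    # the entrypoint script below are illustrative.
    torchrun \
        --nnodes 2 \
        --nproc-per-node=2 \
        --rdzv_backend=c10d \
        --rdzv_endpoint=192.168.10.10:29500 \
        your_distributed_test.py   # hypothetical; each node contributes 2 ranks, 4 in total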

TPU V0 Test:
    if [[ -f ".buildkite/run-tpu-test.sh" ]]; then bash .buildkite/run-tpu-test.sh; fi && yes | docker system prune -a

TPU V1 Test:
    if [[ -f ".buildkite/run-tpu-v1-test.sh" ]]; then bash .buildkite/run-tpu-v1-test.sh; fi && yes | docker system prune -a
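
Both TPU jobs guard on the script existing at the checked-out commit, then reclaim runner disk with an auto-confirmed prune (`yes |` answers the prune's confirmation prompt). Because the chain uses `&&`, a failing test script also skips the prune. A sketch, under the assumption that pruning should happen regardless of the test outcome (a behavior change, not what the pipeline does):

    # Sketch only: same existence guard, but the prune runs even on test failure.
    status=0
    if [[ -f ".buildkite/run-tpu-test.sh" ]]; then
        bash .buildkite/run-tpu-test.sh || status=$?
    fi
    yes | docker system prune -a   # free image and layer space on the shared runner
    exit "$status"                 # still surface the test's exit code to CI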

GH200 Test:
    nvidia-smi && bash .buildkite/run-gh200-test.sh

AMD:
build image:
    grep -i 'from base as test' Dockerfile.rocm && docker build --build-arg max_jobs=16 --tag rocm/vllm-ci:7f0307aad002922d7c16087b90b7eb32078fde95 -f Dockerfile.rocm --target test --progress plain . || docker build --build-arg max_jobs=16 --tag rocm/vllm-ci:7f0307aad002922d7c16087b90b7eb32078fde95 -f Dockerfile.rocm --progress plain . && docker push rocm/vllm-ci:7f0307aad002922d7c16087b90b7eb32078fde95
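
The grep probes whether Dockerfile.rocm defines a dedicated `test` stage and builds that target when it does; since shell `&&` and `||` have equal precedence and associate left, the final push runs after whichever build succeeds. An explicit sketch of that branch logic (close but not identical: the one-liner also falls back to a full build when the targeted build itself fails):

    # Illustrative restatement of the build-image step's branching.
    TAG=rocm/vllm-ci:7f0307aad002922d7c16087b90b7eb32078fde95
    if grep -iq 'from base as test' Dockerfile.rocm; then
        # Dockerfile has a "FROM base AS test" stage: build just that target.
        docker build --build-arg max_jobs=16 --tag "$TAG" -f Dockerfile.rocm --target test --progress plain .
    else
        docker build --build-arg max_jobs=16 --tag "$TAG" -f Dockerfile.rocm --progress plain .
    fi
    docker push "$TAG"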

AMD: Core Test:
    bash .buildkite/run-amd-test.sh "(command rocm-smi || true) && export VLLM_LOGGING_LEVEL=DEBUG && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd /vllm-workspace/tests ; pytest -v -s core"

AMD: Entrypoints Test:
    bash .buildkite/run-amd-test.sh "(command rocm-smi || true) && export VLLM_LOGGING_LEVEL=DEBUG && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd /vllm-workspace/tests ; export VLLM_WORKER_MULTIPROC_METHOD=spawn && pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_lazy_outlines.py --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_generate_multiple_loras.py --ignore=entrypoints/llm/test_guided_generate.py --ignore=entrypoints/llm/test_collective_rpc.py && pytest -v -s entrypoints/llm/test_lazy_outlines.py && pytest -v -s entrypoints/llm/test_generate.py && pytest -v -s entrypoints/llm/test_generate_multiple_loras.py && VLLM_USE_V1=0 pytest -v -s entrypoints/llm/test_guided_generate.py && pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/correctness/ && pytest -v -s entrypoints/test_chat_utils.py && VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode"

AMD: Regression Test:
    bash .buildkite/run-amd-test.sh "(command rocm-smi || true) && export VLLM_LOGGING_LEVEL=DEBUG && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd /vllm-workspace/tests ; pip install modelscope && pytest -v -s test_regression.py"

AMD: Engine Test:
    bash .buildkite/run-amd-test.sh "(command rocm-smi || true) && export VLLM_LOGGING_LEVEL=DEBUG && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd /vllm-workspace/tests ; pytest -v -s engine test_sequence.py test_config.py test_logger.py && pytest -v -s tokenization"

AMD: Prefix Caching Test:
    bash .buildkite/run-amd-test.sh "(command rocm-smi || true) && export VLLM_LOGGING_LEVEL=DEBUG && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd /vllm-workspace/tests ; pytest -v -s prefix_caching"

AMD: LogitsProcessor Test:
    bash .buildkite/run-amd-test.sh "(command rocm-smi || true) && export VLLM_LOGGING_LEVEL=DEBUG && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd /vllm-workspace/tests ; pytest -v -s test_logits_processor.py && pytest -v -s model_executor/test_guided_processors.py"

AMD: LoRA Test %N:
    bash .buildkite/run-amd-test.sh "(command rocm-smi || true) && export VLLM_LOGGING_LEVEL=DEBUG && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd /vllm-workspace/tests ; pytest -v -s lora --shard-id=$BUILDKITE_PARALLEL_JOB --num-shards=$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_long_context.py --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py --ignore=lora/test_minicpmv_tp.py --ignore=lora/test_transfomers_model.py"

AMD: Kernels Test %N:
    bash .buildkite/run-amd-test.sh "(command rocm-smi || true) && export VLLM_LOGGING_LEVEL=DEBUG && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd /vllm-workspace/tests ; pytest -v -s kernels --shard-id=$BUILDKITE_PARALLEL_JOB --num-shards=$BUILDKITE_PARALLEL_JOB_COUNT"
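
The %N jobs fan out across Buildkite parallel shards: the agent injects $BUILDKITE_PARALLEL_JOB (0-based shard index) and $BUILDKITE_PARALLEL_JOB_COUNT, which the pytest sharding flags above map onto disjoint slices of the suite. A sketch of reproducing one shard locally, with the two values filled in by hand (normally they come from the agent):

    # Emulate shard 2 of 4 outside Buildkite; the exported values are hypothetical.
    export BUILDKITE_PARALLEL_JOB=2          # 0-based shard index
    export BUILDKITE_PARALLEL_JOB_COUNT=4    # total number of shards
    pytest -v -s kernels \
        --shard-id="$BUILDKITE_PARALLEL_JOB" \
        --num-shards="$BUILDKITE_PARALLEL_JOB_COUNT"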

AMD: Tensorizer Test:
    bash .buildkite/run-amd-test.sh "(command rocm-smi || true) && export VLLM_LOGGING_LEVEL=DEBUG && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd /vllm-workspace/tests ; apt-get update && apt-get install -y curl libsodium23 && export VLLM_WORKER_MULTIPROC_METHOD=spawn && pytest -v -s tensorizer_loader"

AMD: Benchmarks:
    bash .buildkite/run-amd-test.sh "(command rocm-smi || true) && export VLLM_LOGGING_LEVEL=DEBUG && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd /vllm-workspace/.buildkite ; bash run-benchmarks.sh"

AMD: OpenAI-Compatible Tool Use:
    bash .buildkite/run-amd-test.sh "(command rocm-smi || true) && export VLLM_LOGGING_LEVEL=DEBUG && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd /vllm-workspace/tests ; pytest -v -s tool_use"
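
Every AMD job hands run-amd-test.sh the same prologue: a tolerant rocm-smi probe, debug logging, the deprecated-beam-search escape hatch, and a cd into the workspace (tests/ for every suite except Benchmarks). A hypothetical helper that factors out that repetition; `amd_test` is illustrative, not part of the repo:

    # Hypothetical wrapper around the shared AMD prologue shown above.
    amd_test() {
        local suite_cmd="$1"
        bash .buildkite/run-amd-test.sh "(command rocm-smi || true) && export VLLM_LOGGING_LEVEL=DEBUG && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd /vllm-workspace/tests ; ${suite_cmd}"
    }

    amd_test "pytest -v -s tool_use"   # e.g. the OpenAI-Compatible Tool Use suite
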
Total Job Run Time: 3h 9m