-
Notifications
You must be signed in to change notification settings - Fork 580
Add CI workflows to validate CH on Microsoft Hypervisor #7381
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,236 @@ | ||
| name: MSHV Infra Setup | ||
| on: | ||
| workflow_call: | ||
| inputs: | ||
| ARCH: | ||
| description: 'Architecture for the VM' | ||
| required: true | ||
| type: string | ||
| KEY: | ||
| description: 'SSH Key Name' | ||
| required: true | ||
| type: string | ||
| OS_DISK_SIZE: | ||
| description: 'OS Disk Size in GB' | ||
| required: true | ||
| type: string | ||
| RG: | ||
| description: 'Resource Group Name' | ||
| required: true | ||
| type: string | ||
| VM_SKU: | ||
| description: 'VM SKU' | ||
| required: true | ||
| type: string | ||
| secrets: | ||
| MI_CLIENT_ID: | ||
| required: true | ||
| RUNNER_RG: | ||
| required: true | ||
| STORAGE_ACCOUNT_PATHS: | ||
| required: true | ||
| ARCH_SOURCE_PATH: | ||
| required: true | ||
| USERNAME: | ||
| required: true | ||
| outputs: | ||
| PRIVATE_IP: | ||
| description: 'Private IP of the VM' | ||
| value: ${{ jobs.infra-setup.outputs.PRIVATE_IP }} | ||
| concurrency: | ||
| group: ${{ github.workflow }}-${{ github.ref }} | ||
| cancel-in-progress: true | ||
| jobs: | ||
| infra-setup: | ||
| name: ${{ inputs.ARCH }} VM Provision | ||
| runs-on: mshv | ||
| outputs: | ||
| PRIVATE_IP: ${{ steps.get-vm-ip.outputs.PRIVATE_IP }} | ||
| steps: | ||
| - name: Install & login to AZ CLI | ||
| env: | ||
| MI_CLIENT_ID: ${{ secrets.MI_CLIENT_ID }} | ||
| run: | | ||
| set -e | ||
| echo "Installing Azure CLI if not already installed" | ||
| if ! command -v az &>/dev/null; then | ||
| curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash | ||
| else | ||
| echo "Azure CLI already installed" | ||
| fi | ||
| az --version | ||
| echo "Logging into Azure CLI using Managed Identity" | ||
| az login --identity --client-id ${MI_CLIENT_ID} | ||
|
|
||
| - name: Get Location | ||
| id: get-location | ||
| env: | ||
| SKU: ${{ inputs.VM_SKU }} | ||
| STORAGE_ACCOUNT_PATHS: ${{ secrets.STORAGE_ACCOUNT_PATHS }} | ||
| run: | | ||
| set -e | ||
| # Extract vCPU count from SKU (e.g., "Standard_D2s_v3" => 2) | ||
| vcpu=$(echo "$SKU" | sed -n 's/^Standard_[A-Za-z]\+\([0-9]\+\).*/\1/p') | ||
| if [[ -z "$vcpu" ]]; then | ||
| echo "Cannot extract vCPU count from SKU: $SKU" | ||
| exit 1 | ||
| fi | ||
|
|
||
| SUPPORTED_LOCATIONS=$(echo "$STORAGE_ACCOUNT_PATHS" | jq -r 'to_entries[] | .key') | ||
|
|
||
| for location in $SUPPORTED_LOCATIONS; do | ||
| family=$(az vm list-skus --size "$SKU" --location "$location" --resource-type "virtualMachines" --query '[0].family' -o tsv) | ||
| if [[ -z "$family" ]]; then | ||
| echo "Cannot determine VM family for SKU: $SKU in $location" | ||
| continue | ||
| fi | ||
|
|
||
| usage=$(az vm list-usage --location "$location" --query "[?name.value=='$family'] | [0]" -o json) | ||
| current=$(echo "$usage" | jq -r '.currentValue') | ||
| limit=$(echo "$usage" | jq -r '.limit') | ||
|
|
||
| if [[ $((limit - current)) -ge $vcpu ]]; then | ||
| echo "Sufficient quota found in $location" | ||
| echo "location=$location" >> "$GITHUB_OUTPUT" | ||
| exit 0 | ||
| fi | ||
| done | ||
|
|
||
| echo "No location found with sufficient vCPU quota for SKU: $SKU" | ||
| exit 1 | ||
|
|
||
| - name: Create Resource Group | ||
| id: rg-setup | ||
| env: | ||
| LOCATION: ${{ steps.get-location.outputs.location }} | ||
| RG: ${{ inputs.RG }} | ||
| STORAGE_ACCOUNT_PATHS: ${{ secrets.STORAGE_ACCOUNT_PATHS }} | ||
| run: | | ||
| set -e | ||
| echo "Creating Resource Group: $RG" | ||
| # Create the resource group | ||
| echo "Creating resource group in location: ${LOCATION}" | ||
| az group create --name ${RG} --location ${LOCATION} | ||
| echo "Resource group created successfully." | ||
|
|
||
| - name: Generate SSH Key | ||
| id: generate-ssh-key | ||
| env: | ||
| KEY: ${{ inputs.KEY }} | ||
| run: | | ||
| set -e | ||
| echo "Generating SSH key: $KEY" | ||
| mkdir -p ~/.ssh | ||
| ssh-keygen -t rsa -b 4096 -f ~/.ssh/${KEY} -N "" | ||
|
|
||
| - name: Create VM | ||
| id: vm-setup | ||
| env: | ||
| KEY: ${{ inputs.KEY }} | ||
| LOCATION: ${{ steps.get-location.outputs.location }} | ||
| OS_DISK_SIZE: ${{ inputs.OS_DISK_SIZE }} | ||
| RG: ${{ inputs.RG }} | ||
| RUNNER_RG: ${{ secrets.RUNNER_RG }} | ||
| USERNAME: ${{ secrets.USERNAME }} | ||
| VM_SKU: ${{ inputs.VM_SKU }} | ||
| VM_IMAGE_NAME: ${{ inputs.ARCH }}_${{ steps.get-location.outputs.location }}_image | ||
| VM_NAME: ${{ inputs.ARCH }}_${{ steps.get-location.outputs.location }}_${{ github.run_id }} | ||
| run: | | ||
| set -e | ||
| echo "Creating $VM_SKU VM: $VM_NAME" | ||
|
|
||
| # Extract subnet ID from the runner VM | ||
| echo "Retrieving subnet ID..." | ||
| SUBNET_ID=$(az network vnet list --resource-group ${RUNNER_RG} --query "[?contains(location, '${LOCATION}')].{SUBNETS:subnets}" | jq -r ".[0].SUBNETS[0].id") | ||
| if [[ -z "${SUBNET_ID}" ]]; then | ||
| echo "ERROR: Failed to retrieve Subnet ID." | ||
| exit 1 | ||
| fi | ||
|
|
||
| # Extract image ID from the runner VM | ||
| echo "Retrieving image ID..." | ||
| IMAGE_ID=$(az image show --resource-group ${RUNNER_RG} --name ${VM_IMAGE_NAME} --query "id" -o tsv) | ||
| if [[ -z "${IMAGE_ID}" ]]; then | ||
| echo "ERROR: Failed to retrieve Image ID." | ||
| exit 1 | ||
| fi | ||
|
|
||
| # Create VM | ||
| az vm create \ | ||
| --resource-group ${RG} \ | ||
| --name ${VM_NAME} \ | ||
| --subnet ${SUBNET_ID} \ | ||
| --size ${VM_SKU} \ | ||
| --location ${LOCATION} \ | ||
| --image ${IMAGE_ID} \ | ||
| --os-disk-size-gb ${OS_DISK_SIZE} \ | ||
| --public-ip-sku Standard \ | ||
| --storage-sku Premium_LRS \ | ||
| --public-ip-address "" \ | ||
| --admin-username ${USERNAME} \ | ||
| --ssh-key-value ~/.ssh/${KEY}.pub \ | ||
| --security-type Standard \ | ||
| --output json | ||
|
|
||
| echo "VM creation process completed successfully." | ||
|
|
||
| - name: Get VM Private IP | ||
| id: get-vm-ip | ||
| env: | ||
| RG: ${{ inputs.RG }} | ||
| VM_NAME: ${{ inputs.ARCH }}_${{ steps.get-location.outputs.location }}_${{ github.run_id }} | ||
| run: | | ||
| set -e | ||
| echo "Retrieving VM Private IP address..." | ||
| # Retrieve VM Private IP address | ||
| PRIVATE_IP=$(az vm show -g ${RG} -n ${VM_NAME} -d --query privateIps -o tsv) | ||
| if [[ -z "$PRIVATE_IP" ]]; then | ||
| echo "ERROR: Failed to retrieve private IP address." | ||
| exit 1 | ||
| fi | ||
| echo "PRIVATE_IP=$PRIVATE_IP" >> $GITHUB_OUTPUT | ||
|
|
||
| - name: Wait for SSH availability | ||
| env: | ||
| KEY: ${{ inputs.KEY }} | ||
| PRIVATE_IP: ${{ steps.get-vm-ip.outputs.PRIVATE_IP }} | ||
| USERNAME: ${{ secrets.USERNAME }} | ||
| run: | | ||
| echo "Waiting for SSH to be accessible..." | ||
| timeout 120 bash -c 'until ssh -o StrictHostKeyChecking=no -i ~/.ssh/${KEY} ${USERNAME}@${PRIVATE_IP} "exit" 2>/dev/null; do sleep 5; done' | ||
| echo "VM is accessible!" | ||
|
|
||
| - name: Remove Old Host Key | ||
| env: | ||
| PRIVATE_IP: ${{ steps.get-vm-ip.outputs.PRIVATE_IP }} | ||
| run: | | ||
| set -e | ||
| echo "Removing the old host key" | ||
| ssh-keygen -R $PRIVATE_IP | ||
|
|
||
| - name: SSH into VM and Install Dependencies | ||
| env: | ||
| KEY: ${{ inputs.KEY }} | ||
| PRIVATE_IP: ${{ steps.get-vm-ip.outputs.PRIVATE_IP }} | ||
| USERNAME: ${{ secrets.USERNAME }} | ||
| run: | | ||
| set -e | ||
| ssh -i ~/.ssh/${KEY} -o StrictHostKeyChecking=no ${USERNAME}@${PRIVATE_IP} << EOF | ||
| set -e | ||
| echo "Logged in successfully." | ||
| echo "Installing dependencies..." | ||
| sudo tdnf install -y git moby-engine moby-cli clang llvm pkg-config make gcc glibc-devel | ||
| echo "Installing Rust..." | ||
| curl -sSf https://sh.rustup.rs | sh -s -- --default-toolchain stable --profile default -y | ||
| export PATH="\$HOME/.cargo/bin:\$PATH" | ||
| cargo --version | ||
| sudo mkdir -p /etc/docker/ | ||
| echo '{"default-ulimits":{"nofile":{"Hard":65535,"Name":"nofile","Soft":65535}}}' | sudo tee /etc/docker/daemon.json | ||
| sudo systemctl stop docker | ||
| sudo systemctl enable docker.service | ||
| sudo systemctl enable containerd.service | ||
| sudo systemctl start docker | ||
| sudo groupadd -f docker | ||
| sudo usermod -a -G docker ${USERNAME} | ||
| sudo systemctl restart docker | ||
| EOF |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,108 @@ | ||
| name: MSHV Integration Tests | ||
| on: [pull_request_target, merge_group] | ||
|
|
||
| jobs: | ||
| infra-setup: | ||
| name: MSHV Infra Setup (x86_64) | ||
| uses: ./.github/workflows/mshv-infra.yaml | ||
| with: | ||
| ARCH: x86_64 | ||
| KEY: azure_key_${{ github.run_id }} | ||
| OS_DISK_SIZE: 512 | ||
| RG: MSHV-INTEGRATION-${{ github.run_id }} | ||
| VM_SKU: Standard_D16s_v5 | ||
| secrets: | ||
| MI_CLIENT_ID: ${{ secrets.MSHV_MI_CLIENT_ID }} | ||
| RUNNER_RG: ${{ secrets.MSHV_RUNNER_RG }} | ||
| STORAGE_ACCOUNT_PATHS: ${{ secrets.MSHV_STORAGE_ACCOUNT_PATHS }} | ||
| ARCH_SOURCE_PATH: ${{ secrets.MSHV_X86_SOURCE_PATH }} | ||
| USERNAME: ${{ secrets.MSHV_USERNAME }} | ||
|
|
||
| run-tests: | ||
| name: Integration Tests | ||
| needs: infra-setup | ||
| if: ${{ always() && needs.infra-setup.result == 'success' }} | ||
| runs-on: mshv | ||
| continue-on-error: true | ||
| steps: | ||
| - name: Run integration tests | ||
| env: | ||
| KEY: azure_key_${{ github.run_id }} | ||
| PR_NUMBER: ${{ github.event.pull_request.number }} | ||
| REPO_URL: https://github.com/cloud-hypervisor/cloud-hypervisor.git | ||
| REPO_DIR: cloud-hypervisor | ||
| PRIVATE_IP: ${{ needs.infra-setup.outputs.PRIVATE_IP }} | ||
| RG: MSHV-${{ github.run_id }} | ||
| USERNAME: ${{ secrets.MSHV_USERNAME }} | ||
| run: | | ||
| set -e | ||
| echo "Connecting to the VM via SSH..." | ||
| ssh -i ~/.ssh/${KEY} -o StrictHostKeyChecking=no ${USERNAME}@${PRIVATE_IP} << EOF | ||
| set -e | ||
| echo "Logged in successfully." | ||
| export PATH="\$HOME/.cargo/bin:\$PATH" | ||
|
|
||
| if [[ "${{ github.event_name }}" == "pull_request_target" ]]; then | ||
| git clone --depth 1 "$REPO_URL" "$REPO_DIR" | ||
| cd "$REPO_DIR" | ||
| git fetch origin pull/${{ github.event.pull_request.number }}/merge | ||
| git checkout FETCH_HEAD | ||
| else | ||
| git clone --depth 1 --single-branch --branch "${{ github.ref_name }}" "$REPO_URL" "$REPO_DIR" | ||
| cd "$REPO_DIR" | ||
| fi | ||
|
|
||
| echo "Loading VDPA kernel modules..." | ||
| sudo modprobe vdpa | ||
| sudo modprobe vhost_vdpa | ||
| sudo modprobe vdpa_sim | ||
| sudo modprobe vdpa_sim_blk | ||
| sudo modprobe vdpa_sim_net | ||
|
|
||
| echo "Creating VDPA devices..." | ||
| sudo vdpa dev add name vdpa-blk0 mgmtdev vdpasim_blk | ||
| sudo vdpa dev add name vdpa-blk1 mgmtdev vdpasim_blk | ||
| sudo vdpa dev add name vdpa-blk2 mgmtdev vdpasim_net | ||
|
|
||
| echo "Setting permissions..." | ||
| for i in 0 1 2; do | ||
| dev="/dev/vhost-vdpa-$i" | ||
| if [ -e "$dev" ]; then | ||
| sudo chown $USER:$USER "$dev" | ||
| sudo chmod 660 "$dev" | ||
| else | ||
| echo "Warning: Device $dev not found" | ||
| fi | ||
| done | ||
|
|
||
| sudo ./scripts/dev_cli.sh tests --hypervisor mshv --integration -- -- --skip common_parallel::test_tpm --skip common_parallel::test_cpu_topology_421 --skip common_parallel::test_cpu_topology_142 --skip common_parallel::test_cpu_topology_262 --skip common_sequential::test_snapshot_restore_basic --skip common_sequential::test_snapshot_restore_with_fd --skip common_sequential::test_snapshot_restore_pvpanic --skip virtio_net_latency_us --skip common_parallel::test_cpu_hotplug | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. May be this skips tests could be retrieved from somewhere else, like a environment variable or a file.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, this can be done.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What value would it add? I imagine this list would shrink over time, so would it really be worth it? Also, how would the environment variable be set?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Not that much value — just cleaner code.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should disable the tests in integration.rs file may be. @likebreath ?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the integration.rs file we do have some build time filtering of tests that don't work on mshv. You will need to make sure you just pass the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think since this CI will be still testing mode for some time, we can merge this as now and work in parallel to make changes in integration.rs and eventually remove these tests from here. We should create an issue @gamora12 and start working. @rbradford ??
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, that's fine by me. I'm already working on it; I can raise a separate PR for these changes as well. |
||
| EOF | ||
|
|
||
| cleanup: | ||
| name: Cleanup | ||
| needs: run-tests | ||
| if: always() | ||
| runs-on: mshv | ||
| steps: | ||
| - name: Delete RG | ||
| env: | ||
| RG: MSHV-INTEGRATION-${{ github.run_id }} | ||
| run: | | ||
| if az group exists --name ${RG}; then | ||
| az group delete --name ${RG} --yes --no-wait | ||
russell-islam marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| else | ||
| echo "Resource Group ${RG} does not exist. Skipping deletion." | ||
| fi | ||
| echo "Cleanup process completed." | ||
|
|
||
| - name: Delete SSH Key | ||
| env: | ||
| KEY: azure_key_${{ github.run_id }} | ||
| run: | | ||
| if [ -f ~/.ssh/${KEY} ]; then | ||
| rm -f ~/.ssh/${KEY} ~/.ssh/${KEY}.pub | ||
| echo "SSH key deleted successfully." | ||
| else | ||
| echo "SSH key does not exist. Skipping deletion." | ||
| fi | ||
| echo "Cleanup process completed." | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's not recommended to use this - from the docs:
We should run on `merge_group`, as then the code has already been reviewed and we can be sure it isn't extracting secrets.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With
`pull_request_target`, the CI will only run the base repository code. Even if someone tries to introduce a code change that accesses secrets, the code won't be run until it's merged. The runner VM has access to the secrets, but it only runs the workflow code (not the PR code); the cloud-hypervisor code is run on a separate Azure VM (which doesn't have access to secrets and can't label or comment on the PR), so we're actually safe.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah - yes! Because dev_cli.sh is run inside a separately created VM. I'm happy if you're happy with that.