NVMe over RoCE (Mellanox OFED)

Install the Mellanox OFED driver.
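The installer ships inside the MLNX_OFED tarball for the matching distribution. A typical sequence, assuming the tarball has been placed under /usr/local/src (the version string matches the directory listing further below):

# cd /usr/local/src
# tar xzf MLNX_OFED_LINUX-4.7-1.0.0.1-fc29-x86_64.tgz
# cd MLNX_OFED_LINUX-4.7-1.0.0.1-fc29-x86_64

Then run the installer. --with-nvmf builds the NVMe over Fabrics modules; --add-kernel-support rebuilds the packages against the running kernel.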

# ./mlnxofedinstall --add-kernel-support --with-nvmf

./mlnxofedinstall has many Perl dependencies, so keep resolving errors as they come up. The Perl dependency failures are not written to standard error; you have to read the log file the script writes under /tmp.
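The installer prints the log path at the end of its run; if it scrolled past, the newest log under /tmp can be located with something like the following (the MLNX_OFED_LINUX name pattern is an assumption — check the installer's own message for the actual path):

# ls -dt /tmp/MLNX_OFED_LINUX* | head -1

In this environment the missing dependencies were resolved with: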

# dnf install perl-Term-ANSIColor
# dnf install tcsh tcl gcc-gfortran python2 tk make
# dnf install perl-Getopt-Long

Once the script finishes successfully, reboot and check that the driver version just installed is the one being loaded.

# ls /usr/local/src/
MLNX_OFED_LINUX-4.7-1.0.0.1-fc29-x86_64
MLNX_OFED_LINUX-4.7-1.0.0.1-fc29-x86_64.tgz
#
#
# modinfo mlx5_core
filename:       /lib/modules/4.18.16-300.fc29.x86_64/updates/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.ko
version:        4.7-1.0.0
license:        Dual BSD/GPL
description:    Mellanox 5th generation network adapters (ConnectX series) core driver
author:         Eli Cohen <eli@mellanox.com>
srcversion:     6FBC9B62D72B2165942C338
alias:          pci:v000015B3d0000A2D3sv*sd*bc*sc*i*
...
alias:          pci:v000015B3d00001011sv*sd*bc*sc*i*
depends:        mlx_compat,devlink,mlxfw
retpoline:      Y
name:           mlx5_core
vermagic:       4.18.16-300.fc29.x86_64 SMP mod_unload 
parm:           out_ifname:string
parm:           guids:charp
parm:           node_guid:guids configuration. This module parameter will be obsolete!
parm:           debug_mask:debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0 (uint)
parm:           prof_sel:profile selector. Valid range 0 - 3 (uint)
parm:           probe_vf:probe VFs or not, 0 = not probe, 1 = probe. Default = 1 (bool)
#
# 
# lsmod | grep mlx
mlx5_fpga_tools        16384  0
mlx5_ib               438272  0
mlx5_core            1175552  2 mlx5_fpga_tools,mlx5_ib
mlxfw                  24576  1 mlx5_core
mlx4_en               159744  0
mlx4_ib               253952  0
ib_uverbs             159744  4 mlx4_ib,rdma_ucm,mlx5_ib,ib_ucm
ib_core               409600  10 rdma_cm,ib_ipoib,mlx4_ib,iw_cm,ib_umad,rdma_ucm,ib_uverbs,mlx5_ib,ib_cm,ib_ucm
mlx4_core             401408  2 mlx4_ib,mlx4_en
devlink                61440  4 mlx4_core,mlx4_ib,mlx4_en,mlx5_core
mlx_compat             36864  15 rdma_cm,ib_ipoib,mlx4_core,mlx4_ib,iw_cm,mlx5_fpga_tools,ib_umad,mlx4_en,ib_core,rdma_ucm,ib_uverbs,mlx5_ib,ib_cm,mlx5_core,ib_ucm
#

On the machine that will act as the target, load the nvme, nvmet, and nvmet-rdma drivers.

# modprobe nvme
# modprobe nvmet
# modprobe nvmet-rdma
#
# lsmod | grep nvme
nvmet_rdma             49152  0
nvmet                  94208  1 nvmet_rdma
nvme                   45056  2 nvmet,nvmet_rdma
nvme_core              94208  1 nvme
rdma_cm                69632  2 nvmet_rdma,rdma_ucm
ib_core               409600  11 rdma_cm,ib_ipoib,mlx4_ib,nvmet_rdma,iw_cm,ib_umad,rdma_ucm,ib_uverbs,mlx5_ib,ib_cm,ib_ucm
mlx_compat             36864  19 rdma_cm,ib_ipoib,mlx4_core,nvmet,mlx4_ib,nvme,nvmet_rdma,iw_cm,nvme_core,mlx5_fpga_tools,ib_umad,mlx4_en,ib_core,rdma_ucm,ib_uverbs,mlx5_ib,ib_cm,mlx5_core,ib_ucm
# 
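To have the target modules load automatically at boot, they can be listed in a systemd modules-load.d file (a sketch; the file name is arbitrary):

# cat > /etc/modules-load.d/nvmet-rdma.conf << 'EOF'
nvme
nvmet
nvmet-rdma
EOF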

Configure the target with nvmetcli. The overall flow is:

1. Create a subsystem and assign it an NVMe device.
2. Create a port and configure the transport.
3. Link the subsystem to the port.
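On Fedora, nvmetcli is available as a package (# dnf install nvmetcli). The namespace below is backed by /dev/nullb0, a dummy block device provided by the null_blk module; if it does not exist yet, load the module first (by default it creates a single device):

# modprobe null_blk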

The following shows the configuration in interactive mode.

# nvmetcli 
/> ls
o- / ..................................................................... [...]
  o- hosts ............................................................... [...]
  o- ports ............................................................... [...]
  o- subsystems .......................................................... [...]
/> cd subsystems 
/subsystems> 
/subsystems> create rdma-test
/subsystems> cd rdma-test/
/subsystems/rdma-test> 
/subsystems/rdma-test> ls
o- rdma-test ............................................................. [...]
  o- allowed_hosts ....................................................... [...]
  o- namespaces .......................................................... [...]
/subsystems/rdma-test> 
/subsystems/rdma-test> set attr allow_any_host=1
Parameter allow_any_host is now '1'.
/subsystems/rdma-test> 
/subsystems/rdma-test> cd namespaces 
/subsystems/r...st/namespaces> 
/subsystems/r...st/namespaces> create 1
/subsystems/r...st/namespaces> cd 1
/subsystems/r.../namespaces/1> 
/subsystems/r.../namespaces/1> set device path=/dev/nullb0
Parameter path is now '/dev/nullb0'.
/subsystems/r.../namespaces/1> 
/subsystems/r.../namespaces/1> enable
The Namespace has been enabled.
/subsystems/r.../namespaces/1> 
/subsystems/r.../namespaces/1> ls /
o- / ..................................................................... [...]
  o- hosts ............................................................... [...]
  o- ports ............................................................... [...]
  o- subsystems .......................................................... [...]
    o- rdma-test ......................................................... [...]
      o- allowed_hosts ................................................... [...]
      o- namespaces ...................................................... [...]
        o- 1 ............................................................. [...]
/subsystems/r.../namespaces/1> 
/subsystems/r.../namespaces/1> cd /ports 
/ports> 
/ports> create 1
/ports> 
/ports> cd 1
/ports/1> set addr trtype=rdma
Parameter trtype is now 'rdma'.
/ports/1> 
/ports/1> set addr adrfam=ipv4
Parameter adrfam is now 'ipv4'.
/ports/1> 
/ports/1> set addr traddr=192.168.1.1
Parameter traddr is now '192.168.1.1'.
/ports/1> 
/ports/1> set addr trsvcid=4420
Parameter trsvcid is now '4420'.
/ports/1> 
/ports/1> cd subsystems 
/ports/1/subsystems> 
/ports/1/subsystems> create rdma-test 
/ports/1/subsystems> cd /
/> ls
o- / ..................................................................... [...]
  o- hosts ............................................................... [...]
  o- ports ............................................................... [...]
  | o- 1 ................................................................. [...]
  |   o- referrals ....................................................... [...]
  |   o- subsystems ...................................................... [...]
  |     o- rdma-test ..................................................... [...]
  o- subsystems .......................................................... [...]
    o- rdma-test ......................................................... [...]
      o- allowed_hosts ................................................... [...]
      o- namespaces ...................................................... [...]
        o- 1 ............................................................. [...]
/> saveconfig 
/> exit
# 
# ls /etc/nvmet/
config.json
#
# cat /etc/nvmet/config.json 
{
  "hosts": [],
  "ports": [
    {
      "addr": {
        "adrfam": "ipv4",
        "traddr": "192.168.1.1",
        "treq": "not specified",
        "trsvcid": "4420",
        "trtype": "rdma"
      },
      "portid": 1,
      "referrals": [],
      "subsystems": [
        "rdma-test"
      ]
    }
  ],
  "subsystems": [
    {
      "allowed_hosts": [],
      "attr": {
        "allow_any_host": "1",
        "offload": "0",
        "serial": "f3200381979e229a",
        "version": "1.3"
      },
      "namespaces": [
        {
          "device": {
            "nguid": "00000000-0000-0000-0000-000000000000",
            "path": "/dev/nullb0",
            "uuid": "d997bb6d-8a25-44bf-a48f-0a6ec8f44ce8"
          },
          "enable": 1,
          "nsid": 1
        }
      ],
      "nqn": "rdma-test"
    }
  ]
}
# 
# dmesg | grep nvmet
[ 1268.551544] nvmet: adding nsid 1 to subsystem rdma-test
[ 1361.866651] nvmet_rdma: enabling port 1 (192.168.1.1:4420)
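saveconfig wrote the settings to /etc/nvmet/config.json as shown above, but they are not reapplied automatically after a reboot. They can be restored by hand with nvmetcli (some distributions also package an nvmet.service unit that does this at boot):

# nvmetcli restore /etc/nvmet/config.json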

On the client side the steps are:

1. Load the nvme and nvme-rdma drivers.
2. Discover the NVMe over Fabrics device with nvme-cli.
3. Connect to the discovered device with nvme-cli.
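The nvme command used below comes from the nvme-cli package:

# dnf install nvme-cli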

# modprobe nvme
# modprobe nvme-rdma
#
# lsmod | grep nvme
nvme_rdma              32768  0
nvme_fabrics           24576  1 nvme_rdma
nvme                   45056  0
nvme_core              94208  3 nvme,nvme_rdma,nvme_fabrics
rdma_cm                69632  2 nvme_rdma,rdma_ucm
ib_core               409600  11 rdma_cm,ib_ipoib,mlx4_ib,nvme_rdma,iw_cm,ib_umad,rdma_ucm,ib_uverbs,mlx5_ib,ib_cm,ib_ucm
mlx_compat             36864  19 rdma_cm,ib_ipoib,mlx4_core,mlx4_ib,nvme,nvme_rdma,iw_cm,nvme_core,mlx5_fpga_tools,nvme_fabrics,ib_umad,mlx4_en,ib_core,rdma_ucm,ib_uverbs,mlx5_ib,ib_cm,mlx5_core,ib_ucm
# 
# nvme discover -t rdma -a 192.168.1.1 -s 4420

Discovery Log Number of Records 1, Generation counter 3
=====Discovery Log Entry 0======
trtype:  rdma
adrfam:  ipv4
subtype: nvme subsystem
treq:    unrecognized
portid:  1
trsvcid: 4420
subnqn:  rdma-test
traddr:  192.168.1.1
rdma_prtype: not specified
rdma_qptype: connected
rdma_cms:    rdma-cm
rdma_pkey: 0x0000
# 
# nvme connect -t rdma -n rdma-test -a 192.168.1.1 -s 4420
# 
# lsblk 
NAME                   MAJ:MIN RM   SIZE RO TYPE MOUNTPOINT
sda                      8:0    0 223.6G  0 disk 
├─sda1                   8:1    0   200M  0 part /boot/efi
├─sda2                   8:2    0     1G  0 part /boot
└─sda3                   8:3    0 222.4G  0 part 
  ├─fedora_rdma21-root 253:0    0    15G  0 lvm  /
  └─fedora_rdma21-swap 253:1    0  22.4G  0 lvm  [SWAP]
sdb                      8:16   0 447.1G  0 disk 
├─sdb1                   8:17   0   200M  0 part 
├─sdb2                   8:18   0     1G  0 part 
└─sdb3                   8:19   0   446G  0 part 
  ├─rhel_pcsd21-root   253:2    0    50G  0 lvm  
  ├─rhel_pcsd21-home   253:3    0 364.6G  0 lvm  
  └─rhel_pcsd21-swap   253:4    0  31.3G  0 lvm  
sdc                      8:32   1  14.6G  0 disk 
└─sdc1                   8:33   1  14.6G  0 part 
sr0                     11:0    1     4G  0 rom  
nvme0n1                259:1    0   250G  0 disk 
# 
# nvme list
Node             SN                   Model                                    Namespace Usage                      Format           FW Rev  
---------------- -------------------- ---------------------------------------- --------- -------------------------- ---------------- --------
/dev/nvme0n1     9a229e97810320f3     Linux                                    1         268.44  GB / 268.44  GB    512   B +  0 B   4.18.16-
# 

nvme0n1 has been successfully connected on the client.
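As a quick sanity check, I/O can be issued directly against the new device (the backing store here is null_blk, so writes are simply discarded and reads return zeros):

# dd if=/dev/zero of=/dev/nvme0n1 bs=1M count=100 oflag=direct

When finished, the device can be detached from the client by NQN:

# nvme disconnect -n rdma-test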