// Low level NVMe disk access
//
// Copyright 2017 Amazon.com, Inc. or its affiliates.
//
// This file may be distributed under the terms of the GNU LGPLv3 license.

#include "blockcmd.h"
#include "malloc.h" // malloc_high
#include "output.h" // dprintf
#include "pci.h"
#include "pci_ids.h" // PCI_CLASS_STORAGE_NVME
#include "pci_regs.h" // PCI_BASE_ADDRESS_0
#include "pcidevice.h" // foreachpci
#include "stacks.h" // yield
#include "std/disk.h" // DISK_RET_
#include "string.h" // memset
#include "util.h" // boot_add_hd
#include "x86.h" // readl

#include "nvme.h"
#include "nvme-int.h"

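/* Allocate a zeroed buffer from the given zone, aligned to the NVMe page
   size. */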
static void *
zalloc_page_aligned(struct zone_s *zone, u32 size)
{
    void *res = _malloc(zone, size, NVME_PAGE_SIZE);
    if (res) memset(res, 0, size);
    return res;
}

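/* Common setup for submission and completion queues: locate the queue's
   doorbell register and store the queue size mask. */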
static void
nvme_init_queue_common(struct nvme_ctrl *ctrl, struct nvme_queue *q, u16 q_idx,
                       u16 length)
{
    memset(q, 0, sizeof(*q));
    q->dbl = (u32 *)((char *)ctrl->reg + 0x1000 + q_idx * ctrl->doorbell_stride);
    dprintf(3, " q %p q_idx %u dbl %p\n", q, q_idx, q->dbl);
    q->mask = length - 1;
}

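/* Allocate the submission queue entries for an SQ and link it to its
   completion queue. Returns 0 on success. */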
static int
nvme_init_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq, u16 q_idx, u16 length,
             struct nvme_cq *cq)
{
    nvme_init_queue_common(ctrl, &sq->common, q_idx, length);
    sq->sqe = zalloc_page_aligned(&ZoneHigh, sizeof(*sq->sqe) * length);

    if (!sq->sqe) {
        warn_noalloc();
        return -1;
    }

    dprintf(3, "sq %p q_idx %u sqe %p\n", sq, q_idx, sq->sqe);
    sq->cq = cq;
    sq->head = 0;
    sq->tail = 0;

    return 0;
}

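/* Allocate the completion queue entries for a CQ and reset its head and
   expected phase. Returns 0 on success. */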
static int
nvme_init_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq, u16 q_idx, u16 length)
{
    nvme_init_queue_common(ctrl, &cq->common, q_idx, length);
    cq->cqe = zalloc_page_aligned(&ZoneHigh, sizeof(*cq->cqe) * length);
    if (!cq->cqe) {
        warn_noalloc();
        return -1;
    }

    cq->head = 0;

    /* All CQE phase bits are initialized to zero. This means initially we wait
       for the host controller to set these to 1. */
    cq->phase = 1;

    return 0;
}

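/* Returns true if the CQE at the queue head has been posted by the
   controller, i.e. its phase bit matches the phase we expect. */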
static int
nvme_poll_cq(struct nvme_cq *cq)
{
    u32 dw3 = readl(&cq->cqe[cq->head].dword[3]);
    return (!!(dw3 & NVME_CQE_DW3_P) == cq->phase);
}

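/* Returns true if the completion's Status Code field is zero (success). */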
static int
nvme_is_cqe_success(struct nvme_cqe const *cqe)
{
    return ((cqe->status >> 1) & 0xFF) == 0;
}

static struct nvme_cqe
nvme_error_cqe(void)
{
    struct nvme_cqe r;

    /* 0xFF is a vendor specific status code != success. Should be okay for
       indicating failure. */
    memset(&r, 0xFF, sizeof(r));
    return r;
}

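/* Dequeue the completion entry at the CQ head: advance the head (flipping
   the expected phase on wrap-around), update the cached SQ head, and ring
   the completion queue doorbell. */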
static struct nvme_cqe
nvme_consume_cqe(struct nvme_sq *sq)
{
    struct nvme_cq *cq = sq->cq;

    if (!nvme_poll_cq(cq)) {
        /* Cannot consume a completion queue entry, if there is none ready. */
        return nvme_error_cqe();
    }

    struct nvme_cqe *cqe = &cq->cqe[cq->head];
    u16 cq_next_head = (cq->head + 1) & cq->common.mask;
    dprintf(4, "cq %p head %u -> %u\n", cq, cq->head, cq_next_head);
    if (cq_next_head < cq->head) {
        dprintf(3, "cq %p wrap\n", cq);
        cq->phase = ~cq->phase;
    }
    cq->head = cq_next_head;

    /* Update the submission queue head. */
    if (cqe->sq_head != sq->head) {
        sq->head = cqe->sq_head;
        dprintf(4, "sq %p advanced to %u\n", sq, cqe->sq_head);
    }

    /* Tell the controller that we consumed the completion. */
    writel(cq->common.dbl, cq->head);

    return *cqe;
}

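/* Poll for a completion on the CQ belonging to the given SQ, yielding while
   waiting. Returns an error CQE if the timeout expires. */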
static struct nvme_cqe
nvme_wait(struct nvme_sq *sq)
{
    static const unsigned nvme_timeout = 5000 /* ms */;
    u32 to = timer_calc(nvme_timeout);
    while (!nvme_poll_cq(sq->cq)) {
        yield();

        if (timer_check(to)) {
            warn_timeout();
            return nvme_error_cqe();
        }
    }

    return nvme_consume_cqe(sq);
}

/* Returns the next submission queue entry (or NULL if the queue is full). It
   also fills out Command Dword 0 and clears the rest. */
static struct nvme_sqe *
nvme_get_next_sqe(struct nvme_sq *sq, u8 opc, void *metadata, void *data)
{
    if (((sq->head + 1) & sq->common.mask) == sq->tail) {
        dprintf(3, "submission queue is full");
        return NULL;
    }

    struct nvme_sqe *sqe = &sq->sqe[sq->tail];
    dprintf(4, "sq %p next_sqe %u\n", sq, sq->tail);

    memset(sqe, 0, sizeof(*sqe));
    sqe->cdw0 = opc | (sq->tail << 16 /* CID */);
    sqe->mptr = (u32)metadata;
    sqe->dptr_prp1 = (u32)data;

    if (sqe->dptr_prp1 & (NVME_PAGE_SIZE - 1)) {
        /* Data buffer not page aligned. */
        warn_internalerror();
    }

    return sqe;
}

/* Call this after you've filled out an sqe that you've got from nvme_get_next_sqe. */
static void
nvme_commit_sqe(struct nvme_sq *sq)
{
    dprintf(4, "sq %p commit_sqe %u\n", sq, sq->tail);
    sq->tail = (sq->tail + 1) & sq->common.mask;
    writel(sq->common.dbl, sq->tail);
}

/* Perform an identify command on the admin queue and return the resulting
   buffer. This may be a NULL pointer, if something failed. This function
   cannot be used after initialization, because it uses buffers in tmp zone. */
static union nvme_identify *
nvme_admin_identify(struct nvme_ctrl *ctrl, u8 cns, u32 nsid)
{
    union nvme_identify *identify_buf = zalloc_page_aligned(&ZoneTmpHigh, 4096);
    if (!identify_buf) {
        /* Could not allocate identify buffer. */
        warn_internalerror();
        return NULL;
    }

    struct nvme_sqe *cmd_identify;
    cmd_identify = nvme_get_next_sqe(&ctrl->admin_sq,
                                     NVME_SQE_OPC_ADMIN_IDENTIFY, NULL,
                                     identify_buf);

    if (!cmd_identify) {
        warn_internalerror();
        goto error;
    }

    cmd_identify->nsid = nsid;
    cmd_identify->dword[10] = cns;

    nvme_commit_sqe(&ctrl->admin_sq);

    struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);

    if (!nvme_is_cqe_success(&cqe)) {
        goto error;
    }

    return identify_buf;
 error:
    free(identify_buf);
    return NULL;
}

static struct nvme_identify_ctrl *
nvme_admin_identify_ctrl(struct nvme_ctrl *ctrl)
{
    return &nvme_admin_identify(ctrl, NVME_ADMIN_IDENTIFY_CNS_ID_CTRL, 0)->ctrl;
}

static struct nvme_identify_ns *
nvme_admin_identify_ns(struct nvme_ctrl *ctrl, u32 ns_id)
{
    return &nvme_admin_identify(ctrl, NVME_ADMIN_IDENTIFY_CNS_ID_NS,
                                ns_id)->ns;
}

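/* Identify namespace ns_id and, if it is active and usable, fill in its
   drive structure and register it as a bootable disk. */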
static void
nvme_probe_ns(struct nvme_ctrl *ctrl, struct nvme_namespace *ns, u32 ns_id,
              u8 mdts)
{
    ns->ctrl = ctrl;
    ns->ns_id = ns_id;

    struct nvme_identify_ns *id = nvme_admin_identify_ns(ctrl, ns_id);
    if (!id) {
        dprintf(2, "NVMe couldn't identify namespace %u.\n", ns_id);
        goto free_buffer;
    }

    u8 current_lba_format = id->flbas & 0xF;
    if (current_lba_format > id->nlbaf) {
        dprintf(2, "NVMe NS %u: current LBA format %u is beyond what the "
                " namespace supports (%u)?\n",
                ns_id, current_lba_format, id->nlbaf + 1);
        goto free_buffer;
    }

    ns->lba_count = id->nsze;
    if (!ns->lba_count) {
        dprintf(2, "NVMe NS %u is inactive.\n", ns_id);
        goto free_buffer;
    }

    struct nvme_lba_format *fmt = &id->lbaf[current_lba_format];

    ns->block_size = 1U << fmt->lbads;
    ns->metadata_size = fmt->ms;

    if (ns->block_size > NVME_PAGE_SIZE) {
        /* If we see devices that trigger this path, we need to increase our
           buffer size. */
        warn_internalerror();
        goto free_buffer;
    }

    ns->drive.cntl_id = ns - ctrl->ns;
    ns->drive.removable = 0;
    ns->drive.type = DTYPE_NVME;
    ns->drive.blksize = ns->block_size;
    ns->drive.sectors = ns->lba_count;

    if (mdts) {
        ns->max_req_size = ((1U << mdts) * NVME_PAGE_SIZE) / ns->block_size;
        dprintf(3, "NVME NS %u max request size: %d sectors\n",
                ns_id, ns->max_req_size);
    } else {
        ns->max_req_size = -1U;
    }

    ns->dma_buffer = zalloc_page_aligned(&ZoneHigh, NVME_PAGE_SIZE);

    char *desc = znprintf(MAXDESCSIZE, "NVMe NS %u: %llu MiB (%llu %u-byte "
                          "blocks + %u-byte metadata)\n",
                          ns_id, (ns->lba_count * ns->block_size) >> 20,
                          ns->lba_count, ns->block_size, ns->metadata_size);

    dprintf(3, "%s", desc);
    boot_add_hd(&ns->drive, desc, bootprio_find_pci_device(ctrl->pci));

free_buffer:
    free (id);
}


/* Release memory allocated for a completion queue */
static void
nvme_destroy_cq(struct nvme_cq *cq)
{
    free(cq->cqe);
    cq->cqe = NULL;
}

/* Release memory allocated for a submission queue */
static void
nvme_destroy_sq(struct nvme_sq *sq)
{
    free(sq->sqe);
    sq->sqe = NULL;
}

/* Returns 0 on success. */
static int
nvme_create_io_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq, u16 q_idx)
{
    int rc;
    struct nvme_sqe *cmd_create_cq;
    u32 length = 1 + (ctrl->reg->cap & 0xffff);
    if (length > NVME_PAGE_SIZE / sizeof(struct nvme_cqe))
        length = NVME_PAGE_SIZE / sizeof(struct nvme_cqe);

    rc = nvme_init_cq(ctrl, cq, q_idx, length);
    if (rc) {
        goto err;
    }

    cmd_create_cq = nvme_get_next_sqe(&ctrl->admin_sq,
                                      NVME_SQE_OPC_ADMIN_CREATE_IO_CQ, NULL,
                                      cq->cqe);
    if (!cmd_create_cq) {
        goto err_destroy_cq;
    }

    cmd_create_cq->dword[10] = (cq->common.mask << 16) | (q_idx >> 1);
    cmd_create_cq->dword[11] = 1 /* physically contiguous */;

    nvme_commit_sqe(&ctrl->admin_sq);

    struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);

    if (!nvme_is_cqe_success(&cqe)) {
        dprintf(2, "create io cq failed: %08x %08x %08x %08x\n",
                cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);

        goto err_destroy_cq;
    }

    return 0;

err_destroy_cq:
    nvme_destroy_cq(cq);
err:
    return -1;
}

/* Returns 0 on success. */
static int
nvme_create_io_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq, u16 q_idx, struct nvme_cq *cq)
{
    int rc;
    struct nvme_sqe *cmd_create_sq;
    u32 length = 1 + (ctrl->reg->cap & 0xffff);
    if (length > NVME_PAGE_SIZE / sizeof(struct nvme_cqe))
        length = NVME_PAGE_SIZE / sizeof(struct nvme_cqe);

    rc = nvme_init_sq(ctrl, sq, q_idx, length, cq);
    if (rc) {
        goto err;
    }

    cmd_create_sq = nvme_get_next_sqe(&ctrl->admin_sq,
                                      NVME_SQE_OPC_ADMIN_CREATE_IO_SQ, NULL,
                                      sq->sqe);
    if (!cmd_create_sq) {
        goto err_destroy_sq;
    }

    cmd_create_sq->dword[10] = (sq->common.mask << 16) | (q_idx >> 1);
    cmd_create_sq->dword[11] = (q_idx >> 1) << 16 | 1 /* contiguous */;
    dprintf(3, "sq %p create dword10 %08x dword11 %08x\n", sq,
            cmd_create_sq->dword[10], cmd_create_sq->dword[11]);

    nvme_commit_sqe(&ctrl->admin_sq);

    struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);

    if (!nvme_is_cqe_success(&cqe)) {
        dprintf(2, "create io sq failed: %08x %08x %08x %08x\n",
                cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);
        goto err_destroy_sq;
    }

    return 0;

err_destroy_sq:
    nvme_destroy_sq(sq);
err:
    return -1;
}

/* Reads or writes count sectors to/from buf. Returns DISK_RET_*. The buffer
   cannot cross page boundaries. */
static int
nvme_io_readwrite(struct nvme_namespace *ns, u64 lba, char *buf, u16 count,
                  int write)
{
    u32 buf_addr = (u32)buf;

    if ((buf_addr & 0x3) ||
        ((buf_addr & ~(NVME_PAGE_SIZE - 1)) !=
         ((buf_addr + ns->block_size * count - 1) & ~(NVME_PAGE_SIZE - 1)))) {
        /* Buffer is misaligned or crosses page boundary */
        warn_internalerror();
        return DISK_RET_EBADTRACK;
    }

    struct nvme_sqe *io_read = nvme_get_next_sqe(&ns->ctrl->io_sq,
                                                 write ? NVME_SQE_OPC_IO_WRITE
                                                       : NVME_SQE_OPC_IO_READ,
                                                 NULL, buf);
    io_read->nsid = ns->ns_id;
    io_read->dword[10] = (u32)lba;
    io_read->dword[11] = (u32)(lba >> 32);
    io_read->dword[12] = (1U << 31 /* limited retry */) | (count - 1);

    nvme_commit_sqe(&ns->ctrl->io_sq);

    struct nvme_cqe cqe = nvme_wait(&ns->ctrl->io_sq);

    if (!nvme_is_cqe_success(&cqe)) {
        dprintf(2, "read io: %08x %08x %08x %08x\n",
                cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);

        return DISK_RET_EBADTRACK;
    }

    return DISK_RET_SUCCESS;
}

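/* Create the I/O completion/submission queue pair used for disk reads and
   writes. Returns 0 on success. */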
static int
nvme_create_io_queues(struct nvme_ctrl *ctrl)
{
    if (nvme_create_io_cq(ctrl, &ctrl->io_cq, 3))
        goto err;

    if (nvme_create_io_sq(ctrl, &ctrl->io_sq, 2, &ctrl->io_cq))
        goto err_free_cq;

    return 0;

 err_free_cq:
    nvme_destroy_cq(&ctrl->io_cq);
 err:
    return -1;
}

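/* Free the I/O queue pair again. */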
static void
nvme_destroy_io_queues(struct nvme_ctrl *ctrl)
{
    nvme_destroy_sq(&ctrl->io_sq);
    nvme_destroy_cq(&ctrl->io_cq);
}

/* Waits for CSTS.RDY to match rdy. Returns 0 on success. */
static int
nvme_wait_csts_rdy(struct nvme_ctrl *ctrl, unsigned rdy)
{
    u32 const max_to = 500 /* ms */ * ((ctrl->reg->cap >> 24) & 0xFFU);
    u32 to = timer_calc(max_to);
    u32 csts;

    while (rdy != ((csts = ctrl->reg->csts) & NVME_CSTS_RDY)) {
        yield();

        if (csts & NVME_CSTS_FATAL) {
            dprintf(3, "NVMe fatal error during controller shutdown\n");
            return -1;
        }

        if (timer_check(to)) {
            warn_timeout();
            return -1;
        }
    }

    return 0;
}

/* Returns 0 on success. */
static int
nvme_controller_enable(struct nvme_ctrl *ctrl)
{
    int rc;

    pci_enable_busmaster(ctrl->pci);

    /* Turn the controller off. */
    ctrl->reg->cc = 0;
    if (nvme_wait_csts_rdy(ctrl, 0)) {
        dprintf(2, "NVMe fatal error during controller shutdown\n");
        return -1;
    }

    ctrl->doorbell_stride = 4U << ((ctrl->reg->cap >> 32) & 0xF);

    rc = nvme_init_cq(ctrl, &ctrl->admin_cq, 1,
                      NVME_PAGE_SIZE / sizeof(struct nvme_cqe));
    if (rc) {
        return -1;
    }

    rc = nvme_init_sq(ctrl, &ctrl->admin_sq, 0,
                      NVME_PAGE_SIZE / sizeof(struct nvme_sqe), &ctrl->admin_cq);
    if (rc) {
        goto err_destroy_admin_cq;
    }

    ctrl->reg->aqa = ctrl->admin_cq.common.mask << 16
        | ctrl->admin_sq.common.mask;

    ctrl->reg->asq = (u32)ctrl->admin_sq.sqe;
    ctrl->reg->acq = (u32)ctrl->admin_cq.cqe;

    dprintf(3, " admin submission queue: %p\n", ctrl->admin_sq.sqe);
    dprintf(3, " admin completion queue: %p\n", ctrl->admin_cq.cqe);

    ctrl->reg->cc = NVME_CC_EN | (NVME_CQE_SIZE_LOG << 20)
        | (NVME_SQE_SIZE_LOG << 16 /* IOSQES */);

    if (nvme_wait_csts_rdy(ctrl, 1)) {
        dprintf(2, "NVMe fatal error while enabling controller\n");
        goto err_destroy_admin_sq;
    }

    /* The admin queue is set up and the controller is ready. Let's figure out
       what namespaces we have. */

    struct nvme_identify_ctrl *identify = nvme_admin_identify_ctrl(ctrl);

    if (!identify) {
        dprintf(2, "NVMe couldn't identify controller.\n");
        goto err_destroy_admin_sq;
    }

    dprintf(3, "NVMe has %u namespace%s.\n",
            identify->nn, (identify->nn == 1) ? "" : "s");

    ctrl->ns_count = identify->nn;
    u8 mdts = identify->mdts; /* save before freeing the identify buffer */
    free(identify);

    if ((ctrl->ns_count == 0) || nvme_create_io_queues(ctrl)) {
        /* No point to continue, if the controller says it doesn't have
           namespaces or we couldn't create I/O queues. */
        goto err_destroy_admin_sq;
    }

    ctrl->ns = malloc_fseg(sizeof(*ctrl->ns) * ctrl->ns_count);
    if (!ctrl->ns) {
        warn_noalloc();
        goto err_destroy_ioq;
    }
    memset(ctrl->ns, 0, sizeof(*ctrl->ns) * ctrl->ns_count);

    /* Populate namespace IDs */
    int ns_idx;
    for (ns_idx = 0; ns_idx < ctrl->ns_count; ns_idx++) {
        nvme_probe_ns(ctrl, &ctrl->ns[ns_idx], ns_idx + 1, mdts);
    }

    dprintf(3, "NVMe initialization complete!\n");
    return 0;

 err_destroy_ioq:
    nvme_destroy_io_queues(ctrl);
 err_destroy_admin_sq:
    nvme_destroy_sq(&ctrl->admin_sq);
 err_destroy_admin_cq:
    nvme_destroy_cq(&ctrl->admin_cq);
    return -1;
}

/* Initialize an NVMe controller and detect its drives. */
static void
nvme_controller_setup(void *opaque)
{
    u8 skip_nonbootable = is_bootprio_strict();
    struct pci_device *pci = opaque;

    if (skip_nonbootable && bootprio_find_pci_device(pci) < 0) {
        dprintf(1, "skipping init of a non-bootable NVMe at %pP\n",
                pci);
        goto err;
    }

    struct nvme_reg volatile *reg = pci_enable_membar(pci, PCI_BASE_ADDRESS_0);
    if (!reg)
        return;

    u32 version = reg->vs;
    dprintf(3, "Found NVMe controller with version %u.%u.%u.\n",
            version >> 16, (version >> 8) & 0xFF, version & 0xFF);
    dprintf(3, " Capabilities %016llx\n", reg->cap);

    if (~reg->cap & NVME_CAP_CSS_NVME) {
        dprintf(3, "Controller doesn't speak NVMe command set. Skipping.\n");
        goto err;
    }

    struct nvme_ctrl *ctrl = malloc_high(sizeof(*ctrl));
    if (!ctrl) {
        warn_noalloc();
        goto err;
    }

    memset(ctrl, 0, sizeof(*ctrl));

    ctrl->reg = reg;
    ctrl->pci = pci;

    if (nvme_controller_enable(ctrl)) {
        goto err_free_ctrl;
    }

    return;

 err_free_ctrl:
    free(ctrl);
 err:
    dprintf(2, "Failed to enable NVMe controller.\n");
}

// Locate and init NVMe controllers
static void
nvme_scan(void)
{
    // Scan PCI bus for NVMe adapters
    struct pci_device *pci;

    foreachpci(pci) {
        if (pci->class != PCI_CLASS_STORAGE_NVME)
            continue;
        if (pci->prog_if != 2 /* as of NVM 1.0e */) {
            dprintf(3, "Found incompatible NVMe: prog-if=%02x\n", pci->prog_if);
            continue;
        }

        run_thread(nvme_controller_setup, pci);
    }
}

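/* Handle a disk read/write request by splitting it into chunks of at most
   one page each, bounced through the namespace's DMA buffer. Returns
   DISK_RET_*. */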
static int
nvme_cmd_readwrite(struct nvme_namespace *ns, struct disk_op_s *op, int write)
{
    int res = DISK_RET_SUCCESS;
    u16 const max_blocks = NVME_PAGE_SIZE / ns->block_size;
    u16 i;

    for (i = 0; i < op->count && res == DISK_RET_SUCCESS;) {
        u16 blocks_remaining = op->count - i;
        u16 blocks = blocks_remaining < max_blocks ? blocks_remaining
                                                   : max_blocks;
        char *op_buf = op->buf_fl + i * ns->block_size;

        if (write) {
            memcpy(ns->dma_buffer, op_buf, blocks * ns->block_size);
        }

        res = nvme_io_readwrite(ns, op->lba + i, ns->dma_buffer, blocks, write);
        dprintf(5, "ns %u %s lba %llu+%u: %d\n", ns->ns_id, write ? "write"
                                                                  : "read",
                op->lba + i, blocks, res);

        if (!write && res == DISK_RET_SUCCESS) {
            memcpy(op_buf, ns->dma_buffer, blocks * ns->block_size);
        }

        i += blocks;
    }

    return res;
}

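/* Dispatch a block layer request to the NVMe driver. */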
int
nvme_process_op(struct disk_op_s *op)
{
    if (!CONFIG_NVME)
        return DISK_RET_SUCCESS;

    struct nvme_namespace *ns = container_of(op->drive_fl, struct nvme_namespace,
                                              drive);

    switch (op->command) {
    case CMD_READ:
    case CMD_WRITE:
        return nvme_cmd_readwrite(ns, op, op->command == CMD_WRITE);
    default:
        return default_process_op(op);
    }
}

void
nvme_setup(void)
{
    ASSERT32FLAT();
    if (!CONFIG_NVME)
        return;

    dprintf(3, "init nvme\n");
    nvme_scan();
}

/* EOF */