// Low level NVMe disk access
//
// Copyright 2017 Amazon.com, Inc. or its affiliates.
//
// This file may be distributed under the terms of the GNU LGPLv3 license.

#include "blockcmd.h"
#include "malloc.h" // malloc_high
#include "output.h" // dprintf
#include "pci.h"
#include "pci_ids.h" // PCI_CLASS_STORAGE_NVME
#include "pci_regs.h" // PCI_BASE_ADDRESS_0
#include "pcidevice.h" // foreachpci
#include "stacks.h" // yield
#include "std/disk.h" // DISK_RET_
#include "string.h" // memset
#include "util.h" // boot_add_hd
#include "x86.h" // readl

#include "nvme.h"
#include "nvme-int.h"

static void *
zalloc_page_aligned(struct zone_s *zone, u32 size)
{
    void *res = _malloc(zone, size, NVME_PAGE_SIZE);
    if (res) memset(res, 0, size);
    return res;
}

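/* Doorbell registers live at offset 0x1000 from the controller's MMIO base,
   spaced 4 << CAP.DSTRD bytes apart. q_idx is the doorbell index: the tail
   doorbell of submission queue y is 2y and the head doorbell of completion
   queue y is 2y + 1, so the queue ID is q_idx >> 1. */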
static void
nvme_init_queue_common(struct nvme_ctrl *ctrl, struct nvme_queue *q, u16 q_idx,
                       u16 length)
{
    memset(q, 0, sizeof(*q));
    q->dbl = (u32 *)((char *)ctrl->reg + 0x1000 + q_idx * ctrl->doorbell_stride);
    dprintf(3, " q %p q_idx %u dbl %p\n", q, q_idx, q->dbl);
    q->mask = length - 1;
}

static int
nvme_init_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq, u16 q_idx, u16 length,
             struct nvme_cq *cq)
{
    nvme_init_queue_common(ctrl, &sq->common, q_idx, length);
    sq->sqe = zalloc_page_aligned(&ZoneHigh, sizeof(*sq->sqe) * length);

    if (!sq->sqe) {
        warn_noalloc();
        return -1;
    }

    dprintf(3, "sq %p q_idx %u sqe %p\n", sq, q_idx, sq->sqe);
    sq->cq = cq;
    sq->head = 0;
    sq->tail = 0;

    return 0;
}

static int
nvme_init_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq, u16 q_idx, u16 length)
{
    nvme_init_queue_common(ctrl, &cq->common, q_idx, length);
    cq->cqe = zalloc_page_aligned(&ZoneHigh, sizeof(*cq->cqe) * length);
    if (!cq->cqe) {
        warn_noalloc();
        return -1;
    }

    cq->head = 0;

    /* All CQE phase bits are initialized to zero. This means we initially
       wait for the controller to set these to 1. */
    cq->phase = 1;

    return 0;
}

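/* A completion entry is new when its phase bit matches the phase the host
   expects. The controller inverts the bit it writes on each pass through the
   queue, so the expected phase flips whenever the head wraps (see
   nvme_consume_cqe). */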
static int
nvme_poll_cq(struct nvme_cq *cq)
{
    u32 dw3 = readl(&cq->cqe[cq->head].dword[3]);
    return (!!(dw3 & NVME_CQE_DW3_P) == cq->phase);
}

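/* In the CQE's status halfword, bit 0 is the phase tag, bits 8:1 are the
   Status Code (zero on success), and bits 11:9 are the Status Code Type,
   which is not examined here. */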
static int
nvme_is_cqe_success(struct nvme_cqe const *cqe)
{
    return ((cqe->status >> 1) & 0xFF) == 0;
}

static struct nvme_cqe
nvme_error_cqe(void)
{
    struct nvme_cqe r;

    /* 0xFF is a vendor specific status code != success. Should be okay for
       indicating failure. */
    memset(&r, 0xFF, sizeof(r));
    return r;
}

static struct nvme_cqe
nvme_consume_cqe(struct nvme_sq *sq)
{
    struct nvme_cq *cq = sq->cq;

    if (!nvme_poll_cq(cq)) {
        /* Cannot consume a completion queue entry if there is none ready. */
        return nvme_error_cqe();
    }

    struct nvme_cqe *cqe = &cq->cqe[cq->head];
    u16 cq_next_head = (cq->head + 1) & cq->common.mask;
    dprintf(4, "cq %p head %u -> %u\n", cq, cq->head, cq_next_head);
    if (cq_next_head < cq->head) {
        dprintf(3, "cq %p wrap\n", cq);
        cq->phase = ~cq->phase;
    }
    cq->head = cq_next_head;

    /* Update the submission queue head. */
    if (cqe->sq_head != sq->head) {
        sq->head = cqe->sq_head;
        dprintf(4, "sq %p advanced to %u\n", sq, cqe->sq_head);
    }

    /* Tell the controller that we consumed the completion. */
    writel(cq->common.dbl, cq->head);

    return *cqe;
}

static struct nvme_cqe
nvme_wait(struct nvme_sq *sq)
{
    static const unsigned nvme_timeout = 5000 /* ms */;
    u32 to = timer_calc(nvme_timeout);
    while (!nvme_poll_cq(sq->cq)) {
        yield();

        if (timer_check(to)) {
            warn_timeout();
            return nvme_error_cqe();
        }
    }

    return nvme_consume_cqe(sq);
}

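/* Command Dword 0 carries the opcode in bits 7:0 and the Command Identifier
   (CID) in bits 31:16. Reusing the queue slot index as CID keeps the ID
   unique among the commands that can be outstanding at once. */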
/* Returns the next submission queue entry (or NULL if the queue is full). It
   also fills out Command Dword 0 and clears the rest. */
static struct nvme_sqe *
nvme_get_next_sqe(struct nvme_sq *sq, u8 opc, void *metadata, void *data)
{
    if (((sq->head + 1) & sq->common.mask) == sq->tail) {
        dprintf(3, "submission queue is full\n");
        return NULL;
    }

    struct nvme_sqe *sqe = &sq->sqe[sq->tail];
    dprintf(4, "sq %p next_sqe %u\n", sq, sq->tail);

    memset(sqe, 0, sizeof(*sqe));
    sqe->cdw0 = opc | (sq->tail << 16 /* CID */);
    sqe->mptr = (u32)metadata;
    sqe->dptr_prp1 = (u32)data;

    if (sqe->dptr_prp1 & (NVME_PAGE_SIZE - 1)) {
        /* Data buffer not page aligned. */
        warn_internalerror();
    }

    return sqe;
}

/* Call this after you've filled out an sqe that you've got from nvme_get_next_sqe. */
static void
nvme_commit_sqe(struct nvme_sq *sq)
{
    dprintf(4, "sq %p commit_sqe %u\n", sq, sq->tail);
    sq->tail = (sq->tail + 1) & sq->common.mask;
    writel(sq->common.dbl, sq->tail);
}

/* Perform an identify command on the admin queue and return the resulting
   buffer. Returns NULL if something failed. This function cannot be used
   after initialization, because it uses buffers in the tmp zone. */
static union nvme_identify *
nvme_admin_identify(struct nvme_ctrl *ctrl, u8 cns, u32 nsid)
{
    union nvme_identify *identify_buf = zalloc_page_aligned(&ZoneTmpHigh, 4096);
    if (!identify_buf) {
        /* Could not allocate identify buffer. */
        warn_internalerror();
        return NULL;
    }

    struct nvme_sqe *cmd_identify;
    cmd_identify = nvme_get_next_sqe(&ctrl->admin_sq,
                                     NVME_SQE_OPC_ADMIN_IDENTIFY, NULL,
                                     identify_buf);

    if (!cmd_identify) {
        warn_internalerror();
        goto error;
    }

    cmd_identify->nsid = nsid;
    cmd_identify->dword[10] = cns;

    nvme_commit_sqe(&ctrl->admin_sq);

    struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);

    if (!nvme_is_cqe_success(&cqe)) {
        goto error;
    }

    return identify_buf;
 error:
    free(identify_buf);
    return NULL;
}

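/* Note: nvme_admin_identify can return NULL. All union members sit at offset
   0, so taking a member's address below leaves a NULL result unchanged in
   practice and it propagates to the caller. */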
static struct nvme_identify_ctrl *
nvme_admin_identify_ctrl(struct nvme_ctrl *ctrl)
{
    return &nvme_admin_identify(ctrl, NVME_ADMIN_IDENTIFY_CNS_ID_CTRL, 0)->ctrl;
}

static struct nvme_identify_ns *
nvme_admin_identify_ns(struct nvme_ctrl *ctrl, u32 ns_id)
{
    return &nvme_admin_identify(ctrl, NVME_ADMIN_IDENTIFY_CNS_ID_NS,
                                ns_id)->ns;
}

static void
nvme_probe_ns(struct nvme_ctrl *ctrl, struct nvme_namespace *ns, u32 ns_id)
{
    ns->ctrl = ctrl;
    ns->ns_id = ns_id;

    struct nvme_identify_ns *id = nvme_admin_identify_ns(ctrl, ns_id);
    if (!id) {
        dprintf(2, "NVMe couldn't identify namespace %u.\n", ns_id);
        goto free_buffer;
    }

    u8 current_lba_format = id->flbas & 0xF;
    if (current_lba_format > id->nlbaf) {
        dprintf(2, "NVMe NS %u: current LBA format %u is beyond what the "
                "namespace supports (%u)?\n",
                ns_id, current_lba_format, id->nlbaf + 1);
        goto free_buffer;
    }

    ns->lba_count = id->nsze;
    if (!ns->lba_count) {
        dprintf(2, "NVMe NS %u is inactive.\n", ns_id);
        goto free_buffer;
    }

    struct nvme_lba_format *fmt = &id->lbaf[current_lba_format];

    ns->block_size = 1U << fmt->lbads;
    ns->metadata_size = fmt->ms;

    if (ns->block_size > NVME_PAGE_SIZE) {
        /* If we see devices that trigger this path, we need to increase our
           buffer size. */
        warn_internalerror();
        goto free_buffer;
    }

    ns->drive.cntl_id = ns - ctrl->ns;
    ns->drive.removable = 0;
    ns->drive.type = DTYPE_NVME;
    ns->drive.blksize = ns->block_size;
    ns->drive.sectors = ns->lba_count;

    ns->dma_buffer = zalloc_page_aligned(&ZoneHigh, NVME_PAGE_SIZE);
    if (!ns->dma_buffer) {
        /* Without the bounce buffer, the read/write path cannot work. */
        warn_noalloc();
        goto free_buffer;
    }

    char *desc = znprintf(MAXDESCSIZE, "NVMe NS %u: %llu MiB (%llu %u-byte "
                          "blocks + %u-byte metadata)\n",
                          ns_id, (ns->lba_count * ns->block_size) >> 20,
                          ns->lba_count, ns->block_size, ns->metadata_size);

    dprintf(3, "%s", desc);
    boot_add_hd(&ns->drive, desc, bootprio_find_pci_device(ctrl->pci));

free_buffer:
    free(id);
}

/* Release memory allocated for a completion queue */
static void
nvme_destroy_cq(struct nvme_cq *cq)
{
    free(cq->cqe);
    cq->cqe = NULL;
}

/* Release memory allocated for a submission queue */
static void
nvme_destroy_sq(struct nvme_sq *sq)
{
    free(sq->sqe);
    sq->sqe = NULL;
}

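/* CAP.MQES (bits 15:0) is the largest queue size the controller supports, as
   a 0's based value; hence the "1 +" below. The length is further capped so
   the 16-byte CQEs fit in a single page. */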
/* Returns 0 on success. */
static int
nvme_create_io_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq, u16 q_idx)
{
    int rc;
    struct nvme_sqe *cmd_create_cq;
    u32 length = 1 + (ctrl->reg->cap & 0xffff);
    if (length > NVME_PAGE_SIZE / sizeof(struct nvme_cqe))
        length = NVME_PAGE_SIZE / sizeof(struct nvme_cqe);

    rc = nvme_init_cq(ctrl, cq, q_idx, length);
    if (rc) {
        goto err;
    }

    cmd_create_cq = nvme_get_next_sqe(&ctrl->admin_sq,
                                      NVME_SQE_OPC_ADMIN_CREATE_IO_CQ, NULL,
                                      cq->cqe);
    if (!cmd_create_cq) {
        goto err_destroy_cq;
    }

    cmd_create_cq->dword[10] = (cq->common.mask << 16) | (q_idx >> 1);
    cmd_create_cq->dword[11] = 1 /* physically contiguous */;

    nvme_commit_sqe(&ctrl->admin_sq);

    struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);

    if (!nvme_is_cqe_success(&cqe)) {
        dprintf(2, "create io cq failed: %08x %08x %08x %08x\n",
                cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);

        goto err_destroy_cq;
    }

    return 0;

err_destroy_cq:
    nvme_destroy_cq(cq);
err:
    return -1;
}

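/* Note: the length cap below uses sizeof(struct nvme_cqe), mirroring
   nvme_create_io_cq, so the SQ gets the same number of entries as its CQ
   (the 64-byte SQEs may then span several contiguous pages). Create I/O SQ
   Dword 11 carries the target CQ ID in bits 31:16 and the "physically
   contiguous" flag in bit 0. */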
/* Returns 0 on success. */
static int
nvme_create_io_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq, u16 q_idx, struct nvme_cq *cq)
{
    int rc;
    struct nvme_sqe *cmd_create_sq;
    u32 length = 1 + (ctrl->reg->cap & 0xffff);
    if (length > NVME_PAGE_SIZE / sizeof(struct nvme_cqe))
        length = NVME_PAGE_SIZE / sizeof(struct nvme_cqe);

    rc = nvme_init_sq(ctrl, sq, q_idx, length, cq);
    if (rc) {
        goto err;
    }

    cmd_create_sq = nvme_get_next_sqe(&ctrl->admin_sq,
                                      NVME_SQE_OPC_ADMIN_CREATE_IO_SQ, NULL,
                                      sq->sqe);
    if (!cmd_create_sq) {
        goto err_destroy_sq;
    }

    cmd_create_sq->dword[10] = (sq->common.mask << 16) | (q_idx >> 1);
    cmd_create_sq->dword[11] = (q_idx >> 1) << 16 | 1 /* contiguous */;
    dprintf(3, "sq %p create dword10 %08x dword11 %08x\n", sq,
            cmd_create_sq->dword[10], cmd_create_sq->dword[11]);

    nvme_commit_sqe(&ctrl->admin_sq);

    struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);

    if (!nvme_is_cqe_success(&cqe)) {
        dprintf(2, "create io sq failed: %08x %08x %08x %08x\n",
                cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);
        goto err_destroy_sq;
    }

    return 0;

err_destroy_sq:
    nvme_destroy_sq(sq);
err:
    return -1;
}

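/* For NVMe read/write commands, Dwords 10 and 11 hold the lower and upper
   half of the 64-bit starting LBA, and Dword 12 bits 15:0 hold the 0's based
   number of logical blocks to transfer (hence "count - 1" below). */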
/* Reads or writes count sectors starting at lba. Returns DISK_RET_*. The
   buffer must be dword aligned and cannot cross a page boundary. */
static int
nvme_io_readwrite(struct nvme_namespace *ns, u64 lba, char *buf, u16 count,
                  int write)
{
    u32 buf_addr = (u32)buf;

    if ((buf_addr & 0x3) ||
        ((buf_addr & ~(NVME_PAGE_SIZE - 1)) !=
         ((buf_addr + ns->block_size * count - 1) & ~(NVME_PAGE_SIZE - 1)))) {
        /* Buffer is misaligned or crosses page boundary */
        warn_internalerror();
        return DISK_RET_EBADTRACK;
    }

    struct nvme_sqe *io_read = nvme_get_next_sqe(&ns->ctrl->io_sq,
                                                 write ? NVME_SQE_OPC_IO_WRITE
                                                       : NVME_SQE_OPC_IO_READ,
                                                 NULL, buf);
    if (!io_read) {
        /* The queue should never fill up, since commands are issued
           synchronously, but be defensive. */
        warn_internalerror();
        return DISK_RET_EBADTRACK;
    }

    io_read->nsid = ns->ns_id;
    io_read->dword[10] = (u32)lba;
    io_read->dword[11] = (u32)(lba >> 32);
    io_read->dword[12] = (1U << 31 /* limited retry */) | (count - 1);

    nvme_commit_sqe(&ns->ctrl->io_sq);

    struct nvme_cqe cqe = nvme_wait(&ns->ctrl->io_sq);

    if (!nvme_is_cqe_success(&cqe)) {
        dprintf(2, "read io: %08x %08x %08x %08x\n",
                cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);

        return DISK_RET_EBADTRACK;
    }

    return DISK_RET_SUCCESS;
}

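/* Queue IDs and doorbell indexes: I/O queue pair 1 uses doorbell index 2 for
   the SQ tail and 3 for the CQ head, which the create functions shift down
   (q_idx >> 1) to form the queue ID. */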
static int
nvme_create_io_queues(struct nvme_ctrl *ctrl)
{
    if (nvme_create_io_cq(ctrl, &ctrl->io_cq, 3))
        goto err;

    if (nvme_create_io_sq(ctrl, &ctrl->io_sq, 2, &ctrl->io_cq))
        goto err_free_cq;

    return 0;

 err_free_cq:
    nvme_destroy_cq(&ctrl->io_cq);
 err:
    return -1;
}

static void
nvme_destroy_io_queues(struct nvme_ctrl *ctrl)
{
    nvme_destroy_sq(&ctrl->io_sq);
    nvme_destroy_cq(&ctrl->io_cq);
}

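/* CAP.TO (bits 31:24) advertises the worst-case time for CSTS.RDY to change
   state, in units of 500 ms; it bounds the polling loop below. */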
/* Waits for CSTS.RDY to match rdy. Returns 0 on success. */
static int
nvme_wait_csts_rdy(struct nvme_ctrl *ctrl, unsigned rdy)
{
    u32 const max_to = 500 /* ms */ * ((ctrl->reg->cap >> 24) & 0xFFU);
    u32 to = timer_calc(max_to);
    u32 csts;

    while (rdy != ((csts = ctrl->reg->csts) & NVME_CSTS_RDY)) {
        yield();

        if (csts & NVME_CSTS_FATAL) {
            dprintf(3, "NVMe fatal error during controller shutdown\n");
            return -1;
        }

        if (timer_check(to)) {
            warn_timeout();
            return -1;
        }
    }

    return 0;
}

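/* Admin queue bring-up writes three registers: AQA holds the 0's based admin
   CQ size (bits 27:16) and SQ size (bits 11:0), ASQ/ACQ hold the base
   addresses of the queues, and CC.IOCQES/IOSQES (bits 23:20 and 19:16) set
   the log2 sizes of the 16-byte CQ and 64-byte SQ entries before CC.EN
   starts the controller. */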
/* Returns 0 on success. */
static int
nvme_controller_enable(struct nvme_ctrl *ctrl)
{
    int rc;

    pci_enable_busmaster(ctrl->pci);

    /* Turn the controller off. */
    ctrl->reg->cc = 0;
    if (nvme_wait_csts_rdy(ctrl, 0)) {
        dprintf(2, "NVMe fatal error during controller shutdown\n");
        return -1;
    }

    ctrl->doorbell_stride = 4U << ((ctrl->reg->cap >> 32) & 0xF);

    rc = nvme_init_cq(ctrl, &ctrl->admin_cq, 1,
                      NVME_PAGE_SIZE / sizeof(struct nvme_cqe));
    if (rc) {
        return -1;
    }

    rc = nvme_init_sq(ctrl, &ctrl->admin_sq, 0,
                      NVME_PAGE_SIZE / sizeof(struct nvme_sqe), &ctrl->admin_cq);
    if (rc) {
        goto err_destroy_admin_cq;
    }

    ctrl->reg->aqa = ctrl->admin_cq.common.mask << 16
        | ctrl->admin_sq.common.mask;

    ctrl->reg->asq = (u32)ctrl->admin_sq.sqe;
    ctrl->reg->acq = (u32)ctrl->admin_cq.cqe;

    dprintf(3, " admin submission queue: %p\n", ctrl->admin_sq.sqe);
    dprintf(3, " admin completion queue: %p\n", ctrl->admin_cq.cqe);

    ctrl->reg->cc = NVME_CC_EN | (NVME_CQE_SIZE_LOG << 20)
        | (NVME_SQE_SIZE_LOG << 16 /* IOSQES */);

    if (nvme_wait_csts_rdy(ctrl, 1)) {
        dprintf(2, "NVMe fatal error while enabling controller\n");
        goto err_destroy_admin_sq;
    }

    /* The admin queue is set up and the controller is ready. Let's figure out
       what namespaces we have. */

    struct nvme_identify_ctrl *identify = nvme_admin_identify_ctrl(ctrl);

    if (!identify) {
        dprintf(2, "NVMe couldn't identify controller.\n");
        goto err_destroy_admin_sq;
    }

    dprintf(3, "NVMe has %u namespace%s.\n",
            identify->nn, (identify->nn == 1) ? "" : "s");

    ctrl->ns_count = identify->nn;
    free(identify);

    if ((ctrl->ns_count == 0) || nvme_create_io_queues(ctrl)) {
        /* No point in continuing if the controller says it doesn't have
           namespaces or we couldn't create I/O queues. */
        goto err_destroy_admin_sq;
    }

    ctrl->ns = malloc_fseg(sizeof(*ctrl->ns) * ctrl->ns_count);
    if (!ctrl->ns) {
        warn_noalloc();
        goto err_destroy_ioq;
    }
    memset(ctrl->ns, 0, sizeof(*ctrl->ns) * ctrl->ns_count);

    /* Populate namespace IDs */
    int ns_idx;
    for (ns_idx = 0; ns_idx < ctrl->ns_count; ns_idx++) {
        nvme_probe_ns(ctrl, &ctrl->ns[ns_idx], ns_idx + 1);
    }

    dprintf(3, "NVMe initialization complete!\n");
    return 0;

 err_destroy_ioq:
    nvme_destroy_io_queues(ctrl);
 err_destroy_admin_sq:
    nvme_destroy_sq(&ctrl->admin_sq);
 err_destroy_admin_cq:
    nvme_destroy_cq(&ctrl->admin_cq);
    return -1;
}

/* Initialize an NVMe controller and detect its drives. */
static void
nvme_controller_setup(void *opaque)
{
    u8 skip_nonbootable = is_bootprio_strict();
    struct pci_device *pci = opaque;

    if (skip_nonbootable && bootprio_find_pci_device(pci) < 0) {
        dprintf(1, "skipping init of a non-bootable NVMe at %pP\n",
                pci);
        goto err;
    }

    struct nvme_reg volatile *reg = pci_enable_membar(pci, PCI_BASE_ADDRESS_0);
    if (!reg)
        return;

    u32 version = reg->vs;
    dprintf(3, "Found NVMe controller with version %u.%u.%u.\n",
            version >> 16, (version >> 8) & 0xFF, version & 0xFF);
    dprintf(3, " Capabilities %016llx\n", reg->cap);

    if (~reg->cap & NVME_CAP_CSS_NVME) {
        dprintf(3, "Controller doesn't speak NVMe command set. Skipping.\n");
        goto err;
    }

    struct nvme_ctrl *ctrl = malloc_high(sizeof(*ctrl));
    if (!ctrl) {
        warn_noalloc();
        goto err;
    }

    memset(ctrl, 0, sizeof(*ctrl));

    ctrl->reg = reg;
    ctrl->pci = pci;

    if (nvme_controller_enable(ctrl)) {
        goto err_free_ctrl;
    }

    return;

 err_free_ctrl:
    free(ctrl);
 err:
    dprintf(2, "Failed to enable NVMe controller.\n");
}

// Locate and init NVMe controllers
static void
nvme_scan(void)
{
    // Scan PCI bus for NVMe adapters
    struct pci_device *pci;

    foreachpci(pci) {
        if (pci->class != PCI_CLASS_STORAGE_NVME)
            continue;
        if (pci->prog_if != 2 /* as of NVM 1.0e */) {
            dprintf(3, "Found incompatible NVMe: prog-if=%02x\n", pci->prog_if);
            continue;
        }

        run_thread(nvme_controller_setup, pci);
    }
}

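/* Disk requests are bounced through the page-sized, page-aligned DMA buffer
   allocated per namespace: each chunk of at most NVME_PAGE_SIZE bytes is
   copied in (for writes) or out (for reads), so callers' buffers need not
   satisfy the alignment rules that nvme_io_readwrite enforces. */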
static int
nvme_cmd_readwrite(struct nvme_namespace *ns, struct disk_op_s *op, int write)
{
    int res = DISK_RET_SUCCESS;
    u16 const max_blocks = NVME_PAGE_SIZE / ns->block_size;
    u16 i;

    for (i = 0; i < op->count && res == DISK_RET_SUCCESS;) {
        u16 blocks_remaining = op->count - i;
        u16 blocks = blocks_remaining < max_blocks ? blocks_remaining
                                                   : max_blocks;
        char *op_buf = op->buf_fl + i * ns->block_size;

        if (write) {
            memcpy(ns->dma_buffer, op_buf, blocks * ns->block_size);
        }

        res = nvme_io_readwrite(ns, op->lba + i, ns->dma_buffer, blocks, write);
        dprintf(3, "ns %u %s lba %llu+%u: %d\n", ns->ns_id, write ? "write"
                                                                  : "read",
                op->lba + i, blocks, res);

        if (!write && res == DISK_RET_SUCCESS) {
            memcpy(op_buf, ns->dma_buffer, blocks * ns->block_size);
        }

        i += blocks;
    }

    return res;
}

int
nvme_process_op(struct disk_op_s *op)
{
    if (!CONFIG_NVME)
        return DISK_RET_SUCCESS;

    struct nvme_namespace *ns = container_of(op->drive_fl, struct nvme_namespace,
                                             drive);

    switch (op->command) {
    case CMD_READ:
    case CMD_WRITE:
        return nvme_cmd_readwrite(ns, op, op->command == CMD_WRITE);
    default:
        return default_process_op(op);
    }
}

void
nvme_setup(void)
{
    ASSERT32FLAT();
    if (!CONFIG_NVME)
        return;

    dprintf(3, "init nvme\n");
    nvme_scan();
}

/* EOF */