// Low level NVMe disk access
//
// Copyright 2017 Amazon.com, Inc. or its affiliates.
//
// This file may be distributed under the terms of the GNU LGPLv3 license.

#include "blockcmd.h"
#include "malloc.h" // malloc_high
#include "output.h" // dprintf
#include "pci.h"
#include "pci_ids.h" // PCI_CLASS_STORAGE_NVME
#include "pci_regs.h" // PCI_BASE_ADDRESS_0
#include "pcidevice.h" // foreachpci
#include "stacks.h" // yield
#include "std/disk.h" // DISK_RET_
#include "string.h" // memset
#include "util.h" // boot_add_hd
#include "x86.h" // readl

#include "nvme.h"
#include "nvme-int.h"

static void *
zalloc_page_aligned(struct zone_s *zone, u32 size)
{
    void *res = _malloc(zone, size, NVME_PAGE_SIZE);
    if (res) memset(res, 0, size);
    return res;
}
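
/* Doorbell registers start at BAR0 offset 0x1000 and are spaced
   doorbell_stride (4 << CAP.DSTRD) bytes apart. Queue y's submission tail
   doorbell has doorbell index 2y and its completion head doorbell index
   2y+1; the q_idx parameter below is this doorbell index, not the queue
   ID. */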
static void
nvme_init_queue_common(struct nvme_ctrl *ctrl, struct nvme_queue *q, u16 q_idx,
                       u16 length)
{
    memset(q, 0, sizeof(*q));
    q->dbl = (u32 *)((char *)ctrl->reg + 0x1000 + q_idx * ctrl->doorbell_stride);
    dprintf(3, " q %p q_idx %u dbl %p\n", q, q_idx, q->dbl);
    q->mask = length - 1;
}

static int
nvme_init_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq, u16 q_idx, u16 length,
             struct nvme_cq *cq)
{
    nvme_init_queue_common(ctrl, &sq->common, q_idx, length);
    sq->sqe = zalloc_page_aligned(&ZoneHigh, sizeof(*sq->sqe) * length);

    if (!sq->sqe) {
        warn_noalloc();
        return -1;
    }

    dprintf(3, "sq %p q_idx %u sqe %p\n", sq, q_idx, sq->sqe);
    sq->cq = cq;
    sq->head = 0;
    sq->tail = 0;

    return 0;
}

static int
nvme_init_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq, u16 q_idx, u16 length)
{
    nvme_init_queue_common(ctrl, &cq->common, q_idx, length);
    cq->cqe = zalloc_page_aligned(&ZoneHigh, sizeof(*cq->cqe) * length);
    if (!cq->cqe) {
        warn_noalloc();
        return -1;
    }

    cq->head = 0;

    /* All CQE phase bits are initialized to zero. This means we initially
       wait for the controller to set these to 1. */
    cq->phase = 1;

    return 0;
}
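
/* The controller inverts the phase tag it writes into CQEs on every pass
   through the completion ring, so an entry is new exactly when its phase
   bit matches the phase we currently expect. */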
static int
nvme_poll_cq(struct nvme_cq *cq)
{
    u32 dw3 = readl(&cq->cqe[cq->head].dword[3]);
    return (!!(dw3 & NVME_CQE_DW3_P) == cq->phase);
}

static int
nvme_is_cqe_success(struct nvme_cqe const *cqe)
{
    /* Bit 0 of the status field is the phase tag; the status code proper
       starts at bit 1 and is zero on success. */
    return ((cqe->status >> 1) & 0xFF) == 0;
}

static struct nvme_cqe
nvme_error_cqe(void)
{
    struct nvme_cqe r;

    /* 0xFF is a vendor specific status code != success. Should be okay for
       indicating failure. */
    memset(&r, 0xFF, sizeof(r));
    return r;
}

static struct nvme_cqe
nvme_consume_cqe(struct nvme_sq *sq)
{
    struct nvme_cq *cq = sq->cq;

    if (!nvme_poll_cq(cq)) {
        /* Cannot consume a completion queue entry, if there is none ready. */
        return nvme_error_cqe();
    }

    struct nvme_cqe *cqe = &cq->cqe[cq->head];
    u16 cq_next_head = (cq->head + 1) & cq->common.mask;
    dprintf(4, "cq %p head %u -> %u\n", cq, cq->head, cq_next_head);
    if (cq_next_head < cq->head) {
        dprintf(3, "cq %p wrap\n", cq);
        cq->phase = ~cq->phase;
    }
    cq->head = cq_next_head;

    /* Update the submission queue head. */
    if (cqe->sq_head != sq->head) {
        sq->head = cqe->sq_head;
        dprintf(4, "sq %p advanced to %u\n", sq, cqe->sq_head);
    }

    /* Tell the controller that we consumed the completion. */
    writel(cq->common.dbl, cq->head);

    return *cqe;
}

static struct nvme_cqe
nvme_wait(struct nvme_sq *sq)
{
    static const unsigned nvme_timeout = 5000 /* ms */;
    u32 to = timer_calc(nvme_timeout);
    while (!nvme_poll_cq(sq->cq)) {
        yield();

        if (timer_check(to)) {
            warn_timeout();
            return nvme_error_cqe();
        }
    }

    return nvme_consume_cqe(sq);
}

/* Returns the next submission queue entry (or NULL if the queue is full). It
   also fills out Command Dword 0 and clears the rest. */
static struct nvme_sqe *
nvme_get_next_sqe(struct nvme_sq *sq, u8 opc, void *metadata, void *data)
{
    if (((sq->head + 1) & sq->common.mask) == sq->tail) {
        dprintf(3, "submission queue is full\n");
        return NULL;
    }

    struct nvme_sqe *sqe = &sq->sqe[sq->tail];
    dprintf(4, "sq %p next_sqe %u\n", sq, sq->tail);

    memset(sqe, 0, sizeof(*sqe));
    sqe->cdw0 = opc | (sq->tail << 16 /* CID */);
    sqe->mptr = (u32)metadata;
    sqe->dptr_prp1 = (u32)data;

    if (sqe->dptr_prp1 & (NVME_PAGE_SIZE - 1)) {
        /* Data buffer not page aligned. */
        warn_internalerror();
    }

    return sqe;
}

/* Call this after you've filled out an sqe that you've got from nvme_get_next_sqe. */
static void
nvme_commit_sqe(struct nvme_sq *sq)
{
    dprintf(4, "sq %p commit_sqe %u\n", sq, sq->tail);
    sq->tail = (sq->tail + 1) & sq->common.mask;
    writel(sq->common.dbl, sq->tail);
}
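
/* Typical command submission sequence (a sketch of how the helpers above
   combine; see nvme_admin_identify() below for a real instance):
 *
 *   struct nvme_sqe *sqe = nvme_get_next_sqe(sq, opc, mptr, data);
 *   if (sqe) {
 *       sqe->nsid = ...;               // fill in command-specific fields
 *       nvme_commit_sqe(sq);           // ring the doorbell
 *       struct nvme_cqe cqe = nvme_wait(sq);
 *       if (!nvme_is_cqe_success(&cqe))
 *           ...;                       // handle the error
 *   }
 */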
/* Perform an identify command on the admin queue and return the resulting
   buffer. This may be a NULL pointer, if something failed. This function
   cannot be used after initialization, because it uses buffers in the tmp
   zone. */
static union nvme_identify *
nvme_admin_identify(struct nvme_ctrl *ctrl, u8 cns, u32 nsid)
{
    union nvme_identify *identify_buf = zalloc_page_aligned(&ZoneTmpHigh, 4096);
    if (!identify_buf) {
        /* Could not allocate identify buffer. */
        warn_internalerror();
        return NULL;
    }

    struct nvme_sqe *cmd_identify;
    cmd_identify = nvme_get_next_sqe(&ctrl->admin_sq,
                                     NVME_SQE_OPC_ADMIN_IDENTIFY, NULL,
                                     identify_buf);

    if (!cmd_identify) {
        warn_internalerror();
        goto error;
    }

    cmd_identify->nsid = nsid;
    cmd_identify->dword[10] = cns;

    nvme_commit_sqe(&ctrl->admin_sq);

    struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);

    if (!nvme_is_cqe_success(&cqe)) {
        goto error;
    }

    return identify_buf;
 error:
    free(identify_buf);
    return NULL;
}

static struct nvme_identify_ctrl *
nvme_admin_identify_ctrl(struct nvme_ctrl *ctrl)
{
    /* The union member is at offset zero, so a NULL result from
       nvme_admin_identify propagates unchanged. */
    return &nvme_admin_identify(ctrl, NVME_ADMIN_IDENTIFY_CNS_ID_CTRL, 0)->ctrl;
}

static struct nvme_identify_ns *
nvme_admin_identify_ns(struct nvme_ctrl *ctrl, u32 ns_id)
{
    return &nvme_admin_identify(ctrl, NVME_ADMIN_IDENTIFY_CNS_ID_NS,
                                ns_id)->ns;
}

static void
nvme_probe_ns(struct nvme_ctrl *ctrl, struct nvme_namespace *ns, u32 ns_id)
{
    ns->ctrl = ctrl;
    ns->ns_id = ns_id;

    struct nvme_identify_ns *id = nvme_admin_identify_ns(ctrl, ns_id);
    if (!id) {
        dprintf(2, "NVMe couldn't identify namespace %u.\n", ns_id);
        goto free_buffer;
    }

    u8 current_lba_format = id->flbas & 0xF;
    if (current_lba_format > id->nlbaf) {
        dprintf(2, "NVMe NS %u: current LBA format %u is beyond what the "
                "namespace supports (%u)?\n",
                ns_id, current_lba_format, id->nlbaf + 1);
        goto free_buffer;
    }

    ns->lba_count = id->nsze;
    if (!ns->lba_count) {
        dprintf(2, "NVMe NS %u is inactive.\n", ns_id);
        goto free_buffer;
    }

    struct nvme_lba_format *fmt = &id->lbaf[current_lba_format];

    ns->block_size = 1U << fmt->lbads;
    ns->metadata_size = fmt->ms;

    if (ns->block_size > NVME_PAGE_SIZE) {
        /* If we see devices that trigger this path, we need to increase our
           buffer size. */
        warn_internalerror();
        goto free_buffer;
    }

    ns->drive.cntl_id = ns - ctrl->ns;
    ns->drive.removable = 0;
    ns->drive.type = DTYPE_NVME;
    ns->drive.blksize = ns->block_size;
    ns->drive.sectors = ns->lba_count;

    ns->dma_buffer = zalloc_page_aligned(&ZoneHigh, NVME_PAGE_SIZE);
    if (!ns->dma_buffer) {
        warn_noalloc();
        goto free_buffer;
    }

    char *desc = znprintf(MAXDESCSIZE, "NVMe NS %u: %llu MiB (%llu %u-byte "
                          "blocks + %u-byte metadata)",
                          ns_id, (ns->lba_count * ns->block_size) >> 20,
                          ns->lba_count, ns->block_size, ns->metadata_size);

    dprintf(3, "%s\n", desc);
    boot_add_hd(&ns->drive, desc, bootprio_find_pci_device(ctrl->pci));

free_buffer:
    free(id);
}


/* Release memory allocated for a completion queue */
static void
nvme_destroy_cq(struct nvme_cq *cq)
{
    free(cq->cqe);
    cq->cqe = NULL;
}

/* Release memory allocated for a submission queue */
static void
nvme_destroy_sq(struct nvme_sq *sq)
{
    free(sq->sqe);
    sq->sqe = NULL;
}
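
/* The I/O queues are created via admin commands. Note that the q_idx
   arguments here are doorbell indices (two per queue), so q_idx >> 1
   recovers the queue ID that the create commands expect. */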
/* Returns 0 on success. */
static int
nvme_create_io_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq, u16 q_idx)
{
    int rc;
    struct nvme_sqe *cmd_create_cq;

    rc = nvme_init_cq(ctrl, cq, q_idx, NVME_PAGE_SIZE / sizeof(struct nvme_cqe));
    if (rc) {
        goto err;
    }

    cmd_create_cq = nvme_get_next_sqe(&ctrl->admin_sq,
                                      NVME_SQE_OPC_ADMIN_CREATE_IO_CQ, NULL,
                                      cq->cqe);
    if (!cmd_create_cq) {
        goto err_destroy_cq;
    }

    cmd_create_cq->dword[10] = (cq->common.mask << 16) | (q_idx >> 1);
    cmd_create_cq->dword[11] = 1 /* physically contiguous */;

    nvme_commit_sqe(&ctrl->admin_sq);

    struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);

    if (!nvme_is_cqe_success(&cqe)) {
        dprintf(2, "create io cq failed: %08x %08x %08x %08x\n",
                cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);

        goto err_destroy_cq;
    }

    return 0;

err_destroy_cq:
    nvme_destroy_cq(cq);
err:
    return -1;
}

/* Returns 0 on success. */
static int
nvme_create_io_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq, u16 q_idx, struct nvme_cq *cq)
{
    int rc;
    struct nvme_sqe *cmd_create_sq;

    rc = nvme_init_sq(ctrl, sq, q_idx, NVME_PAGE_SIZE / sizeof(struct nvme_cqe), cq);
    if (rc) {
        goto err;
    }

    cmd_create_sq = nvme_get_next_sqe(&ctrl->admin_sq,
                                      NVME_SQE_OPC_ADMIN_CREATE_IO_SQ, NULL,
                                      sq->sqe);
    if (!cmd_create_sq) {
        goto err_destroy_sq;
    }

    cmd_create_sq->dword[10] = (sq->common.mask << 16) | (q_idx >> 1);
    cmd_create_sq->dword[11] = (q_idx >> 1) << 16 | 1 /* contiguous */;
    dprintf(3, "sq %p create dword10 %08x dword11 %08x\n", sq,
            cmd_create_sq->dword[10], cmd_create_sq->dword[11]);

    nvme_commit_sqe(&ctrl->admin_sq);

    struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);

    if (!nvme_is_cqe_success(&cqe)) {
        dprintf(2, "create io sq failed: %08x %08x %08x %08x\n",
                cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);
        goto err_destroy_sq;
    }

    return 0;

err_destroy_sq:
    nvme_destroy_sq(sq);
err:
    return -1;
}
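
/* Only PRP1 is filled in by nvme_get_next_sqe(), so a single transfer is
   limited to one page; larger or misaligned requests are split up and
   bounced through a page-aligned buffer in nvme_cmd_readwrite() below. */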
/* Reads count sectors into buf. Returns DISK_RET_*. The buffer cannot cross
   page boundaries. */
static int
nvme_io_readwrite(struct nvme_namespace *ns, u64 lba, char *buf, u16 count,
                  int write)
{
    u32 buf_addr = (u32)buf;

    if ((buf_addr & 0x3) ||
        ((buf_addr & ~(NVME_PAGE_SIZE - 1)) !=
         ((buf_addr + ns->block_size * count - 1) & ~(NVME_PAGE_SIZE - 1)))) {
        /* Buffer is misaligned or crosses page boundary */
        warn_internalerror();
        return DISK_RET_EBADTRACK;
    }

    struct nvme_sqe *io_cmd = nvme_get_next_sqe(&ns->ctrl->io_sq,
                                                write ? NVME_SQE_OPC_IO_WRITE
                                                      : NVME_SQE_OPC_IO_READ,
                                                NULL, buf);
    if (!io_cmd) {
        /* Queue full; should not happen, as we wait for each command to
           complete before submitting the next. */
        warn_internalerror();
        return DISK_RET_EBADTRACK;
    }

    io_cmd->nsid = ns->ns_id;
    io_cmd->dword[10] = (u32)lba;
    io_cmd->dword[11] = (u32)(lba >> 32);
    io_cmd->dword[12] = (1U << 31 /* limited retry */) | (count - 1);

    nvme_commit_sqe(&ns->ctrl->io_sq);

    struct nvme_cqe cqe = nvme_wait(&ns->ctrl->io_sq);

    if (!nvme_is_cqe_success(&cqe)) {
        dprintf(2, "%s io: %08x %08x %08x %08x\n",
                write ? "write" : "read",
                cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);

        return DISK_RET_EBADTRACK;
    }

    return DISK_RET_SUCCESS;
}

static int
nvme_create_io_queues(struct nvme_ctrl *ctrl)
{
    if (nvme_create_io_cq(ctrl, &ctrl->io_cq, 3))
        goto err;

    if (nvme_create_io_sq(ctrl, &ctrl->io_sq, 2, &ctrl->io_cq))
        goto err_free_cq;

    return 0;

 err_free_cq:
    nvme_destroy_cq(&ctrl->io_cq);
 err:
    return -1;
}

static void
nvme_destroy_io_queues(struct nvme_ctrl *ctrl)
{
    nvme_destroy_sq(&ctrl->io_sq);
    nvme_destroy_cq(&ctrl->io_cq);
}

/* Waits for CSTS.RDY to match rdy. Returns 0 on success. */
static int
nvme_wait_csts_rdy(struct nvme_ctrl *ctrl, unsigned rdy)
{
    /* CAP.TO (bits 31:24) gives the worst-case ready transition time in
       500ms units. */
    u32 const max_to = 500 /* ms */ * ((ctrl->reg->cap >> 24) & 0xFFU);
    u32 to = timer_calc(max_to);
    u32 csts;

    while (rdy != ((csts = ctrl->reg->csts) & NVME_CSTS_RDY)) {
        yield();

        if (csts & NVME_CSTS_FATAL) {
            dprintf(3, "NVMe fatal error (CSTS.CFS set)\n");
            return -1;
        }

        if (timer_check(to)) {
            warn_timeout();
            return -1;
        }
    }

    return 0;
}
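
/* Controller bring-up follows the sequence prescribed by the NVMe spec:
   clear CC.EN and wait for CSTS.RDY to drop, program the admin queue
   attributes and base addresses (AQA/ASQ/ACQ), set CC.EN again, and wait
   for CSTS.RDY before issuing the first admin command. */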
/* Returns 0 on success. */
static int
nvme_controller_enable(struct nvme_ctrl *ctrl)
{
    int rc;

    pci_enable_busmaster(ctrl->pci);

    /* Turn the controller off. */
    ctrl->reg->cc = 0;
    if (nvme_wait_csts_rdy(ctrl, 0)) {
        dprintf(2, "NVMe fatal error during controller shutdown\n");
        return -1;
    }

    ctrl->doorbell_stride = 4U << ((ctrl->reg->cap >> 32) & 0xF);

    rc = nvme_init_cq(ctrl, &ctrl->admin_cq, 1,
                      NVME_PAGE_SIZE / sizeof(struct nvme_cqe));
    if (rc) {
        return -1;
    }

    rc = nvme_init_sq(ctrl, &ctrl->admin_sq, 0,
                      NVME_PAGE_SIZE / sizeof(struct nvme_sqe), &ctrl->admin_cq);
    if (rc) {
        goto err_destroy_admin_cq;
    }

    ctrl->reg->aqa = ctrl->admin_cq.common.mask << 16
        | ctrl->admin_sq.common.mask;

    ctrl->reg->asq = (u32)ctrl->admin_sq.sqe;
    ctrl->reg->acq = (u32)ctrl->admin_cq.cqe;

    dprintf(3, "  admin submission queue: %p\n", ctrl->admin_sq.sqe);
    dprintf(3, "  admin completion queue: %p\n", ctrl->admin_cq.cqe);

    ctrl->reg->cc = NVME_CC_EN | (NVME_CQE_SIZE_LOG << 20 /* IOCQES */)
        | (NVME_SQE_SIZE_LOG << 16 /* IOSQES */);

    if (nvme_wait_csts_rdy(ctrl, 1)) {
        dprintf(2, "NVMe fatal error while enabling controller\n");
        goto err_destroy_admin_sq;
    }

    /* The admin queue is set up and the controller is ready. Let's figure out
       what namespaces we have. */

    struct nvme_identify_ctrl *identify = nvme_admin_identify_ctrl(ctrl);

    if (!identify) {
        dprintf(2, "NVMe couldn't identify controller.\n");
        goto err_destroy_admin_sq;
    }

    dprintf(3, "NVMe has %u namespace%s.\n",
            identify->nn, (identify->nn == 1) ? "" : "s");

    ctrl->ns_count = identify->nn;
    free(identify);

    if ((ctrl->ns_count == 0) || nvme_create_io_queues(ctrl)) {
        /* No point in continuing if the controller says it doesn't have
           namespaces or we couldn't create I/O queues. */
        goto err_destroy_admin_sq;
    }

    ctrl->ns = malloc_fseg(sizeof(*ctrl->ns) * ctrl->ns_count);
    if (!ctrl->ns) {
        warn_noalloc();
        goto err_destroy_ioq;
    }
    memset(ctrl->ns, 0, sizeof(*ctrl->ns) * ctrl->ns_count);

    /* Populate namespace IDs */
    int ns_idx;
    for (ns_idx = 0; ns_idx < ctrl->ns_count; ns_idx++) {
        nvme_probe_ns(ctrl, &ctrl->ns[ns_idx], ns_idx + 1);
    }

    dprintf(3, "NVMe initialization complete!\n");
    return 0;

 err_destroy_ioq:
    nvme_destroy_io_queues(ctrl);
 err_destroy_admin_sq:
    nvme_destroy_sq(&ctrl->admin_sq);
 err_destroy_admin_cq:
    nvme_destroy_cq(&ctrl->admin_cq);
    return -1;
}

/* Initialize an NVMe controller and detect its drives. */
static void
nvme_controller_setup(void *opaque)
{
    struct pci_device *pci = opaque;

    struct nvme_reg volatile *reg = pci_enable_membar(pci, PCI_BASE_ADDRESS_0);
    if (!reg)
        return;

    u32 version = reg->vs;
    dprintf(3, "Found NVMe controller with version %u.%u.%u.\n",
            version >> 16, (version >> 8) & 0xFF, version & 0xFF);
    dprintf(3, "  Capabilities %016llx\n", reg->cap);

    if (~reg->cap & NVME_CAP_CSS_NVME) {
        dprintf(3, "Controller doesn't speak NVMe command set. Skipping.\n");
        goto err;
    }

    struct nvme_ctrl *ctrl = malloc_high(sizeof(*ctrl));
    if (!ctrl) {
        warn_noalloc();
        goto err;
    }

    memset(ctrl, 0, sizeof(*ctrl));

    ctrl->reg = reg;
    ctrl->pci = pci;

    if (nvme_controller_enable(ctrl)) {
        goto err_free_ctrl;
    }

    return;

 err_free_ctrl:
    free(ctrl);
 err:
    dprintf(2, "Failed to enable NVMe controller.\n");
}

// Locate and init NVMe controllers
static void
nvme_scan(void)
{
    // Scan PCI bus for NVMe adapters
    struct pci_device *pci;

    foreachpci(pci) {
        if (pci->class != PCI_CLASS_STORAGE_NVME)
            continue;
        if (pci->prog_if != 2 /* as of NVM 1.0e */) {
            dprintf(3, "Found incompatible NVMe: prog-if=%02x\n", pci->prog_if);
            continue;
        }

        run_thread(nvme_controller_setup, pci);
    }
}
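
/* Handle a disk read/write by splitting it into page-sized chunks that are
   bounced through the namespace's DMA buffer, since the caller's buffer may
   be misaligned or cross page boundaries. */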
static int
nvme_cmd_readwrite(struct nvme_namespace *ns, struct disk_op_s *op, int write)
{
    int res = DISK_RET_SUCCESS;
    u16 const max_blocks = NVME_PAGE_SIZE / ns->block_size;
    u16 i;

    for (i = 0; i < op->count && res == DISK_RET_SUCCESS;) {
        u16 blocks_remaining = op->count - i;
        u16 blocks = blocks_remaining < max_blocks ? blocks_remaining
                                                   : max_blocks;
        char *op_buf = op->buf_fl + i * ns->block_size;

        if (write) {
            memcpy(ns->dma_buffer, op_buf, blocks * ns->block_size);
        }

        res = nvme_io_readwrite(ns, op->lba + i, ns->dma_buffer, blocks, write);
        dprintf(3, "ns %u %s lba %llu+%u: %d\n", ns->ns_id, write ? "write"
                                                                  : "read",
                op->lba + i, blocks, res);

        if (!write && res == DISK_RET_SUCCESS) {
            memcpy(op_buf, ns->dma_buffer, blocks * ns->block_size);
        }

        i += blocks;
    }

    return res;
}

int
nvme_process_op(struct disk_op_s *op)
{
    if (!CONFIG_NVME)
        return DISK_RET_SUCCESS;

    struct nvme_namespace *ns = container_of(op->drive_fl, struct nvme_namespace,
                                             drive);

    switch (op->command) {
    case CMD_READ:
    case CMD_WRITE:
        return nvme_cmd_readwrite(ns, op, op->command == CMD_WRITE);
    default:
        return default_process_op(op);
    }
}

void
nvme_setup(void)
{
    ASSERT32FLAT();
    if (!CONFIG_NVME)
        return;

    dprintf(3, "init nvme\n");
    nvme_scan();
}

/* EOF */