1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/flock.h> 27 #include <nfs/export.h> 28 #include <sys/cmn_err.h> 29 #include <sys/atomic.h> 30 #include <nfs/nfs.h> 31 #include <nfs/nfs4.h> 32 #include <nfs/nfssys.h> 33 #include <nfs/lm.h> 34 #include <sys/pathname.h> 35 #include <sys/sdt.h> 36 #include <sys/nvpair.h> 37 #include <sys/sdt.h> 38 #include <sys/disp.h> 39 40 extern u_longlong_t nfs4_srv_caller_id; 41 42 #include <nfs/nfs_sstor_impl.h> 43 #include <nfs/mds_state.h> 44 #include <nfs/nfs41_sessions.h> 45 46 #include <nfs/rfs41_ds.h> 47 #include <nfs/nfs41_filehandle.h> 48 49 static void mds_do_lorecall(mds_lorec_t *); 50 static void mds_sess_lorecall_callout(rfs4_entry_t, void *); 51 static int mds_lorecall_cmd(struct mds_reclo_args *, cred_t *); 52 53 extern void mds_do_cb_recall(struct rfs4_deleg_state *, bool_t); 54 55 56 /* 57 * XXX - slrc_slot_size will more than likely have to be 58 * computed dynamically as the server adjusts the 59 * sessions' slot replay cache size. This should be 60 * good for proto. 
 */
slotid4 slrc_slot_size = MAXSLOTS;

/* The values below are rfs4_lease_time units */

#ifdef DEBUG
#define	SESSION_CACHE_TIME 1
#else
#define	SESSION_CACHE_TIME 10
#endif

#define	ONES_64	(0xFFFFFFFFFFFFFFFFuLL)

/* Sessions: table callbacks and index functions (defined below) */
static void mds_session_destroy(rfs4_entry_t);
static bool_t mds_session_expiry(rfs4_entry_t);
static bool_t mds_session_create(rfs4_entry_t, void *);
static uint32_t sessid_hash(void *);
static bool_t sessid_compare(rfs4_entry_t, void *);
static void *sessid_mkkey(rfs4_entry_t);

extern int (*mds_recall_lo)(struct mds_reclo_args *, cred_t *);

extern char *kstrdup(const char *);

extern rfs4_client_t *findclient(nfs_server_instance_t *, nfs_client_id4 *,
    bool_t *, rfs4_client_t *);

extern rfs4_client_t *findclient_by_id(nfs_server_instance_t *, clientid4);

extern rfs4_openowner_t *findopenowner(nfs_server_instance_t *, open_owner4 *,
    bool_t *, seqid4);

extern void v4prot_sstor_init(nfs_server_instance_t *);

extern void rfs4_ss_retrieve_state(nfs_server_instance_t *);
extern int nfs_doorfd;

/* State table sizing: small in DEBUG to exercise hash collisions */
#ifdef DEBUG
#define	MDS_TABSIZE 17
#else
#define	MDS_TABSIZE 2047
#endif

#define	MDS_MAXTABSZ 1024*1024

extern uint32_t clientid_hash(void *);

/*
 * Returns the instance's capabilities flag word, in the form of:
 *
 *	EXCHGID4_FLAG_USE_NON_PNFS
 *	EXCHGID4_FLAG_USE_PNFS_MDS
 *	EXCHGID4_FLAG_USE_PNFS_DS
 *
 * A NULL instance yields 0 (no capabilities).
 */
uint32_t
mds_get_capabilities(nfs_server_instance_t *instp)
{
	uint32_t my_abilities = 0;

	if (instp)
		my_abilities =
		    instp->inst_flags & EXCHGID4_FLAG_MASK_PNFS;
	return (my_abilities);
}


/*
 * Expiry callback that never expires an entry.  Used for state
 * tables whose entries are torn down explicitly rather than by
 * the reaper.
 */
/*ARGSUSED*/
static bool_t
mds_do_not_expire(rfs4_entry_t u_entry)
{
	return (FALSE);
}

/*
 * Manufacture a v4.1 stateid for the given database entry:
 * boottime of the owning instance + the dbe's id; the change
 * sequence starts at 1 as required for a freshly minted stateid.
 */
/*ARGSUSED*/
static stateid_t
mds_create_stateid(rfs4_dbe_t *dbe, stateid_type_t id_type)
{
	stateid_t id;

	id.v41_bits.boottime = dbe_to_instp(dbe)->start_time;
	id.v41_bits.state_ident = rfs4_dbe_getid(dbe);
	id.v41_bits.chgseq = 1;
	id.v41_bits.type = id_type;
	id.v41_bits.pid = 0;

	return (id);
}


/*
 * Look up (or, if *create, build) the open-owner entry for
 * `openowner' in this instance's openowner index.
 */
rfs4_openowner_t *
mds_findopenowner(nfs_server_instance_t *instp, open_owner4 *openowner,
    bool_t *create)
{
	rfs4_openowner_t *op;
	rfs4_openowner_t arg;

	arg.owner = *openowner;
	arg.open_seqid = 0;
	op = (rfs4_openowner_t *)rfs4_dbsearch(instp->openowner_idx,
	    openowner, create, &arg, RFS4_DBS_VALID);
	return (op);
}

/*
 * Look up (or create) the lock-owner state associated with the
 * (lockowner, open state) pair.  The instance is derived from the
 * lockowner's dbe.
 */
rfs4_lo_state_t *
mds_findlo_state_by_owner(rfs4_lockowner_t *lo,
    rfs4_state_t *sp, bool_t *create)
{
	rfs4_lo_state_t *lsp;
	rfs4_lo_state_t arg;
	nfs_server_instance_t *instp;

	arg.locker = lo;
	arg.state = sp;

	instp = dbe_to_instp(lo->dbe);

	lsp = (rfs4_lo_state_t *)rfs4_dbsearch(instp->lo_state_owner_idx,
	    &arg, create, &arg, RFS4_DBS_VALID);

	return (lsp);
}

/* well clearly this needs to be cleaned up..
*/ 187 typedef union { 188 struct { 189 uint32_t start_time; 190 uint32_t c_id; 191 } impl_id; 192 clientid4 id4; 193 } cid; 194 195 int 196 mds_check_stateid_seqid(rfs4_state_t *sp, stateid4 *stateid) 197 { 198 stateid_t *id = (stateid_t *)stateid; 199 200 if (rfs4_lease_expired(sp->owner->client)) 201 return (NFS4_CHECK_STATEID_EXPIRED); 202 203 /* Stateid is some time in the future - that's bad */ 204 if (sp->stateid.v41_bits.chgseq < id->v41_bits.chgseq) 205 return (NFS4_CHECK_STATEID_BAD); 206 207 if (sp->closed == TRUE) 208 return (NFS4_CHECK_STATEID_CLOSED); 209 210 return (NFS4_CHECK_STATEID_OKAY); 211 } 212 213 int 214 mds_fh_is_exi(struct exportinfo *exi, nfs41_fh_fmt_t *fhp) 215 { 216 if (exi->exi_fid.fid_len != fhp->fh.v1.export_fid.len) 217 return (0); 218 219 if (bcmp(exi->exi_fid.fid_data, fhp->fh.v1.export_fid.val, 220 fhp->fh.v1.export_fid.len) != 0) 221 return (0); 222 223 if (exi->exi_fsid.val[0] != fhp->fh.v1.export_fsid.val[0] || 224 exi->exi_fsid.val[1] != fhp->fh.v1.export_fsid.val[1]) 225 return (0); 226 227 return (1); 228 } 229 230 /* 231 * This function is used as a target for the rfs4_dbe_walk() call 232 * below. The purpose of this function is to see if the 233 * lockowner_state refers to a file that resides within the exportinfo 234 * export. If so, then remove the lock_owner state (file locks and 235 * share "locks") for this object since the intent is the server is 236 * unexporting the specified directory. 
Be sure to invalidate the
 * object after the state has been released
 */
void
mds_lo_state_walk_callout(rfs4_entry_t u_entry, void *e)
{
	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
	struct exportinfo *exi = (struct exportinfo *)e;
	nfs41_fh_fmt_t *fhp;

	/* filehandle of the file this lock state covers */
	fhp =
	    (nfs41_fh_fmt_t *)lsp->state->finfo->filehandle.nfs_fh4_val;

	if (mds_fh_is_exi(exi, fhp)) {
		/* close releases the locks; then drop both entries */
		rfs4_state_close(lsp->state, FALSE, FALSE, CRED());
		rfs4_dbe_invalidate(lsp->dbe);
		rfs4_dbe_invalidate(lsp->state->dbe);
	}
}

/*
 * This function is used as a target for the rfs4_dbe_walk() call
 * below. The purpose of this function is to see if the state refers
 * to a file that resides within the exportinfo export. If so, then
 * remove the open state for this object since the intent is the
 * server is unexporting the specified directory. The main result for
 * this type of entry is to invalidate it such it will not be found in
 * the future.
 */
void
mds_state_walk_callout(rfs4_entry_t u_entry, void *e)
{
	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
	struct exportinfo *exi = (struct exportinfo *)e;
	nfs41_fh_fmt_t *fhp;

	fhp =
	    (nfs41_fh_fmt_t *)sp->finfo->filehandle.nfs_fh4_val;

	if (mds_fh_is_exi(exi, fhp)) {
		/* TRUE: close all associated lock state too */
		rfs4_state_close(sp, TRUE, FALSE, CRED());
		rfs4_dbe_invalidate(sp->dbe);
	}
}

/*
 * This function is used as a target for the rfs4_dbe_walk() call
 * below. The purpose of this function is to see if the state refers
 * to a file that resides within the exportinfo export. If so, then
 * remove the deleg state for this object since the intent is the
 * server is unexporting the specified directory. The main result for
 * this type of entry is to invalidate it such it will not be found in
 * the future.
289 */ 290 void 291 mds_deleg_state_walk_callout(rfs4_entry_t u_entry, void *e) 292 { 293 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry; 294 struct exportinfo *exi = (struct exportinfo *)e; 295 nfs41_fh_fmt_t *fhp; 296 297 fhp = 298 (nfs41_fh_fmt_t *)dsp->finfo->filehandle.nfs_fh4_val; 299 300 if (mds_fh_is_exi(exi, fhp)) { 301 rfs4_dbe_invalidate(dsp->dbe); 302 } 303 } 304 305 /* 306 * This function is used as a target for the rfs4_dbe_walk() call 307 * below. The purpose of this function is to see if the state refers 308 * to a file that resides within the exportinfo export. If so, then 309 * release vnode hold for this object since the intent is the server 310 * is unexporting the specified directory. Invalidation will prevent 311 * this struct from being found in the future. 312 */ 313 void 314 mds_file_walk_callout(rfs4_entry_t u_entry, void *e) 315 { 316 rfs4_file_t *fp = (rfs4_file_t *)u_entry; 317 struct exportinfo *exi = (struct exportinfo *)e; 318 nfs41_fh_fmt_t *fhp; 319 vnode_t *vp; 320 nfs_server_instance_t *instp; 321 322 fhp = (nfs41_fh_fmt_t *)fp->filehandle.nfs_fh4_val; 323 324 if (mds_fh_is_exi(exi, fhp) == 0) 325 return; 326 327 if ((vp = fp->vp) != NULL) { 328 329 instp = dbe_to_instp(fp->dbe); 330 ASSERT(instp); 331 /* 332 * don't leak monitors and remove the reference 333 * put on the vnode when the delegation was granted. 
334 */ 335 if (fp->dinfo->dtype == OPEN_DELEGATE_READ) { 336 (void) fem_uninstall(vp, instp->deleg_rdops, 337 (void *)fp); 338 vn_open_downgrade(vp, FREAD); 339 } else if (fp->dinfo->dtype == OPEN_DELEGATE_WRITE) { 340 (void) fem_uninstall(vp, instp->deleg_wrops, 341 (void *)fp); 342 vn_open_downgrade(vp, FREAD|FWRITE); 343 } 344 mutex_enter(&vp->v_lock); 345 (void) vsd_set(vp, instp->vkey, NULL); 346 mutex_exit(&vp->v_lock); 347 VN_RELE(vp); 348 fp->vp = NULL; 349 } 350 rfs4_dbe_invalidate(fp->dbe); 351 } 352 353 /* 354 * -------------------------------------------------------- 355 * MDS - NFSv4.1 Sessions 356 * -------------------------------------------------------- 357 */ 358 static uint32_t 359 sessid_hash(void *key) 360 { 361 sid *idp = key; 362 363 return (idp->impl_id.s_id); 364 } 365 366 static bool_t 367 sessid_compare(rfs4_entry_t entry, void *key) 368 { 369 mds_session_t *sp = (mds_session_t *)entry; 370 sessionid4 *idp = (sessionid4 *)key; 371 372 return (bcmp(idp, &sp->sn_sessid, sizeof (sessionid4)) == 0); 373 } 374 375 static void * 376 sessid_mkkey(rfs4_entry_t entry) 377 { 378 mds_session_t *sp = (mds_session_t *)entry; 379 380 return (&sp->sn_sessid); 381 } 382 383 static bool_t 384 sess_clid_compare(rfs4_entry_t entry, void *key) 385 { 386 mds_session_t *sessp = (mds_session_t *)entry; 387 clientid4 *idp = key; 388 389 return (*idp == sessp->sn_clnt->clientid); 390 } 391 392 static void * 393 sess_clid_mkkey(rfs4_entry_t entry) 394 { 395 return (&(((mds_session_t *)entry)->sn_clnt->clientid)); 396 } 397 398 void 399 rfs41_session_rele(mds_session_t *sp) 400 { 401 rfs4_dbe_rele(sp->dbe); 402 } 403 404 mds_session_t * 405 mds_findsession_by_id(nfs_server_instance_t *instp, sessionid4 sessid) 406 { 407 mds_session_t *sp; 408 rfs4_index_t *idx = instp->mds_session_idx; 409 bool_t create = FALSE; 410 411 rw_enter(&instp->findsession_lock, RW_READER); 412 sp = (mds_session_t *)rfs4_dbsearch(idx, sessid, &create, NULL, 413 RFS4_DBS_VALID); 414 
rw_exit(&instp->findsession_lock); 415 416 return (sp); 417 } 418 419 mds_session_t * 420 mds_findsession_by_clid(nfs_server_instance_t *instp, clientid4 clid) 421 { 422 mds_session_t *sp; 423 bool_t create = FALSE; 424 425 rw_enter(&instp->findsession_lock, RW_READER); 426 sp = (mds_session_t *)rfs4_dbsearch(instp->mds_sess_clientid_idx, &clid, 427 &create, NULL, RFS4_DBS_VALID); 428 rw_exit(&instp->findsession_lock); 429 430 return (sp); 431 } 432 433 /* 434 * A clientid can have multiple sessions associated with it. Hence, 435 * performing a raw 'mds_findsession' (even for a create) might 436 * yield a list of sessions associated with the clientid in question. 437 * Instead of delving deep into the rfs4_dbsearch engine to correct 438 * this now, we'll call our function directly and create an association 439 * between the session table and both primary (sessionid) index and 440 * secondary (clientid) index for the newly created session. 441 */ 442 mds_session_t * 443 mds_createsession(nfs_server_instance_t *instp, session41_create_t *ap) 444 { 445 mds_session_t *sp = NULL; 446 rfs4_index_t *idx = instp->mds_session_idx; 447 448 rw_enter(&instp->findsession_lock, RW_WRITER); 449 if ((sp = (mds_session_t *)rfs4_dbcreate(idx, (void *)ap)) == NULL) { 450 DTRACE_PROBE1(mds__srv__createsession__fail, 451 session41_create_t *, ap); 452 } 453 rw_exit(&instp->findsession_lock); 454 return (sp); 455 } 456 457 /* 458 * mds_session_inval invalidates the session so other 459 * threads won't "find" the session to place additional 460 * callbacks. Destroy session even if no backchannel has 461 * been established. 
462 */ 463 nfsstat4 464 mds_session_inval(mds_session_t *sp) 465 { 466 nfsstat4 status; 467 468 ASSERT(sp != NULL); 469 ASSERT(rfs4_dbe_islocked(sp->dbe)); 470 rfs4_dbe_invalidate(sp->dbe); 471 472 if (SN_CB_CHAN_EST(sp)) { 473 sess_channel_t *bcp = sp->sn_back; 474 sess_bcsd_t *bsdp; 475 476 rw_enter(&bcp->cn_lock, RW_READER); 477 if ((bsdp = CTOBSD(bcp)) == NULL) 478 cmn_err(CE_PANIC, "mds_session_inval: BCSD Not Set"); 479 480 mutex_enter(&bsdp->bsd_lock); 481 status = bsdp->bsd_stat = slot_cb_status(bsdp->bsd_stok); 482 mutex_exit(&bsdp->bsd_lock); 483 484 rw_exit(&bcp->cn_lock); 485 } else { 486 cmn_err(CE_NOTE, "No back chan established"); 487 status = NFS4_OK; 488 } 489 return (status); 490 } 491 492 /* 493 * 1) Invalidate the session in the DB (so it can't be found anymore) 494 * 2) Verify that there's no outstanding CB traffic. If so, return err. 495 * 3) Eventually the session will be reaped by the reaper_thread 496 */ 497 nfsstat4 498 mds_destroysession(mds_session_t *sp) 499 { 500 nfsstat4 cbs; 501 502 rfs4_dbe_lock(sp->dbe); 503 cbs = mds_session_inval(sp); 504 rfs4_dbe_unlock(sp->dbe); 505 506 /* 507 * XXX - Destruction of a session should not affect any state 508 * bound to the clientid (Section 18.37.3 of draft-17). 509 * For now, keep destroying the clid until DESTROY_CLIENTID 510 * is explicitly done (see Section 18.50.4 of draft-17). 511 * The client struct will expire and the session no longer keeps 512 * a hold on the client struct, so an explicit call to client close 513 * is not needed. 
514 */ 515 if (cbs == NFS4_OK) { 516 rfs41_session_rele(sp); 517 } 518 return (cbs); 519 } 520 521 sn_chan_dir_t 522 pd2cd(channel_dir_from_server4 dir) 523 { 524 switch (dir) { 525 case CDFS4_FORE: 526 return (SN_CHAN_FORE); 527 528 case CDFS4_BACK: 529 return (SN_CHAN_BACK); 530 531 case CDFS4_BOTH: 532 default: 533 return (SN_CHAN_BOTH); 534 } 535 /* NOTREACHED */ 536 } 537 538 /* 539 * Delegation CB race detection support 540 */ 541 void 542 rfs41_deleg_rs_hold(rfs4_deleg_state_t *dsp) 543 { 544 atomic_add_32(&dsp->rs.refcnt, 1); 545 } 546 547 void 548 rfs41_deleg_rs_rele(rfs4_deleg_state_t *dsp) 549 { 550 ASSERT(dsp->rs.refcnt > 0); 551 atomic_add_32(&dsp->rs.refcnt, -1); 552 if (dsp->rs.refcnt == 0) { 553 bzero(dsp->rs.sessid, sizeof (sessionid4)); 554 dsp->rs.seqid = dsp->rs.slotno = 0; 555 } 556 } 557 558 void 559 rfs41_seq4_hold(void *data, uint32_t flag) 560 { 561 bit_attr_t *p = (bit_attr_t *)data; 562 uint32_t idx = log2(flag); 563 564 ASSERT(p[idx].ba_bit == flag); 565 atomic_add_32(&p[idx].ba_refcnt, 1); 566 p[idx].ba_trigger = gethrestime_sec(); 567 } 568 569 void 570 rfs41_seq4_rele(void *data, uint32_t flag) 571 { 572 bit_attr_t *p = (bit_attr_t *)data; 573 uint32_t idx = log2(flag); 574 575 ASSERT(p[idx].ba_bit == flag); 576 if (p[idx].ba_refcnt > 0) 577 atomic_add_32(&p[idx].ba_refcnt, -1); 578 p[idx].ba_trigger = gethrestime_sec(); 579 } 580 581 sess_channel_t * 582 rfs41_create_session_channel(channel_dir_from_server4 dir) 583 { 584 sess_channel_t *cp; 585 sess_bcsd_t *bp; 586 587 cp = (sess_channel_t *)kmem_zalloc(sizeof (sess_channel_t), KM_SLEEP); 588 rw_init(&cp->cn_lock, NULL, RW_DEFAULT, NULL); 589 590 switch (dir) { 591 case CDFS4_FORE: 592 break; 593 594 case CDFS4_BOTH: 595 case CDFS4_BACK: 596 /* BackChan Specific Data */ 597 bp = (sess_bcsd_t *)kmem_zalloc(sizeof (sess_bcsd_t), KM_SLEEP); 598 mutex_init(&bp->bsd_lock, NULL, MUTEX_DEFAULT, NULL); 599 rw_init(&bp->bsd_rwlock, NULL, RW_DEFAULT, NULL); 600 cp->cn_csd = (sess_bcsd_t *)bp; 
601 break; 602 } 603 return (cp); 604 } 605 606 void 607 rfs41_destroy_session_channel(sess_channel_t *cp) 608 { 609 sess_bcsd_t *bp; 610 611 if (cp == NULL) 612 return; 613 614 switch (cp->cn_dir) { 615 case CDFS4_FORE: 616 break; 617 618 case CDFS4_BOTH: 619 case CDFS4_BACK: 620 bp = (sess_bcsd_t *)cp->cn_csd; 621 rw_destroy(&bp->bsd_rwlock); 622 mutex_destroy(&bp->bsd_lock); 623 kmem_free(bp, sizeof (sess_bcsd_t)); 624 break; 625 } 626 rw_destroy(&cp->cn_lock); 627 kmem_free(cp, sizeof (sess_channel_t)); 628 } 629 630 /* 631 * Create/Initialize the session for this rfs4_client_t. Also 632 * create its slot replay cache as per the server's resource 633 * constraints. 634 */ 635 /* ARGSUSED */ 636 static bool_t 637 mds_session_create(rfs4_entry_t u_entry, 638 void *arg) 639 { 640 mds_session_t *sp = (mds_session_t *)u_entry; 641 session41_create_t *ap = (session41_create_t *)arg; 642 sess_channel_t *ocp = NULL; 643 sid *sidp; 644 SVCMASTERXPRT *mxprt; 645 uint32_t i; 646 int bdrpc; 647 rpcprog_t prog; 648 channel_dir_from_server4 dir; 649 sess_bcsd_t *bsdp; 650 nfs_server_instance_t *instp; 651 652 ASSERT(sp != NULL); 653 if (sp == NULL) 654 return (FALSE); 655 656 instp = dbe_to_instp(sp->dbe); 657 658 /* 659 * Back pointer to rfs4_client_t and sessionid 660 */ 661 sp->sn_clnt = (rfs4_client_t *)ap->cs_client; 662 mxprt = (SVCMASTERXPRT *)ap->cs_xprt->xp_master; 663 664 /* 665 * Handcrafting the session id 666 */ 667 sidp = (sid *)&sp->sn_sessid; 668 sidp->impl_id.pad0 = 0x00000000; 669 sidp->impl_id.pad1 = 0xFFFFFFFF; 670 sidp->impl_id.start_time = instp->start_time; 671 sidp->impl_id.s_id = (uint32_t)rfs4_dbe_getid(sp->dbe); 672 673 /* 674 * Process csa_flags; note that CREATE_SESSION4_FLAG_CONN_BACK_CHAN 675 * is processed below since it affects direction and setup of the 676 * backchannel accordingly. 
677 */ 678 sp->sn_csflags = 0; 679 if (ap->cs_aotw.csa_flags & CREATE_SESSION4_FLAG_PERSIST) 680 /* XXX - Worry about persistence later */ 681 sp->sn_csflags &= ~CREATE_SESSION4_FLAG_PERSIST; 682 683 if (ap->cs_aotw.csa_flags & CREATE_SESSION4_FLAG_CONN_RDMA) 684 /* XXX - No RDMA for now */ 685 sp->sn_csflags &= ~CREATE_SESSION4_FLAG_CONN_RDMA; 686 687 /* 688 * Initialize some overall sessions values 689 */ 690 sp->sn_bc.progno = ap->cs_aotw.csa_cb_program; 691 sp->sn_laccess = gethrestime_sec(); 692 sp->sn_flags = 0; 693 694 /* 695 * Check if client has specified that the FORE channel should 696 * also be used for call back traffic (ie. bidir RPC). If so, 697 * let's try to accomodate the request. 698 */ 699 DTRACE_PROBE1(csa__flags, uint32_t, ap->cs_aotw.csa_flags); 700 bdrpc = ap->cs_aotw.csa_flags & CREATE_SESSION4_FLAG_CONN_BACK_CHAN; 701 702 if (bdrpc) { 703 SVCCB_ARGS cbargs; 704 prog = sp->sn_bc.progno; 705 cbargs.xprt = mxprt; 706 cbargs.prog = prog; 707 cbargs.vers = NFS_CB; 708 cbargs.family = AF_INET; 709 cbargs.tag = (void *)sp->sn_sessid; 710 711 if (SVC_CTL(ap->cs_xprt, SVCCTL_SET_CBCONN, (void *)&cbargs)) { 712 /* 713 * Couldn't create a bi-dir RPC connection. Reset 714 * bdrpc so that the session's channel flags are 715 * set appropriately and the client knows it needs 716 * to do the BIND_CONN_TO_SESSION dance in order 717 * to establish a callback path. 718 */ 719 bdrpc = 0; 720 } 721 } 722 723 /* 724 * Session's channel flags depending on bdrpc 725 */ 726 sp->sn_bdrpc = bdrpc; 727 dir = sp->sn_bdrpc ? (CDFS4_FORE | CDFS4_BACK) : CDFS4_FORE; 728 ocp = rfs41_create_session_channel(dir); 729 ocp->cn_dir = dir; 730 sp->sn_fore = ocp; 731 732 /* 733 * XXX: Let's not worry about channel attribute enforcement now. 734 * This should occur as part of the COMPOUND processing (in 735 * the dispatch routine); not on channel creation. 
736 */ 737 ocp->cn_attrs = ap->cs_aotw.csa_fore_chan_attrs; 738 if (ocp->cn_attrs.ca_maxrequests > slrc_slot_size) 739 ocp->cn_attrs.ca_maxrequests = slrc_slot_size; 740 741 /* 742 * No need for locks/synchronization at this time, 743 * since we're barely creating the session. 744 */ 745 if (sp->sn_bdrpc) { 746 ocp->cn_attrs = ap->cs_aotw.csa_back_chan_attrs; 747 748 /* 749 * bcsd got built as part of the channel's construction. 750 */ 751 if ((bsdp = CTOBSD(ocp)) == NULL) { 752 cmn_err(CE_PANIC, "Back Chan Spec Data Not Set\t" 753 "<Internal Inconsistency>"); 754 } 755 bsdp->bsd_stok = sltab_create(slrc_slot_size); /* bdrpc */ 756 sp->sn_csflags |= CREATE_SESSION4_FLAG_CONN_BACK_CHAN; 757 sp->sn_back = ocp; 758 759 } else { 760 /* 761 * If not doing bdrpc, then we expect the client to perform 762 * an explicit BIND_CONN_TO_SESSION if it wants callback 763 * traffic. Subsequently, the cb channel should be set up 764 * at that point along with its corresponding sltab (see 765 * rfs41_bc_setup). 766 */ 767 sp->sn_csflags &= ~CREATE_SESSION4_FLAG_CONN_BACK_CHAN; 768 sp->sn_back = NULL; 769 prog = 0; 770 771 /* 772 * XXX 08/15/2008 (rick) - if the channel is not bidir when 773 * created in CREATE_SESSION, then we should save off 774 * the ap->cs_aotw.csa_back_chan_attrs in case later 775 * a bc2s is called to create the back channel. 776 */ 777 } 778 779 /* 780 * We're just creating the session... there _shouldn't_ be any 781 * other threads wanting to add connections to this sessions' 782 * conn list, so we purposefully do _not_ take the ocp->cn_lock 783 * 784 * sn_bc fields are all initialized to 0 (via zalloc) 785 */ 786 787 SVC_CTL(ap->cs_xprt, SVCCTL_SET_TAG, (void *)sp->sn_sessid); 788 789 if (sp->sn_bdrpc) { 790 atomic_add_32(&sp->sn_bc.pngcnt, 1); 791 } 792 793 /* 794 * Now we allocate space for the slrc, initializing each slot's 795 * sequenceid and slotid to zero and a (pre)cached result of 796 * NFS4ERR_SEQ_MISORDERED. 
Note that we zero out the entries 797 * by virtue of the z-alloc. 798 */ 799 sp->sn_slrc = 800 (rfs41_slrc_t *)kmem_zalloc(sizeof (rfs41_slrc_t), KM_SLEEP); 801 sp->sn_slrc->sc_maxslot = ocp->cn_attrs.ca_maxrequests; 802 803 for (i = 0; i < sp->sn_slrc->sc_maxslot; i++) { 804 sp->sn_slrc->sc_slot[i].status = NFS4ERR_SEQ_MISORDERED; 805 sp->sn_slrc->sc_slot[i].res.status = NFS4ERR_SEQ_MISORDERED; 806 sp->sn_slrc->sc_slot[i].p = NULL; 807 } 808 809 /* only initialize bits relevant to session scope */ 810 bzero(&sp->sn_seq4, sizeof (bit_attr_t) * BITS_PER_WORD); 811 for (i = 1; i <= SEQ4_HIGH_BIT && i != 0; i <<= 1) { 812 uint32_t idx = log2(i); 813 814 switch (i) { 815 case SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING: 816 case SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED: 817 case SEQ4_STATUS_CB_PATH_DOWN_SESSION: 818 case SEQ4_STATUS_BACKCHANNEL_FAULT: 819 sp->sn_seq4[idx].ba_bit = i; 820 break; 821 default: 822 /* already bzero'ed */ 823 break; 824 } 825 } 826 827 if (sp->sn_bdrpc) { 828 /* 829 * Recall that for CB_PATH_DOWN[_SESSION], the refcnt 830 * indicates the number of active back channel conns 831 */ 832 rfs41_seq4_hold(&sp->sn_seq4, SEQ4_STATUS_CB_PATH_DOWN_SESSION); 833 rfs41_seq4_hold(&sp->sn_clnt->seq4, SEQ4_STATUS_CB_PATH_DOWN); 834 } 835 return (TRUE); 836 } 837 838 /* ARGSUSED */ 839 static void 840 mds_session_destroy(rfs4_entry_t u_entry) 841 { 842 mds_session_t *sp = (mds_session_t *)u_entry; 843 sess_bcsd_t *bsdp; 844 845 if (SN_CB_CHAN_EST(sp) && ((bsdp = CTOBSD(sp->sn_back)) != NULL)) 846 sltab_destroy(bsdp->bsd_stok); 847 848 /* 849 * XXX - A session can have multiple BC clnt handles that need 850 * to be discarded. mds_session_inval calls CLNT_DESTROY 851 * which will remove the CB client handle from the global 852 * list (cb_clnt_list) now. This will have to change once 853 * we manage the BC clnt handles per session. 854 */ 855 856 /* 857 * Remove the fore and back channels; we still 858 * need to drop all associated connections. 
(XXX) 859 */ 860 rfs41_destroy_session_channel(sp->sn_fore); 861 if (!sp->sn_bdrpc) 862 rfs41_destroy_session_channel(sp->sn_back); 863 864 /* 865 * Nuke slot replay cache for this session 866 */ 867 kmem_free(sp->sn_slrc, sizeof (rfs41_slrc_t)); 868 } 869 870 static bool_t 871 mds_session_expiry(rfs4_entry_t u_entry) 872 { 873 mds_session_t *sp = (mds_session_t *)u_entry; 874 875 if (sp == NULL || rfs4_dbe_is_invalid(sp->dbe)) 876 return (TRUE); 877 878 return (FALSE); 879 } 880 881 void 882 mds_kill_session_callout(rfs4_entry_t u_entry, void *arg) 883 { 884 rfs4_client_t *cp = (rfs4_client_t *)arg; 885 mds_session_t *sp = (mds_session_t *)u_entry; 886 887 if (sp->sn_clnt == cp && !(rfs4_dbe_is_invalid(sp->dbe))) 888 mds_session_destroy(u_entry); 889 } 890 891 void 892 mds_clean_up_sessions(rfs4_client_t *cp) 893 { 894 nfs_server_instance_t *instp; 895 896 instp = dbe_to_instp(cp->dbe); 897 898 if (instp->mds_session_tab != NULL) 899 rfs4_dbe_walk(instp->mds_session_tab, 900 mds_kill_session_callout, cp); 901 } 902 903 904 /* 905 * ----------------------------------------------- 906 * MDS: Layout tables. 
907 * ----------------------------------------------- 908 * 909 */ 910 static uint32_t 911 mds_layout_hash(void *key) 912 { 913 return ((uint32_t)(uintptr_t)key); 914 } 915 916 static bool_t 917 mds_layout_compare(rfs4_entry_t entry, void *key) 918 { 919 mds_layout_t *lp = (mds_layout_t *)entry; 920 921 return (lp->layout_id == (int)(uintptr_t)key); 922 } 923 924 static void * 925 mds_layout_mkkey(rfs4_entry_t entry) 926 { 927 mds_layout_t *lp = (mds_layout_t *)entry; 928 929 return ((void *)(uintptr_t)lp->layout_id); 930 } 931 932 struct mds_gather_args { 933 struct mds_addlo_args lo_arg; 934 uint32_t dev_id; 935 ds_addr_t *dev_ptr[100]; 936 int max_devs_needed; 937 int dex; 938 }; 939 940 typedef struct { 941 uint32_t id; 942 nfsv4_1_file_layout_ds_addr4 *ds_addr4; 943 } mds_addmpd_t; 944 945 /* 946 * XXX: 947 * 948 * this of course should trigger a recall of the 949 * associated layouts for the mpd. 950 */ 951 void 952 mds_nuke_mpd(nfs_server_instance_t *instp, uint32_t mpd_id) 953 { 954 bool_t create = FALSE; 955 rfs4_entry_t e; 956 957 rw_enter(&instp->mds_mpd_lock, RW_WRITER); 958 if ((e = rfs4_dbsearch(instp->mds_mpd_idx, (void *)(uintptr_t)mpd_id, 959 &create, NULL, RFS4_DBS_VALID)) != NULL) { 960 rfs4_dbe_invalidate(e->dbe); 961 } 962 rw_exit(&instp->mds_mpd_lock); 963 } 964 965 void 966 mds_gather_devs(rfs4_entry_t entry, void *arg) 967 { 968 ds_addr_t *dp = (ds_addr_t *)entry; 969 struct mds_gather_args *gap = (struct mds_gather_args *)arg; 970 971 if (rfs4_dbe_skip_or_invalid(dp->dbe)) 972 return; 973 974 if (gap->dex < gap->max_devs_needed) { 975 gap->lo_arg.lo_devs[gap->dex] = rfs4_dbe_getid(dp->dbe); 976 gap->dev_ptr[gap->dex] = dp; 977 gap->dex++; 978 } 979 } 980 981 /* 982 */ 983 mds_mpd_t * 984 mds_gen_mpd(nfs_server_instance_t *instp, struct mds_gather_args *args) 985 { 986 nfsv4_1_file_layout_ds_addr4 ds_dev; 987 988 mds_addmpd_t map = { .id = 0, .ds_addr4 = &ds_dev }; 989 mds_mpd_t *mp; 990 uint_t len; 991 int ii; 992 uint32_t *sivp; 993 
multipath_list4 *mplp; 994 995 /* 996 * build a nfsv4_1_file_layout_ds_addr4, encode it and 997 * cache it in state_store. 998 */ 999 len = args->dex; 1000 1001 /* allocate space for the indices */ 1002 sivp = ds_dev.nflda_stripe_indices.nflda_stripe_indices_val = 1003 kmem_zalloc(len * sizeof (uint32_t), KM_SLEEP); 1004 1005 ds_dev.nflda_stripe_indices.nflda_stripe_indices_len = len; 1006 1007 /* populate the stripe indices */ 1008 for (ii = 0; ii < len; ii++) 1009 sivp[ii] = ii; 1010 1011 /* 1012 * allocate space for the multipath_list4 (for now we just 1013 * have the one path) 1014 */ 1015 mplp = ds_dev.nflda_multipath_ds_list.nflda_multipath_ds_list_val = 1016 kmem_zalloc(len * sizeof (multipath_list4), KM_SLEEP); 1017 1018 ds_dev.nflda_multipath_ds_list.nflda_multipath_ds_list_len = len; 1019 1020 /* 1021 * Now populate the netaddrs using the stashed ds_addr 1022 * pointers 1023 */ 1024 for (ii = 0; ii < len; ii++) { 1025 ds_addr_t *dp; 1026 1027 mplp[ii].multipath_list4_len = 1; 1028 dp = args->dev_ptr[ii]; 1029 mplp[ii].multipath_list4_val = &dp->dev_addr; 1030 } 1031 1032 /* 1033 * Add the multipath_list4, this will encode and cache 1034 * the result. 
1035 */ 1036 rw_enter(&instp->mds_mpd_lock, RW_WRITER); 1037 mp = (mds_mpd_t *)rfs4_dbcreate(instp->mds_mpd_idx, (void *)&map); 1038 rw_exit(&instp->mds_mpd_lock); 1039 1040 /* now clean up after yourself dear boy */ 1041 kmem_free(mplp, len * sizeof (multipath_list4)); 1042 kmem_free(sivp, len * sizeof (uint32_t)); 1043 return (mp); 1044 } 1045 1046 int mds_default_stripe = 32; 1047 int mds_max_lo_devs = 20; 1048 1049 mds_layout_t * 1050 mds_gen_default_layout(nfs_server_instance_t *instp, int max_devs_needed) 1051 { 1052 struct mds_gather_args args; 1053 mds_layout_t *lop; 1054 1055 bzero(&args, sizeof (args)); 1056 1057 args.max_devs_needed = MIN(max_devs_needed, 1058 MIN(mds_max_lo_devs, 99)); 1059 1060 rw_enter(&instp->ds_addr_lock, RW_READER); 1061 rfs4_dbe_walk(instp->ds_addr_tab, mds_gather_devs, &args); 1062 rw_exit(&instp->ds_addr_lock); 1063 1064 /* 1065 * if we didn't find any devices then we do no service 1066 */ 1067 if (args.dex == 0) 1068 return (NULL); 1069 1070 args.lo_arg.loid = 1; 1071 args.lo_arg.lo_stripe_unit = mds_default_stripe * 1024; 1072 1073 rw_enter(&instp->mds_layout_lock, RW_WRITER); 1074 lop = (mds_layout_t *)rfs4_dbcreate(instp->mds_layout_idx, 1075 (void *)&args); 1076 rw_exit(&instp->mds_layout_lock); 1077 1078 return (lop); 1079 } 1080 1081 void 1082 mds_nuke_layout(nfs_server_instance_t *instp, uint32_t layout_id) 1083 { 1084 bool_t create = FALSE; 1085 rfs4_entry_t e; 1086 1087 rw_enter(&instp->mds_layout_lock, RW_WRITER); 1088 if ((e = rfs4_dbsearch(instp->mds_layout_idx, 1089 (void *)(uintptr_t)layout_id, 1090 &create, 1091 NULL, 1092 RFS4_DBS_VALID)) != NULL) { 1093 rfs4_dbe_invalidate(e->dbe); 1094 } 1095 rw_exit(&instp->mds_layout_lock); 1096 } 1097 1098 /*ARGSUSED*/ 1099 static bool_t 1100 mds_layout_create(rfs4_entry_t u_entry, void *arg) 1101 { 1102 mds_layout_t *lp = (mds_layout_t *)u_entry; 1103 mds_mpd_t *mp; 1104 ds_addr_t *dp; 1105 struct mds_gather_args *gap = (struct mds_gather_args *)arg; 1106 struct 
mds_addlo_args *alop = &gap->lo_arg; 1107 1108 nfs_server_instance_t *instp; 1109 int i; 1110 1111 1112 if (alop->loid == 0) 1113 lp->layout_id = rfs4_dbe_getid(lp->dbe); 1114 else 1115 lp->layout_id = alop->loid; 1116 1117 instp = dbe_to_instp(lp->dbe); 1118 1119 lp->layout_type = LAYOUT4_NFSV4_1_FILES; 1120 lp->stripe_unit = alop->lo_stripe_unit; 1121 1122 for (i = 0; alop->lo_devs[i] && i < 100; i++) { 1123 lp->devs[i] = alop->lo_devs[i]; 1124 dp = mds_find_ds_addr(instp, alop->lo_devs[i]); 1125 /* lets hope this doesn't occur */ 1126 if (dp == NULL) 1127 return (FALSE); 1128 gap->dev_ptr[i] = dp; 1129 } 1130 1131 lp->stripe_count = i; 1132 1133 /* Need to generate a device for this layout */ 1134 mp = mds_gen_mpd(instp, gap); 1135 1136 /* save the dev_id save the world */ 1137 lp->dev_id = mp->mpd_id; 1138 1139 return (TRUE); 1140 } 1141 1142 /*ARGSUSED*/ 1143 static void 1144 mds_layout_destroy(rfs4_entry_t bugger) 1145 { 1146 } 1147 1148 void 1149 mds_add_layout(struct mds_addlo_args *lop) 1150 { 1151 bool_t create = FALSE; 1152 rfs4_entry_t e; 1153 1154 rw_enter(&mds_server->mds_layout_lock, RW_WRITER); 1155 1156 if ((e = rfs4_dbsearch(mds_server->mds_layout_idx, 1157 (void *)(uintptr_t)lop->loid, 1158 &create, 1159 NULL, 1160 RFS4_DBS_VALID)) != NULL) { 1161 /* 1162 * Must have already existed, so invalidate 1163 * the entry in order to create a new one. 1164 */ 1165 rfs4_dbe_invalidate(e->dbe); 1166 } 1167 1168 if (rfs4_dbcreate(mds_server->mds_layout_idx, (void *)lop) == NULL) { 1169 printf("mds_add_layout: failed\n"); 1170 (void) set_errno(EFAULT); 1171 } 1172 rw_exit(&mds_server->mds_layout_lock); 1173 return; 1174 1175 } 1176 1177 #define ADDRHASH(key) ((unsigned long)(key) >> 3) 1178 1179 /* 1180 * ----------------------------------------------- 1181 * MDS: Layout Grant tables. 
 * -----------------------------------------------
 *
 */
/*
 * Primary grant index hash: combines the client and file pointers
 * (a grant is keyed by the (client, file) pair).
 */
static uint32_t
mds_layout_grant_hash(void *key)
{
	mds_layout_grant_t *lgp = (mds_layout_grant_t *)key;

	return (ADDRHASH(lgp->cp) ^ ADDRHASH(lgp->fp));
}

/* Two grants match when both the client and the file pointers match. */
static bool_t
mds_layout_grant_compare(rfs4_entry_t u_entry, void *key)
{
	mds_layout_grant_t *lgp = (mds_layout_grant_t *)u_entry;
	mds_layout_grant_t *klgp = (mds_layout_grant_t *)key;

	return (lgp->cp == klgp->cp && lgp->fp == klgp->fp);
}

/* The entry itself is the key (cp/fp are read out of it). */
static void *
mds_layout_grant_mkkey(rfs4_entry_t entry)
{
	return (entry);
}

/* Secondary index: hash a layout stateid by its state_ident. */
static uint32_t
mds_layout_grant_id_hash(void *key)
{
	stateid_t *id = (stateid_t *)key;

	return (id->v41_bits.state_ident);
}

/*
 * Match a grant against a layout stateid: the key must be a LAYOUTID
 * stateid whose boottime and state_ident both match the grant's.
 */
static bool_t
mds_layout_grant_id_compare(rfs4_entry_t entry, void *key)
{
	mds_layout_grant_t *lgp = (mds_layout_grant_t *)entry;
	stateid_t *id = (stateid_t *)key;
	bool_t rc;

	if (id->v41_bits.type != LAYOUTID)
		return (FALSE);

	rc = (lgp->lo_stateid.v41_bits.boottime == id->v41_bits.boottime &&
	    lgp->lo_stateid.v41_bits.state_ident == id->v41_bits.state_ident);

	return (rc);
}

static void *
mds_layout_grant_id_mkkey(rfs4_entry_t entry)
{
	mds_layout_grant_t *lgp = (mds_layout_grant_t *)entry;

	return (&lgp->lo_stateid);
}

struct mds_grant_args {
	mds_layout_t *lop;
};


/*
 * Grant table constructor.  'arg' is a template grant carrying the
 * client and file; holds a reference on both for the lifetime of
 * the grant and links the grant onto the per-client and per-file
 * grant lists.
 */
/*ARGSUSED*/
static bool_t
mds_layout_grant_create(rfs4_entry_t u_entry, void *arg)
{
	mds_layout_grant_t *lgp = (mds_layout_grant_t *)u_entry;
	rfs4_file_t *fp = ((mds_layout_grant_t *)arg)->fp;
	rfs4_client_t *cp = ((mds_layout_grant_t *)arg)->cp;

	rfs4_dbe_hold(fp->dbe);
	rfs4_dbe_hold(cp->dbe);

	lgp->lo_stateid = mds_create_stateid(lgp->dbe, LAYOUTID);
	lgp->fp = fp;
	lgp->cp = cp;

	/*
Init layout grant lists for remque/insque */ 1261 lgp->lo_grant_list.next = lgp->lo_grant_list.prev = 1262 &lgp->lo_grant_list; 1263 lgp->lo_grant_list.lgp = lgp; 1264 1265 lgp->clientgrantlist.next = lgp->clientgrantlist.prev = 1266 &lgp->clientgrantlist; 1267 lgp->clientgrantlist.lgp = lgp; 1268 1269 /* Insert the grant on the client's list */ 1270 rfs4_dbe_lock(cp->dbe); 1271 insque(&lgp->clientgrantlist, cp->clientgrantlist.prev); 1272 rfs4_dbe_unlock(cp->dbe); 1273 1274 /* Insert the grant on the file's list */ 1275 rfs4_dbe_lock(fp->dbe); 1276 insque(&lgp->lo_grant_list, fp->lo_grant_list.prev); 1277 rfs4_dbe_unlock(fp->dbe); 1278 1279 return (TRUE); 1280 } 1281 1282 /*ARGSUSED*/ 1283 static void 1284 mds_layout_grant_destroy(rfs4_entry_t foo) 1285 { 1286 } 1287 1288 mds_layout_grant_t * 1289 rfs41_findlogrant(struct compound_state *cs, rfs4_file_t *fp, 1290 rfs4_client_t *cp, bool_t *create) 1291 { 1292 mds_layout_grant_t lg, *lgp; 1293 1294 lg.cp = cp; 1295 lg.fp = fp; 1296 1297 lgp = (mds_layout_grant_t *)rfs4_dbsearch( 1298 cs->instp->mds_layout_grant_idx, &lg, create, &lg, RFS4_DBS_VALID); 1299 1300 return (lgp); 1301 } 1302 1303 void 1304 rfs41_lo_grant_rele(mds_layout_grant_t *lpg) 1305 { 1306 rfs4_dbe_rele(lpg->dbe); 1307 } 1308 1309 static void 1310 mds_do_lorecall(mds_lorec_t *lorec) 1311 { 1312 CB_COMPOUND4args cb4_args; 1313 CB_COMPOUND4res cb4_res; 1314 CB_SEQUENCE4args *cbsap; 1315 CB_LAYOUTRECALL4args *cblrap; 1316 nfs_cb_argop4 *argops; 1317 struct timeval timeout; 1318 enum clnt_stat call_stat = RPC_FAILED; 1319 int zilch = 0; 1320 layoutrecall_file4 *lorf; 1321 CLIENT *ch; 1322 int numops; 1323 int argsz; 1324 mds_session_t *sp; 1325 slot_ent_t *p; 1326 1327 DTRACE_PROBE1(nfssrv__i__sess_lorecall_fh, mds_lorec_t *, lorec); 1328 1329 if (lorec->lor_sess == NULL) 1330 return; 1331 sp = lorec->lor_sess; 1332 1333 /* 1334 * XXX - until we fix blasting _all_ sessions for one lorecall, 1335 * make sure that the session in question at least has the 
1336 * back chan established. 1337 */ 1338 if (!SN_CB_CHAN_EST(sp)) 1339 return; 1340 1341 /* 1342 * set up the compound args 1343 */ 1344 numops = 2; /* CB_SEQUENCE + CB_LAYOUTRECALL */ 1345 argsz = numops * sizeof (nfs_cb_argop4); 1346 argops = kmem_zalloc(argsz, KM_SLEEP); 1347 1348 argops[0].argop = OP_CB_SEQUENCE; 1349 cbsap = &argops[0].nfs_cb_argop4_u.opcbsequence; 1350 1351 argops[1].argop = OP_CB_LAYOUTRECALL; 1352 cblrap = &argops[1].nfs_cb_argop4_u.opcblayoutrecall; 1353 1354 (void) str_to_utf8("cb_lo_recall", &cb4_args.tag); 1355 cb4_args.minorversion = CB4_MINOR_v1; 1356 1357 cb4_args.callback_ident = sp->sn_bc.progno; 1358 cb4_args.array_len = numops; 1359 cb4_args.array = argops; 1360 1361 cb4_res.tag.utf8string_val = NULL; 1362 cb4_res.array = NULL; 1363 1364 /* 1365 * CB_SEQUENCE 1366 */ 1367 bcopy(sp->sn_sessid, cbsap->csa_sessionid, sizeof (sessionid4)); 1368 p = svc_slot_alloc(sp); 1369 mutex_enter(&p->se_lock); 1370 cbsap->csa_slotid = p->se_sltno; 1371 cbsap->csa_sequenceid = p->se_seqid; 1372 cbsap->csa_highest_slotid = svc_slot_maxslot(sp); 1373 cbsap->csa_cachethis = FALSE; 1374 1375 /* no referring calling list for lo recall */ 1376 cbsap->csa_rcall_llen = 0; 1377 cbsap->csa_rcall_lval = NULL; 1378 mutex_exit(&p->se_lock); 1379 1380 /* 1381 * CB_LAYOUTRECALL 1382 * 1383 * clora_change: 1384 * 1: server prefers that client write modified data through 1385 * MDS when pushing modified data due to layout recall 1386 * 0: server has no DS/MDS preference 1387 */ 1388 cblrap->clora_type = LAYOUT4_NFSV4_1_FILES; 1389 cblrap->clora_iomode = LAYOUTIOMODE4_ANY; 1390 cblrap->clora_changed = 0; 1391 cblrap->clora_recall.lor_recalltype = lorec->lor_type; 1392 1393 switch (lorec->lor_type) { 1394 case LAYOUTRECALL4_FILE: 1395 lorf = &cblrap->clora_recall.layoutrecall4_u.lor_layout; 1396 lorf->lor_offset = 0; 1397 lorf->lor_length = ONES_64; 1398 lorf->lor_fh.nfs_fh4_len = lorec->lor_fh.fh_len; 1399 lorf->lor_fh.nfs_fh4_val = (char 
*)&lorec->lor_fh.fh_buf; 1400 break; 1401 1402 case LAYOUTRECALL4_FSID: 1403 cblrap->clora_recall.layoutrecall4_u.lor_fsid = lorec->lor_fsid; 1404 break; 1405 1406 case LAYOUTRECALL4_ALL: 1407 default: 1408 break; 1409 } 1410 1411 /* 1412 * Set up the timeout for the callback and make the actual call. 1413 * Timeout will be 80% of the lease period. 1414 */ 1415 timeout.tv_sec = 1416 (dbe_to_instp(lorec->lor_sess->dbe)->lease_period * 80) / 100; 1417 timeout.tv_usec = 0; 1418 1419 ch = rfs41_cb_getch(sp); 1420 (void) CLNT_CONTROL(ch, CLSET_XID, (char *)&zilch); 1421 call_stat = clnt_call(ch, CB_COMPOUND, 1422 xdr_CB_COMPOUND4args_srv, (caddr_t)&cb4_args, 1423 xdr_CB_COMPOUND4res, (caddr_t)&cb4_res, timeout); 1424 rfs41_cb_freech(sp, ch); 1425 1426 if (call_stat != RPC_SUCCESS) { 1427 /* 1428 * XXX same checks as cb_recall; 1429 * a) do we want to retry ? 1430 * b) how can we tell layout still "delegated" 1431 * c) how much time do we wait before cb_path_down ? 1432 * lease period ? 1433 */ 1434 cmn_err(CE_NOTE, "r41_lo_recall: RPC call failed"); 1435 goto done; 1436 1437 } else if (cb4_res.status != NFS4_OK) { 1438 /* 1439 * XXX check protocol errors. 
This may be where we 1440 * detect the LAYOUTRECALL / LAYOUTRETURN race 1441 */ 1442 cmn_err(CE_NOTE, "r41_lo_recall: status != NFS4_OK"); 1443 1444 } 1445 svc_slot_cb_seqid(&cb4_res, p); 1446 done: 1447 kmem_free(lorec, sizeof (mds_lorec_t)); 1448 svc_slot_free(sp, p); 1449 } 1450 1451 static void 1452 mds_sess_lorecall_callout(rfs4_entry_t u_entry, void *arg) 1453 { 1454 mds_lorec_t *lorec; 1455 1456 lorec = kmem_alloc(sizeof (mds_lorec_t), KM_SLEEP); 1457 bcopy(arg, lorec, sizeof (mds_lorec_t)); 1458 lorec->lor_sess = (mds_session_t *)u_entry; 1459 1460 (void) thread_create(NULL, 0, mds_do_lorecall, lorec, 0, &p0, 1461 TS_RUN, minclsyspri); 1462 } 1463 1464 void 1465 inst_lorecall(nfs_server_instance_t *instp, void *args) 1466 { 1467 if (instp->mds_session_tab != NULL) 1468 rfs4_dbe_walk(instp->mds_session_tab, 1469 mds_sess_lorecall_callout, args); 1470 } 1471 1472 /* 1473 * Recall a layout: 1474 * 1475 * Either all layouts 1476 * 1477 * ... or 1478 * 1479 * For a given pathname construct FH first (same thing we do 1480 * for nfs_sys(GETFH)) args have already been copied into kernel 1481 * adspace 1482 */ 1483 static int 1484 mds_lorecall_cmd(struct mds_reclo_args *args, cred_t *cr) 1485 { 1486 int error; 1487 nfs_fh4 fh4; 1488 struct exportinfo *exi; 1489 mds_lorec_t lorec; 1490 vnode_t *vp = NULL, *dvp = NULL; 1491 1492 /* 1493 * XXX - This code works for only one clientid. The code 1494 * blasts layout recalls to all sessions in the dbe 1495 * database. We either need to keep an outstanding 1496 * layout list per clientid or have some way to find 1497 * per-FSID and per-CLIENT layouts efficiently. 
1498 */ 1499 if ((args->lo_type != LAYOUTRECALL4_FILE) && 1500 (args->lo_type != LAYOUTRECALL4_FSID) && 1501 (args->lo_type != LAYOUTRECALL4_ALL)) { 1502 return (EINVAL); 1503 } 1504 lorec.lor_type = args->lo_type; 1505 1506 if (lorec.lor_type == LAYOUTRECALL4_ALL) { 1507 nsi_walk(inst_lorecall, &lorec); 1508 return (0); 1509 } 1510 error = lookupname(args->lo_fname, UIO_SYSSPACE, FOLLOW, &dvp, &vp); 1511 if (!error && vp == NULL) { 1512 /* 1513 * Last component of fname not found 1514 */ 1515 if (dvp != NULL) 1516 VN_RELE(dvp); 1517 error = ENOENT; 1518 } 1519 if (error) 1520 return (error); 1521 1522 /* 1523 * 'vp' may be an AUTOFS node, so we perform a 1524 * VOP_ACCESS() to trigger the mount of the 1525 * intended filesystem, so we can share the intended 1526 * filesystem instead of the AUTOFS filesystem. 1527 */ 1528 (void) VOP_ACCESS(vp, 0, 0, cr, NULL); 1529 1530 /* 1531 * We're interested in the top most filesystem. 1532 * This is specially important when uap->dname is a trigger 1533 * AUTOFS node, since we're really interested in sharing the 1534 * filesystem AUTOFS mounted as result of the VOP_ACCESS() 1535 * call not the AUTOFS node itself. 1536 */ 1537 if (vn_mountedvfs(vp) != NULL) { 1538 if (error = traverse(&vp)) 1539 goto errout; 1540 } 1541 1542 /* 1543 * The last arg for nfs_vptoexi says to create a v4 FH (instead of v3). 1544 * This will need to be changed to select the new MDS FH format. 1545 */ 1546 rw_enter(&exported_lock, RW_READER); 1547 exi = nfs_vptoexi(dvp, vp, cr, NULL, &error, TRUE); 1548 rw_exit(&exported_lock); 1549 1550 /* 1551 * file isn't shared. 
1552 */ 1553 if (exi == NULL) 1554 goto errout; 1555 1556 1557 fh4.nfs_fh4_val = lorec.lor_fh.fh_buf; 1558 error = mknfs41_fh(&fh4, vp, exi); 1559 lorec.lor_fh.fh_len = fh4.nfs_fh4_len; 1560 lorec.lor_sess = NULL; 1561 1562 /* 1563 * set fsid just like rfs4_fattr4_fsid() 1564 */ 1565 if (exi->exi_volatile_dev) { 1566 int *pmaj = (int *)&lorec.lor_fsid.major; 1567 1568 pmaj[0] = exi->exi_fsid.val[0]; 1569 pmaj[1] = exi->exi_fsid.val[1]; 1570 lorec.lor_fsid.minor = 0; 1571 } else { 1572 vattr_t va; 1573 1574 va.va_mask = AT_FSID | AT_TYPE; 1575 error = rfs4_vop_getattr(vp, &va, 0, cr); 1576 1577 if (error == 0 && va.va_type != VREG) 1578 error = EINVAL; 1579 if (error) 1580 goto errout; 1581 1582 lorec.lor_fsid.major = getmajor(va.va_fsid); 1583 lorec.lor_fsid.minor = getminor(va.va_fsid); 1584 } 1585 1586 /* 1587 * JFB (just for bakeoff): simply push layout recall 1588 * to the back chan of every session. The "real" code 1589 * will first find the rfs4_file_t using the FH created 1590 * above, and the file struct will refer to the layout. 1591 * Either the layout struct will contain a list of 1592 * rfs4_client_t structs granted the layout or another 1593 * table/index will be created exist to associate a 1594 * layout with the set of clients granted the layout. 1595 */ 1596 if (!error) 1597 nsi_walk(inst_lorecall, &lorec); 1598 errout: 1599 VN_RELE(vp); 1600 if (dvp != NULL) 1601 VN_RELE(dvp); 1602 return (error); 1603 } 1604 1605 /* 1606 * ----------------------------------------------- 1607 * MDS: DS_ADDR tables. 
 * -----------------------------------------------
 *
 */

/* ds_addr entries are keyed by their dbe id; identity hash on the id. */
static uint32_t
ds_addr_hash(void *key)
{
	return ((uint32_t)(uintptr_t)key);
}

static bool_t
ds_addr_compare(rfs4_entry_t entry, void *key)
{
	ds_addr_t *dp = (ds_addr_t *)entry;

	return (rfs4_dbe_getid(dp->dbe) == (int)(uintptr_t)key);
}

static void *
ds_addr_mkkey(rfs4_entry_t entry)
{
	ds_addr_t *dp = (ds_addr_t *)entry;

	return ((void *)(uintptr_t)rfs4_dbe_getid(dp->dbe));
}

/*
 * ds_addr constructor: takes ownership of the netid/addr strings from
 * the mds_adddev_args; knetconfig and netbuf are filled in later
 * (see ds_addr_destroy for their teardown).
 */
/*ARGSUSED*/
static bool_t
ds_addr_create(rfs4_entry_t u_entry, void *arg)
{
	ds_addr_t *dp = (ds_addr_t *)u_entry;
	struct mds_adddev_args *u_dp = (struct mds_adddev_args *)arg;

	dp->dev_addr.na_r_netid = u_dp->dev_netid;
	dp->dev_addr.na_r_addr = u_dp->dev_addr;
	dp->ds_owner = NULL;
	dp->dev_knc = NULL;
	dp->dev_nb = NULL;
	return (TRUE);
}


/* Free the knetconfig and netbuf (and its buffer) if they were set. */
/*ARGSUSED*/
static void
ds_addr_destroy(rfs4_entry_t foo)
{
	ds_addr_t *dp = (ds_addr_t *)foo;

	if (dp->dev_knc != NULL)
		kmem_free(dp->dev_knc, sizeof (struct knetconfig));
	if (dp->dev_nb != NULL) {
		if (dp->dev_nb->buf)
			kmem_free(dp->dev_nb->buf, dp->dev_nb->maxlen);
		kmem_free(dp->dev_nb, sizeof (struct netbuf));
	}
}


/*
 * Multipath devices.
1668 */ 1669 static uint32_t 1670 mds_mpd_hash(void *key) 1671 { 1672 return ((uint32_t)(uintptr_t)key); 1673 } 1674 1675 static bool_t 1676 mds_mpd_compare(rfs4_entry_t entry, void *key) 1677 { 1678 mds_mpd_t *dp = (mds_mpd_t *)entry; 1679 1680 return (dp->mpd_id == (uint32_t)(uintptr_t)key); 1681 } 1682 1683 static void * 1684 mds_mpd_mkkey(rfs4_entry_t entry) 1685 { 1686 mds_mpd_t *dp = (mds_mpd_t *)entry; 1687 1688 return ((void*)(uintptr_t)dp->mpd_id); 1689 } 1690 1691 void 1692 mds_mpd_encode(nfsv4_1_file_layout_ds_addr4 *ds_dev, uint_t *len, char **val) 1693 { 1694 char *xdr_ds_dev; 1695 int xdr_size = 0; 1696 XDR xdr; 1697 1698 ASSERT(val); 1699 1700 xdr_size = xdr_sizeof(xdr_nfsv4_1_file_layout_ds_addr4, ds_dev); 1701 1702 ASSERT(xdr_size); 1703 1704 xdr_ds_dev = kmem_alloc(xdr_size, KM_SLEEP); 1705 1706 xdrmem_create(&xdr, xdr_ds_dev, xdr_size, XDR_ENCODE); 1707 1708 if (xdr_nfsv4_1_file_layout_ds_addr4(&xdr, ds_dev) == FALSE) { 1709 *len = 0; 1710 *val = NULL; 1711 /* don't leak ! */ 1712 kmem_free(xdr_ds_dev, xdr_size); 1713 return; 1714 } 1715 1716 *len = xdr_size; 1717 *val = xdr_ds_dev; 1718 } 1719 1720 /*ARGSUSED*/ 1721 static bool_t 1722 mds_mpd_create(rfs4_entry_t u_entry, void *arg) 1723 { 1724 mds_mpd_t *dp = (mds_mpd_t *)u_entry; 1725 mds_addmpd_t *maap = (mds_addmpd_t *)arg; 1726 1727 dp->mpd_id = maap->id; 1728 mds_mpd_encode(maap->ds_addr4, &(dp->mpd_encoded_len), 1729 &(dp->mpd_encoded_val)); 1730 1731 return (TRUE); 1732 } 1733 1734 1735 /*ARGSUSED*/ 1736 static void 1737 mds_mpd_destroy(rfs4_entry_t foo) 1738 { 1739 } 1740 1741 /* 1742 * The OTW device id is 128bits in length, we however are 1743 * still using a uint_32 internally. 
1744 */ 1745 mds_mpd_t * 1746 mds_find_mpd(nfs_server_instance_t *instp, uint32_t id) 1747 { 1748 mds_mpd_t *dp; 1749 bool_t create = FALSE; 1750 1751 dp = (mds_mpd_t *)rfs4_dbsearch(instp->mds_mpd_idx, 1752 (void *)(uintptr_t)id, &create, NULL, RFS4_DBS_VALID); 1753 return (dp); 1754 } 1755 1756 /* 1757 * Plop a uint32 into the 128bit OTW deviceid 1758 */ 1759 void 1760 mds_set_deviceid(uint32_t did, deviceid4 *otw_id) 1761 { 1762 ba_devid_t d; 1763 1764 bzero(&d, sizeof (d)); 1765 d.i.did = did; 1766 bcopy(&d, otw_id, sizeof (d)); 1767 } 1768 1769 /* 1770 * Used by the walker to populate the deviceid list. 1771 */ 1772 void 1773 mds_mpd_list(rfs4_entry_t entry, void *arg) 1774 { 1775 mds_mpd_t *dp = (mds_mpd_t *)entry; 1776 mds_device_list_t *mdl = (mds_device_list_t *)arg; 1777 1778 deviceid4 *dlip; 1779 1780 /* 1781 * If this entry is invalid or we should skip it 1782 * go to the next one.. 1783 */ 1784 if (rfs4_dbe_skip_or_invalid(dp->dbe)) 1785 return; 1786 1787 dlip = &(mdl->dl[mdl->count]); 1788 1789 mds_set_deviceid(dp->mpd_id, dlip); 1790 1791 /* 1792 * bump to the next devlist_item4 1793 */ 1794 mdl->count++; 1795 } 1796 1797 ds_addr_t * 1798 mds_find_ds_addr_by_uaddr(nfs_server_instance_t *instp, char *ptr) 1799 { 1800 ds_addr_t *dp; 1801 bool_t create = FALSE; 1802 1803 dp = (ds_addr_t *)rfs4_dbsearch(instp->ds_addr_uaddr_idx, 1804 (void *)ptr, &create, NULL, RFS4_DBS_VALID); 1805 return (dp); 1806 } 1807 1808 1809 ds_addr_t * 1810 mds_find_ds_addr(nfs_server_instance_t *instp, uint32_t id) 1811 { 1812 ds_addr_t *dp; 1813 bool_t create = FALSE; 1814 1815 dp = (ds_addr_t *)rfs4_dbsearch(instp->ds_addr_idx, 1816 (void *)(uintptr_t)id, &create, NULL, RFS4_DBS_VALID); 1817 return (dp); 1818 } 1819 1820 1821 /* 1822 */ 1823 static uint32_t 1824 mds_str_hash(void *key) 1825 { 1826 char *addr = (char *)key; 1827 int i; 1828 uint32_t hash = 0; 1829 1830 for (i = 0; addr[i]; i++) { 1831 hash <<= 1; 1832 hash += (uint_t)addr[i]; 1833 } 1834 1835 return (hash); 
}


/* uaddr index key: the entry's universal address string. */
static void *
ds_addr_uaddr_mkkey(rfs4_entry_t entry)
{
	ds_addr_t *dp = (ds_addr_t *)entry;

	return (dp->dev_addr.na_r_addr);
}

/*
 * String compare on the universal address.
 * NOTE(review): declared int while the sibling compare callbacks are
 * bool_t — confirm the index code accepts both.
 */
static int
ds_addr_uaddr_compare(rfs4_entry_t entry, void *key)
{
	ds_addr_t *dp = (ds_addr_t *)entry;
	char *addr_key = (char *)key;

	return (strcmp(addr_key, dp->dev_addr.na_r_addr) == 0);
}


/*
 * Data-server information (ds_owner) tables and indexes.
 */
static uint32_t
ds_owner_hash(void *key)
{
	return ((uint32_t)(uintptr_t)key);
}

static bool_t
ds_owner_compare(rfs4_entry_t entry, void *key)
{
	ds_owner_t *dop = (ds_owner_t *)entry;

	return (dop->ds_id == (int)(uintptr_t)key);

}

static void *
ds_owner_mkkey(rfs4_entry_t entry)
{
	ds_owner_t *dop = (ds_owner_t *)entry;

	return ((void *)(uintptr_t)dop->ds_id);
}

/* Secondary index by DS instance identity string. */
static bool_t
ds_owner_inst_compare(rfs4_entry_t entry, void *key)
{
	ds_owner_t *dop = (ds_owner_t *)entry;

	return (strcmp(dop->identity, key) == 0);

}

static void *
ds_owner_inst_mkkey(rfs4_entry_t entry)
{
	ds_owner_t *dop = (ds_owner_t *)entry;
	return (dop->identity);
}


/*
 * ds_owner constructor: id comes from the dbe, identity string is
 * duplicated from the DS_EXIBI args, and the per-owner address and
 * guid lists are initialized empty.
 */
/*ARGSUSED*/
static bool_t
ds_owner_create(rfs4_entry_t u_entry, void *arg)
{
	ds_owner_t *dop = (ds_owner_t *)u_entry;
	DS_EXIBIargs *drap = (DS_EXIBIargs *)arg;

	dop->ds_id = rfs4_dbe_getid(dop->dbe);
	dop->verifier = drap->ds_ident.boot_verifier;
	dop->identity = kstrdup(drap->ds_ident.instance.instance_val);
	list_create(&dop->ds_addr_list, sizeof (ds_addr_t),
	    offsetof(ds_addr_t, ds_addr_next));
	list_create(&dop->ds_guid_list, sizeof (ds_guid_info_t),
	    offsetof(ds_guid_info_t, ds_guid_next));
	return (TRUE);
}

/* Allocate a ds_owner via the instance index, under the owner lock. */
ds_owner_t *
ds_owner_alloc(DS_EXIBIargs *drap)
{
1920 ds_owner_t *dop; 1921 1922 rw_enter(&mds_server->ds_owner_lock, RW_WRITER); 1923 /* Add the "new" entry */ 1924 dop = (ds_owner_t *)rfs4_dbcreate(mds_server->ds_owner_inst_idx, 1925 (void *)drap); 1926 rw_exit(&mds_server->ds_owner_lock); 1927 return (dop); 1928 } 1929 1930 void 1931 ds_guid_free(ds_guid_t *gp) 1932 { 1933 if (gp == NULL) 1934 return; 1935 1936 /* 1937 * Yes, overkill for one stor_type, but ready 1938 * to go for more! 1939 */ 1940 switch (gp->stor_type) { 1941 case ZFS: 1942 kmem_free(gp->ds_guid_u.zfsguid.zfsguid_val, 1943 gp->ds_guid_u.zfsguid.zfsguid_len); 1944 break; 1945 } 1946 } 1947 1948 /* 1949 * Duplicate the src guid to dst. 1950 * 1951 * return 0 on success or 1 for failure. 1952 */ 1953 int 1954 ds_guid_dup(ds_guid_t *src, ds_guid_t *dst) 1955 { 1956 dst = src; 1957 1958 switch (dst->stor_type) { 1959 case ZFS: 1960 dst->ds_guid_u.zfsguid.zfsguid_val 1961 = kmem_alloc(dst->ds_guid_u.zfsguid.zfsguid_len, KM_SLEEP); 1962 bcopy(src->ds_guid_u.zfsguid.zfsguid_val, 1963 dst->ds_guid_u.zfsguid.zfsguid_val, 1964 dst->ds_guid_u.zfsguid.zfsguid_len); 1965 break; 1966 default: 1967 /* if it's unknown zero out the dst */ 1968 bzero(dst, sizeof (ds_guid_t)); 1969 return (1); 1970 1971 } 1972 return (0); 1973 } 1974 1975 /* 1976 * compare ds_guids return 0 for not the same or 1977 * 1 if they are equal.. 
1978 */ 1979 int 1980 ds_guid_compare(ds_guid_t *gp1, ds_guid_t *gp2) 1981 { 1982 if (gp1->stor_type != gp2->stor_type) 1983 return (0); 1984 1985 switch (gp1->stor_type) { 1986 case ZFS: 1987 if (gp1->ds_guid_u.zfsguid.zfsguid_len != 1988 gp2->ds_guid_u.zfsguid.zfsguid_len) 1989 return (0); 1990 if (bcmp(gp1->ds_guid_u.zfsguid.zfsguid_val, 1991 gp2->ds_guid_u.zfsguid.zfsguid_val, 1992 gp2->ds_guid_u.zfsguid.zfsguid_len) != 0) 1993 return (0); 1994 break; 1995 1996 default: 1997 return (0); 1998 } 1999 return (1); 2000 } 2001 2002 void 2003 mds_free_zfsattr(ds_guid_info_t *dst) 2004 { 2005 int i; 2006 2007 if (dst->ds_attr_len == 0) 2008 return; 2009 2010 for (i = 0; i < dst->ds_attr_len; i++) { 2011 2012 UTF8STRING_FREE(dst->ds_attr_val[i].attrname); 2013 kmem_free(dst->ds_attr_val[i].attrvalue.attrvalue_val, 2014 dst->ds_attr_val[i].attrvalue.attrvalue_len); 2015 } 2016 } 2017 2018 void 2019 mds_dup_zfsattr(ds_zfsattr *src, ds_guid_info_t *dst) 2020 { 2021 int i; 2022 int len; 2023 2024 for (i = 0; i < dst->ds_attr_len; i++) { 2025 2026 dst->ds_attr_val[i].attrname.utf8string_val = 2027 kmem_alloc(dst->ds_attr_val[i].attrname.utf8string_len, 2028 KM_SLEEP); 2029 2030 bcopy(src->attrname.utf8string_val, 2031 dst->ds_attr_val[i].attrname.utf8string_val, 2032 dst->ds_attr_val[i].attrname.utf8string_len); 2033 2034 len = dst->ds_attr_val[i].attrvalue.attrvalue_len = 2035 src->attrvalue.attrvalue_len; 2036 2037 dst->ds_attr_val[i].attrvalue.attrvalue_val 2038 = kmem_alloc(len, KM_SLEEP); 2039 2040 bcopy(src->attrvalue.attrvalue_val, 2041 dst->ds_attr_val[i].attrvalue.attrvalue_val, len); 2042 } 2043 } 2044 2045 /* 2046 */ 2047 /*ARGSUSED*/ 2048 static bool_t 2049 ds_guid_info_create(rfs4_entry_t e, void *arg) 2050 { 2051 pinfo_create_t *p = (pinfo_create_t *)arg; 2052 ds_guid_info_t *pip = (ds_guid_info_t *)e; 2053 2054 pip->ds_ownerp = p->dop; 2055 2056 /* Only supported type is ZFS */ 2057 ASSERT(p->si->type == ZFS); 2058 2059 pip->ds_guid = 
p->si->ds_storinfo_u.zfs_info.guid_map.ds_guid;

	pip->ds_attr_len = p->si->ds_storinfo_u.zfs_info.attrs.attrs_len;
	/* NOTE: kmem_alloc (not zalloc) — mds_dup_zfsattr must fill it all */
	pip->ds_attr_val = kmem_alloc(
	    sizeof (ds_zfsattr) * pip->ds_attr_len, KM_SLEEP);
	mds_dup_zfsattr(p->si->ds_storinfo_u.zfs_info.attrs.attrs_val, pip);

	return (TRUE);
}

/* Key is a pointer to the entry's embedded ds_guid. */
static void *
ds_guid_info_mkkey(rfs4_entry_t e)
{
	ds_guid_info_t *gip = (ds_guid_info_t *)e;

	return ((void *)(uintptr_t)&gip->ds_guid);
}

/* Content compare of the guids (see ds_guid_compare). */
static bool_t
ds_guid_info_compare(rfs4_entry_t e, void *key)
{
	ds_guid_info_t *gip = (ds_guid_info_t *)e;
	ds_guid_t *guid = (ds_guid_t *)key;

	return (ds_guid_compare(&gip->ds_guid, guid));
}

/*
 * NOTE(review): this hashes the *address* of the ds_guid key while
 * the compare above matches guid *contents*; equal guids at different
 * addresses hash to different buckets — confirm lookups always pass
 * the stored key pointer.
 */
static uint32_t
ds_guid_info_hash(void *key)
{
	return ((uint32_t)(uintptr_t)key);
}
/* Release the guid payload and the copied attribute array. */
/*ARGSUSED*/
static void
ds_guid_info_destroy(rfs4_entry_t e)
{
	ds_guid_info_t *gip = (ds_guid_info_t *)e;
	ds_guid_free(&gip->ds_guid);
	mds_free_zfsattr(gip);
}

/*ARGSUSED*/
static void
ds_owner_destroy(rfs4_entry_t foo)
{
}
/*
 * XXX this should be populated during startup. we
 * XXX should get the data from stable store. For now
 * XXX we are just going to keep the map that the DS
 * XXX provides us..
 */
/* mapzap constructor: copy the DS-provided guid map into the entry. */
/*ARGSUSED*/
static bool_t
mds_mapzap_create(nfs_server_instance_t *instp,
    rfs4_entry_t e, void *arg)
{
	mds_mapzap_t *mzp = (mds_mapzap_t *)e;

	mzp->ds_map = *(ds_guid_map_t *)arg;
	/* write to disk */
	return (TRUE);
}

/* Key is a pointer to the embedded guid of the map. */
static void *
mds_mapzap_mkkey(rfs4_entry_t e)
{
	mds_mapzap_t *mzp = (mds_mapzap_t *)e;

	return ((void *)(uintptr_t)&mzp->ds_map.ds_guid);
}


static bool_t
mds_mapzap_compare(rfs4_entry_t e, void *key)
{
	mds_mapzap_t *mzp = (mds_mapzap_t *)e;
	ds_guid_t *gp = (ds_guid_t *)key;

	return ((bool_t)ds_guid_compare(&mzp->ds_map.ds_guid, gp));

}

/*
 * NOTE(review): pointer-value hash paired with a content compare,
 * same caveat as ds_guid_info_hash above.
 */
static uint32_t
mds_mapzap_hash(void *key)
{
	return ((uint32_t)(uintptr_t)key);
}

/*ARGSUSED*/
static void
mds_mapzap_destroy(rfs4_entry_t foo)
{
}

/*
 * Used to initialize the NFSv4.1 server's state.
 * All of the tables are created and timers are set.
 */

/*
 * Returns 0 if the state store already existed, 1 if it was created.
 * NOTE: on the "created" (return 1) path, instp->state_lock is
 * intentionally left HELD; the caller (e.g. mds_sstor_init) drops it
 * after finishing table creation.
 */
int
sstor_init(nfs_server_instance_t *instp, int def_persona, int def_reap)
{
	/*
	 * If the server state store has already been initialized,
	 * skip it
	 */
	mutex_enter(&instp->state_lock);
	if (instp->state_store != NULL) {
		mutex_exit(&instp->state_lock);
		return (0);
	}

	/*
	 * Set the boot time. If the server has been restarted quickly
	 * and has had the opportunity to service clients, then the start_time
	 * needs to be bumped regardless. A small window but it exists...
	 */
	if (instp->start_time != gethrestime_sec())
		instp->start_time = gethrestime_sec();
	else
		instp->start_time++;

	/*
	 * If a table does not have a specific reap time,
	 * this value is used.
2185 */ 2186 instp->reap_time = def_reap * rfs4_lease_time; 2187 instp->default_persona = def_persona; 2188 2189 instp->state_store = rfs4_database_create(); 2190 instp->state_store->instp = instp; 2191 2192 /* reset the "first NFSv4 request" status */ 2193 instp->seen_first_compound = 0; 2194 instp->exi_clean_func = NULL; 2195 2196 return (1); 2197 } 2198 2199 /* 2200 * Create/init just the session stateStore tables. 2201 * used for data-server 2202 */ 2203 void 2204 ds_sstor_init(nfs_server_instance_t *instp) 2205 { 2206 /* 2207 * Client table. 2208 */ 2209 rw_init(&instp->findclient_lock, NULL, RW_DEFAULT, NULL); 2210 2211 instp->client_tab = rfs4_table_create( 2212 instp, "Client", instp->client_cache_time, 2, 2213 rfs4_client_create, rfs4_client_destroy, rfs4_client_expiry, 2214 sizeof (rfs4_client_t), TABSIZE, MAXTABSZ/8, 100); 2215 2216 instp->nfsclnt_idx = rfs4_index_create(instp->client_tab, 2217 "nfs_client_id4", nfsclnt_hash, nfsclnt_compare, nfsclnt_mkkey, 2218 TRUE); 2219 2220 instp->clientid_idx = rfs4_index_create(instp->client_tab, 2221 "client_id", clientid_hash, clientid_compare, clientid_mkkey, 2222 FALSE); 2223 2224 /* 2225 * Session table. 2226 */ 2227 rw_init(&instp->findsession_lock, NULL, RW_DEFAULT, NULL); 2228 2229 instp->mds_session_tab = rfs4_table_create(instp, 2230 "Session", instp->reap_time, 2, mds_session_create, 2231 mds_session_destroy, mds_do_not_expire, sizeof (mds_session_t), 2232 MDS_TABSIZE, MDS_MAXTABSZ/8, 100); 2233 2234 instp->mds_session_idx = rfs4_index_create(instp->mds_session_tab, 2235 "session_idx", sessid_hash, sessid_compare, sessid_mkkey, TRUE); 2236 2237 instp->mds_sess_clientid_idx = rfs4_index_create(instp->mds_session_tab, 2238 "sess_clnt_idx", clientid_hash, sess_clid_compare, sess_clid_mkkey, 2239 FALSE); 2240 } 2241 2242 /* 2243 * Used to initialize the NFSv4.1 server's state. 2244 * All of the tables are created and timers are set. 
2245 */ 2246 void 2247 mds_sstor_init(nfs_server_instance_t *instp) 2248 { 2249 extern rfs4_cbstate_t mds_cbcheck(rfs4_state_t *); 2250 int need_sstor_init; 2251 2252 /* 2253 * Create the state store and set the 2254 * start-up time. 2255 */ 2256 need_sstor_init = sstor_init(instp, FH41_TYPE_NFS, 60); 2257 2258 if (need_sstor_init == 0) 2259 return; 2260 2261 instp->deleg_cbrecall = mds_do_cb_recall; 2262 instp->deleg_cbcheck = mds_cbcheck; 2263 2264 /* 2265 * Now create the common tables and indexes 2266 */ 2267 v4prot_sstor_init(instp); 2268 2269 rw_init(&instp->mds_mpd_lock, NULL, RW_DEFAULT, NULL); 2270 rw_init(&instp->ds_addr_lock, NULL, RW_DEFAULT, NULL); 2271 rw_init(&instp->ds_guid_info_lock, NULL, RW_DEFAULT, NULL); 2272 2273 /* 2274 * Session table. 2275 */ 2276 rw_init(&instp->findsession_lock, NULL, RW_DEFAULT, NULL); 2277 2278 instp->mds_session_tab = rfs4_table_create(instp, 2279 "Session", instp->reap_time, 2, mds_session_create, 2280 mds_session_destroy, mds_do_not_expire, sizeof (mds_session_t), 2281 MDS_TABSIZE, MDS_MAXTABSZ/8, 100); 2282 2283 instp->mds_session_idx = rfs4_index_create(instp->mds_session_tab, 2284 "session_idx", sessid_hash, sessid_compare, sessid_mkkey, TRUE); 2285 2286 instp->mds_sess_clientid_idx = rfs4_index_create(instp->mds_session_tab, 2287 "sess_clnt_idx", clientid_hash, sess_clid_compare, sess_clid_mkkey, 2288 FALSE); 2289 2290 /* 2291 * pNFS layout table. 2292 */ 2293 rw_init(&instp->mds_layout_lock, NULL, RW_DEFAULT, NULL); 2294 2295 instp->mds_layout_tab = rfs4_table_create(instp, 2296 "Layout", instp->reap_time, 2, mds_layout_create, 2297 mds_layout_destroy, 2298 mds_do_not_expire, sizeof (mds_layout_t), MDS_TABSIZE, 2299 MDS_MAXTABSZ, 100); 2300 2301 instp->mds_layout_idx = rfs4_index_create(instp->mds_layout_tab, 2302 "layout-idx", mds_layout_hash, mds_layout_compare, mds_layout_mkkey, 2303 TRUE); 2304 2305 /* 2306 * Create the layout_grant table. 
2307 * 2308 * This table tracks the layout segments that have been granted 2309 * to clients. It is indexed by the layout state_id and also by client. 2310 */ 2311 instp->mds_layout_grant_tab = rfs4_table_create(instp, 2312 "Layout_grant", instp->reap_time, 2, mds_layout_grant_create, 2313 mds_layout_grant_destroy, mds_do_not_expire, 2314 sizeof (mds_layout_grant_t), MDS_TABSIZE, MDS_MAXTABSZ, 100); 2315 2316 instp->mds_layout_grant_idx = 2317 rfs4_index_create(instp->mds_layout_grant_tab, 2318 "layout-grant-idx", mds_layout_grant_hash, mds_layout_grant_compare, 2319 mds_layout_grant_mkkey, TRUE); 2320 2321 instp->mds_layout_grant_ID_idx = 2322 rfs4_index_create(instp->mds_layout_grant_tab, 2323 "layout-grant-ID-idx", mds_layout_grant_id_hash, 2324 mds_layout_grant_id_compare, mds_layout_grant_id_mkkey, FALSE); 2325 2326 /* 2327 * Data server addresses. 2328 */ 2329 instp->ds_addr_tab = rfs4_table_create(instp, 2330 "DSaddr", instp->reap_time, 3, ds_addr_create, 2331 ds_addr_destroy, mds_do_not_expire, sizeof (ds_addr_t), 2332 MDS_TABSIZE, MDS_MAXTABSZ, 200); 2333 2334 instp->ds_addr_idx = rfs4_index_create(instp->ds_addr_tab, 2335 "dsaddr-idx", ds_addr_hash, ds_addr_compare, 2336 ds_addr_mkkey, TRUE); 2337 2338 instp->ds_addr_uaddr_idx = rfs4_index_create(instp->ds_addr_tab, 2339 "dsaddr-uaddr-idx", mds_str_hash, ds_addr_uaddr_compare, 2340 ds_addr_uaddr_mkkey, FALSE); 2341 2342 /* 2343 * Multipath Device table. 2344 */ 2345 instp->mds_mpd_tab = rfs4_table_create(instp, 2346 "mpd", instp->reap_time, 3, mds_mpd_create, mds_mpd_destroy, 2347 mds_do_not_expire, sizeof (mds_mpd_t), MDS_TABSIZE, 2348 MDS_MAXTABSZ, 200); 2349 2350 instp->mds_mpd_idx = rfs4_index_create(instp->mds_mpd_tab, 2351 "mpd-idx", mds_mpd_hash, mds_mpd_compare, mds_mpd_mkkey, TRUE); 2352 2353 /* 2354 * data-server information tables. 
2355 */ 2356 instp->ds_owner_tab = rfs4_table_create(instp, 2357 "DS_owner", instp->reap_time, 2, ds_owner_create, 2358 ds_owner_destroy, mds_do_not_expire, 2359 sizeof (ds_owner_t), MDS_TABSIZE, 2360 MDS_MAXTABSZ, 100); 2361 2362 instp->ds_owner_inst_idx = rfs4_index_create(instp->ds_owner_tab, 2363 "DS_owner-inst-idx", mds_str_hash, ds_owner_inst_compare, 2364 ds_owner_inst_mkkey, TRUE); 2365 2366 instp->ds_owner_idx = rfs4_index_create(instp->ds_owner_tab, 2367 "DS_owner-idx", ds_owner_hash, ds_owner_compare, 2368 ds_owner_mkkey, FALSE); 2369 2370 /* 2371 * data-server guid information table. 2372 */ 2373 instp->ds_guid_info_tab = rfs4_table_create(instp, 2374 "DS_guid", instp->reap_time, 2, ds_guid_info_create, 2375 ds_guid_info_destroy, 2376 mds_do_not_expire, sizeof (ds_guid_info_t), MDS_TABSIZE, 2377 MDS_MAXTABSZ, 100); 2378 2379 instp->ds_guid_info_idx = rfs4_index_create(instp->ds_guid_info_tab, 2380 "DS_guid-idx", ds_guid_info_hash, ds_guid_info_compare, 2381 ds_guid_info_mkkey, 2382 TRUE); 2383 2384 instp->attrvers = 1; 2385 2386 /* 2387 * Mark it as fully initialized 2388 */ 2389 instp->inst_flags |= NFS_INST_STORE_INIT | NFS_INST_v41; 2390 2391 mutex_exit(&instp->state_lock); 2392 } 2393 2394 /* 2395 * Module load initialization 2396 */ 2397 void 2398 mds_srvrinit(void) 2399 { 2400 mds_recall_lo = mds_lorecall_cmd; 2401 } 2402 2403 static char * 2404 mds_read_odl(char *path, int *size) 2405 { 2406 struct uio uio; 2407 struct iovec iov; 2408 2409 char *odlp; 2410 vnode_t *vp; 2411 vattr_t va; 2412 int sz, err, bad_file; 2413 2414 *size = 0; 2415 if (path == NULL) 2416 return (NULL); 2417 2418 /* 2419 * open the layout file. 
2420 */ 2421 if ((err = vn_open(path, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0)) != 0) { 2422 return (NULL); 2423 } 2424 2425 if (vp->v_type != VREG) { 2426 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); 2427 VN_RELE(vp); 2428 return (NULL); 2429 } 2430 2431 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 2432 /* 2433 * get the file size. 2434 */ 2435 va.va_mask = AT_SIZE; 2436 err = VOP_GETATTR(vp, &va, 0, CRED(), NULL); 2437 2438 sz = va.va_size; 2439 bad_file = (sz == 0 || sz < sizeof (odl_t)); 2440 2441 if (err || bad_file) { 2442 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 2443 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); 2444 VN_RELE(vp); 2445 return (NULL); 2446 } 2447 2448 odlp = kmem_alloc(sz, KM_SLEEP); 2449 2450 /* 2451 * build iovec to read in the file. 2452 */ 2453 iov.iov_base = (caddr_t)odlp; 2454 iov.iov_len = sz; 2455 2456 uio.uio_iov = &iov; 2457 uio.uio_iovcnt = 1; 2458 uio.uio_segflg = UIO_SYSSPACE; 2459 uio.uio_loffset = 0; 2460 uio.uio_resid = iov.iov_len; 2461 2462 if (err = VOP_READ(vp, &uio, FREAD, CRED(), NULL)) { 2463 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 2464 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); 2465 VN_RELE(vp); 2466 kmem_free(odlp, sz); 2467 return (NULL); 2468 } 2469 2470 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 2471 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); 2472 VN_RELE(vp); 2473 *size = sz; 2474 return (odlp); 2475 } 2476 2477 /* 2478 * blah 2479 */ 2480 static int 2481 mds_write_odl(char *path, char *odlp, int size) 2482 { 2483 int ioflag, err; 2484 struct uio uio; 2485 struct iovec iov; 2486 vnode_t *vp; 2487 2488 if (path == NULL) 2489 return (-1); 2490 2491 if (vn_open(path, UIO_SYSSPACE, FCREAT|FWRITE|FTRUNC, 0600, &vp, 2492 CRCREAT, 0)) { 2493 return (-1); 2494 } 2495 2496 iov.iov_base = (caddr_t)odlp; 2497 iov.iov_len = size; 2498 2499 uio.uio_iov = &iov; 2500 uio.uio_iovcnt = 1; 2501 uio.uio_loffset = 0; 2502 uio.uio_segflg = UIO_SYSSPACE; 2503 uio.uio_llimit = 
(rlim64_t)MAXOFFSET_T; 2504 uio.uio_resid = size; 2505 2506 ioflag = uio.uio_fmode = (FWRITE|FSYNC); 2507 uio.uio_extflg = UIO_COPY_DEFAULT; 2508 2509 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL); 2510 err = VOP_WRITE(vp, &uio, ioflag, CRED(), NULL); 2511 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 2512 2513 (void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL); 2514 VN_RELE(vp); 2515 2516 return (err); 2517 } 2518 2519 static void 2520 mds_remove_odl(char *path) 2521 { 2522 (void) vn_remove(path, UIO_SYSSPACE, RMFILE); 2523 } 2524 2525 #define ODL_DIR "/var/nfs/v4_state/layouts" 2526 2527 int 2528 mds_mkdir(char *parent, char *dirnm) 2529 { 2530 int err; 2531 vnode_t *dvp, *vp; 2532 struct vattr vap; 2533 cred_t *cr = CRED(); 2534 2535 /* 2536 * if (err = lookupname(parent, UIO_SYSSPACE, NO_FOLLOW, NULLVPP, &dvp)) 2537 */ 2538 if ((err = vn_open(parent, UIO_SYSSPACE, FREAD, 0, &dvp, 0, 0))) 2539 return (1); 2540 2541 vap.va_mask = AT_UID|AT_GID|AT_TYPE|AT_MODE; 2542 vap.va_uid = crgetuid(cr); 2543 vap.va_gid = crgetgid(cr); 2544 vap.va_type = VDIR; 2545 vap.va_mode = 0755; 2546 err = VOP_MKDIR(dvp, dirnm, &vap, &vp, cr, NULL, 0, NULL); 2547 2548 (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL); 2549 VN_RELE(dvp); 2550 2551 if (err) 2552 return (1); 2553 2554 VN_RELE(vp); 2555 2556 return (0); 2557 } 2558 2559 /* 2560 * Pathname will be /var/nfs/v4_state/layouts/<fsid>/<fid> 2561 */ 2562 char * 2563 mds_create_name(vnode_t *vp, int *len) 2564 { 2565 static int parent_created = 0; 2566 int plen, err; 2567 fid_t fid; 2568 statvfs64_t svfs; 2569 vnode_t *dvp = NULL; 2570 uint64_t name = 0; 2571 char *pname; 2572 char dir[65]; 2573 2574 *len = 0; 2575 if (!parent_created) { 2576 if (vn_open(ODL_DIR, UIO_SYSSPACE, FREAD, 0, &dvp, 0, 0)) { 2577 err = mds_mkdir("/var/nfs/v4_state", "layouts"); 2578 if (err) 2579 return (NULL); 2580 } else { 2581 (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, 2582 CRED(), NULL); 2583 VN_RELE(dvp); 2584 } 2585 parent_created = 
1; 2586 } 2587 2588 /* 2589 * fsid = vp->v_vfsp->vfs_fsid; 2590 * zfs changes vfs_fsid on reboot, so we can't use it. 2591 */ 2592 err = VFS_STATVFS(vp->v_vfsp, &svfs); 2593 if (err) { 2594 return (NULL); 2595 } 2596 2597 (void) snprintf(dir, 65, "%llx", (long long)svfs.f_fsid); 2598 2599 plen = MAXPATHLEN; 2600 pname = kmem_alloc(plen, KM_SLEEP); 2601 (void) snprintf(pname, plen, "%s/%s", ODL_DIR, dir); 2602 2603 /* does this dir already exist */ 2604 if (vn_open(pname, UIO_SYSSPACE, FREAD, 0, &dvp, 0, 0)) { 2605 err = mds_mkdir(ODL_DIR, dir); 2606 if (err) 2607 return (NULL); 2608 } else { 2609 (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL); 2610 VN_RELE(dvp); 2611 } 2612 2613 bzero(&fid, sizeof (fid)); 2614 fid.fid_len = MAXFIDSZ; 2615 err = VOP_FID(vp, &fid, NULL); 2616 if (err || fid.fid_len == 0) { 2617 return (NULL); 2618 } 2619 2620 bcopy(fid.fid_data, &name, fid.fid_len); 2621 2622 (void) snprintf(pname, plen, "%s/%s/%llx", ODL_DIR, dir, 2623 (long long)name); 2624 2625 *len = plen; 2626 return (pname); 2627 } 2628 2629 /* xdr encode a mds_layout to the on-disk layout */ 2630 static char * 2631 xdr_convert_layout(mds_layout_t *lop, int *size) 2632 { 2633 int xdr_size; 2634 char *xdr_buf; 2635 XDR xdr; 2636 2637 xdr_size = xdr_sizeof(xdr_odl, lop->odl); 2638 xdr_buf = kmem_zalloc(xdr_size, KM_SLEEP); 2639 2640 xdrmem_create(&xdr, xdr_buf, xdr_size, XDR_ENCODE); 2641 2642 if (xdr_odl(&xdr, lop->odl) == FALSE) { 2643 *size = 0; 2644 kmem_free(xdr_buf, xdr_size); 2645 return (NULL); 2646 } 2647 2648 *size = xdr_size; 2649 return (xdr_buf); 2650 } 2651 2652 /* xdr decode an on-disk layout to a mds_layout */ 2653 /*ARGSUSED*/ 2654 static odl * 2655 xdr_convert_odl(char *odlp, int size) 2656 { 2657 int sz; 2658 char *unxdr_buf; 2659 XDR xdr; 2660 2661 sz = sizeof (odl); 2662 unxdr_buf = kmem_zalloc(sz, KM_SLEEP); 2663 2664 xdrmem_create(&xdr, unxdr_buf, sz, XDR_DECODE); 2665 2666 if (xdr_odl(&xdr, (odl *)odlp) == FALSE) { 2667 kmem_free(unxdr_buf, 
sz); 2668 return (NULL); 2669 } 2670 2671 return ((odl *)unxdr_buf); 2672 } 2673 2674 int 2675 mds_put_layout(mds_layout_t *lop, vnode_t *vp) 2676 { 2677 char *odlp; 2678 char *name; 2679 int len, size, err; 2680 2681 name = mds_create_name(vp, &len); 2682 if (name == NULL) { 2683 return (-1); 2684 } 2685 2686 /* mythical xdr encode routine */ 2687 odlp = xdr_convert_layout(lop, &size); 2688 if (odlp == NULL) 2689 return (-1); 2690 2691 err = mds_write_odl(name, odlp, size); 2692 2693 kmem_free(name, len); 2694 kmem_free(odlp, size); 2695 2696 return (err); 2697 } 2698 2699 int 2700 mds_get_odl(vnode_t *vp, mds_layout_t **lopp) 2701 { 2702 char *odlp; 2703 int len, size; 2704 char *name; 2705 mds_layout_t *lop; 2706 2707 ASSERT(lopp != NULL); 2708 2709 name = mds_create_name(vp, &len); 2710 if (name == NULL) 2711 return (NFS4ERR_LAYOUTTRYLATER); 2712 2713 odlp = mds_read_odl(name, &size); 2714 if (odlp == NULL) 2715 return (NFS4ERR_LAYOUTTRYLATER); 2716 2717 lop = *lopp; 2718 2719 /* the magic xdr decode routine */ 2720 lop->odl = xdr_convert_odl(odlp, size); 2721 2722 kmem_free(name, len); 2723 kmem_free(odlp, size); 2724 2725 if (lop->odl == NULL) 2726 return (NFS4ERR_LAYOUTTRYLATER); 2727 2728 return (NFS4_OK); 2729 } 2730 2731 void 2732 mds_delete_layout(vnode_t *vp) 2733 { 2734 int len; 2735 char *name; 2736 2737 name = mds_create_name(vp, &len); 2738 if (name == NULL) { 2739 return; 2740 } 2741 2742 mds_remove_odl(name); 2743 2744 kmem_free(name, len); 2745 } 2746