| File: | src/usr.sbin/ldapd/btree.c |
| Warning: | line 1027, column 19 Division by zero |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /* $OpenBSD: btree.c,v 1.38 2017/05/26 21:23:14 sthen Exp $ */ | |||
| 2 | ||||
| 3 | /* | |||
| 4 | * Copyright (c) 2009, 2010 Martin Hedenfalk <martin@bzero.se> | |||
| 5 | * | |||
| 6 | * Permission to use, copy, modify, and distribute this software for any | |||
| 7 | * purpose with or without fee is hereby granted, provided that the above | |||
| 8 | * copyright notice and this permission notice appear in all copies. | |||
| 9 | * | |||
| 10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |||
| 11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |||
| 12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |||
| 13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |||
| 14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |||
| 15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |||
| 16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |||
| 17 | */ | |||
| 18 | ||||
| 19 | #include <sys/types.h> | |||
| 20 | #include <sys/tree.h> | |||
| 21 | #include <sys/stat.h> | |||
| 22 | #include <sys/queue.h> | |||
| 23 | #include <sys/uio.h> | |||
| 24 | ||||
| 25 | #include <assert.h> | |||
| 26 | #include <err.h> | |||
| 27 | #include <errno(*__errno()).h> | |||
| 28 | #include <fcntl.h> | |||
| 29 | #include <stddef.h> | |||
| 30 | #include <stdint.h> | |||
| 31 | #include <stdio.h> | |||
| 32 | #include <stdlib.h> | |||
| 33 | #include <string.h> | |||
| 34 | #include <time.h> | |||
| 35 | #include <unistd.h> | |||
| 36 | ||||
| 37 | #include "btree.h" | |||
| 38 | ||||
| 39 | /* #define DEBUG */ | |||
| 40 | ||||
| 41 | #ifdef DEBUG | |||
| 42 | # define DPRINTF(...) do { fprintf(stderr(&__sF[2]), "%s:%d: ", __func__, __LINE__42); \ | |||
| 43 | fprintf(stderr(&__sF[2]), __VA_ARGS__); \ | |||
| 44 | fprintf(stderr(&__sF[2]), "\n"); } while(0) | |||
| 45 | #else | |||
| 46 | # define DPRINTF(...) | |||
| 47 | #endif | |||
| 48 | ||||
| 49 | #define PAGESIZE4096 4096 | |||
| 50 | #define BT_MINKEYS4 4 | |||
| 51 | #define BT_MAGIC0xB3DBB3DB 0xB3DBB3DB | |||
| 52 | #define BT_VERSION4 4 | |||
| 53 | #define MAXKEYSIZE255 255 | |||
| 54 | ||||
| 55 | #define P_INVALID0xFFFFFFFF 0xFFFFFFFF | |||
| 56 | ||||
| 57 | #define F_ISSET(w, f)(((w) & (f)) == (f)) (((w) & (f)) == (f)) | |||
| 58 | #define MINIMUM(a,b)((a) < (b) ? (a) : (b)) ((a) < (b) ? (a) : (b)) | |||
| 59 | ||||
| 60 | typedef uint32_t pgno_t; | |||
| 61 | typedef uint16_t indx_t; | |||
| 62 | ||||
| 63 | /* There are four page types: meta, index, leaf and overflow. | |||
| 64 | * They all share the same page header. | |||
| 65 | */ | |||
| 66 | struct page { /* represents an on-disk page */ | |||
| 67 | pgno_t pgno; /* page number */ | |||
| 68 | #define P_BRANCH0x01 0x01 /* branch page */ | |||
| 69 | #define P_LEAF0x02 0x02 /* leaf page */ | |||
| 70 | #define P_OVERFLOW0x04 0x04 /* overflow page */ | |||
| 71 | #define P_META0x08 0x08 /* meta page */ | |||
| 72 | #define P_HEAD0x10 0x10 /* header page */ | |||
| 73 | uint32_t flags; | |||
| 74 | #define lowerb.fb.fb_lower b.fb.fb_lower | |||
| 75 | #define upperb.fb.fb_upper b.fb.fb_upper | |||
| 76 | #define p_next_pgnob.pb_next_pgno b.pb_next_pgno | |||
| 77 | union page_bounds { | |||
| 78 | struct { | |||
| 79 | indx_t fb_lower; /* lower bound of free space */ | |||
| 80 | indx_t fb_upper; /* upper bound of free space */ | |||
| 81 | } fb; | |||
| 82 | pgno_t pb_next_pgno; /* overflow page linked list */ | |||
| 83 | } b; | |||
| 84 | indx_t ptrs[1]; /* dynamic size */ | |||
| 85 | } __packed__attribute__((__packed__)); | |||
| 86 | ||||
| 87 | #define PAGEHDRSZ__builtin_offsetof(struct page, ptrs) offsetof(struct page, ptrs)__builtin_offsetof(struct page, ptrs) | |||
| 88 | ||||
| 89 | #define NUMKEYSP(p)(((p)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs )) >> 1) (((p)->lowerb.fb.fb_lower - PAGEHDRSZ__builtin_offsetof(struct page, ptrs)) >> 1) | |||
| 90 | #define NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) (((mp)->page->lowerb.fb.fb_lower - PAGEHDRSZ__builtin_offsetof(struct page, ptrs)) >> 1) | |||
| 91 | #define SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page-> b.fb.fb_lower) (indx_t)((mp)->page->upperb.fb.fb_upper - (mp)->page->lowerb.fb.fb_lower) | |||
| 92 | #define PAGEFILL(bt, mp)(1000 * ((bt)->head.psize - __builtin_offsetof(struct page , ptrs) - (indx_t)((mp)->page->b.fb.fb_upper - (mp)-> page->b.fb.fb_lower)) / ((bt)->head.psize - __builtin_offsetof (struct page, ptrs))) (1000 * ((bt)->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs) - SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page-> b.fb.fb_lower)) / \ | |||
| 93 | ((bt)->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs))) | |||
| 94 | #define IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)) F_ISSET((mp)->page->flags, P_LEAF)((((mp)->page->flags) & (0x02)) == (0x02)) | |||
| 95 | #define IS_BRANCH(mp)((((mp)->page->flags) & (0x01)) == (0x01)) F_ISSET((mp)->page->flags, P_BRANCH)((((mp)->page->flags) & (0x01)) == (0x01)) | |||
| 96 | #define IS_OVERFLOW(mp)((((mp)->page->flags) & (0x04)) == (0x04)) F_ISSET((mp)->page->flags, P_OVERFLOW)((((mp)->page->flags) & (0x04)) == (0x04)) | |||
| 97 | ||||
| 98 | struct bt_head { /* header page content */ | |||
| 99 | uint32_t magic; | |||
| 100 | uint32_t version; | |||
| 101 | uint32_t flags; | |||
| 102 | uint32_t psize; /* page size */ | |||
| 103 | } __packed__attribute__((__packed__)); | |||
| 104 | ||||
| 105 | struct bt_meta { /* meta (footer) page content */ | |||
| 106 | #define BT_TOMBSTONE0x01 0x01 /* file is replaced */ | |||
| 107 | uint32_t flags; | |||
| 108 | pgno_t root; /* page number of root page */ | |||
| 109 | pgno_t prev_meta; /* previous meta page number */ | |||
| 110 | time_t created_at; | |||
| 111 | uint32_t branch_pages; | |||
| 112 | uint32_t leaf_pages; | |||
| 113 | uint32_t overflow_pages; | |||
| 114 | uint32_t revisions; | |||
| 115 | uint32_t depth; | |||
| 116 | uint64_t entries; | |||
| 117 | unsigned char hash[SHA_DIGEST_LENGTH20]; | |||
| 118 | } __packed__attribute__((__packed__)); | |||
| 119 | ||||
| 120 | struct btkey { | |||
| 121 | size_t len; | |||
| 122 | char str[MAXKEYSIZE255]; | |||
| 123 | }; | |||
| 124 | ||||
| 125 | struct mpage { /* an in-memory cached page */ | |||
| 126 | RB_ENTRY(mpage)struct { struct mpage *rbe_left; struct mpage *rbe_right; struct mpage *rbe_parent; int rbe_color; } entry; /* page cache entry */ | |||
| 127 | SIMPLEQ_ENTRY(mpage)struct { struct mpage *sqe_next; } next; /* queue of dirty pages */ | |||
| 128 | TAILQ_ENTRY(mpage)struct { struct mpage *tqe_next; struct mpage **tqe_prev; } lru_next; /* LRU queue */ | |||
| 129 | struct mpage *parent; /* NULL if root */ | |||
| 130 | unsigned int parent_index; /* keep track of node index */ | |||
| 131 | struct btkey prefix; | |||
| 132 | struct page *page; | |||
| 133 | pgno_t pgno; /* copy of page->pgno */ | |||
| 134 | short ref; /* increased by cursors */ | |||
| 135 | short dirty; /* 1 if on dirty queue */ | |||
| 136 | }; | |||
| 137 | RB_HEAD(page_cache, mpage)struct page_cache { struct mpage *rbh_root; }; | |||
| 138 | SIMPLEQ_HEAD(dirty_queue, mpage)struct dirty_queue { struct mpage *sqh_first; struct mpage ** sqh_last; }; | |||
| 139 | TAILQ_HEAD(lru_queue, mpage)struct lru_queue { struct mpage *tqh_first; struct mpage **tqh_last ; }; | |||
| 140 | ||||
| 141 | static int mpage_cmp(struct mpage *a, struct mpage *b); | |||
| 142 | static struct mpage *mpage_lookup(struct btree *bt, pgno_t pgno); | |||
| 143 | static void mpage_add(struct btree *bt, struct mpage *mp); | |||
| 144 | static void mpage_free(struct mpage *mp); | |||
| 145 | static void mpage_del(struct btree *bt, struct mpage *mp); | |||
| 146 | static void mpage_flush(struct btree *bt); | |||
| 147 | static struct mpage *mpage_copy(struct btree *bt, struct mpage *mp); | |||
| 148 | static void mpage_prune(struct btree *bt); | |||
| 149 | static void mpage_dirty(struct btree *bt, struct mpage *mp); | |||
| 150 | static struct mpage *mpage_touch(struct btree *bt, struct mpage *mp); | |||
| 151 | ||||
| 152 | RB_PROTOTYPE(page_cache, mpage, entry, mpage_cmp)void page_cache_RB_INSERT_COLOR(struct page_cache *, struct mpage *); void page_cache_RB_REMOVE_COLOR(struct page_cache *, struct mpage *, struct mpage *); struct mpage *page_cache_RB_REMOVE (struct page_cache *, struct mpage *); struct mpage *page_cache_RB_INSERT (struct page_cache *, struct mpage *); struct mpage *page_cache_RB_FIND (struct page_cache *, struct mpage *); struct mpage *page_cache_RB_NFIND (struct page_cache *, struct mpage *); struct mpage *page_cache_RB_NEXT (struct mpage *); struct mpage *page_cache_RB_PREV(struct mpage *); struct mpage *page_cache_RB_MINMAX(struct page_cache *, int );; | |||
| 153 | RB_GENERATE(page_cache, mpage, entry, mpage_cmp)void page_cache_RB_INSERT_COLOR(struct page_cache *head, struct mpage *elm) { struct mpage *parent, *gparent, *tmp; while (( parent = (elm)->entry.rbe_parent) && (parent)-> entry.rbe_color == 1) { gparent = (parent)->entry.rbe_parent ; if (parent == (gparent)->entry.rbe_left) { tmp = (gparent )->entry.rbe_right; if (tmp && (tmp)->entry.rbe_color == 1) { (tmp)->entry.rbe_color = 0; do { (parent)->entry .rbe_color = 0; (gparent)->entry.rbe_color = 1; } while (0 ); elm = gparent; continue; } if ((parent)->entry.rbe_right == elm) { do { (tmp) = (parent)->entry.rbe_right; if (((parent )->entry.rbe_right = (tmp)->entry.rbe_left)) { ((tmp)-> entry.rbe_left)->entry.rbe_parent = (parent); } do {} while (0); if (((tmp)->entry.rbe_parent = (parent)->entry.rbe_parent )) { if ((parent) == ((parent)->entry.rbe_parent)->entry .rbe_left) ((parent)->entry.rbe_parent)->entry.rbe_left = (tmp); else ((parent)->entry.rbe_parent)->entry.rbe_right = (tmp); } else (head)->rbh_root = (tmp); (tmp)->entry .rbe_left = (parent); (parent)->entry.rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent)) do {} while ( 0); } while (0); tmp = parent; parent = elm; elm = tmp; } do { (parent)->entry.rbe_color = 0; (gparent)->entry.rbe_color = 1; } while (0); do { (tmp) = (gparent)->entry.rbe_left; if (((gparent)->entry.rbe_left = (tmp)->entry.rbe_right )) { ((tmp)->entry.rbe_right)->entry.rbe_parent = (gparent ); } do {} while (0); if (((tmp)->entry.rbe_parent = (gparent )->entry.rbe_parent)) { if ((gparent) == ((gparent)->entry .rbe_parent)->entry.rbe_left) ((gparent)->entry.rbe_parent )->entry.rbe_left = (tmp); else ((gparent)->entry.rbe_parent )->entry.rbe_right = (tmp); } else (head)->rbh_root = ( tmp); (tmp)->entry.rbe_right = (gparent); (gparent)->entry .rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent )) do {} while (0); } while (0); } else { tmp = (gparent)-> entry.rbe_left; if (tmp && (tmp)->entry.rbe_color == 1) { (tmp)->entry.rbe_color = 0; do { (parent)->entry. rbe_color = 0; (gparent)->entry.rbe_color = 1; } while (0) ; elm = gparent; continue; } if ((parent)->entry.rbe_left == elm) { do { (tmp) = (parent)->entry.rbe_left; if (((parent )->entry.rbe_left = (tmp)->entry.rbe_right)) { ((tmp)-> entry.rbe_right)->entry.rbe_parent = (parent); } do {} while (0); if (((tmp)->entry.rbe_parent = (parent)->entry.rbe_parent )) { if ((parent) == ((parent)->entry.rbe_parent)->entry .rbe_left) ((parent)->entry.rbe_parent)->entry.rbe_left = (tmp); else ((parent)->entry.rbe_parent)->entry.rbe_right = (tmp); } else (head)->rbh_root = (tmp); (tmp)->entry .rbe_right = (parent); (parent)->entry.rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent)) do {} while (0); } while (0); tmp = parent; parent = elm; elm = tmp; } do { (parent)->entry.rbe_color = 0; (gparent)->entry.rbe_color = 1; } while (0); do { (tmp) = (gparent)->entry.rbe_right ; if (((gparent)->entry.rbe_right = (tmp)->entry.rbe_left )) { ((tmp)->entry.rbe_left)->entry.rbe_parent = (gparent ); } do {} while (0); if (((tmp)->entry.rbe_parent = (gparent )->entry.rbe_parent)) { if ((gparent) == ((gparent)->entry .rbe_parent)->entry.rbe_left) ((gparent)->entry.rbe_parent )->entry.rbe_left = (tmp); else ((gparent)->entry.rbe_parent )->entry.rbe_right = (tmp); } else (head)->rbh_root = ( tmp); (tmp)->entry.rbe_left = (gparent); (gparent)->entry .rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent )) do {} while (0); } while (0); } } (head->rbh_root)-> entry.rbe_color = 0; } void page_cache_RB_REMOVE_COLOR(struct page_cache *head, struct mpage *parent, struct mpage *elm) { struct mpage *tmp; while ((elm == ((void*)0) || (elm)->entry .rbe_color == 0) && elm != (head)->rbh_root) { if ( (parent)->entry.rbe_left == elm) { tmp = (parent)->entry .rbe_right; if ((tmp)->entry.rbe_color == 1) { do { (tmp)-> entry.rbe_color = 0; (parent)->entry.rbe_color = 1; } while (0); do { (tmp) = (parent)->entry.rbe_right; if (((parent )->entry.rbe_right = (tmp)->entry.rbe_left)) { ((tmp)-> entry.rbe_left)->entry.rbe_parent = (parent); } do {} while (0); if (((tmp)->entry.rbe_parent = (parent)->entry.rbe_parent )) { if ((parent) == ((parent)->entry.rbe_parent)->entry .rbe_left) ((parent)->entry.rbe_parent)->entry.rbe_left = (tmp); else ((parent)->entry.rbe_parent)->entry.rbe_right = (tmp); } else (head)->rbh_root = (tmp); (tmp)->entry .rbe_left = (parent); (parent)->entry.rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent)) do {} while ( 0); } while (0); tmp = (parent)->entry.rbe_right; } if ((( tmp)->entry.rbe_left == ((void*)0) || ((tmp)->entry.rbe_left )->entry.rbe_color == 0) && ((tmp)->entry.rbe_right == ((void*)0) || ((tmp)->entry.rbe_right)->entry.rbe_color == 0)) { (tmp)->entry.rbe_color = 1; elm = parent; parent = (elm)->entry.rbe_parent; } else { if ((tmp)->entry.rbe_right == ((void*)0) || ((tmp)->entry.rbe_right)->entry.rbe_color == 0) { struct mpage *oleft; if ((oleft = (tmp)->entry.rbe_left )) (oleft)->entry.rbe_color = 0; (tmp)->entry.rbe_color = 1; do { (oleft) = (tmp)->entry.rbe_left; if (((tmp)-> entry.rbe_left = (oleft)->entry.rbe_right)) { ((oleft)-> entry.rbe_right)->entry.rbe_parent = (tmp); } do {} while ( 0); if (((oleft)->entry.rbe_parent = (tmp)->entry.rbe_parent )) { if ((tmp) == ((tmp)->entry.rbe_parent)->entry.rbe_left ) ((tmp)->entry.rbe_parent)->entry.rbe_left = (oleft); else ((tmp)->entry.rbe_parent)->entry.rbe_right = (oleft); } else (head)->rbh_root = (oleft); (oleft)->entry.rbe_right = (tmp); (tmp)->entry.rbe_parent = (oleft); do {} while ( 0); if (((oleft)->entry.rbe_parent)) do {} while (0); } while (0); tmp = (parent)->entry.rbe_right; } (tmp)->entry.rbe_color = (parent)->entry.rbe_color; (parent)->entry.rbe_color = 0; if ((tmp)->entry.rbe_right) ((tmp)->entry.rbe_right )->entry.rbe_color = 0; do { (tmp) = (parent)->entry.rbe_right ; if (((parent)->entry.rbe_right = (tmp)->entry.rbe_left )) { ((tmp)->entry.rbe_left)->entry.rbe_parent = (parent ); } do {} while (0); if (((tmp)->entry.rbe_parent = (parent )->entry.rbe_parent)) { if ((parent) == ((parent)->entry .rbe_parent)->entry.rbe_left) ((parent)->entry.rbe_parent )->entry.rbe_left = (tmp); else ((parent)->entry.rbe_parent )->entry.rbe_right = (tmp); } else (head)->rbh_root = ( tmp); (tmp)->entry.rbe_left = (parent); (parent)->entry .rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent )) do {} while (0); } while (0); elm = (head)->rbh_root; break ; } } else { tmp = (parent)->entry.rbe_left; if ((tmp)-> entry.rbe_color == 1) { do { (tmp)->entry.rbe_color = 0; ( parent)->entry.rbe_color = 1; } while (0); do { (tmp) = (parent )->entry.rbe_left; if (((parent)->entry.rbe_left = (tmp )->entry.rbe_right)) { ((tmp)->entry.rbe_right)->entry .rbe_parent = (parent); } do {} while (0); if (((tmp)->entry .rbe_parent = (parent)->entry.rbe_parent)) { if ((parent) == ((parent)->entry.rbe_parent)->entry.rbe_left) ((parent )->entry.rbe_parent)->entry.rbe_left = (tmp); else ((parent )->entry.rbe_parent)->entry.rbe_right = (tmp); } else ( head)->rbh_root = (tmp); (tmp)->entry.rbe_right = (parent ); (parent)->entry.rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent)) do {} while (0); } while (0); tmp = (parent)->entry.rbe_left; } if (((tmp)->entry.rbe_left == ((void*)0) || ((tmp)->entry.rbe_left)->entry.rbe_color == 0) && ((tmp)->entry.rbe_right == ((void*)0) || ((tmp)->entry.rbe_right)->entry.rbe_color == 0)) { (tmp )->entry.rbe_color = 1; elm = parent; parent = (elm)->entry .rbe_parent; } else { if ((tmp)->entry.rbe_left == ((void* )0) || ((tmp)->entry.rbe_left)->entry.rbe_color == 0) { struct mpage *oright; if ((oright = (tmp)->entry.rbe_right )) (oright)->entry.rbe_color = 0; (tmp)->entry.rbe_color = 1; do { (oright) = (tmp)->entry.rbe_right; if (((tmp)-> entry.rbe_right = (oright)->entry.rbe_left)) { ((oright)-> entry.rbe_left)->entry.rbe_parent = (tmp); } do {} while ( 0); if (((oright)->entry.rbe_parent = (tmp)->entry.rbe_parent )) { if ((tmp) == ((tmp)->entry.rbe_parent)->entry.rbe_left ) ((tmp)->entry.rbe_parent)->entry.rbe_left = (oright); else ((tmp)->entry.rbe_parent)->entry.rbe_right = (oright ); } else (head)->rbh_root = (oright); (oright)->entry. rbe_left = (tmp); (tmp)->entry.rbe_parent = (oright); do { } while (0); if (((oright)->entry.rbe_parent)) do {} while (0); } while (0); tmp = (parent)->entry.rbe_left; } (tmp) ->entry.rbe_color = (parent)->entry.rbe_color; (parent) ->entry.rbe_color = 0; if ((tmp)->entry.rbe_left) ((tmp )->entry.rbe_left)->entry.rbe_color = 0; do { (tmp) = ( parent)->entry.rbe_left; if (((parent)->entry.rbe_left = (tmp)->entry.rbe_right)) { ((tmp)->entry.rbe_right)-> entry.rbe_parent = (parent); } do {} while (0); if (((tmp)-> entry.rbe_parent = (parent)->entry.rbe_parent)) { if ((parent ) == ((parent)->entry.rbe_parent)->entry.rbe_left) ((parent )->entry.rbe_parent)->entry.rbe_left = (tmp); else ((parent )->entry.rbe_parent)->entry.rbe_right = (tmp); } else ( head)->rbh_root = (tmp); (tmp)->entry.rbe_right = (parent ); (parent)->entry.rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent)) do {} while (0); } while (0); elm = (head)->rbh_root; break; } } } if (elm) (elm)->entry .rbe_color = 0; } struct mpage * page_cache_RB_REMOVE(struct page_cache *head, struct mpage *elm) { struct mpage *child, *parent, *old = elm; int color; if ((elm)->entry.rbe_left == ((void*)0) ) child = (elm)->entry.rbe_right; else if ((elm)->entry .rbe_right == ((void*)0)) child = (elm)->entry.rbe_left; else { struct mpage *left; elm = (elm)->entry.rbe_right; while ((left = (elm)->entry.rbe_left)) elm = left; child = (elm )->entry.rbe_right; parent = (elm)->entry.rbe_parent; color = (elm)->entry.rbe_color; if (child) (child)->entry.rbe_parent = parent; if (parent) { if ((parent)->entry.rbe_left == elm ) (parent)->entry.rbe_left = child; else (parent)->entry .rbe_right = child; do {} while (0); } else (head)->rbh_root = child; if ((elm)->entry.rbe_parent == old) parent = elm ; (elm)->entry = (old)->entry; if ((old)->entry.rbe_parent ) { if (((old)->entry.rbe_parent)->entry.rbe_left == old ) ((old)->entry.rbe_parent)->entry.rbe_left = elm; else ((old)->entry.rbe_parent)->entry.rbe_right = elm; do { } while (0); } else (head)->rbh_root = elm; ((old)->entry .rbe_left)->entry.rbe_parent = elm; if ((old)->entry.rbe_right ) ((old)->entry.rbe_right)->entry.rbe_parent = elm; if ( parent) { left = parent; do { do {} while (0); } while ((left = (left)->entry.rbe_parent)); } goto color; } parent = (elm )->entry.rbe_parent; color = (elm)->entry.rbe_color; if (child) (child)->entry.rbe_parent = parent; if (parent) { if ((parent)->entry.rbe_left == elm) (parent)->entry.rbe_left = child; else (parent)->entry.rbe_right = child; do {} while (0); } else (head)->rbh_root = child; color: if (color == 0) page_cache_RB_REMOVE_COLOR(head, parent, child); return ( old); } struct mpage * page_cache_RB_INSERT(struct page_cache *head, struct mpage *elm) { struct mpage *tmp; struct mpage * parent = ((void*)0); int comp = 0; tmp = (head)->rbh_root; while (tmp) { parent = tmp; comp = (mpage_cmp)(elm, parent); if (comp < 0) tmp = (tmp)->entry.rbe_left; else if (comp > 0) tmp = (tmp)->entry.rbe_right; else return (tmp); } do { (elm)->entry.rbe_parent = parent; (elm)->entry.rbe_left = (elm)->entry.rbe_right = ((void*)0); (elm)->entry.rbe_color = 1; } while (0); if (parent != ((void*)0)) { if (comp < 0 ) (parent)->entry.rbe_left = elm; else (parent)->entry. rbe_right = elm; do {} while (0); } else (head)->rbh_root = elm; page_cache_RB_INSERT_COLOR(head, elm); return (((void*) 0)); } struct mpage * page_cache_RB_FIND(struct page_cache *head , struct mpage *elm) { struct mpage *tmp = (head)->rbh_root ; int comp; while (tmp) { comp = mpage_cmp(elm, tmp); if (comp < 0) tmp = (tmp)->entry.rbe_left; else if (comp > 0 ) tmp = (tmp)->entry.rbe_right; else return (tmp); } return (((void*)0)); } struct mpage * page_cache_RB_NFIND(struct page_cache *head, struct mpage *elm) { struct mpage *tmp = (head)->rbh_root ; struct mpage *res = ((void*)0); int comp; while (tmp) { comp = mpage_cmp(elm, tmp); if (comp < 0) { res = tmp; tmp = ( tmp)->entry.rbe_left; } else if (comp > 0) tmp = (tmp)-> entry.rbe_right; else return (tmp); } return (res); } struct mpage * page_cache_RB_NEXT(struct mpage *elm) { if ((elm)->entry .rbe_right) { elm = (elm)->entry.rbe_right; while ((elm)-> entry.rbe_left) elm = (elm)->entry.rbe_left; } else { if ( (elm)->entry.rbe_parent && (elm == ((elm)->entry .rbe_parent)->entry.rbe_left)) elm = (elm)->entry.rbe_parent ; else { while ((elm)->entry.rbe_parent && (elm == ((elm)->entry.rbe_parent)->entry.rbe_right)) elm = (elm )->entry.rbe_parent; elm = (elm)->entry.rbe_parent; } } return (elm); } struct mpage * page_cache_RB_PREV(struct mpage *elm) { if ((elm)->entry.rbe_left) { elm = (elm)->entry .rbe_left; while ((elm)->entry.rbe_right) elm = (elm)-> entry.rbe_right; } else { if ((elm)->entry.rbe_parent && (elm == ((elm)->entry.rbe_parent)->entry.rbe_right)) elm = (elm)->entry.rbe_parent; else { while ((elm)->entry. rbe_parent && (elm == ((elm)->entry.rbe_parent)-> entry.rbe_left)) elm = (elm)->entry.rbe_parent; elm = (elm )->entry.rbe_parent; } } return (elm); } struct mpage * page_cache_RB_MINMAX (struct page_cache *head, int val) { struct mpage *tmp = (head )->rbh_root; struct mpage *parent = ((void*)0); while (tmp ) { parent = tmp; if (val < 0) tmp = (tmp)->entry.rbe_left ; else tmp = (tmp)->entry.rbe_right; } return (parent); }; | |||
| 154 | ||||
| 155 | struct ppage { /* ordered list of pages */ | |||
| 156 | SLIST_ENTRY(ppage)struct { struct ppage *sle_next; } entry; | |||
| 157 | struct mpage *mpage; | |||
| 158 | unsigned int ki; /* cursor index on page */ | |||
| 159 | }; | |||
| 160 | SLIST_HEAD(page_stack, ppage)struct page_stack { struct ppage *slh_first; }; | |||
| 161 | ||||
| 162 | #define CURSOR_EMPTY(c)(((&(c)->stack)->slh_first) == ((void*)0)) SLIST_EMPTY(&(c)->stack)(((&(c)->stack)->slh_first) == ((void*)0)) | |||
| 163 | #define CURSOR_TOP(c)((&(c)->stack)->slh_first) SLIST_FIRST(&(c)->stack)((&(c)->stack)->slh_first) | |||
| 164 | #define CURSOR_POP(c)do { (&(c)->stack)->slh_first = (&(c)->stack )->slh_first->entry.sle_next; } while (0) SLIST_REMOVE_HEAD(&(c)->stack, entry)do { (&(c)->stack)->slh_first = (&(c)->stack )->slh_first->entry.sle_next; } while (0) | |||
| 165 | #define CURSOR_PUSH(c,p)do { (p)->entry.sle_next = (&(c)->stack)->slh_first ; (&(c)->stack)->slh_first = (p); } while (0) SLIST_INSERT_HEAD(&(c)->stack, p, entry)do { (p)->entry.sle_next = (&(c)->stack)->slh_first ; (&(c)->stack)->slh_first = (p); } while (0) | |||
| 166 | ||||
| 167 | struct cursor { | |||
| 168 | struct btree *bt; | |||
| 169 | struct btree_txn *txn; | |||
| 170 | struct page_stack stack; /* stack of parent pages */ | |||
| 171 | short initialized; /* 1 if initialized */ | |||
| 172 | short eof; /* 1 if end is reached */ | |||
| 173 | }; | |||
| 174 | ||||
| 175 | #define METAHASHLEN__builtin_offsetof(struct bt_meta, hash) offsetof(struct bt_meta, hash)__builtin_offsetof(struct bt_meta, hash) | |||
| 176 | #define METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs))) ((void *)((char *)p + PAGEHDRSZ__builtin_offsetof(struct page, ptrs))) | |||
| 177 | ||||
| 178 | struct node { | |||
| 179 | #define n_pgnop.np_pgno p.np_pgno | |||
| 180 | #define n_dsizep.np_dsize p.np_dsize | |||
| 181 | union { | |||
| 182 | pgno_t np_pgno; /* child page number */ | |||
| 183 | uint32_t np_dsize; /* leaf data size */ | |||
| 184 | } p; | |||
| 185 | uint16_t ksize; /* key size */ | |||
| 186 | #define F_BIGDATA0x01 0x01 /* data put on overflow page */ | |||
| 187 | uint8_t flags; | |||
| 188 | char data[1]; | |||
| 189 | } __packed__attribute__((__packed__)); | |||
| 190 | ||||
| 191 | struct btree_txn { | |||
| 192 | pgno_t root; /* current / new root page */ | |||
| 193 | pgno_t next_pgno; /* next unallocated page */ | |||
| 194 | struct btree *bt; /* btree is ref'd */ | |||
| 195 | struct dirty_queue *dirty_queue; /* modified pages */ | |||
| 196 | #define BT_TXN_RDONLY0x01 0x01 /* read-only transaction */ | |||
| 197 | #define BT_TXN_ERROR0x02 0x02 /* an error has occurred */ | |||
| 198 | unsigned int flags; | |||
| 199 | }; | |||
| 200 | ||||
| 201 | struct btree { | |||
| 202 | int fd; | |||
| 203 | char *path; | |||
| 204 | #define BT_FIXPADDING0x01 0x01 /* internal */ | |||
| 205 | unsigned int flags; | |||
| 206 | bt_cmp_func cmp; /* user compare function */ | |||
| 207 | struct bt_head head; | |||
| 208 | struct bt_meta meta; | |||
| 209 | struct page_cache *page_cache; | |||
| 210 | struct lru_queue *lru_queue; | |||
| 211 | struct btree_txn *txn; /* current write transaction */ | |||
| 212 | int ref; /* increased by cursors & txn */ | |||
| 213 | struct btree_stat stat; | |||
| 214 | off_t size; /* current file size */ | |||
| 215 | }; | |||
| 216 | ||||
| 217 | #define NODESIZE__builtin_offsetof(struct node, data) offsetof(struct node, data)__builtin_offsetof(struct node, data) | |||
| 218 | ||||
| 219 | #define INDXSIZE(k)(__builtin_offsetof(struct node, data) + ((k) == ((void*)0) ? 0 : (k)->size)) (NODESIZE__builtin_offsetof(struct node, data) + ((k) == NULL((void*)0) ? 0 : (k)->size)) | |||
| 220 | #define LEAFSIZE(k, d)(__builtin_offsetof(struct node, data) + (k)->size + (d)-> size) (NODESIZE__builtin_offsetof(struct node, data) + (k)->size + (d)->size) | |||
| 221 | #define NODEPTRP(p, i)((struct node *)((char *)(p) + (p)->ptrs[i])) ((struct node *)((char *)(p) + (p)->ptrs[i])) | |||
| 222 | #define NODEPTR(mp, i)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[i])) NODEPTRP((mp)->page, i)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[i])) | |||
| 223 | #define NODEKEY(node)(void *)((node)->data) (void *)((node)->data) | |||
| 224 | #define NODEDATA(node)(void *)((char *)(node)->data + (node)->ksize) (void *)((char *)(node)->data + (node)->ksize) | |||
| 225 | #define NODEPGNO(node)((node)->p.np_pgno) ((node)->p.np_pgno) | |||
| 226 | #define NODEDSZ(node)((node)->p.np_dsize) ((node)->p.np_dsize) | |||
| 227 | ||||
| 228 | #define BT_COMMIT_PAGES64 64 /* max number of pages to write in one commit */ | |||
| 229 | #define BT_MAXCACHE_DEF1024 1024 /* max number of pages to keep in cache */ | |||
| 230 | ||||
| 231 | static int btree_read_page(struct btree *bt, pgno_t pgno, | |||
| 232 | struct page *page); | |||
| 233 | static struct mpage *btree_get_mpage(struct btree *bt, pgno_t pgno); | |||
| 234 | static int btree_search_page_root(struct btree *bt, | |||
| 235 | struct mpage *root, struct btval *key, | |||
| 236 | struct cursor *cursor, int modify, | |||
| 237 | struct mpage **mpp); | |||
| 238 | static int btree_search_page(struct btree *bt, | |||
| 239 | struct btree_txn *txn, struct btval *key, | |||
| 240 | struct cursor *cursor, int modify, | |||
| 241 | struct mpage **mpp); | |||
| 242 | ||||
| 243 | static int btree_write_header(struct btree *bt, int fd); | |||
| 244 | static int btree_read_header(struct btree *bt); | |||
| 245 | static int btree_is_meta_page(struct page *p); | |||
| 246 | static int btree_read_meta(struct btree *bt, pgno_t *p_next); | |||
| 247 | static int btree_write_meta(struct btree *bt, pgno_t root, | |||
| 248 | unsigned int flags); | |||
| 249 | static void btree_ref(struct btree *bt); | |||
| 250 | ||||
| 251 | static struct node *btree_search_node(struct btree *bt, struct mpage *mp, | |||
| 252 | struct btval *key, int *exactp, unsigned int *kip); | |||
| 253 | static int btree_add_node(struct btree *bt, struct mpage *mp, | |||
| 254 | indx_t indx, struct btval *key, struct btval *data, | |||
| 255 | pgno_t pgno, uint8_t flags); | |||
| 256 | static void btree_del_node(struct btree *bt, struct mpage *mp, | |||
| 257 | indx_t indx); | |||
| 258 | static int btree_read_data(struct btree *bt, struct mpage *mp, | |||
| 259 | struct node *leaf, struct btval *data); | |||
| 260 | ||||
| 261 | static int btree_rebalance(struct btree *bt, struct mpage *mp); | |||
| 262 | static int btree_update_key(struct btree *bt, struct mpage *mp, | |||
| 263 | indx_t indx, struct btval *key); | |||
| 264 | static int btree_adjust_prefix(struct btree *bt, | |||
| 265 | struct mpage *src, int delta); | |||
| 266 | static int btree_move_node(struct btree *bt, struct mpage *src, | |||
| 267 | indx_t srcindx, struct mpage *dst, indx_t dstindx); | |||
| 268 | static int btree_merge(struct btree *bt, struct mpage *src, | |||
| 269 | struct mpage *dst); | |||
| 270 | static int btree_split(struct btree *bt, struct mpage **mpp, | |||
| 271 | unsigned int *newindxp, struct btval *newkey, | |||
| 272 | struct btval *newdata, pgno_t newpgno); | |||
| 273 | static struct mpage *btree_new_page(struct btree *bt, uint32_t flags); | |||
| 274 | static int btree_write_overflow_data(struct btree *bt, | |||
| 275 | struct page *p, struct btval *data); | |||
| 276 | ||||
| 277 | static void cursor_pop_page(struct cursor *cursor); | |||
| 278 | static struct ppage *cursor_push_page(struct cursor *cursor, | |||
| 279 | struct mpage *mp); | |||
| 280 | ||||
| 281 | static int bt_set_key(struct btree *bt, struct mpage *mp, | |||
| 282 | struct node *node, struct btval *key); | |||
| 283 | static int btree_sibling(struct cursor *cursor, int move_right); | |||
| 284 | static int btree_cursor_next(struct cursor *cursor, | |||
| 285 | struct btval *key, struct btval *data); | |||
| 286 | static int btree_cursor_set(struct cursor *cursor, | |||
| 287 | struct btval *key, struct btval *data, int *exactp); | |||
| 288 | static int btree_cursor_first(struct cursor *cursor, | |||
| 289 | struct btval *key, struct btval *data); | |||
| 290 | ||||
| 291 | static void bt_reduce_separator(struct btree *bt, struct node *min, | |||
| 292 | struct btval *sep); | |||
| 293 | static void remove_prefix(struct btree *bt, struct btval *key, | |||
| 294 | size_t pfxlen); | |||
| 295 | static void expand_prefix(struct btree *bt, struct mpage *mp, | |||
| 296 | indx_t indx, struct btkey *expkey); | |||
| 297 | static void concat_prefix(struct btree *bt, char *s1, size_t n1, | |||
| 298 | char *s2, size_t n2, char *cs, size_t *cn); | |||
| 299 | static void common_prefix(struct btree *bt, struct btkey *min, | |||
| 300 | struct btkey *max, struct btkey *pfx); | |||
| 301 | static void find_common_prefix(struct btree *bt, struct mpage *mp); | |||
| 302 | ||||
| 303 | static size_t bt_leaf_size(struct btree *bt, struct btval *key, | |||
| 304 | struct btval *data); | |||
| 305 | static size_t bt_branch_size(struct btree *bt, struct btval *key); | |||
| 306 | ||||
| 307 | static pgno_t btree_compact_tree(struct btree *bt, pgno_t pgno, | |||
| 308 | struct btree *btc); | |||
| 309 | ||||
| 310 | static int memncmp(const void *s1, size_t n1, | |||
| 311 | const void *s2, size_t n2); | |||
| 312 | static int memnrcmp(const void *s1, size_t n1, | |||
| 313 | const void *s2, size_t n2); | |||
| 314 | ||||
| 315 | static int | |||
| 316 | memncmp(const void *s1, size_t n1, const void *s2, size_t n2) | |||
| 317 | { | |||
| 318 | if (n1 < n2) { | |||
| 319 | if (memcmp(s1, s2, n1) == 0) | |||
| 320 | return -1; | |||
| 321 | } | |||
| 322 | else if (n1 > n2) { | |||
| 323 | if (memcmp(s1, s2, n2) == 0) | |||
| 324 | return 1; | |||
| 325 | } | |||
| 326 | return memcmp(s1, s2, n1); | |||
| 327 | } | |||
| 328 | ||||
| 329 | static int | |||
| 330 | memnrcmp(const void *s1, size_t n1, const void *s2, size_t n2) | |||
| 331 | { | |||
| 332 | const unsigned char *p1; | |||
| 333 | const unsigned char *p2; | |||
| 334 | ||||
| 335 | if (n1 == 0) | |||
| 336 | return n2 == 0 ? 0 : -1; | |||
| 337 | ||||
| 338 | if (n2 == 0) | |||
| 339 | return n1 == 0 ? 0 : 1; | |||
| 340 | ||||
| 341 | p1 = (const unsigned char *)s1 + n1 - 1; | |||
| 342 | p2 = (const unsigned char *)s2 + n2 - 1; | |||
| 343 | ||||
| 344 | while (*p1 == *p2) { | |||
| 345 | if (p1 == s1) | |||
| 346 | return (p2 == s2) ? 0 : -1; | |||
| 347 | if (p2 == s2) | |||
| 348 | return (p1 == p2) ? 0 : 1; | |||
| 349 | p1--; | |||
| 350 | p2--; | |||
| 351 | } | |||
| 352 | return *p1 - *p2; | |||
| 353 | } | |||
| 354 | ||||
| 355 | int | |||
| 356 | btree_cmp(struct btree *bt, const struct btval *a, const struct btval *b) | |||
| 357 | { | |||
| 358 | return bt->cmp(a, b); | |||
| 359 | } | |||
| 360 | ||||
| 361 | static void | |||
| 362 | common_prefix(struct btree *bt, struct btkey *min, struct btkey *max, | |||
| 363 | struct btkey *pfx) | |||
| 364 | { | |||
| 365 | size_t n = 0; | |||
| 366 | char *p1; | |||
| 367 | char *p2; | |||
| 368 | ||||
| 369 | if (min->len == 0 || max->len == 0) { | |||
| 370 | pfx->len = 0; | |||
| 371 | return; | |||
| 372 | } | |||
| 373 | ||||
| 374 | if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) { | |||
| 375 | p1 = min->str + min->len - 1; | |||
| 376 | p2 = max->str + max->len - 1; | |||
| 377 | ||||
| 378 | while (*p1 == *p2) { | |||
| 379 | if (p1 < min->str || p2 < max->str) | |||
| 380 | break; | |||
| 381 | p1--; | |||
| 382 | p2--; | |||
| 383 | n++; | |||
| 384 | } | |||
| 385 | ||||
| 386 | assert(n <= (int)sizeof(pfx->str))((n <= (int)sizeof(pfx->str)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 386, __func__, "n <= (int)sizeof(pfx->str)")); | |||
| 387 | pfx->len = n; | |||
| 388 | bcopy(p2 + 1, pfx->str, n); | |||
| 389 | } else { | |||
| 390 | p1 = min->str; | |||
| 391 | p2 = max->str; | |||
| 392 | ||||
| 393 | while (*p1 == *p2) { | |||
| 394 | if (n == min->len || n == max->len) | |||
| 395 | break; | |||
| 396 | p1++; | |||
| 397 | p2++; | |||
| 398 | n++; | |||
| 399 | } | |||
| 400 | ||||
| 401 | assert(n <= (int)sizeof(pfx->str))((n <= (int)sizeof(pfx->str)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 401, __func__, "n <= (int)sizeof(pfx->str)")); | |||
| 402 | pfx->len = n; | |||
| 403 | bcopy(max->str, pfx->str, n); | |||
| 404 | } | |||
| 405 | } | |||
| 406 | ||||
| 407 | static void | |||
| 408 | remove_prefix(struct btree *bt, struct btval *key, size_t pfxlen) | |||
| 409 | { | |||
| 410 | if (pfxlen == 0 || bt->cmp != NULL((void*)0)) | |||
| 411 | return; | |||
| 412 | ||||
| 413 | DPRINTF("removing %zu bytes of prefix from key [%.*s]", pfxlen, | |||
| 414 | (int)key->size, (char *)key->data); | |||
| 415 | assert(pfxlen <= key->size)((pfxlen <= key->size) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 415, __func__, "pfxlen <= key->size")); | |||
| 416 | key->size -= pfxlen; | |||
| 417 | if (!F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) | |||
| 418 | key->data = (char *)key->data + pfxlen; | |||
| 419 | } | |||
| 420 | ||||
| 421 | static void | |||
| 422 | expand_prefix(struct btree *bt, struct mpage *mp, indx_t indx, | |||
| 423 | struct btkey *expkey) | |||
| 424 | { | |||
| 425 | struct node *node; | |||
| 426 | ||||
| 427 | node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[indx])); | |||
| 428 | expkey->len = sizeof(expkey->str); | |||
| 429 | concat_prefix(bt, mp->prefix.str, mp->prefix.len, | |||
| 430 | NODEKEY(node)(void *)((node)->data), node->ksize, expkey->str, &expkey->len); | |||
| 431 | } | |||
| 432 | ||||
| 433 | static int | |||
| 434 | bt_cmp(struct btree *bt, const struct btval *key1, const struct btval *key2, | |||
| 435 | struct btkey *pfx) | |||
| 436 | { | |||
| 437 | if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) | |||
| 438 | return memnrcmp(key1->data, key1->size - pfx->len, | |||
| 439 | key2->data, key2->size); | |||
| 440 | else | |||
| 441 | return memncmp((char *)key1->data + pfx->len, key1->size - pfx->len, | |||
| 442 | key2->data, key2->size); | |||
| 443 | } | |||
| 444 | ||||
| 445 | void | |||
| 446 | btval_reset(struct btval *btv) | |||
| 447 | { | |||
| 448 | if (btv) { | |||
| 449 | if (btv->mp) | |||
| 450 | btv->mp->ref--; | |||
| 451 | if (btv->free_data) | |||
| 452 | free(btv->data); | |||
| 453 | memset(btv, 0, sizeof(*btv)); | |||
| 454 | } | |||
| 455 | } | |||
| 456 | ||||
| 457 | static int | |||
| 458 | mpage_cmp(struct mpage *a, struct mpage *b) | |||
| 459 | { | |||
| 460 | if (a->pgno > b->pgno) | |||
| 461 | return 1; | |||
| 462 | if (a->pgno < b->pgno) | |||
| 463 | return -1; | |||
| 464 | return 0; | |||
| 465 | } | |||
| 466 | ||||
| 467 | static struct mpage * | |||
| 468 | mpage_lookup(struct btree *bt, pgno_t pgno) | |||
| 469 | { | |||
| 470 | struct mpage find, *mp; | |||
| 471 | ||||
| 472 | find.pgno = pgno; | |||
| 473 | mp = RB_FIND(page_cache, bt->page_cache, &find)page_cache_RB_FIND(bt->page_cache, &find); | |||
| 474 | if (mp) { | |||
| 475 | bt->stat.hits++; | |||
| 476 | /* Update LRU queue. Move page to the end. */ | |||
| 477 | TAILQ_REMOVE(bt->lru_queue, mp, lru_next)do { if (((mp)->lru_next.tqe_next) != ((void*)0)) (mp)-> lru_next.tqe_next->lru_next.tqe_prev = (mp)->lru_next.tqe_prev ; else (bt->lru_queue)->tqh_last = (mp)->lru_next.tqe_prev ; *(mp)->lru_next.tqe_prev = (mp)->lru_next.tqe_next; ; ; } while (0); | |||
| 478 | TAILQ_INSERT_TAIL(bt->lru_queue, mp, lru_next)do { (mp)->lru_next.tqe_next = ((void*)0); (mp)->lru_next .tqe_prev = (bt->lru_queue)->tqh_last; *(bt->lru_queue )->tqh_last = (mp); (bt->lru_queue)->tqh_last = & (mp)->lru_next.tqe_next; } while (0); | |||
| 479 | } | |||
| 480 | return mp; | |||
| 481 | } | |||
| 482 | ||||
| 483 | static void | |||
| 484 | mpage_add(struct btree *bt, struct mpage *mp) | |||
| 485 | { | |||
| 486 | assert(RB_INSERT(page_cache, bt->page_cache, mp) == NULL)((page_cache_RB_INSERT(bt->page_cache, mp) == ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 486, __func__ , "RB_INSERT(page_cache, bt->page_cache, mp) == NULL")); | |||
| 487 | bt->stat.cache_size++; | |||
| 488 | TAILQ_INSERT_TAIL(bt->lru_queue, mp, lru_next)do { (mp)->lru_next.tqe_next = ((void*)0); (mp)->lru_next .tqe_prev = (bt->lru_queue)->tqh_last; *(bt->lru_queue )->tqh_last = (mp); (bt->lru_queue)->tqh_last = & (mp)->lru_next.tqe_next; } while (0); | |||
| 489 | } | |||
| 490 | ||||
| 491 | static void | |||
| 492 | mpage_free(struct mpage *mp) | |||
| 493 | { | |||
| 494 | if (mp != NULL((void*)0)) { | |||
| 495 | free(mp->page); | |||
| 496 | free(mp); | |||
| 497 | } | |||
| 498 | } | |||
| 499 | ||||
| 500 | static void | |||
| 501 | mpage_del(struct btree *bt, struct mpage *mp) | |||
| 502 | { | |||
| 503 | assert(RB_REMOVE(page_cache, bt->page_cache, mp) == mp)((page_cache_RB_REMOVE(bt->page_cache, mp) == mp) ? (void) 0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 503, __func__ , "RB_REMOVE(page_cache, bt->page_cache, mp) == mp")); | |||
| 504 | assert(bt->stat.cache_size > 0)((bt->stat.cache_size > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 504, __func__, "bt->stat.cache_size > 0")); | |||
| 505 | bt->stat.cache_size--; | |||
| 506 | TAILQ_REMOVE(bt->lru_queue, mp, lru_next)do { if (((mp)->lru_next.tqe_next) != ((void*)0)) (mp)-> lru_next.tqe_next->lru_next.tqe_prev = (mp)->lru_next.tqe_prev ; else (bt->lru_queue)->tqh_last = (mp)->lru_next.tqe_prev ; *(mp)->lru_next.tqe_prev = (mp)->lru_next.tqe_next; ; ; } while (0); | |||
| 507 | } | |||
| 508 | ||||
| 509 | static void | |||
| 510 | mpage_flush(struct btree *bt) | |||
| 511 | { | |||
| 512 | struct mpage *mp; | |||
| 513 | ||||
| 514 | while ((mp = RB_MIN(page_cache, bt->page_cache)page_cache_RB_MINMAX(bt->page_cache, -1)) != NULL((void*)0)) { | |||
| 515 | mpage_del(bt, mp); | |||
| 516 | mpage_free(mp); | |||
| 517 | } | |||
| 518 | } | |||
| 519 | ||||
| 520 | static struct mpage * | |||
| 521 | mpage_copy(struct btree *bt, struct mpage *mp) | |||
| 522 | { | |||
| 523 | struct mpage *copy; | |||
| 524 | ||||
| 525 | if ((copy = calloc(1, sizeof(*copy))) == NULL((void*)0)) | |||
| 526 | return NULL((void*)0); | |||
| 527 | if ((copy->page = malloc(bt->head.psize)) == NULL((void*)0)) { | |||
| 528 | free(copy); | |||
| 529 | return NULL((void*)0); | |||
| 530 | } | |||
| 531 | bcopy(mp->page, copy->page, bt->head.psize); | |||
| 532 | bcopy(&mp->prefix, ©->prefix, sizeof(mp->prefix)); | |||
| 533 | copy->parent = mp->parent; | |||
| 534 | copy->parent_index = mp->parent_index; | |||
| 535 | copy->pgno = mp->pgno; | |||
| 536 | ||||
| 537 | return copy; | |||
| 538 | } | |||
| 539 | ||||
| 540 | /* Remove the least recently used memory pages until the cache size is | |||
| 541 | * within the configured bounds. Pages referenced by cursors or returned | |||
| 542 | * key/data are not pruned. | |||
| 543 | */ | |||
| 544 | static void | |||
| 545 | mpage_prune(struct btree *bt) | |||
| 546 | { | |||
| 547 | struct mpage *mp, *next; | |||
| 548 | ||||
| 549 | for (mp = TAILQ_FIRST(bt->lru_queue)((bt->lru_queue)->tqh_first); mp; mp = next) { | |||
| 550 | if (bt->stat.cache_size <= bt->stat.max_cache) | |||
| 551 | break; | |||
| 552 | next = TAILQ_NEXT(mp, lru_next)((mp)->lru_next.tqe_next); | |||
| 553 | if (!mp->dirty && mp->ref <= 0) { | |||
| 554 | mpage_del(bt, mp); | |||
| 555 | mpage_free(mp); | |||
| 556 | } | |||
| 557 | } | |||
| 558 | } | |||
| 559 | ||||
| 560 | /* Mark a page as dirty and push it on the dirty queue. | |||
| 561 | */ | |||
| 562 | static void | |||
| 563 | mpage_dirty(struct btree *bt, struct mpage *mp) | |||
| 564 | { | |||
| 565 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 565, __func__, "bt != NULL")); | |||
| 566 | assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 566, __func__, "bt->txn != NULL")); | |||
| 567 | ||||
| 568 | if (!mp->dirty) { | |||
| 569 | mp->dirty = 1; | |||
| 570 | SIMPLEQ_INSERT_TAIL(bt->txn->dirty_queue, mp, next)do { (mp)->next.sqe_next = ((void*)0); *(bt->txn->dirty_queue )->sqh_last = (mp); (bt->txn->dirty_queue)->sqh_last = &(mp)->next.sqe_next; } while (0); | |||
| 571 | } | |||
| 572 | } | |||
| 573 | ||||
| 574 | /* Touch a page: make it dirty and re-insert into tree with updated pgno. | |||
| 575 | */ | |||
| 576 | static struct mpage * | |||
| 577 | mpage_touch(struct btree *bt, struct mpage *mp) | |||
| 578 | { | |||
| 579 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 579, __func__, "bt != NULL")); | |||
| 580 | assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 580, __func__, "bt->txn != NULL")); | |||
| 581 | assert(mp != NULL)((mp != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 581, __func__, "mp != NULL")); | |||
| 582 | ||||
| 583 | if (!mp->dirty) { | |||
| 584 | DPRINTF("touching page %u -> %u", mp->pgno, bt->txn->next_pgno); | |||
| 585 | if (mp->ref == 0) | |||
| 586 | mpage_del(bt, mp); | |||
| 587 | else { | |||
| 588 | if ((mp = mpage_copy(bt, mp)) == NULL((void*)0)) | |||
| 589 | return NULL((void*)0); | |||
| 590 | } | |||
| 591 | mp->pgno = mp->page->pgno = bt->txn->next_pgno++; | |||
| 592 | mpage_dirty(bt, mp); | |||
| 593 | mpage_add(bt, mp); | |||
| 594 | ||||
| 595 | /* Update the page number to new touched page. */ | |||
| 596 | if (mp->parent != NULL((void*)0)) | |||
| 597 | NODEPGNO(NODEPTR(mp->parent,((((struct node *)((char *)((mp->parent)->page) + ((mp-> parent)->page)->ptrs[mp->parent_index])))->p.np_pgno ) | |||
| 598 | mp->parent_index))((((struct node *)((char *)((mp->parent)->page) + ((mp-> parent)->page)->ptrs[mp->parent_index])))->p.np_pgno ) = mp->pgno; | |||
| 599 | } | |||
| 600 | ||||
| 601 | return mp; | |||
| 602 | } | |||
| 603 | ||||
| 604 | static int | |||
| 605 | btree_read_page(struct btree *bt, pgno_t pgno, struct page *page) | |||
| 606 | { | |||
| 607 | ssize_t rc; | |||
| 608 | ||||
| 609 | DPRINTF("reading page %u", pgno); | |||
| 610 | bt->stat.reads++; | |||
| 611 | if ((rc = pread(bt->fd, page, bt->head.psize, (off_t)pgno*bt->head.psize)) == 0) { | |||
| 612 | DPRINTF("page %u doesn't exist", pgno); | |||
| 613 | errno(*__errno()) = ENOENT2; | |||
| 614 | return BT_FAIL-1; | |||
| 615 | } else if (rc != (ssize_t)bt->head.psize) { | |||
| 616 | if (rc > 0) | |||
| 617 | errno(*__errno()) = EINVAL22; | |||
| 618 | DPRINTF("read: %s", strerror(errno)); | |||
| 619 | return BT_FAIL-1; | |||
| 620 | } | |||
| 621 | ||||
| 622 | if (page->pgno != pgno) { | |||
| 623 | DPRINTF("page numbers don't match: %u != %u", pgno, page->pgno); | |||
| 624 | errno(*__errno()) = EINVAL22; | |||
| 625 | return BT_FAIL-1; | |||
| 626 | } | |||
| 627 | ||||
| 628 | DPRINTF("page %u has flags 0x%X", pgno, page->flags); | |||
| 629 | ||||
| 630 | return BT_SUCCESS0; | |||
| 631 | } | |||
| 632 | ||||
| 633 | int | |||
| 634 | btree_sync(struct btree *bt) | |||
| 635 | { | |||
| 636 | if (!F_ISSET(bt->flags, BT_NOSYNC)(((bt->flags) & (0x02)) == (0x02))) | |||
| 637 | return fsync(bt->fd); | |||
| 638 | return 0; | |||
| 639 | } | |||
| 640 | ||||
| 641 | struct btree_txn * | |||
| 642 | btree_txn_begin(struct btree *bt, int rdonly) | |||
| 643 | { | |||
| 644 | struct btree_txn *txn; | |||
| 645 | ||||
| 646 | if (!rdonly && bt->txn != NULL((void*)0)) { | |||
| 647 | DPRINTF("write transaction already begun"); | |||
| 648 | errno(*__errno()) = EBUSY16; | |||
| 649 | return NULL((void*)0); | |||
| 650 | } | |||
| 651 | ||||
| 652 | if ((txn = calloc(1, sizeof(*txn))) == NULL((void*)0)) { | |||
| 653 | DPRINTF("calloc: %s", strerror(errno)); | |||
| 654 | return NULL((void*)0); | |||
| 655 | } | |||
| 656 | ||||
| 657 | if (rdonly) { | |||
| 658 | txn->flags |= BT_TXN_RDONLY0x01; | |||
| 659 | } else { | |||
| 660 | txn->dirty_queue = calloc(1, sizeof(*txn->dirty_queue)); | |||
| 661 | if (txn->dirty_queue == NULL((void*)0)) { | |||
| 662 | free(txn); | |||
| 663 | return NULL((void*)0); | |||
| 664 | } | |||
| 665 | SIMPLEQ_INIT(txn->dirty_queue)do { (txn->dirty_queue)->sqh_first = ((void*)0); (txn-> dirty_queue)->sqh_last = &(txn->dirty_queue)->sqh_first ; } while (0); | |||
| 666 | ||||
| 667 | DPRINTF("taking write lock on txn %p", txn); | |||
| 668 | if (flock(bt->fd, LOCK_EX0x02 | LOCK_NB0x04) != 0) { | |||
| 669 | DPRINTF("flock: %s", strerror(errno)); | |||
| 670 | errno(*__errno()) = EBUSY16; | |||
| 671 | free(txn->dirty_queue); | |||
| 672 | free(txn); | |||
| 673 | return NULL((void*)0); | |||
| 674 | } | |||
| 675 | bt->txn = txn; | |||
| 676 | } | |||
| 677 | ||||
| 678 | txn->bt = bt; | |||
| 679 | btree_ref(bt); | |||
| 680 | ||||
| 681 | if (btree_read_meta(bt, &txn->next_pgno) != BT_SUCCESS0) { | |||
| 682 | btree_txn_abort(txn); | |||
| 683 | return NULL((void*)0); | |||
| 684 | } | |||
| 685 | ||||
| 686 | txn->root = bt->meta.root; | |||
| 687 | DPRINTF("begin transaction on btree %p, root page %u", bt, txn->root); | |||
| 688 | ||||
| 689 | return txn; | |||
| 690 | } | |||
| 691 | ||||
| 692 | void | |||
| 693 | btree_txn_abort(struct btree_txn *txn) | |||
| 694 | { | |||
| 695 | struct mpage *mp; | |||
| 696 | struct btree *bt; | |||
| 697 | ||||
| 698 | if (txn == NULL((void*)0)) | |||
| 699 | return; | |||
| 700 | ||||
| 701 | bt = txn->bt; | |||
| 702 | DPRINTF("abort transaction on btree %p, root page %u", bt, txn->root); | |||
| 703 | ||||
| 704 | if (!F_ISSET(txn->flags, BT_TXN_RDONLY)(((txn->flags) & (0x01)) == (0x01))) { | |||
| 705 | /* Discard all dirty pages. | |||
| 706 | */ | |||
| 707 | while (!SIMPLEQ_EMPTY(txn->dirty_queue)(((txn->dirty_queue)->sqh_first) == ((void*)0))) { | |||
| 708 | mp = SIMPLEQ_FIRST(txn->dirty_queue)((txn->dirty_queue)->sqh_first); | |||
| 709 | assert(mp->ref == 0)((mp->ref == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 709, __func__, "mp->ref == 0")); /* cursors should be closed */ | |||
| 710 | mpage_del(bt, mp); | |||
| 711 | SIMPLEQ_REMOVE_HEAD(txn->dirty_queue, next)do { if (((txn->dirty_queue)->sqh_first = (txn->dirty_queue )->sqh_first->next.sqe_next) == ((void*)0)) (txn->dirty_queue )->sqh_last = &(txn->dirty_queue)->sqh_first; } while (0); | |||
| 712 | mpage_free(mp); | |||
| 713 | } | |||
| 714 | ||||
| 715 | DPRINTF("releasing write lock on txn %p", txn); | |||
| 716 | txn->bt->txn = NULL((void*)0); | |||
| 717 | if (flock(txn->bt->fd, LOCK_UN0x08) != 0) { | |||
| 718 | DPRINTF("failed to unlock fd %d: %s", | |||
| 719 | txn->bt->fd, strerror(errno)); | |||
| 720 | } | |||
| 721 | free(txn->dirty_queue); | |||
| 722 | } | |||
| 723 | ||||
| 724 | btree_close(txn->bt); | |||
| 725 | free(txn); | |||
| 726 | } | |||
| 727 | ||||
| 728 | int | |||
| 729 | btree_txn_commit(struct btree_txn *txn) | |||
| 730 | { | |||
| 731 | int n, done; | |||
| 732 | ssize_t rc; | |||
| 733 | off_t size; | |||
| 734 | struct mpage *mp; | |||
| 735 | struct btree *bt; | |||
| 736 | struct iovec iov[BT_COMMIT_PAGES64]; | |||
| 737 | ||||
| 738 | assert(txn != NULL)((txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 738, __func__, "txn != NULL")); | |||
| 739 | assert(txn->bt != NULL)((txn->bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 739, __func__, "txn->bt != NULL")); | |||
| 740 | ||||
| 741 | bt = txn->bt; | |||
| 742 | ||||
| 743 | if (F_ISSET(txn->flags, BT_TXN_RDONLY)(((txn->flags) & (0x01)) == (0x01))) { | |||
| 744 | DPRINTF("attempt to commit read-only transaction"); | |||
| 745 | btree_txn_abort(txn); | |||
| 746 | errno(*__errno()) = EPERM1; | |||
| 747 | return BT_FAIL-1; | |||
| 748 | } | |||
| 749 | ||||
| 750 | if (txn != bt->txn) { | |||
| 751 | DPRINTF("attempt to commit unknown transaction"); | |||
| 752 | btree_txn_abort(txn); | |||
| 753 | errno(*__errno()) = EINVAL22; | |||
| 754 | return BT_FAIL-1; | |||
| 755 | } | |||
| 756 | ||||
| 757 | if (F_ISSET(txn->flags, BT_TXN_ERROR)(((txn->flags) & (0x02)) == (0x02))) { | |||
| 758 | DPRINTF("error flag is set, can't commit"); | |||
| 759 | btree_txn_abort(txn); | |||
| 760 | errno(*__errno()) = EINVAL22; | |||
| 761 | return BT_FAIL-1; | |||
| 762 | } | |||
| 763 | ||||
| 764 | if (SIMPLEQ_EMPTY(txn->dirty_queue)(((txn->dirty_queue)->sqh_first) == ((void*)0))) | |||
| 765 | goto done; | |||
| 766 | ||||
| 767 | if (F_ISSET(bt->flags, BT_FIXPADDING)(((bt->flags) & (0x01)) == (0x01))) { | |||
| 768 | size = lseek(bt->fd, 0, SEEK_END2); | |||
| 769 | size += bt->head.psize - (size % bt->head.psize); | |||
| 770 | DPRINTF("extending to multiple of page size: %llu", size); | |||
| 771 | if (ftruncate(bt->fd, size) != 0) { | |||
| 772 | DPRINTF("ftruncate: %s", strerror(errno)); | |||
| 773 | btree_txn_abort(txn); | |||
| 774 | return BT_FAIL-1; | |||
| 775 | } | |||
| 776 | bt->flags &= ~BT_FIXPADDING0x01; | |||
| 777 | } | |||
| 778 | ||||
| 779 | DPRINTF("committing transaction on btree %p, root page %u", | |||
| 780 | bt, txn->root); | |||
| 781 | ||||
| 782 | /* Commit up to BT_COMMIT_PAGES dirty pages to disk until done. | |||
| 783 | */ | |||
| 784 | do { | |||
| 785 | n = 0; | |||
| 786 | done = 1; | |||
| 787 | SIMPLEQ_FOREACH(mp, txn->dirty_queue, next)for((mp) = ((txn->dirty_queue)->sqh_first); (mp) != ((void *)0); (mp) = ((mp)->next.sqe_next)) { | |||
| 788 | DPRINTF("committing page %u", mp->pgno); | |||
| 789 | iov[n].iov_len = bt->head.psize; | |||
| 790 | iov[n].iov_base = mp->page; | |||
| 791 | if (++n >= BT_COMMIT_PAGES64) { | |||
| 792 | done = 0; | |||
| 793 | break; | |||
| 794 | } | |||
| 795 | } | |||
| 796 | ||||
| 797 | if (n == 0) | |||
| 798 | break; | |||
| 799 | ||||
| 800 | DPRINTF("committing %u dirty pages", n); | |||
| 801 | rc = writev(bt->fd, iov, n); | |||
| 802 | if (rc != (ssize_t)bt->head.psize*n) { | |||
| 803 | if (rc > 0) | |||
| 804 | DPRINTF("short write, filesystem full?"); | |||
| 805 | else | |||
| 806 | DPRINTF("writev: %s", strerror(errno)); | |||
| 807 | btree_txn_abort(txn); | |||
| 808 | return BT_FAIL-1; | |||
| 809 | } | |||
| 810 | ||||
| 811 | /* Remove the dirty flag from the written pages. | |||
| 812 | */ | |||
| 813 | while (!SIMPLEQ_EMPTY(txn->dirty_queue)(((txn->dirty_queue)->sqh_first) == ((void*)0))) { | |||
| 814 | mp = SIMPLEQ_FIRST(txn->dirty_queue)((txn->dirty_queue)->sqh_first); | |||
| 815 | mp->dirty = 0; | |||
| 816 | SIMPLEQ_REMOVE_HEAD(txn->dirty_queue, next)do { if (((txn->dirty_queue)->sqh_first = (txn->dirty_queue )->sqh_first->next.sqe_next) == ((void*)0)) (txn->dirty_queue )->sqh_last = &(txn->dirty_queue)->sqh_first; } while (0); | |||
| 817 | if (--n == 0) | |||
| 818 | break; | |||
| 819 | } | |||
| 820 | } while (!done); | |||
| 821 | ||||
| 822 | if (btree_sync(bt) != 0 || | |||
| 823 | btree_write_meta(bt, txn->root, 0) != BT_SUCCESS0 || | |||
| 824 | btree_sync(bt) != 0) { | |||
| 825 | btree_txn_abort(txn); | |||
| 826 | return BT_FAIL-1; | |||
| 827 | } | |||
| 828 | ||||
| 829 | done: | |||
| 830 | mpage_prune(bt); | |||
| 831 | btree_txn_abort(txn); | |||
| 832 | ||||
| 833 | return BT_SUCCESS0; | |||
| 834 | } | |||
| 835 | ||||
| 836 | static int | |||
| 837 | btree_write_header(struct btree *bt, int fd) | |||
| 838 | { | |||
| 839 | struct stat sb; | |||
| 840 | struct bt_head *h; | |||
| 841 | struct page *p; | |||
| 842 | ssize_t rc; | |||
| 843 | unsigned int psize; | |||
| 844 | ||||
| 845 | DPRINTF("writing header page"); | |||
| 846 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 846, __func__, "bt != NULL")); | |||
| 847 | ||||
| 848 | /* | |||
| 849 | * Ask stat for 'optimal blocksize for I/O', but cap to fit in indx_t. | |||
| 850 | */ | |||
| 851 | if (fstat(fd, &sb) == 0) | |||
| 852 | psize = MINIMUM(32*1024, sb.st_blksize)((32*1024) < (sb.st_blksize) ? (32*1024) : (sb.st_blksize) ); | |||
| 853 | else | |||
| 854 | psize = PAGESIZE4096; | |||
| 855 | ||||
| 856 | if ((p = calloc(1, psize)) == NULL((void*)0)) | |||
| 857 | return -1; | |||
| 858 | p->flags = P_HEAD0x10; | |||
| 859 | ||||
| 860 | h = METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs))); | |||
| 861 | h->magic = BT_MAGIC0xB3DBB3DB; | |||
| 862 | h->version = BT_VERSION4; | |||
| 863 | h->psize = psize; | |||
| 864 | bcopy(h, &bt->head, sizeof(*h)); | |||
| 865 | ||||
| 866 | rc = write(fd, p, bt->head.psize); | |||
| 867 | free(p); | |||
| 868 | if (rc != (ssize_t)bt->head.psize) { | |||
| 869 | if (rc > 0) | |||
| 870 | DPRINTF("short write, filesystem full?"); | |||
| 871 | return BT_FAIL-1; | |||
| 872 | } | |||
| 873 | ||||
| 874 | return BT_SUCCESS0; | |||
| 875 | } | |||
| 876 | ||||
| 877 | static int | |||
| 878 | btree_read_header(struct btree *bt) | |||
| 879 | { | |||
| 880 | char page[PAGESIZE4096]; | |||
| 881 | struct page *p; | |||
| 882 | struct bt_head *h; | |||
| 883 | int rc; | |||
| 884 | ||||
| 885 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 885, __func__, "bt != NULL")); | |||
| 886 | ||||
| 887 | /* We don't know the page size yet, so use a minimum value. | |||
| 888 | */ | |||
| 889 | ||||
| 890 | if ((rc = pread(bt->fd, page, PAGESIZE4096, 0)) == 0) { | |||
| 891 | errno(*__errno()) = ENOENT2; | |||
| 892 | return -1; | |||
| 893 | } else if (rc != PAGESIZE4096) { | |||
| 894 | if (rc > 0) | |||
| 895 | errno(*__errno()) = EINVAL22; | |||
| 896 | DPRINTF("read: %s", strerror(errno)); | |||
| 897 | return -1; | |||
| 898 | } | |||
| 899 | ||||
| 900 | p = (struct page *)page; | |||
| 901 | ||||
| 902 | if (!F_ISSET(p->flags, P_HEAD)(((p->flags) & (0x10)) == (0x10))) { | |||
| 903 | DPRINTF("page %d not a header page", p->pgno); | |||
| 904 | errno(*__errno()) = EINVAL22; | |||
| 905 | return -1; | |||
| 906 | } | |||
| 907 | ||||
| 908 | h = METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs))); | |||
| 909 | if (h->magic != BT_MAGIC0xB3DBB3DB) { | |||
| 910 | DPRINTF("header has invalid magic"); | |||
| 911 | errno(*__errno()) = EINVAL22; | |||
| 912 | return -1; | |||
| 913 | } | |||
| 914 | ||||
| 915 | if (h->version != BT_VERSION4) { | |||
| 916 | DPRINTF("database is version %u, expected version %u", | |||
| 917 | bt->head.version, BT_VERSION); | |||
| 918 | errno(*__errno()) = EINVAL22; | |||
| 919 | return -1; | |||
| 920 | } | |||
| 921 | ||||
| 922 | bcopy(h, &bt->head, sizeof(*h)); | |||
| 923 | return 0; | |||
| 924 | } | |||
| 925 | ||||
| 926 | static int | |||
| 927 | btree_write_meta(struct btree *bt, pgno_t root, unsigned int flags) | |||
| 928 | { | |||
| 929 | struct mpage *mp; | |||
| 930 | struct bt_meta *meta; | |||
| 931 | ssize_t rc; | |||
| 932 | ||||
| 933 | DPRINTF("writing meta page for root page %u", root); | |||
| 934 | ||||
| 935 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 935, __func__, "bt != NULL")); | |||
| 936 | assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 936, __func__, "bt->txn != NULL")); | |||
| 937 | ||||
| 938 | if ((mp = btree_new_page(bt, P_META0x08)) == NULL((void*)0)) | |||
| 939 | return -1; | |||
| 940 | ||||
| 941 | bt->meta.prev_meta = bt->meta.root; | |||
| 942 | bt->meta.root = root; | |||
| 943 | bt->meta.flags = flags; | |||
| 944 | bt->meta.created_at = time(0); | |||
| 945 | bt->meta.revisions++; | |||
| 946 | SHA1((unsigned char *)&bt->meta, METAHASHLEN__builtin_offsetof(struct bt_meta, hash), bt->meta.hash); | |||
| 947 | ||||
| 948 | /* Copy the meta data changes to the new meta page. */ | |||
| 949 | meta = METADATA(mp->page)((void *)((char *)mp->page + __builtin_offsetof(struct page , ptrs))); | |||
| 950 | bcopy(&bt->meta, meta, sizeof(*meta)); | |||
| 951 | ||||
| 952 | rc = write(bt->fd, mp->page, bt->head.psize); | |||
| 953 | mp->dirty = 0; | |||
| 954 | SIMPLEQ_REMOVE_HEAD(bt->txn->dirty_queue, next)do { if (((bt->txn->dirty_queue)->sqh_first = (bt-> txn->dirty_queue)->sqh_first->next.sqe_next) == ((void *)0)) (bt->txn->dirty_queue)->sqh_last = &(bt-> txn->dirty_queue)->sqh_first; } while (0); | |||
| 955 | if (rc != (ssize_t)bt->head.psize) { | |||
| 956 | if (rc > 0) | |||
| 957 | DPRINTF("short write, filesystem full?"); | |||
| 958 | return BT_FAIL-1; | |||
| 959 | } | |||
| 960 | ||||
| 961 | if ((bt->size = lseek(bt->fd, 0, SEEK_END2)) == -1) { | |||
| 962 | DPRINTF("failed to update file size: %s", strerror(errno)); | |||
| 963 | bt->size = 0; | |||
| 964 | } | |||
| 965 | ||||
| 966 | return BT_SUCCESS0; | |||
| 967 | } | |||
| 968 | ||||
| 969 | /* Returns true if page p is a valid meta page, false otherwise. | |||
| 970 | */ | |||
| 971 | static int | |||
| 972 | btree_is_meta_page(struct page *p) | |||
| 973 | { | |||
| 974 | struct bt_meta *m; | |||
| 975 | unsigned char hash[SHA_DIGEST_LENGTH20]; | |||
| 976 | ||||
| 977 | m = METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs))); | |||
| 978 | if (!F_ISSET(p->flags, P_META)(((p->flags) & (0x08)) == (0x08))) { | |||
| 979 | DPRINTF("page %d not a meta page", p->pgno); | |||
| 980 | errno(*__errno()) = EINVAL22; | |||
| 981 | return 0; | |||
| 982 | } | |||
| 983 | ||||
| 984 | if (m->root >= p->pgno && m->root != P_INVALID0xFFFFFFFF) { | |||
| 985 | DPRINTF("page %d points to an invalid root page", p->pgno); | |||
| 986 | errno(*__errno()) = EINVAL22; | |||
| 987 | return 0; | |||
| 988 | } | |||
| 989 | ||||
| 990 | SHA1((unsigned char *)m, METAHASHLEN__builtin_offsetof(struct bt_meta, hash), hash); | |||
| 991 | if (bcmp(hash, m->hash, SHA_DIGEST_LENGTH20) != 0) { | |||
| 992 | DPRINTF("page %d has an invalid digest", p->pgno); | |||
| 993 | errno(*__errno()) = EINVAL22; | |||
| 994 | return 0; | |||
| 995 | } | |||
| 996 | ||||
| 997 | return 1; | |||
| 998 | } | |||
| 999 | ||||
| 1000 | static int | |||
| 1001 | btree_read_meta(struct btree *bt, pgno_t *p_next) | |||
| 1002 | { | |||
| 1003 | struct mpage *mp; | |||
| 1004 | struct bt_meta *meta; | |||
| 1005 | pgno_t meta_pgno, next_pgno; | |||
| 1006 | off_t size; | |||
| 1007 | ||||
| 1008 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1008, __func__, "bt != NULL")); | |||
| 1009 | ||||
| 1010 | if ((size = lseek(bt->fd, 0, SEEK_END2)) == -1) | |||
| 1011 | goto fail; | |||
| 1012 | ||||
| 1013 | DPRINTF("btree_read_meta: size = %llu", size); | |||
| 1014 | ||||
| 1015 | if (size < bt->size) { | |||
| 1016 | DPRINTF("file has shrunk!"); | |||
| 1017 | errno(*__errno()) = EIO5; | |||
| 1018 | goto fail; | |||
| 1019 | } | |||
| 1020 | ||||
| 1021 | if (size == bt->head.psize) { /* there is only the header */ | |||
| 1022 | if (p_next != NULL((void*)0)) | |||
| 1023 | *p_next = 1; | |||
| 1024 | return BT_SUCCESS0; /* new file */ | |||
| 1025 | } | |||
| 1026 | ||||
| 1027 | next_pgno = size / bt->head.psize; | |||
| ||||
| 1028 | if (next_pgno == 0) { | |||
| 1029 | DPRINTF("corrupt file"); | |||
| 1030 | errno(*__errno()) = EIO5; | |||
| 1031 | goto fail; | |||
| 1032 | } | |||
| 1033 | ||||
| 1034 | meta_pgno = next_pgno - 1; | |||
| 1035 | ||||
| 1036 | if (size % bt->head.psize != 0) { | |||
| 1037 | DPRINTF("filesize not a multiple of the page size!"); | |||
| 1038 | bt->flags |= BT_FIXPADDING0x01; | |||
| 1039 | next_pgno++; | |||
| 1040 | } | |||
| 1041 | ||||
| 1042 | if (p_next != NULL((void*)0)) | |||
| 1043 | *p_next = next_pgno; | |||
| 1044 | ||||
| 1045 | if (size == bt->size) { | |||
| 1046 | DPRINTF("size unchanged, keeping current meta page"); | |||
| 1047 | if (F_ISSET(bt->meta.flags, BT_TOMBSTONE)(((bt->meta.flags) & (0x01)) == (0x01))) { | |||
| 1048 | DPRINTF("file is dead"); | |||
| 1049 | errno(*__errno()) = ESTALE70; | |||
| 1050 | return BT_FAIL-1; | |||
| 1051 | } else | |||
| 1052 | return BT_SUCCESS0; | |||
| 1053 | } | |||
| 1054 | bt->size = size; | |||
| 1055 | ||||
| 1056 | while (meta_pgno > 0) { | |||
| 1057 | if ((mp = btree_get_mpage(bt, meta_pgno)) == NULL((void*)0)) | |||
| 1058 | break; | |||
| 1059 | if (btree_is_meta_page(mp->page)) { | |||
| 1060 | meta = METADATA(mp->page)((void *)((char *)mp->page + __builtin_offsetof(struct page , ptrs))); | |||
| 1061 | DPRINTF("flags = 0x%x", meta->flags); | |||
| 1062 | if (F_ISSET(meta->flags, BT_TOMBSTONE)(((meta->flags) & (0x01)) == (0x01))) { | |||
| 1063 | DPRINTF("file is dead"); | |||
| 1064 | errno(*__errno()) = ESTALE70; | |||
| 1065 | return BT_FAIL-1; | |||
| 1066 | } else { | |||
| 1067 | /* Make copy of last meta page. */ | |||
| 1068 | bcopy(meta, &bt->meta, sizeof(bt->meta)); | |||
| 1069 | return BT_SUCCESS0; | |||
| 1070 | } | |||
| 1071 | } | |||
| 1072 | --meta_pgno; /* scan backwards to first valid meta page */ | |||
| 1073 | } | |||
| 1074 | ||||
| 1075 | errno(*__errno()) = EIO5; | |||
| 1076 | fail: | |||
| 1077 | if (p_next != NULL((void*)0)) | |||
| 1078 | *p_next = P_INVALID0xFFFFFFFF; | |||
| 1079 | return BT_FAIL-1; | |||
| 1080 | } | |||
| 1081 | ||||
| 1082 | struct btree * | |||
| 1083 | btree_open_fd(int fd, unsigned int flags) | |||
| 1084 | { | |||
| 1085 | struct btree *bt; | |||
| 1086 | int fl; | |||
| 1087 | ||||
| 1088 | fl = fcntl(fd, F_GETFL3); | |||
| 1089 | if (fcntl(fd, F_SETFL4, fl | O_APPEND0x0008) == -1) | |||
| 1090 | return NULL((void*)0); | |||
| 1091 | ||||
| 1092 | if ((bt = calloc(1, sizeof(*bt))) == NULL((void*)0)) | |||
| 1093 | return NULL((void*)0); | |||
| 1094 | bt->fd = fd; | |||
| 1095 | bt->flags = flags; | |||
| 1096 | bt->flags &= ~BT_FIXPADDING0x01; | |||
| 1097 | bt->ref = 1; | |||
| 1098 | bt->meta.root = P_INVALID0xFFFFFFFF; | |||
| 1099 | ||||
| 1100 | if ((bt->page_cache = calloc(1, sizeof(*bt->page_cache))) == NULL((void*)0)) | |||
| 1101 | goto fail; | |||
| 1102 | bt->stat.max_cache = BT_MAXCACHE_DEF1024; | |||
| 1103 | RB_INIT(bt->page_cache)do { (bt->page_cache)->rbh_root = ((void*)0); } while ( 0); | |||
| 1104 | ||||
| 1105 | if ((bt->lru_queue = calloc(1, sizeof(*bt->lru_queue))) == NULL((void*)0)) | |||
| 1106 | goto fail; | |||
| 1107 | TAILQ_INIT(bt->lru_queue)do { (bt->lru_queue)->tqh_first = ((void*)0); (bt->lru_queue )->tqh_last = &(bt->lru_queue)->tqh_first; } while (0); | |||
| 1108 | ||||
| 1109 | if (btree_read_header(bt) != 0) { | |||
| 1110 | if (errno(*__errno()) != ENOENT2) | |||
| 1111 | goto fail; | |||
| 1112 | DPRINTF("new database"); | |||
| 1113 | btree_write_header(bt, bt->fd); | |||
| 1114 | } | |||
| 1115 | ||||
| 1116 | if (btree_read_meta(bt, NULL((void*)0)) != 0) | |||
| 1117 | goto fail; | |||
| 1118 | ||||
| 1119 | DPRINTF("opened database version %u, pagesize %u", | |||
| 1120 | bt->head.version, bt->head.psize); | |||
| 1121 | DPRINTF("timestamp: %s", ctime(&bt->meta.created_at)); | |||
| 1122 | DPRINTF("depth: %u", bt->meta.depth); | |||
| 1123 | DPRINTF("entries: %llu", bt->meta.entries); | |||
| 1124 | DPRINTF("revisions: %u", bt->meta.revisions); | |||
| 1125 | DPRINTF("branch pages: %u", bt->meta.branch_pages); | |||
| 1126 | DPRINTF("leaf pages: %u", bt->meta.leaf_pages); | |||
| 1127 | DPRINTF("overflow pages: %u", bt->meta.overflow_pages); | |||
| 1128 | DPRINTF("root: %u", bt->meta.root); | |||
| 1129 | DPRINTF("previous meta page: %u", bt->meta.prev_meta); | |||
| 1130 | ||||
| 1131 | return bt; | |||
| 1132 | ||||
| 1133 | fail: | |||
| 1134 | free(bt->lru_queue); | |||
| 1135 | free(bt->page_cache); | |||
| 1136 | free(bt); | |||
| 1137 | return NULL((void*)0); | |||
| 1138 | } | |||
| 1139 | ||||
| 1140 | struct btree * | |||
| 1141 | btree_open(const char *path, unsigned int flags, mode_t mode) | |||
| 1142 | { | |||
| 1143 | int fd, oflags; | |||
| 1144 | struct btree *bt; | |||
| 1145 | ||||
| 1146 | if (F_ISSET(flags, BT_RDONLY)(((flags) & (0x04)) == (0x04))) | |||
| ||||
| 1147 | oflags = O_RDONLY0x0000; | |||
| 1148 | else | |||
| 1149 | oflags = O_RDWR0x0002 | O_CREAT0x0200 | O_APPEND0x0008; | |||
| 1150 | ||||
| 1151 | if ((fd = open(path, oflags, mode)) == -1) | |||
| 1152 | return NULL((void*)0); | |||
| 1153 | ||||
| 1154 | if ((bt = btree_open_fd(fd, flags)) == NULL((void*)0)) | |||
| 1155 | close(fd); | |||
| 1156 | else { | |||
| 1157 | bt->path = strdup(path); | |||
| 1158 | DPRINTF("opened btree %p", bt); | |||
| 1159 | } | |||
| 1160 | ||||
| 1161 | return bt; | |||
| 1162 | } | |||
| 1163 | ||||
| 1164 | static void | |||
| 1165 | btree_ref(struct btree *bt) | |||
| 1166 | { | |||
| 1167 | bt->ref++; | |||
| 1168 | DPRINTF("ref is now %d on btree %p", bt->ref, bt); | |||
| 1169 | } | |||
| 1170 | ||||
| 1171 | void | |||
| 1172 | btree_close(struct btree *bt) | |||
| 1173 | { | |||
| 1174 | if (bt == NULL((void*)0)) | |||
| 1175 | return; | |||
| 1176 | ||||
| 1177 | if (--bt->ref == 0) { | |||
| 1178 | DPRINTF("ref is zero, closing btree %p", bt); | |||
| 1179 | close(bt->fd); | |||
| 1180 | mpage_flush(bt); | |||
| 1181 | free(bt->lru_queue); | |||
| 1182 | free(bt->path); | |||
| 1183 | free(bt->page_cache); | |||
| 1184 | free(bt); | |||
| 1185 | } else | |||
| 1186 | DPRINTF("ref is now %d on btree %p", bt->ref, bt); | |||
| 1187 | } | |||
| 1188 | ||||
| 1189 | /* Search for key within a leaf page, using binary search. | |||
| 1190 | * Returns the smallest entry larger or equal to the key. | |||
| 1191 | * If exactp is non-null, stores whether the found entry was an exact match | |||
| 1192 | * in *exactp (1 or 0). | |||
| 1193 | * If kip is non-null, stores the index of the found entry in *kip. | |||
| 1194 | * If no entry larger of equal to the key is found, returns NULL. | |||
| 1195 | */ | |||
| 1196 | static struct node * | |||
| 1197 | btree_search_node(struct btree *bt, struct mpage *mp, struct btval *key, | |||
| 1198 | int *exactp, unsigned int *kip) | |||
| 1199 | { | |||
| 1200 | unsigned int i = 0; | |||
| 1201 | int low, high; | |||
| 1202 | int rc = 0; | |||
| 1203 | struct node *node; | |||
| 1204 | struct btval nodekey; | |||
| 1205 | ||||
| 1206 | DPRINTF("searching %lu keys in %s page %u with prefix [%.*s]", | |||
| 1207 | NUMKEYS(mp), | |||
| 1208 | IS_LEAF(mp) ? "leaf" : "branch", | |||
| 1209 | mp->pgno, (int)mp->prefix.len, (char *)mp->prefix.str); | |||
| 1210 | ||||
| 1211 | assert(NUMKEYS(mp) > 0)(((((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1211, __func__, "NUMKEYS(mp) > 0")); | |||
| 1212 | ||||
| 1213 | memset(&nodekey, 0, sizeof(nodekey)); | |||
| 1214 | ||||
| 1215 | low = IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)) ? 0 : 1; | |||
| 1216 | high = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) - 1; | |||
| 1217 | while (low <= high) { | |||
| 1218 | i = (low + high) >> 1; | |||
| 1219 | node = NODEPTR(mp, i)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[i])); | |||
| 1220 | ||||
| 1221 | nodekey.size = node->ksize; | |||
| 1222 | nodekey.data = NODEKEY(node)(void *)((node)->data); | |||
| 1223 | ||||
| 1224 | if (bt->cmp) | |||
| 1225 | rc = bt->cmp(key, &nodekey); | |||
| 1226 | else | |||
| 1227 | rc = bt_cmp(bt, key, &nodekey, &mp->prefix); | |||
| 1228 | ||||
| 1229 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) | |||
| 1230 | DPRINTF("found leaf index %u [%.*s], rc = %d", | |||
| 1231 | i, (int)nodekey.size, (char *)nodekey.data, rc); | |||
| 1232 | else | |||
| 1233 | DPRINTF("found branch index %u [%.*s -> %u], rc = %d", | |||
| 1234 | i, (int)node->ksize, (char *)NODEKEY(node), | |||
| 1235 | node->n_pgno, rc); | |||
| 1236 | ||||
| 1237 | if (rc == 0) | |||
| 1238 | break; | |||
| 1239 | if (rc > 0) | |||
| 1240 | low = i + 1; | |||
| 1241 | else | |||
| 1242 | high = i - 1; | |||
| 1243 | } | |||
| 1244 | ||||
| 1245 | if (rc > 0) { /* Found entry is less than the key. */ | |||
| 1246 | i++; /* Skip to get the smallest entry larger than key. */ | |||
| 1247 | if (i >= NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1)) | |||
| 1248 | /* There is no entry larger or equal to the key. */ | |||
| 1249 | return NULL((void*)0); | |||
| 1250 | } | |||
| 1251 | if (exactp) | |||
| 1252 | *exactp = (rc == 0); | |||
| 1253 | if (kip) /* Store the key index if requested. */ | |||
| 1254 | *kip = i; | |||
| 1255 | ||||
| 1256 | return NODEPTR(mp, i)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[i])); | |||
| 1257 | } | |||
| 1258 | ||||
| 1259 | static void | |||
| 1260 | cursor_pop_page(struct cursor *cursor) | |||
| 1261 | { | |||
| 1262 | struct ppage *top; | |||
| 1263 | ||||
| 1264 | top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | |||
| 1265 | CURSOR_POP(cursor)do { (&(cursor)->stack)->slh_first = (&(cursor) ->stack)->slh_first->entry.sle_next; } while (0); | |||
| 1266 | top->mpage->ref--; | |||
| 1267 | ||||
| 1268 | DPRINTF("popped page %u off cursor %p", top->mpage->pgno, cursor); | |||
| 1269 | ||||
| 1270 | free(top); | |||
| 1271 | } | |||
| 1272 | ||||
| 1273 | static struct ppage * | |||
| 1274 | cursor_push_page(struct cursor *cursor, struct mpage *mp) | |||
| 1275 | { | |||
| 1276 | struct ppage *ppage; | |||
| 1277 | ||||
| 1278 | DPRINTF("pushing page %u on cursor %p", mp->pgno, cursor); | |||
| 1279 | ||||
| 1280 | if ((ppage = calloc(1, sizeof(*ppage))) == NULL((void*)0)) | |||
| 1281 | return NULL((void*)0); | |||
| 1282 | ppage->mpage = mp; | |||
| 1283 | mp->ref++; | |||
| 1284 | CURSOR_PUSH(cursor, ppage)do { (ppage)->entry.sle_next = (&(cursor)->stack)-> slh_first; (&(cursor)->stack)->slh_first = (ppage); } while (0); | |||
| 1285 | return ppage; | |||
| 1286 | } | |||
| 1287 | ||||
| 1288 | static struct mpage * | |||
| 1289 | btree_get_mpage(struct btree *bt, pgno_t pgno) | |||
| 1290 | { | |||
| 1291 | struct mpage *mp; | |||
| 1292 | ||||
| 1293 | mp = mpage_lookup(bt, pgno); | |||
| 1294 | if (mp == NULL((void*)0)) { | |||
| 1295 | if ((mp = calloc(1, sizeof(*mp))) == NULL((void*)0)) | |||
| 1296 | return NULL((void*)0); | |||
| 1297 | if ((mp->page = malloc(bt->head.psize)) == NULL((void*)0)) { | |||
| 1298 | free(mp); | |||
| 1299 | return NULL((void*)0); | |||
| 1300 | } | |||
| 1301 | if (btree_read_page(bt, pgno, mp->page) != BT_SUCCESS0) { | |||
| 1302 | mpage_free(mp); | |||
| 1303 | return NULL((void*)0); | |||
| 1304 | } | |||
| 1305 | mp->pgno = pgno; | |||
| 1306 | mpage_add(bt, mp); | |||
| 1307 | } else | |||
| 1308 | DPRINTF("returning page %u from cache", pgno); | |||
| 1309 | ||||
| 1310 | return mp; | |||
| 1311 | } | |||
| 1312 | ||||
| 1313 | static void | |||
| 1314 | concat_prefix(struct btree *bt, char *s1, size_t n1, char *s2, size_t n2, | |||
| 1315 | char *cs, size_t *cn) | |||
| 1316 | { | |||
| 1317 | assert(*cn >= n1 + n2)((*cn >= n1 + n2) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1317, __func__, "*cn >= n1 + n2")); | |||
| 1318 | if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) { | |||
| 1319 | bcopy(s2, cs, n2); | |||
| 1320 | bcopy(s1, cs + n2, n1); | |||
| 1321 | } else { | |||
| 1322 | bcopy(s1, cs, n1); | |||
| 1323 | bcopy(s2, cs + n1, n2); | |||
| 1324 | } | |||
| 1325 | *cn = n1 + n2; | |||
| 1326 | } | |||
| 1327 | ||||
| 1328 | static void | |||
| 1329 | find_common_prefix(struct btree *bt, struct mpage *mp) | |||
| 1330 | { | |||
| 1331 | indx_t lbound = 0, ubound = 0; | |||
| 1332 | struct mpage *lp, *up; | |||
| 1333 | struct btkey lprefix, uprefix; | |||
| 1334 | ||||
| 1335 | mp->prefix.len = 0; | |||
| 1336 | if (bt->cmp != NULL((void*)0)) | |||
| 1337 | return; | |||
| 1338 | ||||
| 1339 | lp = mp; | |||
| 1340 | while (lp->parent != NULL((void*)0)) { | |||
| 1341 | if (lp->parent_index > 0) { | |||
| 1342 | lbound = lp->parent_index; | |||
| 1343 | break; | |||
| 1344 | } | |||
| 1345 | lp = lp->parent; | |||
| 1346 | } | |||
| 1347 | ||||
| 1348 | up = mp; | |||
| 1349 | while (up->parent != NULL((void*)0)) { | |||
| 1350 | if (up->parent_index + 1 < (indx_t)NUMKEYS(up->parent)(((up->parent)->page->b.fb.fb_lower - __builtin_offsetof (struct page, ptrs)) >> 1)) { | |||
| 1351 | ubound = up->parent_index + 1; | |||
| 1352 | break; | |||
| 1353 | } | |||
| 1354 | up = up->parent; | |||
| 1355 | } | |||
| 1356 | ||||
| 1357 | if (lp->parent != NULL((void*)0) && up->parent != NULL((void*)0)) { | |||
| 1358 | expand_prefix(bt, lp->parent, lbound, &lprefix); | |||
| 1359 | expand_prefix(bt, up->parent, ubound, &uprefix); | |||
| 1360 | common_prefix(bt, &lprefix, &uprefix, &mp->prefix); | |||
| 1361 | } | |||
| 1362 | else if (mp->parent) | |||
| 1363 | bcopy(&mp->parent->prefix, &mp->prefix, sizeof(mp->prefix)); | |||
| 1364 | ||||
| 1365 | DPRINTF("found common prefix [%.*s] (len %zu) for page %u", | |||
| 1366 | (int)mp->prefix.len, mp->prefix.str, mp->prefix.len, mp->pgno); | |||
| 1367 | } | |||
| 1368 | ||||
| 1369 | static int | |||
| 1370 | btree_search_page_root(struct btree *bt, struct mpage *root, struct btval *key, | |||
| 1371 | struct cursor *cursor, int modify, struct mpage **mpp) | |||
| 1372 | { | |||
| 1373 | struct mpage *mp, *parent; | |||
| 1374 | ||||
| 1375 | if (cursor && cursor_push_page(cursor, root) == NULL((void*)0)) | |||
| 1376 | return BT_FAIL-1; | |||
| 1377 | ||||
| 1378 | mp = root; | |||
| 1379 | while (IS_BRANCH(mp)((((mp)->page->flags) & (0x01)) == (0x01))) { | |||
| 1380 | unsigned int i = 0; | |||
| 1381 | struct node *node; | |||
| 1382 | ||||
| 1383 | DPRINTF("branch page %u has %lu keys", mp->pgno, NUMKEYS(mp)); | |||
| 1384 | assert(NUMKEYS(mp) > 1)(((((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) > 1) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1384, __func__, "NUMKEYS(mp) > 1")); | |||
| 1385 | DPRINTF("found index 0 to page %u", NODEPGNO(NODEPTR(mp, 0))); | |||
| 1386 | ||||
| 1387 | if (key == NULL((void*)0)) /* Initialize cursor to first page. */ | |||
| 1388 | i = 0; | |||
| 1389 | else { | |||
| 1390 | int exact; | |||
| 1391 | node = btree_search_node(bt, mp, key, &exact, &i); | |||
| 1392 | if (node == NULL((void*)0)) | |||
| 1393 | i = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) - 1; | |||
| 1394 | else if (!exact) { | |||
| 1395 | assert(i > 0)((i > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1395, __func__, "i > 0")); | |||
| 1396 | i--; | |||
| 1397 | } | |||
| 1398 | } | |||
| 1399 | ||||
| 1400 | if (key) | |||
| 1401 | DPRINTF("following index %u for key %.*s", | |||
| 1402 | i, (int)key->size, (char *)key->data); | |||
| 1403 | assert(i >= 0 && i < NUMKEYS(mp))((i >= 0 && i < (((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1)) ? (void )0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1403, __func__ , "i >= 0 && i < NUMKEYS(mp)")); | |||
| 1404 | node = NODEPTR(mp, i)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[i])); | |||
| 1405 | ||||
| 1406 | if (cursor) | |||
| 1407 | CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first)->ki = i; | |||
| 1408 | ||||
| 1409 | parent = mp; | |||
| 1410 | if ((mp = btree_get_mpage(bt, NODEPGNO(node)((node)->p.np_pgno))) == NULL((void*)0)) | |||
| 1411 | return BT_FAIL-1; | |||
| 1412 | mp->parent = parent; | |||
| 1413 | mp->parent_index = i; | |||
| 1414 | find_common_prefix(bt, mp); | |||
| 1415 | ||||
| 1416 | if (cursor && cursor_push_page(cursor, mp) == NULL((void*)0)) | |||
| 1417 | return BT_FAIL-1; | |||
| 1418 | ||||
| 1419 | if (modify && (mp = mpage_touch(bt, mp)) == NULL((void*)0)) | |||
| 1420 | return BT_FAIL-1; | |||
| 1421 | } | |||
| 1422 | ||||
| 1423 | if (!IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) { | |||
| 1424 | DPRINTF("internal error, index points to a %02X page!?", | |||
| 1425 | mp->page->flags); | |||
| 1426 | return BT_FAIL-1; | |||
| 1427 | } | |||
| 1428 | ||||
| 1429 | DPRINTF("found leaf page %u for key %.*s", mp->pgno, | |||
| 1430 | key ? (int)key->size : 0, key ? (char *)key->data : NULL); | |||
| 1431 | ||||
| 1432 | *mpp = mp; | |||
| 1433 | return BT_SUCCESS0; | |||
| 1434 | } | |||
| 1435 | ||||
| 1436 | /* Search for the page a given key should be in. | |||
| 1437 | * Stores a pointer to the found page in *mpp. | |||
| 1438 | * If key is NULL, search for the lowest page (used by btree_cursor_first). | |||
| 1439 | * If cursor is non-null, pushes parent pages on the cursor stack. | |||
| 1440 | * If modify is true, visited pages are updated with new page numbers. | |||
| 1441 | */ | |||
| 1442 | static int | |||
| 1443 | btree_search_page(struct btree *bt, struct btree_txn *txn, struct btval *key, | |||
| 1444 | struct cursor *cursor, int modify, struct mpage **mpp) | |||
| 1445 | { | |||
| 1446 | int rc; | |||
| 1447 | pgno_t root; | |||
| 1448 | struct mpage *mp; | |||
| 1449 | ||||
| 1450 | /* Can't modify pages outside a transaction. */ | |||
| 1451 | if (txn == NULL((void*)0) && modify) { | |||
| 1452 | errno(*__errno()) = EINVAL22; | |||
| 1453 | return BT_FAIL-1; | |||
| 1454 | } | |||
| 1455 | ||||
| 1456 | /* Choose which root page to start with. If a transaction is given | |||
| 1457 | * use the root page from the transaction, otherwise read the last | |||
| 1458 | * committed root page. | |||
| 1459 | */ | |||
| 1460 | if (txn == NULL((void*)0)) { | |||
| 1461 | if ((rc = btree_read_meta(bt, NULL((void*)0))) != BT_SUCCESS0) | |||
| 1462 | return rc; | |||
| 1463 | root = bt->meta.root; | |||
| 1464 | } else if (F_ISSET(txn->flags, BT_TXN_ERROR)(((txn->flags) & (0x02)) == (0x02))) { | |||
| 1465 | DPRINTF("transaction has failed, must abort"); | |||
| 1466 | errno(*__errno()) = EINVAL22; | |||
| 1467 | return BT_FAIL-1; | |||
| 1468 | } else | |||
| 1469 | root = txn->root; | |||
| 1470 | ||||
| 1471 | if (root == P_INVALID0xFFFFFFFF) { /* Tree is empty. */ | |||
| 1472 | DPRINTF("tree is empty"); | |||
| 1473 | errno(*__errno()) = ENOENT2; | |||
| 1474 | return BT_FAIL-1; | |||
| 1475 | } | |||
| 1476 | ||||
| 1477 | if ((mp = btree_get_mpage(bt, root)) == NULL((void*)0)) | |||
| 1478 | return BT_FAIL-1; | |||
| 1479 | ||||
| 1480 | DPRINTF("root page has flags 0x%X", mp->page->flags); | |||
| 1481 | ||||
| 1482 | assert(mp->parent == NULL)((mp->parent == ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1482, __func__, "mp->parent == NULL")); | |||
| 1483 | assert(mp->prefix.len == 0)((mp->prefix.len == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1483, __func__, "mp->prefix.len == 0")); | |||
| 1484 | ||||
| 1485 | if (modify && !mp->dirty) { | |||
| 1486 | if ((mp = mpage_touch(bt, mp)) == NULL((void*)0)) | |||
| 1487 | return BT_FAIL-1; | |||
| 1488 | txn->root = mp->pgno; | |||
| 1489 | } | |||
| 1490 | ||||
| 1491 | return btree_search_page_root(bt, mp, key, cursor, modify, mpp); | |||
| 1492 | } | |||
| 1493 | ||||
| 1494 | static int | |||
| 1495 | btree_read_data(struct btree *bt, struct mpage *mp, struct node *leaf, | |||
| 1496 | struct btval *data) | |||
| 1497 | { | |||
| 1498 | struct mpage *omp; /* overflow mpage */ | |||
| 1499 | size_t psz; | |||
| 1500 | size_t max; | |||
| 1501 | size_t sz = 0; | |||
| 1502 | pgno_t pgno; | |||
| 1503 | ||||
| 1504 | memset(data, 0, sizeof(*data)); | |||
| 1505 | max = bt->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs); | |||
| 1506 | ||||
| 1507 | if (!F_ISSET(leaf->flags, F_BIGDATA)(((leaf->flags) & (0x01)) == (0x01))) { | |||
| 1508 | data->size = leaf->n_dsizep.np_dsize; | |||
| 1509 | if (data->size > 0) { | |||
| 1510 | if (mp == NULL((void*)0)) { | |||
| 1511 | if ((data->data = malloc(data->size)) == NULL((void*)0)) | |||
| 1512 | return BT_FAIL-1; | |||
| 1513 | bcopy(NODEDATA(leaf)(void *)((char *)(leaf)->data + (leaf)->ksize), data->data, data->size); | |||
| 1514 | data->free_data = 1; | |||
| 1515 | data->mp = NULL((void*)0); | |||
| 1516 | } else { | |||
| 1517 | data->data = NODEDATA(leaf)(void *)((char *)(leaf)->data + (leaf)->ksize); | |||
| 1518 | data->free_data = 0; | |||
| 1519 | data->mp = mp; | |||
| 1520 | mp->ref++; | |||
| 1521 | } | |||
| 1522 | } | |||
| 1523 | return BT_SUCCESS0; | |||
| 1524 | } | |||
| 1525 | ||||
| 1526 | /* Read overflow data. | |||
| 1527 | */ | |||
| 1528 | DPRINTF("allocating %u byte for overflow data", leaf->n_dsize); | |||
| 1529 | if ((data->data = malloc(leaf->n_dsizep.np_dsize)) == NULL((void*)0)) | |||
| 1530 | return BT_FAIL-1; | |||
| 1531 | data->size = leaf->n_dsizep.np_dsize; | |||
| 1532 | data->free_data = 1; | |||
| 1533 | data->mp = NULL((void*)0); | |||
| 1534 | bcopy(NODEDATA(leaf)(void *)((char *)(leaf)->data + (leaf)->ksize), &pgno, sizeof(pgno)); | |||
| 1535 | for (sz = 0; sz < data->size; ) { | |||
| 1536 | if ((omp = btree_get_mpage(bt, pgno)) == NULL((void*)0) || | |||
| 1537 | !F_ISSET(omp->page->flags, P_OVERFLOW)(((omp->page->flags) & (0x04)) == (0x04))) { | |||
| 1538 | DPRINTF("read overflow page %u failed", pgno); | |||
| 1539 | free(data->data); | |||
| 1540 | mpage_free(omp); | |||
| 1541 | return BT_FAIL-1; | |||
| 1542 | } | |||
| 1543 | psz = data->size - sz; | |||
| 1544 | if (psz > max) | |||
| 1545 | psz = max; | |||
| 1546 | bcopy(omp->page->ptrs, (char *)data->data + sz, psz); | |||
| 1547 | sz += psz; | |||
| 1548 | pgno = omp->page->p_next_pgnob.pb_next_pgno; | |||
| 1549 | } | |||
| 1550 | ||||
| 1551 | return BT_SUCCESS0; | |||
| 1552 | } | |||
| 1553 | ||||
| 1554 | int | |||
| 1555 | btree_txn_get(struct btree *bt, struct btree_txn *txn, | |||
| 1556 | struct btval *key, struct btval *data) | |||
| 1557 | { | |||
| 1558 | int rc, exact; | |||
| 1559 | struct node *leaf; | |||
| 1560 | struct mpage *mp; | |||
| 1561 | ||||
| 1562 | assert(key)((key) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1562, __func__, "key")); | |||
| 1563 | assert(data)((data) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1563, __func__, "data")); | |||
| 1564 | DPRINTF("===> get key [%.*s]", (int)key->size, (char *)key->data); | |||
| 1565 | ||||
| 1566 | if (bt != NULL((void*)0) && txn != NULL((void*)0) && bt != txn->bt) { | |||
| 1567 | errno(*__errno()) = EINVAL22; | |||
| 1568 | return BT_FAIL-1; | |||
| 1569 | } | |||
| 1570 | ||||
| 1571 | if (bt == NULL((void*)0)) { | |||
| 1572 | if (txn == NULL((void*)0)) { | |||
| 1573 | errno(*__errno()) = EINVAL22; | |||
| 1574 | return BT_FAIL-1; | |||
| 1575 | } | |||
| 1576 | bt = txn->bt; | |||
| 1577 | } | |||
| 1578 | ||||
| 1579 | if (key->size == 0 || key->size > MAXKEYSIZE255) { | |||
| 1580 | errno(*__errno()) = EINVAL22; | |||
| 1581 | return BT_FAIL-1; | |||
| 1582 | } | |||
| 1583 | ||||
| 1584 | if ((rc = btree_search_page(bt, txn, key, NULL((void*)0), 0, &mp)) != BT_SUCCESS0) | |||
| 1585 | return rc; | |||
| 1586 | ||||
| 1587 | leaf = btree_search_node(bt, mp, key, &exact, NULL((void*)0)); | |||
| 1588 | if (leaf && exact) | |||
| 1589 | rc = btree_read_data(bt, mp, leaf, data); | |||
| 1590 | else { | |||
| 1591 | errno(*__errno()) = ENOENT2; | |||
| 1592 | rc = BT_FAIL-1; | |||
| 1593 | } | |||
| 1594 | ||||
| 1595 | mpage_prune(bt); | |||
| 1596 | return rc; | |||
| 1597 | } | |||
| 1598 | ||||
| 1599 | static int | |||
| 1600 | btree_sibling(struct cursor *cursor, int move_right) | |||
| 1601 | { | |||
| 1602 | int rc; | |||
| 1603 | struct node *indx; | |||
| 1604 | struct ppage *parent, *top; | |||
| 1605 | struct mpage *mp; | |||
| 1606 | ||||
| 1607 | top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | |||
| 1608 | if ((parent = SLIST_NEXT(top, entry)((top)->entry.sle_next)) == NULL((void*)0)) { | |||
| 1609 | errno(*__errno()) = ENOENT2; | |||
| 1610 | return BT_FAIL-1; /* root has no siblings */ | |||
| 1611 | } | |||
| 1612 | ||||
| 1613 | DPRINTF("parent page is page %u, index %u", | |||
| 1614 | parent->mpage->pgno, parent->ki); | |||
| 1615 | ||||
| 1616 | cursor_pop_page(cursor); | |||
| 1617 | if (move_right ? (parent->ki + 1 >= NUMKEYS(parent->mpage)(((parent->mpage)->page->b.fb.fb_lower - __builtin_offsetof (struct page, ptrs)) >> 1)) | |||
| 1618 | : (parent->ki == 0)) { | |||
| 1619 | DPRINTF("no more keys left, moving to %s sibling", | |||
| 1620 | move_right ? "right" : "left"); | |||
| 1621 | if ((rc = btree_sibling(cursor, move_right)) != BT_SUCCESS0) | |||
| 1622 | return rc; | |||
| 1623 | parent = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | |||
| 1624 | } else { | |||
| 1625 | if (move_right) | |||
| 1626 | parent->ki++; | |||
| 1627 | else | |||
| 1628 | parent->ki--; | |||
| 1629 | DPRINTF("just moving to %s index key %u", | |||
| 1630 | move_right ? "right" : "left", parent->ki); | |||
| 1631 | } | |||
| 1632 | assert(IS_BRANCH(parent->mpage))((((((parent->mpage)->page->flags) & (0x01)) == ( 0x01))) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1632, __func__, "IS_BRANCH(parent->mpage)")); | |||
| 1633 | ||||
| 1634 | indx = NODEPTR(parent->mpage, parent->ki)((struct node *)((char *)((parent->mpage)->page) + ((parent ->mpage)->page)->ptrs[parent->ki])); | |||
| 1635 | if ((mp = btree_get_mpage(cursor->bt, indx->n_pgnop.np_pgno)) == NULL((void*)0)) | |||
| 1636 | return BT_FAIL-1; | |||
| 1637 | mp->parent = parent->mpage; | |||
| 1638 | mp->parent_index = parent->ki; | |||
| 1639 | ||||
| 1640 | cursor_push_page(cursor, mp); | |||
| 1641 | find_common_prefix(cursor->bt, mp); | |||
| 1642 | ||||
| 1643 | return BT_SUCCESS0; | |||
| 1644 | } | |||
| 1645 | ||||
| 1646 | static int | |||
| 1647 | bt_set_key(struct btree *bt, struct mpage *mp, struct node *node, | |||
| 1648 | struct btval *key) | |||
| 1649 | { | |||
| 1650 | if (key == NULL((void*)0)) | |||
| 1651 | return 0; | |||
| 1652 | ||||
| 1653 | if (mp->prefix.len > 0) { | |||
| 1654 | key->size = node->ksize + mp->prefix.len; | |||
| 1655 | key->data = malloc(key->size); | |||
| 1656 | if (key->data == NULL((void*)0)) | |||
| 1657 | return -1; | |||
| 1658 | concat_prefix(bt, | |||
| 1659 | mp->prefix.str, mp->prefix.len, | |||
| 1660 | NODEKEY(node)(void *)((node)->data), node->ksize, | |||
| 1661 | key->data, &key->size); | |||
| 1662 | key->free_data = 1; | |||
| 1663 | } else { | |||
| 1664 | key->size = node->ksize; | |||
| 1665 | key->data = NODEKEY(node)(void *)((node)->data); | |||
| 1666 | key->free_data = 0; | |||
| 1667 | key->mp = mp; | |||
| 1668 | mp->ref++; | |||
| 1669 | } | |||
| 1670 | ||||
| 1671 | return 0; | |||
| 1672 | } | |||
| 1673 | ||||
| 1674 | static int | |||
| 1675 | btree_cursor_next(struct cursor *cursor, struct btval *key, struct btval *data) | |||
| 1676 | { | |||
| 1677 | struct ppage *top; | |||
| 1678 | struct mpage *mp; | |||
| 1679 | struct node *leaf; | |||
| 1680 | ||||
| 1681 | if (cursor->eof) { | |||
| 1682 | errno(*__errno()) = ENOENT2; | |||
| 1683 | return BT_FAIL-1; | |||
| 1684 | } | |||
| 1685 | ||||
| 1686 | assert(cursor->initialized)((cursor->initialized) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1686, __func__, "cursor->initialized")); | |||
| 1687 | ||||
| 1688 | top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | |||
| 1689 | mp = top->mpage; | |||
| 1690 | ||||
| 1691 | DPRINTF("cursor_next: top page is %u in cursor %p", mp->pgno, cursor); | |||
| 1692 | ||||
| 1693 | if (top->ki + 1 >= NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1)) { | |||
| 1694 | DPRINTF("=====> move to next sibling page"); | |||
| 1695 | if (btree_sibling(cursor, 1) != BT_SUCCESS0) { | |||
| 1696 | cursor->eof = 1; | |||
| 1697 | return BT_FAIL-1; | |||
| 1698 | } | |||
| 1699 | top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | |||
| 1700 | mp = top->mpage; | |||
| 1701 | DPRINTF("next page is %u, key index %u", mp->pgno, top->ki); | |||
| 1702 | } else | |||
| 1703 | top->ki++; | |||
| 1704 | ||||
| 1705 | DPRINTF("==> cursor points to page %u with %lu keys, key index %u", | |||
| 1706 | mp->pgno, NUMKEYS(mp), top->ki); | |||
| 1707 | ||||
| 1708 | assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void )0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1708, __func__ , "IS_LEAF(mp)")); | |||
| 1709 | leaf = NODEPTR(mp, top->ki)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[top->ki])); | |||
| 1710 | ||||
| 1711 | if (data && btree_read_data(cursor->bt, mp, leaf, data) != BT_SUCCESS0) | |||
| 1712 | return BT_FAIL-1; | |||
| 1713 | ||||
| 1714 | if (bt_set_key(cursor->bt, mp, leaf, key) != 0) | |||
| 1715 | return BT_FAIL-1; | |||
| 1716 | ||||
| 1717 | return BT_SUCCESS0; | |||
| 1718 | } | |||
| 1719 | ||||
| 1720 | static int | |||
| 1721 | btree_cursor_set(struct cursor *cursor, struct btval *key, struct btval *data, | |||
| 1722 | int *exactp) | |||
| 1723 | { | |||
| 1724 | int rc; | |||
| 1725 | struct node *leaf; | |||
| 1726 | struct mpage *mp; | |||
| 1727 | struct ppage *top; | |||
| 1728 | ||||
| 1729 | assert(cursor)((cursor) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1729, __func__, "cursor")); | |||
| 1730 | assert(key)((key) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1730, __func__, "key")); | |||
| 1731 | assert(key->size > 0)((key->size > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1731, __func__, "key->size > 0")); | |||
| 1732 | ||||
| 1733 | rc = btree_search_page(cursor->bt, cursor->txn, key, cursor, 0, &mp); | |||
| 1734 | if (rc != BT_SUCCESS0) | |||
| 1735 | return rc; | |||
| 1736 | assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void )0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1736, __func__ , "IS_LEAF(mp)")); | |||
| 1737 | ||||
| 1738 | top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | |||
| 1739 | leaf = btree_search_node(cursor->bt, mp, key, exactp, &top->ki); | |||
| 1740 | if (exactp != NULL((void*)0) && !*exactp) { | |||
| 1741 | /* BT_CURSOR_EXACT specified and not an exact match. */ | |||
| 1742 | errno(*__errno()) = ENOENT2; | |||
| 1743 | return BT_FAIL-1; | |||
| 1744 | } | |||
| 1745 | ||||
| 1746 | if (leaf == NULL((void*)0)) { | |||
| 1747 | DPRINTF("===> inexact leaf not found, goto sibling"); | |||
| 1748 | if (btree_sibling(cursor, 1) != BT_SUCCESS0) | |||
| 1749 | return BT_FAIL-1; /* no entries matched */ | |||
| 1750 | top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | |||
| 1751 | top->ki = 0; | |||
| 1752 | mp = top->mpage; | |||
| 1753 | assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void )0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1753, __func__ , "IS_LEAF(mp)")); | |||
| 1754 | leaf = NODEPTR(mp, 0)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[0])); | |||
| 1755 | } | |||
| 1756 | ||||
| 1757 | cursor->initialized = 1; | |||
| 1758 | cursor->eof = 0; | |||
| 1759 | ||||
| 1760 | if (data && btree_read_data(cursor->bt, mp, leaf, data) != BT_SUCCESS0) | |||
| 1761 | return BT_FAIL-1; | |||
| 1762 | ||||
| 1763 | if (bt_set_key(cursor->bt, mp, leaf, key) != 0) | |||
| 1764 | return BT_FAIL-1; | |||
| 1765 | DPRINTF("==> cursor placed on key %.*s", | |||
| 1766 | (int)key->size, (char *)key->data); | |||
| 1767 | ||||
| 1768 | return BT_SUCCESS0; | |||
| 1769 | } | |||
| 1770 | ||||
| 1771 | static int | |||
| 1772 | btree_cursor_first(struct cursor *cursor, struct btval *key, struct btval *data) | |||
| 1773 | { | |||
| 1774 | int rc; | |||
| 1775 | struct mpage *mp; | |||
| 1776 | struct node *leaf; | |||
| 1777 | ||||
| 1778 | rc = btree_search_page(cursor->bt, cursor->txn, NULL((void*)0), cursor, 0, &mp); | |||
| 1779 | if (rc != BT_SUCCESS0) | |||
| 1780 | return rc; | |||
| 1781 | assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void )0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1781, __func__ , "IS_LEAF(mp)")); | |||
| 1782 | ||||
| 1783 | leaf = NODEPTR(mp, 0)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[0])); | |||
| 1784 | cursor->initialized = 1; | |||
| 1785 | cursor->eof = 0; | |||
| 1786 | ||||
| 1787 | if (data && btree_read_data(cursor->bt, mp, leaf, data) != BT_SUCCESS0) | |||
| 1788 | return BT_FAIL-1; | |||
| 1789 | ||||
| 1790 | if (bt_set_key(cursor->bt, mp, leaf, key) != 0) | |||
| 1791 | return BT_FAIL-1; | |||
| 1792 | ||||
| 1793 | return BT_SUCCESS0; | |||
| 1794 | } | |||
| 1795 | ||||
| 1796 | int | |||
| 1797 | btree_cursor_get(struct cursor *cursor, struct btval *key, struct btval *data, | |||
| 1798 | enum cursor_op op) | |||
| 1799 | { | |||
| 1800 | int rc; | |||
| 1801 | int exact = 0; | |||
| 1802 | ||||
| 1803 | assert(cursor)((cursor) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1803, __func__, "cursor")); | |||
| 1804 | ||||
| 1805 | switch (op) { | |||
| 1806 | case BT_CURSOR: | |||
| 1807 | case BT_CURSOR_EXACT: | |||
| 1808 | while (CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first) != NULL((void*)0)) | |||
| 1809 | cursor_pop_page(cursor); | |||
| 1810 | if (key == NULL((void*)0) || key->size == 0 || key->size > MAXKEYSIZE255) { | |||
| 1811 | errno(*__errno()) = EINVAL22; | |||
| 1812 | rc = BT_FAIL-1; | |||
| 1813 | } else if (op == BT_CURSOR_EXACT) | |||
| 1814 | rc = btree_cursor_set(cursor, key, data, &exact); | |||
| 1815 | else | |||
| 1816 | rc = btree_cursor_set(cursor, key, data, NULL((void*)0)); | |||
| 1817 | break; | |||
| 1818 | case BT_NEXT: | |||
| 1819 | if (!cursor->initialized) | |||
| 1820 | rc = btree_cursor_first(cursor, key, data); | |||
| 1821 | else | |||
| 1822 | rc = btree_cursor_next(cursor, key, data); | |||
| 1823 | break; | |||
| 1824 | case BT_FIRST: | |||
| 1825 | while (CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first) != NULL((void*)0)) | |||
| 1826 | cursor_pop_page(cursor); | |||
| 1827 | rc = btree_cursor_first(cursor, key, data); | |||
| 1828 | break; | |||
| 1829 | default: | |||
| 1830 | DPRINTF("unhandled/unimplemented cursor operation %u", op); | |||
| 1831 | rc = BT_FAIL-1; | |||
| 1832 | break; | |||
| 1833 | } | |||
| 1834 | ||||
| 1835 | mpage_prune(cursor->bt); | |||
| 1836 | ||||
| 1837 | return rc; | |||
| 1838 | } | |||
| 1839 | ||||
| 1840 | static struct mpage * | |||
| 1841 | btree_new_page(struct btree *bt, uint32_t flags) | |||
| 1842 | { | |||
| 1843 | struct mpage *mp; | |||
| 1844 | ||||
| 1845 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1845, __func__, "bt != NULL")); | |||
| 1846 | assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1846, __func__, "bt->txn != NULL")); | |||
| 1847 | ||||
| 1848 | DPRINTF("allocating new mpage %u, page size %u", | |||
| 1849 | bt->txn->next_pgno, bt->head.psize); | |||
| 1850 | if ((mp = calloc(1, sizeof(*mp))) == NULL((void*)0)) | |||
| 1851 | return NULL((void*)0); | |||
| 1852 | if ((mp->page = malloc(bt->head.psize)) == NULL((void*)0)) { | |||
| 1853 | free(mp); | |||
| 1854 | return NULL((void*)0); | |||
| 1855 | } | |||
| 1856 | mp->pgno = mp->page->pgno = bt->txn->next_pgno++; | |||
| 1857 | mp->page->flags = flags; | |||
| 1858 | mp->page->lowerb.fb.fb_lower = PAGEHDRSZ__builtin_offsetof(struct page, ptrs); | |||
| 1859 | mp->page->upperb.fb.fb_upper = bt->head.psize; | |||
| 1860 | ||||
| 1861 | if (IS_BRANCH(mp)((((mp)->page->flags) & (0x01)) == (0x01))) | |||
| 1862 | bt->meta.branch_pages++; | |||
| 1863 | else if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) | |||
| 1864 | bt->meta.leaf_pages++; | |||
| 1865 | else if (IS_OVERFLOW(mp)((((mp)->page->flags) & (0x04)) == (0x04))) | |||
| 1866 | bt->meta.overflow_pages++; | |||
| 1867 | ||||
| 1868 | mpage_add(bt, mp); | |||
| 1869 | mpage_dirty(bt, mp); | |||
| 1870 | ||||
| 1871 | return mp; | |||
| 1872 | } | |||
| 1873 | ||||
| 1874 | static size_t | |||
| 1875 | bt_leaf_size(struct btree *bt, struct btval *key, struct btval *data) | |||
| 1876 | { | |||
| 1877 | size_t sz; | |||
| 1878 | ||||
| 1879 | sz = LEAFSIZE(key, data)(__builtin_offsetof(struct node, data) + (key)->size + (data )->size); | |||
| 1880 | if (data->size >= bt->head.psize / BT_MINKEYS4) { | |||
| 1881 | /* put on overflow page */ | |||
| 1882 | sz -= data->size - sizeof(pgno_t); | |||
| 1883 | } | |||
| 1884 | ||||
| 1885 | return sz + sizeof(indx_t); | |||
| 1886 | } | |||
| 1887 | ||||
| 1888 | static size_t | |||
| 1889 | bt_branch_size(struct btree *bt, struct btval *key) | |||
| 1890 | { | |||
| 1891 | size_t sz; | |||
| 1892 | ||||
| 1893 | sz = INDXSIZE(key)(__builtin_offsetof(struct node, data) + ((key) == ((void*)0) ? 0 : (key)->size)); | |||
| 1894 | if (sz >= bt->head.psize / BT_MINKEYS4) { | |||
| 1895 | /* put on overflow page */ | |||
| 1896 | /* not implemented */ | |||
| 1897 | /* sz -= key->size - sizeof(pgno_t); */ | |||
| 1898 | } | |||
| 1899 | ||||
| 1900 | return sz + sizeof(indx_t); | |||
| 1901 | } | |||
| 1902 | ||||
| 1903 | static int | |||
| 1904 | btree_write_overflow_data(struct btree *bt, struct page *p, struct btval *data) | |||
| 1905 | { | |||
| 1906 | size_t done = 0; | |||
| 1907 | size_t sz; | |||
| 1908 | size_t max; | |||
| 1909 | pgno_t *linkp; /* linked page stored here */ | |||
| 1910 | struct mpage *next = NULL((void*)0); | |||
| 1911 | ||||
| 1912 | max = bt->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs); | |||
| 1913 | ||||
| 1914 | while (done < data->size) { | |||
| 1915 | if (next != NULL((void*)0)) | |||
| 1916 | p = next->page; | |||
| 1917 | linkp = &p->p_next_pgnob.pb_next_pgno; | |||
| 1918 | if (data->size - done > max) { | |||
| 1919 | /* need another overflow page */ | |||
| 1920 | if ((next = btree_new_page(bt, P_OVERFLOW0x04)) == NULL((void*)0)) | |||
| 1921 | return BT_FAIL-1; | |||
| 1922 | *linkp = next->pgno; | |||
| 1923 | DPRINTF("linking overflow page %u", next->pgno); | |||
| 1924 | } else | |||
| 1925 | *linkp = 0; /* indicates end of list */ | |||
| 1926 | sz = data->size - done; | |||
| 1927 | if (sz > max) | |||
| 1928 | sz = max; | |||
| 1929 | DPRINTF("copying %zu bytes to overflow page %u", sz, p->pgno); | |||
| 1930 | bcopy((char *)data->data + done, p->ptrs, sz); | |||
| 1931 | done += sz; | |||
| 1932 | } | |||
| 1933 | ||||
| 1934 | return BT_SUCCESS0; | |||
| 1935 | } | |||
| 1936 | ||||
| 1937 | /* Key prefix should already be stripped. | |||
| 1938 | */ | |||
| 1939 | static int | |||
| 1940 | btree_add_node(struct btree *bt, struct mpage *mp, indx_t indx, | |||
| 1941 | struct btval *key, struct btval *data, pgno_t pgno, uint8_t flags) | |||
| 1942 | { | |||
| 1943 | unsigned int i; | |||
| 1944 | size_t node_size = NODESIZE__builtin_offsetof(struct node, data); | |||
| 1945 | indx_t ofs; | |||
| 1946 | struct node *node; | |||
| 1947 | struct page *p; | |||
| 1948 | struct mpage *ofp = NULL((void*)0); /* overflow page */ | |||
| 1949 | ||||
| 1950 | p = mp->page; | |||
| 1951 | assert(p->upper >= p->lower)((p->b.fb.fb_upper >= p->b.fb.fb_lower) ? (void)0 : __assert2 ("/usr/src/usr.sbin/ldapd/btree.c", 1951, __func__, "p->upper >= p->lower" )); | |||
| 1952 | ||||
| 1953 | DPRINTF("add node [%.*s] to %s page %u at index %d, key size %zu", | |||
| 1954 | key ? (int)key->size : 0, key ? (char *)key->data : NULL, | |||
| 1955 | IS_LEAF(mp) ? "leaf" : "branch", | |||
| 1956 | mp->pgno, indx, key ? key->size : 0); | |||
| 1957 | ||||
| 1958 | if (key != NULL((void*)0)) | |||
| 1959 | node_size += key->size; | |||
| 1960 | ||||
| 1961 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) { | |||
| 1962 | assert(data)((data) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1962, __func__, "data")); | |||
| 1963 | node_size += data->size; | |||
| 1964 | if (F_ISSET(flags, F_BIGDATA)(((flags) & (0x01)) == (0x01))) { | |||
| 1965 | /* Data already on overflow page. */ | |||
| 1966 | node_size -= data->size - sizeof(pgno_t); | |||
| 1967 | } else if (data->size >= bt->head.psize / BT_MINKEYS4) { | |||
| 1968 | /* Put data on overflow page. */ | |||
| 1969 | DPRINTF("data size is %zu, put on overflow page", | |||
| 1970 | data->size); | |||
| 1971 | node_size -= data->size - sizeof(pgno_t); | |||
| 1972 | if ((ofp = btree_new_page(bt, P_OVERFLOW0x04)) == NULL((void*)0)) | |||
| 1973 | return BT_FAIL-1; | |||
| 1974 | DPRINTF("allocated overflow page %u", ofp->pgno); | |||
| 1975 | flags |= F_BIGDATA0x01; | |||
| 1976 | } | |||
| 1977 | } | |||
| 1978 | ||||
| 1979 | if (node_size + sizeof(indx_t) > SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page-> b.fb.fb_lower)) { | |||
| 1980 | DPRINTF("not enough room in page %u, got %lu ptrs", | |||
| 1981 | mp->pgno, NUMKEYS(mp)); | |||
| 1982 | DPRINTF("upper - lower = %u - %u = %u", p->upper, p->lower, | |||
| 1983 | p->upper - p->lower); | |||
| 1984 | DPRINTF("node size = %zu", node_size); | |||
| 1985 | return BT_FAIL-1; | |||
| 1986 | } | |||
| 1987 | ||||
| 1988 | /* Move higher pointers up one slot. */ | |||
| 1989 | for (i = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); i > indx; i--) | |||
| 1990 | p->ptrs[i] = p->ptrs[i - 1]; | |||
| 1991 | ||||
| 1992 | /* Adjust free space offsets. */ | |||
| 1993 | ofs = p->upperb.fb.fb_upper - node_size; | |||
| 1994 | assert(ofs >= p->lower + sizeof(indx_t))((ofs >= p->b.fb.fb_lower + sizeof(indx_t)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1994, __func__, "ofs >= p->lower + sizeof(indx_t)")); | |||
| 1995 | p->ptrs[indx] = ofs; | |||
| 1996 | p->upperb.fb.fb_upper = ofs; | |||
| 1997 | p->lowerb.fb.fb_lower += sizeof(indx_t); | |||
| 1998 | ||||
| 1999 | /* Write the node data. */ | |||
| 2000 | node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[indx])); | |||
| 2001 | node->ksize = (key == NULL((void*)0)) ? 0 : key->size; | |||
| 2002 | node->flags = flags; | |||
| 2003 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) | |||
| 2004 | node->n_dsizep.np_dsize = data->size; | |||
| 2005 | else | |||
| 2006 | node->n_pgnop.np_pgno = pgno; | |||
| 2007 | ||||
| 2008 | if (key) | |||
| 2009 | bcopy(key->data, NODEKEY(node)(void *)((node)->data), key->size); | |||
| 2010 | ||||
| 2011 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) { | |||
| 2012 | assert(key)((key) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2012, __func__, "key")); | |||
| 2013 | if (ofp == NULL((void*)0)) { | |||
| 2014 | if (F_ISSET(flags, F_BIGDATA)(((flags) & (0x01)) == (0x01))) | |||
| 2015 | bcopy(data->data, node->data + key->size, | |||
| 2016 | sizeof(pgno_t)); | |||
| 2017 | else | |||
| 2018 | bcopy(data->data, node->data + key->size, | |||
| 2019 | data->size); | |||
| 2020 | } else { | |||
| 2021 | bcopy(&ofp->pgno, node->data + key->size, | |||
| 2022 | sizeof(pgno_t)); | |||
| 2023 | if (btree_write_overflow_data(bt, ofp->page, | |||
| 2024 | data) == BT_FAIL-1) | |||
| 2025 | return BT_FAIL-1; | |||
| 2026 | } | |||
| 2027 | } | |||
| 2028 | ||||
| 2029 | return BT_SUCCESS0; | |||
| 2030 | } | |||
| 2031 | ||||
| 2032 | static void | |||
| 2033 | btree_del_node(struct btree *bt, struct mpage *mp, indx_t indx) | |||
| 2034 | { | |||
| 2035 | unsigned int sz; | |||
| 2036 | indx_t i, j, numkeys, ptr; | |||
| 2037 | struct node *node; | |||
| 2038 | char *base; | |||
| 2039 | ||||
| 2040 | DPRINTF("delete node %u on %s page %u", indx, | |||
| 2041 | IS_LEAF(mp) ? "leaf" : "branch", mp->pgno); | |||
| 2042 | assert(indx < NUMKEYS(mp))((indx < (((mp)->page->b.fb.fb_lower - __builtin_offsetof (struct page, ptrs)) >> 1)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2042, __func__, "indx < NUMKEYS(mp)")); | |||
| 2043 | ||||
| 2044 | node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[indx])); | |||
| 2045 | sz = NODESIZE__builtin_offsetof(struct node, data) + node->ksize; | |||
| 2046 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) { | |||
| 2047 | if (F_ISSET(node->flags, F_BIGDATA)(((node->flags) & (0x01)) == (0x01))) | |||
| 2048 | sz += sizeof(pgno_t); | |||
| 2049 | else | |||
| 2050 | sz += NODEDSZ(node)((node)->p.np_dsize); | |||
| 2051 | } | |||
| 2052 | ||||
| 2053 | ptr = mp->page->ptrs[indx]; | |||
| 2054 | numkeys = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); | |||
| 2055 | for (i = j = 0; i < numkeys; i++) { | |||
| 2056 | if (i != indx) { | |||
| 2057 | mp->page->ptrs[j] = mp->page->ptrs[i]; | |||
| 2058 | if (mp->page->ptrs[i] < ptr) | |||
| 2059 | mp->page->ptrs[j] += sz; | |||
| 2060 | j++; | |||
| 2061 | } | |||
| 2062 | } | |||
| 2063 | ||||
| 2064 | base = (char *)mp->page + mp->page->upperb.fb.fb_upper; | |||
| 2065 | bcopy(base, base + sz, ptr - mp->page->upperb.fb.fb_upper); | |||
| 2066 | ||||
| 2067 | mp->page->lowerb.fb.fb_lower -= sizeof(indx_t); | |||
| 2068 | mp->page->upperb.fb.fb_upper += sz; | |||
| 2069 | } | |||
| 2070 | ||||
| 2071 | struct cursor * | |||
| 2072 | btree_txn_cursor_open(struct btree *bt, struct btree_txn *txn) | |||
| 2073 | { | |||
| 2074 | struct cursor *cursor; | |||
| 2075 | ||||
| 2076 | if (bt != NULL((void*)0) && txn != NULL((void*)0) && bt != txn->bt) { | |||
| 2077 | errno(*__errno()) = EINVAL22; | |||
| 2078 | return NULL((void*)0); | |||
| 2079 | } | |||
| 2080 | ||||
| 2081 | if (bt == NULL((void*)0)) { | |||
| 2082 | if (txn == NULL((void*)0)) { | |||
| 2083 | errno(*__errno()) = EINVAL22; | |||
| 2084 | return NULL((void*)0); | |||
| 2085 | } | |||
| 2086 | bt = txn->bt; | |||
| 2087 | } | |||
| 2088 | ||||
| 2089 | if ((cursor = calloc(1, sizeof(*cursor))) != NULL((void*)0)) { | |||
| 2090 | SLIST_INIT(&cursor->stack){ ((&cursor->stack)->slh_first) = ((void*)0); }; | |||
| 2091 | cursor->bt = bt; | |||
| 2092 | cursor->txn = txn; | |||
| 2093 | btree_ref(bt); | |||
| 2094 | } | |||
| 2095 | ||||
| 2096 | return cursor; | |||
| 2097 | } | |||
| 2098 | ||||
| 2099 | void | |||
| 2100 | btree_cursor_close(struct cursor *cursor) | |||
| 2101 | { | |||
| 2102 | if (cursor != NULL((void*)0)) { | |||
| 2103 | while (!CURSOR_EMPTY(cursor)(((&(cursor)->stack)->slh_first) == ((void*)0))) | |||
| 2104 | cursor_pop_page(cursor); | |||
| 2105 | ||||
| 2106 | btree_close(cursor->bt); | |||
| 2107 | free(cursor); | |||
| 2108 | } | |||
| 2109 | } | |||
| 2110 | ||||
| 2111 | static int | |||
| 2112 | btree_update_key(struct btree *bt, struct mpage *mp, indx_t indx, | |||
| 2113 | struct btval *key) | |||
| 2114 | { | |||
| 2115 | indx_t ptr, i, numkeys; | |||
| 2116 | int delta; | |||
| 2117 | size_t len; | |||
| 2118 | struct node *node; | |||
| 2119 | char *base; | |||
| 2120 | ||||
| 2121 | node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[indx])); | |||
| 2122 | ptr = mp->page->ptrs[indx]; | |||
| 2123 | DPRINTF("update key %u (ofs %u) [%.*s] to [%.*s] on page %u", | |||
| 2124 | indx, ptr, | |||
| 2125 | (int)node->ksize, (char *)NODEKEY(node), | |||
| 2126 | (int)key->size, (char *)key->data, | |||
| 2127 | mp->pgno); | |||
| 2128 | ||||
| 2129 | if (key->size != node->ksize) { | |||
| 2130 | delta = key->size - node->ksize; | |||
| 2131 | if (delta > 0 && SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page-> b.fb.fb_lower) < delta) { | |||
| 2132 | DPRINTF("OUCH! Not enough room, delta = %d", delta); | |||
| 2133 | return BT_FAIL-1; | |||
| 2134 | } | |||
| 2135 | ||||
| 2136 | numkeys = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); | |||
| 2137 | for (i = 0; i < numkeys; i++) { | |||
| 2138 | if (mp->page->ptrs[i] <= ptr) | |||
| 2139 | mp->page->ptrs[i] -= delta; | |||
| 2140 | } | |||
| 2141 | ||||
| 2142 | base = (char *)mp->page + mp->page->upperb.fb.fb_upper; | |||
| 2143 | len = ptr - mp->page->upperb.fb.fb_upper + NODESIZE__builtin_offsetof(struct node, data); | |||
| 2144 | bcopy(base, base - delta, len); | |||
| 2145 | mp->page->upperb.fb.fb_upper -= delta; | |||
| 2146 | ||||
| 2147 | node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[indx])); | |||
| 2148 | node->ksize = key->size; | |||
| 2149 | } | |||
| 2150 | ||||
| 2151 | bcopy(key->data, NODEKEY(node)(void *)((node)->data), key->size); | |||
| 2152 | ||||
| 2153 | return BT_SUCCESS0; | |||
| 2154 | } | |||
| 2155 | ||||
| 2156 | static int | |||
| 2157 | btree_adjust_prefix(struct btree *bt, struct mpage *src, int delta) | |||
| 2158 | { | |||
| 2159 | indx_t i; | |||
| 2160 | struct node *node; | |||
| 2161 | struct btkey tmpkey; | |||
| 2162 | struct btval key; | |||
| 2163 | ||||
| 2164 | DPRINTF("adjusting prefix lengths on page %u with delta %d", | |||
| 2165 | src->pgno, delta); | |||
| 2166 | assert(delta != 0)((delta != 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2166, __func__, "delta != 0")); | |||
| 2167 | ||||
| 2168 | for (i = 0; i < NUMKEYS(src)(((src)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); i++) { | |||
| 2169 | node = NODEPTR(src, i)((struct node *)((char *)((src)->page) + ((src)->page)-> ptrs[i])); | |||
| 2170 | tmpkey.len = node->ksize - delta; | |||
| 2171 | if (delta > 0) { | |||
| 2172 | if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) | |||
| 2173 | bcopy(NODEKEY(node)(void *)((node)->data), tmpkey.str, tmpkey.len); | |||
| 2174 | else | |||
| 2175 | bcopy((char *)NODEKEY(node)(void *)((node)->data) + delta, tmpkey.str, | |||
| 2176 | tmpkey.len); | |||
| 2177 | } else { | |||
| 2178 | if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) { | |||
| 2179 | bcopy(NODEKEY(node)(void *)((node)->data), tmpkey.str, node->ksize); | |||
| 2180 | bcopy(src->prefix.str, tmpkey.str + node->ksize, | |||
| 2181 | -delta); | |||
| 2182 | } else { | |||
| 2183 | bcopy(src->prefix.str + src->prefix.len + delta, | |||
| 2184 | tmpkey.str, -delta); | |||
| 2185 | bcopy(NODEKEY(node)(void *)((node)->data), tmpkey.str - delta, | |||
| 2186 | node->ksize); | |||
| 2187 | } | |||
| 2188 | } | |||
| 2189 | key.size = tmpkey.len; | |||
| 2190 | key.data = tmpkey.str; | |||
| 2191 | if (btree_update_key(bt, src, i, &key) != BT_SUCCESS0) | |||
| 2192 | return BT_FAIL-1; | |||
| 2193 | } | |||
| 2194 | ||||
| 2195 | return BT_SUCCESS0; | |||
| 2196 | } | |||
| 2197 | ||||
| 2198 | /* Move a node from src to dst. | |||
| 2199 | */ | |||
| 2200 | static int | |||
| 2201 | btree_move_node(struct btree *bt, struct mpage *src, indx_t srcindx, | |||
| 2202 | struct mpage *dst, indx_t dstindx) | |||
| 2203 | { | |||
| 2204 | int rc; | |||
| 2205 | unsigned int pfxlen, mp_pfxlen = 0; | |||
| 2206 | struct node *srcnode; | |||
| 2207 | struct mpage *mp = NULL((void*)0); | |||
| 2208 | struct btkey tmpkey, srckey; | |||
| 2209 | struct btval key, data; | |||
| 2210 | ||||
| 2211 | assert(src->parent)((src->parent) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2211, __func__, "src->parent")); | |||
| 2212 | assert(dst->parent)((dst->parent) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2212, __func__, "dst->parent")); | |||
| 2213 | ||||
| 2214 | srcnode = NODEPTR(src, srcindx)((struct node *)((char *)((src)->page) + ((src)->page)-> ptrs[srcindx])); | |||
| 2215 | DPRINTF("moving %s node %u [%.*s] on page %u to node %u on page %u", | |||
| 2216 | IS_LEAF(src) ? "leaf" : "branch", | |||
| 2217 | srcindx, | |||
| 2218 | (int)srcnode->ksize, (char *)NODEKEY(srcnode), | |||
| 2219 | src->pgno, | |||
| 2220 | dstindx, dst->pgno); | |||
| 2221 | ||||
| 2222 | find_common_prefix(bt, src); | |||
| 2223 | ||||
| 2224 | if (IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) { | |||
| 2225 | /* Need to check if the page the moved node points to | |||
| 2226 | * changes prefix. | |||
| 2227 | */ | |||
| 2228 | if ((mp = btree_get_mpage(bt, NODEPGNO(srcnode)((srcnode)->p.np_pgno))) == NULL((void*)0)) | |||
| 2229 | return BT_FAIL-1; | |||
| 2230 | mp->parent = src; | |||
| 2231 | mp->parent_index = srcindx; | |||
| 2232 | find_common_prefix(bt, mp); | |||
| 2233 | mp_pfxlen = mp->prefix.len; | |||
| 2234 | } | |||
| 2235 | ||||
| 2236 | /* Mark src and dst as dirty. */ | |||
| 2237 | if ((src = mpage_touch(bt, src)) == NULL((void*)0) || | |||
| 2238 | (dst = mpage_touch(bt, dst)) == NULL((void*)0)) | |||
| 2239 | return BT_FAIL-1; | |||
| 2240 | ||||
| 2241 | find_common_prefix(bt, dst); | |||
| 2242 | ||||
| 2243 | /* Check if src node has destination page prefix. Otherwise the | |||
| 2244 | * destination page must expand its prefix on all its nodes. | |||
| 2245 | */ | |||
| 2246 | srckey.len = srcnode->ksize; | |||
| 2247 | bcopy(NODEKEY(srcnode)(void *)((srcnode)->data), srckey.str, srckey.len); | |||
| 2248 | common_prefix(bt, &srckey, &dst->prefix, &tmpkey); | |||
| 2249 | if (tmpkey.len != dst->prefix.len) { | |||
| 2250 | if (btree_adjust_prefix(bt, dst, | |||
| 2251 | tmpkey.len - dst->prefix.len) != BT_SUCCESS0) | |||
| 2252 | return BT_FAIL-1; | |||
| 2253 | bcopy(&tmpkey, &dst->prefix, sizeof(tmpkey)); | |||
| 2254 | } | |||
| 2255 | ||||
| 2256 | if (srcindx == 0 && IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) { | |||
| 2257 | struct mpage *low; | |||
| 2258 | ||||
| 2259 | /* must find the lowest key below src | |||
| 2260 | */ | |||
| 2261 | assert(btree_search_page_root(bt, src, NULL, NULL, 0,((btree_search_page_root(bt, src, ((void*)0), ((void*)0), 0, & low) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2262, __func__, "btree_search_page_root(bt, src, NULL, NULL, 0, &low) == BT_SUCCESS" )) | |||
| 2262 | &low) == BT_SUCCESS)((btree_search_page_root(bt, src, ((void*)0), ((void*)0), 0, & low) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2262, __func__, "btree_search_page_root(bt, src, NULL, NULL, 0, &low) == BT_SUCCESS" )); | |||
| 2263 | expand_prefix(bt, low, 0, &srckey); | |||
| 2264 | DPRINTF("found lowest key [%.*s] on leaf page %u", | |||
| 2265 | (int)srckey.len, srckey.str, low->pgno); | |||
| 2266 | } else { | |||
| 2267 | srckey.len = srcnode->ksize; | |||
| 2268 | bcopy(NODEKEY(srcnode)(void *)((srcnode)->data), srckey.str, srcnode->ksize); | |||
| 2269 | } | |||
| 2270 | find_common_prefix(bt, src); | |||
| 2271 | ||||
| 2272 | /* expand the prefix */ | |||
| 2273 | tmpkey.len = sizeof(tmpkey.str); | |||
| 2274 | concat_prefix(bt, src->prefix.str, src->prefix.len, | |||
| 2275 | srckey.str, srckey.len, tmpkey.str, &tmpkey.len); | |||
| 2276 | ||||
| 2277 | /* Add the node to the destination page. Adjust prefix for | |||
| 2278 | * destination page. | |||
| 2279 | */ | |||
| 2280 | key.size = tmpkey.len; | |||
| 2281 | key.data = tmpkey.str; | |||
| 2282 | remove_prefix(bt, &key, dst->prefix.len); | |||
| 2283 | data.size = NODEDSZ(srcnode)((srcnode)->p.np_dsize); | |||
| 2284 | data.data = NODEDATA(srcnode)(void *)((char *)(srcnode)->data + (srcnode)->ksize); | |||
| 2285 | rc = btree_add_node(bt, dst, dstindx, &key, &data, NODEPGNO(srcnode)((srcnode)->p.np_pgno), | |||
| 2286 | srcnode->flags); | |||
| 2287 | if (rc != BT_SUCCESS0) | |||
| 2288 | return rc; | |||
| 2289 | ||||
| 2290 | /* Delete the node from the source page. | |||
| 2291 | */ | |||
| 2292 | btree_del_node(bt, src, srcindx); | |||
| 2293 | ||||
| 2294 | /* Update the parent separators. | |||
| 2295 | */ | |||
| 2296 | if (srcindx == 0 && src->parent_index != 0) { | |||
| 2297 | expand_prefix(bt, src, 0, &tmpkey); | |||
| 2298 | key.size = tmpkey.len; | |||
| 2299 | key.data = tmpkey.str; | |||
| 2300 | remove_prefix(bt, &key, src->parent->prefix.len); | |||
| 2301 | ||||
| 2302 | DPRINTF("update separator for source page %u to [%.*s]", | |||
| 2303 | src->pgno, (int)key.size, (char *)key.data); | |||
| 2304 | if (btree_update_key(bt, src->parent, src->parent_index, | |||
| 2305 | &key) != BT_SUCCESS0) | |||
| 2306 | return BT_FAIL-1; | |||
| 2307 | } | |||
| 2308 | ||||
| 2309 | if (srcindx == 0 && IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) { | |||
| 2310 | struct btval nullkey; | |||
| 2311 | nullkey.size = 0; | |||
| 2312 | assert(btree_update_key(bt, src, 0, &nullkey) == BT_SUCCESS)((btree_update_key(bt, src, 0, &nullkey) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 2312, __func__, "btree_update_key(bt, src, 0, &nullkey) == BT_SUCCESS")); | |||
| 2313 | } | |||
| 2314 | ||||
| 2315 | if (dstindx == 0 && dst->parent_index != 0) { | |||
| 2316 | expand_prefix(bt, dst, 0, &tmpkey); | |||
| 2317 | key.size = tmpkey.len; | |||
| 2318 | key.data = tmpkey.str; | |||
| 2319 | remove_prefix(bt, &key, dst->parent->prefix.len); | |||
| 2320 | ||||
| 2321 | DPRINTF("update separator for destination page %u to [%.*s]", | |||
| 2322 | dst->pgno, (int)key.size, (char *)key.data); | |||
| 2323 | if (btree_update_key(bt, dst->parent, dst->parent_index, | |||
| 2324 | &key) != BT_SUCCESS0) | |||
| 2325 | return BT_FAIL-1; | |||
| 2326 | } | |||
| 2327 | ||||
| 2328 | if (dstindx == 0 && IS_BRANCH(dst)((((dst)->page->flags) & (0x01)) == (0x01))) { | |||
| 2329 | struct btval nullkey; | |||
| 2330 | nullkey.size = 0; | |||
| 2331 | assert(btree_update_key(bt, dst, 0, &nullkey) == BT_SUCCESS)((btree_update_key(bt, dst, 0, &nullkey) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 2331, __func__, "btree_update_key(bt, dst, 0, &nullkey) == BT_SUCCESS")); | |||
| 2332 | } | |||
| 2333 | ||||
| 2334 | /* We can get a new page prefix here! | |||
| 2335 | * Must update keys in all nodes of this page! | |||
| 2336 | */ | |||
| 2337 | pfxlen = src->prefix.len; | |||
| 2338 | find_common_prefix(bt, src); | |||
| 2339 | if (src->prefix.len != pfxlen) { | |||
| 2340 | if (btree_adjust_prefix(bt, src, | |||
| 2341 | src->prefix.len - pfxlen) != BT_SUCCESS0) | |||
| 2342 | return BT_FAIL-1; | |||
| 2343 | } | |||
| 2344 | ||||
| 2345 | pfxlen = dst->prefix.len; | |||
| 2346 | find_common_prefix(bt, dst); | |||
| 2347 | if (dst->prefix.len != pfxlen) { | |||
| 2348 | if (btree_adjust_prefix(bt, dst, | |||
| 2349 | dst->prefix.len - pfxlen) != BT_SUCCESS0) | |||
| 2350 | return BT_FAIL-1; | |||
| 2351 | } | |||
| 2352 | ||||
| 2353 | if (IS_BRANCH(dst)((((dst)->page->flags) & (0x01)) == (0x01))) { | |||
| 2354 | assert(mp)((mp) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2354, __func__, "mp")); | |||
| 2355 | mp->parent = dst; | |||
| 2356 | mp->parent_index = dstindx; | |||
| 2357 | find_common_prefix(bt, mp); | |||
| 2358 | if (mp->prefix.len != mp_pfxlen) { | |||
| 2359 | DPRINTF("moved branch node has changed prefix"); | |||
| 2360 | if ((mp = mpage_touch(bt, mp)) == NULL((void*)0)) | |||
| 2361 | return BT_FAIL-1; | |||
| 2362 | if (btree_adjust_prefix(bt, mp, | |||
| 2363 | mp->prefix.len - mp_pfxlen) != BT_SUCCESS0) | |||
| 2364 | return BT_FAIL-1; | |||
| 2365 | } | |||
| 2366 | } | |||
| 2367 | ||||
| 2368 | return BT_SUCCESS0; | |||
| 2369 | } | |||
| 2370 | ||||
| 2371 | static int | |||
| 2372 | btree_merge(struct btree *bt, struct mpage *src, struct mpage *dst) | |||
| 2373 | { | |||
| 2374 | int rc; | |||
| 2375 | indx_t i; | |||
| 2376 | unsigned int pfxlen; | |||
| 2377 | struct node *srcnode; | |||
| 2378 | struct btkey tmpkey, dstpfx; | |||
| 2379 | struct btval key, data; | |||
| 2380 | ||||
| 2381 | DPRINTF("merging page %u and %u", src->pgno, dst->pgno); | |||
| 2382 | ||||
| 2383 | assert(src->parent)((src->parent) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2383, __func__, "src->parent")); /* can't merge root page */ | |||
| 2384 | assert(dst->parent)((dst->parent) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2384, __func__, "dst->parent")); | |||
| 2385 | assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2385, __func__, "bt->txn != NULL")); | |||
| 2386 | ||||
| 2387 | /* Mark src and dst as dirty. */ | |||
| 2388 | if ((src = mpage_touch(bt, src)) == NULL((void*)0) || | |||
| 2389 | (dst = mpage_touch(bt, dst)) == NULL((void*)0)) | |||
| 2390 | return BT_FAIL-1; | |||
| 2391 | ||||
| 2392 | find_common_prefix(bt, src); | |||
| 2393 | find_common_prefix(bt, dst); | |||
| 2394 | ||||
| 2395 | /* Check if source nodes has destination page prefix. Otherwise | |||
| 2396 | * the destination page must expand its prefix on all its nodes. | |||
| 2397 | */ | |||
| 2398 | common_prefix(bt, &src->prefix, &dst->prefix, &dstpfx); | |||
| 2399 | if (dstpfx.len != dst->prefix.len) { | |||
| 2400 | if (btree_adjust_prefix(bt, dst, | |||
| 2401 | dstpfx.len - dst->prefix.len) != BT_SUCCESS0) | |||
| 2402 | return BT_FAIL-1; | |||
| 2403 | bcopy(&dstpfx, &dst->prefix, sizeof(dstpfx)); | |||
| 2404 | } | |||
| 2405 | ||||
| 2406 | /* Move all nodes from src to dst. | |||
| 2407 | */ | |||
| 2408 | for (i = 0; i < NUMKEYS(src)(((src)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); i++) { | |||
| 2409 | srcnode = NODEPTR(src, i)((struct node *)((char *)((src)->page) + ((src)->page)-> ptrs[i])); | |||
| 2410 | ||||
| 2411 | /* If branch node 0 (implicit key), find the real key. | |||
| 2412 | */ | |||
| 2413 | if (i == 0 && IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) { | |||
| 2414 | struct mpage *low; | |||
| 2415 | ||||
| 2416 | /* must find the lowest key below src | |||
| 2417 | */ | |||
| 2418 | assert(btree_search_page_root(bt, src, NULL, NULL, 0,((btree_search_page_root(bt, src, ((void*)0), ((void*)0), 0, & low) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2419, __func__, "btree_search_page_root(bt, src, NULL, NULL, 0, &low) == BT_SUCCESS" )) | |||
| 2419 | &low) == BT_SUCCESS)((btree_search_page_root(bt, src, ((void*)0), ((void*)0), 0, & low) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2419, __func__, "btree_search_page_root(bt, src, NULL, NULL, 0, &low) == BT_SUCCESS" )); | |||
| 2420 | expand_prefix(bt, low, 0, &tmpkey); | |||
| 2421 | DPRINTF("found lowest key [%.*s] on leaf page %u", | |||
| 2422 | (int)tmpkey.len, tmpkey.str, low->pgno); | |||
| 2423 | } else { | |||
| 2424 | expand_prefix(bt, src, i, &tmpkey); | |||
| 2425 | } | |||
| 2426 | ||||
| 2427 | key.size = tmpkey.len; | |||
| 2428 | key.data = tmpkey.str; | |||
| 2429 | ||||
| 2430 | remove_prefix(bt, &key, dst->prefix.len); | |||
| 2431 | data.size = NODEDSZ(srcnode)((srcnode)->p.np_dsize); | |||
| 2432 | data.data = NODEDATA(srcnode)(void *)((char *)(srcnode)->data + (srcnode)->ksize); | |||
| 2433 | rc = btree_add_node(bt, dst, NUMKEYS(dst)(((dst)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1), &key, | |||
| 2434 | &data, NODEPGNO(srcnode)((srcnode)->p.np_pgno), srcnode->flags); | |||
| 2435 | if (rc != BT_SUCCESS0) | |||
| 2436 | return rc; | |||
| 2437 | } | |||
| 2438 | ||||
| 2439 | DPRINTF("dst page %u now has %lu keys (%.1f%% filled)", | |||
| 2440 | dst->pgno, NUMKEYS(dst), (float)PAGEFILL(bt, dst) / 10); | |||
| 2441 | ||||
| 2442 | /* Unlink the src page from parent. | |||
| 2443 | */ | |||
| 2444 | btree_del_node(bt, src->parent, src->parent_index); | |||
| 2445 | if (src->parent_index == 0) { | |||
| 2446 | key.size = 0; | |||
| 2447 | if (btree_update_key(bt, src->parent, 0, &key) != BT_SUCCESS0) | |||
| 2448 | return BT_FAIL-1; | |||
| 2449 | ||||
| 2450 | pfxlen = src->prefix.len; | |||
| 2451 | find_common_prefix(bt, src); | |||
| 2452 | assert (src->prefix.len == pfxlen)((src->prefix.len == pfxlen) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2452, __func__, "src->prefix.len == pfxlen")); | |||
| 2453 | } | |||
| 2454 | ||||
| 2455 | if (IS_LEAF(src)((((src)->page->flags) & (0x02)) == (0x02))) | |||
| 2456 | bt->meta.leaf_pages--; | |||
| 2457 | else | |||
| 2458 | bt->meta.branch_pages--; | |||
| 2459 | ||||
| 2460 | return btree_rebalance(bt, src->parent); | |||
| 2461 | } | |||
| 2462 | ||||
| 2463 | #define FILL_THRESHOLD250 250 | |||
| 2464 | ||||
| 2465 | static int | |||
| 2466 | btree_rebalance(struct btree *bt, struct mpage *mp) | |||
| 2467 | { | |||
| 2468 | struct node *node; | |||
| 2469 | struct mpage *parent; | |||
| 2470 | struct mpage *root; | |||
| 2471 | struct mpage *neighbor = NULL((void*)0); | |||
| 2472 | indx_t si = 0, di = 0; | |||
| 2473 | ||||
| 2474 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2474, __func__, "bt != NULL")); | |||
| 2475 | assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2475, __func__, "bt->txn != NULL")); | |||
| 2476 | assert(mp != NULL)((mp != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2476, __func__, "mp != NULL")); | |||
| 2477 | ||||
| 2478 | DPRINTF("rebalancing %s page %u (has %lu keys, %.1f%% full)", | |||
| 2479 | IS_LEAF(mp) ? "leaf" : "branch", | |||
| 2480 | mp->pgno, NUMKEYS(mp), (float)PAGEFILL(bt, mp) / 10); | |||
| 2481 | ||||
| 2482 | if (PAGEFILL(bt, mp)(1000 * ((bt)->head.psize - __builtin_offsetof(struct page , ptrs) - (indx_t)((mp)->page->b.fb.fb_upper - (mp)-> page->b.fb.fb_lower)) / ((bt)->head.psize - __builtin_offsetof (struct page, ptrs))) >= FILL_THRESHOLD250) { | |||
| 2483 | DPRINTF("no need to rebalance page %u, above fill threshold", | |||
| 2484 | mp->pgno); | |||
| 2485 | return BT_SUCCESS0; | |||
| 2486 | } | |||
| 2487 | ||||
| 2488 | parent = mp->parent; | |||
| 2489 | ||||
| 2490 | if (parent == NULL((void*)0)) { | |||
| 2491 | if (NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) == 0) { | |||
| 2492 | DPRINTF("tree is completely empty"); | |||
| 2493 | bt->txn->root = P_INVALID0xFFFFFFFF; | |||
| 2494 | bt->meta.depth--; | |||
| 2495 | bt->meta.leaf_pages--; | |||
| 2496 | } else if (IS_BRANCH(mp)((((mp)->page->flags) & (0x01)) == (0x01)) && NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) == 1) { | |||
| 2497 | DPRINTF("collapsing root page!"); | |||
| 2498 | bt->txn->root = NODEPGNO(NODEPTR(mp, 0))((((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[0])))->p.np_pgno); | |||
| 2499 | if ((root = btree_get_mpage(bt, bt->txn->root)) == NULL((void*)0)) | |||
| 2500 | return BT_FAIL-1; | |||
| 2501 | root->parent = NULL((void*)0); | |||
| 2502 | bt->meta.depth--; | |||
| 2503 | bt->meta.branch_pages--; | |||
| 2504 | } else | |||
| 2505 | DPRINTF("root page doesn't need rebalancing"); | |||
| 2506 | return BT_SUCCESS0; | |||
| 2507 | } | |||
| 2508 | ||||
| 2509 | /* The parent (branch page) must have at least 2 pointers, | |||
| 2510 | * otherwise the tree is invalid. | |||
| 2511 | */ | |||
| 2512 | assert(NUMKEYS(parent) > 1)(((((parent)->page->b.fb.fb_lower - __builtin_offsetof( struct page, ptrs)) >> 1) > 1) ? (void)0 : __assert2 ("/usr/src/usr.sbin/ldapd/btree.c", 2512, __func__, "NUMKEYS(parent) > 1" )); | |||
| 2513 | ||||
| 2514 | /* Leaf page fill factor is below the threshold. | |||
| 2515 | * Try to move keys from left or right neighbor, or | |||
| 2516 | * merge with a neighbor page. | |||
| 2517 | */ | |||
| 2518 | ||||
| 2519 | /* Find neighbors. | |||
| 2520 | */ | |||
| 2521 | if (mp->parent_index == 0) { | |||
| 2522 | /* We're the leftmost leaf in our parent. | |||
| 2523 | */ | |||
| 2524 | DPRINTF("reading right neighbor"); | |||
| 2525 | node = NODEPTR(parent, mp->parent_index + 1)((struct node *)((char *)((parent)->page) + ((parent)-> page)->ptrs[mp->parent_index + 1])); | |||
| 2526 | if ((neighbor = btree_get_mpage(bt, NODEPGNO(node)((node)->p.np_pgno))) == NULL((void*)0)) | |||
| 2527 | return BT_FAIL-1; | |||
| 2528 | neighbor->parent_index = mp->parent_index + 1; | |||
| 2529 | si = 0; | |||
| 2530 | di = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); | |||
| 2531 | } else { | |||
| 2532 | /* There is at least one neighbor to the left. | |||
| 2533 | */ | |||
| 2534 | DPRINTF("reading left neighbor"); | |||
| 2535 | node = NODEPTR(parent, mp->parent_index - 1)((struct node *)((char *)((parent)->page) + ((parent)-> page)->ptrs[mp->parent_index - 1])); | |||
| 2536 | if ((neighbor = btree_get_mpage(bt, NODEPGNO(node)((node)->p.np_pgno))) == NULL((void*)0)) | |||
| 2537 | return BT_FAIL-1; | |||
| 2538 | neighbor->parent_index = mp->parent_index - 1; | |||
| 2539 | si = NUMKEYS(neighbor)(((neighbor)->page->b.fb.fb_lower - __builtin_offsetof( struct page, ptrs)) >> 1) - 1; | |||
| 2540 | di = 0; | |||
| 2541 | } | |||
| 2542 | neighbor->parent = parent; | |||
| 2543 | ||||
| 2544 | DPRINTF("found neighbor page %u (%lu keys, %.1f%% full)", | |||
| 2545 | neighbor->pgno, NUMKEYS(neighbor), (float)PAGEFILL(bt, neighbor) / 10); | |||
| 2546 | ||||
| 2547 | /* If the neighbor page is above threshold and has at least two | |||
| 2548 | * keys, move one key from it. | |||
| 2549 | * | |||
| 2550 | * Otherwise we should try to merge them, but that might not be | |||
| 2551 | * possible, even if both are below threshold, as prefix expansion | |||
| 2552 | * might make keys larger. FIXME: detect this | |||
| 2553 | */ | |||
| 2554 | if (PAGEFILL(bt, neighbor)(1000 * ((bt)->head.psize - __builtin_offsetof(struct page , ptrs) - (indx_t)((neighbor)->page->b.fb.fb_upper - (neighbor )->page->b.fb.fb_lower)) / ((bt)->head.psize - __builtin_offsetof (struct page, ptrs))) >= FILL_THRESHOLD250 && NUMKEYS(neighbor)(((neighbor)->page->b.fb.fb_lower - __builtin_offsetof( struct page, ptrs)) >> 1) >= 2) | |||
| 2555 | return btree_move_node(bt, neighbor, si, mp, di); | |||
| 2556 | else { /* FIXME: if (has_enough_room()) */ | |||
| 2557 | if (mp->parent_index == 0) | |||
| 2558 | return btree_merge(bt, neighbor, mp); | |||
| 2559 | else | |||
| 2560 | return btree_merge(bt, mp, neighbor); | |||
| 2561 | } | |||
| 2562 | } | |||
| 2563 | ||||
| 2564 | int | |||
| 2565 | btree_txn_del(struct btree *bt, struct btree_txn *txn, | |||
| 2566 | struct btval *key, struct btval *data) | |||
| 2567 | { | |||
| 2568 | int rc, exact, close_txn = 0; | |||
| 2569 | unsigned int ki; | |||
| 2570 | struct node *leaf; | |||
| 2571 | struct mpage *mp; | |||
| 2572 | ||||
| 2573 | DPRINTF("========> delete key %.*s", (int)key->size, (char *)key->data); | |||
| 2574 | ||||
| 2575 | assert(key != NULL)((key != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2575, __func__, "key != NULL")); | |||
| 2576 | ||||
| 2577 | if (bt != NULL((void*)0) && txn != NULL((void*)0) && bt != txn->bt) { | |||
| 2578 | errno(*__errno()) = EINVAL22; | |||
| 2579 | return BT_FAIL-1; | |||
| 2580 | } | |||
| 2581 | ||||
| 2582 | if (txn != NULL((void*)0) && F_ISSET(txn->flags, BT_TXN_RDONLY)(((txn->flags) & (0x01)) == (0x01))) { | |||
| 2583 | errno(*__errno()) = EINVAL22; | |||
| 2584 | return BT_FAIL-1; | |||
| 2585 | } | |||
| 2586 | ||||
| 2587 | if (bt == NULL((void*)0)) { | |||
| 2588 | if (txn == NULL((void*)0)) { | |||
| 2589 | errno(*__errno()) = EINVAL22; | |||
| 2590 | return BT_FAIL-1; | |||
| 2591 | } | |||
| 2592 | bt = txn->bt; | |||
| 2593 | } | |||
| 2594 | ||||
| 2595 | if (key->size == 0 || key->size > MAXKEYSIZE255) { | |||
| 2596 | errno(*__errno()) = EINVAL22; | |||
| 2597 | return BT_FAIL-1; | |||
| 2598 | } | |||
| 2599 | ||||
| 2600 | if (txn == NULL((void*)0)) { | |||
| 2601 | close_txn = 1; | |||
| 2602 | if ((txn = btree_txn_begin(bt, 0)) == NULL((void*)0)) | |||
| 2603 | return BT_FAIL-1; | |||
| 2604 | } | |||
| 2605 | ||||
| 2606 | if ((rc = btree_search_page(bt, txn, key, NULL((void*)0), 1, &mp)) != BT_SUCCESS0) | |||
| 2607 | goto done; | |||
| 2608 | ||||
| 2609 | leaf = btree_search_node(bt, mp, key, &exact, &ki); | |||
| 2610 | if (leaf == NULL((void*)0) || !exact) { | |||
| 2611 | errno(*__errno()) = ENOENT2; | |||
| 2612 | rc = BT_FAIL-1; | |||
| 2613 | goto done; | |||
| 2614 | } | |||
| 2615 | ||||
| 2616 | if (data && (rc = btree_read_data(bt, NULL((void*)0), leaf, data)) != BT_SUCCESS0) | |||
| 2617 | goto done; | |||
| 2618 | ||||
| 2619 | btree_del_node(bt, mp, ki); | |||
| 2620 | bt->meta.entries--; | |||
| 2621 | rc = btree_rebalance(bt, mp); | |||
| 2622 | if (rc != BT_SUCCESS0) | |||
| 2623 | txn->flags |= BT_TXN_ERROR0x02; | |||
| 2624 | ||||
| 2625 | done: | |||
| 2626 | if (close_txn) { | |||
| 2627 | if (rc == BT_SUCCESS0) | |||
| 2628 | rc = btree_txn_commit(txn); | |||
| 2629 | else | |||
| 2630 | btree_txn_abort(txn); | |||
| 2631 | } | |||
| 2632 | mpage_prune(bt); | |||
| 2633 | return rc; | |||
| 2634 | } | |||
| 2635 | ||||
| 2636 | /* Reduce the length of the prefix separator <sep> to the minimum length that | |||
| 2637 | * still makes it uniquely distinguishable from <min>. | |||
| 2638 | * | |||
| 2639 | * <min> is guaranteed to be sorted less than <sep> | |||
| 2640 | * | |||
| 2641 | * On return, <sep> is modified to the minimum length. | |||
| 2642 | */ | |||
| 2643 | static void | |||
| 2644 | bt_reduce_separator(struct btree *bt, struct node *min, struct btval *sep) | |||
| 2645 | { | |||
| 2646 | size_t n = 0; | |||
| 2647 | char *p1; | |||
| 2648 | char *p2; | |||
| 2649 | ||||
| 2650 | if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) { | |||
| 2651 | ||||
| 2652 | assert(sep->size > 0)((sep->size > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2652, __func__, "sep->size > 0")); | |||
| 2653 | ||||
| 2654 | p1 = (char *)NODEKEY(min)(void *)((min)->data) + min->ksize - 1; | |||
| 2655 | p2 = (char *)sep->data + sep->size - 1; | |||
| 2656 | ||||
| 2657 | while (p1 >= (char *)NODEKEY(min)(void *)((min)->data) && *p1 == *p2) { | |||
| 2658 | assert(p2 > (char *)sep->data)((p2 > (char *)sep->data) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2658, __func__, "p2 > (char *)sep->data")); | |||
| 2659 | p1--; | |||
| 2660 | p2--; | |||
| 2661 | n++; | |||
| 2662 | } | |||
| 2663 | ||||
| 2664 | sep->data = p2; | |||
| 2665 | sep->size = n + 1; | |||
| 2666 | } else { | |||
| 2667 | ||||
| 2668 | assert(min->ksize > 0)((min->ksize > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2668, __func__, "min->ksize > 0")); | |||
| 2669 | assert(sep->size > 0)((sep->size > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2669, __func__, "sep->size > 0")); | |||
| 2670 | ||||
| 2671 | p1 = (char *)NODEKEY(min)(void *)((min)->data); | |||
| 2672 | p2 = (char *)sep->data; | |||
| 2673 | ||||
| 2674 | while (*p1 == *p2) { | |||
| 2675 | p1++; | |||
| 2676 | p2++; | |||
| 2677 | n++; | |||
| 2678 | if (n == min->ksize || n == sep->size) | |||
| 2679 | break; | |||
| 2680 | } | |||
| 2681 | ||||
| 2682 | sep->size = n + 1; | |||
| 2683 | } | |||
| 2684 | ||||
| 2685 | DPRINTF("reduced separator to [%.*s] > [%.*s]", | |||
| 2686 | (int)sep->size, (char *)sep->data, | |||
| 2687 | (int)min->ksize, (char *)NODEKEY(min)); | |||
| 2688 | } | |||
| 2689 | ||||
| 2690 | /* Split page <*mpp>, and insert <key,(data|newpgno)> in either left or | |||
| 2691 | * right sibling, at index <*newindxp> (as if unsplit). Updates *mpp and | |||
| 2692 | * *newindxp with the actual values after split, ie if *mpp and *newindxp | |||
| 2693 | * refer to a node in the new right sibling page. | |||
| 2694 | */ | |||
| 2695 | static int | |||
| 2696 | btree_split(struct btree *bt, struct mpage **mpp, unsigned int *newindxp, | |||
| 2697 | struct btval *newkey, struct btval *newdata, pgno_t newpgno) | |||
| 2698 | { | |||
| 2699 | uint8_t flags; | |||
| 2700 | int rc = BT_SUCCESS0, ins_new = 0; | |||
| 2701 | indx_t newindx; | |||
| 2702 | pgno_t pgno = 0; | |||
| 2703 | size_t orig_pfx_len, left_pfx_diff, right_pfx_diff, pfx_diff; | |||
| 2704 | unsigned int i, j, split_indx; | |||
| 2705 | struct node *node; | |||
| 2706 | struct mpage *pright, *p, *mp; | |||
| 2707 | struct btval sepkey, rkey, rdata; | |||
| 2708 | struct btkey tmpkey; | |||
| 2709 | struct page *copy; | |||
| 2710 | ||||
| 2711 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2711, __func__, "bt != NULL")); | |||
| 2712 | assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2712, __func__, "bt->txn != NULL")); | |||
| 2713 | ||||
| 2714 | mp = *mpp; | |||
| 2715 | newindx = *newindxp; | |||
| 2716 | ||||
| 2717 | DPRINTF("-----> splitting %s page %u and adding [%.*s] at index %d", | |||
| 2718 | IS_LEAF(mp) ? "leaf" : "branch", mp->pgno, | |||
| 2719 | (int)newkey->size, (char *)newkey->data, *newindxp); | |||
| 2720 | DPRINTF("page %u has prefix [%.*s]", mp->pgno, | |||
| 2721 | (int)mp->prefix.len, (char *)mp->prefix.str); | |||
| 2722 | orig_pfx_len = mp->prefix.len; | |||
| 2723 | ||||
| 2724 | if (mp->parent == NULL((void*)0)) { | |||
| 2725 | if ((mp->parent = btree_new_page(bt, P_BRANCH0x01)) == NULL((void*)0)) | |||
| 2726 | return BT_FAIL-1; | |||
| 2727 | mp->parent_index = 0; | |||
| 2728 | bt->txn->root = mp->parent->pgno; | |||
| 2729 | DPRINTF("root split! new root = %u", mp->parent->pgno); | |||
| 2730 | bt->meta.depth++; | |||
| 2731 | ||||
| 2732 | /* Add left (implicit) pointer. */ | |||
| 2733 | if (btree_add_node(bt, mp->parent, 0, NULL((void*)0), NULL((void*)0), | |||
| 2734 | mp->pgno, 0) != BT_SUCCESS0) | |||
| 2735 | return BT_FAIL-1; | |||
| 2736 | } else { | |||
| 2737 | DPRINTF("parent branch page is %u", mp->parent->pgno); | |||
| 2738 | } | |||
| 2739 | ||||
| 2740 | /* Create a right sibling. */ | |||
| 2741 | if ((pright = btree_new_page(bt, mp->page->flags)) == NULL((void*)0)) | |||
| 2742 | return BT_FAIL-1; | |||
| 2743 | pright->parent = mp->parent; | |||
| 2744 | pright->parent_index = mp->parent_index + 1; | |||
| 2745 | DPRINTF("new right sibling: page %u", pright->pgno); | |||
| 2746 | ||||
| 2747 | /* Move half of the keys to the right sibling. */ | |||
| 2748 | if ((copy = malloc(bt->head.psize)) == NULL((void*)0)) | |||
| 2749 | return BT_FAIL-1; | |||
| 2750 | bcopy(mp->page, copy, bt->head.psize); | |||
| 2751 | assert(mp->ref == 0)((mp->ref == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2751, __func__, "mp->ref == 0")); /* XXX */ | |||
| 2752 | memset(&mp->page->ptrs, 0, bt->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs)); | |||
| 2753 | mp->page->lowerb.fb.fb_lower = PAGEHDRSZ__builtin_offsetof(struct page, ptrs); | |||
| 2754 | mp->page->upperb.fb.fb_upper = bt->head.psize; | |||
| 2755 | ||||
| 2756 | split_indx = NUMKEYSP(copy)(((copy)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs )) >> 1) / 2 + 1; | |||
| 2757 | ||||
| 2758 | /* First find the separating key between the split pages. | |||
| 2759 | */ | |||
| 2760 | memset(&sepkey, 0, sizeof(sepkey)); | |||
| 2761 | if (newindx == split_indx) { | |||
| 2762 | sepkey.size = newkey->size; | |||
| 2763 | sepkey.data = newkey->data; | |||
| 2764 | remove_prefix(bt, &sepkey, mp->prefix.len); | |||
| 2765 | } else { | |||
| 2766 | node = NODEPTRP(copy, split_indx)((struct node *)((char *)(copy) + (copy)->ptrs[split_indx] )); | |||
| 2767 | sepkey.size = node->ksize; | |||
| 2768 | sepkey.data = NODEKEY(node)(void *)((node)->data); | |||
| 2769 | } | |||
| 2770 | ||||
| 2771 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)) && bt->cmp == NULL((void*)0)) { | |||
| 2772 | /* Find the smallest separator. */ | |||
| 2773 | /* Ref: Prefix B-trees, R. Bayer, K. Unterauer, 1977 */ | |||
| 2774 | node = NODEPTRP(copy, split_indx - 1)((struct node *)((char *)(copy) + (copy)->ptrs[split_indx - 1])); | |||
| 2775 | bt_reduce_separator(bt, node, &sepkey); | |||
| 2776 | } | |||
| 2777 | ||||
| 2778 | /* Fix separator wrt parent prefix. */ | |||
| 2779 | if (bt->cmp == NULL((void*)0)) { | |||
| 2780 | tmpkey.len = sizeof(tmpkey.str); | |||
| 2781 | concat_prefix(bt, mp->prefix.str, mp->prefix.len, | |||
| 2782 | sepkey.data, sepkey.size, tmpkey.str, &tmpkey.len); | |||
| 2783 | sepkey.data = tmpkey.str; | |||
| 2784 | sepkey.size = tmpkey.len; | |||
| 2785 | } | |||
| 2786 | ||||
| 2787 | DPRINTF("separator is [%.*s]", (int)sepkey.size, (char *)sepkey.data); | |||
| 2788 | ||||
| 2789 | /* Copy separator key to the parent. | |||
| 2790 | */ | |||
| 2791 | if (SIZELEFT(pright->parent)(indx_t)((pright->parent)->page->b.fb.fb_upper - (pright ->parent)->page->b.fb.fb_lower) < bt_branch_size(bt, &sepkey)) { | |||
| 2792 | rc = btree_split(bt, &pright->parent, &pright->parent_index, | |||
| 2793 | &sepkey, NULL((void*)0), pright->pgno); | |||
| 2794 | ||||
| 2795 | /* Right page might now have changed parent. | |||
| 2796 | * Check if left page also changed parent. | |||
| 2797 | */ | |||
| 2798 | if (pright->parent != mp->parent && | |||
| 2799 | mp->parent_index >= NUMKEYS(mp->parent)(((mp->parent)->page->b.fb.fb_lower - __builtin_offsetof (struct page, ptrs)) >> 1)) { | |||
| 2800 | mp->parent = pright->parent; | |||
| 2801 | mp->parent_index = pright->parent_index - 1; | |||
| 2802 | } | |||
| 2803 | } else { | |||
| 2804 | remove_prefix(bt, &sepkey, pright->parent->prefix.len); | |||
| 2805 | rc = btree_add_node(bt, pright->parent, pright->parent_index, | |||
| 2806 | &sepkey, NULL((void*)0), pright->pgno, 0); | |||
| 2807 | } | |||
| 2808 | if (rc != BT_SUCCESS0) { | |||
| 2809 | free(copy); | |||
| 2810 | return BT_FAIL-1; | |||
| 2811 | } | |||
| 2812 | ||||
| 2813 | /* Update prefix for right and left page, if the parent was split. | |||
| 2814 | */ | |||
| 2815 | find_common_prefix(bt, pright); | |||
| 2816 | assert(orig_pfx_len <= pright->prefix.len)((orig_pfx_len <= pright->prefix.len) ? (void)0 : __assert2 ("/usr/src/usr.sbin/ldapd/btree.c", 2816, __func__, "orig_pfx_len <= pright->prefix.len" )); | |||
| 2817 | right_pfx_diff = pright->prefix.len - orig_pfx_len; | |||
| 2818 | ||||
| 2819 | find_common_prefix(bt, mp); | |||
| 2820 | assert(orig_pfx_len <= mp->prefix.len)((orig_pfx_len <= mp->prefix.len) ? (void)0 : __assert2 ("/usr/src/usr.sbin/ldapd/btree.c", 2820, __func__, "orig_pfx_len <= mp->prefix.len" )); | |||
| 2821 | left_pfx_diff = mp->prefix.len - orig_pfx_len; | |||
| 2822 | ||||
| 2823 | for (i = j = 0; i <= NUMKEYSP(copy)(((copy)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs )) >> 1); j++) { | |||
| 2824 | if (i < split_indx) { | |||
| 2825 | /* Re-insert in left sibling. */ | |||
| 2826 | p = mp; | |||
| 2827 | pfx_diff = left_pfx_diff; | |||
| 2828 | } else { | |||
| 2829 | /* Insert in right sibling. */ | |||
| 2830 | if (i == split_indx) | |||
| 2831 | /* Reset insert index for right sibling. */ | |||
| 2832 | j = (i == newindx && ins_new); | |||
| 2833 | p = pright; | |||
| 2834 | pfx_diff = right_pfx_diff; | |||
| 2835 | } | |||
| 2836 | ||||
| 2837 | if (i == newindx && !ins_new) { | |||
| 2838 | /* Insert the original entry that caused the split. */ | |||
| 2839 | rkey.data = newkey->data; | |||
| 2840 | rkey.size = newkey->size; | |||
| 2841 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) { | |||
| 2842 | rdata.data = newdata->data; | |||
| 2843 | rdata.size = newdata->size; | |||
| 2844 | } else | |||
| 2845 | pgno = newpgno; | |||
| 2846 | flags = 0; | |||
| 2847 | pfx_diff = p->prefix.len; | |||
| 2848 | ||||
| 2849 | ins_new = 1; | |||
| 2850 | ||||
| 2851 | /* Update page and index for the new key. */ | |||
| 2852 | *newindxp = j; | |||
| 2853 | *mpp = p; | |||
| 2854 | } else if (i == NUMKEYSP(copy)(((copy)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs )) >> 1)) { | |||
| 2855 | break; | |||
| 2856 | } else { | |||
| 2857 | node = NODEPTRP(copy, i)((struct node *)((char *)(copy) + (copy)->ptrs[i])); | |||
| 2858 | rkey.data = NODEKEY(node)(void *)((node)->data); | |||
| 2859 | rkey.size = node->ksize; | |||
| 2860 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) { | |||
| 2861 | rdata.data = NODEDATA(node)(void *)((char *)(node)->data + (node)->ksize); | |||
| 2862 | rdata.size = node->n_dsizep.np_dsize; | |||
| 2863 | } else | |||
| 2864 | pgno = node->n_pgnop.np_pgno; | |||
| 2865 | flags = node->flags; | |||
| 2866 | ||||
| 2867 | i++; | |||
| 2868 | } | |||
| 2869 | ||||
| 2870 | if (!IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)) && j == 0) { | |||
| 2871 | /* First branch index doesn't need key data. */ | |||
| 2872 | rkey.size = 0; | |||
| 2873 | } else | |||
| 2874 | remove_prefix(bt, &rkey, pfx_diff); | |||
| 2875 | ||||
| 2876 | rc = btree_add_node(bt, p, j, &rkey, &rdata, pgno,flags); | |||
| 2877 | } | |||
| 2878 | ||||
| 2879 | free(copy); | |||
| 2880 | return rc; | |||
| 2881 | } | |||
| 2882 | ||||
| 2883 | int | |||
| 2884 | btree_txn_put(struct btree *bt, struct btree_txn *txn, | |||
| 2885 | struct btval *key, struct btval *data, unsigned int flags) | |||
| 2886 | { | |||
| 2887 | int rc = BT_SUCCESS0, exact, close_txn = 0; | |||
| 2888 | unsigned int ki; | |||
| 2889 | struct node *leaf; | |||
| 2890 | struct mpage *mp; | |||
| 2891 | struct btval xkey; | |||
| 2892 | ||||
| 2893 | assert(key != NULL)((key != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2893, __func__, "key != NULL")); | |||
| 2894 | assert(data != NULL)((data != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2894, __func__, "data != NULL")); | |||
| 2895 | ||||
| 2896 | if (bt != NULL((void*)0) && txn != NULL((void*)0) && bt != txn->bt) { | |||
| 2897 | errno(*__errno()) = EINVAL22; | |||
| 2898 | return BT_FAIL-1; | |||
| 2899 | } | |||
| 2900 | ||||
| 2901 | if (txn != NULL((void*)0) && F_ISSET(txn->flags, BT_TXN_RDONLY)(((txn->flags) & (0x01)) == (0x01))) { | |||
| 2902 | errno(*__errno()) = EINVAL22; | |||
| 2903 | return BT_FAIL-1; | |||
| 2904 | } | |||
| 2905 | ||||
| 2906 | if (bt == NULL((void*)0)) { | |||
| 2907 | if (txn == NULL((void*)0)) { | |||
| 2908 | errno(*__errno()) = EINVAL22; | |||
| 2909 | return BT_FAIL-1; | |||
| 2910 | } | |||
| 2911 | bt = txn->bt; | |||
| 2912 | } | |||
| 2913 | ||||
| 2914 | if (key->size == 0 || key->size > MAXKEYSIZE255) { | |||
| 2915 | errno(*__errno()) = EINVAL22; | |||
| 2916 | return BT_FAIL-1; | |||
| 2917 | } | |||
| 2918 | ||||
| 2919 | DPRINTF("==> put key %.*s, size %zu, data size %zu", | |||
| 2920 | (int)key->size, (char *)key->data, key->size, data->size); | |||
| 2921 | ||||
| 2922 | if (txn == NULL((void*)0)) { | |||
| 2923 | close_txn = 1; | |||
| 2924 | if ((txn = btree_txn_begin(bt, 0)) == NULL((void*)0)) | |||
| 2925 | return BT_FAIL-1; | |||
| 2926 | } | |||
| 2927 | ||||
| 2928 | rc = btree_search_page(bt, txn, key, NULL((void*)0), 1, &mp); | |||
| 2929 | if (rc == BT_SUCCESS0) { | |||
| 2930 | leaf = btree_search_node(bt, mp, key, &exact, &ki); | |||
| 2931 | if (leaf && exact) { | |||
| 2932 | if (F_ISSET(flags, BT_NOOVERWRITE)(((flags) & (1)) == (1))) { | |||
| 2933 | DPRINTF("duplicate key %.*s", | |||
| 2934 | (int)key->size, (char *)key->data); | |||
| 2935 | errno(*__errno()) = EEXIST17; | |||
| 2936 | rc = BT_FAIL-1; | |||
| 2937 | goto done; | |||
| 2938 | } | |||
| 2939 | btree_del_node(bt, mp, ki); | |||
| 2940 | } | |||
| 2941 | if (leaf == NULL((void*)0)) { /* append if not found */ | |||
| 2942 | ki = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); | |||
| 2943 | DPRINTF("appending key at index %d", ki); | |||
| 2944 | } | |||
| 2945 | } else if (errno(*__errno()) == ENOENT2) { | |||
| 2946 | /* new file, just write a root leaf page */ | |||
| 2947 | DPRINTF("allocating new root leaf page"); | |||
| 2948 | if ((mp = btree_new_page(bt, P_LEAF0x02)) == NULL((void*)0)) { | |||
| 2949 | rc = BT_FAIL-1; | |||
| 2950 | goto done; | |||
| 2951 | } | |||
| 2952 | txn->root = mp->pgno; | |||
| 2953 | bt->meta.depth++; | |||
| 2954 | ki = 0; | |||
| 2955 | } | |||
| 2956 | else | |||
| 2957 | goto done; | |||
| 2958 | ||||
| 2959 | assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void )0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 2959, __func__ , "IS_LEAF(mp)")); | |||
| 2960 | DPRINTF("there are %lu keys, should insert new key at index %u", | |||
| 2961 | NUMKEYS(mp), ki); | |||
| 2962 | ||||
| 2963 | /* Copy the key pointer as it is modified by the prefix code. The | |||
| 2964 | * caller might have malloc'ed the data. | |||
| 2965 | */ | |||
| 2966 | xkey.data = key->data; | |||
| 2967 | xkey.size = key->size; | |||
| 2968 | ||||
| 2969 | if (SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page-> b.fb.fb_lower) < bt_leaf_size(bt, key, data)) { | |||
| 2970 | rc = btree_split(bt, &mp, &ki, &xkey, data, P_INVALID0xFFFFFFFF); | |||
| 2971 | } else { | |||
| 2972 | /* There is room already in this leaf page. */ | |||
| 2973 | remove_prefix(bt, &xkey, mp->prefix.len); | |||
| 2974 | rc = btree_add_node(bt, mp, ki, &xkey, data, 0, 0); | |||
| 2975 | } | |||
| 2976 | ||||
| 2977 | if (rc != BT_SUCCESS0) | |||
| 2978 | txn->flags |= BT_TXN_ERROR0x02; | |||
| 2979 | else | |||
| 2980 | bt->meta.entries++; | |||
| 2981 | ||||
| 2982 | done: | |||
| 2983 | if (close_txn) { | |||
| 2984 | if (rc == BT_SUCCESS0) | |||
| 2985 | rc = btree_txn_commit(txn); | |||
| 2986 | else | |||
| 2987 | btree_txn_abort(txn); | |||
| 2988 | } | |||
| 2989 | mpage_prune(bt); | |||
| 2990 | return rc; | |||
| 2991 | } | |||
| 2992 | ||||
| 2993 | static pgno_t | |||
| 2994 | btree_compact_tree(struct btree *bt, pgno_t pgno, struct btree *btc) | |||
| 2995 | { | |||
| 2996 | ssize_t rc; | |||
| 2997 | indx_t i; | |||
| 2998 | pgno_t *pnext, next; | |||
| 2999 | struct node *node; | |||
| 3000 | struct page *p; | |||
| 3001 | struct mpage *mp; | |||
| 3002 | ||||
| 3003 | /* Get the page and make a copy of it. | |||
| 3004 | */ | |||
| 3005 | if ((mp = btree_get_mpage(bt, pgno)) == NULL((void*)0)) | |||
| 3006 | return P_INVALID0xFFFFFFFF; | |||
| 3007 | if ((p = malloc(bt->head.psize)) == NULL((void*)0)) | |||
| 3008 | return P_INVALID0xFFFFFFFF; | |||
| 3009 | bcopy(mp->page, p, bt->head.psize); | |||
| 3010 | ||||
| 3011 | /* Go through all nodes in the (copied) page and update the | |||
| 3012 | * page pointers. | |||
| 3013 | */ | |||
| 3014 | if (F_ISSET(p->flags, P_BRANCH)(((p->flags) & (0x01)) == (0x01))) { | |||
| 3015 | for (i = 0; i < NUMKEYSP(p)(((p)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs )) >> 1); i++) { | |||
| 3016 | node = NODEPTRP(p, i)((struct node *)((char *)(p) + (p)->ptrs[i])); | |||
| 3017 | node->n_pgnop.np_pgno = btree_compact_tree(bt, node->n_pgnop.np_pgno, btc); | |||
| 3018 | if (node->n_pgnop.np_pgno == P_INVALID0xFFFFFFFF) { | |||
| 3019 | free(p); | |||
| 3020 | return P_INVALID0xFFFFFFFF; | |||
| 3021 | } | |||
| 3022 | } | |||
| 3023 | } else if (F_ISSET(p->flags, P_LEAF)(((p->flags) & (0x02)) == (0x02))) { | |||
| 3024 | for (i = 0; i < NUMKEYSP(p)(((p)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs )) >> 1); i++) { | |||
| 3025 | node = NODEPTRP(p, i)((struct node *)((char *)(p) + (p)->ptrs[i])); | |||
| 3026 | if (F_ISSET(node->flags, F_BIGDATA)(((node->flags) & (0x01)) == (0x01))) { | |||
| 3027 | bcopy(NODEDATA(node)(void *)((char *)(node)->data + (node)->ksize), &next, sizeof(next)); | |||
| 3028 | next = btree_compact_tree(bt, next, btc); | |||
| 3029 | if (next == P_INVALID0xFFFFFFFF) { | |||
| 3030 | free(p); | |||
| 3031 | return P_INVALID0xFFFFFFFF; | |||
| 3032 | } | |||
| 3033 | bcopy(&next, NODEDATA(node)(void *)((char *)(node)->data + (node)->ksize), sizeof(next)); | |||
| 3034 | } | |||
| 3035 | } | |||
| 3036 | } else if (F_ISSET(p->flags, P_OVERFLOW)(((p->flags) & (0x04)) == (0x04))) { | |||
| 3037 | pnext = &p->p_next_pgnob.pb_next_pgno; | |||
| 3038 | if (*pnext > 0) { | |||
| 3039 | *pnext = btree_compact_tree(bt, *pnext, btc); | |||
| 3040 | if (*pnext == P_INVALID0xFFFFFFFF) { | |||
| 3041 | free(p); | |||
| 3042 | return P_INVALID0xFFFFFFFF; | |||
| 3043 | } | |||
| 3044 | } | |||
| 3045 | } else | |||
| 3046 | assert(0)((0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 3046, __func__, "0")); | |||
| 3047 | ||||
| 3048 | pgno = p->pgno = btc->txn->next_pgno++; | |||
| 3049 | rc = write(btc->fd, p, bt->head.psize); | |||
| 3050 | free(p); | |||
| 3051 | if (rc != (ssize_t)bt->head.psize) | |||
| 3052 | return P_INVALID0xFFFFFFFF; | |||
| 3053 | mpage_prune(bt); | |||
| 3054 | return pgno; | |||
| 3055 | } | |||
| 3056 | ||||
| 3057 | int | |||
| 3058 | btree_compact(struct btree *bt) | |||
| 3059 | { | |||
| 3060 | char *compact_path = NULL((void*)0); | |||
| 3061 | struct btree *btc; | |||
| 3062 | struct btree_txn *txn, *txnc = NULL((void*)0); | |||
| 3063 | int fd; | |||
| 3064 | pgno_t root; | |||
| 3065 | ||||
| 3066 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 3066, __func__, "bt != NULL")); | |||
| 3067 | ||||
| 3068 | DPRINTF("compacting btree %p with path %s", bt, bt->path); | |||
| 3069 | ||||
| 3070 | if (bt->path == NULL((void*)0)) { | |||
| 3071 | errno(*__errno()) = EINVAL22; | |||
| 3072 | return BT_FAIL-1; | |||
| 3073 | } | |||
| 3074 | ||||
| 3075 | if ((txn = btree_txn_begin(bt, 0)) == NULL((void*)0)) | |||
| 3076 | return BT_FAIL-1; | |||
| 3077 | ||||
| 3078 | if (asprintf(&compact_path, "%s.compact.XXXXXX", bt->path) == -1) { | |||
| 3079 | btree_txn_abort(txn); | |||
| 3080 | return BT_FAIL-1; | |||
| 3081 | } | |||
| 3082 | fd = mkstemp(compact_path); | |||
| 3083 | if (fd == -1) { | |||
| 3084 | free(compact_path); | |||
| 3085 | btree_txn_abort(txn); | |||
| 3086 | return BT_FAIL-1; | |||
| 3087 | } | |||
| 3088 | ||||
| 3089 | if ((btc = btree_open_fd(fd, 0)) == NULL((void*)0)) | |||
| 3090 | goto failed; | |||
| 3091 | bcopy(&bt->meta, &btc->meta, sizeof(bt->meta)); | |||
| 3092 | btc->meta.revisions = 0; | |||
| 3093 | ||||
| 3094 | if ((txnc = btree_txn_begin(btc, 0)) == NULL((void*)0)) | |||
| 3095 | goto failed; | |||
| 3096 | ||||
| 3097 | if (bt->meta.root != P_INVALID0xFFFFFFFF) { | |||
| 3098 | root = btree_compact_tree(bt, bt->meta.root, btc); | |||
| 3099 | if (root == P_INVALID0xFFFFFFFF) | |||
| 3100 | goto failed; | |||
| 3101 | if (btree_write_meta(btc, root, 0) != BT_SUCCESS0) | |||
| 3102 | goto failed; | |||
| 3103 | } | |||
| 3104 | ||||
| 3105 | fsync(fd); | |||
| 3106 | ||||
| 3107 | DPRINTF("renaming %s to %s", compact_path, bt->path); | |||
| 3108 | if (rename(compact_path, bt->path) != 0) | |||
| 3109 | goto failed; | |||
| 3110 | ||||
| 3111 | /* Write a "tombstone" meta page so other processes can pick up | |||
| 3112 | * the change and re-open the file. | |||
| 3113 | */ | |||
| 3114 | if (btree_write_meta(bt, P_INVALID0xFFFFFFFF, BT_TOMBSTONE0x01) != BT_SUCCESS0) | |||
| 3115 | goto failed; | |||
| 3116 | ||||
| 3117 | btree_txn_abort(txn); | |||
| 3118 | btree_txn_abort(txnc); | |||
| 3119 | free(compact_path); | |||
| 3120 | btree_close(btc); | |||
| 3121 | mpage_prune(bt); | |||
| 3122 | return 0; | |||
| 3123 | ||||
| 3124 | failed: | |||
| 3125 | btree_txn_abort(txn); | |||
| 3126 | btree_txn_abort(txnc); | |||
| 3127 | unlink(compact_path); | |||
| 3128 | free(compact_path); | |||
| 3129 | btree_close(btc); | |||
| 3130 | mpage_prune(bt); | |||
| 3131 | return BT_FAIL-1; | |||
| 3132 | } | |||
| 3133 | ||||
| 3134 | /* Reverts the last change. Truncates the file at the last root page. | |||
| 3135 | */ | |||
| 3136 | int | |||
| 3137 | btree_revert(struct btree *bt) | |||
| 3138 | { | |||
| 3139 | if (btree_read_meta(bt, NULL((void*)0)) != 0) | |||
| 3140 | return -1; | |||
| 3141 | ||||
| 3142 | DPRINTF("truncating file at page %u", bt->meta.root); | |||
| 3143 | return ftruncate(bt->fd, bt->head.psize * bt->meta.root); | |||
| 3144 | } | |||
| 3145 | ||||
| 3146 | void | |||
| 3147 | btree_set_cache_size(struct btree *bt, unsigned int cache_size) | |||
| 3148 | { | |||
| 3149 | bt->stat.max_cache = cache_size; | |||
| 3150 | } | |||
| 3151 | ||||
| 3152 | unsigned int | |||
| 3153 | btree_get_flags(struct btree *bt) | |||
| 3154 | { | |||
| 3155 | return (bt->flags & ~BT_FIXPADDING0x01); | |||
| 3156 | } | |||
| 3157 | ||||
| 3158 | const char * | |||
| 3159 | btree_get_path(struct btree *bt) | |||
| 3160 | { | |||
| 3161 | return bt->path; | |||
| 3162 | } | |||
| 3163 | ||||
| 3164 | const struct btree_stat * | |||
| 3165 | btree_stat(struct btree *bt) | |||
| 3166 | { | |||
| 3167 | if (bt == NULL((void*)0)) | |||
| 3168 | return NULL((void*)0); | |||
| 3169 | ||||
| 3170 | bt->stat.branch_pages = bt->meta.branch_pages; | |||
| 3171 | bt->stat.leaf_pages = bt->meta.leaf_pages; | |||
| 3172 | bt->stat.overflow_pages = bt->meta.overflow_pages; | |||
| 3173 | bt->stat.revisions = bt->meta.revisions; | |||
| 3174 | bt->stat.depth = bt->meta.depth; | |||
| 3175 | bt->stat.entries = bt->meta.entries; | |||
| 3176 | bt->stat.psize = bt->head.psize; | |||
| 3177 | bt->stat.created_at = bt->meta.created_at; | |||
| 3178 | ||||
| 3179 | return &bt->stat; | |||
| 3180 | } | |||
| 3181 |