File: | src/usr.sbin/ldapd/btree.c |
Warning: | line 1963, column 13 Assigned value is garbage or undefined |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: btree.c,v 1.38 2017/05/26 21:23:14 sthen Exp $ */ | ||||
2 | |||||
3 | /* | ||||
4 | * Copyright (c) 2009, 2010 Martin Hedenfalk <martin@bzero.se> | ||||
5 | * | ||||
6 | * Permission to use, copy, modify, and distribute this software for any | ||||
7 | * purpose with or without fee is hereby granted, provided that the above | ||||
8 | * copyright notice and this permission notice appear in all copies. | ||||
9 | * | ||||
10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||||
11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||||
12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||||
13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||||
14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||||
15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||||
16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||||
17 | */ | ||||
18 | |||||
19 | #include <sys/types.h> | ||||
20 | #include <sys/tree.h> | ||||
21 | #include <sys/stat.h> | ||||
22 | #include <sys/queue.h> | ||||
23 | #include <sys/uio.h> | ||||
24 | |||||
25 | #include <assert.h> | ||||
26 | #include <err.h> | ||||
27 | #include <errno(*__errno()).h> | ||||
28 | #include <fcntl.h> | ||||
29 | #include <stddef.h> | ||||
30 | #include <stdint.h> | ||||
31 | #include <stdio.h> | ||||
32 | #include <stdlib.h> | ||||
33 | #include <string.h> | ||||
34 | #include <time.h> | ||||
35 | #include <unistd.h> | ||||
36 | |||||
37 | #include "btree.h" | ||||
38 | |||||
39 | /* #define DEBUG */ | ||||
40 | |||||
41 | #ifdef DEBUG | ||||
42 | # define DPRINTF(...) do { fprintf(stderr(&__sF[2]), "%s:%d: ", __func__, __LINE__42); \ | ||||
43 | fprintf(stderr(&__sF[2]), __VA_ARGS__); \ | ||||
44 | fprintf(stderr(&__sF[2]), "\n"); } while(0) | ||||
45 | #else | ||||
46 | # define DPRINTF(...) | ||||
47 | #endif | ||||
48 | |||||
49 | #define PAGESIZE4096 4096 | ||||
50 | #define BT_MINKEYS4 4 | ||||
51 | #define BT_MAGIC0xB3DBB3DB 0xB3DBB3DB | ||||
52 | #define BT_VERSION4 4 | ||||
53 | #define MAXKEYSIZE255 255 | ||||
54 | |||||
55 | #define P_INVALID0xFFFFFFFF 0xFFFFFFFF | ||||
56 | |||||
57 | #define F_ISSET(w, f)(((w) & (f)) == (f)) (((w) & (f)) == (f)) | ||||
58 | #define MINIMUM(a,b)((a) < (b) ? (a) : (b)) ((a) < (b) ? (a) : (b)) | ||||
59 | |||||
60 | typedef uint32_t pgno_t; | ||||
61 | typedef uint16_t indx_t; | ||||
62 | |||||
63 | /* There are four page types: meta, index, leaf and overflow. | ||||
64 | * They all share the same page header. | ||||
65 | */ | ||||
66 | struct page { /* represents an on-disk page */ | ||||
67 | pgno_t pgno; /* page number */ | ||||
68 | #define P_BRANCH0x01 0x01 /* branch page */ | ||||
69 | #define P_LEAF0x02 0x02 /* leaf page */ | ||||
70 | #define P_OVERFLOW0x04 0x04 /* overflow page */ | ||||
71 | #define P_META0x08 0x08 /* meta page */ | ||||
72 | #define P_HEAD0x10 0x10 /* header page */ | ||||
73 | uint32_t flags; | ||||
74 | #define lowerb.fb.fb_lower b.fb.fb_lower | ||||
75 | #define upperb.fb.fb_upper b.fb.fb_upper | ||||
76 | #define p_next_pgnob.pb_next_pgno b.pb_next_pgno | ||||
77 | union page_bounds { | ||||
78 | struct { | ||||
79 | indx_t fb_lower; /* lower bound of free space */ | ||||
80 | indx_t fb_upper; /* upper bound of free space */ | ||||
81 | } fb; | ||||
82 | pgno_t pb_next_pgno; /* overflow page linked list */ | ||||
83 | } b; | ||||
84 | indx_t ptrs[1]; /* dynamic size */ | ||||
85 | } __packed__attribute__((__packed__)); | ||||
86 | |||||
87 | #define PAGEHDRSZ__builtin_offsetof(struct page, ptrs) offsetof(struct page, ptrs)__builtin_offsetof(struct page, ptrs) | ||||
88 | |||||
89 | #define NUMKEYSP(p)(((p)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs )) >> 1) (((p)->lowerb.fb.fb_lower - PAGEHDRSZ__builtin_offsetof(struct page, ptrs)) >> 1) | ||||
90 | #define NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) (((mp)->page->lowerb.fb.fb_lower - PAGEHDRSZ__builtin_offsetof(struct page, ptrs)) >> 1) | ||||
91 | #define SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page-> b.fb.fb_lower) (indx_t)((mp)->page->upperb.fb.fb_upper - (mp)->page->lowerb.fb.fb_lower) | ||||
92 | #define PAGEFILL(bt, mp)(1000 * ((bt)->head.psize - __builtin_offsetof(struct page , ptrs) - (indx_t)((mp)->page->b.fb.fb_upper - (mp)-> page->b.fb.fb_lower)) / ((bt)->head.psize - __builtin_offsetof (struct page, ptrs))) (1000 * ((bt)->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs) - SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page-> b.fb.fb_lower)) / \ | ||||
93 | ((bt)->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs))) | ||||
94 | #define IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)) F_ISSET((mp)->page->flags, P_LEAF)((((mp)->page->flags) & (0x02)) == (0x02)) | ||||
95 | #define IS_BRANCH(mp)((((mp)->page->flags) & (0x01)) == (0x01)) F_ISSET((mp)->page->flags, P_BRANCH)((((mp)->page->flags) & (0x01)) == (0x01)) | ||||
96 | #define IS_OVERFLOW(mp)((((mp)->page->flags) & (0x04)) == (0x04)) F_ISSET((mp)->page->flags, P_OVERFLOW)((((mp)->page->flags) & (0x04)) == (0x04)) | ||||
97 | |||||
98 | struct bt_head { /* header page content */ | ||||
99 | uint32_t magic; | ||||
100 | uint32_t version; | ||||
101 | uint32_t flags; | ||||
102 | uint32_t psize; /* page size */ | ||||
103 | } __packed__attribute__((__packed__)); | ||||
104 | |||||
105 | struct bt_meta { /* meta (footer) page content */ | ||||
106 | #define BT_TOMBSTONE0x01 0x01 /* file is replaced */ | ||||
107 | uint32_t flags; | ||||
108 | pgno_t root; /* page number of root page */ | ||||
109 | pgno_t prev_meta; /* previous meta page number */ | ||||
110 | time_t created_at; | ||||
111 | uint32_t branch_pages; | ||||
112 | uint32_t leaf_pages; | ||||
113 | uint32_t overflow_pages; | ||||
114 | uint32_t revisions; | ||||
115 | uint32_t depth; | ||||
116 | uint64_t entries; | ||||
117 | unsigned char hash[SHA_DIGEST_LENGTH20]; | ||||
118 | } __packed__attribute__((__packed__)); | ||||
119 | |||||
120 | struct btkey { | ||||
121 | size_t len; | ||||
122 | char str[MAXKEYSIZE255]; | ||||
123 | }; | ||||
124 | |||||
125 | struct mpage { /* an in-memory cached page */ | ||||
126 | RB_ENTRY(mpage)struct { struct mpage *rbe_left; struct mpage *rbe_right; struct mpage *rbe_parent; int rbe_color; } entry; /* page cache entry */ | ||||
127 | SIMPLEQ_ENTRY(mpage)struct { struct mpage *sqe_next; } next; /* queue of dirty pages */ | ||||
128 | TAILQ_ENTRY(mpage)struct { struct mpage *tqe_next; struct mpage **tqe_prev; } lru_next; /* LRU queue */ | ||||
129 | struct mpage *parent; /* NULL if root */ | ||||
130 | unsigned int parent_index; /* keep track of node index */ | ||||
131 | struct btkey prefix; | ||||
132 | struct page *page; | ||||
133 | pgno_t pgno; /* copy of page->pgno */ | ||||
134 | short ref; /* increased by cursors */ | ||||
135 | short dirty; /* 1 if on dirty queue */ | ||||
136 | }; | ||||
137 | RB_HEAD(page_cache, mpage)struct page_cache { struct mpage *rbh_root; }; | ||||
138 | SIMPLEQ_HEAD(dirty_queue, mpage)struct dirty_queue { struct mpage *sqh_first; struct mpage ** sqh_last; }; | ||||
139 | TAILQ_HEAD(lru_queue, mpage)struct lru_queue { struct mpage *tqh_first; struct mpage **tqh_last ; }; | ||||
140 | |||||
141 | static int mpage_cmp(struct mpage *a, struct mpage *b); | ||||
142 | static struct mpage *mpage_lookup(struct btree *bt, pgno_t pgno); | ||||
143 | static void mpage_add(struct btree *bt, struct mpage *mp); | ||||
144 | static void mpage_free(struct mpage *mp); | ||||
145 | static void mpage_del(struct btree *bt, struct mpage *mp); | ||||
146 | static void mpage_flush(struct btree *bt); | ||||
147 | static struct mpage *mpage_copy(struct btree *bt, struct mpage *mp); | ||||
148 | static void mpage_prune(struct btree *bt); | ||||
149 | static void mpage_dirty(struct btree *bt, struct mpage *mp); | ||||
150 | static struct mpage *mpage_touch(struct btree *bt, struct mpage *mp); | ||||
151 | |||||
152 | RB_PROTOTYPE(page_cache, mpage, entry, mpage_cmp)void page_cache_RB_INSERT_COLOR(struct page_cache *, struct mpage *); void page_cache_RB_REMOVE_COLOR(struct page_cache *, struct mpage *, struct mpage *); struct mpage *page_cache_RB_REMOVE (struct page_cache *, struct mpage *); struct mpage *page_cache_RB_INSERT (struct page_cache *, struct mpage *); struct mpage *page_cache_RB_FIND (struct page_cache *, struct mpage *); struct mpage *page_cache_RB_NFIND (struct page_cache *, struct mpage *); struct mpage *page_cache_RB_NEXT (struct mpage *); struct mpage *page_cache_RB_PREV(struct mpage *); struct mpage *page_cache_RB_MINMAX(struct page_cache *, int );; | ||||
153 | RB_GENERATE(page_cache, mpage, entry, mpage_cmp)void page_cache_RB_INSERT_COLOR(struct page_cache *head, struct mpage *elm) { struct mpage *parent, *gparent, *tmp; while (( parent = (elm)->entry.rbe_parent) && (parent)-> entry.rbe_color == 1) { gparent = (parent)->entry.rbe_parent ; if (parent == (gparent)->entry.rbe_left) { tmp = (gparent )->entry.rbe_right; if (tmp && (tmp)->entry.rbe_color == 1) { (tmp)->entry.rbe_color = 0; do { (parent)->entry .rbe_color = 0; (gparent)->entry.rbe_color = 1; } while (0 ); elm = gparent; continue; } if ((parent)->entry.rbe_right == elm) { do { (tmp) = (parent)->entry.rbe_right; if (((parent )->entry.rbe_right = (tmp)->entry.rbe_left)) { ((tmp)-> entry.rbe_left)->entry.rbe_parent = (parent); } do {} while (0); if (((tmp)->entry.rbe_parent = (parent)->entry.rbe_parent )) { if ((parent) == ((parent)->entry.rbe_parent)->entry .rbe_left) ((parent)->entry.rbe_parent)->entry.rbe_left = (tmp); else ((parent)->entry.rbe_parent)->entry.rbe_right = (tmp); } else (head)->rbh_root = (tmp); (tmp)->entry .rbe_left = (parent); (parent)->entry.rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent)) do {} while ( 0); } while (0); tmp = parent; parent = elm; elm = tmp; } do { (parent)->entry.rbe_color = 0; (gparent)->entry.rbe_color = 1; } while (0); do { (tmp) = (gparent)->entry.rbe_left; if (((gparent)->entry.rbe_left = (tmp)->entry.rbe_right )) { ((tmp)->entry.rbe_right)->entry.rbe_parent = (gparent ); } do {} while (0); if (((tmp)->entry.rbe_parent = (gparent )->entry.rbe_parent)) { if ((gparent) == ((gparent)->entry .rbe_parent)->entry.rbe_left) ((gparent)->entry.rbe_parent )->entry.rbe_left = (tmp); else ((gparent)->entry.rbe_parent )->entry.rbe_right = (tmp); } else (head)->rbh_root = ( tmp); (tmp)->entry.rbe_right = (gparent); (gparent)->entry .rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent )) do {} while (0); } while (0); } else { tmp = (gparent)-> entry.rbe_left; if (tmp && (tmp)->entry.rbe_color == 1) { (tmp)->entry.rbe_color = 0; do { (parent)->entry. rbe_color = 0; (gparent)->entry.rbe_color = 1; } while (0) ; elm = gparent; continue; } if ((parent)->entry.rbe_left == elm) { do { (tmp) = (parent)->entry.rbe_left; if (((parent )->entry.rbe_left = (tmp)->entry.rbe_right)) { ((tmp)-> entry.rbe_right)->entry.rbe_parent = (parent); } do {} while (0); if (((tmp)->entry.rbe_parent = (parent)->entry.rbe_parent )) { if ((parent) == ((parent)->entry.rbe_parent)->entry .rbe_left) ((parent)->entry.rbe_parent)->entry.rbe_left = (tmp); else ((parent)->entry.rbe_parent)->entry.rbe_right = (tmp); } else (head)->rbh_root = (tmp); (tmp)->entry .rbe_right = (parent); (parent)->entry.rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent)) do {} while (0); } while (0); tmp = parent; parent = elm; elm = tmp; } do { (parent)->entry.rbe_color = 0; (gparent)->entry.rbe_color = 1; } while (0); do { (tmp) = (gparent)->entry.rbe_right ; if (((gparent)->entry.rbe_right = (tmp)->entry.rbe_left )) { ((tmp)->entry.rbe_left)->entry.rbe_parent = (gparent ); } do {} while (0); if (((tmp)->entry.rbe_parent = (gparent )->entry.rbe_parent)) { if ((gparent) == ((gparent)->entry .rbe_parent)->entry.rbe_left) ((gparent)->entry.rbe_parent )->entry.rbe_left = (tmp); else ((gparent)->entry.rbe_parent )->entry.rbe_right = (tmp); } else (head)->rbh_root = ( tmp); (tmp)->entry.rbe_left = (gparent); (gparent)->entry .rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent )) do {} while (0); } while (0); } } (head->rbh_root)-> entry.rbe_color = 0; } void page_cache_RB_REMOVE_COLOR(struct page_cache *head, struct mpage *parent, struct mpage *elm) { struct mpage *tmp; while ((elm == ((void*)0) || (elm)->entry .rbe_color == 0) && elm != (head)->rbh_root) { if ( (parent)->entry.rbe_left == elm) { tmp = (parent)->entry .rbe_right; if ((tmp)->entry.rbe_color == 1) { do { (tmp)-> entry.rbe_color = 0; (parent)->entry.rbe_color = 1; } while (0); do { (tmp) = (parent)->entry.rbe_right; if (((parent )->entry.rbe_right = (tmp)->entry.rbe_left)) { ((tmp)-> entry.rbe_left)->entry.rbe_parent = (parent); } do {} while (0); if (((tmp)->entry.rbe_parent = (parent)->entry.rbe_parent )) { if ((parent) == ((parent)->entry.rbe_parent)->entry .rbe_left) ((parent)->entry.rbe_parent)->entry.rbe_left = (tmp); else ((parent)->entry.rbe_parent)->entry.rbe_right = (tmp); } else (head)->rbh_root = (tmp); (tmp)->entry .rbe_left = (parent); (parent)->entry.rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent)) do {} while ( 0); } while (0); tmp = (parent)->entry.rbe_right; } if ((( tmp)->entry.rbe_left == ((void*)0) || ((tmp)->entry.rbe_left )->entry.rbe_color == 0) && ((tmp)->entry.rbe_right == ((void*)0) || ((tmp)->entry.rbe_right)->entry.rbe_color == 0)) { (tmp)->entry.rbe_color = 1; elm = parent; parent = (elm)->entry.rbe_parent; } else { if ((tmp)->entry.rbe_right == ((void*)0) || ((tmp)->entry.rbe_right)->entry.rbe_color == 0) { struct mpage *oleft; if ((oleft = (tmp)->entry.rbe_left )) (oleft)->entry.rbe_color = 0; (tmp)->entry.rbe_color = 1; do { (oleft) = (tmp)->entry.rbe_left; if (((tmp)-> entry.rbe_left = (oleft)->entry.rbe_right)) { ((oleft)-> entry.rbe_right)->entry.rbe_parent = (tmp); } do {} while ( 0); if (((oleft)->entry.rbe_parent = (tmp)->entry.rbe_parent )) { if ((tmp) == ((tmp)->entry.rbe_parent)->entry.rbe_left ) ((tmp)->entry.rbe_parent)->entry.rbe_left = (oleft); else ((tmp)->entry.rbe_parent)->entry.rbe_right = (oleft); } else (head)->rbh_root = (oleft); (oleft)->entry.rbe_right = (tmp); (tmp)->entry.rbe_parent = (oleft); do {} while ( 0); if (((oleft)->entry.rbe_parent)) do {} while (0); } while (0); tmp = (parent)->entry.rbe_right; } (tmp)->entry.rbe_color = (parent)->entry.rbe_color; (parent)->entry.rbe_color = 0; if ((tmp)->entry.rbe_right) ((tmp)->entry.rbe_right )->entry.rbe_color = 0; do { (tmp) = (parent)->entry.rbe_right ; if (((parent)->entry.rbe_right = (tmp)->entry.rbe_left )) { ((tmp)->entry.rbe_left)->entry.rbe_parent = (parent ); } do {} while (0); if (((tmp)->entry.rbe_parent = (parent )->entry.rbe_parent)) { if ((parent) == ((parent)->entry .rbe_parent)->entry.rbe_left) ((parent)->entry.rbe_parent )->entry.rbe_left = (tmp); else ((parent)->entry.rbe_parent )->entry.rbe_right = (tmp); } else (head)->rbh_root = ( tmp); (tmp)->entry.rbe_left = (parent); (parent)->entry .rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent )) do {} while (0); } while (0); elm = (head)->rbh_root; break ; } } else { tmp = (parent)->entry.rbe_left; if ((tmp)-> entry.rbe_color == 1) { do { (tmp)->entry.rbe_color = 0; ( parent)->entry.rbe_color = 1; } while (0); do { (tmp) = (parent )->entry.rbe_left; if (((parent)->entry.rbe_left = (tmp )->entry.rbe_right)) { ((tmp)->entry.rbe_right)->entry .rbe_parent = (parent); } do {} while (0); if (((tmp)->entry .rbe_parent = (parent)->entry.rbe_parent)) { if ((parent) == ((parent)->entry.rbe_parent)->entry.rbe_left) ((parent )->entry.rbe_parent)->entry.rbe_left = (tmp); else ((parent )->entry.rbe_parent)->entry.rbe_right = (tmp); } else ( head)->rbh_root = (tmp); (tmp)->entry.rbe_right = (parent ); (parent)->entry.rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent)) do {} while (0); } while (0); tmp = (parent)->entry.rbe_left; } if (((tmp)->entry.rbe_left == ((void*)0) || ((tmp)->entry.rbe_left)->entry.rbe_color == 0) && ((tmp)->entry.rbe_right == ((void*)0) || ((tmp)->entry.rbe_right)->entry.rbe_color == 0)) { (tmp )->entry.rbe_color = 1; elm = parent; parent = (elm)->entry .rbe_parent; } else { if ((tmp)->entry.rbe_left == ((void* )0) || ((tmp)->entry.rbe_left)->entry.rbe_color == 0) { struct mpage *oright; if ((oright = (tmp)->entry.rbe_right )) (oright)->entry.rbe_color = 0; (tmp)->entry.rbe_color = 1; do { (oright) = (tmp)->entry.rbe_right; if (((tmp)-> entry.rbe_right = (oright)->entry.rbe_left)) { ((oright)-> entry.rbe_left)->entry.rbe_parent = (tmp); } do {} while ( 0); if (((oright)->entry.rbe_parent = (tmp)->entry.rbe_parent )) { if ((tmp) == ((tmp)->entry.rbe_parent)->entry.rbe_left ) ((tmp)->entry.rbe_parent)->entry.rbe_left = (oright); else ((tmp)->entry.rbe_parent)->entry.rbe_right = (oright ); } else (head)->rbh_root = (oright); (oright)->entry. rbe_left = (tmp); (tmp)->entry.rbe_parent = (oright); do { } while (0); if (((oright)->entry.rbe_parent)) do {} while (0); } while (0); tmp = (parent)->entry.rbe_left; } (tmp) ->entry.rbe_color = (parent)->entry.rbe_color; (parent) ->entry.rbe_color = 0; if ((tmp)->entry.rbe_left) ((tmp )->entry.rbe_left)->entry.rbe_color = 0; do { (tmp) = ( parent)->entry.rbe_left; if (((parent)->entry.rbe_left = (tmp)->entry.rbe_right)) { ((tmp)->entry.rbe_right)-> entry.rbe_parent = (parent); } do {} while (0); if (((tmp)-> entry.rbe_parent = (parent)->entry.rbe_parent)) { if ((parent ) == ((parent)->entry.rbe_parent)->entry.rbe_left) ((parent )->entry.rbe_parent)->entry.rbe_left = (tmp); else ((parent )->entry.rbe_parent)->entry.rbe_right = (tmp); } else ( head)->rbh_root = (tmp); (tmp)->entry.rbe_right = (parent ); (parent)->entry.rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent)) do {} while (0); } while (0); elm = (head)->rbh_root; break; } } } if (elm) (elm)->entry .rbe_color = 0; } struct mpage * page_cache_RB_REMOVE(struct page_cache *head, struct mpage *elm) { struct mpage *child, *parent, *old = elm; int color; if ((elm)->entry.rbe_left == ((void*)0) ) child = (elm)->entry.rbe_right; else if ((elm)->entry .rbe_right == ((void*)0)) child = (elm)->entry.rbe_left; else { struct mpage *left; elm = (elm)->entry.rbe_right; while ((left = (elm)->entry.rbe_left)) elm = left; child = (elm )->entry.rbe_right; parent = (elm)->entry.rbe_parent; color = (elm)->entry.rbe_color; if (child) (child)->entry.rbe_parent = parent; if (parent) { if ((parent)->entry.rbe_left == elm ) (parent)->entry.rbe_left = child; else (parent)->entry .rbe_right = child; do {} while (0); } else (head)->rbh_root = child; if ((elm)->entry.rbe_parent == old) parent = elm ; (elm)->entry = (old)->entry; if ((old)->entry.rbe_parent ) { if (((old)->entry.rbe_parent)->entry.rbe_left == old ) ((old)->entry.rbe_parent)->entry.rbe_left = elm; else ((old)->entry.rbe_parent)->entry.rbe_right = elm; do { } while (0); } else (head)->rbh_root = elm; ((old)->entry .rbe_left)->entry.rbe_parent = elm; if ((old)->entry.rbe_right ) ((old)->entry.rbe_right)->entry.rbe_parent = elm; if ( parent) { left = parent; do { do {} while (0); } while ((left = (left)->entry.rbe_parent)); } goto color; } parent = (elm )->entry.rbe_parent; color = (elm)->entry.rbe_color; if (child) (child)->entry.rbe_parent = parent; if (parent) { if ((parent)->entry.rbe_left == elm) (parent)->entry.rbe_left = child; else (parent)->entry.rbe_right = child; do {} while (0); } else (head)->rbh_root = child; color: if (color == 0) page_cache_RB_REMOVE_COLOR(head, parent, child); return ( old); } struct mpage * page_cache_RB_INSERT(struct page_cache *head, struct mpage *elm) { struct mpage *tmp; struct mpage * parent = ((void*)0); int comp = 0; tmp = (head)->rbh_root; while (tmp) { parent = tmp; comp = (mpage_cmp)(elm, parent); if (comp < 0) tmp = (tmp)->entry.rbe_left; else if (comp > 0) tmp = (tmp)->entry.rbe_right; else return (tmp); } do { (elm)->entry.rbe_parent = parent; (elm)->entry.rbe_left = (elm)->entry.rbe_right = ((void*)0); (elm)->entry.rbe_color = 1; } while (0); if (parent != ((void*)0)) { if (comp < 0 ) (parent)->entry.rbe_left = elm; else (parent)->entry. rbe_right = elm; do {} while (0); } else (head)->rbh_root = elm; page_cache_RB_INSERT_COLOR(head, elm); return (((void*) 0)); } struct mpage * page_cache_RB_FIND(struct page_cache *head , struct mpage *elm) { struct mpage *tmp = (head)->rbh_root ; int comp; while (tmp) { comp = mpage_cmp(elm, tmp); if (comp < 0) tmp = (tmp)->entry.rbe_left; else if (comp > 0 ) tmp = (tmp)->entry.rbe_right; else return (tmp); } return (((void*)0)); } struct mpage * page_cache_RB_NFIND(struct page_cache *head, struct mpage *elm) { struct mpage *tmp = (head)->rbh_root ; struct mpage *res = ((void*)0); int comp; while (tmp) { comp = mpage_cmp(elm, tmp); if (comp < 0) { res = tmp; tmp = ( tmp)->entry.rbe_left; } else if (comp > 0) tmp = (tmp)-> entry.rbe_right; else return (tmp); } return (res); } struct mpage * page_cache_RB_NEXT(struct mpage *elm) { if ((elm)->entry .rbe_right) { elm = (elm)->entry.rbe_right; while ((elm)-> entry.rbe_left) elm = (elm)->entry.rbe_left; } else { if ( (elm)->entry.rbe_parent && (elm == ((elm)->entry .rbe_parent)->entry.rbe_left)) elm = (elm)->entry.rbe_parent ; else { while ((elm)->entry.rbe_parent && (elm == ((elm)->entry.rbe_parent)->entry.rbe_right)) elm = (elm )->entry.rbe_parent; elm = (elm)->entry.rbe_parent; } } return (elm); } struct mpage * page_cache_RB_PREV(struct mpage *elm) { if ((elm)->entry.rbe_left) { elm = (elm)->entry .rbe_left; while ((elm)->entry.rbe_right) elm = (elm)-> entry.rbe_right; } else { if ((elm)->entry.rbe_parent && (elm == ((elm)->entry.rbe_parent)->entry.rbe_right)) elm = (elm)->entry.rbe_parent; else { while ((elm)->entry. rbe_parent && (elm == ((elm)->entry.rbe_parent)-> entry.rbe_left)) elm = (elm)->entry.rbe_parent; elm = (elm )->entry.rbe_parent; } } return (elm); } struct mpage * page_cache_RB_MINMAX (struct page_cache *head, int val) { struct mpage *tmp = (head )->rbh_root; struct mpage *parent = ((void*)0); while (tmp ) { parent = tmp; if (val < 0) tmp = (tmp)->entry.rbe_left ; else tmp = (tmp)->entry.rbe_right; } return (parent); }; | ||||
154 | |||||
155 | struct ppage { /* ordered list of pages */ | ||||
156 | SLIST_ENTRY(ppage)struct { struct ppage *sle_next; } entry; | ||||
157 | struct mpage *mpage; | ||||
158 | unsigned int ki; /* cursor index on page */ | ||||
159 | }; | ||||
160 | SLIST_HEAD(page_stack, ppage)struct page_stack { struct ppage *slh_first; }; | ||||
161 | |||||
162 | #define CURSOR_EMPTY(c)(((&(c)->stack)->slh_first) == ((void*)0)) SLIST_EMPTY(&(c)->stack)(((&(c)->stack)->slh_first) == ((void*)0)) | ||||
163 | #define CURSOR_TOP(c)((&(c)->stack)->slh_first) SLIST_FIRST(&(c)->stack)((&(c)->stack)->slh_first) | ||||
164 | #define CURSOR_POP(c)do { (&(c)->stack)->slh_first = (&(c)->stack )->slh_first->entry.sle_next; } while (0) SLIST_REMOVE_HEAD(&(c)->stack, entry)do { (&(c)->stack)->slh_first = (&(c)->stack )->slh_first->entry.sle_next; } while (0) | ||||
165 | #define CURSOR_PUSH(c,p)do { (p)->entry.sle_next = (&(c)->stack)->slh_first ; (&(c)->stack)->slh_first = (p); } while (0) SLIST_INSERT_HEAD(&(c)->stack, p, entry)do { (p)->entry.sle_next = (&(c)->stack)->slh_first ; (&(c)->stack)->slh_first = (p); } while (0) | ||||
166 | |||||
167 | struct cursor { | ||||
168 | struct btree *bt; | ||||
169 | struct btree_txn *txn; | ||||
170 | struct page_stack stack; /* stack of parent pages */ | ||||
171 | short initialized; /* 1 if initialized */ | ||||
172 | short eof; /* 1 if end is reached */ | ||||
173 | }; | ||||
174 | |||||
175 | #define METAHASHLEN__builtin_offsetof(struct bt_meta, hash) offsetof(struct bt_meta, hash)__builtin_offsetof(struct bt_meta, hash) | ||||
176 | #define METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs))) ((void *)((char *)p + PAGEHDRSZ__builtin_offsetof(struct page, ptrs))) | ||||
177 | |||||
178 | struct node { | ||||
179 | #define n_pgnop.np_pgno p.np_pgno | ||||
180 | #define n_dsizep.np_dsize p.np_dsize | ||||
181 | union { | ||||
182 | pgno_t np_pgno; /* child page number */ | ||||
183 | uint32_t np_dsize; /* leaf data size */ | ||||
184 | } p; | ||||
185 | uint16_t ksize; /* key size */ | ||||
186 | #define F_BIGDATA0x01 0x01 /* data put on overflow page */ | ||||
187 | uint8_t flags; | ||||
188 | char data[1]; | ||||
189 | } __packed__attribute__((__packed__)); | ||||
190 | |||||
191 | struct btree_txn { | ||||
192 | pgno_t root; /* current / new root page */ | ||||
193 | pgno_t next_pgno; /* next unallocated page */ | ||||
194 | struct btree *bt; /* btree is ref'd */ | ||||
195 | struct dirty_queue *dirty_queue; /* modified pages */ | ||||
196 | #define BT_TXN_RDONLY0x01 0x01 /* read-only transaction */ | ||||
197 | #define BT_TXN_ERROR0x02 0x02 /* an error has occurred */ | ||||
198 | unsigned int flags; | ||||
199 | }; | ||||
200 | |||||
201 | struct btree { | ||||
202 | int fd; | ||||
203 | char *path; | ||||
204 | #define BT_FIXPADDING0x01 0x01 /* internal */ | ||||
205 | unsigned int flags; | ||||
206 | bt_cmp_func cmp; /* user compare function */ | ||||
207 | struct bt_head head; | ||||
208 | struct bt_meta meta; | ||||
209 | struct page_cache *page_cache; | ||||
210 | struct lru_queue *lru_queue; | ||||
211 | struct btree_txn *txn; /* current write transaction */ | ||||
212 | int ref; /* increased by cursors & txn */ | ||||
213 | struct btree_stat stat; | ||||
214 | off_t size; /* current file size */ | ||||
215 | }; | ||||
216 | |||||
217 | #define NODESIZE__builtin_offsetof(struct node, data) offsetof(struct node, data)__builtin_offsetof(struct node, data) | ||||
218 | |||||
219 | #define INDXSIZE(k)(__builtin_offsetof(struct node, data) + ((k) == ((void*)0) ? 0 : (k)->size)) (NODESIZE__builtin_offsetof(struct node, data) + ((k) == NULL((void*)0) ? 0 : (k)->size)) | ||||
220 | #define LEAFSIZE(k, d)(__builtin_offsetof(struct node, data) + (k)->size + (d)-> size) (NODESIZE__builtin_offsetof(struct node, data) + (k)->size + (d)->size) | ||||
221 | #define NODEPTRP(p, i)((struct node *)((char *)(p) + (p)->ptrs[i])) ((struct node *)((char *)(p) + (p)->ptrs[i])) | ||||
222 | #define NODEPTR(mp, i)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[i])) NODEPTRP((mp)->page, i)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[i])) | ||||
223 | #define NODEKEY(node)(void *)((node)->data) (void *)((node)->data) | ||||
224 | #define NODEDATA(node)(void *)((char *)(node)->data + (node)->ksize) (void *)((char *)(node)->data + (node)->ksize) | ||||
225 | #define NODEPGNO(node)((node)->p.np_pgno) ((node)->p.np_pgno) | ||||
226 | #define NODEDSZ(node)((node)->p.np_dsize) ((node)->p.np_dsize) | ||||
227 | |||||
228 | #define BT_COMMIT_PAGES64 64 /* max number of pages to write in one commit */ | ||||
229 | #define BT_MAXCACHE_DEF1024 1024 /* max number of pages to keep in cache */ | ||||
230 | |||||
231 | static int btree_read_page(struct btree *bt, pgno_t pgno, | ||||
232 | struct page *page); | ||||
233 | static struct mpage *btree_get_mpage(struct btree *bt, pgno_t pgno); | ||||
234 | static int btree_search_page_root(struct btree *bt, | ||||
235 | struct mpage *root, struct btval *key, | ||||
236 | struct cursor *cursor, int modify, | ||||
237 | struct mpage **mpp); | ||||
238 | static int btree_search_page(struct btree *bt, | ||||
239 | struct btree_txn *txn, struct btval *key, | ||||
240 | struct cursor *cursor, int modify, | ||||
241 | struct mpage **mpp); | ||||
242 | |||||
243 | static int btree_write_header(struct btree *bt, int fd); | ||||
244 | static int btree_read_header(struct btree *bt); | ||||
245 | static int btree_is_meta_page(struct page *p); | ||||
246 | static int btree_read_meta(struct btree *bt, pgno_t *p_next); | ||||
247 | static int btree_write_meta(struct btree *bt, pgno_t root, | ||||
248 | unsigned int flags); | ||||
249 | static void btree_ref(struct btree *bt); | ||||
250 | |||||
251 | static struct node *btree_search_node(struct btree *bt, struct mpage *mp, | ||||
252 | struct btval *key, int *exactp, unsigned int *kip); | ||||
253 | static int btree_add_node(struct btree *bt, struct mpage *mp, | ||||
254 | indx_t indx, struct btval *key, struct btval *data, | ||||
255 | pgno_t pgno, uint8_t flags); | ||||
256 | static void btree_del_node(struct btree *bt, struct mpage *mp, | ||||
257 | indx_t indx); | ||||
258 | static int btree_read_data(struct btree *bt, struct mpage *mp, | ||||
259 | struct node *leaf, struct btval *data); | ||||
260 | |||||
261 | static int btree_rebalance(struct btree *bt, struct mpage *mp); | ||||
262 | static int btree_update_key(struct btree *bt, struct mpage *mp, | ||||
263 | indx_t indx, struct btval *key); | ||||
264 | static int btree_adjust_prefix(struct btree *bt, | ||||
265 | struct mpage *src, int delta); | ||||
266 | static int btree_move_node(struct btree *bt, struct mpage *src, | ||||
267 | indx_t srcindx, struct mpage *dst, indx_t dstindx); | ||||
268 | static int btree_merge(struct btree *bt, struct mpage *src, | ||||
269 | struct mpage *dst); | ||||
270 | static int btree_split(struct btree *bt, struct mpage **mpp, | ||||
271 | unsigned int *newindxp, struct btval *newkey, | ||||
272 | struct btval *newdata, pgno_t newpgno); | ||||
273 | static struct mpage *btree_new_page(struct btree *bt, uint32_t flags); | ||||
274 | static int btree_write_overflow_data(struct btree *bt, | ||||
275 | struct page *p, struct btval *data); | ||||
276 | |||||
277 | static void cursor_pop_page(struct cursor *cursor); | ||||
278 | static struct ppage *cursor_push_page(struct cursor *cursor, | ||||
279 | struct mpage *mp); | ||||
280 | |||||
281 | static int bt_set_key(struct btree *bt, struct mpage *mp, | ||||
282 | struct node *node, struct btval *key); | ||||
283 | static int btree_sibling(struct cursor *cursor, int move_right); | ||||
284 | static int btree_cursor_next(struct cursor *cursor, | ||||
285 | struct btval *key, struct btval *data); | ||||
286 | static int btree_cursor_set(struct cursor *cursor, | ||||
287 | struct btval *key, struct btval *data, int *exactp); | ||||
288 | static int btree_cursor_first(struct cursor *cursor, | ||||
289 | struct btval *key, struct btval *data); | ||||
290 | |||||
291 | static void bt_reduce_separator(struct btree *bt, struct node *min, | ||||
292 | struct btval *sep); | ||||
293 | static void remove_prefix(struct btree *bt, struct btval *key, | ||||
294 | size_t pfxlen); | ||||
295 | static void expand_prefix(struct btree *bt, struct mpage *mp, | ||||
296 | indx_t indx, struct btkey *expkey); | ||||
297 | static void concat_prefix(struct btree *bt, char *s1, size_t n1, | ||||
298 | char *s2, size_t n2, char *cs, size_t *cn); | ||||
299 | static void common_prefix(struct btree *bt, struct btkey *min, | ||||
300 | struct btkey *max, struct btkey *pfx); | ||||
301 | static void find_common_prefix(struct btree *bt, struct mpage *mp); | ||||
302 | |||||
303 | static size_t bt_leaf_size(struct btree *bt, struct btval *key, | ||||
304 | struct btval *data); | ||||
305 | static size_t bt_branch_size(struct btree *bt, struct btval *key); | ||||
306 | |||||
307 | static pgno_t btree_compact_tree(struct btree *bt, pgno_t pgno, | ||||
308 | struct btree *btc); | ||||
309 | |||||
310 | static int memncmp(const void *s1, size_t n1, | ||||
311 | const void *s2, size_t n2); | ||||
312 | static int memnrcmp(const void *s1, size_t n1, | ||||
313 | const void *s2, size_t n2); | ||||
314 | |||||
315 | static int | ||||
316 | memncmp(const void *s1, size_t n1, const void *s2, size_t n2) | ||||
317 | { | ||||
318 | if (n1 < n2) { | ||||
319 | if (memcmp(s1, s2, n1) == 0) | ||||
320 | return -1; | ||||
321 | } | ||||
322 | else if (n1 > n2) { | ||||
323 | if (memcmp(s1, s2, n2) == 0) | ||||
324 | return 1; | ||||
325 | } | ||||
326 | return memcmp(s1, s2, n1); | ||||
327 | } | ||||
328 | |||||
329 | static int | ||||
330 | memnrcmp(const void *s1, size_t n1, const void *s2, size_t n2) | ||||
331 | { | ||||
332 | const unsigned char *p1; | ||||
333 | const unsigned char *p2; | ||||
334 | |||||
335 | if (n1 == 0) | ||||
336 | return n2 == 0 ? 0 : -1; | ||||
337 | |||||
338 | if (n2 == 0) | ||||
339 | return n1 == 0 ? 0 : 1; | ||||
340 | |||||
341 | p1 = (const unsigned char *)s1 + n1 - 1; | ||||
342 | p2 = (const unsigned char *)s2 + n2 - 1; | ||||
343 | |||||
344 | while (*p1 == *p2) { | ||||
345 | if (p1 == s1) | ||||
346 | return (p2 == s2) ? 0 : -1; | ||||
347 | if (p2 == s2) | ||||
348 | return (p1 == p2) ? 0 : 1; | ||||
349 | p1--; | ||||
350 | p2--; | ||||
351 | } | ||||
352 | return *p1 - *p2; | ||||
353 | } | ||||
354 | |||||
355 | int | ||||
356 | btree_cmp(struct btree *bt, const struct btval *a, const struct btval *b) | ||||
357 | { | ||||
358 | return bt->cmp(a, b); | ||||
359 | } | ||||
360 | |||||
361 | static void | ||||
362 | common_prefix(struct btree *bt, struct btkey *min, struct btkey *max, | ||||
363 | struct btkey *pfx) | ||||
364 | { | ||||
365 | size_t n = 0; | ||||
366 | char *p1; | ||||
367 | char *p2; | ||||
368 | |||||
369 | if (min->len == 0 || max->len == 0) { | ||||
370 | pfx->len = 0; | ||||
371 | return; | ||||
372 | } | ||||
373 | |||||
374 | if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) { | ||||
375 | p1 = min->str + min->len - 1; | ||||
376 | p2 = max->str + max->len - 1; | ||||
377 | |||||
378 | while (*p1 == *p2) { | ||||
379 | if (p1 < min->str || p2 < max->str) | ||||
380 | break; | ||||
381 | p1--; | ||||
382 | p2--; | ||||
383 | n++; | ||||
384 | } | ||||
385 | |||||
386 | assert(n <= (int)sizeof(pfx->str))((n <= (int)sizeof(pfx->str)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 386, __func__, "n <= (int)sizeof(pfx->str)")); | ||||
387 | pfx->len = n; | ||||
388 | bcopy(p2 + 1, pfx->str, n); | ||||
389 | } else { | ||||
390 | p1 = min->str; | ||||
391 | p2 = max->str; | ||||
392 | |||||
393 | while (*p1 == *p2) { | ||||
394 | if (n == min->len || n == max->len) | ||||
395 | break; | ||||
396 | p1++; | ||||
397 | p2++; | ||||
398 | n++; | ||||
399 | } | ||||
400 | |||||
401 | assert(n <= (int)sizeof(pfx->str))((n <= (int)sizeof(pfx->str)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 401, __func__, "n <= (int)sizeof(pfx->str)")); | ||||
402 | pfx->len = n; | ||||
403 | bcopy(max->str, pfx->str, n); | ||||
404 | } | ||||
405 | } | ||||
406 | |||||
407 | static void | ||||
408 | remove_prefix(struct btree *bt, struct btval *key, size_t pfxlen) | ||||
409 | { | ||||
410 | if (pfxlen == 0 || bt->cmp != NULL((void*)0)) | ||||
411 | return; | ||||
412 | |||||
413 | DPRINTF("removing %zu bytes of prefix from key [%.*s]", pfxlen, | ||||
414 | (int)key->size, (char *)key->data); | ||||
415 | assert(pfxlen <= key->size)((pfxlen <= key->size) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 415, __func__, "pfxlen <= key->size")); | ||||
416 | key->size -= pfxlen; | ||||
417 | if (!F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) | ||||
418 | key->data = (char *)key->data + pfxlen; | ||||
419 | } | ||||
420 | |||||
421 | static void | ||||
422 | expand_prefix(struct btree *bt, struct mpage *mp, indx_t indx, | ||||
423 | struct btkey *expkey) | ||||
424 | { | ||||
425 | struct node *node; | ||||
426 | |||||
427 | node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[indx])); | ||||
428 | expkey->len = sizeof(expkey->str); | ||||
429 | concat_prefix(bt, mp->prefix.str, mp->prefix.len, | ||||
430 | NODEKEY(node)(void *)((node)->data), node->ksize, expkey->str, &expkey->len); | ||||
431 | } | ||||
432 | |||||
433 | static int | ||||
434 | bt_cmp(struct btree *bt, const struct btval *key1, const struct btval *key2, | ||||
435 | struct btkey *pfx) | ||||
436 | { | ||||
437 | if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) | ||||
438 | return memnrcmp(key1->data, key1->size - pfx->len, | ||||
439 | key2->data, key2->size); | ||||
440 | else | ||||
441 | return memncmp((char *)key1->data + pfx->len, key1->size - pfx->len, | ||||
442 | key2->data, key2->size); | ||||
443 | } | ||||
444 | |||||
445 | void | ||||
446 | btval_reset(struct btval *btv) | ||||
447 | { | ||||
448 | if (btv) { | ||||
449 | if (btv->mp) | ||||
450 | btv->mp->ref--; | ||||
451 | if (btv->free_data) | ||||
452 | free(btv->data); | ||||
453 | memset(btv, 0, sizeof(*btv)); | ||||
454 | } | ||||
455 | } | ||||
456 | |||||
457 | static int | ||||
458 | mpage_cmp(struct mpage *a, struct mpage *b) | ||||
459 | { | ||||
460 | if (a->pgno > b->pgno) | ||||
461 | return 1; | ||||
462 | if (a->pgno < b->pgno) | ||||
463 | return -1; | ||||
464 | return 0; | ||||
465 | } | ||||
466 | |||||
467 | static struct mpage * | ||||
468 | mpage_lookup(struct btree *bt, pgno_t pgno) | ||||
469 | { | ||||
470 | struct mpage find, *mp; | ||||
471 | |||||
472 | find.pgno = pgno; | ||||
473 | mp = RB_FIND(page_cache, bt->page_cache, &find)page_cache_RB_FIND(bt->page_cache, &find); | ||||
474 | if (mp) { | ||||
475 | bt->stat.hits++; | ||||
476 | /* Update LRU queue. Move page to the end. */ | ||||
477 | TAILQ_REMOVE(bt->lru_queue, mp, lru_next)do { if (((mp)->lru_next.tqe_next) != ((void*)0)) (mp)-> lru_next.tqe_next->lru_next.tqe_prev = (mp)->lru_next.tqe_prev ; else (bt->lru_queue)->tqh_last = (mp)->lru_next.tqe_prev ; *(mp)->lru_next.tqe_prev = (mp)->lru_next.tqe_next; ; ; } while (0); | ||||
478 | TAILQ_INSERT_TAIL(bt->lru_queue, mp, lru_next)do { (mp)->lru_next.tqe_next = ((void*)0); (mp)->lru_next .tqe_prev = (bt->lru_queue)->tqh_last; *(bt->lru_queue )->tqh_last = (mp); (bt->lru_queue)->tqh_last = & (mp)->lru_next.tqe_next; } while (0); | ||||
479 | } | ||||
480 | return mp; | ||||
481 | } | ||||
482 | |||||
483 | static void | ||||
484 | mpage_add(struct btree *bt, struct mpage *mp) | ||||
485 | { | ||||
486 | assert(RB_INSERT(page_cache, bt->page_cache, mp) == NULL)((page_cache_RB_INSERT(bt->page_cache, mp) == ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 486, __func__ , "RB_INSERT(page_cache, bt->page_cache, mp) == NULL")); | ||||
487 | bt->stat.cache_size++; | ||||
488 | TAILQ_INSERT_TAIL(bt->lru_queue, mp, lru_next)do { (mp)->lru_next.tqe_next = ((void*)0); (mp)->lru_next .tqe_prev = (bt->lru_queue)->tqh_last; *(bt->lru_queue )->tqh_last = (mp); (bt->lru_queue)->tqh_last = & (mp)->lru_next.tqe_next; } while (0); | ||||
489 | } | ||||
490 | |||||
491 | static void | ||||
492 | mpage_free(struct mpage *mp) | ||||
493 | { | ||||
494 | if (mp != NULL((void*)0)) { | ||||
495 | free(mp->page); | ||||
496 | free(mp); | ||||
497 | } | ||||
498 | } | ||||
499 | |||||
500 | static void | ||||
501 | mpage_del(struct btree *bt, struct mpage *mp) | ||||
502 | { | ||||
503 | assert(RB_REMOVE(page_cache, bt->page_cache, mp) == mp)((page_cache_RB_REMOVE(bt->page_cache, mp) == mp) ? (void) 0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 503, __func__ , "RB_REMOVE(page_cache, bt->page_cache, mp) == mp")); | ||||
504 | assert(bt->stat.cache_size > 0)((bt->stat.cache_size > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 504, __func__, "bt->stat.cache_size > 0")); | ||||
505 | bt->stat.cache_size--; | ||||
506 | TAILQ_REMOVE(bt->lru_queue, mp, lru_next)do { if (((mp)->lru_next.tqe_next) != ((void*)0)) (mp)-> lru_next.tqe_next->lru_next.tqe_prev = (mp)->lru_next.tqe_prev ; else (bt->lru_queue)->tqh_last = (mp)->lru_next.tqe_prev ; *(mp)->lru_next.tqe_prev = (mp)->lru_next.tqe_next; ; ; } while (0); | ||||
507 | } | ||||
508 | |||||
509 | static void | ||||
510 | mpage_flush(struct btree *bt) | ||||
511 | { | ||||
512 | struct mpage *mp; | ||||
513 | |||||
514 | while ((mp = RB_MIN(page_cache, bt->page_cache)page_cache_RB_MINMAX(bt->page_cache, -1)) != NULL((void*)0)) { | ||||
515 | mpage_del(bt, mp); | ||||
516 | mpage_free(mp); | ||||
517 | } | ||||
518 | } | ||||
519 | |||||
520 | static struct mpage * | ||||
521 | mpage_copy(struct btree *bt, struct mpage *mp) | ||||
522 | { | ||||
523 | struct mpage *copy; | ||||
524 | |||||
525 | if ((copy = calloc(1, sizeof(*copy))) == NULL((void*)0)) | ||||
526 | return NULL((void*)0); | ||||
527 | if ((copy->page = malloc(bt->head.psize)) == NULL((void*)0)) { | ||||
528 | free(copy); | ||||
529 | return NULL((void*)0); | ||||
530 | } | ||||
531 | bcopy(mp->page, copy->page, bt->head.psize); | ||||
532 | bcopy(&mp->prefix, ©->prefix, sizeof(mp->prefix)); | ||||
533 | copy->parent = mp->parent; | ||||
534 | copy->parent_index = mp->parent_index; | ||||
535 | copy->pgno = mp->pgno; | ||||
536 | |||||
537 | return copy; | ||||
538 | } | ||||
539 | |||||
540 | /* Remove the least recently used memory pages until the cache size is | ||||
541 | * within the configured bounds. Pages referenced by cursors or returned | ||||
542 | * key/data are not pruned. | ||||
543 | */ | ||||
544 | static void | ||||
545 | mpage_prune(struct btree *bt) | ||||
546 | { | ||||
547 | struct mpage *mp, *next; | ||||
548 | |||||
549 | for (mp = TAILQ_FIRST(bt->lru_queue)((bt->lru_queue)->tqh_first); mp; mp = next) { | ||||
550 | if (bt->stat.cache_size <= bt->stat.max_cache) | ||||
551 | break; | ||||
552 | next = TAILQ_NEXT(mp, lru_next)((mp)->lru_next.tqe_next); | ||||
553 | if (!mp->dirty && mp->ref <= 0) { | ||||
554 | mpage_del(bt, mp); | ||||
555 | mpage_free(mp); | ||||
556 | } | ||||
557 | } | ||||
558 | } | ||||
559 | |||||
560 | /* Mark a page as dirty and push it on the dirty queue. | ||||
561 | */ | ||||
562 | static void | ||||
563 | mpage_dirty(struct btree *bt, struct mpage *mp) | ||||
564 | { | ||||
565 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 565, __func__, "bt != NULL")); | ||||
566 | assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 566, __func__, "bt->txn != NULL")); | ||||
567 | |||||
568 | if (!mp->dirty) { | ||||
569 | mp->dirty = 1; | ||||
570 | SIMPLEQ_INSERT_TAIL(bt->txn->dirty_queue, mp, next)do { (mp)->next.sqe_next = ((void*)0); *(bt->txn->dirty_queue )->sqh_last = (mp); (bt->txn->dirty_queue)->sqh_last = &(mp)->next.sqe_next; } while (0); | ||||
571 | } | ||||
572 | } | ||||
573 | |||||
574 | /* Touch a page: make it dirty and re-insert into tree with updated pgno. | ||||
575 | */ | ||||
576 | static struct mpage * | ||||
577 | mpage_touch(struct btree *bt, struct mpage *mp) | ||||
578 | { | ||||
579 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 579, __func__, "bt != NULL")); | ||||
580 | assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 580, __func__, "bt->txn != NULL")); | ||||
581 | assert(mp != NULL)((mp != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 581, __func__, "mp != NULL")); | ||||
582 | |||||
583 | if (!mp->dirty) { | ||||
584 | DPRINTF("touching page %u -> %u", mp->pgno, bt->txn->next_pgno); | ||||
585 | if (mp->ref == 0) | ||||
586 | mpage_del(bt, mp); | ||||
587 | else { | ||||
588 | if ((mp = mpage_copy(bt, mp)) == NULL((void*)0)) | ||||
589 | return NULL((void*)0); | ||||
590 | } | ||||
591 | mp->pgno = mp->page->pgno = bt->txn->next_pgno++; | ||||
592 | mpage_dirty(bt, mp); | ||||
593 | mpage_add(bt, mp); | ||||
594 | |||||
595 | /* Update the page number to new touched page. */ | ||||
596 | if (mp->parent != NULL((void*)0)) | ||||
597 | NODEPGNO(NODEPTR(mp->parent,((((struct node *)((char *)((mp->parent)->page) + ((mp-> parent)->page)->ptrs[mp->parent_index])))->p.np_pgno ) | ||||
598 | mp->parent_index))((((struct node *)((char *)((mp->parent)->page) + ((mp-> parent)->page)->ptrs[mp->parent_index])))->p.np_pgno ) = mp->pgno; | ||||
599 | } | ||||
600 | |||||
601 | return mp; | ||||
602 | } | ||||
603 | |||||
604 | static int | ||||
605 | btree_read_page(struct btree *bt, pgno_t pgno, struct page *page) | ||||
606 | { | ||||
607 | ssize_t rc; | ||||
608 | |||||
609 | DPRINTF("reading page %u", pgno); | ||||
610 | bt->stat.reads++; | ||||
611 | if ((rc = pread(bt->fd, page, bt->head.psize, (off_t)pgno*bt->head.psize)) == 0) { | ||||
612 | DPRINTF("page %u doesn't exist", pgno); | ||||
613 | errno(*__errno()) = ENOENT2; | ||||
614 | return BT_FAIL-1; | ||||
615 | } else if (rc != (ssize_t)bt->head.psize) { | ||||
616 | if (rc > 0) | ||||
617 | errno(*__errno()) = EINVAL22; | ||||
618 | DPRINTF("read: %s", strerror(errno)); | ||||
619 | return BT_FAIL-1; | ||||
620 | } | ||||
621 | |||||
622 | if (page->pgno != pgno) { | ||||
623 | DPRINTF("page numbers don't match: %u != %u", pgno, page->pgno); | ||||
624 | errno(*__errno()) = EINVAL22; | ||||
625 | return BT_FAIL-1; | ||||
626 | } | ||||
627 | |||||
628 | DPRINTF("page %u has flags 0x%X", pgno, page->flags); | ||||
629 | |||||
630 | return BT_SUCCESS0; | ||||
631 | } | ||||
632 | |||||
633 | int | ||||
634 | btree_sync(struct btree *bt) | ||||
635 | { | ||||
636 | if (!F_ISSET(bt->flags, BT_NOSYNC)(((bt->flags) & (0x02)) == (0x02))) | ||||
637 | return fsync(bt->fd); | ||||
638 | return 0; | ||||
639 | } | ||||
640 | |||||
641 | struct btree_txn * | ||||
642 | btree_txn_begin(struct btree *bt, int rdonly) | ||||
643 | { | ||||
644 | struct btree_txn *txn; | ||||
645 | |||||
646 | if (!rdonly && bt->txn != NULL((void*)0)) { | ||||
647 | DPRINTF("write transaction already begun"); | ||||
648 | errno(*__errno()) = EBUSY16; | ||||
649 | return NULL((void*)0); | ||||
650 | } | ||||
651 | |||||
652 | if ((txn = calloc(1, sizeof(*txn))) == NULL((void*)0)) { | ||||
653 | DPRINTF("calloc: %s", strerror(errno)); | ||||
654 | return NULL((void*)0); | ||||
655 | } | ||||
656 | |||||
657 | if (rdonly) { | ||||
658 | txn->flags |= BT_TXN_RDONLY0x01; | ||||
659 | } else { | ||||
660 | txn->dirty_queue = calloc(1, sizeof(*txn->dirty_queue)); | ||||
661 | if (txn->dirty_queue == NULL((void*)0)) { | ||||
662 | free(txn); | ||||
663 | return NULL((void*)0); | ||||
664 | } | ||||
665 | SIMPLEQ_INIT(txn->dirty_queue)do { (txn->dirty_queue)->sqh_first = ((void*)0); (txn-> dirty_queue)->sqh_last = &(txn->dirty_queue)->sqh_first ; } while (0); | ||||
666 | |||||
667 | DPRINTF("taking write lock on txn %p", txn); | ||||
668 | if (flock(bt->fd, LOCK_EX0x02 | LOCK_NB0x04) != 0) { | ||||
669 | DPRINTF("flock: %s", strerror(errno)); | ||||
670 | errno(*__errno()) = EBUSY16; | ||||
671 | free(txn->dirty_queue); | ||||
672 | free(txn); | ||||
673 | return NULL((void*)0); | ||||
674 | } | ||||
675 | bt->txn = txn; | ||||
676 | } | ||||
677 | |||||
678 | txn->bt = bt; | ||||
679 | btree_ref(bt); | ||||
680 | |||||
681 | if (btree_read_meta(bt, &txn->next_pgno) != BT_SUCCESS0) { | ||||
682 | btree_txn_abort(txn); | ||||
683 | return NULL((void*)0); | ||||
684 | } | ||||
685 | |||||
686 | txn->root = bt->meta.root; | ||||
687 | DPRINTF("begin transaction on btree %p, root page %u", bt, txn->root); | ||||
688 | |||||
689 | return txn; | ||||
690 | } | ||||
691 | |||||
692 | void | ||||
693 | btree_txn_abort(struct btree_txn *txn) | ||||
694 | { | ||||
695 | struct mpage *mp; | ||||
696 | struct btree *bt; | ||||
697 | |||||
698 | if (txn == NULL((void*)0)) | ||||
699 | return; | ||||
700 | |||||
701 | bt = txn->bt; | ||||
702 | DPRINTF("abort transaction on btree %p, root page %u", bt, txn->root); | ||||
703 | |||||
704 | if (!F_ISSET(txn->flags, BT_TXN_RDONLY)(((txn->flags) & (0x01)) == (0x01))) { | ||||
705 | /* Discard all dirty pages. | ||||
706 | */ | ||||
707 | while (!SIMPLEQ_EMPTY(txn->dirty_queue)(((txn->dirty_queue)->sqh_first) == ((void*)0))) { | ||||
708 | mp = SIMPLEQ_FIRST(txn->dirty_queue)((txn->dirty_queue)->sqh_first); | ||||
709 | assert(mp->ref == 0)((mp->ref == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 709, __func__, "mp->ref == 0")); /* cursors should be closed */ | ||||
710 | mpage_del(bt, mp); | ||||
711 | SIMPLEQ_REMOVE_HEAD(txn->dirty_queue, next)do { if (((txn->dirty_queue)->sqh_first = (txn->dirty_queue )->sqh_first->next.sqe_next) == ((void*)0)) (txn->dirty_queue )->sqh_last = &(txn->dirty_queue)->sqh_first; } while (0); | ||||
712 | mpage_free(mp); | ||||
713 | } | ||||
714 | |||||
715 | DPRINTF("releasing write lock on txn %p", txn); | ||||
716 | txn->bt->txn = NULL((void*)0); | ||||
717 | if (flock(txn->bt->fd, LOCK_UN0x08) != 0) { | ||||
718 | DPRINTF("failed to unlock fd %d: %s", | ||||
719 | txn->bt->fd, strerror(errno)); | ||||
720 | } | ||||
721 | free(txn->dirty_queue); | ||||
722 | } | ||||
723 | |||||
724 | btree_close(txn->bt); | ||||
725 | free(txn); | ||||
726 | } | ||||
727 | |||||
728 | int | ||||
729 | btree_txn_commit(struct btree_txn *txn) | ||||
730 | { | ||||
731 | int n, done; | ||||
732 | ssize_t rc; | ||||
733 | off_t size; | ||||
734 | struct mpage *mp; | ||||
735 | struct btree *bt; | ||||
736 | struct iovec iov[BT_COMMIT_PAGES64]; | ||||
737 | |||||
738 | assert(txn != NULL)((txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 738, __func__, "txn != NULL")); | ||||
739 | assert(txn->bt != NULL)((txn->bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 739, __func__, "txn->bt != NULL")); | ||||
740 | |||||
741 | bt = txn->bt; | ||||
742 | |||||
743 | if (F_ISSET(txn->flags, BT_TXN_RDONLY)(((txn->flags) & (0x01)) == (0x01))) { | ||||
744 | DPRINTF("attempt to commit read-only transaction"); | ||||
745 | btree_txn_abort(txn); | ||||
746 | errno(*__errno()) = EPERM1; | ||||
747 | return BT_FAIL-1; | ||||
748 | } | ||||
749 | |||||
750 | if (txn != bt->txn) { | ||||
751 | DPRINTF("attempt to commit unknown transaction"); | ||||
752 | btree_txn_abort(txn); | ||||
753 | errno(*__errno()) = EINVAL22; | ||||
754 | return BT_FAIL-1; | ||||
755 | } | ||||
756 | |||||
757 | if (F_ISSET(txn->flags, BT_TXN_ERROR)(((txn->flags) & (0x02)) == (0x02))) { | ||||
758 | DPRINTF("error flag is set, can't commit"); | ||||
759 | btree_txn_abort(txn); | ||||
760 | errno(*__errno()) = EINVAL22; | ||||
761 | return BT_FAIL-1; | ||||
762 | } | ||||
763 | |||||
764 | if (SIMPLEQ_EMPTY(txn->dirty_queue)(((txn->dirty_queue)->sqh_first) == ((void*)0))) | ||||
765 | goto done; | ||||
766 | |||||
767 | if (F_ISSET(bt->flags, BT_FIXPADDING)(((bt->flags) & (0x01)) == (0x01))) { | ||||
768 | size = lseek(bt->fd, 0, SEEK_END2); | ||||
769 | size += bt->head.psize - (size % bt->head.psize); | ||||
770 | DPRINTF("extending to multiple of page size: %llu", size); | ||||
771 | if (ftruncate(bt->fd, size) != 0) { | ||||
772 | DPRINTF("ftruncate: %s", strerror(errno)); | ||||
773 | btree_txn_abort(txn); | ||||
774 | return BT_FAIL-1; | ||||
775 | } | ||||
776 | bt->flags &= ~BT_FIXPADDING0x01; | ||||
777 | } | ||||
778 | |||||
779 | DPRINTF("committing transaction on btree %p, root page %u", | ||||
780 | bt, txn->root); | ||||
781 | |||||
782 | /* Commit up to BT_COMMIT_PAGES dirty pages to disk until done. | ||||
783 | */ | ||||
784 | do { | ||||
785 | n = 0; | ||||
786 | done = 1; | ||||
787 | SIMPLEQ_FOREACH(mp, txn->dirty_queue, next)for((mp) = ((txn->dirty_queue)->sqh_first); (mp) != ((void *)0); (mp) = ((mp)->next.sqe_next)) { | ||||
788 | DPRINTF("committing page %u", mp->pgno); | ||||
789 | iov[n].iov_len = bt->head.psize; | ||||
790 | iov[n].iov_base = mp->page; | ||||
791 | if (++n >= BT_COMMIT_PAGES64) { | ||||
792 | done = 0; | ||||
793 | break; | ||||
794 | } | ||||
795 | } | ||||
796 | |||||
797 | if (n == 0) | ||||
798 | break; | ||||
799 | |||||
800 | DPRINTF("committing %u dirty pages", n); | ||||
801 | rc = writev(bt->fd, iov, n); | ||||
802 | if (rc != (ssize_t)bt->head.psize*n) { | ||||
803 | if (rc > 0) | ||||
804 | DPRINTF("short write, filesystem full?"); | ||||
805 | else | ||||
806 | DPRINTF("writev: %s", strerror(errno)); | ||||
807 | btree_txn_abort(txn); | ||||
808 | return BT_FAIL-1; | ||||
809 | } | ||||
810 | |||||
811 | /* Remove the dirty flag from the written pages. | ||||
812 | */ | ||||
813 | while (!SIMPLEQ_EMPTY(txn->dirty_queue)(((txn->dirty_queue)->sqh_first) == ((void*)0))) { | ||||
814 | mp = SIMPLEQ_FIRST(txn->dirty_queue)((txn->dirty_queue)->sqh_first); | ||||
815 | mp->dirty = 0; | ||||
816 | SIMPLEQ_REMOVE_HEAD(txn->dirty_queue, next)do { if (((txn->dirty_queue)->sqh_first = (txn->dirty_queue )->sqh_first->next.sqe_next) == ((void*)0)) (txn->dirty_queue )->sqh_last = &(txn->dirty_queue)->sqh_first; } while (0); | ||||
817 | if (--n == 0) | ||||
818 | break; | ||||
819 | } | ||||
820 | } while (!done); | ||||
821 | |||||
822 | if (btree_sync(bt) != 0 || | ||||
823 | btree_write_meta(bt, txn->root, 0) != BT_SUCCESS0 || | ||||
824 | btree_sync(bt) != 0) { | ||||
825 | btree_txn_abort(txn); | ||||
826 | return BT_FAIL-1; | ||||
827 | } | ||||
828 | |||||
829 | done: | ||||
830 | mpage_prune(bt); | ||||
831 | btree_txn_abort(txn); | ||||
832 | |||||
833 | return BT_SUCCESS0; | ||||
834 | } | ||||
835 | |||||
836 | static int | ||||
837 | btree_write_header(struct btree *bt, int fd) | ||||
838 | { | ||||
839 | struct stat sb; | ||||
840 | struct bt_head *h; | ||||
841 | struct page *p; | ||||
842 | ssize_t rc; | ||||
843 | unsigned int psize; | ||||
844 | |||||
845 | DPRINTF("writing header page"); | ||||
846 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 846, __func__, "bt != NULL")); | ||||
847 | |||||
848 | /* | ||||
849 | * Ask stat for 'optimal blocksize for I/O', but cap to fit in indx_t. | ||||
850 | */ | ||||
851 | if (fstat(fd, &sb) == 0) | ||||
852 | psize = MINIMUM(32*1024, sb.st_blksize)((32*1024) < (sb.st_blksize) ? (32*1024) : (sb.st_blksize) ); | ||||
853 | else | ||||
854 | psize = PAGESIZE4096; | ||||
855 | |||||
856 | if ((p = calloc(1, psize)) == NULL((void*)0)) | ||||
857 | return -1; | ||||
858 | p->flags = P_HEAD0x10; | ||||
859 | |||||
860 | h = METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs))); | ||||
861 | h->magic = BT_MAGIC0xB3DBB3DB; | ||||
862 | h->version = BT_VERSION4; | ||||
863 | h->psize = psize; | ||||
864 | bcopy(h, &bt->head, sizeof(*h)); | ||||
865 | |||||
866 | rc = write(fd, p, bt->head.psize); | ||||
867 | free(p); | ||||
868 | if (rc != (ssize_t)bt->head.psize) { | ||||
869 | if (rc > 0) | ||||
870 | DPRINTF("short write, filesystem full?"); | ||||
871 | return BT_FAIL-1; | ||||
872 | } | ||||
873 | |||||
874 | return BT_SUCCESS0; | ||||
875 | } | ||||
876 | |||||
877 | static int | ||||
878 | btree_read_header(struct btree *bt) | ||||
879 | { | ||||
880 | char page[PAGESIZE4096]; | ||||
881 | struct page *p; | ||||
882 | struct bt_head *h; | ||||
883 | int rc; | ||||
884 | |||||
885 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 885, __func__, "bt != NULL")); | ||||
886 | |||||
887 | /* We don't know the page size yet, so use a minimum value. | ||||
888 | */ | ||||
889 | |||||
890 | if ((rc = pread(bt->fd, page, PAGESIZE4096, 0)) == 0) { | ||||
891 | errno(*__errno()) = ENOENT2; | ||||
892 | return -1; | ||||
893 | } else if (rc != PAGESIZE4096) { | ||||
894 | if (rc > 0) | ||||
895 | errno(*__errno()) = EINVAL22; | ||||
896 | DPRINTF("read: %s", strerror(errno)); | ||||
897 | return -1; | ||||
898 | } | ||||
899 | |||||
900 | p = (struct page *)page; | ||||
901 | |||||
902 | if (!F_ISSET(p->flags, P_HEAD)(((p->flags) & (0x10)) == (0x10))) { | ||||
903 | DPRINTF("page %d not a header page", p->pgno); | ||||
904 | errno(*__errno()) = EINVAL22; | ||||
905 | return -1; | ||||
906 | } | ||||
907 | |||||
908 | h = METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs))); | ||||
909 | if (h->magic != BT_MAGIC0xB3DBB3DB) { | ||||
910 | DPRINTF("header has invalid magic"); | ||||
911 | errno(*__errno()) = EINVAL22; | ||||
912 | return -1; | ||||
913 | } | ||||
914 | |||||
915 | if (h->version != BT_VERSION4) { | ||||
916 | DPRINTF("database is version %u, expected version %u", | ||||
917 | bt->head.version, BT_VERSION); | ||||
918 | errno(*__errno()) = EINVAL22; | ||||
919 | return -1; | ||||
920 | } | ||||
921 | |||||
922 | bcopy(h, &bt->head, sizeof(*h)); | ||||
923 | return 0; | ||||
924 | } | ||||
925 | |||||
926 | static int | ||||
927 | btree_write_meta(struct btree *bt, pgno_t root, unsigned int flags) | ||||
928 | { | ||||
929 | struct mpage *mp; | ||||
930 | struct bt_meta *meta; | ||||
931 | ssize_t rc; | ||||
932 | |||||
933 | DPRINTF("writing meta page for root page %u", root); | ||||
934 | |||||
935 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 935, __func__, "bt != NULL")); | ||||
936 | assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 936, __func__, "bt->txn != NULL")); | ||||
937 | |||||
938 | if ((mp = btree_new_page(bt, P_META0x08)) == NULL((void*)0)) | ||||
939 | return -1; | ||||
940 | |||||
941 | bt->meta.prev_meta = bt->meta.root; | ||||
942 | bt->meta.root = root; | ||||
943 | bt->meta.flags = flags; | ||||
944 | bt->meta.created_at = time(0); | ||||
945 | bt->meta.revisions++; | ||||
946 | SHA1((unsigned char *)&bt->meta, METAHASHLEN__builtin_offsetof(struct bt_meta, hash), bt->meta.hash); | ||||
947 | |||||
948 | /* Copy the meta data changes to the new meta page. */ | ||||
949 | meta = METADATA(mp->page)((void *)((char *)mp->page + __builtin_offsetof(struct page , ptrs))); | ||||
950 | bcopy(&bt->meta, meta, sizeof(*meta)); | ||||
951 | |||||
952 | rc = write(bt->fd, mp->page, bt->head.psize); | ||||
953 | mp->dirty = 0; | ||||
954 | SIMPLEQ_REMOVE_HEAD(bt->txn->dirty_queue, next)do { if (((bt->txn->dirty_queue)->sqh_first = (bt-> txn->dirty_queue)->sqh_first->next.sqe_next) == ((void *)0)) (bt->txn->dirty_queue)->sqh_last = &(bt-> txn->dirty_queue)->sqh_first; } while (0); | ||||
955 | if (rc != (ssize_t)bt->head.psize) { | ||||
956 | if (rc > 0) | ||||
957 | DPRINTF("short write, filesystem full?"); | ||||
958 | return BT_FAIL-1; | ||||
959 | } | ||||
960 | |||||
961 | if ((bt->size = lseek(bt->fd, 0, SEEK_END2)) == -1) { | ||||
962 | DPRINTF("failed to update file size: %s", strerror(errno)); | ||||
963 | bt->size = 0; | ||||
964 | } | ||||
965 | |||||
966 | return BT_SUCCESS0; | ||||
967 | } | ||||
968 | |||||
969 | /* Returns true if page p is a valid meta page, false otherwise. | ||||
970 | */ | ||||
971 | static int | ||||
972 | btree_is_meta_page(struct page *p) | ||||
973 | { | ||||
974 | struct bt_meta *m; | ||||
975 | unsigned char hash[SHA_DIGEST_LENGTH20]; | ||||
976 | |||||
977 | m = METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs))); | ||||
978 | if (!F_ISSET(p->flags, P_META)(((p->flags) & (0x08)) == (0x08))) { | ||||
979 | DPRINTF("page %d not a meta page", p->pgno); | ||||
980 | errno(*__errno()) = EINVAL22; | ||||
981 | return 0; | ||||
982 | } | ||||
983 | |||||
984 | if (m->root >= p->pgno && m->root != P_INVALID0xFFFFFFFF) { | ||||
985 | DPRINTF("page %d points to an invalid root page", p->pgno); | ||||
986 | errno(*__errno()) = EINVAL22; | ||||
987 | return 0; | ||||
988 | } | ||||
989 | |||||
990 | SHA1((unsigned char *)m, METAHASHLEN__builtin_offsetof(struct bt_meta, hash), hash); | ||||
991 | if (bcmp(hash, m->hash, SHA_DIGEST_LENGTH20) != 0) { | ||||
992 | DPRINTF("page %d has an invalid digest", p->pgno); | ||||
993 | errno(*__errno()) = EINVAL22; | ||||
994 | return 0; | ||||
995 | } | ||||
996 | |||||
997 | return 1; | ||||
998 | } | ||||
999 | |||||
1000 | static int | ||||
1001 | btree_read_meta(struct btree *bt, pgno_t *p_next) | ||||
1002 | { | ||||
1003 | struct mpage *mp; | ||||
1004 | struct bt_meta *meta; | ||||
1005 | pgno_t meta_pgno, next_pgno; | ||||
1006 | off_t size; | ||||
1007 | |||||
1008 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1008, __func__, "bt != NULL")); | ||||
1009 | |||||
1010 | if ((size = lseek(bt->fd, 0, SEEK_END2)) == -1) | ||||
1011 | goto fail; | ||||
1012 | |||||
1013 | DPRINTF("btree_read_meta: size = %llu", size); | ||||
1014 | |||||
1015 | if (size < bt->size) { | ||||
1016 | DPRINTF("file has shrunk!"); | ||||
1017 | errno(*__errno()) = EIO5; | ||||
1018 | goto fail; | ||||
1019 | } | ||||
1020 | |||||
1021 | if (size == bt->head.psize) { /* there is only the header */ | ||||
1022 | if (p_next != NULL((void*)0)) | ||||
1023 | *p_next = 1; | ||||
1024 | return BT_SUCCESS0; /* new file */ | ||||
1025 | } | ||||
1026 | |||||
1027 | next_pgno = size / bt->head.psize; | ||||
1028 | if (next_pgno == 0) { | ||||
1029 | DPRINTF("corrupt file"); | ||||
1030 | errno(*__errno()) = EIO5; | ||||
1031 | goto fail; | ||||
1032 | } | ||||
1033 | |||||
1034 | meta_pgno = next_pgno - 1; | ||||
1035 | |||||
1036 | if (size % bt->head.psize != 0) { | ||||
1037 | DPRINTF("filesize not a multiple of the page size!"); | ||||
1038 | bt->flags |= BT_FIXPADDING0x01; | ||||
1039 | next_pgno++; | ||||
1040 | } | ||||
1041 | |||||
1042 | if (p_next != NULL((void*)0)) | ||||
1043 | *p_next = next_pgno; | ||||
1044 | |||||
1045 | if (size == bt->size) { | ||||
1046 | DPRINTF("size unchanged, keeping current meta page"); | ||||
1047 | if (F_ISSET(bt->meta.flags, BT_TOMBSTONE)(((bt->meta.flags) & (0x01)) == (0x01))) { | ||||
1048 | DPRINTF("file is dead"); | ||||
1049 | errno(*__errno()) = ESTALE70; | ||||
1050 | return BT_FAIL-1; | ||||
1051 | } else | ||||
1052 | return BT_SUCCESS0; | ||||
1053 | } | ||||
1054 | bt->size = size; | ||||
1055 | |||||
1056 | while (meta_pgno > 0) { | ||||
1057 | if ((mp = btree_get_mpage(bt, meta_pgno)) == NULL((void*)0)) | ||||
1058 | break; | ||||
1059 | if (btree_is_meta_page(mp->page)) { | ||||
1060 | meta = METADATA(mp->page)((void *)((char *)mp->page + __builtin_offsetof(struct page , ptrs))); | ||||
1061 | DPRINTF("flags = 0x%x", meta->flags); | ||||
1062 | if (F_ISSET(meta->flags, BT_TOMBSTONE)(((meta->flags) & (0x01)) == (0x01))) { | ||||
1063 | DPRINTF("file is dead"); | ||||
1064 | errno(*__errno()) = ESTALE70; | ||||
1065 | return BT_FAIL-1; | ||||
1066 | } else { | ||||
1067 | /* Make copy of last meta page. */ | ||||
1068 | bcopy(meta, &bt->meta, sizeof(bt->meta)); | ||||
1069 | return BT_SUCCESS0; | ||||
1070 | } | ||||
1071 | } | ||||
1072 | --meta_pgno; /* scan backwards to first valid meta page */ | ||||
1073 | } | ||||
1074 | |||||
1075 | errno(*__errno()) = EIO5; | ||||
1076 | fail: | ||||
1077 | if (p_next != NULL((void*)0)) | ||||
1078 | *p_next = P_INVALID0xFFFFFFFF; | ||||
1079 | return BT_FAIL-1; | ||||
1080 | } | ||||
1081 | |||||
1082 | struct btree * | ||||
1083 | btree_open_fd(int fd, unsigned int flags) | ||||
1084 | { | ||||
1085 | struct btree *bt; | ||||
1086 | int fl; | ||||
1087 | |||||
1088 | fl = fcntl(fd, F_GETFL3); | ||||
1089 | if (fcntl(fd, F_SETFL4, fl | O_APPEND0x0008) == -1) | ||||
1090 | return NULL((void*)0); | ||||
1091 | |||||
1092 | if ((bt = calloc(1, sizeof(*bt))) == NULL((void*)0)) | ||||
1093 | return NULL((void*)0); | ||||
1094 | bt->fd = fd; | ||||
1095 | bt->flags = flags; | ||||
1096 | bt->flags &= ~BT_FIXPADDING0x01; | ||||
1097 | bt->ref = 1; | ||||
1098 | bt->meta.root = P_INVALID0xFFFFFFFF; | ||||
1099 | |||||
1100 | if ((bt->page_cache = calloc(1, sizeof(*bt->page_cache))) == NULL((void*)0)) | ||||
1101 | goto fail; | ||||
1102 | bt->stat.max_cache = BT_MAXCACHE_DEF1024; | ||||
1103 | RB_INIT(bt->page_cache)do { (bt->page_cache)->rbh_root = ((void*)0); } while ( 0); | ||||
1104 | |||||
1105 | if ((bt->lru_queue = calloc(1, sizeof(*bt->lru_queue))) == NULL((void*)0)) | ||||
1106 | goto fail; | ||||
1107 | TAILQ_INIT(bt->lru_queue)do { (bt->lru_queue)->tqh_first = ((void*)0); (bt->lru_queue )->tqh_last = &(bt->lru_queue)->tqh_first; } while (0); | ||||
1108 | |||||
1109 | if (btree_read_header(bt) != 0) { | ||||
1110 | if (errno(*__errno()) != ENOENT2) | ||||
1111 | goto fail; | ||||
1112 | DPRINTF("new database"); | ||||
1113 | btree_write_header(bt, bt->fd); | ||||
1114 | } | ||||
1115 | |||||
1116 | if (btree_read_meta(bt, NULL((void*)0)) != 0) | ||||
1117 | goto fail; | ||||
1118 | |||||
1119 | DPRINTF("opened database version %u, pagesize %u", | ||||
1120 | bt->head.version, bt->head.psize); | ||||
1121 | DPRINTF("timestamp: %s", ctime(&bt->meta.created_at)); | ||||
1122 | DPRINTF("depth: %u", bt->meta.depth); | ||||
1123 | DPRINTF("entries: %llu", bt->meta.entries); | ||||
1124 | DPRINTF("revisions: %u", bt->meta.revisions); | ||||
1125 | DPRINTF("branch pages: %u", bt->meta.branch_pages); | ||||
1126 | DPRINTF("leaf pages: %u", bt->meta.leaf_pages); | ||||
1127 | DPRINTF("overflow pages: %u", bt->meta.overflow_pages); | ||||
1128 | DPRINTF("root: %u", bt->meta.root); | ||||
1129 | DPRINTF("previous meta page: %u", bt->meta.prev_meta); | ||||
1130 | |||||
1131 | return bt; | ||||
1132 | |||||
1133 | fail: | ||||
1134 | free(bt->lru_queue); | ||||
1135 | free(bt->page_cache); | ||||
1136 | free(bt); | ||||
1137 | return NULL((void*)0); | ||||
1138 | } | ||||
1139 | |||||
1140 | struct btree * | ||||
1141 | btree_open(const char *path, unsigned int flags, mode_t mode) | ||||
1142 | { | ||||
1143 | int fd, oflags; | ||||
1144 | struct btree *bt; | ||||
1145 | |||||
1146 | if (F_ISSET(flags, BT_RDONLY)(((flags) & (0x04)) == (0x04))) | ||||
1147 | oflags = O_RDONLY0x0000; | ||||
1148 | else | ||||
1149 | oflags = O_RDWR0x0002 | O_CREAT0x0200 | O_APPEND0x0008; | ||||
1150 | |||||
1151 | if ((fd = open(path, oflags, mode)) == -1) | ||||
1152 | return NULL((void*)0); | ||||
1153 | |||||
1154 | if ((bt = btree_open_fd(fd, flags)) == NULL((void*)0)) | ||||
1155 | close(fd); | ||||
1156 | else { | ||||
1157 | bt->path = strdup(path); | ||||
1158 | DPRINTF("opened btree %p", bt); | ||||
1159 | } | ||||
1160 | |||||
1161 | return bt; | ||||
1162 | } | ||||
1163 | |||||
1164 | static void | ||||
1165 | btree_ref(struct btree *bt) | ||||
1166 | { | ||||
1167 | bt->ref++; | ||||
1168 | DPRINTF("ref is now %d on btree %p", bt->ref, bt); | ||||
1169 | } | ||||
1170 | |||||
1171 | void | ||||
1172 | btree_close(struct btree *bt) | ||||
1173 | { | ||||
1174 | if (bt == NULL((void*)0)) | ||||
1175 | return; | ||||
1176 | |||||
1177 | if (--bt->ref == 0) { | ||||
1178 | DPRINTF("ref is zero, closing btree %p", bt); | ||||
1179 | close(bt->fd); | ||||
1180 | mpage_flush(bt); | ||||
1181 | free(bt->lru_queue); | ||||
1182 | free(bt->path); | ||||
1183 | free(bt->page_cache); | ||||
1184 | free(bt); | ||||
1185 | } else | ||||
1186 | DPRINTF("ref is now %d on btree %p", bt->ref, bt); | ||||
1187 | } | ||||
1188 | |||||
1189 | /* Search for key within a leaf page, using binary search. | ||||
1190 | * Returns the smallest entry larger or equal to the key. | ||||
1191 | * If exactp is non-null, stores whether the found entry was an exact match | ||||
1192 | * in *exactp (1 or 0). | ||||
1193 | * If kip is non-null, stores the index of the found entry in *kip. | ||||
1194 | * If no entry larger of equal to the key is found, returns NULL. | ||||
1195 | */ | ||||
1196 | static struct node * | ||||
1197 | btree_search_node(struct btree *bt, struct mpage *mp, struct btval *key, | ||||
1198 | int *exactp, unsigned int *kip) | ||||
1199 | { | ||||
1200 | unsigned int i = 0; | ||||
1201 | int low, high; | ||||
1202 | int rc = 0; | ||||
1203 | struct node *node; | ||||
1204 | struct btval nodekey; | ||||
1205 | |||||
1206 | DPRINTF("searching %lu keys in %s page %u with prefix [%.*s]", | ||||
1207 | NUMKEYS(mp), | ||||
1208 | IS_LEAF(mp) ? "leaf" : "branch", | ||||
1209 | mp->pgno, (int)mp->prefix.len, (char *)mp->prefix.str); | ||||
1210 | |||||
1211 | assert(NUMKEYS(mp) > 0)(((((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1211, __func__, "NUMKEYS(mp) > 0")); | ||||
1212 | |||||
1213 | memset(&nodekey, 0, sizeof(nodekey)); | ||||
1214 | |||||
1215 | low = IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)) ? 0 : 1; | ||||
1216 | high = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) - 1; | ||||
1217 | while (low <= high) { | ||||
1218 | i = (low + high) >> 1; | ||||
1219 | node = NODEPTR(mp, i)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[i])); | ||||
1220 | |||||
1221 | nodekey.size = node->ksize; | ||||
1222 | nodekey.data = NODEKEY(node)(void *)((node)->data); | ||||
1223 | |||||
1224 | if (bt->cmp) | ||||
1225 | rc = bt->cmp(key, &nodekey); | ||||
1226 | else | ||||
1227 | rc = bt_cmp(bt, key, &nodekey, &mp->prefix); | ||||
1228 | |||||
1229 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) | ||||
1230 | DPRINTF("found leaf index %u [%.*s], rc = %d", | ||||
1231 | i, (int)nodekey.size, (char *)nodekey.data, rc); | ||||
1232 | else | ||||
1233 | DPRINTF("found branch index %u [%.*s -> %u], rc = %d", | ||||
1234 | i, (int)node->ksize, (char *)NODEKEY(node), | ||||
1235 | node->n_pgno, rc); | ||||
1236 | |||||
1237 | if (rc == 0) | ||||
1238 | break; | ||||
1239 | if (rc > 0) | ||||
1240 | low = i + 1; | ||||
1241 | else | ||||
1242 | high = i - 1; | ||||
1243 | } | ||||
1244 | |||||
1245 | if (rc > 0) { /* Found entry is less than the key. */ | ||||
1246 | i++; /* Skip to get the smallest entry larger than key. */ | ||||
1247 | if (i >= NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1)) | ||||
1248 | /* There is no entry larger or equal to the key. */ | ||||
1249 | return NULL((void*)0); | ||||
1250 | } | ||||
1251 | if (exactp) | ||||
1252 | *exactp = (rc == 0); | ||||
1253 | if (kip) /* Store the key index if requested. */ | ||||
1254 | *kip = i; | ||||
1255 | |||||
1256 | return NODEPTR(mp, i)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[i])); | ||||
1257 | } | ||||
1258 | |||||
1259 | static void | ||||
1260 | cursor_pop_page(struct cursor *cursor) | ||||
1261 | { | ||||
1262 | struct ppage *top; | ||||
1263 | |||||
1264 | top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | ||||
1265 | CURSOR_POP(cursor)do { (&(cursor)->stack)->slh_first = (&(cursor) ->stack)->slh_first->entry.sle_next; } while (0); | ||||
1266 | top->mpage->ref--; | ||||
1267 | |||||
1268 | DPRINTF("popped page %u off cursor %p", top->mpage->pgno, cursor); | ||||
1269 | |||||
1270 | free(top); | ||||
1271 | } | ||||
1272 | |||||
1273 | static struct ppage * | ||||
1274 | cursor_push_page(struct cursor *cursor, struct mpage *mp) | ||||
1275 | { | ||||
1276 | struct ppage *ppage; | ||||
1277 | |||||
1278 | DPRINTF("pushing page %u on cursor %p", mp->pgno, cursor); | ||||
1279 | |||||
1280 | if ((ppage = calloc(1, sizeof(*ppage))) == NULL((void*)0)) | ||||
1281 | return NULL((void*)0); | ||||
1282 | ppage->mpage = mp; | ||||
1283 | mp->ref++; | ||||
1284 | CURSOR_PUSH(cursor, ppage)do { (ppage)->entry.sle_next = (&(cursor)->stack)-> slh_first; (&(cursor)->stack)->slh_first = (ppage); } while (0); | ||||
1285 | return ppage; | ||||
1286 | } | ||||
1287 | |||||
1288 | static struct mpage * | ||||
1289 | btree_get_mpage(struct btree *bt, pgno_t pgno) | ||||
1290 | { | ||||
1291 | struct mpage *mp; | ||||
1292 | |||||
1293 | mp = mpage_lookup(bt, pgno); | ||||
1294 | if (mp == NULL((void*)0)) { | ||||
1295 | if ((mp = calloc(1, sizeof(*mp))) == NULL((void*)0)) | ||||
1296 | return NULL((void*)0); | ||||
1297 | if ((mp->page = malloc(bt->head.psize)) == NULL((void*)0)) { | ||||
1298 | free(mp); | ||||
1299 | return NULL((void*)0); | ||||
1300 | } | ||||
1301 | if (btree_read_page(bt, pgno, mp->page) != BT_SUCCESS0) { | ||||
1302 | mpage_free(mp); | ||||
1303 | return NULL((void*)0); | ||||
1304 | } | ||||
1305 | mp->pgno = pgno; | ||||
1306 | mpage_add(bt, mp); | ||||
1307 | } else | ||||
1308 | DPRINTF("returning page %u from cache", pgno); | ||||
1309 | |||||
1310 | return mp; | ||||
1311 | } | ||||
1312 | |||||
1313 | static void | ||||
1314 | concat_prefix(struct btree *bt, char *s1, size_t n1, char *s2, size_t n2, | ||||
1315 | char *cs, size_t *cn) | ||||
1316 | { | ||||
1317 | assert(*cn >= n1 + n2)((*cn >= n1 + n2) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1317, __func__, "*cn >= n1 + n2")); | ||||
1318 | if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) { | ||||
1319 | bcopy(s2, cs, n2); | ||||
1320 | bcopy(s1, cs + n2, n1); | ||||
1321 | } else { | ||||
1322 | bcopy(s1, cs, n1); | ||||
1323 | bcopy(s2, cs + n1, n2); | ||||
1324 | } | ||||
1325 | *cn = n1 + n2; | ||||
1326 | } | ||||
1327 | |||||
1328 | static void | ||||
1329 | find_common_prefix(struct btree *bt, struct mpage *mp) | ||||
1330 | { | ||||
1331 | indx_t lbound = 0, ubound = 0; | ||||
1332 | struct mpage *lp, *up; | ||||
1333 | struct btkey lprefix, uprefix; | ||||
1334 | |||||
1335 | mp->prefix.len = 0; | ||||
1336 | if (bt->cmp != NULL((void*)0)) | ||||
1337 | return; | ||||
1338 | |||||
1339 | lp = mp; | ||||
1340 | while (lp->parent != NULL((void*)0)) { | ||||
1341 | if (lp->parent_index > 0) { | ||||
1342 | lbound = lp->parent_index; | ||||
1343 | break; | ||||
1344 | } | ||||
1345 | lp = lp->parent; | ||||
1346 | } | ||||
1347 | |||||
1348 | up = mp; | ||||
1349 | while (up->parent != NULL((void*)0)) { | ||||
1350 | if (up->parent_index + 1 < (indx_t)NUMKEYS(up->parent)(((up->parent)->page->b.fb.fb_lower - __builtin_offsetof (struct page, ptrs)) >> 1)) { | ||||
1351 | ubound = up->parent_index + 1; | ||||
1352 | break; | ||||
1353 | } | ||||
1354 | up = up->parent; | ||||
1355 | } | ||||
1356 | |||||
1357 | if (lp->parent != NULL((void*)0) && up->parent != NULL((void*)0)) { | ||||
1358 | expand_prefix(bt, lp->parent, lbound, &lprefix); | ||||
1359 | expand_prefix(bt, up->parent, ubound, &uprefix); | ||||
1360 | common_prefix(bt, &lprefix, &uprefix, &mp->prefix); | ||||
1361 | } | ||||
1362 | else if (mp->parent) | ||||
1363 | bcopy(&mp->parent->prefix, &mp->prefix, sizeof(mp->prefix)); | ||||
1364 | |||||
1365 | DPRINTF("found common prefix [%.*s] (len %zu) for page %u", | ||||
1366 | (int)mp->prefix.len, mp->prefix.str, mp->prefix.len, mp->pgno); | ||||
1367 | } | ||||
1368 | |||||
1369 | static int | ||||
1370 | btree_search_page_root(struct btree *bt, struct mpage *root, struct btval *key, | ||||
1371 | struct cursor *cursor, int modify, struct mpage **mpp) | ||||
1372 | { | ||||
1373 | struct mpage *mp, *parent; | ||||
1374 | |||||
1375 | if (cursor && cursor_push_page(cursor, root) == NULL((void*)0)) | ||||
1376 | return BT_FAIL-1; | ||||
1377 | |||||
1378 | mp = root; | ||||
1379 | while (IS_BRANCH(mp)((((mp)->page->flags) & (0x01)) == (0x01))) { | ||||
1380 | unsigned int i = 0; | ||||
1381 | struct node *node; | ||||
1382 | |||||
1383 | DPRINTF("branch page %u has %lu keys", mp->pgno, NUMKEYS(mp)); | ||||
1384 | assert(NUMKEYS(mp) > 1)(((((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) > 1) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1384, __func__, "NUMKEYS(mp) > 1")); | ||||
1385 | DPRINTF("found index 0 to page %u", NODEPGNO(NODEPTR(mp, 0))); | ||||
1386 | |||||
1387 | if (key == NULL((void*)0)) /* Initialize cursor to first page. */ | ||||
1388 | i = 0; | ||||
1389 | else { | ||||
1390 | int exact; | ||||
1391 | node = btree_search_node(bt, mp, key, &exact, &i); | ||||
1392 | if (node == NULL((void*)0)) | ||||
1393 | i = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) - 1; | ||||
1394 | else if (!exact) { | ||||
1395 | assert(i > 0)((i > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1395, __func__, "i > 0")); | ||||
1396 | i--; | ||||
1397 | } | ||||
1398 | } | ||||
1399 | |||||
1400 | if (key) | ||||
1401 | DPRINTF("following index %u for key %.*s", | ||||
1402 | i, (int)key->size, (char *)key->data); | ||||
1403 | assert(i >= 0 && i < NUMKEYS(mp))((i >= 0 && i < (((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1)) ? (void )0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1403, __func__ , "i >= 0 && i < NUMKEYS(mp)")); | ||||
1404 | node = NODEPTR(mp, i)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[i])); | ||||
1405 | |||||
1406 | if (cursor) | ||||
1407 | CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first)->ki = i; | ||||
1408 | |||||
1409 | parent = mp; | ||||
1410 | if ((mp = btree_get_mpage(bt, NODEPGNO(node)((node)->p.np_pgno))) == NULL((void*)0)) | ||||
1411 | return BT_FAIL-1; | ||||
1412 | mp->parent = parent; | ||||
1413 | mp->parent_index = i; | ||||
1414 | find_common_prefix(bt, mp); | ||||
1415 | |||||
1416 | if (cursor && cursor_push_page(cursor, mp) == NULL((void*)0)) | ||||
1417 | return BT_FAIL-1; | ||||
1418 | |||||
1419 | if (modify && (mp = mpage_touch(bt, mp)) == NULL((void*)0)) | ||||
1420 | return BT_FAIL-1; | ||||
1421 | } | ||||
1422 | |||||
1423 | if (!IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) { | ||||
1424 | DPRINTF("internal error, index points to a %02X page!?", | ||||
1425 | mp->page->flags); | ||||
1426 | return BT_FAIL-1; | ||||
1427 | } | ||||
1428 | |||||
1429 | DPRINTF("found leaf page %u for key %.*s", mp->pgno, | ||||
1430 | key ? (int)key->size : 0, key ? (char *)key->data : NULL); | ||||
1431 | |||||
1432 | *mpp = mp; | ||||
1433 | return BT_SUCCESS0; | ||||
1434 | } | ||||
1435 | |||||
1436 | /* Search for the page a given key should be in. | ||||
1437 | * Stores a pointer to the found page in *mpp. | ||||
1438 | * If key is NULL, search for the lowest page (used by btree_cursor_first). | ||||
1439 | * If cursor is non-null, pushes parent pages on the cursor stack. | ||||
1440 | * If modify is true, visited pages are updated with new page numbers. | ||||
1441 | */ | ||||
1442 | static int | ||||
1443 | btree_search_page(struct btree *bt, struct btree_txn *txn, struct btval *key, | ||||
1444 | struct cursor *cursor, int modify, struct mpage **mpp) | ||||
1445 | { | ||||
1446 | int rc; | ||||
1447 | pgno_t root; | ||||
1448 | struct mpage *mp; | ||||
1449 | |||||
1450 | /* Can't modify pages outside a transaction. */ | ||||
1451 | if (txn == NULL((void*)0) && modify) { | ||||
1452 | errno(*__errno()) = EINVAL22; | ||||
1453 | return BT_FAIL-1; | ||||
1454 | } | ||||
1455 | |||||
1456 | /* Choose which root page to start with. If a transaction is given | ||||
1457 | * use the root page from the transaction, otherwise read the last | ||||
1458 | * committed root page. | ||||
1459 | */ | ||||
1460 | if (txn == NULL((void*)0)) { | ||||
1461 | if ((rc = btree_read_meta(bt, NULL((void*)0))) != BT_SUCCESS0) | ||||
1462 | return rc; | ||||
1463 | root = bt->meta.root; | ||||
1464 | } else if (F_ISSET(txn->flags, BT_TXN_ERROR)(((txn->flags) & (0x02)) == (0x02))) { | ||||
1465 | DPRINTF("transaction has failed, must abort"); | ||||
1466 | errno(*__errno()) = EINVAL22; | ||||
1467 | return BT_FAIL-1; | ||||
1468 | } else | ||||
1469 | root = txn->root; | ||||
1470 | |||||
1471 | if (root == P_INVALID0xFFFFFFFF) { /* Tree is empty. */ | ||||
1472 | DPRINTF("tree is empty"); | ||||
1473 | errno(*__errno()) = ENOENT2; | ||||
1474 | return BT_FAIL-1; | ||||
1475 | } | ||||
1476 | |||||
1477 | if ((mp = btree_get_mpage(bt, root)) == NULL((void*)0)) | ||||
1478 | return BT_FAIL-1; | ||||
1479 | |||||
1480 | DPRINTF("root page has flags 0x%X", mp->page->flags); | ||||
1481 | |||||
1482 | assert(mp->parent == NULL)((mp->parent == ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1482, __func__, "mp->parent == NULL")); | ||||
1483 | assert(mp->prefix.len == 0)((mp->prefix.len == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1483, __func__, "mp->prefix.len == 0")); | ||||
1484 | |||||
1485 | if (modify && !mp->dirty) { | ||||
1486 | if ((mp = mpage_touch(bt, mp)) == NULL((void*)0)) | ||||
1487 | return BT_FAIL-1; | ||||
1488 | txn->root = mp->pgno; | ||||
1489 | } | ||||
1490 | |||||
1491 | return btree_search_page_root(bt, mp, key, cursor, modify, mpp); | ||||
1492 | } | ||||
1493 | |||||
1494 | static int | ||||
1495 | btree_read_data(struct btree *bt, struct mpage *mp, struct node *leaf, | ||||
1496 | struct btval *data) | ||||
1497 | { | ||||
1498 | struct mpage *omp; /* overflow mpage */ | ||||
1499 | size_t psz; | ||||
1500 | size_t max; | ||||
1501 | size_t sz = 0; | ||||
1502 | pgno_t pgno; | ||||
1503 | |||||
1504 | memset(data, 0, sizeof(*data)); | ||||
1505 | max = bt->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs); | ||||
1506 | |||||
1507 | if (!F_ISSET(leaf->flags, F_BIGDATA)(((leaf->flags) & (0x01)) == (0x01))) { | ||||
1508 | data->size = leaf->n_dsizep.np_dsize; | ||||
1509 | if (data->size > 0) { | ||||
1510 | if (mp == NULL((void*)0)) { | ||||
1511 | if ((data->data = malloc(data->size)) == NULL((void*)0)) | ||||
1512 | return BT_FAIL-1; | ||||
1513 | bcopy(NODEDATA(leaf)(void *)((char *)(leaf)->data + (leaf)->ksize), data->data, data->size); | ||||
1514 | data->free_data = 1; | ||||
1515 | data->mp = NULL((void*)0); | ||||
1516 | } else { | ||||
1517 | data->data = NODEDATA(leaf)(void *)((char *)(leaf)->data + (leaf)->ksize); | ||||
1518 | data->free_data = 0; | ||||
1519 | data->mp = mp; | ||||
1520 | mp->ref++; | ||||
1521 | } | ||||
1522 | } | ||||
1523 | return BT_SUCCESS0; | ||||
1524 | } | ||||
1525 | |||||
1526 | /* Read overflow data. | ||||
1527 | */ | ||||
1528 | DPRINTF("allocating %u byte for overflow data", leaf->n_dsize); | ||||
1529 | if ((data->data = malloc(leaf->n_dsizep.np_dsize)) == NULL((void*)0)) | ||||
1530 | return BT_FAIL-1; | ||||
1531 | data->size = leaf->n_dsizep.np_dsize; | ||||
1532 | data->free_data = 1; | ||||
1533 | data->mp = NULL((void*)0); | ||||
1534 | bcopy(NODEDATA(leaf)(void *)((char *)(leaf)->data + (leaf)->ksize), &pgno, sizeof(pgno)); | ||||
1535 | for (sz = 0; sz < data->size; ) { | ||||
1536 | if ((omp = btree_get_mpage(bt, pgno)) == NULL((void*)0) || | ||||
1537 | !F_ISSET(omp->page->flags, P_OVERFLOW)(((omp->page->flags) & (0x04)) == (0x04))) { | ||||
1538 | DPRINTF("read overflow page %u failed", pgno); | ||||
1539 | free(data->data); | ||||
1540 | mpage_free(omp); | ||||
1541 | return BT_FAIL-1; | ||||
1542 | } | ||||
1543 | psz = data->size - sz; | ||||
1544 | if (psz > max) | ||||
1545 | psz = max; | ||||
1546 | bcopy(omp->page->ptrs, (char *)data->data + sz, psz); | ||||
1547 | sz += psz; | ||||
1548 | pgno = omp->page->p_next_pgnob.pb_next_pgno; | ||||
1549 | } | ||||
1550 | |||||
1551 | return BT_SUCCESS0; | ||||
1552 | } | ||||
1553 | |||||
1554 | int | ||||
1555 | btree_txn_get(struct btree *bt, struct btree_txn *txn, | ||||
1556 | struct btval *key, struct btval *data) | ||||
1557 | { | ||||
1558 | int rc, exact; | ||||
1559 | struct node *leaf; | ||||
1560 | struct mpage *mp; | ||||
1561 | |||||
1562 | assert(key)((key) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1562, __func__, "key")); | ||||
1563 | assert(data)((data) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1563, __func__, "data")); | ||||
1564 | DPRINTF("===> get key [%.*s]", (int)key->size, (char *)key->data); | ||||
1565 | |||||
1566 | if (bt != NULL((void*)0) && txn != NULL((void*)0) && bt != txn->bt) { | ||||
1567 | errno(*__errno()) = EINVAL22; | ||||
1568 | return BT_FAIL-1; | ||||
1569 | } | ||||
1570 | |||||
1571 | if (bt == NULL((void*)0)) { | ||||
1572 | if (txn == NULL((void*)0)) { | ||||
1573 | errno(*__errno()) = EINVAL22; | ||||
1574 | return BT_FAIL-1; | ||||
1575 | } | ||||
1576 | bt = txn->bt; | ||||
1577 | } | ||||
1578 | |||||
1579 | if (key->size == 0 || key->size > MAXKEYSIZE255) { | ||||
1580 | errno(*__errno()) = EINVAL22; | ||||
1581 | return BT_FAIL-1; | ||||
1582 | } | ||||
1583 | |||||
1584 | if ((rc = btree_search_page(bt, txn, key, NULL((void*)0), 0, &mp)) != BT_SUCCESS0) | ||||
1585 | return rc; | ||||
1586 | |||||
1587 | leaf = btree_search_node(bt, mp, key, &exact, NULL((void*)0)); | ||||
1588 | if (leaf && exact) | ||||
1589 | rc = btree_read_data(bt, mp, leaf, data); | ||||
1590 | else { | ||||
1591 | errno(*__errno()) = ENOENT2; | ||||
1592 | rc = BT_FAIL-1; | ||||
1593 | } | ||||
1594 | |||||
1595 | mpage_prune(bt); | ||||
1596 | return rc; | ||||
1597 | } | ||||
1598 | |||||
1599 | static int | ||||
1600 | btree_sibling(struct cursor *cursor, int move_right) | ||||
1601 | { | ||||
1602 | int rc; | ||||
1603 | struct node *indx; | ||||
1604 | struct ppage *parent, *top; | ||||
1605 | struct mpage *mp; | ||||
1606 | |||||
1607 | top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | ||||
1608 | if ((parent = SLIST_NEXT(top, entry)((top)->entry.sle_next)) == NULL((void*)0)) { | ||||
1609 | errno(*__errno()) = ENOENT2; | ||||
1610 | return BT_FAIL-1; /* root has no siblings */ | ||||
1611 | } | ||||
1612 | |||||
1613 | DPRINTF("parent page is page %u, index %u", | ||||
1614 | parent->mpage->pgno, parent->ki); | ||||
1615 | |||||
1616 | cursor_pop_page(cursor); | ||||
1617 | if (move_right ? (parent->ki + 1 >= NUMKEYS(parent->mpage)(((parent->mpage)->page->b.fb.fb_lower - __builtin_offsetof (struct page, ptrs)) >> 1)) | ||||
1618 | : (parent->ki == 0)) { | ||||
1619 | DPRINTF("no more keys left, moving to %s sibling", | ||||
1620 | move_right ? "right" : "left"); | ||||
1621 | if ((rc = btree_sibling(cursor, move_right)) != BT_SUCCESS0) | ||||
1622 | return rc; | ||||
1623 | parent = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | ||||
1624 | } else { | ||||
1625 | if (move_right) | ||||
1626 | parent->ki++; | ||||
1627 | else | ||||
1628 | parent->ki--; | ||||
1629 | DPRINTF("just moving to %s index key %u", | ||||
1630 | move_right ? "right" : "left", parent->ki); | ||||
1631 | } | ||||
1632 | assert(IS_BRANCH(parent->mpage))((((((parent->mpage)->page->flags) & (0x01)) == ( 0x01))) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1632, __func__, "IS_BRANCH(parent->mpage)")); | ||||
1633 | |||||
1634 | indx = NODEPTR(parent->mpage, parent->ki)((struct node *)((char *)((parent->mpage)->page) + ((parent ->mpage)->page)->ptrs[parent->ki])); | ||||
1635 | if ((mp = btree_get_mpage(cursor->bt, indx->n_pgnop.np_pgno)) == NULL((void*)0)) | ||||
1636 | return BT_FAIL-1; | ||||
1637 | mp->parent = parent->mpage; | ||||
1638 | mp->parent_index = parent->ki; | ||||
1639 | |||||
1640 | cursor_push_page(cursor, mp); | ||||
1641 | find_common_prefix(cursor->bt, mp); | ||||
1642 | |||||
1643 | return BT_SUCCESS0; | ||||
1644 | } | ||||
1645 | |||||
1646 | static int | ||||
1647 | bt_set_key(struct btree *bt, struct mpage *mp, struct node *node, | ||||
1648 | struct btval *key) | ||||
1649 | { | ||||
1650 | if (key == NULL((void*)0)) | ||||
1651 | return 0; | ||||
1652 | |||||
1653 | if (mp->prefix.len > 0) { | ||||
1654 | key->size = node->ksize + mp->prefix.len; | ||||
1655 | key->data = malloc(key->size); | ||||
1656 | if (key->data == NULL((void*)0)) | ||||
1657 | return -1; | ||||
1658 | concat_prefix(bt, | ||||
1659 | mp->prefix.str, mp->prefix.len, | ||||
1660 | NODEKEY(node)(void *)((node)->data), node->ksize, | ||||
1661 | key->data, &key->size); | ||||
1662 | key->free_data = 1; | ||||
1663 | } else { | ||||
1664 | key->size = node->ksize; | ||||
1665 | key->data = NODEKEY(node)(void *)((node)->data); | ||||
1666 | key->free_data = 0; | ||||
1667 | key->mp = mp; | ||||
1668 | mp->ref++; | ||||
1669 | } | ||||
1670 | |||||
1671 | return 0; | ||||
1672 | } | ||||
1673 | |||||
1674 | static int | ||||
1675 | btree_cursor_next(struct cursor *cursor, struct btval *key, struct btval *data) | ||||
1676 | { | ||||
1677 | struct ppage *top; | ||||
1678 | struct mpage *mp; | ||||
1679 | struct node *leaf; | ||||
1680 | |||||
1681 | if (cursor->eof) { | ||||
1682 | errno(*__errno()) = ENOENT2; | ||||
1683 | return BT_FAIL-1; | ||||
1684 | } | ||||
1685 | |||||
1686 | assert(cursor->initialized)((cursor->initialized) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1686, __func__, "cursor->initialized")); | ||||
1687 | |||||
1688 | top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | ||||
1689 | mp = top->mpage; | ||||
1690 | |||||
1691 | DPRINTF("cursor_next: top page is %u in cursor %p", mp->pgno, cursor); | ||||
1692 | |||||
1693 | if (top->ki + 1 >= NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1)) { | ||||
1694 | DPRINTF("=====> move to next sibling page"); | ||||
1695 | if (btree_sibling(cursor, 1) != BT_SUCCESS0) { | ||||
1696 | cursor->eof = 1; | ||||
1697 | return BT_FAIL-1; | ||||
1698 | } | ||||
1699 | top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | ||||
1700 | mp = top->mpage; | ||||
1701 | DPRINTF("next page is %u, key index %u", mp->pgno, top->ki); | ||||
1702 | } else | ||||
1703 | top->ki++; | ||||
1704 | |||||
1705 | DPRINTF("==> cursor points to page %u with %lu keys, key index %u", | ||||
1706 | mp->pgno, NUMKEYS(mp), top->ki); | ||||
1707 | |||||
1708 | assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void )0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1708, __func__ , "IS_LEAF(mp)")); | ||||
1709 | leaf = NODEPTR(mp, top->ki)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[top->ki])); | ||||
1710 | |||||
1711 | if (data && btree_read_data(cursor->bt, mp, leaf, data) != BT_SUCCESS0) | ||||
1712 | return BT_FAIL-1; | ||||
1713 | |||||
1714 | if (bt_set_key(cursor->bt, mp, leaf, key) != 0) | ||||
1715 | return BT_FAIL-1; | ||||
1716 | |||||
1717 | return BT_SUCCESS0; | ||||
1718 | } | ||||
1719 | |||||
1720 | static int | ||||
1721 | btree_cursor_set(struct cursor *cursor, struct btval *key, struct btval *data, | ||||
1722 | int *exactp) | ||||
1723 | { | ||||
1724 | int rc; | ||||
1725 | struct node *leaf; | ||||
1726 | struct mpage *mp; | ||||
1727 | struct ppage *top; | ||||
1728 | |||||
1729 | assert(cursor)((cursor) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1729, __func__, "cursor")); | ||||
1730 | assert(key)((key) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1730, __func__, "key")); | ||||
1731 | assert(key->size > 0)((key->size > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1731, __func__, "key->size > 0")); | ||||
1732 | |||||
1733 | rc = btree_search_page(cursor->bt, cursor->txn, key, cursor, 0, &mp); | ||||
1734 | if (rc != BT_SUCCESS0) | ||||
1735 | return rc; | ||||
1736 | assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void )0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1736, __func__ , "IS_LEAF(mp)")); | ||||
1737 | |||||
1738 | top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | ||||
1739 | leaf = btree_search_node(cursor->bt, mp, key, exactp, &top->ki); | ||||
1740 | if (exactp != NULL((void*)0) && !*exactp) { | ||||
1741 | /* BT_CURSOR_EXACT specified and not an exact match. */ | ||||
1742 | errno(*__errno()) = ENOENT2; | ||||
1743 | return BT_FAIL-1; | ||||
1744 | } | ||||
1745 | |||||
1746 | if (leaf == NULL((void*)0)) { | ||||
1747 | DPRINTF("===> inexact leaf not found, goto sibling"); | ||||
1748 | if (btree_sibling(cursor, 1) != BT_SUCCESS0) | ||||
1749 | return BT_FAIL-1; /* no entries matched */ | ||||
1750 | top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | ||||
1751 | top->ki = 0; | ||||
1752 | mp = top->mpage; | ||||
1753 | assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void )0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1753, __func__ , "IS_LEAF(mp)")); | ||||
1754 | leaf = NODEPTR(mp, 0)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[0])); | ||||
1755 | } | ||||
1756 | |||||
1757 | cursor->initialized = 1; | ||||
1758 | cursor->eof = 0; | ||||
1759 | |||||
1760 | if (data && btree_read_data(cursor->bt, mp, leaf, data) != BT_SUCCESS0) | ||||
1761 | return BT_FAIL-1; | ||||
1762 | |||||
1763 | if (bt_set_key(cursor->bt, mp, leaf, key) != 0) | ||||
1764 | return BT_FAIL-1; | ||||
1765 | DPRINTF("==> cursor placed on key %.*s", | ||||
1766 | (int)key->size, (char *)key->data); | ||||
1767 | |||||
1768 | return BT_SUCCESS0; | ||||
1769 | } | ||||
1770 | |||||
1771 | static int | ||||
1772 | btree_cursor_first(struct cursor *cursor, struct btval *key, struct btval *data) | ||||
1773 | { | ||||
1774 | int rc; | ||||
1775 | struct mpage *mp; | ||||
1776 | struct node *leaf; | ||||
1777 | |||||
1778 | rc = btree_search_page(cursor->bt, cursor->txn, NULL((void*)0), cursor, 0, &mp); | ||||
1779 | if (rc != BT_SUCCESS0) | ||||
1780 | return rc; | ||||
1781 | assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void )0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1781, __func__ , "IS_LEAF(mp)")); | ||||
1782 | |||||
1783 | leaf = NODEPTR(mp, 0)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[0])); | ||||
1784 | cursor->initialized = 1; | ||||
1785 | cursor->eof = 0; | ||||
1786 | |||||
1787 | if (data && btree_read_data(cursor->bt, mp, leaf, data) != BT_SUCCESS0) | ||||
1788 | return BT_FAIL-1; | ||||
1789 | |||||
1790 | if (bt_set_key(cursor->bt, mp, leaf, key) != 0) | ||||
1791 | return BT_FAIL-1; | ||||
1792 | |||||
1793 | return BT_SUCCESS0; | ||||
1794 | } | ||||
1795 | |||||
1796 | int | ||||
1797 | btree_cursor_get(struct cursor *cursor, struct btval *key, struct btval *data, | ||||
1798 | enum cursor_op op) | ||||
1799 | { | ||||
1800 | int rc; | ||||
1801 | int exact = 0; | ||||
1802 | |||||
1803 | assert(cursor)((cursor) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1803, __func__, "cursor")); | ||||
1804 | |||||
1805 | switch (op) { | ||||
1806 | case BT_CURSOR: | ||||
1807 | case BT_CURSOR_EXACT: | ||||
1808 | while (CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first) != NULL((void*)0)) | ||||
1809 | cursor_pop_page(cursor); | ||||
1810 | if (key == NULL((void*)0) || key->size == 0 || key->size > MAXKEYSIZE255) { | ||||
1811 | errno(*__errno()) = EINVAL22; | ||||
1812 | rc = BT_FAIL-1; | ||||
1813 | } else if (op == BT_CURSOR_EXACT) | ||||
1814 | rc = btree_cursor_set(cursor, key, data, &exact); | ||||
1815 | else | ||||
1816 | rc = btree_cursor_set(cursor, key, data, NULL((void*)0)); | ||||
1817 | break; | ||||
1818 | case BT_NEXT: | ||||
1819 | if (!cursor->initialized) | ||||
1820 | rc = btree_cursor_first(cursor, key, data); | ||||
1821 | else | ||||
1822 | rc = btree_cursor_next(cursor, key, data); | ||||
1823 | break; | ||||
1824 | case BT_FIRST: | ||||
1825 | while (CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first) != NULL((void*)0)) | ||||
1826 | cursor_pop_page(cursor); | ||||
1827 | rc = btree_cursor_first(cursor, key, data); | ||||
1828 | break; | ||||
1829 | default: | ||||
1830 | DPRINTF("unhandled/unimplemented cursor operation %u", op); | ||||
1831 | rc = BT_FAIL-1; | ||||
1832 | break; | ||||
1833 | } | ||||
1834 | |||||
1835 | mpage_prune(cursor->bt); | ||||
1836 | |||||
1837 | return rc; | ||||
1838 | } | ||||
1839 | |||||
1840 | static struct mpage * | ||||
1841 | btree_new_page(struct btree *bt, uint32_t flags) | ||||
1842 | { | ||||
1843 | struct mpage *mp; | ||||
1844 | |||||
1845 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1845, __func__, "bt != NULL")); | ||||
1846 | assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1846, __func__, "bt->txn != NULL")); | ||||
1847 | |||||
1848 | DPRINTF("allocating new mpage %u, page size %u", | ||||
1849 | bt->txn->next_pgno, bt->head.psize); | ||||
1850 | if ((mp = calloc(1, sizeof(*mp))) == NULL((void*)0)) | ||||
1851 | return NULL((void*)0); | ||||
1852 | if ((mp->page = malloc(bt->head.psize)) == NULL((void*)0)) { | ||||
1853 | free(mp); | ||||
1854 | return NULL((void*)0); | ||||
1855 | } | ||||
1856 | mp->pgno = mp->page->pgno = bt->txn->next_pgno++; | ||||
1857 | mp->page->flags = flags; | ||||
1858 | mp->page->lowerb.fb.fb_lower = PAGEHDRSZ__builtin_offsetof(struct page, ptrs); | ||||
1859 | mp->page->upperb.fb.fb_upper = bt->head.psize; | ||||
1860 | |||||
1861 | if (IS_BRANCH(mp)((((mp)->page->flags) & (0x01)) == (0x01))) | ||||
1862 | bt->meta.branch_pages++; | ||||
1863 | else if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) | ||||
1864 | bt->meta.leaf_pages++; | ||||
1865 | else if (IS_OVERFLOW(mp)((((mp)->page->flags) & (0x04)) == (0x04))) | ||||
1866 | bt->meta.overflow_pages++; | ||||
1867 | |||||
1868 | mpage_add(bt, mp); | ||||
1869 | mpage_dirty(bt, mp); | ||||
1870 | |||||
1871 | return mp; | ||||
1872 | } | ||||
1873 | |||||
1874 | static size_t | ||||
1875 | bt_leaf_size(struct btree *bt, struct btval *key, struct btval *data) | ||||
1876 | { | ||||
1877 | size_t sz; | ||||
1878 | |||||
1879 | sz = LEAFSIZE(key, data)(__builtin_offsetof(struct node, data) + (key)->size + (data )->size); | ||||
1880 | if (data->size >= bt->head.psize / BT_MINKEYS4) { | ||||
1881 | /* put on overflow page */ | ||||
1882 | sz -= data->size - sizeof(pgno_t); | ||||
1883 | } | ||||
1884 | |||||
1885 | return sz + sizeof(indx_t); | ||||
1886 | } | ||||
1887 | |||||
1888 | static size_t | ||||
1889 | bt_branch_size(struct btree *bt, struct btval *key) | ||||
1890 | { | ||||
1891 | size_t sz; | ||||
1892 | |||||
1893 | sz = INDXSIZE(key)(__builtin_offsetof(struct node, data) + ((key) == ((void*)0) ? 0 : (key)->size)); | ||||
1894 | if (sz >= bt->head.psize / BT_MINKEYS4) { | ||||
1895 | /* put on overflow page */ | ||||
1896 | /* not implemented */ | ||||
1897 | /* sz -= key->size - sizeof(pgno_t); */ | ||||
1898 | } | ||||
1899 | |||||
1900 | return sz + sizeof(indx_t); | ||||
1901 | } | ||||
1902 | |||||
1903 | static int | ||||
1904 | btree_write_overflow_data(struct btree *bt, struct page *p, struct btval *data) | ||||
1905 | { | ||||
1906 | size_t done = 0; | ||||
1907 | size_t sz; | ||||
1908 | size_t max; | ||||
1909 | pgno_t *linkp; /* linked page stored here */ | ||||
1910 | struct mpage *next = NULL((void*)0); | ||||
1911 | |||||
1912 | max = bt->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs); | ||||
1913 | |||||
1914 | while (done < data->size) { | ||||
1915 | if (next != NULL((void*)0)) | ||||
1916 | p = next->page; | ||||
1917 | linkp = &p->p_next_pgnob.pb_next_pgno; | ||||
1918 | if (data->size - done > max) { | ||||
1919 | /* need another overflow page */ | ||||
1920 | if ((next = btree_new_page(bt, P_OVERFLOW0x04)) == NULL((void*)0)) | ||||
1921 | return BT_FAIL-1; | ||||
1922 | *linkp = next->pgno; | ||||
1923 | DPRINTF("linking overflow page %u", next->pgno); | ||||
1924 | } else | ||||
1925 | *linkp = 0; /* indicates end of list */ | ||||
1926 | sz = data->size - done; | ||||
1927 | if (sz > max) | ||||
1928 | sz = max; | ||||
1929 | DPRINTF("copying %zu bytes to overflow page %u", sz, p->pgno); | ||||
1930 | bcopy((char *)data->data + done, p->ptrs, sz); | ||||
1931 | done += sz; | ||||
1932 | } | ||||
1933 | |||||
1934 | return BT_SUCCESS0; | ||||
1935 | } | ||||
1936 | |||||
1937 | /* Key prefix should already be stripped. | ||||
1938 | */ | ||||
1939 | static int | ||||
1940 | btree_add_node(struct btree *bt, struct mpage *mp, indx_t indx, | ||||
1941 | struct btval *key, struct btval *data, pgno_t pgno, uint8_t flags) | ||||
1942 | { | ||||
1943 | unsigned int i; | ||||
1944 | size_t node_size = NODESIZE__builtin_offsetof(struct node, data); | ||||
1945 | indx_t ofs; | ||||
1946 | struct node *node; | ||||
1947 | struct page *p; | ||||
1948 | struct mpage *ofp = NULL((void*)0); /* overflow page */ | ||||
1949 | |||||
1950 | p = mp->page; | ||||
1951 | assert(p->upper >= p->lower)((p->b.fb.fb_upper >= p->b.fb.fb_lower) ? (void)0 : __assert2 ("/usr/src/usr.sbin/ldapd/btree.c", 1951, __func__, "p->upper >= p->lower" )); | ||||
1952 | |||||
1953 | DPRINTF("add node [%.*s] to %s page %u at index %d, key size %zu", | ||||
1954 | key ? (int)key->size : 0, key ? (char *)key->data : NULL, | ||||
1955 | IS_LEAF(mp) ? "leaf" : "branch", | ||||
1956 | mp->pgno, indx, key ? key->size : 0); | ||||
1957 | |||||
1958 | if (key
| ||||
1959 | node_size += key->size; | ||||
1960 | |||||
1961 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) { | ||||
1962 | assert(data)((data) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1962, __func__, "data")); | ||||
1963 | node_size += data->size; | ||||
| |||||
1964 | if (F_ISSET(flags, F_BIGDATA)(((flags) & (0x01)) == (0x01))) { | ||||
1965 | /* Data already on overflow page. */ | ||||
1966 | node_size -= data->size - sizeof(pgno_t); | ||||
1967 | } else if (data->size >= bt->head.psize / BT_MINKEYS4) { | ||||
1968 | /* Put data on overflow page. */ | ||||
1969 | DPRINTF("data size is %zu, put on overflow page", | ||||
1970 | data->size); | ||||
1971 | node_size -= data->size - sizeof(pgno_t); | ||||
1972 | if ((ofp = btree_new_page(bt, P_OVERFLOW0x04)) == NULL((void*)0)) | ||||
1973 | return BT_FAIL-1; | ||||
1974 | DPRINTF("allocated overflow page %u", ofp->pgno); | ||||
1975 | flags |= F_BIGDATA0x01; | ||||
1976 | } | ||||
1977 | } | ||||
1978 | |||||
1979 | if (node_size + sizeof(indx_t) > SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page-> b.fb.fb_lower)) { | ||||
1980 | DPRINTF("not enough room in page %u, got %lu ptrs", | ||||
1981 | mp->pgno, NUMKEYS(mp)); | ||||
1982 | DPRINTF("upper - lower = %u - %u = %u", p->upper, p->lower, | ||||
1983 | p->upper - p->lower); | ||||
1984 | DPRINTF("node size = %zu", node_size); | ||||
1985 | return BT_FAIL-1; | ||||
1986 | } | ||||
1987 | |||||
1988 | /* Move higher pointers up one slot. */ | ||||
1989 | for (i = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); i > indx; i--) | ||||
1990 | p->ptrs[i] = p->ptrs[i - 1]; | ||||
1991 | |||||
1992 | /* Adjust free space offsets. */ | ||||
1993 | ofs = p->upperb.fb.fb_upper - node_size; | ||||
1994 | assert(ofs >= p->lower + sizeof(indx_t))((ofs >= p->b.fb.fb_lower + sizeof(indx_t)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1994, __func__, "ofs >= p->lower + sizeof(indx_t)")); | ||||
1995 | p->ptrs[indx] = ofs; | ||||
1996 | p->upperb.fb.fb_upper = ofs; | ||||
1997 | p->lowerb.fb.fb_lower += sizeof(indx_t); | ||||
1998 | |||||
1999 | /* Write the node data. */ | ||||
2000 | node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[indx])); | ||||
2001 | node->ksize = (key == NULL((void*)0)) ? 0 : key->size; | ||||
2002 | node->flags = flags; | ||||
2003 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) | ||||
2004 | node->n_dsizep.np_dsize = data->size; | ||||
2005 | else | ||||
2006 | node->n_pgnop.np_pgno = pgno; | ||||
2007 | |||||
2008 | if (key) | ||||
2009 | bcopy(key->data, NODEKEY(node)(void *)((node)->data), key->size); | ||||
2010 | |||||
2011 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) { | ||||
2012 | assert(key)((key) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2012, __func__, "key")); | ||||
2013 | if (ofp == NULL((void*)0)) { | ||||
2014 | if (F_ISSET(flags, F_BIGDATA)(((flags) & (0x01)) == (0x01))) | ||||
2015 | bcopy(data->data, node->data + key->size, | ||||
2016 | sizeof(pgno_t)); | ||||
2017 | else | ||||
2018 | bcopy(data->data, node->data + key->size, | ||||
2019 | data->size); | ||||
2020 | } else { | ||||
2021 | bcopy(&ofp->pgno, node->data + key->size, | ||||
2022 | sizeof(pgno_t)); | ||||
2023 | if (btree_write_overflow_data(bt, ofp->page, | ||||
2024 | data) == BT_FAIL-1) | ||||
2025 | return BT_FAIL-1; | ||||
2026 | } | ||||
2027 | } | ||||
2028 | |||||
2029 | return BT_SUCCESS0; | ||||
2030 | } | ||||
2031 | |||||
2032 | static void | ||||
2033 | btree_del_node(struct btree *bt, struct mpage *mp, indx_t indx) | ||||
2034 | { | ||||
2035 | unsigned int sz; | ||||
2036 | indx_t i, j, numkeys, ptr; | ||||
2037 | struct node *node; | ||||
2038 | char *base; | ||||
2039 | |||||
2040 | DPRINTF("delete node %u on %s page %u", indx, | ||||
2041 | IS_LEAF(mp) ? "leaf" : "branch", mp->pgno); | ||||
2042 | assert(indx < NUMKEYS(mp))((indx < (((mp)->page->b.fb.fb_lower - __builtin_offsetof (struct page, ptrs)) >> 1)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2042, __func__, "indx < NUMKEYS(mp)")); | ||||
2043 | |||||
2044 | node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[indx])); | ||||
2045 | sz = NODESIZE__builtin_offsetof(struct node, data) + node->ksize; | ||||
2046 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) { | ||||
2047 | if (F_ISSET(node->flags, F_BIGDATA)(((node->flags) & (0x01)) == (0x01))) | ||||
2048 | sz += sizeof(pgno_t); | ||||
2049 | else | ||||
2050 | sz += NODEDSZ(node)((node)->p.np_dsize); | ||||
2051 | } | ||||
2052 | |||||
2053 | ptr = mp->page->ptrs[indx]; | ||||
2054 | numkeys = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); | ||||
2055 | for (i = j = 0; i < numkeys; i++) { | ||||
2056 | if (i != indx) { | ||||
2057 | mp->page->ptrs[j] = mp->page->ptrs[i]; | ||||
2058 | if (mp->page->ptrs[i] < ptr) | ||||
2059 | mp->page->ptrs[j] += sz; | ||||
2060 | j++; | ||||
2061 | } | ||||
2062 | } | ||||
2063 | |||||
2064 | base = (char *)mp->page + mp->page->upperb.fb.fb_upper; | ||||
2065 | bcopy(base, base + sz, ptr - mp->page->upperb.fb.fb_upper); | ||||
2066 | |||||
2067 | mp->page->lowerb.fb.fb_lower -= sizeof(indx_t); | ||||
2068 | mp->page->upperb.fb.fb_upper += sz; | ||||
2069 | } | ||||
2070 | |||||
2071 | struct cursor * | ||||
2072 | btree_txn_cursor_open(struct btree *bt, struct btree_txn *txn) | ||||
2073 | { | ||||
2074 | struct cursor *cursor; | ||||
2075 | |||||
2076 | if (bt != NULL((void*)0) && txn != NULL((void*)0) && bt != txn->bt) { | ||||
2077 | errno(*__errno()) = EINVAL22; | ||||
2078 | return NULL((void*)0); | ||||
2079 | } | ||||
2080 | |||||
2081 | if (bt == NULL((void*)0)) { | ||||
2082 | if (txn == NULL((void*)0)) { | ||||
2083 | errno(*__errno()) = EINVAL22; | ||||
2084 | return NULL((void*)0); | ||||
2085 | } | ||||
2086 | bt = txn->bt; | ||||
2087 | } | ||||
2088 | |||||
2089 | if ((cursor = calloc(1, sizeof(*cursor))) != NULL((void*)0)) { | ||||
2090 | SLIST_INIT(&cursor->stack){ ((&cursor->stack)->slh_first) = ((void*)0); }; | ||||
2091 | cursor->bt = bt; | ||||
2092 | cursor->txn = txn; | ||||
2093 | btree_ref(bt); | ||||
2094 | } | ||||
2095 | |||||
2096 | return cursor; | ||||
2097 | } | ||||
2098 | |||||
2099 | void | ||||
2100 | btree_cursor_close(struct cursor *cursor) | ||||
2101 | { | ||||
2102 | if (cursor != NULL((void*)0)) { | ||||
2103 | while (!CURSOR_EMPTY(cursor)(((&(cursor)->stack)->slh_first) == ((void*)0))) | ||||
2104 | cursor_pop_page(cursor); | ||||
2105 | |||||
2106 | btree_close(cursor->bt); | ||||
2107 | free(cursor); | ||||
2108 | } | ||||
2109 | } | ||||
2110 | |||||
2111 | static int | ||||
2112 | btree_update_key(struct btree *bt, struct mpage *mp, indx_t indx, | ||||
2113 | struct btval *key) | ||||
2114 | { | ||||
2115 | indx_t ptr, i, numkeys; | ||||
2116 | int delta; | ||||
2117 | size_t len; | ||||
2118 | struct node *node; | ||||
2119 | char *base; | ||||
2120 | |||||
2121 | node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[indx])); | ||||
2122 | ptr = mp->page->ptrs[indx]; | ||||
2123 | DPRINTF("update key %u (ofs %u) [%.*s] to [%.*s] on page %u", | ||||
2124 | indx, ptr, | ||||
2125 | (int)node->ksize, (char *)NODEKEY(node), | ||||
2126 | (int)key->size, (char *)key->data, | ||||
2127 | mp->pgno); | ||||
2128 | |||||
2129 | if (key->size != node->ksize) { | ||||
2130 | delta = key->size - node->ksize; | ||||
2131 | if (delta > 0 && SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page-> b.fb.fb_lower) < delta) { | ||||
2132 | DPRINTF("OUCH! Not enough room, delta = %d", delta); | ||||
2133 | return BT_FAIL-1; | ||||
2134 | } | ||||
2135 | |||||
2136 | numkeys = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); | ||||
2137 | for (i = 0; i < numkeys; i++) { | ||||
2138 | if (mp->page->ptrs[i] <= ptr) | ||||
2139 | mp->page->ptrs[i] -= delta; | ||||
2140 | } | ||||
2141 | |||||
2142 | base = (char *)mp->page + mp->page->upperb.fb.fb_upper; | ||||
2143 | len = ptr - mp->page->upperb.fb.fb_upper + NODESIZE__builtin_offsetof(struct node, data); | ||||
2144 | bcopy(base, base - delta, len); | ||||
2145 | mp->page->upperb.fb.fb_upper -= delta; | ||||
2146 | |||||
2147 | node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[indx])); | ||||
2148 | node->ksize = key->size; | ||||
2149 | } | ||||
2150 | |||||
2151 | bcopy(key->data, NODEKEY(node)(void *)((node)->data), key->size); | ||||
2152 | |||||
2153 | return BT_SUCCESS0; | ||||
2154 | } | ||||
2155 | |||||
2156 | static int | ||||
2157 | btree_adjust_prefix(struct btree *bt, struct mpage *src, int delta) | ||||
2158 | { | ||||
2159 | indx_t i; | ||||
2160 | struct node *node; | ||||
2161 | struct btkey tmpkey; | ||||
2162 | struct btval key; | ||||
2163 | |||||
2164 | DPRINTF("adjusting prefix lengths on page %u with delta %d", | ||||
2165 | src->pgno, delta); | ||||
2166 | assert(delta != 0)((delta != 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2166, __func__, "delta != 0")); | ||||
2167 | |||||
2168 | for (i = 0; i < NUMKEYS(src)(((src)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); i++) { | ||||
2169 | node = NODEPTR(src, i)((struct node *)((char *)((src)->page) + ((src)->page)-> ptrs[i])); | ||||
2170 | tmpkey.len = node->ksize - delta; | ||||
2171 | if (delta > 0) { | ||||
2172 | if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) | ||||
2173 | bcopy(NODEKEY(node)(void *)((node)->data), tmpkey.str, tmpkey.len); | ||||
2174 | else | ||||
2175 | bcopy((char *)NODEKEY(node)(void *)((node)->data) + delta, tmpkey.str, | ||||
2176 | tmpkey.len); | ||||
2177 | } else { | ||||
2178 | if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) { | ||||
2179 | bcopy(NODEKEY(node)(void *)((node)->data), tmpkey.str, node->ksize); | ||||
2180 | bcopy(src->prefix.str, tmpkey.str + node->ksize, | ||||
2181 | -delta); | ||||
2182 | } else { | ||||
2183 | bcopy(src->prefix.str + src->prefix.len + delta, | ||||
2184 | tmpkey.str, -delta); | ||||
2185 | bcopy(NODEKEY(node)(void *)((node)->data), tmpkey.str - delta, | ||||
2186 | node->ksize); | ||||
2187 | } | ||||
2188 | } | ||||
2189 | key.size = tmpkey.len; | ||||
2190 | key.data = tmpkey.str; | ||||
2191 | if (btree_update_key(bt, src, i, &key) != BT_SUCCESS0) | ||||
2192 | return BT_FAIL-1; | ||||
2193 | } | ||||
2194 | |||||
2195 | return BT_SUCCESS0; | ||||
2196 | } | ||||
2197 | |||||
2198 | /* Move a node from src to dst. | ||||
2199 | */ | ||||
2200 | static int | ||||
2201 | btree_move_node(struct btree *bt, struct mpage *src, indx_t srcindx, | ||||
2202 | struct mpage *dst, indx_t dstindx) | ||||
2203 | { | ||||
2204 | int rc; | ||||
2205 | unsigned int pfxlen, mp_pfxlen = 0; | ||||
2206 | struct node *srcnode; | ||||
2207 | struct mpage *mp = NULL((void*)0); | ||||
2208 | struct btkey tmpkey, srckey; | ||||
2209 | struct btval key, data; | ||||
2210 | |||||
2211 | assert(src->parent)((src->parent) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2211, __func__, "src->parent")); | ||||
2212 | assert(dst->parent)((dst->parent) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2212, __func__, "dst->parent")); | ||||
2213 | |||||
2214 | srcnode = NODEPTR(src, srcindx)((struct node *)((char *)((src)->page) + ((src)->page)-> ptrs[srcindx])); | ||||
2215 | DPRINTF("moving %s node %u [%.*s] on page %u to node %u on page %u", | ||||
2216 | IS_LEAF(src) ? "leaf" : "branch", | ||||
2217 | srcindx, | ||||
2218 | (int)srcnode->ksize, (char *)NODEKEY(srcnode), | ||||
2219 | src->pgno, | ||||
2220 | dstindx, dst->pgno); | ||||
2221 | |||||
2222 | find_common_prefix(bt, src); | ||||
2223 | |||||
2224 | if (IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) { | ||||
2225 | /* Need to check if the page the moved node points to | ||||
2226 | * changes prefix. | ||||
2227 | */ | ||||
2228 | if ((mp = btree_get_mpage(bt, NODEPGNO(srcnode)((srcnode)->p.np_pgno))) == NULL((void*)0)) | ||||
2229 | return BT_FAIL-1; | ||||
2230 | mp->parent = src; | ||||
2231 | mp->parent_index = srcindx; | ||||
2232 | find_common_prefix(bt, mp); | ||||
2233 | mp_pfxlen = mp->prefix.len; | ||||
2234 | } | ||||
2235 | |||||
2236 | /* Mark src and dst as dirty. */ | ||||
2237 | if ((src = mpage_touch(bt, src)) == NULL((void*)0) || | ||||
2238 | (dst = mpage_touch(bt, dst)) == NULL((void*)0)) | ||||
2239 | return BT_FAIL-1; | ||||
2240 | |||||
2241 | find_common_prefix(bt, dst); | ||||
2242 | |||||
2243 | /* Check if src node has destination page prefix. Otherwise the | ||||
2244 | * destination page must expand its prefix on all its nodes. | ||||
2245 | */ | ||||
2246 | srckey.len = srcnode->ksize; | ||||
2247 | bcopy(NODEKEY(srcnode)(void *)((srcnode)->data), srckey.str, srckey.len); | ||||
2248 | common_prefix(bt, &srckey, &dst->prefix, &tmpkey); | ||||
2249 | if (tmpkey.len != dst->prefix.len) { | ||||
2250 | if (btree_adjust_prefix(bt, dst, | ||||
2251 | tmpkey.len - dst->prefix.len) != BT_SUCCESS0) | ||||
2252 | return BT_FAIL-1; | ||||
2253 | bcopy(&tmpkey, &dst->prefix, sizeof(tmpkey)); | ||||
2254 | } | ||||
2255 | |||||
2256 | if (srcindx == 0 && IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) { | ||||
2257 | struct mpage *low; | ||||
2258 | |||||
2259 | /* must find the lowest key below src | ||||
2260 | */ | ||||
2261 | assert(btree_search_page_root(bt, src, NULL, NULL, 0,((btree_search_page_root(bt, src, ((void*)0), ((void*)0), 0, & low) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2262, __func__, "btree_search_page_root(bt, src, NULL, NULL, 0, &low) == BT_SUCCESS" )) | ||||
2262 | &low) == BT_SUCCESS)((btree_search_page_root(bt, src, ((void*)0), ((void*)0), 0, & low) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2262, __func__, "btree_search_page_root(bt, src, NULL, NULL, 0, &low) == BT_SUCCESS" )); | ||||
2263 | expand_prefix(bt, low, 0, &srckey); | ||||
2264 | DPRINTF("found lowest key [%.*s] on leaf page %u", | ||||
2265 | (int)srckey.len, srckey.str, low->pgno); | ||||
2266 | } else { | ||||
2267 | srckey.len = srcnode->ksize; | ||||
2268 | bcopy(NODEKEY(srcnode)(void *)((srcnode)->data), srckey.str, srcnode->ksize); | ||||
2269 | } | ||||
2270 | find_common_prefix(bt, src); | ||||
2271 | |||||
2272 | /* expand the prefix */ | ||||
2273 | tmpkey.len = sizeof(tmpkey.str); | ||||
2274 | concat_prefix(bt, src->prefix.str, src->prefix.len, | ||||
2275 | srckey.str, srckey.len, tmpkey.str, &tmpkey.len); | ||||
2276 | |||||
2277 | /* Add the node to the destination page. Adjust prefix for | ||||
2278 | * destination page. | ||||
2279 | */ | ||||
2280 | key.size = tmpkey.len; | ||||
2281 | key.data = tmpkey.str; | ||||
2282 | remove_prefix(bt, &key, dst->prefix.len); | ||||
2283 | data.size = NODEDSZ(srcnode)((srcnode)->p.np_dsize); | ||||
2284 | data.data = NODEDATA(srcnode)(void *)((char *)(srcnode)->data + (srcnode)->ksize); | ||||
2285 | rc = btree_add_node(bt, dst, dstindx, &key, &data, NODEPGNO(srcnode)((srcnode)->p.np_pgno), | ||||
2286 | srcnode->flags); | ||||
2287 | if (rc != BT_SUCCESS0) | ||||
2288 | return rc; | ||||
2289 | |||||
2290 | /* Delete the node from the source page. | ||||
2291 | */ | ||||
2292 | btree_del_node(bt, src, srcindx); | ||||
2293 | |||||
2294 | /* Update the parent separators. | ||||
2295 | */ | ||||
2296 | if (srcindx == 0 && src->parent_index != 0) { | ||||
2297 | expand_prefix(bt, src, 0, &tmpkey); | ||||
2298 | key.size = tmpkey.len; | ||||
2299 | key.data = tmpkey.str; | ||||
2300 | remove_prefix(bt, &key, src->parent->prefix.len); | ||||
2301 | |||||
2302 | DPRINTF("update separator for source page %u to [%.*s]", | ||||
2303 | src->pgno, (int)key.size, (char *)key.data); | ||||
2304 | if (btree_update_key(bt, src->parent, src->parent_index, | ||||
2305 | &key) != BT_SUCCESS0) | ||||
2306 | return BT_FAIL-1; | ||||
2307 | } | ||||
2308 | |||||
2309 | if (srcindx == 0 && IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) { | ||||
2310 | struct btval nullkey; | ||||
2311 | nullkey.size = 0; | ||||
2312 | assert(btree_update_key(bt, src, 0, &nullkey) == BT_SUCCESS)((btree_update_key(bt, src, 0, &nullkey) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 2312, __func__, "btree_update_key(bt, src, 0, &nullkey) == BT_SUCCESS")); | ||||
2313 | } | ||||
2314 | |||||
2315 | if (dstindx == 0 && dst->parent_index != 0) { | ||||
2316 | expand_prefix(bt, dst, 0, &tmpkey); | ||||
2317 | key.size = tmpkey.len; | ||||
2318 | key.data = tmpkey.str; | ||||
2319 | remove_prefix(bt, &key, dst->parent->prefix.len); | ||||
2320 | |||||
2321 | DPRINTF("update separator for destination page %u to [%.*s]", | ||||
2322 | dst->pgno, (int)key.size, (char *)key.data); | ||||
2323 | if (btree_update_key(bt, dst->parent, dst->parent_index, | ||||
2324 | &key) != BT_SUCCESS0) | ||||
2325 | return BT_FAIL-1; | ||||
2326 | } | ||||
2327 | |||||
2328 | if (dstindx == 0 && IS_BRANCH(dst)((((dst)->page->flags) & (0x01)) == (0x01))) { | ||||
2329 | struct btval nullkey; | ||||
2330 | nullkey.size = 0; | ||||
2331 | assert(btree_update_key(bt, dst, 0, &nullkey) == BT_SUCCESS)((btree_update_key(bt, dst, 0, &nullkey) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 2331, __func__, "btree_update_key(bt, dst, 0, &nullkey) == BT_SUCCESS")); | ||||
2332 | } | ||||
2333 | |||||
2334 | /* We can get a new page prefix here! | ||||
2335 | * Must update keys in all nodes of this page! | ||||
2336 | */ | ||||
2337 | pfxlen = src->prefix.len; | ||||
2338 | find_common_prefix(bt, src); | ||||
2339 | if (src->prefix.len != pfxlen) { | ||||
2340 | if (btree_adjust_prefix(bt, src, | ||||
2341 | src->prefix.len - pfxlen) != BT_SUCCESS0) | ||||
2342 | return BT_FAIL-1; | ||||
2343 | } | ||||
2344 | |||||
2345 | pfxlen = dst->prefix.len; | ||||
2346 | find_common_prefix(bt, dst); | ||||
2347 | if (dst->prefix.len != pfxlen) { | ||||
2348 | if (btree_adjust_prefix(bt, dst, | ||||
2349 | dst->prefix.len - pfxlen) != BT_SUCCESS0) | ||||
2350 | return BT_FAIL-1; | ||||
2351 | } | ||||
2352 | |||||
2353 | if (IS_BRANCH(dst)((((dst)->page->flags) & (0x01)) == (0x01))) { | ||||
2354 | assert(mp)((mp) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2354, __func__, "mp")); | ||||
2355 | mp->parent = dst; | ||||
2356 | mp->parent_index = dstindx; | ||||
2357 | find_common_prefix(bt, mp); | ||||
2358 | if (mp->prefix.len != mp_pfxlen) { | ||||
2359 | DPRINTF("moved branch node has changed prefix"); | ||||
2360 | if ((mp = mpage_touch(bt, mp)) == NULL((void*)0)) | ||||
2361 | return BT_FAIL-1; | ||||
2362 | if (btree_adjust_prefix(bt, mp, | ||||
2363 | mp->prefix.len - mp_pfxlen) != BT_SUCCESS0) | ||||
2364 | return BT_FAIL-1; | ||||
2365 | } | ||||
2366 | } | ||||
2367 | |||||
2368 | return BT_SUCCESS0; | ||||
2369 | } | ||||
2370 | |||||
2371 | static int | ||||
2372 | btree_merge(struct btree *bt, struct mpage *src, struct mpage *dst) | ||||
2373 | { | ||||
2374 | int rc; | ||||
2375 | indx_t i; | ||||
2376 | unsigned int pfxlen; | ||||
2377 | struct node *srcnode; | ||||
2378 | struct btkey tmpkey, dstpfx; | ||||
2379 | struct btval key, data; | ||||
2380 | |||||
2381 | DPRINTF("merging page %u and %u", src->pgno, dst->pgno); | ||||
2382 | |||||
2383 | assert(src->parent)((src->parent) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2383, __func__, "src->parent")); /* can't merge root page */ | ||||
2384 | assert(dst->parent)((dst->parent) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2384, __func__, "dst->parent")); | ||||
2385 | assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2385, __func__, "bt->txn != NULL")); | ||||
2386 | |||||
2387 | /* Mark src and dst as dirty. */ | ||||
2388 | if ((src = mpage_touch(bt, src)) == NULL((void*)0) || | ||||
2389 | (dst = mpage_touch(bt, dst)) == NULL((void*)0)) | ||||
2390 | return BT_FAIL-1; | ||||
2391 | |||||
2392 | find_common_prefix(bt, src); | ||||
2393 | find_common_prefix(bt, dst); | ||||
2394 | |||||
2395 | /* Check if source nodes has destination page prefix. Otherwise | ||||
2396 | * the destination page must expand its prefix on all its nodes. | ||||
2397 | */ | ||||
2398 | common_prefix(bt, &src->prefix, &dst->prefix, &dstpfx); | ||||
2399 | if (dstpfx.len != dst->prefix.len) { | ||||
2400 | if (btree_adjust_prefix(bt, dst, | ||||
2401 | dstpfx.len - dst->prefix.len) != BT_SUCCESS0) | ||||
2402 | return BT_FAIL-1; | ||||
2403 | bcopy(&dstpfx, &dst->prefix, sizeof(dstpfx)); | ||||
2404 | } | ||||
2405 | |||||
2406 | /* Move all nodes from src to dst. | ||||
2407 | */ | ||||
2408 | for (i = 0; i < NUMKEYS(src)(((src)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); i++) { | ||||
2409 | srcnode = NODEPTR(src, i)((struct node *)((char *)((src)->page) + ((src)->page)-> ptrs[i])); | ||||
2410 | |||||
2411 | /* If branch node 0 (implicit key), find the real key. | ||||
2412 | */ | ||||
2413 | if (i == 0 && IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) { | ||||
2414 | struct mpage *low; | ||||
2415 | |||||
2416 | /* must find the lowest key below src | ||||
2417 | */ | ||||
2418 | assert(btree_search_page_root(bt, src, NULL, NULL, 0,((btree_search_page_root(bt, src, ((void*)0), ((void*)0), 0, & low) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2419, __func__, "btree_search_page_root(bt, src, NULL, NULL, 0, &low) == BT_SUCCESS" )) | ||||
2419 | &low) == BT_SUCCESS)((btree_search_page_root(bt, src, ((void*)0), ((void*)0), 0, & low) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2419, __func__, "btree_search_page_root(bt, src, NULL, NULL, 0, &low) == BT_SUCCESS" )); | ||||
2420 | expand_prefix(bt, low, 0, &tmpkey); | ||||
2421 | DPRINTF("found lowest key [%.*s] on leaf page %u", | ||||
2422 | (int)tmpkey.len, tmpkey.str, low->pgno); | ||||
2423 | } else { | ||||
2424 | expand_prefix(bt, src, i, &tmpkey); | ||||
2425 | } | ||||
2426 | |||||
2427 | key.size = tmpkey.len; | ||||
2428 | key.data = tmpkey.str; | ||||
2429 | |||||
2430 | remove_prefix(bt, &key, dst->prefix.len); | ||||
2431 | data.size = NODEDSZ(srcnode)((srcnode)->p.np_dsize); | ||||
2432 | data.data = NODEDATA(srcnode)(void *)((char *)(srcnode)->data + (srcnode)->ksize); | ||||
2433 | rc = btree_add_node(bt, dst, NUMKEYS(dst)(((dst)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1), &key, | ||||
2434 | &data, NODEPGNO(srcnode)((srcnode)->p.np_pgno), srcnode->flags); | ||||
2435 | if (rc != BT_SUCCESS0) | ||||
2436 | return rc; | ||||
2437 | } | ||||
2438 | |||||
2439 | DPRINTF("dst page %u now has %lu keys (%.1f%% filled)", | ||||
2440 | dst->pgno, NUMKEYS(dst), (float)PAGEFILL(bt, dst) / 10); | ||||
2441 | |||||
2442 | /* Unlink the src page from parent. | ||||
2443 | */ | ||||
2444 | btree_del_node(bt, src->parent, src->parent_index); | ||||
2445 | if (src->parent_index == 0) { | ||||
2446 | key.size = 0; | ||||
2447 | if (btree_update_key(bt, src->parent, 0, &key) != BT_SUCCESS0) | ||||
2448 | return BT_FAIL-1; | ||||
2449 | |||||
2450 | pfxlen = src->prefix.len; | ||||
2451 | find_common_prefix(bt, src); | ||||
2452 | assert (src->prefix.len == pfxlen)((src->prefix.len == pfxlen) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2452, __func__, "src->prefix.len == pfxlen")); | ||||
2453 | } | ||||
2454 | |||||
2455 | if (IS_LEAF(src)((((src)->page->flags) & (0x02)) == (0x02))) | ||||
2456 | bt->meta.leaf_pages--; | ||||
2457 | else | ||||
2458 | bt->meta.branch_pages--; | ||||
2459 | |||||
2460 | return btree_rebalance(bt, src->parent); | ||||
2461 | } | ||||
2462 | |||||
2463 | #define FILL_THRESHOLD250 250 | ||||
2464 | |||||
2465 | static int | ||||
2466 | btree_rebalance(struct btree *bt, struct mpage *mp) | ||||
2467 | { | ||||
2468 | struct node *node; | ||||
2469 | struct mpage *parent; | ||||
2470 | struct mpage *root; | ||||
2471 | struct mpage *neighbor = NULL((void*)0); | ||||
2472 | indx_t si = 0, di = 0; | ||||
2473 | |||||
2474 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2474, __func__, "bt != NULL")); | ||||
2475 | assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2475, __func__, "bt->txn != NULL")); | ||||
2476 | assert(mp != NULL)((mp != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2476, __func__, "mp != NULL")); | ||||
2477 | |||||
2478 | DPRINTF("rebalancing %s page %u (has %lu keys, %.1f%% full)", | ||||
2479 | IS_LEAF(mp) ? "leaf" : "branch", | ||||
2480 | mp->pgno, NUMKEYS(mp), (float)PAGEFILL(bt, mp) / 10); | ||||
2481 | |||||
2482 | if (PAGEFILL(bt, mp)(1000 * ((bt)->head.psize - __builtin_offsetof(struct page , ptrs) - (indx_t)((mp)->page->b.fb.fb_upper - (mp)-> page->b.fb.fb_lower)) / ((bt)->head.psize - __builtin_offsetof (struct page, ptrs))) >= FILL_THRESHOLD250) { | ||||
2483 | DPRINTF("no need to rebalance page %u, above fill threshold", | ||||
2484 | mp->pgno); | ||||
2485 | return BT_SUCCESS0; | ||||
2486 | } | ||||
2487 | |||||
2488 | parent = mp->parent; | ||||
2489 | |||||
2490 | if (parent == NULL((void*)0)) { | ||||
2491 | if (NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) == 0) { | ||||
2492 | DPRINTF("tree is completely empty"); | ||||
2493 | bt->txn->root = P_INVALID0xFFFFFFFF; | ||||
2494 | bt->meta.depth--; | ||||
2495 | bt->meta.leaf_pages--; | ||||
2496 | } else if (IS_BRANCH(mp)((((mp)->page->flags) & (0x01)) == (0x01)) && NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) == 1) { | ||||
2497 | DPRINTF("collapsing root page!"); | ||||
2498 | bt->txn->root = NODEPGNO(NODEPTR(mp, 0))((((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[0])))->p.np_pgno); | ||||
2499 | if ((root = btree_get_mpage(bt, bt->txn->root)) == NULL((void*)0)) | ||||
2500 | return BT_FAIL-1; | ||||
2501 | root->parent = NULL((void*)0); | ||||
2502 | bt->meta.depth--; | ||||
2503 | bt->meta.branch_pages--; | ||||
2504 | } else | ||||
2505 | DPRINTF("root page doesn't need rebalancing"); | ||||
2506 | return BT_SUCCESS0; | ||||
2507 | } | ||||
2508 | |||||
2509 | /* The parent (branch page) must have at least 2 pointers, | ||||
2510 | * otherwise the tree is invalid. | ||||
2511 | */ | ||||
2512 | assert(NUMKEYS(parent) > 1)(((((parent)->page->b.fb.fb_lower - __builtin_offsetof( struct page, ptrs)) >> 1) > 1) ? (void)0 : __assert2 ("/usr/src/usr.sbin/ldapd/btree.c", 2512, __func__, "NUMKEYS(parent) > 1" )); | ||||
2513 | |||||
2514 | /* Leaf page fill factor is below the threshold. | ||||
2515 | * Try to move keys from left or right neighbor, or | ||||
2516 | * merge with a neighbor page. | ||||
2517 | */ | ||||
2518 | |||||
2519 | /* Find neighbors. | ||||
2520 | */ | ||||
2521 | if (mp->parent_index == 0) { | ||||
2522 | /* We're the leftmost leaf in our parent. | ||||
2523 | */ | ||||
2524 | DPRINTF("reading right neighbor"); | ||||
2525 | node = NODEPTR(parent, mp->parent_index + 1)((struct node *)((char *)((parent)->page) + ((parent)-> page)->ptrs[mp->parent_index + 1])); | ||||
2526 | if ((neighbor = btree_get_mpage(bt, NODEPGNO(node)((node)->p.np_pgno))) == NULL((void*)0)) | ||||
2527 | return BT_FAIL-1; | ||||
2528 | neighbor->parent_index = mp->parent_index + 1; | ||||
2529 | si = 0; | ||||
2530 | di = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); | ||||
2531 | } else { | ||||
2532 | /* There is at least one neighbor to the left. | ||||
2533 | */ | ||||
2534 | DPRINTF("reading left neighbor"); | ||||
2535 | node = NODEPTR(parent, mp->parent_index - 1)((struct node *)((char *)((parent)->page) + ((parent)-> page)->ptrs[mp->parent_index - 1])); | ||||
2536 | if ((neighbor = btree_get_mpage(bt, NODEPGNO(node)((node)->p.np_pgno))) == NULL((void*)0)) | ||||
2537 | return BT_FAIL-1; | ||||
2538 | neighbor->parent_index = mp->parent_index - 1; | ||||
2539 | si = NUMKEYS(neighbor)(((neighbor)->page->b.fb.fb_lower - __builtin_offsetof( struct page, ptrs)) >> 1) - 1; | ||||
2540 | di = 0; | ||||
2541 | } | ||||
2542 | neighbor->parent = parent; | ||||
2543 | |||||
2544 | DPRINTF("found neighbor page %u (%lu keys, %.1f%% full)", | ||||
2545 | neighbor->pgno, NUMKEYS(neighbor), (float)PAGEFILL(bt, neighbor) / 10); | ||||
2546 | |||||
2547 | /* If the neighbor page is above threshold and has at least two | ||||
2548 | * keys, move one key from it. | ||||
2549 | * | ||||
2550 | * Otherwise we should try to merge them, but that might not be | ||||
2551 | * possible, even if both are below threshold, as prefix expansion | ||||
2552 | * might make keys larger. FIXME: detect this | ||||
2553 | */ | ||||
2554 | if (PAGEFILL(bt, neighbor)(1000 * ((bt)->head.psize - __builtin_offsetof(struct page , ptrs) - (indx_t)((neighbor)->page->b.fb.fb_upper - (neighbor )->page->b.fb.fb_lower)) / ((bt)->head.psize - __builtin_offsetof (struct page, ptrs))) >= FILL_THRESHOLD250 && NUMKEYS(neighbor)(((neighbor)->page->b.fb.fb_lower - __builtin_offsetof( struct page, ptrs)) >> 1) >= 2) | ||||
2555 | return btree_move_node(bt, neighbor, si, mp, di); | ||||
2556 | else { /* FIXME: if (has_enough_room()) */ | ||||
2557 | if (mp->parent_index == 0) | ||||
2558 | return btree_merge(bt, neighbor, mp); | ||||
2559 | else | ||||
2560 | return btree_merge(bt, mp, neighbor); | ||||
2561 | } | ||||
2562 | } | ||||
2563 | |||||
2564 | int | ||||
2565 | btree_txn_del(struct btree *bt, struct btree_txn *txn, | ||||
2566 | struct btval *key, struct btval *data) | ||||
2567 | { | ||||
2568 | int rc, exact, close_txn = 0; | ||||
2569 | unsigned int ki; | ||||
2570 | struct node *leaf; | ||||
2571 | struct mpage *mp; | ||||
2572 | |||||
2573 | DPRINTF("========> delete key %.*s", (int)key->size, (char *)key->data); | ||||
2574 | |||||
2575 | assert(key != NULL)((key != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2575, __func__, "key != NULL")); | ||||
2576 | |||||
2577 | if (bt != NULL((void*)0) && txn != NULL((void*)0) && bt != txn->bt) { | ||||
2578 | errno(*__errno()) = EINVAL22; | ||||
2579 | return BT_FAIL-1; | ||||
2580 | } | ||||
2581 | |||||
2582 | if (txn != NULL((void*)0) && F_ISSET(txn->flags, BT_TXN_RDONLY)(((txn->flags) & (0x01)) == (0x01))) { | ||||
2583 | errno(*__errno()) = EINVAL22; | ||||
2584 | return BT_FAIL-1; | ||||
2585 | } | ||||
2586 | |||||
2587 | if (bt == NULL((void*)0)) { | ||||
2588 | if (txn == NULL((void*)0)) { | ||||
2589 | errno(*__errno()) = EINVAL22; | ||||
2590 | return BT_FAIL-1; | ||||
2591 | } | ||||
2592 | bt = txn->bt; | ||||
2593 | } | ||||
2594 | |||||
2595 | if (key->size == 0 || key->size > MAXKEYSIZE255) { | ||||
2596 | errno(*__errno()) = EINVAL22; | ||||
2597 | return BT_FAIL-1; | ||||
2598 | } | ||||
2599 | |||||
2600 | if (txn == NULL((void*)0)) { | ||||
2601 | close_txn = 1; | ||||
2602 | if ((txn = btree_txn_begin(bt, 0)) == NULL((void*)0)) | ||||
2603 | return BT_FAIL-1; | ||||
2604 | } | ||||
2605 | |||||
2606 | if ((rc = btree_search_page(bt, txn, key, NULL((void*)0), 1, &mp)) != BT_SUCCESS0) | ||||
2607 | goto done; | ||||
2608 | |||||
2609 | leaf = btree_search_node(bt, mp, key, &exact, &ki); | ||||
2610 | if (leaf == NULL((void*)0) || !exact) { | ||||
2611 | errno(*__errno()) = ENOENT2; | ||||
2612 | rc = BT_FAIL-1; | ||||
2613 | goto done; | ||||
2614 | } | ||||
2615 | |||||
2616 | if (data && (rc = btree_read_data(bt, NULL((void*)0), leaf, data)) != BT_SUCCESS0) | ||||
2617 | goto done; | ||||
2618 | |||||
2619 | btree_del_node(bt, mp, ki); | ||||
2620 | bt->meta.entries--; | ||||
2621 | rc = btree_rebalance(bt, mp); | ||||
2622 | if (rc != BT_SUCCESS0) | ||||
2623 | txn->flags |= BT_TXN_ERROR0x02; | ||||
2624 | |||||
2625 | done: | ||||
2626 | if (close_txn) { | ||||
2627 | if (rc == BT_SUCCESS0) | ||||
2628 | rc = btree_txn_commit(txn); | ||||
2629 | else | ||||
2630 | btree_txn_abort(txn); | ||||
2631 | } | ||||
2632 | mpage_prune(bt); | ||||
2633 | return rc; | ||||
2634 | } | ||||
2635 | |||||
2636 | /* Reduce the length of the prefix separator <sep> to the minimum length that | ||||
2637 | * still makes it uniquely distinguishable from <min>. | ||||
2638 | * | ||||
2639 | * <min> is guaranteed to be sorted less than <sep> | ||||
2640 | * | ||||
2641 | * On return, <sep> is modified to the minimum length. | ||||
2642 | */ | ||||
2643 | static void | ||||
2644 | bt_reduce_separator(struct btree *bt, struct node *min, struct btval *sep) | ||||
2645 | { | ||||
2646 | size_t n = 0; | ||||
2647 | char *p1; | ||||
2648 | char *p2; | ||||
2649 | |||||
2650 | if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) { | ||||
2651 | |||||
2652 | assert(sep->size > 0)((sep->size > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2652, __func__, "sep->size > 0")); | ||||
2653 | |||||
2654 | p1 = (char *)NODEKEY(min)(void *)((min)->data) + min->ksize - 1; | ||||
2655 | p2 = (char *)sep->data + sep->size - 1; | ||||
2656 | |||||
2657 | while (p1 >= (char *)NODEKEY(min)(void *)((min)->data) && *p1 == *p2) { | ||||
2658 | assert(p2 > (char *)sep->data)((p2 > (char *)sep->data) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2658, __func__, "p2 > (char *)sep->data")); | ||||
2659 | p1--; | ||||
2660 | p2--; | ||||
2661 | n++; | ||||
2662 | } | ||||
2663 | |||||
2664 | sep->data = p2; | ||||
2665 | sep->size = n + 1; | ||||
2666 | } else { | ||||
2667 | |||||
2668 | assert(min->ksize > 0)((min->ksize > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2668, __func__, "min->ksize > 0")); | ||||
2669 | assert(sep->size > 0)((sep->size > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2669, __func__, "sep->size > 0")); | ||||
2670 | |||||
2671 | p1 = (char *)NODEKEY(min)(void *)((min)->data); | ||||
2672 | p2 = (char *)sep->data; | ||||
2673 | |||||
2674 | while (*p1 == *p2) { | ||||
2675 | p1++; | ||||
2676 | p2++; | ||||
2677 | n++; | ||||
2678 | if (n == min->ksize || n == sep->size) | ||||
2679 | break; | ||||
2680 | } | ||||
2681 | |||||
2682 | sep->size = n + 1; | ||||
2683 | } | ||||
2684 | |||||
2685 | DPRINTF("reduced separator to [%.*s] > [%.*s]", | ||||
2686 | (int)sep->size, (char *)sep->data, | ||||
2687 | (int)min->ksize, (char *)NODEKEY(min)); | ||||
2688 | } | ||||
2689 | |||||
2690 | /* Split page <*mpp>, and insert <key,(data|newpgno)> in either left or | ||||
2691 | * right sibling, at index <*newindxp> (as if unsplit). Updates *mpp and | ||||
2692 | * *newindxp with the actual values after split, ie if *mpp and *newindxp | ||||
2693 | * refer to a node in the new right sibling page. | ||||
2694 | */ | ||||
2695 | static int | ||||
2696 | btree_split(struct btree *bt, struct mpage **mpp, unsigned int *newindxp, | ||||
2697 | struct btval *newkey, struct btval *newdata, pgno_t newpgno) | ||||
2698 | { | ||||
2699 | uint8_t flags; | ||||
2700 | int rc = BT_SUCCESS0, ins_new = 0; | ||||
2701 | indx_t newindx; | ||||
2702 | pgno_t pgno = 0; | ||||
2703 | size_t orig_pfx_len, left_pfx_diff, right_pfx_diff, pfx_diff; | ||||
2704 | unsigned int i, j, split_indx; | ||||
2705 | struct node *node; | ||||
2706 | struct mpage *pright, *p, *mp; | ||||
2707 | struct btval sepkey, rkey, rdata; | ||||
2708 | struct btkey tmpkey; | ||||
2709 | struct page *copy; | ||||
2710 | |||||
2711 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2711, __func__, "bt != NULL")); | ||||
2712 | assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2712, __func__, "bt->txn != NULL")); | ||||
2713 | |||||
2714 | mp = *mpp; | ||||
2715 | newindx = *newindxp; | ||||
2716 | |||||
2717 | DPRINTF("-----> splitting %s page %u and adding [%.*s] at index %d", | ||||
2718 | IS_LEAF(mp) ? "leaf" : "branch", mp->pgno, | ||||
2719 | (int)newkey->size, (char *)newkey->data, *newindxp); | ||||
2720 | DPRINTF("page %u has prefix [%.*s]", mp->pgno, | ||||
2721 | (int)mp->prefix.len, (char *)mp->prefix.str); | ||||
2722 | orig_pfx_len = mp->prefix.len; | ||||
2723 | |||||
2724 | if (mp->parent == NULL((void*)0)) { | ||||
2725 | if ((mp->parent = btree_new_page(bt, P_BRANCH0x01)) == NULL((void*)0)) | ||||
2726 | return BT_FAIL-1; | ||||
2727 | mp->parent_index = 0; | ||||
2728 | bt->txn->root = mp->parent->pgno; | ||||
2729 | DPRINTF("root split! new root = %u", mp->parent->pgno); | ||||
2730 | bt->meta.depth++; | ||||
2731 | |||||
2732 | /* Add left (implicit) pointer. */ | ||||
2733 | if (btree_add_node(bt, mp->parent, 0, NULL((void*)0), NULL((void*)0), | ||||
2734 | mp->pgno, 0) != BT_SUCCESS0) | ||||
2735 | return BT_FAIL-1; | ||||
2736 | } else { | ||||
2737 | DPRINTF("parent branch page is %u", mp->parent->pgno); | ||||
2738 | } | ||||
2739 | |||||
2740 | /* Create a right sibling. */ | ||||
2741 | if ((pright = btree_new_page(bt, mp->page->flags)) == NULL((void*)0)) | ||||
2742 | return BT_FAIL-1; | ||||
2743 | pright->parent = mp->parent; | ||||
2744 | pright->parent_index = mp->parent_index + 1; | ||||
2745 | DPRINTF("new right sibling: page %u", pright->pgno); | ||||
2746 | |||||
2747 | /* Move half of the keys to the right sibling. */ | ||||
2748 | if ((copy = malloc(bt->head.psize)) == NULL((void*)0)) | ||||
2749 | return BT_FAIL-1; | ||||
2750 | bcopy(mp->page, copy, bt->head.psize); | ||||
2751 | assert(mp->ref == 0)((mp->ref == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2751, __func__, "mp->ref == 0")); /* XXX */ | ||||
2752 | memset(&mp->page->ptrs, 0, bt->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs)); | ||||
2753 | mp->page->lowerb.fb.fb_lower = PAGEHDRSZ__builtin_offsetof(struct page, ptrs); | ||||
2754 | mp->page->upperb.fb.fb_upper = bt->head.psize; | ||||
2755 | |||||
2756 | split_indx = NUMKEYSP(copy)(((copy)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs )) >> 1) / 2 + 1; | ||||
2757 | |||||
2758 | /* First find the separating key between the split pages. | ||||
2759 | */ | ||||
2760 | memset(&sepkey, 0, sizeof(sepkey)); | ||||
2761 | if (newindx == split_indx) { | ||||
2762 | sepkey.size = newkey->size; | ||||
2763 | sepkey.data = newkey->data; | ||||
2764 | remove_prefix(bt, &sepkey, mp->prefix.len); | ||||
2765 | } else { | ||||
2766 | node = NODEPTRP(copy, split_indx)((struct node *)((char *)(copy) + (copy)->ptrs[split_indx] )); | ||||
2767 | sepkey.size = node->ksize; | ||||
2768 | sepkey.data = NODEKEY(node)(void *)((node)->data); | ||||
2769 | } | ||||
2770 | |||||
2771 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)) && bt->cmp == NULL((void*)0)) { | ||||
2772 | /* Find the smallest separator. */ | ||||
2773 | /* Ref: Prefix B-trees, R. Bayer, K. Unterauer, 1977 */ | ||||
2774 | node = NODEPTRP(copy, split_indx - 1)((struct node *)((char *)(copy) + (copy)->ptrs[split_indx - 1])); | ||||
2775 | bt_reduce_separator(bt, node, &sepkey); | ||||
2776 | } | ||||
2777 | |||||
2778 | /* Fix separator wrt parent prefix. */ | ||||
2779 | if (bt->cmp
| ||||
2780 | tmpkey.len = sizeof(tmpkey.str); | ||||
2781 | concat_prefix(bt, mp->prefix.str, mp->prefix.len, | ||||
2782 | sepkey.data, sepkey.size, tmpkey.str, &tmpkey.len); | ||||
2783 | sepkey.data = tmpkey.str; | ||||
2784 | sepkey.size = tmpkey.len; | ||||
2785 | } | ||||
2786 | |||||
2787 | DPRINTF("separator is [%.*s]", (int)sepkey.size, (char *)sepkey.data); | ||||
2788 | |||||
2789 | /* Copy separator key to the parent. | ||||
2790 | */ | ||||
2791 | if (SIZELEFT(pright->parent)(indx_t)((pright->parent)->page->b.fb.fb_upper - (pright ->parent)->page->b.fb.fb_lower) < bt_branch_size(bt, &sepkey)) { | ||||
2792 | rc = btree_split(bt, &pright->parent, &pright->parent_index, | ||||
2793 | &sepkey, NULL((void*)0), pright->pgno); | ||||
2794 | |||||
2795 | /* Right page might now have changed parent. | ||||
2796 | * Check if left page also changed parent. | ||||
2797 | */ | ||||
2798 | if (pright->parent != mp->parent && | ||||
2799 | mp->parent_index >= NUMKEYS(mp->parent)(((mp->parent)->page->b.fb.fb_lower - __builtin_offsetof (struct page, ptrs)) >> 1)) { | ||||
2800 | mp->parent = pright->parent; | ||||
2801 | mp->parent_index = pright->parent_index - 1; | ||||
2802 | } | ||||
2803 | } else { | ||||
2804 | remove_prefix(bt, &sepkey, pright->parent->prefix.len); | ||||
2805 | rc = btree_add_node(bt, pright->parent, pright->parent_index, | ||||
2806 | &sepkey, NULL((void*)0), pright->pgno, 0); | ||||
2807 | } | ||||
2808 | if (rc
| ||||
2809 | free(copy); | ||||
2810 | return BT_FAIL-1; | ||||
2811 | } | ||||
2812 | |||||
2813 | /* Update prefix for right and left page, if the parent was split. | ||||
2814 | */ | ||||
2815 | find_common_prefix(bt, pright); | ||||
2816 | assert(orig_pfx_len <= pright->prefix.len)((orig_pfx_len <= pright->prefix.len) ? (void)0 : __assert2 ("/usr/src/usr.sbin/ldapd/btree.c", 2816, __func__, "orig_pfx_len <= pright->prefix.len" )); | ||||
2817 | right_pfx_diff = pright->prefix.len - orig_pfx_len; | ||||
2818 | |||||
2819 | find_common_prefix(bt, mp); | ||||
2820 | assert(orig_pfx_len <= mp->prefix.len)((orig_pfx_len <= mp->prefix.len) ? (void)0 : __assert2 ("/usr/src/usr.sbin/ldapd/btree.c", 2820, __func__, "orig_pfx_len <= mp->prefix.len" )); | ||||
2821 | left_pfx_diff = mp->prefix.len - orig_pfx_len; | ||||
2822 | |||||
2823 | for (i = j = 0; i <= NUMKEYSP(copy)(((copy)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs )) >> 1); j++) { | ||||
2824 | if (i < split_indx) { | ||||
2825 | /* Re-insert in left sibling. */ | ||||
2826 | p = mp; | ||||
2827 | pfx_diff = left_pfx_diff; | ||||
2828 | } else { | ||||
2829 | /* Insert in right sibling. */ | ||||
2830 | if (i
| ||||
2831 | /* Reset insert index for right sibling. */ | ||||
2832 | j = (i == newindx && ins_new); | ||||
2833 | p = pright; | ||||
2834 | pfx_diff = right_pfx_diff; | ||||
2835 | } | ||||
2836 | |||||
2837 | if (i
| ||||
2838 | /* Insert the original entry that caused the split. */ | ||||
2839 | rkey.data = newkey->data; | ||||
2840 | rkey.size = newkey->size; | ||||
2841 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) { | ||||
2842 | rdata.data = newdata->data; | ||||
2843 | rdata.size = newdata->size; | ||||
2844 | } else | ||||
2845 | pgno = newpgno; | ||||
2846 | flags = 0; | ||||
2847 | pfx_diff = p->prefix.len; | ||||
2848 | |||||
2849 | ins_new = 1; | ||||
2850 | |||||
2851 | /* Update page and index for the new key. */ | ||||
2852 | *newindxp = j; | ||||
2853 | *mpp = p; | ||||
2854 | } else if (i == NUMKEYSP(copy)(((copy)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs )) >> 1)) { | ||||
2855 | break; | ||||
2856 | } else { | ||||
2857 | node = NODEPTRP(copy, i)((struct node *)((char *)(copy) + (copy)->ptrs[i])); | ||||
2858 | rkey.data = NODEKEY(node)(void *)((node)->data); | ||||
2859 | rkey.size = node->ksize; | ||||
2860 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) { | ||||
2861 | rdata.data = NODEDATA(node)(void *)((char *)(node)->data + (node)->ksize); | ||||
2862 | rdata.size = node->n_dsizep.np_dsize; | ||||
2863 | } else | ||||
2864 | pgno = node->n_pgnop.np_pgno; | ||||
2865 | flags = node->flags; | ||||
2866 | |||||
2867 | i++; | ||||
2868 | } | ||||
2869 | |||||
2870 | if (!IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)) && j
| ||||
2871 | /* First branch index doesn't need key data. */ | ||||
2872 | rkey.size = 0; | ||||
2873 | } else | ||||
2874 | remove_prefix(bt, &rkey, pfx_diff); | ||||
2875 | |||||
2876 | rc = btree_add_node(bt, p, j, &rkey, &rdata, pgno,flags); | ||||
2877 | } | ||||
2878 | |||||
2879 | free(copy); | ||||
2880 | return rc; | ||||
2881 | } | ||||
2882 | |||||
2883 | int | ||||
2884 | btree_txn_put(struct btree *bt, struct btree_txn *txn, | ||||
2885 | struct btval *key, struct btval *data, unsigned int flags) | ||||
2886 | { | ||||
2887 | int rc = BT_SUCCESS0, exact, close_txn = 0; | ||||
2888 | unsigned int ki; | ||||
2889 | struct node *leaf; | ||||
2890 | struct mpage *mp; | ||||
2891 | struct btval xkey; | ||||
2892 | |||||
2893 | assert(key != NULL)((key != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2893, __func__, "key != NULL")); | ||||
| |||||
2894 | assert(data != NULL)((data != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2894, __func__, "data != NULL")); | ||||
2895 | |||||
2896 | if (bt != NULL((void*)0) && txn != NULL((void*)0) && bt != txn->bt) { | ||||
2897 | errno(*__errno()) = EINVAL22; | ||||
2898 | return BT_FAIL-1; | ||||
2899 | } | ||||
2900 | |||||
2901 | if (txn != NULL((void*)0) && F_ISSET(txn->flags, BT_TXN_RDONLY)(((txn->flags) & (0x01)) == (0x01))) { | ||||
2902 | errno(*__errno()) = EINVAL22; | ||||
2903 | return BT_FAIL-1; | ||||
2904 | } | ||||
2905 | |||||
2906 | if (bt
| ||||
2907 | if (txn
| ||||
2908 | errno(*__errno()) = EINVAL22; | ||||
2909 | return BT_FAIL-1; | ||||
2910 | } | ||||
2911 | bt = txn->bt; | ||||
2912 | } | ||||
2913 | |||||
2914 | if (key->size == 0 || key->size > MAXKEYSIZE255) { | ||||
2915 | errno(*__errno()) = EINVAL22; | ||||
2916 | return BT_FAIL-1; | ||||
2917 | } | ||||
2918 | |||||
2919 | DPRINTF("==> put key %.*s, size %zu, data size %zu", | ||||
2920 | (int)key->size, (char *)key->data, key->size, data->size); | ||||
2921 | |||||
2922 | if (txn
| ||||
2923 | close_txn = 1; | ||||
2924 | if ((txn = btree_txn_begin(bt, 0)) == NULL((void*)0)) | ||||
2925 | return BT_FAIL-1; | ||||
2926 | } | ||||
2927 | |||||
2928 | rc = btree_search_page(bt, txn, key, NULL((void*)0), 1, &mp); | ||||
2929 | if (rc
| ||||
2930 | leaf = btree_search_node(bt, mp, key, &exact, &ki); | ||||
2931 | if (leaf && exact) { | ||||
2932 | if (F_ISSET(flags, BT_NOOVERWRITE)(((flags) & (1)) == (1))) { | ||||
2933 | DPRINTF("duplicate key %.*s", | ||||
2934 | (int)key->size, (char *)key->data); | ||||
2935 | errno(*__errno()) = EEXIST17; | ||||
2936 | rc = BT_FAIL-1; | ||||
2937 | goto done; | ||||
2938 | } | ||||
2939 | btree_del_node(bt, mp, ki); | ||||
2940 | } | ||||
2941 | if (leaf == NULL((void*)0)) { /* append if not found */ | ||||
2942 | ki = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); | ||||
2943 | DPRINTF("appending key at index %d", ki); | ||||
2944 | } | ||||
2945 | } else if (errno(*__errno()) == ENOENT2) { | ||||
2946 | /* new file, just write a root leaf page */ | ||||
2947 | DPRINTF("allocating new root leaf page"); | ||||
2948 | if ((mp = btree_new_page(bt, P_LEAF0x02)) == NULL((void*)0)) { | ||||
2949 | rc = BT_FAIL-1; | ||||
2950 | goto done; | ||||
2951 | } | ||||
2952 | txn->root = mp->pgno; | ||||
2953 | bt->meta.depth++; | ||||
2954 | ki = 0; | ||||
2955 | } | ||||
2956 | else | ||||
2957 | goto done; | ||||
2958 | |||||
2959 | assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void )0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 2959, __func__ , "IS_LEAF(mp)")); | ||||
2960 | DPRINTF("there are %lu keys, should insert new key at index %u", | ||||
2961 | NUMKEYS(mp), ki); | ||||
2962 | |||||
2963 | /* Copy the key pointer as it is modified by the prefix code. The | ||||
2964 | * caller might have malloc'ed the data. | ||||
2965 | */ | ||||
2966 | xkey.data = key->data; | ||||
2967 | xkey.size = key->size; | ||||
2968 | |||||
2969 | if (SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page-> b.fb.fb_lower) < bt_leaf_size(bt, key, data)) { | ||||
2970 | rc = btree_split(bt, &mp, &ki, &xkey, data, P_INVALID0xFFFFFFFF); | ||||
2971 | } else { | ||||
2972 | /* There is room already in this leaf page. */ | ||||
2973 | remove_prefix(bt, &xkey, mp->prefix.len); | ||||
2974 | rc = btree_add_node(bt, mp, ki, &xkey, data, 0, 0); | ||||
2975 | } | ||||
2976 | |||||
2977 | if (rc != BT_SUCCESS0) | ||||
2978 | txn->flags |= BT_TXN_ERROR0x02; | ||||
2979 | else | ||||
2980 | bt->meta.entries++; | ||||
2981 | |||||
2982 | done: | ||||
2983 | if (close_txn) { | ||||
2984 | if (rc == BT_SUCCESS0) | ||||
2985 | rc = btree_txn_commit(txn); | ||||
2986 | else | ||||
2987 | btree_txn_abort(txn); | ||||
2988 | } | ||||
2989 | mpage_prune(bt); | ||||
2990 | return rc; | ||||
2991 | } | ||||
2992 | |||||
2993 | static pgno_t | ||||
2994 | btree_compact_tree(struct btree *bt, pgno_t pgno, struct btree *btc) | ||||
2995 | { | ||||
2996 | ssize_t rc; | ||||
2997 | indx_t i; | ||||
2998 | pgno_t *pnext, next; | ||||
2999 | struct node *node; | ||||
3000 | struct page *p; | ||||
3001 | struct mpage *mp; | ||||
3002 | |||||
3003 | /* Get the page and make a copy of it. | ||||
3004 | */ | ||||
3005 | if ((mp = btree_get_mpage(bt, pgno)) == NULL((void*)0)) | ||||
3006 | return P_INVALID0xFFFFFFFF; | ||||
3007 | if ((p = malloc(bt->head.psize)) == NULL((void*)0)) | ||||
3008 | return P_INVALID0xFFFFFFFF; | ||||
3009 | bcopy(mp->page, p, bt->head.psize); | ||||
3010 | |||||
3011 | /* Go through all nodes in the (copied) page and update the | ||||
3012 | * page pointers. | ||||
3013 | */ | ||||
3014 | if (F_ISSET(p->flags, P_BRANCH)(((p->flags) & (0x01)) == (0x01))) { | ||||
3015 | for (i = 0; i < NUMKEYSP(p)(((p)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs )) >> 1); i++) { | ||||
3016 | node = NODEPTRP(p, i)((struct node *)((char *)(p) + (p)->ptrs[i])); | ||||
3017 | node->n_pgnop.np_pgno = btree_compact_tree(bt, node->n_pgnop.np_pgno, btc); | ||||
3018 | if (node->n_pgnop.np_pgno == P_INVALID0xFFFFFFFF) { | ||||
3019 | free(p); | ||||
3020 | return P_INVALID0xFFFFFFFF; | ||||
3021 | } | ||||
3022 | } | ||||
3023 | } else if (F_ISSET(p->flags, P_LEAF)(((p->flags) & (0x02)) == (0x02))) { | ||||
3024 | for (i = 0; i < NUMKEYSP(p)(((p)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs )) >> 1); i++) { | ||||
3025 | node = NODEPTRP(p, i)((struct node *)((char *)(p) + (p)->ptrs[i])); | ||||
3026 | if (F_ISSET(node->flags, F_BIGDATA)(((node->flags) & (0x01)) == (0x01))) { | ||||
3027 | bcopy(NODEDATA(node)(void *)((char *)(node)->data + (node)->ksize), &next, sizeof(next)); | ||||
3028 | next = btree_compact_tree(bt, next, btc); | ||||
3029 | if (next == P_INVALID0xFFFFFFFF) { | ||||
3030 | free(p); | ||||
3031 | return P_INVALID0xFFFFFFFF; | ||||
3032 | } | ||||
3033 | bcopy(&next, NODEDATA(node)(void *)((char *)(node)->data + (node)->ksize), sizeof(next)); | ||||
3034 | } | ||||
3035 | } | ||||
3036 | } else if (F_ISSET(p->flags, P_OVERFLOW)(((p->flags) & (0x04)) == (0x04))) { | ||||
3037 | pnext = &p->p_next_pgnob.pb_next_pgno; | ||||
3038 | if (*pnext > 0) { | ||||
3039 | *pnext = btree_compact_tree(bt, *pnext, btc); | ||||
3040 | if (*pnext == P_INVALID0xFFFFFFFF) { | ||||
3041 | free(p); | ||||
3042 | return P_INVALID0xFFFFFFFF; | ||||
3043 | } | ||||
3044 | } | ||||
3045 | } else | ||||
3046 | assert(0)((0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 3046, __func__, "0")); | ||||
3047 | |||||
3048 | pgno = p->pgno = btc->txn->next_pgno++; | ||||
3049 | rc = write(btc->fd, p, bt->head.psize); | ||||
3050 | free(p); | ||||
3051 | if (rc != (ssize_t)bt->head.psize) | ||||
3052 | return P_INVALID0xFFFFFFFF; | ||||
3053 | mpage_prune(bt); | ||||
3054 | return pgno; | ||||
3055 | } | ||||
3056 | |||||
3057 | int | ||||
3058 | btree_compact(struct btree *bt) | ||||
3059 | { | ||||
3060 | char *compact_path = NULL((void*)0); | ||||
3061 | struct btree *btc; | ||||
3062 | struct btree_txn *txn, *txnc = NULL((void*)0); | ||||
3063 | int fd; | ||||
3064 | pgno_t root; | ||||
3065 | |||||
3066 | assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 3066, __func__, "bt != NULL")); | ||||
3067 | |||||
3068 | DPRINTF("compacting btree %p with path %s", bt, bt->path); | ||||
3069 | |||||
3070 | if (bt->path == NULL((void*)0)) { | ||||
3071 | errno(*__errno()) = EINVAL22; | ||||
3072 | return BT_FAIL-1; | ||||
3073 | } | ||||
3074 | |||||
3075 | if ((txn = btree_txn_begin(bt, 0)) == NULL((void*)0)) | ||||
3076 | return BT_FAIL-1; | ||||
3077 | |||||
3078 | if (asprintf(&compact_path, "%s.compact.XXXXXX", bt->path) == -1) { | ||||
3079 | btree_txn_abort(txn); | ||||
3080 | return BT_FAIL-1; | ||||
3081 | } | ||||
3082 | fd = mkstemp(compact_path); | ||||
3083 | if (fd == -1) { | ||||
3084 | free(compact_path); | ||||
3085 | btree_txn_abort(txn); | ||||
3086 | return BT_FAIL-1; | ||||
3087 | } | ||||
3088 | |||||
3089 | if ((btc = btree_open_fd(fd, 0)) == NULL((void*)0)) | ||||
3090 | goto failed; | ||||
3091 | bcopy(&bt->meta, &btc->meta, sizeof(bt->meta)); | ||||
3092 | btc->meta.revisions = 0; | ||||
3093 | |||||
3094 | if ((txnc = btree_txn_begin(btc, 0)) == NULL((void*)0)) | ||||
3095 | goto failed; | ||||
3096 | |||||
3097 | if (bt->meta.root != P_INVALID0xFFFFFFFF) { | ||||
3098 | root = btree_compact_tree(bt, bt->meta.root, btc); | ||||
3099 | if (root == P_INVALID0xFFFFFFFF) | ||||
3100 | goto failed; | ||||
3101 | if (btree_write_meta(btc, root, 0) != BT_SUCCESS0) | ||||
3102 | goto failed; | ||||
3103 | } | ||||
3104 | |||||
3105 | fsync(fd); | ||||
3106 | |||||
3107 | DPRINTF("renaming %s to %s", compact_path, bt->path); | ||||
3108 | if (rename(compact_path, bt->path) != 0) | ||||
3109 | goto failed; | ||||
3110 | |||||
3111 | /* Write a "tombstone" meta page so other processes can pick up | ||||
3112 | * the change and re-open the file. | ||||
3113 | */ | ||||
3114 | if (btree_write_meta(bt, P_INVALID0xFFFFFFFF, BT_TOMBSTONE0x01) != BT_SUCCESS0) | ||||
3115 | goto failed; | ||||
3116 | |||||
3117 | btree_txn_abort(txn); | ||||
3118 | btree_txn_abort(txnc); | ||||
3119 | free(compact_path); | ||||
3120 | btree_close(btc); | ||||
3121 | mpage_prune(bt); | ||||
3122 | return 0; | ||||
3123 | |||||
3124 | failed: | ||||
3125 | btree_txn_abort(txn); | ||||
3126 | btree_txn_abort(txnc); | ||||
3127 | unlink(compact_path); | ||||
3128 | free(compact_path); | ||||
3129 | btree_close(btc); | ||||
3130 | mpage_prune(bt); | ||||
3131 | return BT_FAIL-1; | ||||
3132 | } | ||||
3133 | |||||
3134 | /* Reverts the last change. Truncates the file at the last root page. | ||||
3135 | */ | ||||
3136 | int | ||||
3137 | btree_revert(struct btree *bt) | ||||
3138 | { | ||||
3139 | if (btree_read_meta(bt, NULL((void*)0)) != 0) | ||||
3140 | return -1; | ||||
3141 | |||||
3142 | DPRINTF("truncating file at page %u", bt->meta.root); | ||||
3143 | return ftruncate(bt->fd, bt->head.psize * bt->meta.root); | ||||
3144 | } | ||||
3145 | |||||
3146 | void | ||||
3147 | btree_set_cache_size(struct btree *bt, unsigned int cache_size) | ||||
3148 | { | ||||
3149 | bt->stat.max_cache = cache_size; | ||||
3150 | } | ||||
3151 | |||||
3152 | unsigned int | ||||
3153 | btree_get_flags(struct btree *bt) | ||||
3154 | { | ||||
3155 | return (bt->flags & ~BT_FIXPADDING0x01); | ||||
3156 | } | ||||
3157 | |||||
3158 | const char * | ||||
3159 | btree_get_path(struct btree *bt) | ||||
3160 | { | ||||
3161 | return bt->path; | ||||
3162 | } | ||||
3163 | |||||
3164 | const struct btree_stat * | ||||
3165 | btree_stat(struct btree *bt) | ||||
3166 | { | ||||
3167 | if (bt == NULL((void*)0)) | ||||
3168 | return NULL((void*)0); | ||||
3169 | |||||
3170 | bt->stat.branch_pages = bt->meta.branch_pages; | ||||
3171 | bt->stat.leaf_pages = bt->meta.leaf_pages; | ||||
3172 | bt->stat.overflow_pages = bt->meta.overflow_pages; | ||||
3173 | bt->stat.revisions = bt->meta.revisions; | ||||
3174 | bt->stat.depth = bt->meta.depth; | ||||
3175 | bt->stat.entries = bt->meta.entries; | ||||
3176 | bt->stat.psize = bt->head.psize; | ||||
3177 | bt->stat.created_at = bt->meta.created_at; | ||||
3178 | |||||
3179 | return &bt->stat; | ||||
3180 | } | ||||
3181 |