Bug Summary

File: src/usr.sbin/ldapctl/../ldapd/btree.c
Warning: line 1963, column 13
Assigned value is garbage or undefined
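
For context, this diagnostic comes from the analyzer's uninitialized-value checks in the "core" package enabled below; the flagged statement at btree.c:1963 falls outside this excerpt. A minimal, hypothetical C example of the pattern the checker reports (invented names, not code from btree.c) looks like:

/*
 * Hypothetical illustration only -- not the flagged code at btree.c:1963.
 * "Assigned value is garbage or undefined" is reported when the right-hand
 * side of an assignment reads memory that was never written on some path.
 */
#include <stdio.h>

int
main(void)
{
	int	src[4];		/* elements never initialized */
	int	dst;

	dst = src[2];		/* analyzer: Assigned value is garbage or undefined */
	printf("%d\n", dst);
	return 0;
}

Reports of this kind usually mean an earlier path skipped the initialization (for example, a failed read whose return value was not checked), so the fix is to ensure the source of the assignment is written before it is used.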

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name btree.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -pic-is-pie -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/usr.sbin/ldapctl/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/usr.sbin/ldapctl/../ldapd -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -fdebug-compilation-dir=/usr/src/usr.sbin/ldapctl/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c /usr/src/usr.sbin/ldapctl/../ldapd/btree.c
1/* $OpenBSD: btree.c,v 1.38 2017/05/26 21:23:14 sthen Exp $ */
2
3/*
4 * Copyright (c) 2009, 2010 Martin Hedenfalk <martin@bzero.se>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <sys/types.h>
20#include <sys/tree.h>
21#include <sys/stat.h>
22#include <sys/queue.h>
23#include <sys/uio.h>
24
25#include <assert.h>
26#include <err.h>
27#include <errno.h>
28#include <fcntl.h>
29#include <stddef.h>
30#include <stdint.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34#include <time.h>
35#include <unistd.h>
36
37#include "btree.h"
38
39/* #define DEBUG */
40
41#ifdef DEBUG
42# define DPRINTF(...) do { fprintf(stderr, "%s:%d: ", __func__, __LINE__); \
43 fprintf(stderr, __VA_ARGS__); \
44 fprintf(stderr, "\n"); } while(0)
45#else
46# define DPRINTF(...)
47#endif
48
49#define PAGESIZE	 4096
50#define BT_MINKEYS	 4
51#define BT_MAGIC	 0xB3DBB3DB
52#define BT_VERSION	 4
53#define MAXKEYSIZE	 255
54
55#define P_INVALID	 0xFFFFFFFF
56
57#define F_ISSET(w, f)	 (((w) & (f)) == (f))
58#define MINIMUM(a,b)	 ((a) < (b) ? (a) : (b))
59
60typedef uint32_t pgno_t;
61typedef uint16_t indx_t;
62
63/* There are four page types: meta, index, leaf and overflow.
64 * They all share the same page header.
65 */
66struct page { /* represents an on-disk page */
67 pgno_t pgno; /* page number */
68#define P_BRANCH	 0x01		/* branch page */
69#define P_LEAF		 0x02		/* leaf page */
70#define P_OVERFLOW	 0x04		/* overflow page */
71#define P_META		 0x08		/* meta page */
72#define P_HEAD		 0x10		/* header page */
73 uint32_t flags;
74#define lower		b.fb.fb_lower
75#define upper		b.fb.fb_upper
76#define p_next_pgno	b.pb_next_pgno
77 union page_bounds {
78 struct {
79 indx_t fb_lower; /* lower bound of free space */
80 indx_t fb_upper; /* upper bound of free space */
81 } fb;
82 pgno_t pb_next_pgno; /* overflow page linked list */
83 } b;
84 indx_t ptrs[1]; /* dynamic size */
85} __packed;
86
87#define PAGEHDRSZ	 offsetof(struct page, ptrs)
88
89#define NUMKEYSP(p)	 (((p)->lower - PAGEHDRSZ) >> 1)
90#define NUMKEYS(mp)	 (((mp)->page->lower - PAGEHDRSZ) >> 1)
91#define SIZELEFT(mp)	 (indx_t)((mp)->page->upper - (mp)->page->lower)
92#define PAGEFILL(bt, mp) (1000 * ((bt)->head.psize - PAGEHDRSZ - SIZELEFT(mp)) / \
93 ((bt)->head.psize - PAGEHDRSZ))
94#define IS_LEAF(mp)	 F_ISSET((mp)->page->flags, P_LEAF)
95#define IS_BRANCH(mp)	 F_ISSET((mp)->page->flags, P_BRANCH)
96#define IS_OVERFLOW(mp)	 F_ISSET((mp)->page->flags, P_OVERFLOW)
97
98struct bt_head { /* header page content */
99 uint32_t magic;
100 uint32_t version;
101 uint32_t flags;
102 uint32_t psize; /* page size */
103} __packed;
104
105struct bt_meta { /* meta (footer) page content */
106#define BT_TOMBSTONE	 0x01		/* file is replaced */
107 uint32_t flags;
108 pgno_t root; /* page number of root page */
109 pgno_t prev_meta; /* previous meta page number */
110 time_t created_at;
111 uint32_t branch_pages;
112 uint32_t leaf_pages;
113 uint32_t overflow_pages;
114 uint32_t revisions;
115 uint32_t depth;
116 uint64_t entries;
117 unsigned char hash[SHA_DIGEST_LENGTH];
118} __packed;
119
120struct btkey {
121 size_t len;
122 char str[MAXKEYSIZE];
123};
124
125struct mpage { /* an in-memory cached page */
126 RB_ENTRY(mpage) entry; /* page cache entry */
127 SIMPLEQ_ENTRY(mpage) next; /* queue of dirty pages */
128 TAILQ_ENTRY(mpage) lru_next; /* LRU queue */
129 struct mpage *parent; /* NULL if root */
130 unsigned int parent_index; /* keep track of node index */
131 struct btkey prefix;
132 struct page *page;
133 pgno_t pgno; /* copy of page->pgno */
134 short ref; /* increased by cursors */
135 short dirty; /* 1 if on dirty queue */
136};
137RB_HEAD(page_cache, mpage);
138SIMPLEQ_HEAD(dirty_queue, mpage);
139TAILQ_HEAD(lru_queue, mpage);
140
141static int mpage_cmp(struct mpage *a, struct mpage *b);
142static struct mpage *mpage_lookup(struct btree *bt, pgno_t pgno);
143static void mpage_add(struct btree *bt, struct mpage *mp);
144static void mpage_free(struct mpage *mp);
145static void mpage_del(struct btree *bt, struct mpage *mp);
146static void mpage_flush(struct btree *bt);
147static struct mpage *mpage_copy(struct btree *bt, struct mpage *mp);
148static void mpage_prune(struct btree *bt);
149static void mpage_dirty(struct btree *bt, struct mpage *mp);
150static struct mpage *mpage_touch(struct btree *bt, struct mpage *mp);
151
152RB_PROTOTYPE(page_cache, mpage, entry, mpage_cmp);
153RB_GENERATE(page_cache, mpage, entry, mpage_cmp);
154
155struct ppage { /* ordered list of pages */
156 SLIST_ENTRY(ppage) entry;
157 struct mpage *mpage;
158 unsigned int ki; /* cursor index on page */
159};
160SLIST_HEAD(page_stack, ppage);
161
162#define CURSOR_EMPTY(c)		 SLIST_EMPTY(&(c)->stack)
163#define CURSOR_TOP(c)		 SLIST_FIRST(&(c)->stack)
164#define CURSOR_POP(c)		 SLIST_REMOVE_HEAD(&(c)->stack, entry)
165#define CURSOR_PUSH(c,p)	 SLIST_INSERT_HEAD(&(c)->stack, p, entry)
166
167struct cursor {
168 struct btree *bt;
169 struct btree_txn *txn;
170 struct page_stack stack; /* stack of parent pages */
171 short initialized; /* 1 if initialized */
172 short eof; /* 1 if end is reached */
173};
174
175#define METAHASHLEN	 offsetof(struct bt_meta, hash)
176#define METADATA(p)	 ((void *)((char *)p + PAGEHDRSZ))
177
178struct node {
179#define n_pgno		p.np_pgno
180#define n_dsize		p.np_dsize
181 union {
182 pgno_t np_pgno; /* child page number */
183 uint32_t np_dsize; /* leaf data size */
184 } p;
185 uint16_t ksize; /* key size */
186#define F_BIGDATA	 0x01		/* data put on overflow page */
187 uint8_t flags;
188 char data[1];
189} __packed;
190
191struct btree_txn {
192 pgno_t root; /* current / new root page */
193 pgno_t next_pgno; /* next unallocated page */
194 struct btree *bt; /* btree is ref'd */
195 struct dirty_queue *dirty_queue; /* modified pages */
196#define BT_TXN_RDONLY	 0x01		/* read-only transaction */
197#define BT_TXN_ERROR	 0x02		/* an error has occurred */
198 unsigned int flags;
199};
200
201struct btree {
202 int fd;
203 char *path;
204#define BT_FIXPADDING	 0x01		/* internal */
205 unsigned int flags;
206 bt_cmp_func cmp; /* user compare function */
207 struct bt_head head;
208 struct bt_meta meta;
209 struct page_cache *page_cache;
210 struct lru_queue *lru_queue;
211 struct btree_txn *txn; /* current write transaction */
212 int ref; /* increased by cursors & txn */
213 struct btree_stat stat;
214 off_t size; /* current file size */
215};
216
217#define NODESIZE	 offsetof(struct node, data)
218
219#define INDXSIZE(k)	 (NODESIZE + ((k) == NULL ? 0 : (k)->size))
220#define LEAFSIZE(k, d)	 (NODESIZE + (k)->size + (d)->size)
221#define NODEPTRP(p, i)	 ((struct node *)((char *)(p) + (p)->ptrs[i]))
222#define NODEPTR(mp, i)	 NODEPTRP((mp)->page, i)
223#define NODEKEY(node)	 (void *)((node)->data)
224#define NODEDATA(node)	 (void *)((char *)(node)->data + (node)->ksize)
225#define NODEPGNO(node)	 ((node)->p.np_pgno)
226#define NODEDSZ(node)	 ((node)->p.np_dsize)
227
228#define BT_COMMIT_PAGES	 64	/* max number of pages to write in one commit */
229#define BT_MAXCACHE_DEF	 1024	/* max number of pages to keep in cache */
230
231static int btree_read_page(struct btree *bt, pgno_t pgno,
232 struct page *page);
233static struct mpage *btree_get_mpage(struct btree *bt, pgno_t pgno);
234static int btree_search_page_root(struct btree *bt,
235 struct mpage *root, struct btval *key,
236 struct cursor *cursor, int modify,
237 struct mpage **mpp);
238static int btree_search_page(struct btree *bt,
239 struct btree_txn *txn, struct btval *key,
240 struct cursor *cursor, int modify,
241 struct mpage **mpp);
242
243static int btree_write_header(struct btree *bt, int fd);
244static int btree_read_header(struct btree *bt);
245static int btree_is_meta_page(struct page *p);
246static int btree_read_meta(struct btree *bt, pgno_t *p_next);
247static int btree_write_meta(struct btree *bt, pgno_t root,
248 unsigned int flags);
249static void btree_ref(struct btree *bt);
250
251static struct node *btree_search_node(struct btree *bt, struct mpage *mp,
252 struct btval *key, int *exactp, unsigned int *kip);
253static int btree_add_node(struct btree *bt, struct mpage *mp,
254 indx_t indx, struct btval *key, struct btval *data,
255 pgno_t pgno, uint8_t flags);
256static void btree_del_node(struct btree *bt, struct mpage *mp,
257 indx_t indx);
258static int btree_read_data(struct btree *bt, struct mpage *mp,
259 struct node *leaf, struct btval *data);
260
261static int btree_rebalance(struct btree *bt, struct mpage *mp);
262static int btree_update_key(struct btree *bt, struct mpage *mp,
263 indx_t indx, struct btval *key);
264static int btree_adjust_prefix(struct btree *bt,
265 struct mpage *src, int delta);
266static int btree_move_node(struct btree *bt, struct mpage *src,
267 indx_t srcindx, struct mpage *dst, indx_t dstindx);
268static int btree_merge(struct btree *bt, struct mpage *src,
269 struct mpage *dst);
270static int btree_split(struct btree *bt, struct mpage **mpp,
271 unsigned int *newindxp, struct btval *newkey,
272 struct btval *newdata, pgno_t newpgno);
273static struct mpage *btree_new_page(struct btree *bt, uint32_t flags);
274static int btree_write_overflow_data(struct btree *bt,
275 struct page *p, struct btval *data);
276
277static void cursor_pop_page(struct cursor *cursor);
278static struct ppage *cursor_push_page(struct cursor *cursor,
279 struct mpage *mp);
280
281static int bt_set_key(struct btree *bt, struct mpage *mp,
282 struct node *node, struct btval *key);
283static int btree_sibling(struct cursor *cursor, int move_right);
284static int btree_cursor_next(struct cursor *cursor,
285 struct btval *key, struct btval *data);
286static int btree_cursor_set(struct cursor *cursor,
287 struct btval *key, struct btval *data, int *exactp);
288static int btree_cursor_first(struct cursor *cursor,
289 struct btval *key, struct btval *data);
290
291static void bt_reduce_separator(struct btree *bt, struct node *min,
292 struct btval *sep);
293static void remove_prefix(struct btree *bt, struct btval *key,
294 size_t pfxlen);
295static void expand_prefix(struct btree *bt, struct mpage *mp,
296 indx_t indx, struct btkey *expkey);
297static void concat_prefix(struct btree *bt, char *s1, size_t n1,
298 char *s2, size_t n2, char *cs, size_t *cn);
299static void common_prefix(struct btree *bt, struct btkey *min,
300 struct btkey *max, struct btkey *pfx);
301static void find_common_prefix(struct btree *bt, struct mpage *mp);
302
303static size_t bt_leaf_size(struct btree *bt, struct btval *key,
304 struct btval *data);
305static size_t bt_branch_size(struct btree *bt, struct btval *key);
306
307static pgno_t btree_compact_tree(struct btree *bt, pgno_t pgno,
308 struct btree *btc);
309
310static int memncmp(const void *s1, size_t n1,
311 const void *s2, size_t n2);
312static int memnrcmp(const void *s1, size_t n1,
313 const void *s2, size_t n2);
314
315static int
316memncmp(const void *s1, size_t n1, const void *s2, size_t n2)
317{
318 if (n1 < n2) {
319 if (memcmp(s1, s2, n1) == 0)
320 return -1;
321 }
322 else if (n1 > n2) {
323 if (memcmp(s1, s2, n2) == 0)
324 return 1;
325 }
326 return memcmp(s1, s2, n1);
327}
328
329static int
330memnrcmp(const void *s1, size_t n1, const void *s2, size_t n2)
331{
332 const unsigned char *p1;
333 const unsigned char *p2;
334
335 if (n1 == 0)
336 return n2 == 0 ? 0 : -1;
337
338 if (n2 == 0)
339 return n1 == 0 ? 0 : 1;
340
341 p1 = (const unsigned char *)s1 + n1 - 1;
342 p2 = (const unsigned char *)s2 + n2 - 1;
343
344 while (*p1 == *p2) {
345 if (p1 == s1)
346 return (p2 == s2) ? 0 : -1;
347 if (p2 == s2)
348 return (p1 == p2) ? 0 : 1;
349 p1--;
350 p2--;
351 }
352 return *p1 - *p2;
353}
354
355int
356btree_cmp(struct btree *bt, const struct btval *a, const struct btval *b)
357{
358 return bt->cmp(a, b);
359}
360
361static void
362common_prefix(struct btree *bt, struct btkey *min, struct btkey *max,
363 struct btkey *pfx)
364{
365 size_t n = 0;
366 char *p1;
367 char *p2;
368
369 if (min->len == 0 || max->len == 0) {
370 pfx->len = 0;
371 return;
372 }
373
374 if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) {
375 p1 = min->str + min->len - 1;
376 p2 = max->str + max->len - 1;
377
378 while (*p1 == *p2) {
379 if (p1 < min->str || p2 < max->str)
380 break;
381 p1--;
382 p2--;
383 n++;
384 }
385
386 assert(n <= (int)sizeof(pfx->str));
387 pfx->len = n;
388 bcopy(p2 + 1, pfx->str, n);
389 } else {
390 p1 = min->str;
391 p2 = max->str;
392
393 while (*p1 == *p2) {
394 if (n == min->len || n == max->len)
395 break;
396 p1++;
397 p2++;
398 n++;
399 }
400
401 assert(n <= (int)sizeof(pfx->str));
402 pfx->len = n;
403 bcopy(max->str, pfx->str, n);
404 }
405}
406
407static void
408remove_prefix(struct btree *bt, struct btval *key, size_t pfxlen)
409{
410 if (pfxlen == 0 || bt->cmp != NULL((void*)0))
411 return;
412
413 DPRINTF("removing %zu bytes of prefix from key [%.*s]", pfxlen,
414 (int)key->size, (char *)key->data);
415 assert(pfxlen <= key->size);
416 key->size -= pfxlen;
417 if (!F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08)))
418 key->data = (char *)key->data + pfxlen;
419}
420
421static void
422expand_prefix(struct btree *bt, struct mpage *mp, indx_t indx,
423 struct btkey *expkey)
424{
425 struct node *node;
426
427 node = NODEPTR(mp, indx);
428 expkey->len = sizeof(expkey->str);
429 concat_prefix(bt, mp->prefix.str, mp->prefix.len,
430 NODEKEY(node)(void *)((node)->data), node->ksize, expkey->str, &expkey->len);
431}
432
433static int
434bt_cmp(struct btree *bt, const struct btval *key1, const struct btval *key2,
435 struct btkey *pfx)
436{
437 if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08)))
438 return memnrcmp(key1->data, key1->size - pfx->len,
439 key2->data, key2->size);
440 else
441 return memncmp((char *)key1->data + pfx->len, key1->size - pfx->len,
442 key2->data, key2->size);
443}
444
445void
446btval_reset(struct btval *btv)
447{
448 if (btv) {
449 if (btv->mp)
450 btv->mp->ref--;
451 if (btv->free_data)
452 free(btv->data);
453 memset(btv, 0, sizeof(*btv));
454 }
455}
456
457static int
458mpage_cmp(struct mpage *a, struct mpage *b)
459{
460 if (a->pgno > b->pgno)
461 return 1;
462 if (a->pgno < b->pgno)
463 return -1;
464 return 0;
465}
466
467static struct mpage *
468mpage_lookup(struct btree *bt, pgno_t pgno)
469{
470 struct mpage find, *mp;
471
472 find.pgno = pgno;
473 mp = RB_FIND(page_cache, bt->page_cache, &find);
474 if (mp) {
475 bt->stat.hits++;
476 /* Update LRU queue. Move page to the end. */
477 TAILQ_REMOVE(bt->lru_queue, mp, lru_next);
478 TAILQ_INSERT_TAIL(bt->lru_queue, mp, lru_next);
479 }
480 return mp;
481}
482
483static void
484mpage_add(struct btree *bt, struct mpage *mp)
485{
486 assert(RB_INSERT(page_cache, bt->page_cache, mp) == NULL);
487 bt->stat.cache_size++;
488 TAILQ_INSERT_TAIL(bt->lru_queue, mp, lru_next);
489}
490
491static void
492mpage_free(struct mpage *mp)
493{
494 if (mp != NULL((void*)0)) {
495 free(mp->page);
496 free(mp);
497 }
498}
499
500static void
501mpage_del(struct btree *bt, struct mpage *mp)
502{
503 assert(RB_REMOVE(page_cache, bt->page_cache, mp) == mp);
504 assert(bt->stat.cache_size > 0);
505 bt->stat.cache_size--;
506 TAILQ_REMOVE(bt->lru_queue, mp, lru_next);
507}
508
509static void
510mpage_flush(struct btree *bt)
511{
512 struct mpage *mp;
513
514 while ((mp = RB_MIN(page_cache, bt->page_cache)) != NULL) {
515 mpage_del(bt, mp);
516 mpage_free(mp);
517 }
518}
519
520static struct mpage *
521mpage_copy(struct btree *bt, struct mpage *mp)
522{
523 struct mpage *copy;
524
525 if ((copy = calloc(1, sizeof(*copy))) == NULL((void*)0))
526 return NULL((void*)0);
527 if ((copy->page = malloc(bt->head.psize)) == NULL((void*)0)) {
528 free(copy);
529 return NULL((void*)0);
530 }
531 bcopy(mp->page, copy->page, bt->head.psize);
532 bcopy(&mp->prefix, &copy->prefix, sizeof(mp->prefix));
533 copy->parent = mp->parent;
534 copy->parent_index = mp->parent_index;
535 copy->pgno = mp->pgno;
536
537 return copy;
538}
539
540/* Remove the least recently used memory pages until the cache size is
541 * within the configured bounds. Pages referenced by cursors or returned
542 * key/data are not pruned.
543 */
544static void
545mpage_prune(struct btree *bt)
546{
547 struct mpage *mp, *next;
548
549 for (mp = TAILQ_FIRST(bt->lru_queue); mp; mp = next) {
550 if (bt->stat.cache_size <= bt->stat.max_cache)
551 break;
552 next = TAILQ_NEXT(mp, lru_next);
553 if (!mp->dirty && mp->ref <= 0) {
554 mpage_del(bt, mp);
555 mpage_free(mp);
556 }
557 }
558}
559
560/* Mark a page as dirty and push it on the dirty queue.
561 */
562static void
563mpage_dirty(struct btree *bt, struct mpage *mp)
564{
565 assert(bt != NULL);
566 assert(bt->txn != NULL);
567
568 if (!mp->dirty) {
569 mp->dirty = 1;
570 SIMPLEQ_INSERT_TAIL(bt->txn->dirty_queue, mp, next);
571 }
572}
573
574/* Touch a page: make it dirty and re-insert into tree with updated pgno.
575 */
576static struct mpage *
577mpage_touch(struct btree *bt, struct mpage *mp)
578{
579 assert(bt != NULL);
580 assert(bt->txn != NULL);
581 assert(mp != NULL);
582
583 if (!mp->dirty) {
584 DPRINTF("touching page %u -> %u", mp->pgno, bt->txn->next_pgno);
585 if (mp->ref == 0)
586 mpage_del(bt, mp);
587 else {
588 if ((mp = mpage_copy(bt, mp)) == NULL((void*)0))
589 return NULL((void*)0);
590 }
591 mp->pgno = mp->page->pgno = bt->txn->next_pgno++;
592 mpage_dirty(bt, mp);
593 mpage_add(bt, mp);
594
595 /* Update the page number to new touched page. */
596 if (mp->parent != NULL((void*)0))
597 NODEPGNO(NODEPTR(mp->parent,
598 mp->parent_index)) = mp->pgno;
599 }
600
601 return mp;
602}
603
604static int
605btree_read_page(struct btree *bt, pgno_t pgno, struct page *page)
606{
607 ssize_t rc;
608
609 DPRINTF("reading page %u", pgno);
610 bt->stat.reads++;
611 if ((rc = pread(bt->fd, page, bt->head.psize, (off_t)pgno*bt->head.psize)) == 0) {
612 DPRINTF("page %u doesn't exist", pgno);
613 errno(*__errno()) = ENOENT2;
614 return BT_FAIL-1;
615 } else if (rc != (ssize_t)bt->head.psize) {
616 if (rc > 0)
617 errno(*__errno()) = EINVAL22;
618 DPRINTF("read: %s", strerror(errno));
619 return BT_FAIL-1;
620 }
621
622 if (page->pgno != pgno) {
623 DPRINTF("page numbers don't match: %u != %u", pgno, page->pgno);
624 errno(*__errno()) = EINVAL22;
625 return BT_FAIL-1;
626 }
627
628 DPRINTF("page %u has flags 0x%X", pgno, page->flags);
629
630 return BT_SUCCESS0;
631}
632
633int
634btree_sync(struct btree *bt)
635{
636 if (!F_ISSET(bt->flags, BT_NOSYNC)(((bt->flags) & (0x02)) == (0x02)))
637 return fsync(bt->fd);
638 return 0;
639}
640
641struct btree_txn *
642btree_txn_begin(struct btree *bt, int rdonly)
643{
644 struct btree_txn *txn;
645
646 if (!rdonly && bt->txn != NULL((void*)0)) {
647 DPRINTF("write transaction already begun");
648 errno(*__errno()) = EBUSY16;
649 return NULL((void*)0);
650 }
651
652 if ((txn = calloc(1, sizeof(*txn))) == NULL((void*)0)) {
653 DPRINTF("calloc: %s", strerror(errno));
654 return NULL((void*)0);
655 }
656
657 if (rdonly) {
658 txn->flags |= BT_TXN_RDONLY0x01;
659 } else {
660 txn->dirty_queue = calloc(1, sizeof(*txn->dirty_queue));
661 if (txn->dirty_queue == NULL((void*)0)) {
662 free(txn);
663 return NULL((void*)0);
664 }
665 SIMPLEQ_INIT(txn->dirty_queue);
666
667 DPRINTF("taking write lock on txn %p", txn);
668 if (flock(bt->fd, LOCK_EX0x02 | LOCK_NB0x04) != 0) {
669 DPRINTF("flock: %s", strerror(errno));
670 errno(*__errno()) = EBUSY16;
671 free(txn->dirty_queue);
672 free(txn);
673 return NULL((void*)0);
674 }
675 bt->txn = txn;
676 }
677
678 txn->bt = bt;
679 btree_ref(bt);
680
681 if (btree_read_meta(bt, &txn->next_pgno) != BT_SUCCESS0) {
682 btree_txn_abort(txn);
683 return NULL((void*)0);
684 }
685
686 txn->root = bt->meta.root;
687 DPRINTF("begin transaction on btree %p, root page %u", bt, txn->root);
688
689 return txn;
690}
691
692void
693btree_txn_abort(struct btree_txn *txn)
694{
695 struct mpage *mp;
696 struct btree *bt;
697
698 if (txn == NULL((void*)0))
699 return;
700
701 bt = txn->bt;
702 DPRINTF("abort transaction on btree %p, root page %u", bt, txn->root);
703
704 if (!F_ISSET(txn->flags, BT_TXN_RDONLY)(((txn->flags) & (0x01)) == (0x01))) {
705 /* Discard all dirty pages.
706 */
707 while (!SIMPLEQ_EMPTY(txn->dirty_queue)) {
708 mp = SIMPLEQ_FIRST(txn->dirty_queue);
709 assert(mp->ref == 0); /* cursors should be closed */
710 mpage_del(bt, mp);
711 SIMPLEQ_REMOVE_HEAD(txn->dirty_queue, next);
712 mpage_free(mp);
713 }
714
715 DPRINTF("releasing write lock on txn %p", txn);
716 txn->bt->txn = NULL((void*)0);
717 if (flock(txn->bt->fd, LOCK_UN0x08) != 0) {
718 DPRINTF("failed to unlock fd %d: %s",
719 txn->bt->fd, strerror(errno));
720 }
721 free(txn->dirty_queue);
722 }
723
724 btree_close(txn->bt);
725 free(txn);
726}
727
728int
729btree_txn_commit(struct btree_txn *txn)
730{
731 int n, done;
732 ssize_t rc;
733 off_t size;
734 struct mpage *mp;
735 struct btree *bt;
736 struct iovec iov[BT_COMMIT_PAGES64];
737
738 assert(txn != NULL);
739 assert(txn->bt != NULL);
740
741 bt = txn->bt;
742
743 if (F_ISSET(txn->flags, BT_TXN_RDONLY)(((txn->flags) & (0x01)) == (0x01))) {
744 DPRINTF("attempt to commit read-only transaction");
745 btree_txn_abort(txn);
746 errno(*__errno()) = EPERM1;
747 return BT_FAIL-1;
748 }
749
750 if (txn != bt->txn) {
751 DPRINTF("attempt to commit unknown transaction");
752 btree_txn_abort(txn);
753 errno(*__errno()) = EINVAL22;
754 return BT_FAIL-1;
755 }
756
757 if (F_ISSET(txn->flags, BT_TXN_ERROR)(((txn->flags) & (0x02)) == (0x02))) {
758 DPRINTF("error flag is set, can't commit");
759 btree_txn_abort(txn);
760 errno(*__errno()) = EINVAL22;
761 return BT_FAIL-1;
762 }
763
764 if (SIMPLEQ_EMPTY(txn->dirty_queue)(((txn->dirty_queue)->sqh_first) == ((void*)0)))
765 goto done;
766
767 if (F_ISSET(bt->flags, BT_FIXPADDING)(((bt->flags) & (0x01)) == (0x01))) {
768 size = lseek(bt->fd, 0, SEEK_END2);
769 size += bt->head.psize - (size % bt->head.psize);
770 DPRINTF("extending to multiple of page size: %llu", size);
771 if (ftruncate(bt->fd, size) != 0) {
772 DPRINTF("ftruncate: %s", strerror(errno));
773 btree_txn_abort(txn);
774 return BT_FAIL-1;
775 }
776 bt->flags &= ~BT_FIXPADDING0x01;
777 }
778
779 DPRINTF("committing transaction on btree %p, root page %u",
780 bt, txn->root);
781
782 /* Commit up to BT_COMMIT_PAGES dirty pages to disk until done.
783 */
784 do {
785 n = 0;
786 done = 1;
787 SIMPLEQ_FOREACH(mp, txn->dirty_queue, next) {
788 DPRINTF("committing page %u", mp->pgno);
789 iov[n].iov_len = bt->head.psize;
790 iov[n].iov_base = mp->page;
791 if (++n >= BT_COMMIT_PAGES64) {
792 done = 0;
793 break;
794 }
795 }
796
797 if (n == 0)
798 break;
799
800 DPRINTF("committing %u dirty pages", n);
801 rc = writev(bt->fd, iov, n);
802 if (rc != (ssize_t)bt->head.psize*n) {
803 if (rc > 0)
804 DPRINTF("short write, filesystem full?");
805 else
806 DPRINTF("writev: %s", strerror(errno));
807 btree_txn_abort(txn);
808 return BT_FAIL-1;
809 }
810
811 /* Remove the dirty flag from the written pages.
812 */
813 while (!SIMPLEQ_EMPTY(txn->dirty_queue)) {
814 mp = SIMPLEQ_FIRST(txn->dirty_queue);
815 mp->dirty = 0;
816 SIMPLEQ_REMOVE_HEAD(txn->dirty_queue, next);
817 if (--n == 0)
818 break;
819 }
820 } while (!done);
821
822 if (btree_sync(bt) != 0 ||
823 btree_write_meta(bt, txn->root, 0) != BT_SUCCESS0 ||
824 btree_sync(bt) != 0) {
825 btree_txn_abort(txn);
826 return BT_FAIL-1;
827 }
828
829done:
830 mpage_prune(bt);
831 btree_txn_abort(txn);
832
833 return BT_SUCCESS0;
834}
835
836static int
837btree_write_header(struct btree *bt, int fd)
838{
839 struct stat sb;
840 struct bt_head *h;
841 struct page *p;
842 ssize_t rc;
843 unsigned int psize;
844
845 DPRINTF("writing header page");
846 assert(bt != NULL);
847
848 /*
849 * Ask stat for 'optimal blocksize for I/O', but cap to fit in indx_t.
850 */
851 if (fstat(fd, &sb) == 0)
852 psize = MINIMUM(32*1024, sb.st_blksize);
853 else
854 psize = PAGESIZE4096;
855
856 if ((p = calloc(1, psize)) == NULL((void*)0))
857 return -1;
858 p->flags = P_HEAD0x10;
859
860 h = METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs)));
861 h->magic = BT_MAGIC0xB3DBB3DB;
862 h->version = BT_VERSION4;
863 h->psize = psize;
864 bcopy(h, &bt->head, sizeof(*h));
865
866 rc = write(fd, p, bt->head.psize);
867 free(p);
868 if (rc != (ssize_t)bt->head.psize) {
869 if (rc > 0)
870 DPRINTF("short write, filesystem full?");
871 return BT_FAIL-1;
872 }
873
874 return BT_SUCCESS0;
875}
876
877static int
878btree_read_header(struct btree *bt)
879{
880 char page[PAGESIZE4096];
881 struct page *p;
882 struct bt_head *h;
883 int rc;
884
885 assert(bt != NULL);
886
887 /* We don't know the page size yet, so use a minimum value.
888 */
889
890 if ((rc = pread(bt->fd, page, PAGESIZE4096, 0)) == 0) {
891 errno(*__errno()) = ENOENT2;
892 return -1;
893 } else if (rc != PAGESIZE4096) {
894 if (rc > 0)
895 errno(*__errno()) = EINVAL22;
896 DPRINTF("read: %s", strerror(errno));
897 return -1;
898 }
899
900 p = (struct page *)page;
901
902 if (!F_ISSET(p->flags, P_HEAD)(((p->flags) & (0x10)) == (0x10))) {
903 DPRINTF("page %d not a header page", p->pgno);
904 errno(*__errno()) = EINVAL22;
905 return -1;
906 }
907
908 h = METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs)));
909 if (h->magic != BT_MAGIC0xB3DBB3DB) {
910 DPRINTF("header has invalid magic");
911 errno(*__errno()) = EINVAL22;
912 return -1;
913 }
914
915 if (h->version != BT_VERSION4) {
916 DPRINTF("database is version %u, expected version %u",
917 bt->head.version, BT_VERSION);
918 errno(*__errno()) = EINVAL22;
919 return -1;
920 }
921
922 bcopy(h, &bt->head, sizeof(*h));
923 return 0;
924}
925
926static int
927btree_write_meta(struct btree *bt, pgno_t root, unsigned int flags)
928{
929 struct mpage *mp;
930 struct bt_meta *meta;
931 ssize_t rc;
932
933 DPRINTF("writing meta page for root page %u", root);
934
935 assert(bt != NULL);
936 assert(bt->txn != NULL);
937
938 if ((mp = btree_new_page(bt, P_META0x08)) == NULL((void*)0))
939 return -1;
940
941 bt->meta.prev_meta = bt->meta.root;
942 bt->meta.root = root;
943 bt->meta.flags = flags;
944 bt->meta.created_at = time(0);
945 bt->meta.revisions++;
946 SHA1((unsigned char *)&bt->meta, METAHASHLEN__builtin_offsetof(struct bt_meta, hash), bt->meta.hash);
947
948 /* Copy the meta data changes to the new meta page. */
949 meta = METADATA(mp->page);
950 bcopy(&bt->meta, meta, sizeof(*meta));
951
952 rc = write(bt->fd, mp->page, bt->head.psize);
953 mp->dirty = 0;
954 SIMPLEQ_REMOVE_HEAD(bt->txn->dirty_queue, next);
955 if (rc != (ssize_t)bt->head.psize) {
956 if (rc > 0)
957 DPRINTF("short write, filesystem full?");
958 return BT_FAIL-1;
959 }
960
961 if ((bt->size = lseek(bt->fd, 0, SEEK_END2)) == -1) {
962 DPRINTF("failed to update file size: %s", strerror(errno));
963 bt->size = 0;
964 }
965
966 return BT_SUCCESS0;
967}
968
969/* Returns true if page p is a valid meta page, false otherwise.
970 */
971static int
972btree_is_meta_page(struct page *p)
973{
974 struct bt_meta *m;
975 unsigned char hash[SHA_DIGEST_LENGTH20];
976
977 m = METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs)));
978 if (!F_ISSET(p->flags, P_META)(((p->flags) & (0x08)) == (0x08))) {
979 DPRINTF("page %d not a meta page", p->pgno);
980 errno(*__errno()) = EINVAL22;
981 return 0;
982 }
983
984 if (m->root >= p->pgno && m->root != P_INVALID0xFFFFFFFF) {
985 DPRINTF("page %d points to an invalid root page", p->pgno);
986 errno(*__errno()) = EINVAL22;
987 return 0;
988 }
989
990 SHA1((unsigned char *)m, METAHASHLEN__builtin_offsetof(struct bt_meta, hash), hash);
991 if (bcmp(hash, m->hash, SHA_DIGEST_LENGTH20) != 0) {
992 DPRINTF("page %d has an invalid digest", p->pgno);
993 errno(*__errno()) = EINVAL22;
994 return 0;
995 }
996
997 return 1;
998}
999
1000static int
1001btree_read_meta(struct btree *bt, pgno_t *p_next)
1002{
1003 struct mpage *mp;
1004 struct bt_meta *meta;
1005 pgno_t meta_pgno, next_pgno;
1006 off_t size;
1007
1008 assert(bt != NULL);
1009
1010 if ((size = lseek(bt->fd, 0, SEEK_END2)) == -1)
1011 goto fail;
1012
1013 DPRINTF("btree_read_meta: size = %llu", size);
1014
1015 if (size < bt->size) {
1016 DPRINTF("file has shrunk!");
1017 errno(*__errno()) = EIO5;
1018 goto fail;
1019 }
1020
1021 if (size == bt->head.psize) { /* there is only the header */
1022 if (p_next != NULL((void*)0))
1023 *p_next = 1;
1024 return BT_SUCCESS0; /* new file */
1025 }
1026
1027 next_pgno = size / bt->head.psize;
1028 if (next_pgno == 0) {
1029 DPRINTF("corrupt file");
1030 errno(*__errno()) = EIO5;
1031 goto fail;
1032 }
1033
1034 meta_pgno = next_pgno - 1;
1035
1036 if (size % bt->head.psize != 0) {
1037 DPRINTF("filesize not a multiple of the page size!");
1038 bt->flags |= BT_FIXPADDING0x01;
1039 next_pgno++;
1040 }
1041
1042 if (p_next != NULL((void*)0))
1043 *p_next = next_pgno;
1044
1045 if (size == bt->size) {
1046 DPRINTF("size unchanged, keeping current meta page");
1047 if (F_ISSET(bt->meta.flags, BT_TOMBSTONE)(((bt->meta.flags) & (0x01)) == (0x01))) {
1048 DPRINTF("file is dead");
1049 errno(*__errno()) = ESTALE70;
1050 return BT_FAIL-1;
1051 } else
1052 return BT_SUCCESS0;
1053 }
1054 bt->size = size;
1055
1056 while (meta_pgno > 0) {
1057 if ((mp = btree_get_mpage(bt, meta_pgno)) == NULL((void*)0))
1058 break;
1059 if (btree_is_meta_page(mp->page)) {
1060 meta = METADATA(mp->page);
1061 DPRINTF("flags = 0x%x", meta->flags);
1062 if (F_ISSET(meta->flags, BT_TOMBSTONE)(((meta->flags) & (0x01)) == (0x01))) {
1063 DPRINTF("file is dead");
1064 errno(*__errno()) = ESTALE70;
1065 return BT_FAIL-1;
1066 } else {
1067 /* Make copy of last meta page. */
1068 bcopy(meta, &bt->meta, sizeof(bt->meta));
1069 return BT_SUCCESS0;
1070 }
1071 }
1072 --meta_pgno; /* scan backwards to first valid meta page */
1073 }
1074
1075 errno(*__errno()) = EIO5;
1076fail:
1077 if (p_next != NULL((void*)0))
1078 *p_next = P_INVALID0xFFFFFFFF;
1079 return BT_FAIL-1;
1080}
1081
1082struct btree *
1083btree_open_fd(int fd, unsigned int flags)
1084{
1085 struct btree *bt;
1086 int fl;
1087
1088 fl = fcntl(fd, F_GETFL3);
1089 if (fcntl(fd, F_SETFL4, fl | O_APPEND0x0008) == -1)
1090 return NULL((void*)0);
1091
1092 if ((bt = calloc(1, sizeof(*bt))) == NULL((void*)0))
1093 return NULL((void*)0);
1094 bt->fd = fd;
1095 bt->flags = flags;
1096 bt->flags &= ~BT_FIXPADDING0x01;
1097 bt->ref = 1;
1098 bt->meta.root = P_INVALID0xFFFFFFFF;
1099
1100 if ((bt->page_cache = calloc(1, sizeof(*bt->page_cache))) == NULL((void*)0))
1101 goto fail;
1102 bt->stat.max_cache = BT_MAXCACHE_DEF1024;
1103 RB_INIT(bt->page_cache);
1104
1105 if ((bt->lru_queue = calloc(1, sizeof(*bt->lru_queue))) == NULL((void*)0))
1106 goto fail;
1107 TAILQ_INIT(bt->lru_queue);
1108
1109 if (btree_read_header(bt) != 0) {
1110 if (errno(*__errno()) != ENOENT2)
1111 goto fail;
1112 DPRINTF("new database");
1113 btree_write_header(bt, bt->fd);
1114 }
1115
1116 if (btree_read_meta(bt, NULL((void*)0)) != 0)
1117 goto fail;
1118
1119 DPRINTF("opened database version %u, pagesize %u",
1120 bt->head.version, bt->head.psize);
1121 DPRINTF("timestamp: %s", ctime(&bt->meta.created_at));
1122 DPRINTF("depth: %u", bt->meta.depth);
1123 DPRINTF("entries: %llu", bt->meta.entries);
1124 DPRINTF("revisions: %u", bt->meta.revisions);
1125 DPRINTF("branch pages: %u", bt->meta.branch_pages);
1126 DPRINTF("leaf pages: %u", bt->meta.leaf_pages);
1127 DPRINTF("overflow pages: %u", bt->meta.overflow_pages);
1128 DPRINTF("root: %u", bt->meta.root);
1129 DPRINTF("previous meta page: %u", bt->meta.prev_meta);
1130
1131 return bt;
1132
1133fail:
1134 free(bt->lru_queue);
1135 free(bt->page_cache);
1136 free(bt);
1137 return NULL((void*)0);
1138}
1139
1140struct btree *
1141btree_open(const char *path, unsigned int flags, mode_t mode)
1142{
1143 int fd, oflags;
1144 struct btree *bt;
1145
1146 if (F_ISSET(flags, BT_RDONLY)(((flags) & (0x04)) == (0x04)))
1147 oflags = O_RDONLY0x0000;
1148 else
1149 oflags = O_RDWR0x0002 | O_CREAT0x0200 | O_APPEND0x0008;
1150
1151 if ((fd = open(path, oflags, mode)) == -1)
1152 return NULL((void*)0);
1153
1154 if ((bt = btree_open_fd(fd, flags)) == NULL((void*)0))
1155 close(fd);
1156 else {
1157 bt->path = strdup(path);
1158 DPRINTF("opened btree %p", bt);
1159 }
1160
1161 return bt;
1162}
1163
1164static void
1165btree_ref(struct btree *bt)
1166{
1167 bt->ref++;
1168 DPRINTF("ref is now %d on btree %p", bt->ref, bt);
1169}
1170
1171void
1172btree_close(struct btree *bt)
1173{
1174 if (bt == NULL((void*)0))
1175 return;
1176
1177 if (--bt->ref == 0) {
1178 DPRINTF("ref is zero, closing btree %p", bt);
1179 close(bt->fd);
1180 mpage_flush(bt);
1181 free(bt->lru_queue);
1182 free(bt->path);
1183 free(bt->page_cache);
1184 free(bt);
1185 } else
1186 DPRINTF("ref is now %d on btree %p", bt->ref, bt);
1187}
1188
1189/* Search for key within a leaf page, using binary search.
1190 * Returns the smallest entry larger or equal to the key.
1191 * If exactp is non-null, stores whether the found entry was an exact match
1192 * in *exactp (1 or 0).
1193 * If kip is non-null, stores the index of the found entry in *kip.
1194 * If no entry larger than or equal to the key is found, returns NULL.
1195 */
1196static struct node *
1197btree_search_node(struct btree *bt, struct mpage *mp, struct btval *key,
1198 int *exactp, unsigned int *kip)
1199{
1200 unsigned int i = 0;
1201 int low, high;
1202 int rc = 0;
1203 struct node *node;
1204 struct btval nodekey;
1205
1206 DPRINTF("searching %lu keys in %s page %u with prefix [%.*s]",
1207 NUMKEYS(mp),
1208 IS_LEAF(mp) ? "leaf" : "branch",
1209 mp->pgno, (int)mp->prefix.len, (char *)mp->prefix.str);
1210
1211 assert(NUMKEYS(mp) > 0);
1212
1213 memset(&nodekey, 0, sizeof(nodekey));
1214
1215 low = IS_LEAF(mp) ? 0 : 1;
1216 high = NUMKEYS(mp) - 1;
1217 while (low <= high) {
1218 i = (low + high) >> 1;
1219 node = NODEPTR(mp, i);
1220
1221 nodekey.size = node->ksize;
1222 nodekey.data = NODEKEY(node)(void *)((node)->data);
1223
1224 if (bt->cmp)
1225 rc = bt->cmp(key, &nodekey);
1226 else
1227 rc = bt_cmp(bt, key, &nodekey, &mp->prefix);
1228
1229 if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)))
1230 DPRINTF("found leaf index %u [%.*s], rc = %d",
1231 i, (int)nodekey.size, (char *)nodekey.data, rc);
1232 else
1233 DPRINTF("found branch index %u [%.*s -> %u], rc = %d",
1234 i, (int)node->ksize, (char *)NODEKEY(node),
1235 node->n_pgno, rc);
1236
1237 if (rc == 0)
1238 break;
1239 if (rc > 0)
1240 low = i + 1;
1241 else
1242 high = i - 1;
1243 }
1244
1245 if (rc > 0) { /* Found entry is less than the key. */
1246 i++; /* Skip to get the smallest entry larger than key. */
1247 if (i >= NUMKEYS(mp))
1248 /* There is no entry larger or equal to the key. */
1249 return NULL;
1250 }
1251 if (exactp)
1252 *exactp = (rc == 0);
1253 if (kip) /* Store the key index if requested. */
1254 *kip = i;
1255
1256 return NODEPTR(mp, i);
1257}
1258
1259static void
1260cursor_pop_page(struct cursor *cursor)
1261{
1262 struct ppage *top;
1263
1264 top = CURSOR_TOP(cursor);
1265 CURSOR_POP(cursor);
1266 top->mpage->ref--;
1267
1268 DPRINTF("popped page %u off cursor %p", top->mpage->pgno, cursor);
1269
1270 free(top);
1271}
1272
1273static struct ppage *
1274cursor_push_page(struct cursor *cursor, struct mpage *mp)
1275{
1276 struct ppage *ppage;
1277
1278 DPRINTF("pushing page %u on cursor %p", mp->pgno, cursor);
1279
1280 if ((ppage = calloc(1, sizeof(*ppage))) == NULL((void*)0))
1281 return NULL((void*)0);
1282 ppage->mpage = mp;
1283 mp->ref++;
1284 CURSOR_PUSH(cursor, ppage);
1285 return ppage;
1286}
1287
1288static struct mpage *
1289btree_get_mpage(struct btree *bt, pgno_t pgno)
1290{
1291 struct mpage *mp;
1292
1293 mp = mpage_lookup(bt, pgno);
1294 if (mp == NULL((void*)0)) {
1295 if ((mp = calloc(1, sizeof(*mp))) == NULL((void*)0))
1296 return NULL((void*)0);
1297 if ((mp->page = malloc(bt->head.psize)) == NULL((void*)0)) {
1298 free(mp);
1299 return NULL((void*)0);
1300 }
1301 if (btree_read_page(bt, pgno, mp->page) != BT_SUCCESS0) {
1302 mpage_free(mp);
1303 return NULL((void*)0);
1304 }
1305 mp->pgno = pgno;
1306 mpage_add(bt, mp);
1307 } else
1308 DPRINTF("returning page %u from cache", pgno);
1309
1310 return mp;
1311}
1312
1313static void
1314concat_prefix(struct btree *bt, char *s1, size_t n1, char *s2, size_t n2,
1315 char *cs, size_t *cn)
1316{
1317 assert(*cn >= n1 + n2);
1318 if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) {
1319 bcopy(s2, cs, n2);
1320 bcopy(s1, cs + n2, n1);
1321 } else {
1322 bcopy(s1, cs, n1);
1323 bcopy(s2, cs + n1, n2);
1324 }
1325 *cn = n1 + n2;
1326}
1327
1328static void
1329find_common_prefix(struct btree *bt, struct mpage *mp)
1330{
1331 indx_t lbound = 0, ubound = 0;
1332 struct mpage *lp, *up;
1333 struct btkey lprefix, uprefix;
1334
1335 mp->prefix.len = 0;
1336 if (bt->cmp != NULL((void*)0))
1337 return;
1338
1339 lp = mp;
1340 while (lp->parent != NULL((void*)0)) {
1341 if (lp->parent_index > 0) {
1342 lbound = lp->parent_index;
1343 break;
1344 }
1345 lp = lp->parent;
1346 }
1347
1348 up = mp;
1349 while (up->parent != NULL((void*)0)) {
1350 if (up->parent_index + 1 < (indx_t)NUMKEYS(up->parent)) {
1351 ubound = up->parent_index + 1;
1352 break;
1353 }
1354 up = up->parent;
1355 }
1356
1357 if (lp->parent != NULL((void*)0) && up->parent != NULL((void*)0)) {
1358 expand_prefix(bt, lp->parent, lbound, &lprefix);
1359 expand_prefix(bt, up->parent, ubound, &uprefix);
1360 common_prefix(bt, &lprefix, &uprefix, &mp->prefix);
1361 }
1362 else if (mp->parent)
1363 bcopy(&mp->parent->prefix, &mp->prefix, sizeof(mp->prefix));
1364
1365 DPRINTF("found common prefix [%.*s] (len %zu) for page %u",
1366 (int)mp->prefix.len, mp->prefix.str, mp->prefix.len, mp->pgno);
1367}
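
Note: find_common_prefix() derives a page's shared key prefix from the separator keys that bound it in its ancestors: lbound/ubound walk upward until a left and a right separator exist, and the page prefix is their common prefix (or the parent's prefix when only one side exists). As a purely illustrative sketch with hypothetical keys: if the bounding separators are "apple" and "apricot", common_prefix() yields "ap", so every key on that page is stored with "ap" stripped and bt_cmp()/expand_prefix() re-apply it when comparing or copying keys out.
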
1368
1369static int
1370btree_search_page_root(struct btree *bt, struct mpage *root, struct btval *key,
1371 struct cursor *cursor, int modify, struct mpage **mpp)
1372{
1373 struct mpage *mp, *parent;
1374
1375 if (cursor && cursor_push_page(cursor, root) == NULL((void*)0))
1376 return BT_FAIL-1;
1377
1378 mp = root;
1379 while (IS_BRANCH(mp)((((mp)->page->flags) & (0x01)) == (0x01))) {
1380 unsigned int i = 0;
1381 struct node *node;
1382
1383 DPRINTF("branch page %u has %lu keys", mp->pgno, NUMKEYS(mp));
1384 assert(NUMKEYS(mp) > 1);
1385 DPRINTF("found index 0 to page %u", NODEPGNO(NODEPTR(mp, 0)));
1386
1387 if (key == NULL((void*)0)) /* Initialize cursor to first page. */
1388 i = 0;
1389 else {
1390 int exact;
1391 node = btree_search_node(bt, mp, key, &exact, &i);
1392 if (node == NULL)
1393 i = NUMKEYS(mp) - 1;
1394 else if (!exact) {
1395 assert(i > 0);
1396 i--;
1397 }
1398 }
1399
1400 if (key)
1401 DPRINTF("following index %u for key %.*s",
1402 i, (int)key->size, (char *)key->data);
1403 assert(i >= 0 && i < NUMKEYS(mp));
1404 node = NODEPTR(mp, i);
1405
1406 if (cursor)
1407 CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first)->ki = i;
1408
1409 parent = mp;
1410 if ((mp = btree_get_mpage(bt, NODEPGNO(node)((node)->p.np_pgno))) == NULL((void*)0))
1411 return BT_FAIL-1;
1412 mp->parent = parent;
1413 mp->parent_index = i;
1414 find_common_prefix(bt, mp);
1415
1416 if (cursor && cursor_push_page(cursor, mp) == NULL((void*)0))
1417 return BT_FAIL-1;
1418
1419 if (modify && (mp = mpage_touch(bt, mp)) == NULL((void*)0))
1420 return BT_FAIL-1;
1421 }
1422
1423 if (!IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) {
1424 DPRINTF("internal error, index points to a %02X page!?",
1425 mp->page->flags);
1426 return BT_FAIL-1;
1427 }
1428
1429 DPRINTF("found leaf page %u for key %.*s", mp->pgno,
1430 key ? (int)key->size : 0, key ? (char *)key->data : NULL);
1431
1432 *mpp = mp;
1433 return BT_SUCCESS0;
1434}
1435
1436/* Search for the page a given key should be in.
1437 * Stores a pointer to the found page in *mpp.
1438 * If key is NULL, search for the lowest page (used by btree_cursor_first).
1439 * If cursor is non-null, pushes parent pages on the cursor stack.
1440 * If modify is true, visited pages are updated with new page numbers.
1441 */
1442static int
1443btree_search_page(struct btree *bt, struct btree_txn *txn, struct btval *key,
1444 struct cursor *cursor, int modify, struct mpage **mpp)
1445{
1446 int rc;
1447 pgno_t root;
1448 struct mpage *mp;
1449
1450 /* Can't modify pages outside a transaction. */
1451 if (txn == NULL((void*)0) && modify) {
1452 errno(*__errno()) = EINVAL22;
1453 return BT_FAIL-1;
1454 }
1455
1456 /* Choose which root page to start with. If a transaction is given
1457 * use the root page from the transaction, otherwise read the last
1458 * committed root page.
1459 */
1460 if (txn == NULL((void*)0)) {
1461 if ((rc = btree_read_meta(bt, NULL((void*)0))) != BT_SUCCESS0)
1462 return rc;
1463 root = bt->meta.root;
1464 } else if (F_ISSET(txn->flags, BT_TXN_ERROR)(((txn->flags) & (0x02)) == (0x02))) {
1465 DPRINTF("transaction has failed, must abort");
1466 errno(*__errno()) = EINVAL22;
1467 return BT_FAIL-1;
1468 } else
1469 root = txn->root;
1470
1471 if (root == P_INVALID0xFFFFFFFF) { /* Tree is empty. */
1472 DPRINTF("tree is empty");
1473 errno(*__errno()) = ENOENT2;
1474 return BT_FAIL-1;
1475 }
1476
1477 if ((mp = btree_get_mpage(bt, root)) == NULL((void*)0))
1478 return BT_FAIL-1;
1479
1480 DPRINTF("root page has flags 0x%X", mp->page->flags);
1481
1482 assert(mp->parent == NULL);
1483 assert(mp->prefix.len == 0);
1484
1485 if (modify && !mp->dirty) {
1486 if ((mp = mpage_touch(bt, mp)) == NULL((void*)0))
1487 return BT_FAIL-1;
1488 txn->root = mp->pgno;
1489 }
1490
1491 return btree_search_page_root(bt, mp, key, cursor, modify, mpp);
1492}
1493
1494static int
1495btree_read_data(struct btree *bt, struct mpage *mp, struct node *leaf,
1496 struct btval *data)
1497{
1498 struct mpage *omp; /* overflow mpage */
1499 size_t psz;
1500 size_t max;
1501 size_t sz = 0;
1502 pgno_t pgno;
1503
1504 memset(data, 0, sizeof(*data));
1505 max = bt->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs);
1506
1507 if (!F_ISSET(leaf->flags, F_BIGDATA)(((leaf->flags) & (0x01)) == (0x01))) {
1508 data->size = leaf->n_dsizep.np_dsize;
1509 if (data->size > 0) {
1510 if (mp == NULL((void*)0)) {
1511 if ((data->data = malloc(data->size)) == NULL((void*)0))
1512 return BT_FAIL-1;
1513 bcopy(NODEDATA(leaf)(void *)((char *)(leaf)->data + (leaf)->ksize), data->data, data->size);
1514 data->free_data = 1;
1515 data->mp = NULL((void*)0);
1516 } else {
1517 data->data = NODEDATA(leaf)(void *)((char *)(leaf)->data + (leaf)->ksize);
1518 data->free_data = 0;
1519 data->mp = mp;
1520 mp->ref++;
1521 }
1522 }
1523 return BT_SUCCESS0;
1524 }
1525
1526 /* Read overflow data.
1527 */
1528 DPRINTF("allocating %u byte for overflow data", leaf->n_dsize);
1529 if ((data->data = malloc(leaf->n_dsizep.np_dsize)) == NULL((void*)0))
1530 return BT_FAIL-1;
1531 data->size = leaf->n_dsizep.np_dsize;
1532 data->free_data = 1;
1533 data->mp = NULL((void*)0);
1534 bcopy(NODEDATA(leaf)(void *)((char *)(leaf)->data + (leaf)->ksize), &pgno, sizeof(pgno));
1535 for (sz = 0; sz < data->size; ) {
1536 if ((omp = btree_get_mpage(bt, pgno)) == NULL((void*)0) ||
1537 !F_ISSET(omp->page->flags, P_OVERFLOW)(((omp->page->flags) & (0x04)) == (0x04))) {
1538 DPRINTF("read overflow page %u failed", pgno);
1539 free(data->data);
1540 mpage_free(omp);
1541 return BT_FAIL-1;
1542 }
1543 psz = data->size - sz;
1544 if (psz > max)
1545 psz = max;
1546 bcopy(omp->page->ptrs, (char *)data->data + sz, psz);
1547 sz += psz;
1548 pgno = omp->page->p_next_pgnob.pb_next_pgno;
1549 }
1550
1551 return BT_SUCCESS0;
1552}
1553
1554int
1555btree_txn_get(struct btree *bt, struct btree_txn *txn,
1556 struct btval *key, struct btval *data)
1557{
1558 int rc, exact;
1559 struct node *leaf;
1560 struct mpage *mp;
1561
1562 assert(key);
1563 assert(data);
1564 DPRINTF("===> get key [%.*s]", (int)key->size, (char *)key->data);
1565
1566 if (bt != NULL((void*)0) && txn != NULL((void*)0) && bt != txn->bt) {
1567 errno(*__errno()) = EINVAL22;
1568 return BT_FAIL-1;
1569 }
1570
1571 if (bt == NULL((void*)0)) {
1572 if (txn == NULL((void*)0)) {
1573 errno(*__errno()) = EINVAL22;
1574 return BT_FAIL-1;
1575 }
1576 bt = txn->bt;
1577 }
1578
1579 if (key->size == 0 || key->size > MAXKEYSIZE255) {
1580 errno(*__errno()) = EINVAL22;
1581 return BT_FAIL-1;
1582 }
1583
1584 if ((rc = btree_search_page(bt, txn, key, NULL((void*)0), 0, &mp)) != BT_SUCCESS0)
1585 return rc;
1586
1587 leaf = btree_search_node(bt, mp, key, &exact, NULL((void*)0));
1588 if (leaf && exact)
1589 rc = btree_read_data(bt, mp, leaf, data);
1590 else {
1591 errno(*__errno()) = ENOENT2;
1592 rc = BT_FAIL-1;
1593 }
1594
1595 mpage_prune(bt);
1596 return rc;
1597}
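
Note: a minimal caller sketch for the lookup path above, using only declarations visible in this file; the file name, flags/mode values and the key are illustrative, and passing a NULL txn relies on btree_txn_get() falling back to the last committed meta page as shown above:

	struct btree	*bt;
	struct btval	 key, data;

	if ((bt = btree_open("data.db", 0, 0644)) == NULL)
		err(1, "btree_open");
	memset(&key, 0, sizeof(key));
	memset(&data, 0, sizeof(data));
	key.data = "example-key";
	key.size = strlen(key.data);	/* must be 1..MAXKEYSIZE bytes */
	if (btree_txn_get(bt, NULL, &key, &data) == BT_SUCCESS) {
		/* use data.data / data.size; free only what free_data marks as owned */
		if (data.free_data)
			free(data.data);
	}
	btree_close(bt);
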
1598
1599static int
1600btree_sibling(struct cursor *cursor, int move_right)
1601{
1602 int rc;
1603 struct node *indx;
1604 struct ppage *parent, *top;
1605 struct mpage *mp;
1606
1607 top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first);
1608 if ((parent = SLIST_NEXT(top, entry)((top)->entry.sle_next)) == NULL((void*)0)) {
1609 errno(*__errno()) = ENOENT2;
1610 return BT_FAIL-1; /* root has no siblings */
1611 }
1612
1613 DPRINTF("parent page is page %u, index %u",
1614 parent->mpage->pgno, parent->ki);
1615
1616 cursor_pop_page(cursor);
1617 if (move_right ? (parent->ki + 1 >= NUMKEYS(parent->mpage))
1618 : (parent->ki == 0)) {
1619 DPRINTF("no more keys left, moving to %s sibling",
1620 move_right ? "right" : "left");
1621 if ((rc = btree_sibling(cursor, move_right)) != BT_SUCCESS0)
1622 return rc;
1623 parent = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first);
1624 } else {
1625 if (move_right)
1626 parent->ki++;
1627 else
1628 parent->ki--;
1629 DPRINTF("just moving to %s index key %u",
1630 move_right ? "right" : "left", parent->ki);
1631 }
1632 assert(IS_BRANCH(parent->mpage));
1633
1634 indx = NODEPTR(parent->mpage, parent->ki);
1635 if ((mp = btree_get_mpage(cursor->bt, indx->n_pgnop.np_pgno)) == NULL((void*)0))
1636 return BT_FAIL-1;
1637 mp->parent = parent->mpage;
1638 mp->parent_index = parent->ki;
1639
1640 cursor_push_page(cursor, mp);
1641 find_common_prefix(cursor->bt, mp);
1642
1643 return BT_SUCCESS0;
1644}
1645
1646static int
1647bt_set_key(struct btree *bt, struct mpage *mp, struct node *node,
1648 struct btval *key)
1649{
1650 if (key == NULL((void*)0))
1651 return 0;
1652
1653 if (mp->prefix.len > 0) {
1654 key->size = node->ksize + mp->prefix.len;
1655 key->data = malloc(key->size);
1656 if (key->data == NULL((void*)0))
1657 return -1;
1658 concat_prefix(bt,
1659 mp->prefix.str, mp->prefix.len,
1660 NODEKEY(node)(void *)((node)->data), node->ksize,
1661 key->data, &key->size);
1662 key->free_data = 1;
1663 } else {
1664 key->size = node->ksize;
1665 key->data = NODEKEY(node)(void *)((node)->data);
1666 key->free_data = 0;
1667 key->mp = mp;
1668 mp->ref++;
1669 }
1670
1671 return 0;
1672}
1673
1674static int
1675btree_cursor_next(struct cursor *cursor, struct btval *key, struct btval *data)
1676{
1677 struct ppage *top;
1678 struct mpage *mp;
1679 struct node *leaf;
1680
1681 if (cursor->eof) {
1682 errno(*__errno()) = ENOENT2;
1683 return BT_FAIL-1;
1684 }
1685
1686 assert(cursor->initialized);
1687
1688 top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first);
1689 mp = top->mpage;
1690
1691 DPRINTF("cursor_next: top page is %u in cursor %p", mp->pgno, cursor);
1692
1693 if (top->ki + 1 >= NUMKEYS(mp)) {
1694 DPRINTF("=====> move to next sibling page");
1695 if (btree_sibling(cursor, 1) != BT_SUCCESS0) {
1696 cursor->eof = 1;
1697 return BT_FAIL-1;
1698 }
1699 top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first);
1700 mp = top->mpage;
1701 DPRINTF("next page is %u, key index %u", mp->pgno, top->ki);
1702 } else
1703 top->ki++;
1704
1705 DPRINTF("==> cursor points to page %u with %lu keys, key index %u",
1706 mp->pgno, NUMKEYS(mp), top->ki);
1707
1708 assert(IS_LEAF(mp));
1709 leaf = NODEPTR(mp, top->ki);
1710
1711 if (data && btree_read_data(cursor->bt, mp, leaf, data) != BT_SUCCESS0)
1712 return BT_FAIL-1;
1713
1714 if (bt_set_key(cursor->bt, mp, leaf, key) != 0)
1715 return BT_FAIL-1;
1716
1717 return BT_SUCCESS0;
1718}
1719
1720static int
1721btree_cursor_set(struct cursor *cursor, struct btval *key, struct btval *data,
1722 int *exactp)
1723{
1724 int rc;
1725 struct node *leaf;
1726 struct mpage *mp;
1727 struct ppage *top;
1728
1729 assert(cursor);
1730 assert(key);
1731 assert(key->size > 0);
1732
1733 rc = btree_search_page(cursor->bt, cursor->txn, key, cursor, 0, &mp);
1734 if (rc != BT_SUCCESS0)
1735 return rc;
1736 assert(IS_LEAF(mp));
1737
1738 top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first);
1739 leaf = btree_search_node(cursor->bt, mp, key, exactp, &top->ki);
1740 if (exactp != NULL((void*)0) && !*exactp) {
1741 /* BT_CURSOR_EXACT specified and not an exact match. */
1742 errno(*__errno()) = ENOENT2;
1743 return BT_FAIL-1;
1744 }
1745
1746 if (leaf == NULL((void*)0)) {
1747 DPRINTF("===> inexact leaf not found, goto sibling");
1748 if (btree_sibling(cursor, 1) != BT_SUCCESS0)
1749 return BT_FAIL-1; /* no entries matched */
1750 top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first);
1751 top->ki = 0;
1752 mp = top->mpage;
1753 assert(IS_LEAF(mp));
1754 leaf = NODEPTR(mp, 0);
1755 }
1756
1757 cursor->initialized = 1;
1758 cursor->eof = 0;
1759
1760 if (data && btree_read_data(cursor->bt, mp, leaf, data) != BT_SUCCESS0)
1761 return BT_FAIL-1;
1762
1763 if (bt_set_key(cursor->bt, mp, leaf, key) != 0)
1764 return BT_FAIL-1;
1765 DPRINTF("==> cursor placed on key %.*s",
1766 (int)key->size, (char *)key->data);
1767
1768 return BT_SUCCESS0;
1769}
1770
1771static int
1772btree_cursor_first(struct cursor *cursor, struct btval *key, struct btval *data)
1773{
1774 int rc;
1775 struct mpage *mp;
1776 struct node *leaf;
1777
1778 rc = btree_search_page(cursor->bt, cursor->txn, NULL((void*)0), cursor, 0, &mp);
1779 if (rc != BT_SUCCESS0)
1780 return rc;
1781 assert(IS_LEAF(mp));
1782
1783 leaf = NODEPTR(mp, 0);
1784 cursor->initialized = 1;
1785 cursor->eof = 0;
1786
1787 if (data && btree_read_data(cursor->bt, mp, leaf, data) != BT_SUCCESS0)
1788 return BT_FAIL-1;
1789
1790 if (bt_set_key(cursor->bt, mp, leaf, key) != 0)
1791 return BT_FAIL-1;
1792
1793 return BT_SUCCESS0;
1794}
1795
1796int
1797btree_cursor_get(struct cursor *cursor, struct btval *key, struct btval *data,
1798 enum cursor_op op)
1799{
1800 int rc;
1801 int exact = 0;
1802
1803 assert(cursor);
1804
1805 switch (op) {
1806 case BT_CURSOR:
1807 case BT_CURSOR_EXACT:
1808 while (CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first) != NULL((void*)0))
1809 cursor_pop_page(cursor);
1810 if (key == NULL((void*)0) || key->size == 0 || key->size > MAXKEYSIZE255) {
1811 errno(*__errno()) = EINVAL22;
1812 rc = BT_FAIL-1;
1813 } else if (op == BT_CURSOR_EXACT)
1814 rc = btree_cursor_set(cursor, key, data, &exact);
1815 else
1816 rc = btree_cursor_set(cursor, key, data, NULL((void*)0));
1817 break;
1818 case BT_NEXT:
1819 if (!cursor->initialized)
1820 rc = btree_cursor_first(cursor, key, data);
1821 else
1822 rc = btree_cursor_next(cursor, key, data);
1823 break;
1824 case BT_FIRST:
1825 while (CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first) != NULL((void*)0))
1826 cursor_pop_page(cursor);
1827 rc = btree_cursor_first(cursor, key, data);
1828 break;
1829 default:
1830 DPRINTF("unhandled/unimplemented cursor operation %u", op);
1831 rc = BT_FAIL-1;
1832 break;
1833 }
1834
1835 mpage_prune(cursor->bt);
1836
1837 return rc;
1838}
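
Note: the cursor entry points above compose into a simple full scan. A hedged sketch (error handling trimmed; `bt` is assumed to have been opened as in the earlier example; the first BT_NEXT on an uninitialized cursor positions it on the first key, as btree_cursor_get() shows):

	struct cursor	*c;
	struct btval	 key, data;

	if ((c = btree_txn_cursor_open(bt, NULL)) == NULL)
		err(1, "btree_txn_cursor_open");
	memset(&key, 0, sizeof(key));
	memset(&data, 0, sizeof(data));
	while (btree_cursor_get(c, &key, &data, BT_NEXT) == BT_SUCCESS) {
		printf("%.*s\n", (int)key.size, (char *)key.data);
		if (key.free_data)
			free(key.data);
		if (data.free_data)
			free(data.data);
	}
	btree_cursor_close(c);
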
1839
1840static struct mpage *
1841btree_new_page(struct btree *bt, uint32_t flags)
1842{
1843 struct mpage *mp;
1844
1845 assert(bt != NULL);
1846 assert(bt->txn != NULL);
1847
1848 DPRINTF("allocating new mpage %u, page size %u",
1849 bt->txn->next_pgno, bt->head.psize);
1850 if ((mp = calloc(1, sizeof(*mp))) == NULL((void*)0))
1851 return NULL((void*)0);
1852 if ((mp->page = malloc(bt->head.psize)) == NULL((void*)0)) {
1853 free(mp);
1854 return NULL((void*)0);
1855 }
1856 mp->pgno = mp->page->pgno = bt->txn->next_pgno++;
1857 mp->page->flags = flags;
1858 mp->page->lowerb.fb.fb_lower = PAGEHDRSZ__builtin_offsetof(struct page, ptrs);
1859 mp->page->upperb.fb.fb_upper = bt->head.psize;
1860
1861 if (IS_BRANCH(mp)((((mp)->page->flags) & (0x01)) == (0x01)))
1862 bt->meta.branch_pages++;
1863 else if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)))
1864 bt->meta.leaf_pages++;
1865 else if (IS_OVERFLOW(mp)((((mp)->page->flags) & (0x04)) == (0x04)))
1866 bt->meta.overflow_pages++;
1867
1868 mpage_add(bt, mp);
1869 mpage_dirty(bt, mp);
1870
1871 return mp;
1872}
1873
1874static size_t
1875bt_leaf_size(struct btree *bt, struct btval *key, struct btval *data)
1876{
1877 size_t sz;
1878
1879 sz = LEAFSIZE(key, data);
1880 if (data->size >= bt->head.psize / BT_MINKEYS4) {
1881 /* put on overflow page */
1882 sz -= data->size - sizeof(pgno_t);
1883 }
1884
1885 return sz + sizeof(indx_t);
1886}
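
Note: a worked example of the threshold above, assuming the default 4096-byte page size: data->size >= 4096 / BT_MINKEYS = 1024 sends the value to an overflow page, so the in-page cost becomes LEAFSIZE(key, data) - data->size + sizeof(pgno_t) + sizeof(indx_t), i.e. only the node header, the key, a page-number link and the pointer slot.
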
1887
1888static size_t
1889bt_branch_size(struct btree *bt, struct btval *key)
1890{
1891 size_t sz;
1892
1893 sz = INDXSIZE(key);
1894 if (sz >= bt->head.psize / BT_MINKEYS4) {
1895 /* put on overflow page */
1896 /* not implemented */
1897 /* sz -= key->size - sizeof(pgno_t); */
1898 }
1899
1900 return sz + sizeof(indx_t);
1901}
1902
1903static int
1904btree_write_overflow_data(struct btree *bt, struct page *p, struct btval *data)
1905{
1906 size_t done = 0;
1907 size_t sz;
1908 size_t max;
1909 pgno_t *linkp; /* linked page stored here */
1910 struct mpage *next = NULL((void*)0);
1911
1912 max = bt->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs);
1913
1914 while (done < data->size) {
1915 if (next != NULL((void*)0))
1916 p = next->page;
1917 linkp = &p->p_next_pgnob.pb_next_pgno;
1918 if (data->size - done > max) {
1919 /* need another overflow page */
1920 if ((next = btree_new_page(bt, P_OVERFLOW0x04)) == NULL((void*)0))
1921 return BT_FAIL-1;
1922 *linkp = next->pgno;
1923 DPRINTF("linking overflow page %u", next->pgno);
1924 } else
1925 *linkp = 0; /* indicates end of list */
1926 sz = data->size - done;
1927 if (sz > max)
1928 sz = max;
1929 DPRINTF("copying %zu bytes to overflow page %u", sz, p->pgno);
1930 bcopy((char *)data->data + done, p->ptrs, sz);
1931 done += sz;
1932 }
1933
1934 return BT_SUCCESS0;
1935}
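
Note: the loop above lays large values out as a singly linked chain of P_OVERFLOW pages: each page holds up to psize - PAGEHDRSZ payload bytes in ptrs[] and stores the next page number in p_next_pgno, with 0 marking the end of the chain; btree_read_data() walks the same chain back, stopping on the total size rather than on the terminator.
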
1936
1937/* Key prefix should already be stripped.
1938 */
1939static int
1940btree_add_node(struct btree *bt, struct mpage *mp, indx_t indx,
1941 struct btval *key, struct btval *data, pgno_t pgno, uint8_t flags)
1942{
1943 unsigned int i;
1944 size_t node_size = NODESIZE__builtin_offsetof(struct node, data);
1945 indx_t ofs;
1946 struct node *node;
1947 struct page *p;
1948 struct mpage *ofp = NULL((void*)0); /* overflow page */
1949
1950 p = mp->page;
1951 assert(p->upper >= p->lower);
  [69] Assuming field 'fb_upper' is >= field 'fb_lower'
  [70] '?' condition is true
1952
1953 DPRINTF("add node [%.*s] to %s page %u at index %d, key size %zu",
1954 key ? (int)key->size : 0, key ? (char *)key->data : NULL,
1955 IS_LEAF(mp) ? "leaf" : "branch",
1956 mp->pgno, indx, key ? key->size : 0);
1957
1958 if (key != NULL)
  [70.1] 'key' is not equal to NULL
  [71] Taking true branch
1959 node_size += key->size;
1960
1961 if (IS_LEAF(mp)) {
  [72] Assuming the condition is true
  [73] Taking true branch
1962 assert(data);
  [74] '?' condition is true
1963 node_size += data->size;
  [75] Assigned value is garbage or undefined
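
Note: step 75 is the defect this report flags. On the path the analyzer reconstructs through btree_split() (steps 24-51, attached to lines 2711-2771 further down), the struct btval passed in as `data` appears to reach this point with its `size` field never written, so the value added to node_size is undefined. From the declarations visible in btree_split() (line 2707: sepkey, rkey, rdata), `rdata` is a plausible candidate, though the tail of that path is cut off in this excerpt. A purely illustrative, defensive mitigation in the caller would be to zero the btval before it is filled and handed to btree_add_node(); whether that is the right upstream fix is not established by this report:

	struct btval rdata;

	memset(&rdata, 0, sizeof(rdata));	/* size/data start out defined */
	/* ... set rdata.data and rdata.size before passing &rdata to btree_add_node() ... */
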
1964 if (F_ISSET(flags, F_BIGDATA)(((flags) & (0x01)) == (0x01))) {
1965 /* Data already on overflow page. */
1966 node_size -= data->size - sizeof(pgno_t);
1967 } else if (data->size >= bt->head.psize / BT_MINKEYS4) {
1968 /* Put data on overflow page. */
1969 DPRINTF("data size is %zu, put on overflow page",
1970 data->size);
1971 node_size -= data->size - sizeof(pgno_t);
1972 if ((ofp = btree_new_page(bt, P_OVERFLOW0x04)) == NULL((void*)0))
1973 return BT_FAIL-1;
1974 DPRINTF("allocated overflow page %u", ofp->pgno);
1975 flags |= F_BIGDATA0x01;
1976 }
1977 }
1978
1979 if (node_size + sizeof(indx_t) > SIZELEFT(mp)) {
1980 DPRINTF("not enough room in page %u, got %lu ptrs",
1981 mp->pgno, NUMKEYS(mp));
1982 DPRINTF("upper - lower = %u - %u = %u", p->upper, p->lower,
1983 p->upper - p->lower);
1984 DPRINTF("node size = %zu", node_size);
1985 return BT_FAIL-1;
1986 }
1987
1988 /* Move higher pointers up one slot. */
1989 for (i = NUMKEYS(mp); i > indx; i--)
1990 p->ptrs[i] = p->ptrs[i - 1];
1991
1992 /* Adjust free space offsets. */
1993 ofs = p->upperb.fb.fb_upper - node_size;
1994 assert(ofs >= p->lower + sizeof(indx_t));
1995 p->ptrs[indx] = ofs;
1996 p->upperb.fb.fb_upper = ofs;
1997 p->lowerb.fb.fb_lower += sizeof(indx_t);
1998
1999 /* Write the node data. */
2000 node = NODEPTR(mp, indx);
2001 node->ksize = (key == NULL((void*)0)) ? 0 : key->size;
2002 node->flags = flags;
2003 if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)))
2004 node->n_dsizep.np_dsize = data->size;
2005 else
2006 node->n_pgnop.np_pgno = pgno;
2007
2008 if (key)
2009 bcopy(key->data, NODEKEY(node)(void *)((node)->data), key->size);
2010
2011 if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) {
2012 assert(key)((key) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapctl/../ldapd/btree.c"
, 2012, __func__, "key"))
;
2013 if (ofp == NULL((void*)0)) {
2014 if (F_ISSET(flags, F_BIGDATA)(((flags) & (0x01)) == (0x01)))
2015 bcopy(data->data, node->data + key->size,
2016 sizeof(pgno_t));
2017 else
2018 bcopy(data->data, node->data + key->size,
2019 data->size);
2020 } else {
2021 bcopy(&ofp->pgno, node->data + key->size,
2022 sizeof(pgno_t));
2023 if (btree_write_overflow_data(bt, ofp->page,
2024 data) == BT_FAIL-1)
2025 return BT_FAIL-1;
2026 }
2027 }
2028
2029 return BT_SUCCESS0;
2030}
2031
2032static void
2033btree_del_node(struct btree *bt, struct mpage *mp, indx_t indx)
2034{
2035 unsigned int sz;
2036 indx_t i, j, numkeys, ptr;
2037 struct node *node;
2038 char *base;
2039
2040 DPRINTF("delete node %u on %s page %u", indx,
2041 IS_LEAF(mp) ? "leaf" : "branch", mp->pgno);
2042 assert(indx < NUMKEYS(mp));
2043
2044 node = NODEPTR(mp, indx);
2045 sz = NODESIZE__builtin_offsetof(struct node, data) + node->ksize;
2046 if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) {
2047 if (F_ISSET(node->flags, F_BIGDATA)(((node->flags) & (0x01)) == (0x01)))
2048 sz += sizeof(pgno_t);
2049 else
2050 sz += NODEDSZ(node)((node)->p.np_dsize);
2051 }
2052
2053 ptr = mp->page->ptrs[indx];
2054 numkeys = NUMKEYS(mp);
2055 for (i = j = 0; i < numkeys; i++) {
2056 if (i != indx) {
2057 mp->page->ptrs[j] = mp->page->ptrs[i];
2058 if (mp->page->ptrs[i] < ptr)
2059 mp->page->ptrs[j] += sz;
2060 j++;
2061 }
2062 }
2063
2064 base = (char *)mp->page + mp->page->upperb.fb.fb_upper;
2065 bcopy(base, base + sz, ptr - mp->page->upperb.fb.fb_upper);
2066
2067 mp->page->lowerb.fb.fb_lower -= sizeof(indx_t);
2068 mp->page->upperb.fb.fb_upper += sz;
2069}
2070
2071struct cursor *
2072btree_txn_cursor_open(struct btree *bt, struct btree_txn *txn)
2073{
2074 struct cursor *cursor;
2075
2076 if (bt != NULL((void*)0) && txn != NULL((void*)0) && bt != txn->bt) {
2077 errno(*__errno()) = EINVAL22;
2078 return NULL((void*)0);
2079 }
2080
2081 if (bt == NULL((void*)0)) {
2082 if (txn == NULL((void*)0)) {
2083 errno(*__errno()) = EINVAL22;
2084 return NULL((void*)0);
2085 }
2086 bt = txn->bt;
2087 }
2088
2089 if ((cursor = calloc(1, sizeof(*cursor))) != NULL((void*)0)) {
2090 SLIST_INIT(&cursor->stack);
2091 cursor->bt = bt;
2092 cursor->txn = txn;
2093 btree_ref(bt);
2094 }
2095
2096 return cursor;
2097}
2098
2099void
2100btree_cursor_close(struct cursor *cursor)
2101{
2102 if (cursor != NULL((void*)0)) {
2103 while (!CURSOR_EMPTY(cursor)(((&(cursor)->stack)->slh_first) == ((void*)0)))
2104 cursor_pop_page(cursor);
2105
2106 btree_close(cursor->bt);
2107 free(cursor);
2108 }
2109}
2110
2111static int
2112btree_update_key(struct btree *bt, struct mpage *mp, indx_t indx,
2113 struct btval *key)
2114{
2115 indx_t ptr, i, numkeys;
2116 int delta;
2117 size_t len;
2118 struct node *node;
2119 char *base;
2120
2121 node = NODEPTR(mp, indx);
2122 ptr = mp->page->ptrs[indx];
2123 DPRINTF("update key %u (ofs %u) [%.*s] to [%.*s] on page %u",
2124 indx, ptr,
2125 (int)node->ksize, (char *)NODEKEY(node),
2126 (int)key->size, (char *)key->data,
2127 mp->pgno);
2128
2129 if (key->size != node->ksize) {
2130 delta = key->size - node->ksize;
2131 if (delta > 0 && SIZELEFT(mp) < delta) {
2132 DPRINTF("OUCH! Not enough room, delta = %d", delta);
2133 return BT_FAIL-1;
2134 }
2135
2136 numkeys = NUMKEYS(mp);
2137 for (i = 0; i < numkeys; i++) {
2138 if (mp->page->ptrs[i] <= ptr)
2139 mp->page->ptrs[i] -= delta;
2140 }
2141
2142 base = (char *)mp->page + mp->page->upperb.fb.fb_upper;
2143 len = ptr - mp->page->upperb.fb.fb_upper + NODESIZE__builtin_offsetof(struct node, data);
2144 bcopy(base, base - delta, len);
2145 mp->page->upperb.fb.fb_upper -= delta;
2146
2147 node = NODEPTR(mp, indx);
2148 node->ksize = key->size;
2149 }
2150
2151 bcopy(key->data, NODEKEY(node)(void *)((node)->data), key->size);
2152
2153 return BT_SUCCESS0;
2154}
2155
2156static int
2157btree_adjust_prefix(struct btree *bt, struct mpage *src, int delta)
2158{
2159 indx_t i;
2160 struct node *node;
2161 struct btkey tmpkey;
2162 struct btval key;
2163
2164 DPRINTF("adjusting prefix lengths on page %u with delta %d",
2165 src->pgno, delta);
2166 assert(delta != 0);
2167
2168 for (i = 0; i < NUMKEYS(src); i++) {
2169 node = NODEPTR(src, i);
2170 tmpkey.len = node->ksize - delta;
2171 if (delta > 0) {
2172 if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08)))
2173 bcopy(NODEKEY(node)(void *)((node)->data), tmpkey.str, tmpkey.len);
2174 else
2175 bcopy((char *)NODEKEY(node)(void *)((node)->data) + delta, tmpkey.str,
2176 tmpkey.len);
2177 } else {
2178 if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) {
2179 bcopy(NODEKEY(node)(void *)((node)->data), tmpkey.str, node->ksize);
2180 bcopy(src->prefix.str, tmpkey.str + node->ksize,
2181 -delta);
2182 } else {
2183 bcopy(src->prefix.str + src->prefix.len + delta,
2184 tmpkey.str, -delta);
2185 bcopy(NODEKEY(node)(void *)((node)->data), tmpkey.str - delta,
2186 node->ksize);
2187 }
2188 }
2189 key.size = tmpkey.len;
2190 key.data = tmpkey.str;
2191 if (btree_update_key(bt, src, i, &key) != BT_SUCCESS0)
2192 return BT_FAIL-1;
2193 }
2194
2195 return BT_SUCCESS0;
2196}
2197
2198/* Move a node from src to dst.
2199 */
2200static int
2201btree_move_node(struct btree *bt, struct mpage *src, indx_t srcindx,
2202 struct mpage *dst, indx_t dstindx)
2203{
2204 int rc;
2205 unsigned int pfxlen, mp_pfxlen = 0;
2206 struct node *srcnode;
2207 struct mpage *mp = NULL((void*)0);
2208 struct btkey tmpkey, srckey;
2209 struct btval key, data;
2210
2211 assert(src->parent);
2212 assert(dst->parent);
2213
2214 srcnode = NODEPTR(src, srcindx);
2215 DPRINTF("moving %s node %u [%.*s] on page %u to node %u on page %u",
2216 IS_LEAF(src) ? "leaf" : "branch",
2217 srcindx,
2218 (int)srcnode->ksize, (char *)NODEKEY(srcnode),
2219 src->pgno,
2220 dstindx, dst->pgno);
2221
2222 find_common_prefix(bt, src);
2223
2224 if (IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) {
2225 /* Need to check if the page the moved node points to
2226 * changes prefix.
2227 */
2228 if ((mp = btree_get_mpage(bt, NODEPGNO(srcnode)((srcnode)->p.np_pgno))) == NULL((void*)0))
2229 return BT_FAIL-1;
2230 mp->parent = src;
2231 mp->parent_index = srcindx;
2232 find_common_prefix(bt, mp);
2233 mp_pfxlen = mp->prefix.len;
2234 }
2235
2236 /* Mark src and dst as dirty. */
2237 if ((src = mpage_touch(bt, src)) == NULL((void*)0) ||
2238 (dst = mpage_touch(bt, dst)) == NULL((void*)0))
2239 return BT_FAIL-1;
2240
2241 find_common_prefix(bt, dst);
2242
2243 /* Check if src node has destination page prefix. Otherwise the
2244 * destination page must expand its prefix on all its nodes.
2245 */
2246 srckey.len = srcnode->ksize;
2247 bcopy(NODEKEY(srcnode)(void *)((srcnode)->data), srckey.str, srckey.len);
2248 common_prefix(bt, &srckey, &dst->prefix, &tmpkey);
2249 if (tmpkey.len != dst->prefix.len) {
2250 if (btree_adjust_prefix(bt, dst,
2251 tmpkey.len - dst->prefix.len) != BT_SUCCESS0)
2252 return BT_FAIL-1;
2253 bcopy(&tmpkey, &dst->prefix, sizeof(tmpkey));
2254 }
2255
2256 if (srcindx == 0 && IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) {
2257 struct mpage *low;
2258
2259 /* must find the lowest key below src
2260 */
2261 assert(btree_search_page_root(bt, src, NULL, NULL, 0,
2262 &low) == BT_SUCCESS);
2263 expand_prefix(bt, low, 0, &srckey);
2264 DPRINTF("found lowest key [%.*s] on leaf page %u",
2265 (int)srckey.len, srckey.str, low->pgno);
2266 } else {
2267 srckey.len = srcnode->ksize;
2268 bcopy(NODEKEY(srcnode)(void *)((srcnode)->data), srckey.str, srcnode->ksize);
2269 }
2270 find_common_prefix(bt, src);
2271
2272 /* expand the prefix */
2273 tmpkey.len = sizeof(tmpkey.str);
2274 concat_prefix(bt, src->prefix.str, src->prefix.len,
2275 srckey.str, srckey.len, tmpkey.str, &tmpkey.len);
2276
2277 /* Add the node to the destination page. Adjust prefix for
2278 * destination page.
2279 */
2280 key.size = tmpkey.len;
2281 key.data = tmpkey.str;
2282 remove_prefix(bt, &key, dst->prefix.len);
2283 data.size = NODEDSZ(srcnode)((srcnode)->p.np_dsize);
2284 data.data = NODEDATA(srcnode)(void *)((char *)(srcnode)->data + (srcnode)->ksize);
2285 rc = btree_add_node(bt, dst, dstindx, &key, &data, NODEPGNO(srcnode)((srcnode)->p.np_pgno),
2286 srcnode->flags);
2287 if (rc != BT_SUCCESS0)
2288 return rc;
2289
2290 /* Delete the node from the source page.
2291 */
2292 btree_del_node(bt, src, srcindx);
2293
2294 /* Update the parent separators.
2295 */
2296 if (srcindx == 0 && src->parent_index != 0) {
2297 expand_prefix(bt, src, 0, &tmpkey);
2298 key.size = tmpkey.len;
2299 key.data = tmpkey.str;
2300 remove_prefix(bt, &key, src->parent->prefix.len);
2301
2302 DPRINTF("update separator for source page %u to [%.*s]",
2303 src->pgno, (int)key.size, (char *)key.data);
2304 if (btree_update_key(bt, src->parent, src->parent_index,
2305 &key) != BT_SUCCESS0)
2306 return BT_FAIL-1;
2307 }
2308
2309 if (srcindx == 0 && IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) {
2310 struct btval nullkey;
2311 nullkey.size = 0;
2312 assert(btree_update_key(bt, src, 0, &nullkey) == BT_SUCCESS);
2313 }
2314
2315 if (dstindx == 0 && dst->parent_index != 0) {
2316 expand_prefix(bt, dst, 0, &tmpkey);
2317 key.size = tmpkey.len;
2318 key.data = tmpkey.str;
2319 remove_prefix(bt, &key, dst->parent->prefix.len);
2320
2321 DPRINTF("update separator for destination page %u to [%.*s]",
2322 dst->pgno, (int)key.size, (char *)key.data);
2323 if (btree_update_key(bt, dst->parent, dst->parent_index,
2324 &key) != BT_SUCCESS0)
2325 return BT_FAIL-1;
2326 }
2327
2328 if (dstindx == 0 && IS_BRANCH(dst)((((dst)->page->flags) & (0x01)) == (0x01))) {
2329 struct btval nullkey;
2330 nullkey.size = 0;
2331 assert(btree_update_key(bt, dst, 0, &nullkey) == BT_SUCCESS);
2332 }
2333
2334 /* We can get a new page prefix here!
2335 * Must update keys in all nodes of this page!
2336 */
2337 pfxlen = src->prefix.len;
2338 find_common_prefix(bt, src);
2339 if (src->prefix.len != pfxlen) {
2340 if (btree_adjust_prefix(bt, src,
2341 src->prefix.len - pfxlen) != BT_SUCCESS0)
2342 return BT_FAIL-1;
2343 }
2344
2345 pfxlen = dst->prefix.len;
2346 find_common_prefix(bt, dst);
2347 if (dst->prefix.len != pfxlen) {
2348 if (btree_adjust_prefix(bt, dst,
2349 dst->prefix.len - pfxlen) != BT_SUCCESS0)
2350 return BT_FAIL-1;
2351 }
2352
2353 if (IS_BRANCH(dst)((((dst)->page->flags) & (0x01)) == (0x01))) {
2354 assert(mp);
2355 mp->parent = dst;
2356 mp->parent_index = dstindx;
2357 find_common_prefix(bt, mp);
2358 if (mp->prefix.len != mp_pfxlen) {
2359 DPRINTF("moved branch node has changed prefix");
2360 if ((mp = mpage_touch(bt, mp)) == NULL((void*)0))
2361 return BT_FAIL-1;
2362 if (btree_adjust_prefix(bt, mp,
2363 mp->prefix.len - mp_pfxlen) != BT_SUCCESS0)
2364 return BT_FAIL-1;
2365 }
2366 }
2367
2368 return BT_SUCCESS0;
2369}
2370
2371static int
2372btree_merge(struct btree *bt, struct mpage *src, struct mpage *dst)
2373{
2374 int rc;
2375 indx_t i;
2376 unsigned int pfxlen;
2377 struct node *srcnode;
2378 struct btkey tmpkey, dstpfx;
2379 struct btval key, data;
2380
2381 DPRINTF("merging page %u and %u", src->pgno, dst->pgno);
2382
2383 assert(src->parent); /* can't merge root page */
2384 assert(dst->parent);
2385 assert(bt->txn != NULL);
2386
2387 /* Mark src and dst as dirty. */
2388 if ((src = mpage_touch(bt, src)) == NULL((void*)0) ||
2389 (dst = mpage_touch(bt, dst)) == NULL((void*)0))
2390 return BT_FAIL-1;
2391
2392 find_common_prefix(bt, src);
2393 find_common_prefix(bt, dst);
2394
2395 /* Check if the source nodes have the destination page prefix. Otherwise
2396 * the destination page must expand its prefix on all its nodes.
2397 */
2398 common_prefix(bt, &src->prefix, &dst->prefix, &dstpfx);
2399 if (dstpfx.len != dst->prefix.len) {
2400 if (btree_adjust_prefix(bt, dst,
2401 dstpfx.len - dst->prefix.len) != BT_SUCCESS0)
2402 return BT_FAIL-1;
2403 bcopy(&dstpfx, &dst->prefix, sizeof(dstpfx));
2404 }
2405
2406 /* Move all nodes from src to dst.
2407 */
2408 for (i = 0; i < NUMKEYS(src); i++) {
2409 srcnode = NODEPTR(src, i);
2410
2411 /* If branch node 0 (implicit key), find the real key.
2412 */
2413 if (i == 0 && IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) {
2414 struct mpage *low;
2415
2416 /* must find the lowest key below src
2417 */
2418 assert(btree_search_page_root(bt, src, NULL, NULL, 0,
2419 &low) == BT_SUCCESS);
2420 expand_prefix(bt, low, 0, &tmpkey);
2421 DPRINTF("found lowest key [%.*s] on leaf page %u",
2422 (int)tmpkey.len, tmpkey.str, low->pgno);
2423 } else {
2424 expand_prefix(bt, src, i, &tmpkey);
2425 }
2426
2427 key.size = tmpkey.len;
2428 key.data = tmpkey.str;
2429
2430 remove_prefix(bt, &key, dst->prefix.len);
2431 data.size = NODEDSZ(srcnode)((srcnode)->p.np_dsize);
2432 data.data = NODEDATA(srcnode)(void *)((char *)(srcnode)->data + (srcnode)->ksize);
2433 rc = btree_add_node(bt, dst, NUMKEYS(dst), &key,
2434 &data, NODEPGNO(srcnode)((srcnode)->p.np_pgno), srcnode->flags);
2435 if (rc != BT_SUCCESS0)
2436 return rc;
2437 }
2438
2439 DPRINTF("dst page %u now has %lu keys (%.1f%% filled)",
2440 dst->pgno, NUMKEYS(dst), (float)PAGEFILL(bt, dst) / 10);
2441
2442 /* Unlink the src page from parent.
2443 */
2444 btree_del_node(bt, src->parent, src->parent_index);
2445 if (src->parent_index == 0) {
2446 key.size = 0;
2447 if (btree_update_key(bt, src->parent, 0, &key) != BT_SUCCESS0)
2448 return BT_FAIL-1;
2449
2450 pfxlen = src->prefix.len;
2451 find_common_prefix(bt, src);
2452 assert(src->prefix.len == pfxlen);
2453 }
2454
2455 if (IS_LEAF(src)((((src)->page->flags) & (0x02)) == (0x02)))
2456 bt->meta.leaf_pages--;
2457 else
2458 bt->meta.branch_pages--;
2459
2460 return btree_rebalance(bt, src->parent);
2461}
2462
2463 #define FILL_THRESHOLD 250
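
Note: PAGEFILL() reports how full a page is in per-mille of its usable area, 1000 * (psize - PAGEHDRSZ - SIZELEFT) / (psize - PAGEHDRSZ), so FILL_THRESHOLD 250 means pages less than 25% full are candidates for rebalancing in btree_rebalance() below (the DPRINTF calls divide the same value by 10 to print a percentage).
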
2464
2465static int
2466btree_rebalance(struct btree *bt, struct mpage *mp)
2467{
2468 struct node *node;
2469 struct mpage *parent;
2470 struct mpage *root;
2471 struct mpage *neighbor = NULL((void*)0);
2472 indx_t si = 0, di = 0;
2473
2474 assert(bt != NULL);
2475 assert(bt->txn != NULL);
2476 assert(mp != NULL);
2477
2478 DPRINTF("rebalancing %s page %u (has %lu keys, %.1f%% full)",
2479 IS_LEAF(mp) ? "leaf" : "branch",
2480 mp->pgno, NUMKEYS(mp), (float)PAGEFILL(bt, mp) / 10);
2481
2482 if (PAGEFILL(bt, mp) >= FILL_THRESHOLD) {
2483 DPRINTF("no need to rebalance page %u, above fill threshold",
2484 mp->pgno);
2485 return BT_SUCCESS0;
2486 }
2487
2488 parent = mp->parent;
2489
2490 if (parent == NULL((void*)0)) {
2491 if (NUMKEYS(mp) == 0) {
2492 DPRINTF("tree is completely empty");
2493 bt->txn->root = P_INVALID0xFFFFFFFF;
2494 bt->meta.depth--;
2495 bt->meta.leaf_pages--;
2496 } else if (IS_BRANCH(mp) && NUMKEYS(mp) == 1) {
2497 DPRINTF("collapsing root page!");
2498 bt->txn->root = NODEPGNO(NODEPTR(mp, 0));
2499 if ((root = btree_get_mpage(bt, bt->txn->root)) == NULL((void*)0))
2500 return BT_FAIL-1;
2501 root->parent = NULL((void*)0);
2502 bt->meta.depth--;
2503 bt->meta.branch_pages--;
2504 } else
2505 DPRINTF("root page doesn't need rebalancing");
2506 return BT_SUCCESS0;
2507 }
2508
2509 /* The parent (branch page) must have at least 2 pointers,
2510 * otherwise the tree is invalid.
2511 */
2512 assert(NUMKEYS(parent) > 1);
2513
2514 /* Leaf page fill factor is below the threshold.
2515 * Try to move keys from left or right neighbor, or
2516 * merge with a neighbor page.
2517 */
2518
2519 /* Find neighbors.
2520 */
2521 if (mp->parent_index == 0) {
2522 /* We're the leftmost leaf in our parent.
2523 */
2524 DPRINTF("reading right neighbor");
2525 node = NODEPTR(parent, mp->parent_index + 1);
2526 if ((neighbor = btree_get_mpage(bt, NODEPGNO(node)((node)->p.np_pgno))) == NULL((void*)0))
2527 return BT_FAIL-1;
2528 neighbor->parent_index = mp->parent_index + 1;
2529 si = 0;
2530 di = NUMKEYS(mp);
2531 } else {
2532 /* There is at least one neighbor to the left.
2533 */
2534 DPRINTF("reading left neighbor");
2535 node = NODEPTR(parent, mp->parent_index - 1);
2536 if ((neighbor = btree_get_mpage(bt, NODEPGNO(node)((node)->p.np_pgno))) == NULL((void*)0))
2537 return BT_FAIL-1;
2538 neighbor->parent_index = mp->parent_index - 1;
2539 si = NUMKEYS(neighbor) - 1;
2540 di = 0;
2541 }
2542 neighbor->parent = parent;
2543
2544 DPRINTF("found neighbor page %u (%lu keys, %.1f%% full)",
2545 neighbor->pgno, NUMKEYS(neighbor), (float)PAGEFILL(bt, neighbor) / 10);
2546
2547 /* If the neighbor page is above threshold and has at least two
2548 * keys, move one key from it.
2549 *
2550 * Otherwise we should try to merge them, but that might not be
2551 * possible, even if both are below threshold, as prefix expansion
2552 * might make keys larger. FIXME: detect this
2553 */
2554 if (PAGEFILL(bt, neighbor) >= FILL_THRESHOLD && NUMKEYS(neighbor) >= 2)
2555 return btree_move_node(bt, neighbor, si, mp, di);
2556 else { /* FIXME: if (has_enough_room()) */
2557 if (mp->parent_index == 0)
2558 return btree_merge(bt, neighbor, mp);
2559 else
2560 return btree_merge(bt, mp, neighbor);
2561 }
2562}
2563
2564int
2565btree_txn_del(struct btree *bt, struct btree_txn *txn,
2566 struct btval *key, struct btval *data)
2567{
2568 int rc, exact, close_txn = 0;
2569 unsigned int ki;
2570 struct node *leaf;
2571 struct mpage *mp;
2572
2573 DPRINTF("========> delete key %.*s", (int)key->size, (char *)key->data);
2574
2575 assert(key != NULL);
2576
2577 if (bt != NULL((void*)0) && txn != NULL((void*)0) && bt != txn->bt) {
2578 errno(*__errno()) = EINVAL22;
2579 return BT_FAIL-1;
2580 }
2581
2582 if (txn != NULL((void*)0) && F_ISSET(txn->flags, BT_TXN_RDONLY)(((txn->flags) & (0x01)) == (0x01))) {
2583 errno(*__errno()) = EINVAL22;
2584 return BT_FAIL-1;
2585 }
2586
2587 if (bt == NULL((void*)0)) {
2588 if (txn == NULL((void*)0)) {
2589 errno(*__errno()) = EINVAL22;
2590 return BT_FAIL-1;
2591 }
2592 bt = txn->bt;
2593 }
2594
2595 if (key->size == 0 || key->size > MAXKEYSIZE255) {
2596 errno(*__errno()) = EINVAL22;
2597 return BT_FAIL-1;
2598 }
2599
2600 if (txn == NULL((void*)0)) {
2601 close_txn = 1;
2602 if ((txn = btree_txn_begin(bt, 0)) == NULL((void*)0))
2603 return BT_FAIL-1;
2604 }
2605
2606 if ((rc = btree_search_page(bt, txn, key, NULL((void*)0), 1, &mp)) != BT_SUCCESS0)
2607 goto done;
2608
2609 leaf = btree_search_node(bt, mp, key, &exact, &ki);
2610 if (leaf == NULL((void*)0) || !exact) {
2611 errno(*__errno()) = ENOENT2;
2612 rc = BT_FAIL-1;
2613 goto done;
2614 }
2615
2616 if (data && (rc = btree_read_data(bt, NULL((void*)0), leaf, data)) != BT_SUCCESS0)
2617 goto done;
2618
2619 btree_del_node(bt, mp, ki);
2620 bt->meta.entries--;
2621 rc = btree_rebalance(bt, mp);
2622 if (rc != BT_SUCCESS0)
2623 txn->flags |= BT_TXN_ERROR0x02;
2624
2625done:
2626 if (close_txn) {
2627 if (rc == BT_SUCCESS0)
2628 rc = btree_txn_commit(txn);
2629 else
2630 btree_txn_abort(txn);
2631 }
2632 mpage_prune(bt);
2633 return rc;
2634}
2635
2636/* Reduce the length of the prefix separator <sep> to the minimum length that
2637 * still makes it uniquely distinguishable from <min>.
2638 *
2639 * <min> is guaranteed to be sorted less than <sep>
2640 *
2641 * On return, <sep> is modified to the minimum length.
2642 */
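
Note: an illustrative reduction using hypothetical keys: with min = "ant" and sep = "apple", the forward scan in the non-reversed branch matches only the leading "a" (n = 1), so sep is cut to size n + 1 = 2, i.e. "ap" -- the shortest prefix of sep that still sorts strictly above "ant".
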
2643static void
2644bt_reduce_separator(struct btree *bt, struct node *min, struct btval *sep)
2645{
2646 size_t n = 0;
2647 char *p1;
2648 char *p2;
2649
2650 if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) {
2651
2652 assert(sep->size > 0);
2653
2654 p1 = (char *)NODEKEY(min)(void *)((min)->data) + min->ksize - 1;
2655 p2 = (char *)sep->data + sep->size - 1;
2656
2657 while (p1 >= (char *)NODEKEY(min)(void *)((min)->data) && *p1 == *p2) {
2658 assert(p2 > (char *)sep->data);
2659 p1--;
2660 p2--;
2661 n++;
2662 }
2663
2664 sep->data = p2;
2665 sep->size = n + 1;
2666 } else {
2667
2668 assert(min->ksize > 0);
2669 assert(sep->size > 0);
2670
2671 p1 = (char *)NODEKEY(min)(void *)((min)->data);
2672 p2 = (char *)sep->data;
2673
2674 while (*p1 == *p2) {
2675 p1++;
2676 p2++;
2677 n++;
2678 if (n == min->ksize || n == sep->size)
2679 break;
2680 }
2681
2682 sep->size = n + 1;
2683 }
2684
2685 DPRINTF("reduced separator to [%.*s] > [%.*s]",
2686 (int)sep->size, (char *)sep->data,
2687 (int)min->ksize, (char *)NODEKEY(min));
2688}
2689
2690/* Split page <*mpp>, and insert <key,(data|newpgno)> in either left or
2691 * right sibling, at index <*newindxp> (as if unsplit). Updates *mpp and
2692 * *newindxp with the actual values after split, ie if *mpp and *newindxp
2693 * refer to a node in the new right sibling page.
2694 */
2695static int
2696btree_split(struct btree *bt, struct mpage **mpp, unsigned int *newindxp,
2697 struct btval *newkey, struct btval *newdata, pgno_t newpgno)
2698{
2699 uint8_t flags;
2700 int rc = BT_SUCCESS0, ins_new = 0;
2701 indx_t newindx;
2702 pgno_t pgno = 0;
2703 size_t orig_pfx_len, left_pfx_diff, right_pfx_diff, pfx_diff;
2704 unsigned int i, j, split_indx;
2705 struct node *node;
2706 struct mpage *pright, *p, *mp;
2707 struct btval sepkey, rkey, rdata;
2708 struct btkey tmpkey;
2709 struct page *copy;
2710
2711 assert(bt != NULL);
  [24] '?' condition is true
  [41] '?' condition is true
2712 assert(bt->txn != NULL);
  [25] '?' condition is true
  [42] '?' condition is true
2713
2714 mp = *mpp;
2715 newindx = *newindxp;
2716
2717 DPRINTF("-----> splitting %s page %u and adding [%.*s] at index %d",
2718 IS_LEAF(mp) ? "leaf" : "branch", mp->pgno,
2719 (int)newkey->size, (char *)newkey->data, *newindxp);
2720 DPRINTF("page %u has prefix [%.*s]", mp->pgno,
2721 (int)mp->prefix.len, (char *)mp->prefix.str);
2722 orig_pfx_len = mp->prefix.len;
2723
2724 if (mp->parent == NULL) {
  [26] Assuming field 'parent' is not equal to NULL
  [27] Taking false branch
  [43] Assuming field 'parent' is not equal to NULL
  [44] Taking false branch
2725 if ((mp->parent = btree_new_page(bt, P_BRANCH)) == NULL)
2726 return BT_FAIL;
2727 mp->parent_index = 0;
2728 bt->txn->root = mp->parent->pgno;
2729 DPRINTF("root split! new root = %u", mp->parent->pgno);
2730 bt->meta.depth++;
2731
2732 /* Add left (implicit) pointer. */
2733 if (btree_add_node(bt, mp->parent, 0, NULL, NULL,
2734 mp->pgno, 0) != BT_SUCCESS)
2735 return BT_FAIL;
2736 } else {
2737 DPRINTF("parent branch page is %u", mp->parent->pgno);
2738 }
2739
2740 /* Create a right sibling. */
2741 if ((pright = btree_new_page(bt, mp->page->flags)) == NULL)
  [28] Taking false branch
  [45] Taking false branch
2742 return BT_FAIL;
2743 pright->parent = mp->parent;
2744 pright->parent_index = mp->parent_index + 1;
2745 DPRINTF("new right sibling: page %u", pright->pgno);
2746
2747 /* Move half of the keys to the right sibling. */
2748 if ((copy = malloc(bt->head.psize)) == NULL)
  [29] Assuming the condition is false
  [30] Taking false branch
  [46] Assuming the condition is false
  [47] Taking false branch
2749 return BT_FAIL;
2750 bcopy(mp->page, copy, bt->head.psize);
2751 assert(mp->ref == 0); /* XXX */
  [31] Assuming field 'ref' is equal to 0
  [32] '?' condition is true
  [48] Assuming field 'ref' is equal to 0
  [49] '?' condition is true
2752 memset(&mp->page->ptrs, 0, bt->head.psize - PAGEHDRSZ);
2753 mp->page->lower = PAGEHDRSZ;
2754 mp->page->upper = bt->head.psize;
2755
2756 split_indx = NUMKEYSP(copy) / 2 + 1;
2757
2758 /* First find the separating key between the split pages.
2759  */
2760 memset(&sepkey, 0, sizeof(sepkey));
2761 if (newindx == split_indx) {
  [33] Assuming 'newindx' is not equal to 'split_indx'
  [34] Taking false branch
  [50] Assuming 'newindx' is not equal to 'split_indx'
  [51] Taking false branch
2762 sepkey.size = newkey->size;
2763 sepkey.data = newkey->data;
2764 remove_prefix(bt, &sepkey, mp->prefix.len);
2765 } else {
2766 node = NODEPTRP(copy, split_indx);
2767 sepkey.size = node->ksize;
2768 sepkey.data = NODEKEY(node);
2769 }
2770
2771 if (IS_LEAF(mp) && bt->cmp == NULL) {
  [35] Assuming field 'cmp' is not equal to NULL
  [36] Taking false branch
2772 /* Find the smallest separator. */
2773 /* Ref: Prefix B-trees, R. Bayer, K. Unterauer, 1977 */
2774 node = NODEPTRP(copy, split_indx - 1);
2775 bt_reduce_separator(bt, node, &sepkey);
2776 }
2777
2778 /* Fix separator wrt parent prefix. */
2779 if (bt->cmp == NULL) {
  [36.1] Field 'cmp' is not equal to NULL
  [51.1] Field 'cmp' is not equal to NULL
  [37] Taking false branch
  [52] Taking false branch
2780 tmpkey.len = sizeof(tmpkey.str);
2781 concat_prefix(bt, mp->prefix.str, mp->prefix.len,
2782 sepkey.data, sepkey.size, tmpkey.str, &tmpkey.len);
2783 sepkey.data = tmpkey.str;
2784 sepkey.size = tmpkey.len;
2785 }
2786
2787 DPRINTF("separator is [%.*s]", (int)sepkey.size, (char *)sepkey.data);
2788
2789 /* Copy separator key to the parent.
2790 */
2791 if (SIZELEFT(pright->parent) < bt_branch_size(bt, &sepkey)) {
  [38] Assuming the condition is true
  [39] Taking true branch
  [53] Assuming the condition is false
  [54] Taking false branch
2792 rc = btree_split(bt, &pright->parent, &pright->parent_index,
  [40] Calling 'btree_split'
2793 &sepkey, NULL, pright->pgno);
2794
2795 /* Right page might now have changed parent.
2796  * Check if left page also changed parent.
2797  */
2798 if (pright->parent != mp->parent &&
2799 mp->parent_index >= NUMKEYS(mp->parent)) {
2800 mp->parent = pright->parent;
2801 mp->parent_index = pright->parent_index - 1;
2802 }
2803 } else {
2804 remove_prefix(bt, &sepkey, pright->parent->prefix.len);
2805 rc = btree_add_node(bt, pright->parent, pright->parent_index,
2806 &sepkey, NULL, pright->pgno, 0);
2807 }
2808 if (rc != BT_SUCCESS) {
  [54.1] 'rc' is equal to BT_SUCCESS
  [55] Taking false branch
2809 free(copy);
2810 return BT_FAIL;
2811 }
2812
2813 /* Update prefix for right and left page, if the parent was split.
2814 */
2815 find_common_prefix(bt, pright);
2816 assert(orig_pfx_len <= pright->prefix.len);
  [56] Assuming 'orig_pfx_len' is <= field 'len'
  [57] '?' condition is true
2817 right_pfx_diff = pright->prefix.len - orig_pfx_len;
2818
2819 find_common_prefix(bt, mp);
2820 assert(orig_pfx_len <= mp->prefix.len);
  [58] '?' condition is true
2821 left_pfx_diff = mp->prefix.len - orig_pfx_len;
2822
2823 for (i = j = 0; i <= NUMKEYSP(copy); j++) {
  [59] Loop condition is true.  Entering loop body
2824 if (i < split_indx) {
  [60] Assuming 'i' is >= 'split_indx'
  [61] Taking false branch
2825 /* Re-insert in left sibling. */
2826 p = mp;
2827 pfx_diff = left_pfx_diff;
2828 } else {
2829 /* Insert in right sibling. */
2830 if (i == split_indx)
  [61.1] 'i' is equal to 'split_indx'
  [62] Taking true branch
2831 /* Reset insert index for right sibling. */
2832 j = (i == newindx && ins_new);
  [63] Assuming 'i' is not equal to 'newindx'
2833 p = pright;
2834 pfx_diff = right_pfx_diff;
2835 }
2836
2837 if (i == newindx && !ins_new) {
  [63.1] 'i' is not equal to 'newindx'
2838 /* Insert the original entry that caused the split. */
2839 rkey.data = newkey->data;
2840 rkey.size = newkey->size;
2841 if (IS_LEAF(mp)) {
2842 rdata.data = newdata->data;
2843 rdata.size = newdata->size;
2844 } else
2845 pgno = newpgno;
2846 flags = 0;
2847 pfx_diff = p->prefix.len;
2848
2849 ins_new = 1;
2850
2851 /* Update page and index for the new key. */
2852 *newindxp = j;
2853 *mpp = p;
2854 } else if (i == NUMKEYSP(copy)) {
  [64] Assuming the condition is false
  [65] Taking false branch
2855 break;
2856 } else {
2857 node = NODEPTRP(copy, i);
2858 rkey.data = NODEKEY(node);
2859 rkey.size = node->ksize;
2860 if (IS_LEAF(mp)) {
  [66] Taking false branch
2861 rdata.data = NODEDATA(node);
2862 rdata.size = node->n_dsize;
2863 } else
2864 pgno = node->n_pgno;
2865 flags = node->flags;
2866
2867 i++;
2868 }
2869
2870 if (!IS_LEAF(mp) && j == 0) {
  [66.1] 'j' is equal to 0
  [67] Taking true branch
2871 /* First branch index doesn't need key data. */
2872 rkey.size = 0;
2873 } else
2874 remove_prefix(bt, &rkey, pfx_diff);
2875
2876 rc = btree_add_node(bt, p, j, &rkey, &rdata, pgno, flags);
  [68] Calling 'btree_add_node'
2877 }
2878
2879 free(copy);
2880 return rc;
2881}
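
Reading the numbered path notes together: on the recursive branch-page call at step 40, neither of the two places that set rdata is reached (i != newindx at step 63, IS_LEAF(mp) false at step 66), yet &rdata is still passed to btree_add_node() at step 68, which is where the analyzer then looks for a read of the indeterminate fields. Below is a minimal standalone sketch of that shape, with hypothetical names (struct val, add_node, split_like_path) standing in for the real types and calls; it is not a claim about what btree_add_node() actually does with its data argument on branch pages.

    #include <stddef.h>

    struct val { void *data; size_t size; };    /* stand-in for struct btval */

    static size_t
    add_node(const struct val *data)
    {
            /* Reading data->size here is the kind of access the checker
             * flags when the caller never initialized the struct. */
            return data->size;
    }

    size_t
    split_like_path(int is_leaf)
    {
            struct val rdata;               /* no memset, no assignment */

            if (is_leaf) {                  /* false on the analyzed path */
                    rdata.data = "x";
                    rdata.size = 1;
            }
            return add_node(&rdata);        /* passed along regardless */
    }
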
2882
2883int
2884btree_txn_put(struct btree *bt, struct btree_txn *txn,
2885 struct btval *key, struct btval *data, unsigned int flags)
2886{
2887 int rc = BT_SUCCESS, exact, close_txn = 0;
2888 unsigned int ki;
2889 struct node *leaf;
2890 struct mpage *mp;
2891 struct btval xkey;
2892
2893 assert(key != NULL);
  [1] Assuming 'key' is not equal to null
  [2] '?' condition is true
2894 assert(data != NULL);
  [3] Assuming 'data' is not equal to null
  [4] '?' condition is true
2895
2896 if (bt != NULL && txn != NULL && bt != txn->bt) {
  [5] Assuming 'bt' is equal to NULL
2897 errno = EINVAL;
2898 return BT_FAIL;
2899 }
2900
2901 if (txn != NULL && F_ISSET(txn->flags, BT_TXN_RDONLY)) {
  [6] Assuming 'txn' is not equal to NULL
  [7] Assuming the condition is false
  [8] Taking false branch
2902 errno = EINVAL;
2903 return BT_FAIL;
2904 }
2905
2906 if (bt == NULL) {
  [8.1] 'bt' is equal to NULL
  [9] Taking true branch
2907 if (txn == NULL) {
  [9.1] 'txn' is not equal to NULL
  [10] Taking false branch
2908 errno = EINVAL;
2909 return BT_FAIL;
2910 }
2911 bt = txn->bt;
2912 }
2913
2914 if (key->size == 0 || key->size > MAXKEYSIZE) {
  [11] Assuming field 'size' is not equal to 0
  [12] Assuming field 'size' is <= MAXKEYSIZE
  [13] Taking false branch
2915 errno = EINVAL;
2916 return BT_FAIL;
2917 }
2918
2919 DPRINTF("==> put key %.*s, size %zu, data size %zu",
2920 (int)key->size, (char *)key->data, key->size, data->size);
2921
2922 if (txn == NULL) {
  [13.1] 'txn' is not equal to NULL
  [14] Taking false branch
2923 close_txn = 1;
2924 if ((txn = btree_txn_begin(bt, 0)) == NULL)
2925 return BT_FAIL;
2926 }
2927
2928 rc = btree_search_page(bt, txn, key, NULL, 1, &mp);
2929 if (rc == BT_SUCCESS) {
  [14.1] 'rc' is not equal to BT_SUCCESS
  [15] Taking false branch
2930 leaf = btree_search_node(bt, mp, key, &exact, &ki);
2931 if (leaf && exact) {
2932 if (F_ISSET(flags, BT_NOOVERWRITE)) {
2933 DPRINTF("duplicate key %.*s",
2934 (int)key->size, (char *)key->data);
2935 errno = EEXIST;
2936 rc = BT_FAIL;
2937 goto done;
2938 }
2939 btree_del_node(bt, mp, ki);
2940 }
2941 if (leaf == NULL) { /* append if not found */
2942 ki = NUMKEYS(mp);
2943 DPRINTF("appending key at index %d", ki);
2944 }
2945 } else if (errno == ENOENT) {
  [16] Assuming the condition is true
  [17] Taking true branch
2946 /* new file, just write a root leaf page */
2947 DPRINTF("allocating new root leaf page");
2948 if ((mp = btree_new_page(bt, P_LEAF)) == NULL) {
  [18] Taking false branch
2949 rc = BT_FAIL;
2950 goto done;
2951 }
2952 txn->root = mp->pgno;
2953 bt->meta.depth++;
2954 ki = 0;
2955 }
2956 else
2957 goto done;
2958
2959 assert(IS_LEAF(mp));
  [19] Assuming the condition is true
  [20] '?' condition is true
2960 DPRINTF("there are %lu keys, should insert new key at index %u",
2961 NUMKEYS(mp), ki);
2962
2963 /* Copy the key pointer as it is modified by the prefix code. The
2964 * caller might have malloc'ed the data.
2965 */
2966 xkey.data = key->data;
2967 xkey.size = key->size;
2968
2969 if (SIZELEFT(mp) < bt_leaf_size(bt, key, data)) {
  [21] Assuming the condition is true
  [22] Taking true branch
2970 rc = btree_split(bt, &mp, &ki, &xkey, data, P_INVALID);
  [23] Calling 'btree_split'
2971 } else {
2972 /* There is room already in this leaf page. */
2973 remove_prefix(bt, &xkey, mp->prefix.len);
2974 rc = btree_add_node(bt, mp, ki, &xkey, data, 0, 0);
2975 }
2976
2977 if (rc != BT_SUCCESS)
2978 txn->flags |= BT_TXN_ERROR;
2979 else
2980 bt->meta.entries++;
2981
2982 done:
2983 if (close_txn) {
2984 if (rc == BT_SUCCESS)
2985 rc = btree_txn_commit(txn);
2986 else
2987 btree_txn_abort(txn);
2988 }
2989 mpage_prune(bt);
2990 return rc;
2991 }
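
A hedged usage sketch for btree_txn_put(): the handle bt is assumed to come from elsewhere, the key and value bytes are invented, and passing a NULL txn relies on the close_txn logic above, which begins and commits a transaction internally.

    #include <string.h>
    #include <err.h>
    #include "btree.h"

    static void
    put_example(struct btree *bt)
    {
            struct btval key, data;

            memset(&key, 0, sizeof(key));
            memset(&data, 0, sizeof(data));
            key.data = "uid=jdoe";
            key.size = strlen("uid=jdoe");
            data.data = "some value";
            data.size = strlen("some value");

            /* BT_NOOVERWRITE: fail with EEXIST instead of replacing. */
            if (btree_txn_put(bt, NULL, &key, &data, BT_NOOVERWRITE) != BT_SUCCESS)
                    warn("btree_txn_put");
    }
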
2992
2993static pgno_t
2994btree_compact_tree(struct btree *bt, pgno_t pgno, struct btree *btc)
2995{
2996 ssize_t rc;
2997 indx_t i;
2998 pgno_t *pnext, next;
2999 struct node *node;
3000 struct page *p;
3001 struct mpage *mp;
3002
3003 /* Get the page and make a copy of it.
3004 */
3005 if ((mp = btree_get_mpage(bt, pgno)) == NULL)
3006 return P_INVALID;
3007 if ((p = malloc(bt->head.psize)) == NULL)
3008 return P_INVALID;
3009 bcopy(mp->page, p, bt->head.psize);
3010
3011 /* Go through all nodes in the (copied) page and update the
3012  * page pointers.
3013  */
3014 if (F_ISSET(p->flags, P_BRANCH)) {
3015 for (i = 0; i < NUMKEYSP(p); i++) {
3016 node = NODEPTRP(p, i);
3017 node->n_pgno = btree_compact_tree(bt, node->n_pgno, btc);
3018 if (node->n_pgno == P_INVALID) {
3019 free(p);
3020 return P_INVALID;
3021 }
3022 }
3023 } else if (F_ISSET(p->flags, P_LEAF)) {
3024 for (i = 0; i < NUMKEYSP(p); i++) {
3025 node = NODEPTRP(p, i);
3026 if (F_ISSET(node->flags, F_BIGDATA)) {
3027 bcopy(NODEDATA(node), &next, sizeof(next));
3028 next = btree_compact_tree(bt, next, btc);
3029 if (next == P_INVALID) {
3030 free(p);
3031 return P_INVALID;
3032 }
3033 bcopy(&next, NODEDATA(node), sizeof(next));
3034 }
3035 }
3036 } else if (F_ISSET(p->flags, P_OVERFLOW)) {
3037 pnext = &p->p_next_pgno;
3038 if (*pnext > 0) {
3039 *pnext = btree_compact_tree(bt, *pnext, btc);
3040 if (*pnext == P_INVALID) {
3041 free(p);
3042 return P_INVALID;
3043 }
3044 }
3045 } else
3046 assert(0);
3047
3048 pgno = p->pgno = btc->txn->next_pgno++;
3049 rc = write(btc->fd, p, bt->head.psize);
3050 free(p);
3051 if (rc != (ssize_t)bt->head.psize)
3052 return P_INVALID;
3053 mpage_prune(bt);
3054 return pgno;
3055 }
3056
3057int
3058btree_compact(struct btree *bt)
3059{
3060 char *compact_path = NULL;
3061 struct btree *btc;
3062 struct btree_txn *txn, *txnc = NULL;
3063 int fd;
3064 pgno_t root;
3065
3066 assert(bt != NULL);
3067
3068 DPRINTF("compacting btree %p with path %s", bt, bt->path);
3069
3070 if (bt->path == NULL) {
3071 errno = EINVAL;
3072 return BT_FAIL;
3073 }
3074
3075 if ((txn = btree_txn_begin(bt, 0)) == NULL)
3076 return BT_FAIL;
3077
3078 if (asprintf(&compact_path, "%s.compact.XXXXXX", bt->path) == -1) {
3079 btree_txn_abort(txn);
3080 return BT_FAIL;
3081 }
3082 fd = mkstemp(compact_path);
3083 if (fd == -1) {
3084 free(compact_path);
3085 btree_txn_abort(txn);
3086 return BT_FAIL;
3087 }
3088
3089 if ((btc = btree_open_fd(fd, 0)) == NULL)
3090 goto failed;
3091 bcopy(&bt->meta, &btc->meta, sizeof(bt->meta));
3092 btc->meta.revisions = 0;
3093
3094 if ((txnc = btree_txn_begin(btc, 0)) == NULL)
3095 goto failed;
3096
3097 if (bt->meta.root != P_INVALID) {
3098 root = btree_compact_tree(bt, bt->meta.root, btc);
3099 if (root == P_INVALID)
3100 goto failed;
3101 if (btree_write_meta(btc, root, 0) != BT_SUCCESS)
3102 goto failed;
3103 }
3104
3105 fsync(fd);
3106
3107 DPRINTF("renaming %s to %s", compact_path, bt->path);
3108 if (rename(compact_path, bt->path) != 0)
3109 goto failed;
3110
3111 /* Write a "tombstone" meta page so other processes can pick up
3112  * the change and re-open the file.
3113  */
3114 if (btree_write_meta(bt, P_INVALID, BT_TOMBSTONE) != BT_SUCCESS)
3115 goto failed;
3116
3117 btree_txn_abort(txn);
3118 btree_txn_abort(txnc);
3119 free(compact_path);
3120 btree_close(btc);
3121 mpage_prune(bt);
3122 return 0;
3123
3124 failed:
3125 btree_txn_abort(txn);
3126 btree_txn_abort(txnc);
3127 unlink(compact_path);
3128 free(compact_path);
3129 btree_close(btc);
3130 mpage_prune(bt);
3131 return BT_FAIL;
3132 }
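
A hedged usage sketch for the compaction entry point; bt is again assumed to be an existing handle, and the only contract used is the one visible above: 0 on success, BT_FAIL otherwise, with the rewritten file renamed over bt->path and a tombstone meta page written so other processes re-open the file.

    #include <err.h>
    #include "btree.h"

    static void
    compact_example(struct btree *bt)
    {
            /* Rewrites the database into <path>.compact.XXXXXX, then
             * renames it into place (see the function body above). */
            if (btree_compact(bt) != 0)
                    warn("compaction of %s failed", btree_get_path(bt));
    }
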
3133
3134/* Reverts the last change. Truncates the file at the last root page.
3135 */
3136int
3137btree_revert(struct btree *bt)
3138{
3139 if (btree_read_meta(bt, NULL) != 0)
3140 return -1;
3141
3142 DPRINTF("truncating file at page %u", bt->meta.root);
3143 return ftruncate(bt->fd, bt->head.psize * bt->meta.root);
3144 }
3145
3146void
3147btree_set_cache_size(struct btree *bt, unsigned int cache_size)
3148{
3149 bt->stat.max_cache = cache_size;
3150}
3151
3152unsigned int
3153btree_get_flags(struct btree *bt)
3154{
3155 return (bt->flags & ~BT_FIXPADDING);
3156}
3157
3158const char *
3159btree_get_path(struct btree *bt)
3160{
3161 return bt->path;
3162}
3163
3164const struct btree_stat *
3165btree_stat(struct btree *bt)
3166{
3167 if (bt == NULL)
3168 return NULL;
3169
3170 bt->stat.branch_pages = bt->meta.branch_pages;
3171 bt->stat.leaf_pages = bt->meta.leaf_pages;
3172 bt->stat.overflow_pages = bt->meta.overflow_pages;
3173 bt->stat.revisions = bt->meta.revisions;
3174 bt->stat.depth = bt->meta.depth;
3175 bt->stat.entries = bt->meta.entries;
3176 bt->stat.psize = bt->head.psize;
3177 bt->stat.created_at = bt->meta.created_at;
3178
3179 return &bt->stat;
3180}
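
Finally, a small usage sketch for the accessor above. The field names are exactly the ones assigned into bt->stat here; the printf casts are only there because the integer widths of those fields are not visible in this listing.

    #include <stdio.h>
    #include "btree.h"

    static void
    stat_example(struct btree *bt)
    {
            const struct btree_stat *st;

            if ((st = btree_stat(bt)) == NULL)
                    return;
            printf("depth %u, entries %llu, page size %u, revisions %llu\n",
                (unsigned)st->depth, (unsigned long long)st->entries,
                (unsigned)st->psize, (unsigned long long)st->revisions);
    }
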
3181