Bug Summary

File: src/usr.sbin/ldapd/btree.c
Warning: line 2842, column 18
Access to field 'data' results in a dereference of a null pointer (loaded from variable 'newdata')

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name btree.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -pic-is-pie -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/usr.sbin/ldapd/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/usr.sbin/ldapd -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -fdebug-compilation-dir=/usr/src/usr.sbin/ldapd/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c /usr/src/usr.sbin/ldapd/btree.c
1/* $OpenBSD: btree.c,v 1.38 2017/05/26 21:23:14 sthen Exp $ */
2
3/*
4 * Copyright (c) 2009, 2010 Martin Hedenfalk <martin@bzero.se>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <sys/types.h>
20#include <sys/tree.h>
21#include <sys/stat.h>
22#include <sys/queue.h>
23#include <sys/uio.h>
24
25#include <assert.h>
26#include <err.h>
27#include <errno.h>
28#include <fcntl.h>
29#include <stddef.h>
30#include <stdint.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34#include <time.h>
35#include <unistd.h>
36
37#include "btree.h"
38
39/* #define DEBUG */
40
41#ifdef DEBUG
42# define DPRINTF(...) do { fprintf(stderr(&__sF[2]), "%s:%d: ", __func__, __LINE__42); \
43 fprintf(stderr(&__sF[2]), __VA_ARGS__); \
44 fprintf(stderr(&__sF[2]), "\n"); } while(0)
45#else
46# define DPRINTF(...)
47#endif
48
49#define PAGESIZE4096 4096
50#define BT_MINKEYS4 4
51#define BT_MAGIC0xB3DBB3DB 0xB3DBB3DB
52#define BT_VERSION4 4
53#define MAXKEYSIZE255 255
54
55#define P_INVALID0xFFFFFFFF 0xFFFFFFFF
56
57#define F_ISSET(w, f)(((w) & (f)) == (f)) (((w) & (f)) == (f))
58#define MINIMUM(a,b)((a) < (b) ? (a) : (b)) ((a) < (b) ? (a) : (b))
59
60typedef uint32_t pgno_t;
61typedef uint16_t indx_t;
62
63/* There are four page types: meta, index, leaf and overflow.
64 * They all share the same page header.
65 */
66struct page { /* represents an on-disk page */
67 pgno_t pgno; /* page number */
68#define P_BRANCH0x01 0x01 /* branch page */
69#define P_LEAF0x02 0x02 /* leaf page */
70#define P_OVERFLOW0x04 0x04 /* overflow page */
71#define P_META0x08 0x08 /* meta page */
72#define P_HEAD0x10 0x10 /* header page */
73 uint32_t flags;
74#define lowerb.fb.fb_lower b.fb.fb_lower
75#define upperb.fb.fb_upper b.fb.fb_upper
76#define p_next_pgnob.pb_next_pgno b.pb_next_pgno
77 union page_bounds {
78 struct {
79 indx_t fb_lower; /* lower bound of free space */
80 indx_t fb_upper; /* upper bound of free space */
81 } fb;
82 pgno_t pb_next_pgno; /* overflow page linked list */
83 } b;
84 indx_t ptrs[1]; /* dynamic size */
85} __packed__attribute__((__packed__));
86
87#define PAGEHDRSZ__builtin_offsetof(struct page, ptrs) offsetof(struct page, ptrs)__builtin_offsetof(struct page, ptrs)
88
89#define NUMKEYSP(p)(((p)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs
)) >> 1)
(((p)->lowerb.fb.fb_lower - PAGEHDRSZ__builtin_offsetof(struct page, ptrs)) >> 1)
90#define NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct
page, ptrs)) >> 1)
(((mp)->page->lowerb.fb.fb_lower - PAGEHDRSZ__builtin_offsetof(struct page, ptrs)) >> 1)
91#define SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page->
b.fb.fb_lower)
(indx_t)((mp)->page->upperb.fb.fb_upper - (mp)->page->lowerb.fb.fb_lower)
92#define PAGEFILL(bt, mp)(1000 * ((bt)->head.psize - __builtin_offsetof(struct page
, ptrs) - (indx_t)((mp)->page->b.fb.fb_upper - (mp)->
page->b.fb.fb_lower)) / ((bt)->head.psize - __builtin_offsetof
(struct page, ptrs)))
(1000 * ((bt)->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs) - SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page->
b.fb.fb_lower)
) / \
93 ((bt)->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs)))
94#define IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)) F_ISSET((mp)->page->flags, P_LEAF)((((mp)->page->flags) & (0x02)) == (0x02))
95#define IS_BRANCH(mp)((((mp)->page->flags) & (0x01)) == (0x01)) F_ISSET((mp)->page->flags, P_BRANCH)((((mp)->page->flags) & (0x01)) == (0x01))
96#define IS_OVERFLOW(mp)((((mp)->page->flags) & (0x04)) == (0x04)) F_ISSET((mp)->page->flags, P_OVERFLOW)((((mp)->page->flags) & (0x04)) == (0x04))
97
98struct bt_head { /* header page content */
99 uint32_t magic;
100 uint32_t version;
101 uint32_t flags;
102 uint32_t psize; /* page size */
103} __packed__attribute__((__packed__));
104
105struct bt_meta { /* meta (footer) page content */
106#define BT_TOMBSTONE0x01 0x01 /* file is replaced */
107 uint32_t flags;
108 pgno_t root; /* page number of root page */
109 pgno_t prev_meta; /* previous meta page number */
110 time_t created_at;
111 uint32_t branch_pages;
112 uint32_t leaf_pages;
113 uint32_t overflow_pages;
114 uint32_t revisions;
115 uint32_t depth;
116 uint64_t entries;
117 unsigned char hash[SHA_DIGEST_LENGTH20];
118} __packed__attribute__((__packed__));
119
120struct btkey {
121 size_t len;
122 char str[MAXKEYSIZE255];
123};
124
125struct mpage { /* an in-memory cached page */
126 RB_ENTRY(mpage)struct { struct mpage *rbe_left; struct mpage *rbe_right; struct
mpage *rbe_parent; int rbe_color; }
entry; /* page cache entry */
127 SIMPLEQ_ENTRY(mpage)struct { struct mpage *sqe_next; } next; /* queue of dirty pages */
128 TAILQ_ENTRY(mpage)struct { struct mpage *tqe_next; struct mpage **tqe_prev; } lru_next; /* LRU queue */
129 struct mpage *parent; /* NULL if root */
130 unsigned int parent_index; /* keep track of node index */
131 struct btkey prefix;
132 struct page *page;
133 pgno_t pgno; /* copy of page->pgno */
134 short ref; /* increased by cursors */
135 short dirty; /* 1 if on dirty queue */
136};
137RB_HEAD(page_cache, mpage)struct page_cache { struct mpage *rbh_root; };
138SIMPLEQ_HEAD(dirty_queue, mpage)struct dirty_queue { struct mpage *sqh_first; struct mpage **
sqh_last; }
;
139TAILQ_HEAD(lru_queue, mpage)struct lru_queue { struct mpage *tqh_first; struct mpage **tqh_last
; }
;
140
141static int mpage_cmp(struct mpage *a, struct mpage *b);
142static struct mpage *mpage_lookup(struct btree *bt, pgno_t pgno);
143static void mpage_add(struct btree *bt, struct mpage *mp);
144static void mpage_free(struct mpage *mp);
145static void mpage_del(struct btree *bt, struct mpage *mp);
146static void mpage_flush(struct btree *bt);
147static struct mpage *mpage_copy(struct btree *bt, struct mpage *mp);
148static void mpage_prune(struct btree *bt);
149static void mpage_dirty(struct btree *bt, struct mpage *mp);
150static struct mpage *mpage_touch(struct btree *bt, struct mpage *mp);
151
152RB_PROTOTYPE(page_cache, mpage, entry, mpage_cmp)void page_cache_RB_INSERT_COLOR(struct page_cache *, struct mpage
*); void page_cache_RB_REMOVE_COLOR(struct page_cache *, struct
mpage *, struct mpage *); struct mpage *page_cache_RB_REMOVE
(struct page_cache *, struct mpage *); struct mpage *page_cache_RB_INSERT
(struct page_cache *, struct mpage *); struct mpage *page_cache_RB_FIND
(struct page_cache *, struct mpage *); struct mpage *page_cache_RB_NFIND
(struct page_cache *, struct mpage *); struct mpage *page_cache_RB_NEXT
(struct mpage *); struct mpage *page_cache_RB_PREV(struct mpage
*); struct mpage *page_cache_RB_MINMAX(struct page_cache *, int
);
;
153RB_GENERATE(page_cache, mpage, entry, mpage_cmp)void page_cache_RB_INSERT_COLOR(struct page_cache *head, struct
mpage *elm) { struct mpage *parent, *gparent, *tmp; while ((
parent = (elm)->entry.rbe_parent) && (parent)->
entry.rbe_color == 1) { gparent = (parent)->entry.rbe_parent
; if (parent == (gparent)->entry.rbe_left) { tmp = (gparent
)->entry.rbe_right; if (tmp && (tmp)->entry.rbe_color
== 1) { (tmp)->entry.rbe_color = 0; do { (parent)->entry
.rbe_color = 0; (gparent)->entry.rbe_color = 1; } while (0
); elm = gparent; continue; } if ((parent)->entry.rbe_right
== elm) { do { (tmp) = (parent)->entry.rbe_right; if (((parent
)->entry.rbe_right = (tmp)->entry.rbe_left)) { ((tmp)->
entry.rbe_left)->entry.rbe_parent = (parent); } do {} while
(0); if (((tmp)->entry.rbe_parent = (parent)->entry.rbe_parent
)) { if ((parent) == ((parent)->entry.rbe_parent)->entry
.rbe_left) ((parent)->entry.rbe_parent)->entry.rbe_left
= (tmp); else ((parent)->entry.rbe_parent)->entry.rbe_right
= (tmp); } else (head)->rbh_root = (tmp); (tmp)->entry
.rbe_left = (parent); (parent)->entry.rbe_parent = (tmp); do
{} while (0); if (((tmp)->entry.rbe_parent)) do {} while (
0); } while (0); tmp = parent; parent = elm; elm = tmp; } do {
(parent)->entry.rbe_color = 0; (gparent)->entry.rbe_color
= 1; } while (0); do { (tmp) = (gparent)->entry.rbe_left;
if (((gparent)->entry.rbe_left = (tmp)->entry.rbe_right
)) { ((tmp)->entry.rbe_right)->entry.rbe_parent = (gparent
); } do {} while (0); if (((tmp)->entry.rbe_parent = (gparent
)->entry.rbe_parent)) { if ((gparent) == ((gparent)->entry
.rbe_parent)->entry.rbe_left) ((gparent)->entry.rbe_parent
)->entry.rbe_left = (tmp); else ((gparent)->entry.rbe_parent
)->entry.rbe_right = (tmp); } else (head)->rbh_root = (
tmp); (tmp)->entry.rbe_right = (gparent); (gparent)->entry
.rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent
)) do {} while (0); } while (0); } else { tmp = (gparent)->
entry.rbe_left; if (tmp && (tmp)->entry.rbe_color ==
1) { (tmp)->entry.rbe_color = 0; do { (parent)->entry.
rbe_color = 0; (gparent)->entry.rbe_color = 1; } while (0)
; elm = gparent; continue; } if ((parent)->entry.rbe_left ==
elm) { do { (tmp) = (parent)->entry.rbe_left; if (((parent
)->entry.rbe_left = (tmp)->entry.rbe_right)) { ((tmp)->
entry.rbe_right)->entry.rbe_parent = (parent); } do {} while
(0); if (((tmp)->entry.rbe_parent = (parent)->entry.rbe_parent
)) { if ((parent) == ((parent)->entry.rbe_parent)->entry
.rbe_left) ((parent)->entry.rbe_parent)->entry.rbe_left
= (tmp); else ((parent)->entry.rbe_parent)->entry.rbe_right
= (tmp); } else (head)->rbh_root = (tmp); (tmp)->entry
.rbe_right = (parent); (parent)->entry.rbe_parent = (tmp);
do {} while (0); if (((tmp)->entry.rbe_parent)) do {} while
(0); } while (0); tmp = parent; parent = elm; elm = tmp; } do
{ (parent)->entry.rbe_color = 0; (gparent)->entry.rbe_color
= 1; } while (0); do { (tmp) = (gparent)->entry.rbe_right
; if (((gparent)->entry.rbe_right = (tmp)->entry.rbe_left
)) { ((tmp)->entry.rbe_left)->entry.rbe_parent = (gparent
); } do {} while (0); if (((tmp)->entry.rbe_parent = (gparent
)->entry.rbe_parent)) { if ((gparent) == ((gparent)->entry
.rbe_parent)->entry.rbe_left) ((gparent)->entry.rbe_parent
)->entry.rbe_left = (tmp); else ((gparent)->entry.rbe_parent
)->entry.rbe_right = (tmp); } else (head)->rbh_root = (
tmp); (tmp)->entry.rbe_left = (gparent); (gparent)->entry
.rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent
)) do {} while (0); } while (0); } } (head->rbh_root)->
entry.rbe_color = 0; } void page_cache_RB_REMOVE_COLOR(struct
page_cache *head, struct mpage *parent, struct mpage *elm) {
struct mpage *tmp; while ((elm == ((void*)0) || (elm)->entry
.rbe_color == 0) && elm != (head)->rbh_root) { if (
(parent)->entry.rbe_left == elm) { tmp = (parent)->entry
.rbe_right; if ((tmp)->entry.rbe_color == 1) { do { (tmp)->
entry.rbe_color = 0; (parent)->entry.rbe_color = 1; } while
(0); do { (tmp) = (parent)->entry.rbe_right; if (((parent
)->entry.rbe_right = (tmp)->entry.rbe_left)) { ((tmp)->
entry.rbe_left)->entry.rbe_parent = (parent); } do {} while
(0); if (((tmp)->entry.rbe_parent = (parent)->entry.rbe_parent
)) { if ((parent) == ((parent)->entry.rbe_parent)->entry
.rbe_left) ((parent)->entry.rbe_parent)->entry.rbe_left
= (tmp); else ((parent)->entry.rbe_parent)->entry.rbe_right
= (tmp); } else (head)->rbh_root = (tmp); (tmp)->entry
.rbe_left = (parent); (parent)->entry.rbe_parent = (tmp); do
{} while (0); if (((tmp)->entry.rbe_parent)) do {} while (
0); } while (0); tmp = (parent)->entry.rbe_right; } if (((
tmp)->entry.rbe_left == ((void*)0) || ((tmp)->entry.rbe_left
)->entry.rbe_color == 0) && ((tmp)->entry.rbe_right
== ((void*)0) || ((tmp)->entry.rbe_right)->entry.rbe_color
== 0)) { (tmp)->entry.rbe_color = 1; elm = parent; parent
= (elm)->entry.rbe_parent; } else { if ((tmp)->entry.rbe_right
== ((void*)0) || ((tmp)->entry.rbe_right)->entry.rbe_color
== 0) { struct mpage *oleft; if ((oleft = (tmp)->entry.rbe_left
)) (oleft)->entry.rbe_color = 0; (tmp)->entry.rbe_color
= 1; do { (oleft) = (tmp)->entry.rbe_left; if (((tmp)->
entry.rbe_left = (oleft)->entry.rbe_right)) { ((oleft)->
entry.rbe_right)->entry.rbe_parent = (tmp); } do {} while (
0); if (((oleft)->entry.rbe_parent = (tmp)->entry.rbe_parent
)) { if ((tmp) == ((tmp)->entry.rbe_parent)->entry.rbe_left
) ((tmp)->entry.rbe_parent)->entry.rbe_left = (oleft); else
((tmp)->entry.rbe_parent)->entry.rbe_right = (oleft); }
else (head)->rbh_root = (oleft); (oleft)->entry.rbe_right
= (tmp); (tmp)->entry.rbe_parent = (oleft); do {} while (
0); if (((oleft)->entry.rbe_parent)) do {} while (0); } while
(0); tmp = (parent)->entry.rbe_right; } (tmp)->entry.rbe_color
= (parent)->entry.rbe_color; (parent)->entry.rbe_color
= 0; if ((tmp)->entry.rbe_right) ((tmp)->entry.rbe_right
)->entry.rbe_color = 0; do { (tmp) = (parent)->entry.rbe_right
; if (((parent)->entry.rbe_right = (tmp)->entry.rbe_left
)) { ((tmp)->entry.rbe_left)->entry.rbe_parent = (parent
); } do {} while (0); if (((tmp)->entry.rbe_parent = (parent
)->entry.rbe_parent)) { if ((parent) == ((parent)->entry
.rbe_parent)->entry.rbe_left) ((parent)->entry.rbe_parent
)->entry.rbe_left = (tmp); else ((parent)->entry.rbe_parent
)->entry.rbe_right = (tmp); } else (head)->rbh_root = (
tmp); (tmp)->entry.rbe_left = (parent); (parent)->entry
.rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent
)) do {} while (0); } while (0); elm = (head)->rbh_root; break
; } } else { tmp = (parent)->entry.rbe_left; if ((tmp)->
entry.rbe_color == 1) { do { (tmp)->entry.rbe_color = 0; (
parent)->entry.rbe_color = 1; } while (0); do { (tmp) = (parent
)->entry.rbe_left; if (((parent)->entry.rbe_left = (tmp
)->entry.rbe_right)) { ((tmp)->entry.rbe_right)->entry
.rbe_parent = (parent); } do {} while (0); if (((tmp)->entry
.rbe_parent = (parent)->entry.rbe_parent)) { if ((parent) ==
((parent)->entry.rbe_parent)->entry.rbe_left) ((parent
)->entry.rbe_parent)->entry.rbe_left = (tmp); else ((parent
)->entry.rbe_parent)->entry.rbe_right = (tmp); } else (
head)->rbh_root = (tmp); (tmp)->entry.rbe_right = (parent
); (parent)->entry.rbe_parent = (tmp); do {} while (0); if
(((tmp)->entry.rbe_parent)) do {} while (0); } while (0);
tmp = (parent)->entry.rbe_left; } if (((tmp)->entry.rbe_left
== ((void*)0) || ((tmp)->entry.rbe_left)->entry.rbe_color
== 0) && ((tmp)->entry.rbe_right == ((void*)0) ||
((tmp)->entry.rbe_right)->entry.rbe_color == 0)) { (tmp
)->entry.rbe_color = 1; elm = parent; parent = (elm)->entry
.rbe_parent; } else { if ((tmp)->entry.rbe_left == ((void*
)0) || ((tmp)->entry.rbe_left)->entry.rbe_color == 0) {
struct mpage *oright; if ((oright = (tmp)->entry.rbe_right
)) (oright)->entry.rbe_color = 0; (tmp)->entry.rbe_color
= 1; do { (oright) = (tmp)->entry.rbe_right; if (((tmp)->
entry.rbe_right = (oright)->entry.rbe_left)) { ((oright)->
entry.rbe_left)->entry.rbe_parent = (tmp); } do {} while (
0); if (((oright)->entry.rbe_parent = (tmp)->entry.rbe_parent
)) { if ((tmp) == ((tmp)->entry.rbe_parent)->entry.rbe_left
) ((tmp)->entry.rbe_parent)->entry.rbe_left = (oright);
else ((tmp)->entry.rbe_parent)->entry.rbe_right = (oright
); } else (head)->rbh_root = (oright); (oright)->entry.
rbe_left = (tmp); (tmp)->entry.rbe_parent = (oright); do {
} while (0); if (((oright)->entry.rbe_parent)) do {} while
(0); } while (0); tmp = (parent)->entry.rbe_left; } (tmp)
->entry.rbe_color = (parent)->entry.rbe_color; (parent)
->entry.rbe_color = 0; if ((tmp)->entry.rbe_left) ((tmp
)->entry.rbe_left)->entry.rbe_color = 0; do { (tmp) = (
parent)->entry.rbe_left; if (((parent)->entry.rbe_left =
(tmp)->entry.rbe_right)) { ((tmp)->entry.rbe_right)->
entry.rbe_parent = (parent); } do {} while (0); if (((tmp)->
entry.rbe_parent = (parent)->entry.rbe_parent)) { if ((parent
) == ((parent)->entry.rbe_parent)->entry.rbe_left) ((parent
)->entry.rbe_parent)->entry.rbe_left = (tmp); else ((parent
)->entry.rbe_parent)->entry.rbe_right = (tmp); } else (
head)->rbh_root = (tmp); (tmp)->entry.rbe_right = (parent
); (parent)->entry.rbe_parent = (tmp); do {} while (0); if
(((tmp)->entry.rbe_parent)) do {} while (0); } while (0);
elm = (head)->rbh_root; break; } } } if (elm) (elm)->entry
.rbe_color = 0; } struct mpage * page_cache_RB_REMOVE(struct page_cache
*head, struct mpage *elm) { struct mpage *child, *parent, *old
= elm; int color; if ((elm)->entry.rbe_left == ((void*)0)
) child = (elm)->entry.rbe_right; else if ((elm)->entry
.rbe_right == ((void*)0)) child = (elm)->entry.rbe_left; else
{ struct mpage *left; elm = (elm)->entry.rbe_right; while
((left = (elm)->entry.rbe_left)) elm = left; child = (elm
)->entry.rbe_right; parent = (elm)->entry.rbe_parent; color
= (elm)->entry.rbe_color; if (child) (child)->entry.rbe_parent
= parent; if (parent) { if ((parent)->entry.rbe_left == elm
) (parent)->entry.rbe_left = child; else (parent)->entry
.rbe_right = child; do {} while (0); } else (head)->rbh_root
= child; if ((elm)->entry.rbe_parent == old) parent = elm
; (elm)->entry = (old)->entry; if ((old)->entry.rbe_parent
) { if (((old)->entry.rbe_parent)->entry.rbe_left == old
) ((old)->entry.rbe_parent)->entry.rbe_left = elm; else
((old)->entry.rbe_parent)->entry.rbe_right = elm; do {
} while (0); } else (head)->rbh_root = elm; ((old)->entry
.rbe_left)->entry.rbe_parent = elm; if ((old)->entry.rbe_right
) ((old)->entry.rbe_right)->entry.rbe_parent = elm; if (
parent) { left = parent; do { do {} while (0); } while ((left
= (left)->entry.rbe_parent)); } goto color; } parent = (elm
)->entry.rbe_parent; color = (elm)->entry.rbe_color; if
(child) (child)->entry.rbe_parent = parent; if (parent) {
if ((parent)->entry.rbe_left == elm) (parent)->entry.rbe_left
= child; else (parent)->entry.rbe_right = child; do {} while
(0); } else (head)->rbh_root = child; color: if (color ==
0) page_cache_RB_REMOVE_COLOR(head, parent, child); return (
old); } struct mpage * page_cache_RB_INSERT(struct page_cache
*head, struct mpage *elm) { struct mpage *tmp; struct mpage *
parent = ((void*)0); int comp = 0; tmp = (head)->rbh_root;
while (tmp) { parent = tmp; comp = (mpage_cmp)(elm, parent);
if (comp < 0) tmp = (tmp)->entry.rbe_left; else if (comp
> 0) tmp = (tmp)->entry.rbe_right; else return (tmp); }
do { (elm)->entry.rbe_parent = parent; (elm)->entry.rbe_left
= (elm)->entry.rbe_right = ((void*)0); (elm)->entry.rbe_color
= 1; } while (0); if (parent != ((void*)0)) { if (comp < 0
) (parent)->entry.rbe_left = elm; else (parent)->entry.
rbe_right = elm; do {} while (0); } else (head)->rbh_root =
elm; page_cache_RB_INSERT_COLOR(head, elm); return (((void*)
0)); } struct mpage * page_cache_RB_FIND(struct page_cache *head
, struct mpage *elm) { struct mpage *tmp = (head)->rbh_root
; int comp; while (tmp) { comp = mpage_cmp(elm, tmp); if (comp
< 0) tmp = (tmp)->entry.rbe_left; else if (comp > 0
) tmp = (tmp)->entry.rbe_right; else return (tmp); } return
(((void*)0)); } struct mpage * page_cache_RB_NFIND(struct page_cache
*head, struct mpage *elm) { struct mpage *tmp = (head)->rbh_root
; struct mpage *res = ((void*)0); int comp; while (tmp) { comp
= mpage_cmp(elm, tmp); if (comp < 0) { res = tmp; tmp = (
tmp)->entry.rbe_left; } else if (comp > 0) tmp = (tmp)->
entry.rbe_right; else return (tmp); } return (res); } struct mpage
* page_cache_RB_NEXT(struct mpage *elm) { if ((elm)->entry
.rbe_right) { elm = (elm)->entry.rbe_right; while ((elm)->
entry.rbe_left) elm = (elm)->entry.rbe_left; } else { if (
(elm)->entry.rbe_parent && (elm == ((elm)->entry
.rbe_parent)->entry.rbe_left)) elm = (elm)->entry.rbe_parent
; else { while ((elm)->entry.rbe_parent && (elm ==
((elm)->entry.rbe_parent)->entry.rbe_right)) elm = (elm
)->entry.rbe_parent; elm = (elm)->entry.rbe_parent; } }
return (elm); } struct mpage * page_cache_RB_PREV(struct mpage
*elm) { if ((elm)->entry.rbe_left) { elm = (elm)->entry
.rbe_left; while ((elm)->entry.rbe_right) elm = (elm)->
entry.rbe_right; } else { if ((elm)->entry.rbe_parent &&
(elm == ((elm)->entry.rbe_parent)->entry.rbe_right)) elm
= (elm)->entry.rbe_parent; else { while ((elm)->entry.
rbe_parent && (elm == ((elm)->entry.rbe_parent)->
entry.rbe_left)) elm = (elm)->entry.rbe_parent; elm = (elm
)->entry.rbe_parent; } } return (elm); } struct mpage * page_cache_RB_MINMAX
(struct page_cache *head, int val) { struct mpage *tmp = (head
)->rbh_root; struct mpage *parent = ((void*)0); while (tmp
) { parent = tmp; if (val < 0) tmp = (tmp)->entry.rbe_left
; else tmp = (tmp)->entry.rbe_right; } return (parent); }
;
154
155struct ppage { /* ordered list of pages */
156 SLIST_ENTRY(ppage)struct { struct ppage *sle_next; } entry;
157 struct mpage *mpage;
158 unsigned int ki; /* cursor index on page */
159};
160SLIST_HEAD(page_stack, ppage)struct page_stack { struct ppage *slh_first; };
161
162#define CURSOR_EMPTY(c)(((&(c)->stack)->slh_first) == ((void*)0)) SLIST_EMPTY(&(c)->stack)(((&(c)->stack)->slh_first) == ((void*)0))
163#define CURSOR_TOP(c)((&(c)->stack)->slh_first) SLIST_FIRST(&(c)->stack)((&(c)->stack)->slh_first)
164#define CURSOR_POP(c)do { (&(c)->stack)->slh_first = (&(c)->stack
)->slh_first->entry.sle_next; } while (0)
SLIST_REMOVE_HEAD(&(c)->stack, entry)do { (&(c)->stack)->slh_first = (&(c)->stack
)->slh_first->entry.sle_next; } while (0)
165#define CURSOR_PUSH(c,p)do { (p)->entry.sle_next = (&(c)->stack)->slh_first
; (&(c)->stack)->slh_first = (p); } while (0)
SLIST_INSERT_HEAD(&(c)->stack, p, entry)do { (p)->entry.sle_next = (&(c)->stack)->slh_first
; (&(c)->stack)->slh_first = (p); } while (0)
166
167struct cursor {
168 struct btree *bt;
169 struct btree_txn *txn;
170 struct page_stack stack; /* stack of parent pages */
171 short initialized; /* 1 if initialized */
172 short eof; /* 1 if end is reached */
173};
174
175#define METAHASHLEN__builtin_offsetof(struct bt_meta, hash) offsetof(struct bt_meta, hash)__builtin_offsetof(struct bt_meta, hash)
176#define METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs))) ((void *)((char *)p + PAGEHDRSZ__builtin_offsetof(struct page, ptrs)))
177
178struct node {
179#define n_pgnop.np_pgno p.np_pgno
180#define n_dsizep.np_dsize p.np_dsize
181 union {
182 pgno_t np_pgno; /* child page number */
183 uint32_t np_dsize; /* leaf data size */
184 } p;
185 uint16_t ksize; /* key size */
186#define F_BIGDATA0x01 0x01 /* data put on overflow page */
187 uint8_t flags;
188 char data[1];
189} __packed__attribute__((__packed__));
190
191struct btree_txn {
192 pgno_t root; /* current / new root page */
193 pgno_t next_pgno; /* next unallocated page */
194 struct btree *bt; /* btree is ref'd */
195 struct dirty_queue *dirty_queue; /* modified pages */
196#define BT_TXN_RDONLY0x01 0x01 /* read-only transaction */
197#define BT_TXN_ERROR0x02 0x02 /* an error has occurred */
198 unsigned int flags;
199};
200
201struct btree {
202 int fd;
203 char *path;
204#define BT_FIXPADDING0x01 0x01 /* internal */
205 unsigned int flags;
206 bt_cmp_func cmp; /* user compare function */
207 struct bt_head head;
208 struct bt_meta meta;
209 struct page_cache *page_cache;
210 struct lru_queue *lru_queue;
211 struct btree_txn *txn; /* current write transaction */
212 int ref; /* increased by cursors & txn */
213 struct btree_stat stat;
214 off_t size; /* current file size */
215};
216
217#define NODESIZE__builtin_offsetof(struct node, data) offsetof(struct node, data)__builtin_offsetof(struct node, data)
218
219#define INDXSIZE(k)(__builtin_offsetof(struct node, data) + ((k) == ((void*)0) ?
0 : (k)->size))
(NODESIZE__builtin_offsetof(struct node, data) + ((k) == NULL((void*)0) ? 0 : (k)->size))
220#define LEAFSIZE(k, d)(__builtin_offsetof(struct node, data) + (k)->size + (d)->
size)
(NODESIZE__builtin_offsetof(struct node, data) + (k)->size + (d)->size)
221#define NODEPTRP(p, i)((struct node *)((char *)(p) + (p)->ptrs[i])) ((struct node *)((char *)(p) + (p)->ptrs[i]))
222#define NODEPTR(mp, i)((struct node *)((char *)((mp)->page) + ((mp)->page)->
ptrs[i]))
NODEPTRP((mp)->page, i)((struct node *)((char *)((mp)->page) + ((mp)->page)->
ptrs[i]))
223#define NODEKEY(node)(void *)((node)->data) (void *)((node)->data)
224#define NODEDATA(node)(void *)((char *)(node)->data + (node)->ksize) (void *)((char *)(node)->data + (node)->ksize)
225#define NODEPGNO(node)((node)->p.np_pgno) ((node)->p.np_pgno)
226#define NODEDSZ(node)((node)->p.np_dsize) ((node)->p.np_dsize)
227
228#define BT_COMMIT_PAGES64 64 /* max number of pages to write in one commit */
229#define BT_MAXCACHE_DEF1024 1024 /* max number of pages to keep in cache */
230
231static int btree_read_page(struct btree *bt, pgno_t pgno,
232 struct page *page);
233static struct mpage *btree_get_mpage(struct btree *bt, pgno_t pgno);
234static int btree_search_page_root(struct btree *bt,
235 struct mpage *root, struct btval *key,
236 struct cursor *cursor, int modify,
237 struct mpage **mpp);
238static int btree_search_page(struct btree *bt,
239 struct btree_txn *txn, struct btval *key,
240 struct cursor *cursor, int modify,
241 struct mpage **mpp);
242
243static int btree_write_header(struct btree *bt, int fd);
244static int btree_read_header(struct btree *bt);
245static int btree_is_meta_page(struct page *p);
246static int btree_read_meta(struct btree *bt, pgno_t *p_next);
247static int btree_write_meta(struct btree *bt, pgno_t root,
248 unsigned int flags);
249static void btree_ref(struct btree *bt);
250
251static struct node *btree_search_node(struct btree *bt, struct mpage *mp,
252 struct btval *key, int *exactp, unsigned int *kip);
253static int btree_add_node(struct btree *bt, struct mpage *mp,
254 indx_t indx, struct btval *key, struct btval *data,
255 pgno_t pgno, uint8_t flags);
256static void btree_del_node(struct btree *bt, struct mpage *mp,
257 indx_t indx);
258static int btree_read_data(struct btree *bt, struct mpage *mp,
259 struct node *leaf, struct btval *data);
260
261static int btree_rebalance(struct btree *bt, struct mpage *mp);
262static int btree_update_key(struct btree *bt, struct mpage *mp,
263 indx_t indx, struct btval *key);
264static int btree_adjust_prefix(struct btree *bt,
265 struct mpage *src, int delta);
266static int btree_move_node(struct btree *bt, struct mpage *src,
267 indx_t srcindx, struct mpage *dst, indx_t dstindx);
268static int btree_merge(struct btree *bt, struct mpage *src,
269 struct mpage *dst);
270static int btree_split(struct btree *bt, struct mpage **mpp,
271 unsigned int *newindxp, struct btval *newkey,
272 struct btval *newdata, pgno_t newpgno);
273static struct mpage *btree_new_page(struct btree *bt, uint32_t flags);
274static int btree_write_overflow_data(struct btree *bt,
275 struct page *p, struct btval *data);
276
277static void cursor_pop_page(struct cursor *cursor);
278static struct ppage *cursor_push_page(struct cursor *cursor,
279 struct mpage *mp);
280
281static int bt_set_key(struct btree *bt, struct mpage *mp,
282 struct node *node, struct btval *key);
283static int btree_sibling(struct cursor *cursor, int move_right);
284static int btree_cursor_next(struct cursor *cursor,
285 struct btval *key, struct btval *data);
286static int btree_cursor_set(struct cursor *cursor,
287 struct btval *key, struct btval *data, int *exactp);
288static int btree_cursor_first(struct cursor *cursor,
289 struct btval *key, struct btval *data);
290
291static void bt_reduce_separator(struct btree *bt, struct node *min,
292 struct btval *sep);
293static void remove_prefix(struct btree *bt, struct btval *key,
294 size_t pfxlen);
295static void expand_prefix(struct btree *bt, struct mpage *mp,
296 indx_t indx, struct btkey *expkey);
297static void concat_prefix(struct btree *bt, char *s1, size_t n1,
298 char *s2, size_t n2, char *cs, size_t *cn);
299static void common_prefix(struct btree *bt, struct btkey *min,
300 struct btkey *max, struct btkey *pfx);
301static void find_common_prefix(struct btree *bt, struct mpage *mp);
302
303static size_t bt_leaf_size(struct btree *bt, struct btval *key,
304 struct btval *data);
305static size_t bt_branch_size(struct btree *bt, struct btval *key);
306
307static pgno_t btree_compact_tree(struct btree *bt, pgno_t pgno,
308 struct btree *btc);
309
310static int memncmp(const void *s1, size_t n1,
311 const void *s2, size_t n2);
312static int memnrcmp(const void *s1, size_t n1,
313 const void *s2, size_t n2);
314
/*
 * Compare two byte strings that may have different lengths, in normal
 * (left-to-right) order.  Returns <0, 0 or >0 like memcmp(); when one
 * string is a prefix of the other, the shorter string sorts first.
 */
static int
memncmp(const void *s1, size_t n1, const void *s2, size_t n2)
{
	size_t		 n;
	int		 rc;

	/* Only the common prefix may safely be compared.  The previous
	 * code ended with memcmp(s1, s2, n1) even when n1 > n2, reading
	 * past the end of s2. */
	n = n1 < n2 ? n1 : n2;
	rc = memcmp(s1, s2, n);
	if (rc == 0) {
		if (n1 < n2)
			return -1;
		if (n1 > n2)
			return 1;
	}
	return rc;
}
328
/*
 * Compare two byte strings back to front (used for BT_REVERSEKEY trees).
 * Returns <0, 0 or >0; when one string is a suffix of the other, the
 * shorter string sorts first.
 */
static int
memnrcmp(const void *s1, size_t n1, const void *s2, size_t n2)
{
	const unsigned char	*end1 = s1;
	const unsigned char	*end2 = s2;
	const unsigned char	*c1, *c2;

	/* Handle empty keys up front so the pointer setup below is safe. */
	if (n1 == 0)
		return n2 == 0 ? 0 : -1;
	if (n2 == 0)
		return 1;	/* n1 != 0 here, so s1 is the longer key */

	c1 = end1 + n1 - 1;
	c2 = end2 + n2 - 1;

	for (; *c1 == *c2; c1--, c2--) {
		if (c1 == end1)
			return (c2 == end2) ? 0 : -1;
		if (c2 == end2)
			return (c1 == c2) ? 0 : 1;
	}
	return *c1 - *c2;
}
354
/*
 * Public key-comparison entry point: forwards to the user-supplied
 * comparison callback stored in the btree handle.
 * NOTE(review): assumes bt->cmp is non-NULL; internal callers appear to
 * fall back to memncmp/memnrcmp elsewhere when no callback is set —
 * confirm before calling this with a default-configured tree.
 */
int
btree_cmp(struct btree *bt, const struct btval *a, const struct btval *b)
{
	return bt->cmp(a, b);
}
360
361static void
362common_prefix(struct btree *bt, struct btkey *min, struct btkey *max,
363 struct btkey *pfx)
364{
365 size_t n = 0;
366 char *p1;
367 char *p2;
368
369 if (min->len == 0 || max->len == 0) {
370 pfx->len = 0;
371 return;
372 }
373
374 if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) {
375 p1 = min->str + min->len - 1;
376 p2 = max->str + max->len - 1;
377
378 while (*p1 == *p2) {
379 if (p1 < min->str || p2 < max->str)
380 break;
381 p1--;
382 p2--;
383 n++;
384 }
385
386 assert(n <= (int)sizeof(pfx->str))((n <= (int)sizeof(pfx->str)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 386, __func__, "n <= (int)sizeof(pfx->str)"))
;
387 pfx->len = n;
388 bcopy(p2 + 1, pfx->str, n);
389 } else {
390 p1 = min->str;
391 p2 = max->str;
392
393 while (*p1 == *p2) {
394 if (n == min->len || n == max->len)
395 break;
396 p1++;
397 p2++;
398 n++;
399 }
400
401 assert(n <= (int)sizeof(pfx->str))((n <= (int)sizeof(pfx->str)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 401, __func__, "n <= (int)sizeof(pfx->str)"))
;
402 pfx->len = n;
403 bcopy(max->str, pfx->str, n);
404 }
405}
406
407static void
408remove_prefix(struct btree *bt, struct btval *key, size_t pfxlen)
409{
410 if (pfxlen == 0 || bt->cmp != NULL((void*)0))
411 return;
412
413 DPRINTF("removing %zu bytes of prefix from key [%.*s]", pfxlen,
414 (int)key->size, (char *)key->data);
415 assert(pfxlen <= key->size)((pfxlen <= key->size) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 415, __func__, "pfxlen <= key->size"))
;
416 key->size -= pfxlen;
417 if (!F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08)))
418 key->data = (char *)key->data + pfxlen;
419}
420
421static void
422expand_prefix(struct btree *bt, struct mpage *mp, indx_t indx,
423 struct btkey *expkey)
424{
425 struct node *node;
426
427 node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)->
ptrs[indx]))
;
428 expkey->len = sizeof(expkey->str);
429 concat_prefix(bt, mp->prefix.str, mp->prefix.len,
430 NODEKEY(node)(void *)((node)->data), node->ksize, expkey->str, &expkey->len);
431}
432
433static int
434bt_cmp(struct btree *bt, const struct btval *key1, const struct btval *key2,
435 struct btkey *pfx)
436{
437 if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08)))
438 return memnrcmp(key1->data, key1->size - pfx->len,
439 key2->data, key2->size);
440 else
441 return memncmp((char *)key1->data + pfx->len, key1->size - pfx->len,
442 key2->data, key2->size);
443}
444
445void
446btval_reset(struct btval *btv)
447{
448 if (btv) {
449 if (btv->mp)
450 btv->mp->ref--;
451 if (btv->free_data)
452 free(btv->data);
453 memset(btv, 0, sizeof(*btv));
454 }
455}
456
457static int
458mpage_cmp(struct mpage *a, struct mpage *b)
459{
460 if (a->pgno > b->pgno)
461 return 1;
462 if (a->pgno < b->pgno)
463 return -1;
464 return 0;
465}
466
467static struct mpage *
468mpage_lookup(struct btree *bt, pgno_t pgno)
469{
470 struct mpage find, *mp;
471
472 find.pgno = pgno;
473 mp = RB_FIND(page_cache, bt->page_cache, &find)page_cache_RB_FIND(bt->page_cache, &find);
474 if (mp) {
475 bt->stat.hits++;
476 /* Update LRU queue. Move page to the end. */
477 TAILQ_REMOVE(bt->lru_queue, mp, lru_next)do { if (((mp)->lru_next.tqe_next) != ((void*)0)) (mp)->
lru_next.tqe_next->lru_next.tqe_prev = (mp)->lru_next.tqe_prev
; else (bt->lru_queue)->tqh_last = (mp)->lru_next.tqe_prev
; *(mp)->lru_next.tqe_prev = (mp)->lru_next.tqe_next; ;
; } while (0)
;
478 TAILQ_INSERT_TAIL(bt->lru_queue, mp, lru_next)do { (mp)->lru_next.tqe_next = ((void*)0); (mp)->lru_next
.tqe_prev = (bt->lru_queue)->tqh_last; *(bt->lru_queue
)->tqh_last = (mp); (bt->lru_queue)->tqh_last = &
(mp)->lru_next.tqe_next; } while (0)
;
479 }
480 return mp;
481}
482
483static void
484mpage_add(struct btree *bt, struct mpage *mp)
485{
486 assert(RB_INSERT(page_cache, bt->page_cache, mp) == NULL)((page_cache_RB_INSERT(bt->page_cache, mp) == ((void*)0)) ?
(void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 486, __func__
, "RB_INSERT(page_cache, bt->page_cache, mp) == NULL"))
;
487 bt->stat.cache_size++;
488 TAILQ_INSERT_TAIL(bt->lru_queue, mp, lru_next)do { (mp)->lru_next.tqe_next = ((void*)0); (mp)->lru_next
.tqe_prev = (bt->lru_queue)->tqh_last; *(bt->lru_queue
)->tqh_last = (mp); (bt->lru_queue)->tqh_last = &
(mp)->lru_next.tqe_next; } while (0)
;
489}
490
491static void
492mpage_free(struct mpage *mp)
493{
494 if (mp != NULL((void*)0)) {
495 free(mp->page);
496 free(mp);
497 }
498}
499
500static void
501mpage_del(struct btree *bt, struct mpage *mp)
502{
503 assert(RB_REMOVE(page_cache, bt->page_cache, mp) == mp)((page_cache_RB_REMOVE(bt->page_cache, mp) == mp) ? (void)
0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 503, __func__
, "RB_REMOVE(page_cache, bt->page_cache, mp) == mp"))
;
504 assert(bt->stat.cache_size > 0)((bt->stat.cache_size > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 504, __func__, "bt->stat.cache_size > 0"))
;
505 bt->stat.cache_size--;
506 TAILQ_REMOVE(bt->lru_queue, mp, lru_next)do { if (((mp)->lru_next.tqe_next) != ((void*)0)) (mp)->
lru_next.tqe_next->lru_next.tqe_prev = (mp)->lru_next.tqe_prev
; else (bt->lru_queue)->tqh_last = (mp)->lru_next.tqe_prev
; *(mp)->lru_next.tqe_prev = (mp)->lru_next.tqe_next; ;
; } while (0)
;
507}
508
509static void
510mpage_flush(struct btree *bt)
511{
512 struct mpage *mp;
513
514 while ((mp = RB_MIN(page_cache, bt->page_cache)page_cache_RB_MINMAX(bt->page_cache, -1)) != NULL((void*)0)) {
515 mpage_del(bt, mp);
516 mpage_free(mp);
517 }
518}
519
520static struct mpage *
521mpage_copy(struct btree *bt, struct mpage *mp)
522{
523 struct mpage *copy;
524
525 if ((copy = calloc(1, sizeof(*copy))) == NULL((void*)0))
526 return NULL((void*)0);
527 if ((copy->page = malloc(bt->head.psize)) == NULL((void*)0)) {
528 free(copy);
529 return NULL((void*)0);
530 }
531 bcopy(mp->page, copy->page, bt->head.psize);
532 bcopy(&mp->prefix, &copy->prefix, sizeof(mp->prefix));
533 copy->parent = mp->parent;
534 copy->parent_index = mp->parent_index;
535 copy->pgno = mp->pgno;
536
537 return copy;
538}
539
540/* Remove the least recently used memory pages until the cache size is
541 * within the configured bounds. Pages referenced by cursors or returned
542 * key/data are not pruned.
543 */
544static void
545mpage_prune(struct btree *bt)
546{
547 struct mpage *mp, *next;
548
549 for (mp = TAILQ_FIRST(bt->lru_queue)((bt->lru_queue)->tqh_first); mp; mp = next) {
550 if (bt->stat.cache_size <= bt->stat.max_cache)
551 break;
552 next = TAILQ_NEXT(mp, lru_next)((mp)->lru_next.tqe_next);
553 if (!mp->dirty && mp->ref <= 0) {
554 mpage_del(bt, mp);
555 mpage_free(mp);
556 }
557 }
558}
559
560/* Mark a page as dirty and push it on the dirty queue.
561 */
562static void
563mpage_dirty(struct btree *bt, struct mpage *mp)
564{
565 assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 565, __func__, "bt != NULL"))
;
566 assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 566, __func__, "bt->txn != NULL"))
;
567
568 if (!mp->dirty) {
569 mp->dirty = 1;
570 SIMPLEQ_INSERT_TAIL(bt->txn->dirty_queue, mp, next)do { (mp)->next.sqe_next = ((void*)0); *(bt->txn->dirty_queue
)->sqh_last = (mp); (bt->txn->dirty_queue)->sqh_last
= &(mp)->next.sqe_next; } while (0)
;
571 }
572}
573
574/* Touch a page: make it dirty and re-insert into tree with updated pgno.
575 */
576static struct mpage *
577mpage_touch(struct btree *bt, struct mpage *mp)
578{
579 assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 579, __func__, "bt != NULL"))
;
580 assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 580, __func__, "bt->txn != NULL"))
;
581 assert(mp != NULL)((mp != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 581, __func__, "mp != NULL"))
;
582
583 if (!mp->dirty) {
584 DPRINTF("touching page %u -> %u", mp->pgno, bt->txn->next_pgno);
585 if (mp->ref == 0)
586 mpage_del(bt, mp);
587 else {
588 if ((mp = mpage_copy(bt, mp)) == NULL((void*)0))
589 return NULL((void*)0);
590 }
591 mp->pgno = mp->page->pgno = bt->txn->next_pgno++;
592 mpage_dirty(bt, mp);
593 mpage_add(bt, mp);
594
595 /* Update the page number to new touched page. */
596 if (mp->parent != NULL((void*)0))
597 NODEPGNO(NODEPTR(mp->parent,((((struct node *)((char *)((mp->parent)->page) + ((mp->
parent)->page)->ptrs[mp->parent_index])))->p.np_pgno
)
598 mp->parent_index))((((struct node *)((char *)((mp->parent)->page) + ((mp->
parent)->page)->ptrs[mp->parent_index])))->p.np_pgno
)
= mp->pgno;
599 }
600
601 return mp;
602}
603
604static int
605btree_read_page(struct btree *bt, pgno_t pgno, struct page *page)
606{
607 ssize_t rc;
608
609 DPRINTF("reading page %u", pgno);
610 bt->stat.reads++;
611 if ((rc = pread(bt->fd, page, bt->head.psize, (off_t)pgno*bt->head.psize)) == 0) {
612 DPRINTF("page %u doesn't exist", pgno);
613 errno(*__errno()) = ENOENT2;
614 return BT_FAIL-1;
615 } else if (rc != (ssize_t)bt->head.psize) {
616 if (rc > 0)
617 errno(*__errno()) = EINVAL22;
618 DPRINTF("read: %s", strerror(errno));
619 return BT_FAIL-1;
620 }
621
622 if (page->pgno != pgno) {
623 DPRINTF("page numbers don't match: %u != %u", pgno, page->pgno);
624 errno(*__errno()) = EINVAL22;
625 return BT_FAIL-1;
626 }
627
628 DPRINTF("page %u has flags 0x%X", pgno, page->flags);
629
630 return BT_SUCCESS0;
631}
632
633int
634btree_sync(struct btree *bt)
635{
636 if (!F_ISSET(bt->flags, BT_NOSYNC)(((bt->flags) & (0x02)) == (0x02)))
637 return fsync(bt->fd);
638 return 0;
639}
640
641struct btree_txn *
642btree_txn_begin(struct btree *bt, int rdonly)
643{
644 struct btree_txn *txn;
645
646 if (!rdonly && bt->txn != NULL((void*)0)) {
647 DPRINTF("write transaction already begun");
648 errno(*__errno()) = EBUSY16;
649 return NULL((void*)0);
650 }
651
652 if ((txn = calloc(1, sizeof(*txn))) == NULL((void*)0)) {
653 DPRINTF("calloc: %s", strerror(errno));
654 return NULL((void*)0);
655 }
656
657 if (rdonly) {
658 txn->flags |= BT_TXN_RDONLY0x01;
659 } else {
660 txn->dirty_queue = calloc(1, sizeof(*txn->dirty_queue));
661 if (txn->dirty_queue == NULL((void*)0)) {
662 free(txn);
663 return NULL((void*)0);
664 }
665 SIMPLEQ_INIT(txn->dirty_queue)do { (txn->dirty_queue)->sqh_first = ((void*)0); (txn->
dirty_queue)->sqh_last = &(txn->dirty_queue)->sqh_first
; } while (0)
;
666
667 DPRINTF("taking write lock on txn %p", txn);
668 if (flock(bt->fd, LOCK_EX0x02 | LOCK_NB0x04) != 0) {
669 DPRINTF("flock: %s", strerror(errno));
670 errno(*__errno()) = EBUSY16;
671 free(txn->dirty_queue);
672 free(txn);
673 return NULL((void*)0);
674 }
675 bt->txn = txn;
676 }
677
678 txn->bt = bt;
679 btree_ref(bt);
680
681 if (btree_read_meta(bt, &txn->next_pgno) != BT_SUCCESS0) {
682 btree_txn_abort(txn);
683 return NULL((void*)0);
684 }
685
686 txn->root = bt->meta.root;
687 DPRINTF("begin transaction on btree %p, root page %u", bt, txn->root);
688
689 return txn;
690}
691
692void
693btree_txn_abort(struct btree_txn *txn)
694{
695 struct mpage *mp;
696 struct btree *bt;
697
698 if (txn == NULL((void*)0))
699 return;
700
701 bt = txn->bt;
702 DPRINTF("abort transaction on btree %p, root page %u", bt, txn->root);
703
704 if (!F_ISSET(txn->flags, BT_TXN_RDONLY)(((txn->flags) & (0x01)) == (0x01))) {
705 /* Discard all dirty pages.
706 */
707 while (!SIMPLEQ_EMPTY(txn->dirty_queue)(((txn->dirty_queue)->sqh_first) == ((void*)0))) {
708 mp = SIMPLEQ_FIRST(txn->dirty_queue)((txn->dirty_queue)->sqh_first);
709 assert(mp->ref == 0)((mp->ref == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 709, __func__, "mp->ref == 0"))
; /* cursors should be closed */
710 mpage_del(bt, mp);
711 SIMPLEQ_REMOVE_HEAD(txn->dirty_queue, next)do { if (((txn->dirty_queue)->sqh_first = (txn->dirty_queue
)->sqh_first->next.sqe_next) == ((void*)0)) (txn->dirty_queue
)->sqh_last = &(txn->dirty_queue)->sqh_first; } while
(0)
;
712 mpage_free(mp);
713 }
714
715 DPRINTF("releasing write lock on txn %p", txn);
716 txn->bt->txn = NULL((void*)0);
717 if (flock(txn->bt->fd, LOCK_UN0x08) != 0) {
718 DPRINTF("failed to unlock fd %d: %s",
719 txn->bt->fd, strerror(errno));
720 }
721 free(txn->dirty_queue);
722 }
723
724 btree_close(txn->bt);
725 free(txn);
726}
727
728int
729btree_txn_commit(struct btree_txn *txn)
730{
731 int n, done;
732 ssize_t rc;
733 off_t size;
734 struct mpage *mp;
735 struct btree *bt;
736 struct iovec iov[BT_COMMIT_PAGES64];
737
738 assert(txn != NULL)((txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 738, __func__, "txn != NULL"))
;
739 assert(txn->bt != NULL)((txn->bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 739, __func__, "txn->bt != NULL"))
;
740
741 bt = txn->bt;
742
743 if (F_ISSET(txn->flags, BT_TXN_RDONLY)(((txn->flags) & (0x01)) == (0x01))) {
744 DPRINTF("attempt to commit read-only transaction");
745 btree_txn_abort(txn);
746 errno(*__errno()) = EPERM1;
747 return BT_FAIL-1;
748 }
749
750 if (txn != bt->txn) {
751 DPRINTF("attempt to commit unknown transaction");
752 btree_txn_abort(txn);
753 errno(*__errno()) = EINVAL22;
754 return BT_FAIL-1;
755 }
756
757 if (F_ISSET(txn->flags, BT_TXN_ERROR)(((txn->flags) & (0x02)) == (0x02))) {
758 DPRINTF("error flag is set, can't commit");
759 btree_txn_abort(txn);
760 errno(*__errno()) = EINVAL22;
761 return BT_FAIL-1;
762 }
763
764 if (SIMPLEQ_EMPTY(txn->dirty_queue)(((txn->dirty_queue)->sqh_first) == ((void*)0)))
765 goto done;
766
767 if (F_ISSET(bt->flags, BT_FIXPADDING)(((bt->flags) & (0x01)) == (0x01))) {
768 size = lseek(bt->fd, 0, SEEK_END2);
769 size += bt->head.psize - (size % bt->head.psize);
770 DPRINTF("extending to multiple of page size: %llu", size);
771 if (ftruncate(bt->fd, size) != 0) {
772 DPRINTF("ftruncate: %s", strerror(errno));
773 btree_txn_abort(txn);
774 return BT_FAIL-1;
775 }
776 bt->flags &= ~BT_FIXPADDING0x01;
777 }
778
779 DPRINTF("committing transaction on btree %p, root page %u",
780 bt, txn->root);
781
782 /* Commit up to BT_COMMIT_PAGES dirty pages to disk until done.
783 */
784 do {
785 n = 0;
786 done = 1;
787 SIMPLEQ_FOREACH(mp, txn->dirty_queue, next)for((mp) = ((txn->dirty_queue)->sqh_first); (mp) != ((void
*)0); (mp) = ((mp)->next.sqe_next))
{
788 DPRINTF("committing page %u", mp->pgno);
789 iov[n].iov_len = bt->head.psize;
790 iov[n].iov_base = mp->page;
791 if (++n >= BT_COMMIT_PAGES64) {
792 done = 0;
793 break;
794 }
795 }
796
797 if (n == 0)
798 break;
799
800 DPRINTF("committing %u dirty pages", n);
801 rc = writev(bt->fd, iov, n);
802 if (rc != (ssize_t)bt->head.psize*n) {
803 if (rc > 0)
804 DPRINTF("short write, filesystem full?");
805 else
806 DPRINTF("writev: %s", strerror(errno));
807 btree_txn_abort(txn);
808 return BT_FAIL-1;
809 }
810
811 /* Remove the dirty flag from the written pages.
812 */
813 while (!SIMPLEQ_EMPTY(txn->dirty_queue)(((txn->dirty_queue)->sqh_first) == ((void*)0))) {
814 mp = SIMPLEQ_FIRST(txn->dirty_queue)((txn->dirty_queue)->sqh_first);
815 mp->dirty = 0;
816 SIMPLEQ_REMOVE_HEAD(txn->dirty_queue, next)do { if (((txn->dirty_queue)->sqh_first = (txn->dirty_queue
)->sqh_first->next.sqe_next) == ((void*)0)) (txn->dirty_queue
)->sqh_last = &(txn->dirty_queue)->sqh_first; } while
(0)
;
817 if (--n == 0)
818 break;
819 }
820 } while (!done);
821
822 if (btree_sync(bt) != 0 ||
823 btree_write_meta(bt, txn->root, 0) != BT_SUCCESS0 ||
824 btree_sync(bt) != 0) {
825 btree_txn_abort(txn);
826 return BT_FAIL-1;
827 }
828
829done:
830 mpage_prune(bt);
831 btree_txn_abort(txn);
832
833 return BT_SUCCESS0;
834}
835
836static int
837btree_write_header(struct btree *bt, int fd)
838{
839 struct stat sb;
840 struct bt_head *h;
841 struct page *p;
842 ssize_t rc;
843 unsigned int psize;
844
845 DPRINTF("writing header page");
846 assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 846, __func__, "bt != NULL"))
;
847
848 /*
849 * Ask stat for 'optimal blocksize for I/O', but cap to fit in indx_t.
850 */
851 if (fstat(fd, &sb) == 0)
852 psize = MINIMUM(32*1024, sb.st_blksize)((32*1024) < (sb.st_blksize) ? (32*1024) : (sb.st_blksize)
)
;
853 else
854 psize = PAGESIZE4096;
855
856 if ((p = calloc(1, psize)) == NULL((void*)0))
857 return -1;
858 p->flags = P_HEAD0x10;
859
860 h = METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs)));
861 h->magic = BT_MAGIC0xB3DBB3DB;
862 h->version = BT_VERSION4;
863 h->psize = psize;
864 bcopy(h, &bt->head, sizeof(*h));
865
866 rc = write(fd, p, bt->head.psize);
867 free(p);
868 if (rc != (ssize_t)bt->head.psize) {
869 if (rc > 0)
870 DPRINTF("short write, filesystem full?");
871 return BT_FAIL-1;
872 }
873
874 return BT_SUCCESS0;
875}
876
877static int
878btree_read_header(struct btree *bt)
879{
880 char page[PAGESIZE4096];
881 struct page *p;
882 struct bt_head *h;
883 int rc;
884
885 assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 885, __func__, "bt != NULL"))
;
886
887 /* We don't know the page size yet, so use a minimum value.
888 */
889
890 if ((rc = pread(bt->fd, page, PAGESIZE4096, 0)) == 0) {
891 errno(*__errno()) = ENOENT2;
892 return -1;
893 } else if (rc != PAGESIZE4096) {
894 if (rc > 0)
895 errno(*__errno()) = EINVAL22;
896 DPRINTF("read: %s", strerror(errno));
897 return -1;
898 }
899
900 p = (struct page *)page;
901
902 if (!F_ISSET(p->flags, P_HEAD)(((p->flags) & (0x10)) == (0x10))) {
903 DPRINTF("page %d not a header page", p->pgno);
904 errno(*__errno()) = EINVAL22;
905 return -1;
906 }
907
908 h = METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs)));
909 if (h->magic != BT_MAGIC0xB3DBB3DB) {
910 DPRINTF("header has invalid magic");
911 errno(*__errno()) = EINVAL22;
912 return -1;
913 }
914
915 if (h->version != BT_VERSION4) {
916 DPRINTF("database is version %u, expected version %u",
917 bt->head.version, BT_VERSION);
918 errno(*__errno()) = EINVAL22;
919 return -1;
920 }
921
922 bcopy(h, &bt->head, sizeof(*h));
923 return 0;
924}
925
926static int
927btree_write_meta(struct btree *bt, pgno_t root, unsigned int flags)
928{
929 struct mpage *mp;
930 struct bt_meta *meta;
931 ssize_t rc;
932
933 DPRINTF("writing meta page for root page %u", root);
934
935 assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 935, __func__, "bt != NULL"))
;
936 assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 936, __func__, "bt->txn != NULL"))
;
937
938 if ((mp = btree_new_page(bt, P_META0x08)) == NULL((void*)0))
939 return -1;
940
941 bt->meta.prev_meta = bt->meta.root;
942 bt->meta.root = root;
943 bt->meta.flags = flags;
944 bt->meta.created_at = time(0);
945 bt->meta.revisions++;
946 SHA1((unsigned char *)&bt->meta, METAHASHLEN__builtin_offsetof(struct bt_meta, hash), bt->meta.hash);
947
948 /* Copy the meta data changes to the new meta page. */
949 meta = METADATA(mp->page)((void *)((char *)mp->page + __builtin_offsetof(struct page
, ptrs)))
;
950 bcopy(&bt->meta, meta, sizeof(*meta));
951
952 rc = write(bt->fd, mp->page, bt->head.psize);
953 mp->dirty = 0;
954 SIMPLEQ_REMOVE_HEAD(bt->txn->dirty_queue, next)do { if (((bt->txn->dirty_queue)->sqh_first = (bt->
txn->dirty_queue)->sqh_first->next.sqe_next) == ((void
*)0)) (bt->txn->dirty_queue)->sqh_last = &(bt->
txn->dirty_queue)->sqh_first; } while (0)
;
955 if (rc != (ssize_t)bt->head.psize) {
956 if (rc > 0)
957 DPRINTF("short write, filesystem full?");
958 return BT_FAIL-1;
959 }
960
961 if ((bt->size = lseek(bt->fd, 0, SEEK_END2)) == -1) {
962 DPRINTF("failed to update file size: %s", strerror(errno));
963 bt->size = 0;
964 }
965
966 return BT_SUCCESS0;
967}
968
969/* Returns true if page p is a valid meta page, false otherwise.
970 */
971static int
972btree_is_meta_page(struct page *p)
973{
974 struct bt_meta *m;
975 unsigned char hash[SHA_DIGEST_LENGTH20];
976
977 m = METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs)));
978 if (!F_ISSET(p->flags, P_META)(((p->flags) & (0x08)) == (0x08))) {
979 DPRINTF("page %d not a meta page", p->pgno);
980 errno(*__errno()) = EINVAL22;
981 return 0;
982 }
983
984 if (m->root >= p->pgno && m->root != P_INVALID0xFFFFFFFF) {
985 DPRINTF("page %d points to an invalid root page", p->pgno);
986 errno(*__errno()) = EINVAL22;
987 return 0;
988 }
989
990 SHA1((unsigned char *)m, METAHASHLEN__builtin_offsetof(struct bt_meta, hash), hash);
991 if (bcmp(hash, m->hash, SHA_DIGEST_LENGTH20) != 0) {
992 DPRINTF("page %d has an invalid digest", p->pgno);
993 errno(*__errno()) = EINVAL22;
994 return 0;
995 }
996
997 return 1;
998}
999
1000static int
1001btree_read_meta(struct btree *bt, pgno_t *p_next)
1002{
1003 struct mpage *mp;
1004 struct bt_meta *meta;
1005 pgno_t meta_pgno, next_pgno;
1006 off_t size;
1007
1008 assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 1008, __func__, "bt != NULL"))
;
1009
1010 if ((size = lseek(bt->fd, 0, SEEK_END2)) == -1)
1011 goto fail;
1012
1013 DPRINTF("btree_read_meta: size = %llu", size);
1014
1015 if (size < bt->size) {
1016 DPRINTF("file has shrunk!");
1017 errno(*__errno()) = EIO5;
1018 goto fail;
1019 }
1020
1021 if (size == bt->head.psize) { /* there is only the header */
1022 if (p_next != NULL((void*)0))
1023 *p_next = 1;
1024 return BT_SUCCESS0; /* new file */
1025 }
1026
1027 next_pgno = size / bt->head.psize;
1028 if (next_pgno == 0) {
1029 DPRINTF("corrupt file");
1030 errno(*__errno()) = EIO5;
1031 goto fail;
1032 }
1033
1034 meta_pgno = next_pgno - 1;
1035
1036 if (size % bt->head.psize != 0) {
1037 DPRINTF("filesize not a multiple of the page size!");
1038 bt->flags |= BT_FIXPADDING0x01;
1039 next_pgno++;
1040 }
1041
1042 if (p_next != NULL((void*)0))
1043 *p_next = next_pgno;
1044
1045 if (size == bt->size) {
1046 DPRINTF("size unchanged, keeping current meta page");
1047 if (F_ISSET(bt->meta.flags, BT_TOMBSTONE)(((bt->meta.flags) & (0x01)) == (0x01))) {
1048 DPRINTF("file is dead");
1049 errno(*__errno()) = ESTALE70;
1050 return BT_FAIL-1;
1051 } else
1052 return BT_SUCCESS0;
1053 }
1054 bt->size = size;
1055
1056 while (meta_pgno > 0) {
1057 if ((mp = btree_get_mpage(bt, meta_pgno)) == NULL((void*)0))
1058 break;
1059 if (btree_is_meta_page(mp->page)) {
1060 meta = METADATA(mp->page)((void *)((char *)mp->page + __builtin_offsetof(struct page
, ptrs)))
;
1061 DPRINTF("flags = 0x%x", meta->flags);
1062 if (F_ISSET(meta->flags, BT_TOMBSTONE)(((meta->flags) & (0x01)) == (0x01))) {
1063 DPRINTF("file is dead");
1064 errno(*__errno()) = ESTALE70;
1065 return BT_FAIL-1;
1066 } else {
1067 /* Make copy of last meta page. */
1068 bcopy(meta, &bt->meta, sizeof(bt->meta));
1069 return BT_SUCCESS0;
1070 }
1071 }
1072 --meta_pgno; /* scan backwards to first valid meta page */
1073 }
1074
1075 errno(*__errno()) = EIO5;
1076fail:
1077 if (p_next != NULL((void*)0))
1078 *p_next = P_INVALID0xFFFFFFFF;
1079 return BT_FAIL-1;
1080}
1081
1082struct btree *
1083btree_open_fd(int fd, unsigned int flags)
1084{
1085 struct btree *bt;
1086 int fl;
1087
1088 fl = fcntl(fd, F_GETFL3);
1089 if (fcntl(fd, F_SETFL4, fl | O_APPEND0x0008) == -1)
1090 return NULL((void*)0);
1091
1092 if ((bt = calloc(1, sizeof(*bt))) == NULL((void*)0))
1093 return NULL((void*)0);
1094 bt->fd = fd;
1095 bt->flags = flags;
1096 bt->flags &= ~BT_FIXPADDING0x01;
1097 bt->ref = 1;
1098 bt->meta.root = P_INVALID0xFFFFFFFF;
1099
1100 if ((bt->page_cache = calloc(1, sizeof(*bt->page_cache))) == NULL((void*)0))
1101 goto fail;
1102 bt->stat.max_cache = BT_MAXCACHE_DEF1024;
1103 RB_INIT(bt->page_cache)do { (bt->page_cache)->rbh_root = ((void*)0); } while (
0)
;
1104
1105 if ((bt->lru_queue = calloc(1, sizeof(*bt->lru_queue))) == NULL((void*)0))
1106 goto fail;
1107 TAILQ_INIT(bt->lru_queue)do { (bt->lru_queue)->tqh_first = ((void*)0); (bt->lru_queue
)->tqh_last = &(bt->lru_queue)->tqh_first; } while
(0)
;
1108
1109 if (btree_read_header(bt) != 0) {
1110 if (errno(*__errno()) != ENOENT2)
1111 goto fail;
1112 DPRINTF("new database");
1113 btree_write_header(bt, bt->fd);
1114 }
1115
1116 if (btree_read_meta(bt, NULL((void*)0)) != 0)
1117 goto fail;
1118
1119 DPRINTF("opened database version %u, pagesize %u",
1120 bt->head.version, bt->head.psize);
1121 DPRINTF("timestamp: %s", ctime(&bt->meta.created_at));
1122 DPRINTF("depth: %u", bt->meta.depth);
1123 DPRINTF("entries: %llu", bt->meta.entries);
1124 DPRINTF("revisions: %u", bt->meta.revisions);
1125 DPRINTF("branch pages: %u", bt->meta.branch_pages);
1126 DPRINTF("leaf pages: %u", bt->meta.leaf_pages);
1127 DPRINTF("overflow pages: %u", bt->meta.overflow_pages);
1128 DPRINTF("root: %u", bt->meta.root);
1129 DPRINTF("previous meta page: %u", bt->meta.prev_meta);
1130
1131 return bt;
1132
1133fail:
1134 free(bt->lru_queue);
1135 free(bt->page_cache);
1136 free(bt);
1137 return NULL((void*)0);
1138}
1139
1140struct btree *
1141btree_open(const char *path, unsigned int flags, mode_t mode)
1142{
1143 int fd, oflags;
1144 struct btree *bt;
1145
1146 if (F_ISSET(flags, BT_RDONLY)(((flags) & (0x04)) == (0x04)))
1147 oflags = O_RDONLY0x0000;
1148 else
1149 oflags = O_RDWR0x0002 | O_CREAT0x0200 | O_APPEND0x0008;
1150
1151 if ((fd = open(path, oflags, mode)) == -1)
1152 return NULL((void*)0);
1153
1154 if ((bt = btree_open_fd(fd, flags)) == NULL((void*)0))
1155 close(fd);
1156 else {
1157 bt->path = strdup(path);
1158 DPRINTF("opened btree %p", bt);
1159 }
1160
1161 return bt;
1162}
1163
1164static void
1165btree_ref(struct btree *bt)
1166{
1167 bt->ref++;
1168 DPRINTF("ref is now %d on btree %p", bt->ref, bt);
1169}
1170
1171void
1172btree_close(struct btree *bt)
1173{
1174 if (bt == NULL((void*)0))
1175 return;
1176
1177 if (--bt->ref == 0) {
1178 DPRINTF("ref is zero, closing btree %p", bt);
1179 close(bt->fd);
1180 mpage_flush(bt);
1181 free(bt->lru_queue);
1182 free(bt->path);
1183 free(bt->page_cache);
1184 free(bt);
1185 } else
1186 DPRINTF("ref is now %d on btree %p", bt->ref, bt);
1187}
1188
1189/* Search for key within a leaf page, using binary search.
1190 * Returns the smallest entry larger or equal to the key.
1191 * If exactp is non-null, stores whether the found entry was an exact match
1192 * in *exactp (1 or 0).
1193 * If kip is non-null, stores the index of the found entry in *kip.
1194 * If no entry larger of equal to the key is found, returns NULL.
1195 */
1196static struct node *
1197btree_search_node(struct btree *bt, struct mpage *mp, struct btval *key,
1198 int *exactp, unsigned int *kip)
1199{
1200 unsigned int i = 0;
1201 int low, high;
1202 int rc = 0;
1203 struct node *node;
1204 struct btval nodekey;
1205
1206 DPRINTF("searching %lu keys in %s page %u with prefix [%.*s]",
1207 NUMKEYS(mp),
1208 IS_LEAF(mp) ? "leaf" : "branch",
1209 mp->pgno, (int)mp->prefix.len, (char *)mp->prefix.str);
1210
1211 assert(NUMKEYS(mp) > 0)(((((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct
page, ptrs)) >> 1) > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 1211, __func__, "NUMKEYS(mp) > 0"))
;
1212
1213 memset(&nodekey, 0, sizeof(nodekey));
1214
1215 low = IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)) ? 0 : 1;
1216 high = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct
page, ptrs)) >> 1)
- 1;
1217 while (low <= high) {
1218 i = (low + high) >> 1;
1219 node = NODEPTR(mp, i)((struct node *)((char *)((mp)->page) + ((mp)->page)->
ptrs[i]))
;
1220
1221 nodekey.size = node->ksize;
1222 nodekey.data = NODEKEY(node)(void *)((node)->data);
1223
1224 if (bt->cmp)
1225 rc = bt->cmp(key, &nodekey);
1226 else
1227 rc = bt_cmp(bt, key, &nodekey, &mp->prefix);
1228
1229 if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)))
1230 DPRINTF("found leaf index %u [%.*s], rc = %d",
1231 i, (int)nodekey.size, (char *)nodekey.data, rc);
1232 else
1233 DPRINTF("found branch index %u [%.*s -> %u], rc = %d",
1234 i, (int)node->ksize, (char *)NODEKEY(node),
1235 node->n_pgno, rc);
1236
1237 if (rc == 0)
1238 break;
1239 if (rc > 0)
1240 low = i + 1;
1241 else
1242 high = i - 1;
1243 }
1244
1245 if (rc > 0) { /* Found entry is less than the key. */
1246 i++; /* Skip to get the smallest entry larger than key. */
1247 if (i >= NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct
page, ptrs)) >> 1)
)
1248 /* There is no entry larger or equal to the key. */
1249 return NULL((void*)0);
1250 }
1251 if (exactp)
1252 *exactp = (rc == 0);
1253 if (kip) /* Store the key index if requested. */
1254 *kip = i;
1255
1256 return NODEPTR(mp, i)((struct node *)((char *)((mp)->page) + ((mp)->page)->
ptrs[i]))
;
1257}
1258
1259static void
1260cursor_pop_page(struct cursor *cursor)
1261{
1262 struct ppage *top;
1263
1264 top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first);
1265 CURSOR_POP(cursor)do { (&(cursor)->stack)->slh_first = (&(cursor)
->stack)->slh_first->entry.sle_next; } while (0)
;
1266 top->mpage->ref--;
1267
1268 DPRINTF("popped page %u off cursor %p", top->mpage->pgno, cursor);
1269
1270 free(top);
1271}
1272
1273static struct ppage *
1274cursor_push_page(struct cursor *cursor, struct mpage *mp)
1275{
1276 struct ppage *ppage;
1277
1278 DPRINTF("pushing page %u on cursor %p", mp->pgno, cursor);
1279
1280 if ((ppage = calloc(1, sizeof(*ppage))) == NULL((void*)0))
1281 return NULL((void*)0);
1282 ppage->mpage = mp;
1283 mp->ref++;
1284 CURSOR_PUSH(cursor, ppage)do { (ppage)->entry.sle_next = (&(cursor)->stack)->
slh_first; (&(cursor)->stack)->slh_first = (ppage);
} while (0)
;
1285 return ppage;
1286}
1287
1288static struct mpage *
1289btree_get_mpage(struct btree *bt, pgno_t pgno)
1290{
1291 struct mpage *mp;
1292
1293 mp = mpage_lookup(bt, pgno);
1294 if (mp == NULL((void*)0)) {
1295 if ((mp = calloc(1, sizeof(*mp))) == NULL((void*)0))
1296 return NULL((void*)0);
1297 if ((mp->page = malloc(bt->head.psize)) == NULL((void*)0)) {
1298 free(mp);
1299 return NULL((void*)0);
1300 }
1301 if (btree_read_page(bt, pgno, mp->page) != BT_SUCCESS0) {
1302 mpage_free(mp);
1303 return NULL((void*)0);
1304 }
1305 mp->pgno = pgno;
1306 mpage_add(bt, mp);
1307 } else
1308 DPRINTF("returning page %u from cache", pgno);
1309
1310 return mp;
1311}
1312
1313static void
1314concat_prefix(struct btree *bt, char *s1, size_t n1, char *s2, size_t n2,
1315 char *cs, size_t *cn)
1316{
1317 assert(*cn >= n1 + n2)((*cn >= n1 + n2) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 1317, __func__, "*cn >= n1 + n2"))
;
1318 if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) {
1319 bcopy(s2, cs, n2);
1320 bcopy(s1, cs + n2, n1);
1321 } else {
1322 bcopy(s1, cs, n1);
1323 bcopy(s2, cs + n1, n2);
1324 }
1325 *cn = n1 + n2;
1326}
1327
1328static void
1329find_common_prefix(struct btree *bt, struct mpage *mp)
1330{
1331 indx_t lbound = 0, ubound = 0;
1332 struct mpage *lp, *up;
1333 struct btkey lprefix, uprefix;
1334
1335 mp->prefix.len = 0;
1336 if (bt->cmp != NULL((void*)0))
1337 return;
1338
1339 lp = mp;
1340 while (lp->parent != NULL((void*)0)) {
1341 if (lp->parent_index > 0) {
1342 lbound = lp->parent_index;
1343 break;
1344 }
1345 lp = lp->parent;
1346 }
1347
1348 up = mp;
1349 while (up->parent != NULL((void*)0)) {
1350 if (up->parent_index + 1 < (indx_t)NUMKEYS(up->parent)(((up->parent)->page->b.fb.fb_lower - __builtin_offsetof
(struct page, ptrs)) >> 1)
) {
1351 ubound = up->parent_index + 1;
1352 break;
1353 }
1354 up = up->parent;
1355 }
1356
1357 if (lp->parent != NULL((void*)0) && up->parent != NULL((void*)0)) {
1358 expand_prefix(bt, lp->parent, lbound, &lprefix);
1359 expand_prefix(bt, up->parent, ubound, &uprefix);
1360 common_prefix(bt, &lprefix, &uprefix, &mp->prefix);
1361 }
1362 else if (mp->parent)
1363 bcopy(&mp->parent->prefix, &mp->prefix, sizeof(mp->prefix));
1364
1365 DPRINTF("found common prefix [%.*s] (len %zu) for page %u",
1366 (int)mp->prefix.len, mp->prefix.str, mp->prefix.len, mp->pgno);
1367}
1368
/*
 * Descend from 'root' to the leaf page that should contain 'key',
 * storing the result in *mpp.  A NULL key selects the leftmost leaf.
 * If 'cursor' is given, each visited page is pushed on its stack; if
 * 'modify' is set, each visited page is made dirty via mpage_touch().
 * Returns BT_SUCCESS or BT_FAIL.
 */
1369static int
1370btree_search_page_root(struct btree *bt, struct mpage *root, struct btval *key,
1371 struct cursor *cursor, int modify, struct mpage **mpp)
1372{
1373 struct mpage *mp, *parent;
1374
1375 if (cursor && cursor_push_page(cursor, root) == NULL((void*)0))
1376 return BT_FAIL-1;
1377
1378 mp = root;
 /* Follow branch pages downward until a leaf is reached. */
1379 while (IS_BRANCH(mp)((((mp)->page->flags) & (0x01)) == (0x01))) {
1380 unsigned int i = 0;
1381 struct node *node;
1382
1383 DPRINTF("branch page %u has %lu keys", mp->pgno, NUMKEYS(mp));
1384 assert(NUMKEYS(mp) > 1)(((((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct
page, ptrs)) >> 1) > 1) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 1384, __func__, "NUMKEYS(mp) > 1"))
;
1385 DPRINTF("found index 0 to page %u", NODEPGNO(NODEPTR(mp, 0)));
1386
1387 if (key == NULL((void*)0)) /* Initialize cursor to first page. */
1388 i = 0;
1389 else {
1390 int exact;
1391 node = btree_search_node(bt, mp, key, &exact, &i);
 /* No node >= key on this page: take the rightmost child.
  * Inexact match: step back one slot so the child whose
  * separator is <= key is followed.
  */
1392 if (node == NULL((void*)0))
1393 i = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct
page, ptrs)) >> 1)
 - 1;
1394 else if (!exact) {
1395 assert(i > 0)((i > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 1395, __func__, "i > 0"))
;
1396 i--;
1397 }
1398 }
1399
1400 if (key)
1401 DPRINTF("following index %u for key %.*s",
1402 i, (int)key->size, (char *)key->data);
1403 assert(i >= 0 && i < NUMKEYS(mp))((i >= 0 && i < (((mp)->page->b.fb.fb_lower
 - __builtin_offsetof(struct page, ptrs)) >> 1)) ? (void
)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1403, __func__
, "i >= 0 && i < NUMKEYS(mp)"))
;
1404 node = NODEPTR(mp, i)((struct node *)((char *)((mp)->page) + ((mp)->page)->
ptrs[i]))
;
1405
1406 if (cursor)
1407 CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first)->ki = i;
1408
 /* Load the chosen child page and link it to its parent so that
  * prefix computation can walk the ancestor chain.
  */
1409 parent = mp;
1410 if ((mp = btree_get_mpage(bt, NODEPGNO(node)((node)->p.np_pgno))) == NULL((void*)0))
1411 return BT_FAIL-1;
1412 mp->parent = parent;
1413 mp->parent_index = i;
1414 find_common_prefix(bt, mp);
1415
1416 if (cursor && cursor_push_page(cursor, mp) == NULL((void*)0))
1417 return BT_FAIL-1;
1418
1419 if (modify && (mp = mpage_touch(bt, mp)) == NULL((void*)0))
1420 return BT_FAIL-1;
1421 }
1422
1423 if (!IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) {
1424 DPRINTF("internal error, index points to a %02X page!?",
1425 mp->page->flags);
1426 return BT_FAIL-1;
1427 }
1428
1429 DPRINTF("found leaf page %u for key %.*s", mp->pgno,
1430 key ? (int)key->size : 0, key ? (char *)key->data : NULL);
1431
1432 *mpp = mp;
1433 return BT_SUCCESS0;
1434}
1435
1436/* Search for the page a given key should be in.
1437 * Stores a pointer to the found page in *mpp.
1438 * If key is NULL, search for the lowest page (used by btree_cursor_first).
1439 * If cursor is non-null, pushes parent pages on the cursor stack.
1440 * If modify is true, visited pages are updated with new page numbers.
1441 */
/* Search for the page a given key should be in.
 * Stores a pointer to the found page in *mpp.
 * If key is NULL, search for the lowest page (used by btree_cursor_first).
 * If cursor is non-null, pushes parent pages on the cursor stack.
 * If modify is true, visited pages are updated with new page numbers.
 * Returns BT_SUCCESS or BT_FAIL (with errno set on parameter errors).
 */
1442static int
1443btree_search_page(struct btree *bt, struct btree_txn *txn, struct btval *key,
1444 struct cursor *cursor, int modify, struct mpage **mpp)
1445{
1446 int rc;
1447 pgno_t root;
1448 struct mpage *mp;
1449
1450 /* Can't modify pages outside a transaction. */
1451 if (txn == NULL((void*)0) && modify) {
1452 errno(*__errno()) = EINVAL22;
1453 return BT_FAIL-1;
1454 }
1455
1456 /* Choose which root page to start with. If a transaction is given
1457 * use the root page from the transaction, otherwise read the last
1458 * committed root page.
1459 */
1460 if (txn == NULL((void*)0)) {
1461 if ((rc = btree_read_meta(bt, NULL((void*)0))) != BT_SUCCESS0)
1462 return rc;
1463 root = bt->meta.root;
1464 } else if (F_ISSET(txn->flags, BT_TXN_ERROR)(((txn->flags) & (0x02)) == (0x02))) {
1465 DPRINTF("transaction has failed, must abort");
1466 errno(*__errno()) = EINVAL22;
1467 return BT_FAIL-1;
1468 } else
1469 root = txn->root;
1470
1471 if (root == P_INVALID0xFFFFFFFF) { /* Tree is empty. */
1472 DPRINTF("tree is empty");
1473 errno(*__errno()) = ENOENT2;
1474 return BT_FAIL-1;
1475 }
1476
1477 if ((mp = btree_get_mpage(bt, root)) == NULL((void*)0))
1478 return BT_FAIL-1;
1479
1480 DPRINTF("root page has flags 0x%X", mp->page->flags);
1481
 /* The root has no ancestors, hence no parent link and no prefix. */
1482 assert(mp->parent == NULL)((mp->parent == ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 1482, __func__, "mp->parent == NULL"))
;
1483 assert(mp->prefix.len == 0)((mp->prefix.len == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 1483, __func__, "mp->prefix.len == 0"))
;
1484
 /* Copy-on-write: dirty the root and record its new page number in
  * the transaction before descending.
  */
1485 if (modify && !mp->dirty) {
1486 if ((mp = mpage_touch(bt, mp)) == NULL((void*)0))
1487 return BT_FAIL-1;
1488 txn->root = mp->pgno;
1489 }
1490
1491 return btree_search_page_root(bt, mp, key, cursor, modify, mpp);
1492}
1493
/*
 * Fill in 'data' with the value stored in leaf node 'leaf'.  Small values
 * are either referenced in place on page mp (bumping mp->ref) or copied
 * out when mp is NULL; big values (F_BIGDATA) are reassembled from the
 * chain of overflow pages.  Returns BT_SUCCESS or BT_FAIL.
 */
1494static int
1495btree_read_data(struct btree *bt, struct mpage *mp, struct node *leaf,
1496 struct btval *data)
1497{
1498 struct mpage *omp; /* overflow mpage */
1499 size_t psz;
1500 size_t max;
1501 size_t sz = 0;
1502 pgno_t pgno;
1503
1504 memset(data, 0, sizeof(*data));
 /* Usable payload bytes per overflow page. */
1505 max = bt->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs);
1506
1507 if (!F_ISSET(leaf->flags, F_BIGDATA)(((leaf->flags) & (0x01)) == (0x01))) {
1508 data->size = leaf->n_dsizep.np_dsize;
1509 if (data->size > 0) {
 /* No backing page given: copy the value into fresh heap
  * memory owned by the caller (free_data = 1).  Otherwise
  * point into the page and pin it via the ref count.
  */
1510 if (mp == NULL((void*)0)) {
1511 if ((data->data = malloc(data->size)) == NULL((void*)0))
1512 return BT_FAIL-1;
1513 bcopy(NODEDATA(leaf)(void *)((char *)(leaf)->data + (leaf)->ksize), data->data, data->size);
1514 data->free_data = 1;
1515 data->mp = NULL((void*)0);
1516 } else {
1517 data->data = NODEDATA(leaf)(void *)((char *)(leaf)->data + (leaf)->ksize);
1518 data->free_data = 0;
1519 data->mp = mp;
1520 mp->ref++;
1521 }
1522 }
1523 return BT_SUCCESS0;
1524 }
1525
1526 /* Read overflow data.
1527 */
1528 DPRINTF("allocating %u byte for overflow data", leaf->n_dsize);
1529 if ((data->data = malloc(leaf->n_dsizep.np_dsize)) == NULL((void*)0))
1530 return BT_FAIL-1;
1531 data->size = leaf->n_dsizep.np_dsize;
1532 data->free_data = 1;
1533 data->mp = NULL((void*)0);
 /* The in-node payload of a big-data node is the first overflow pgno. */
1534 bcopy(NODEDATA(leaf)(void *)((char *)(leaf)->data + (leaf)->ksize), &pgno, sizeof(pgno));
1535 for (sz = 0; sz < data->size; ) {
1536 if ((omp = btree_get_mpage(bt, pgno)) == NULL((void*)0) ||
1537 !F_ISSET(omp->page->flags, P_OVERFLOW)(((omp->page->flags) & (0x04)) == (0x04))) {
1538 DPRINTF("read overflow page %u failed", pgno);
1539 free(data->data);
 /* NOTE(review): omp may be NULL here when btree_get_mpage
  * failed — verify mpage_free() tolerates NULL.
  */
1540 mpage_free(omp);
1541 return BT_FAIL-1;
1542 }
 /* Copy at most one page worth, then follow the chain link. */
1543 psz = data->size - sz;
1544 if (psz > max)
1545 psz = max;
1546 bcopy(omp->page->ptrs, (char *)data->data + sz, psz);
1547 sz += psz;
1548 pgno = omp->page->p_next_pgnob.pb_next_pgno;
1549 }
1550
1551 return BT_SUCCESS0;
1552}
1553
/*
 * Public lookup: find 'key' in the tree (within 'txn' if given) and fill
 * in 'data'.  Either bt or txn may be NULL, but not both; when both are
 * given they must refer to the same btree.  Returns BT_SUCCESS or
 * BT_FAIL with errno set (EINVAL on bad arguments, ENOENT if not found).
 */
1554int
1555btree_txn_get(struct btree *bt, struct btree_txn *txn,
1556 struct btval *key, struct btval *data)
1557{
1558 int rc, exact;
1559 struct node *leaf;
1560 struct mpage *mp;
1561
1562 assert(key)((key) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 1562, __func__, "key"))
;
1563 assert(data)((data) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 1563, __func__, "data"))
;
1564 DPRINTF("===> get key [%.*s]", (int)key->size, (char *)key->data);
1565
 /* bt and txn must be consistent; derive bt from txn if absent. */
1566 if (bt != NULL((void*)0) && txn != NULL((void*)0) && bt != txn->bt) {
1567 errno(*__errno()) = EINVAL22;
1568 return BT_FAIL-1;
1569 }
1570
1571 if (bt == NULL((void*)0)) {
1572 if (txn == NULL((void*)0)) {
1573 errno(*__errno()) = EINVAL22;
1574 return BT_FAIL-1;
1575 }
1576 bt = txn->bt;
1577 }
1578
1579 if (key->size == 0 || key->size > MAXKEYSIZE255) {
1580 errno(*__errno()) = EINVAL22;
1581 return BT_FAIL-1;
1582 }
1583
1584 if ((rc = btree_search_page(bt, txn, key, NULL((void*)0), 0, &mp)) != BT_SUCCESS0)
1585 return rc;
1586
 /* Only an exact match counts as a hit for point lookups. */
1587 leaf = btree_search_node(bt, mp, key, &exact, NULL((void*)0));
1588 if (leaf && exact)
1589 rc = btree_read_data(bt, mp, leaf, data);
1590 else {
1591 errno(*__errno()) = ENOENT2;
1592 rc = BT_FAIL-1;
1593 }
1594
 /* Drop unreferenced cached pages. */
1595 mpage_prune(bt);
1596 return rc;
1597}
1598
/*
 * Move the cursor to the next (move_right) or previous leaf-level sibling
 * page, recursing up through ancestors when the current parent has no
 * more keys in that direction.  Returns BT_SUCCESS or BT_FAIL (ENOENT
 * when the tree edge is reached).
 */
1599static int
1600btree_sibling(struct cursor *cursor, int move_right)
1601{
1602 int rc;
1603 struct node *indx;
1604 struct ppage *parent, *top;
1605 struct mpage *mp;
1606
1607 top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first);
1608 if ((parent = SLIST_NEXT(top, entry)((top)->entry.sle_next)) == NULL((void*)0)) {
1609 errno(*__errno()) = ENOENT2;
1610 return BT_FAIL-1; /* root has no siblings */
1611 }
1612
1613 DPRINTF("parent page is page %u, index %u",
1614 parent->mpage->pgno, parent->ki);
1615
 /* Pop the current page; either step the parent's index sideways or,
  * if the parent is exhausted in that direction, recurse upward.
  */
1616 cursor_pop_page(cursor);
1617 if (move_right ? (parent->ki + 1 >= NUMKEYS(parent->mpage)(((parent->mpage)->page->b.fb.fb_lower - __builtin_offsetof
(struct page, ptrs)) >> 1)
)
1618 : (parent->ki == 0)) {
1619 DPRINTF("no more keys left, moving to %s sibling",
1620 move_right ? "right" : "left");
1621 if ((rc = btree_sibling(cursor, move_right)) != BT_SUCCESS0)
1622 return rc;
1623 parent = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first);
1624 } else {
1625 if (move_right)
1626 parent->ki++;
1627 else
1628 parent->ki--;
1629 DPRINTF("just moving to %s index key %u",
1630 move_right ? "right" : "left", parent->ki);
1631 }
1632 assert(IS_BRANCH(parent->mpage))((((((parent->mpage)->page->flags) & (0x01)) == (
0x01))) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 1632, __func__, "IS_BRANCH(parent->mpage)"))
;
1633
 /* Load the sibling child page and push it as the new cursor top. */
1634 indx = NODEPTR(parent->mpage, parent->ki)((struct node *)((char *)((parent->mpage)->page) + ((parent
->mpage)->page)->ptrs[parent->ki]))
;
1635 if ((mp = btree_get_mpage(cursor->bt, indx->n_pgnop.np_pgno)) == NULL((void*)0))
1636 return BT_FAIL-1;
1637 mp->parent = parent->mpage;
1638 mp->parent_index = parent->ki;
1639
1640 cursor_push_page(cursor, mp);
1641 find_common_prefix(cursor->bt, mp);
1642
1643 return BT_SUCCESS0;
1644}
1645
/*
 * Fill in 'key' from node 'node' on page 'mp'.  If the page has a
 * common prefix, the full key is rebuilt into a fresh allocation
 * (caller frees via free_data = 1); otherwise the key points into the
 * page and pins it with a reference.  Returns 0 on success, -1 on
 * allocation failure.  A NULL key is a no-op.
 */
1646static int
1647bt_set_key(struct btree *bt, struct mpage *mp, struct node *node,
1648 struct btval *key)
1649{
1650 if (key == NULL((void*)0))
1651 return 0;
1652
1653 if (mp->prefix.len > 0) {
1654 key->size = node->ksize + mp->prefix.len;
1655 key->data = malloc(key->size);
1656 if (key->data == NULL((void*)0))
1657 return -1;
 /* Rebuild full key = page prefix + stored suffix. */
1658 concat_prefix(bt,
1659 mp->prefix.str, mp->prefix.len,
1660 NODEKEY(node)(void *)((node)->data), node->ksize,
1661 key->data, &key->size);
1662 key->free_data = 1;
1663 } else {
 /* No prefix: reference the key bytes in place and pin the page. */
1664 key->size = node->ksize;
1665 key->data = NODEKEY(node)(void *)((node)->data);
1666 key->free_data = 0;
1667 key->mp = mp;
1668 mp->ref++;
1669 }
1670
1671 return 0;
1672}
1673
/*
 * Advance an initialized cursor to the next key, crossing to the next
 * sibling leaf page when the current one is exhausted.  Fills in 'key'
 * and (if non-NULL) 'data'.  Sets cursor->eof and returns BT_FAIL with
 * ENOENT at the end of the tree.
 */
1674static int
1675btree_cursor_next(struct cursor *cursor, struct btval *key, struct btval *data)
1676{
1677 struct ppage *top;
1678 struct mpage *mp;
1679 struct node *leaf;
1680
1681 if (cursor->eof) {
1682 errno(*__errno()) = ENOENT2;
1683 return BT_FAIL-1;
1684 }
1685
1686 assert(cursor->initialized)((cursor->initialized) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 1686, __func__, "cursor->initialized"))
;
1687
1688 top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first);
1689 mp = top->mpage;
1690
1691 DPRINTF("cursor_next: top page is %u in cursor %p", mp->pgno, cursor);
1692
 /* Last key on this page: hop to the right sibling leaf, else just
  * bump the in-page index.
  */
1693 if (top->ki + 1 >= NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct
 page, ptrs)) >> 1)
) {
1694 DPRINTF("=====> move to next sibling page");
1695 if (btree_sibling(cursor, 1) != BT_SUCCESS0) {
1696 cursor->eof = 1;
1697 return BT_FAIL-1;
1698 }
1699 top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first);
1700 mp = top->mpage;
1701 DPRINTF("next page is %u, key index %u", mp->pgno, top->ki);
1702 } else
1703 top->ki++;
1704
1705 DPRINTF("==> cursor points to page %u with %lu keys, key index %u",
1706 mp->pgno, NUMKEYS(mp), top->ki);
1707
1708 assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void
)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1708, __func__
, "IS_LEAF(mp)"))
;
1709 leaf = NODEPTR(mp, top->ki)((struct node *)((char *)((mp)->page) + ((mp)->page)->
ptrs[top->ki]))
;
1710
1711 if (data && btree_read_data(cursor->bt, mp, leaf, data) != BT_SUCCESS0)
1712 return BT_FAIL-1;
1713
1714 if (bt_set_key(cursor->bt, mp, leaf, key) != 0)
1715 return BT_FAIL-1;
1716
1717 return BT_SUCCESS0;
1718}
1719
/*
 * Position the cursor at 'key', or — when exactp is NULL — at the first
 * key greater than or equal to it (moving to the next sibling leaf if
 * needed).  With exactp non-NULL, a non-exact match fails with ENOENT.
 * On success the matched key/data are returned in key/data.
 */
1720static int
1721btree_cursor_set(struct cursor *cursor, struct btval *key, struct btval *data,
1722 int *exactp)
1723{
1724 int rc;
1725 struct node *leaf;
1726 struct mpage *mp;
1727 struct ppage *top;
1728
1729 assert(cursor)((cursor) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 1729, __func__, "cursor"))
;
1730 assert(key)((key) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 1730, __func__, "key"))
;
1731 assert(key->size > 0)((key->size > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 1731, __func__, "key->size > 0"))
;
1732
1733 rc = btree_search_page(cursor->bt, cursor->txn, key, cursor, 0, &mp);
1734 if (rc != BT_SUCCESS0)
1735 return rc;
1736 assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void
)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1736, __func__
, "IS_LEAF(mp)"))
;
1737
1738 top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first);
1739 leaf = btree_search_node(cursor->bt, mp, key, exactp, &top->ki);
1740 if (exactp != NULL((void*)0) && !*exactp) {
1741 /* BT_CURSOR_EXACT specified and not an exact match. */
1742 errno(*__errno()) = ENOENT2;
1743 return BT_FAIL-1;
1744 }
1745
 /* All keys on this leaf are smaller: start at slot 0 of the next
  * sibling leaf instead.
  */
1746 if (leaf == NULL((void*)0)) {
1747 DPRINTF("===> inexact leaf not found, goto sibling");
1748 if (btree_sibling(cursor, 1) != BT_SUCCESS0)
1749 return BT_FAIL-1; /* no entries matched */
1750 top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first);
1751 top->ki = 0;
1752 mp = top->mpage;
1753 assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void
)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1753, __func__
, "IS_LEAF(mp)"))
;
1754 leaf = NODEPTR(mp, 0)((struct node *)((char *)((mp)->page) + ((mp)->page)->
ptrs[0]))
;
1755 }
1756
1757 cursor->initialized = 1;
1758 cursor->eof = 0;
1759
1760 if (data && btree_read_data(cursor->bt, mp, leaf, data) != BT_SUCCESS0)
1761 return BT_FAIL-1;
1762
1763 if (bt_set_key(cursor->bt, mp, leaf, key) != 0)
1764 return BT_FAIL-1;
1765 DPRINTF("==> cursor placed on key %.*s",
1766 (int)key->size, (char *)key->data);
1767
1768 return BT_SUCCESS0;
1769}
1770
/*
 * Position the cursor at the very first key in the tree (NULL search
 * key selects the leftmost leaf) and return that key/data pair.
 */
1771static int
1772btree_cursor_first(struct cursor *cursor, struct btval *key, struct btval *data)
1773{
1774 int rc;
1775 struct mpage *mp;
1776 struct node *leaf;
1777
1778 rc = btree_search_page(cursor->bt, cursor->txn, NULL((void*)0), cursor, 0, &mp);
1779 if (rc != BT_SUCCESS0)
1780 return rc;
1781 assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void
)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1781, __func__
, "IS_LEAF(mp)"))
;
1782
 /* First node on the leftmost leaf is the smallest key. */
1783 leaf = NODEPTR(mp, 0)((struct node *)((char *)((mp)->page) + ((mp)->page)->
ptrs[0]))
;
1784 cursor->initialized = 1;
1785 cursor->eof = 0;
1786
1787 if (data && btree_read_data(cursor->bt, mp, leaf, data) != BT_SUCCESS0)
1788 return BT_FAIL-1;
1789
1790 if (bt_set_key(cursor->bt, mp, leaf, key) != 0)
1791 return BT_FAIL-1;
1792
1793 return BT_SUCCESS0;
1794}
1795
/*
 * Public cursor operation dispatcher: position (BT_CURSOR /
 * BT_CURSOR_EXACT), advance (BT_NEXT, initializing to the first key if
 * needed), or rewind (BT_FIRST).  Returns BT_SUCCESS or BT_FAIL with
 * errno set for invalid arguments.
 */
1796int
1797btree_cursor_get(struct cursor *cursor, struct btval *key, struct btval *data,
1798 enum cursor_op op)
1799{
1800 int rc;
1801 int exact = 0;
1802
1803 assert(cursor)((cursor) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 1803, __func__, "cursor"))
;
1804
1805 switch (op) {
1806 case BT_CURSOR:
1807 case BT_CURSOR_EXACT:
 /* Drop any previous position before re-seeking. */
1808 while (CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first) != NULL((void*)0))
1809 cursor_pop_page(cursor);
1810 if (key == NULL((void*)0) || key->size == 0 || key->size > MAXKEYSIZE255) {
1811 errno(*__errno()) = EINVAL22;
1812 rc = BT_FAIL-1;
1813 } else if (op == BT_CURSOR_EXACT)
1814 rc = btree_cursor_set(cursor, key, data, &exact);
1815 else
1816 rc = btree_cursor_set(cursor, key, data, NULL((void*)0));
1817 break;
1818 case BT_NEXT:
 /* First BT_NEXT on a fresh cursor yields the first key. */
1819 if (!cursor->initialized)
1820 rc = btree_cursor_first(cursor, key, data);
1821 else
1822 rc = btree_cursor_next(cursor, key, data);
1823 break;
1824 case BT_FIRST:
1825 while (CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first) != NULL((void*)0))
1826 cursor_pop_page(cursor);
1827 rc = btree_cursor_first(cursor, key, data);
1828 break;
1829 default:
1830 DPRINTF("unhandled/unimplemented cursor operation %u", op);
1831 rc = BT_FAIL-1;
1832 break;
1833 }
1834
1835 mpage_prune(cursor->bt);
1836
1837 return rc;
1838}
1839
/*
 * Allocate a fresh in-memory page of the given type within the current
 * write transaction: assigns the next page number, initializes the page
 * header, updates the per-type meta counters, and registers the page as
 * cached and dirty.  Returns NULL on allocation failure.
 */
1840static struct mpage *
1841btree_new_page(struct btree *bt, uint32_t flags)
1842{
1843 struct mpage *mp;
1844
1845 assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 1845, __func__, "bt != NULL"))
;
1846 assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 1846, __func__, "bt->txn != NULL"))
;
1847
1848 DPRINTF("allocating new mpage %u, page size %u",
1849 bt->txn->next_pgno, bt->head.psize);
1850 if ((mp = calloc(1, sizeof(*mp))) == NULL((void*)0))
1851 return NULL((void*)0);
1852 if ((mp->page = malloc(bt->head.psize)) == NULL((void*)0)) {
1853 free(mp);
1854 return NULL((void*)0);
1855 }
 /* Empty page: free space spans from just past the header (lower)
  * to the end of the page (upper).
  */
1856 mp->pgno = mp->page->pgno = bt->txn->next_pgno++;
1857 mp->page->flags = flags;
1858 mp->page->lowerb.fb.fb_lower = PAGEHDRSZ__builtin_offsetof(struct page, ptrs);
1859 mp->page->upperb.fb.fb_upper = bt->head.psize;
1860
1861 if (IS_BRANCH(mp)((((mp)->page->flags) & (0x01)) == (0x01)))
1862 bt->meta.branch_pages++;
1863 else if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)))
1864 bt->meta.leaf_pages++;
1865 else if (IS_OVERFLOW(mp)((((mp)->page->flags) & (0x04)) == (0x04)))
1866 bt->meta.overflow_pages++;
1867
1868 mpage_add(bt, mp);
1869 mpage_dirty(bt, mp);
1870
1871 return mp;
1872}
1873
/*
 * Bytes a leaf node for (key, data) will occupy on a page, including
 * its ptrs[] slot.  Values big enough for an overflow page contribute
 * only a pgno_t link instead of their full size.
 */
1874static size_t
1875bt_leaf_size(struct btree *bt, struct btval *key, struct btval *data)
1876{
1877 size_t sz;
1878
1879 sz = LEAFSIZE(key, data)(__builtin_offsetof(struct node, data) + (key)->size + (data
)->size)
;
1880 if (data->size >= bt->head.psize / BT_MINKEYS4) {
1881 /* put on overflow page */
1882 sz -= data->size - sizeof(pgno_t);
1883 }
1884
1885 return sz + sizeof(indx_t);
1886}
1887
/*
 * Bytes a branch node for 'key' will occupy on a page, including its
 * ptrs[] slot.  Overflow handling for oversized branch keys is not
 * implemented (see the commented-out adjustment below).
 */
1888static size_t
1889bt_branch_size(struct btree *bt, struct btval *key)
1890{
1891 size_t sz;
1892
1893 sz = INDXSIZE(key)(__builtin_offsetof(struct node, data) + ((key) == ((void*)0)
 ? 0 : (key)->size))
;
1894 if (sz >= bt->head.psize / BT_MINKEYS4) {
1895 /* put on overflow page */
1896 /* not implemented */
1897 /* sz -= key->size - sizeof(pgno_t); */
1898 }
1899
1900 return sz + sizeof(indx_t);
1901}
1902
/*
 * Write data->size bytes of value data into overflow page 'p', chaining
 * additional freshly allocated overflow pages via p_next_pgno as needed
 * (0 terminates the chain).  Returns BT_SUCCESS or BT_FAIL.
 */
1903static int
1904btree_write_overflow_data(struct btree *bt, struct page *p, struct btval *data)
1905{
1906 size_t done = 0;
1907 size_t sz;
1908 size_t max;
1909 pgno_t *linkp; /* linked page stored here */
1910 struct mpage *next = NULL((void*)0);
1911
 /* Usable payload bytes per overflow page. */
1912 max = bt->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs);
1913
1914 while (done < data->size) {
 /* After the first iteration, continue on the page allocated
  * in the previous round.
  */
1915 if (next != NULL((void*)0))
1916 p = next->page;
1917 linkp = &p->p_next_pgnob.pb_next_pgno;
1918 if (data->size - done > max) {
1919 /* need another overflow page */
1920 if ((next = btree_new_page(bt, P_OVERFLOW0x04)) == NULL((void*)0))
1921 return BT_FAIL-1;
1922 *linkp = next->pgno;
1923 DPRINTF("linking overflow page %u", next->pgno);
1924 } else
1925 *linkp = 0; /* indicates end of list */
1926 sz = data->size - done;
1927 if (sz > max)
1928 sz = max;
1929 DPRINTF("copying %zu bytes to overflow page %u", sz, p->pgno);
1930 bcopy((char *)data->data + done, p->ptrs, sz);
1931 done += sz;
1932 }
1933
1934 return BT_SUCCESS0;
1935}
1936
/* Key prefix should already be stripped.
1938 */
/*
 * Insert a node at slot 'indx' of page mp.  For leaf pages the node
 * carries (key, data) — large values are redirected to a fresh overflow
 * page and the node stores only its pgno (F_BIGDATA).  For branch pages
 * the node carries (key, pgno).  Fails if the page lacks room.
 */
1939static int
1940btree_add_node(struct btree *bt, struct mpage *mp, indx_t indx,
1941 struct btval *key, struct btval *data, pgno_t pgno, uint8_t flags)
1942{
1943 unsigned int i;
1944 size_t node_size = NODESIZE__builtin_offsetof(struct node, data);
1945 indx_t ofs;
1946 struct node *node;
1947 struct page *p;
1948 struct mpage *ofp = NULL((void*)0); /* overflow page */
1949
1950 p = mp->page;
1951 assert(p->upper >= p->lower)((p->b.fb.fb_upper >= p->b.fb.fb_lower) ? (void)0 : __assert2
("/usr/src/usr.sbin/ldapd/btree.c", 1951, __func__, "p->upper >= p->lower"
))
;
1952
1953 DPRINTF("add node [%.*s] to %s page %u at index %d, key size %zu",
1954 key ? (int)key->size : 0, key ? (char *)key->data : NULL,
1955 IS_LEAF(mp) ? "leaf" : "branch",
1956 mp->pgno, indx, key ? key->size : 0);
1957
 /* Compute the on-page footprint of the new node. */
1958 if (key != NULL((void*)0))
1959 node_size += key->size;
1960
1961 if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) {
1962 assert(data)((data) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 1962, __func__, "data"))
;
1963 node_size += data->size;
1964 if (F_ISSET(flags, F_BIGDATA)(((flags) & (0x01)) == (0x01))) {
1965 /* Data already on overflow page. */
1966 node_size -= data->size - sizeof(pgno_t);
1967 } else if (data->size >= bt->head.psize / BT_MINKEYS4) {
1968 /* Put data on overflow page. */
1969 DPRINTF("data size is %zu, put on overflow page",
1970 data->size);
1971 node_size -= data->size - sizeof(pgno_t);
1972 if ((ofp = btree_new_page(bt, P_OVERFLOW0x04)) == NULL((void*)0))
1973 return BT_FAIL-1;
1974 DPRINTF("allocated overflow page %u", ofp->pgno);
1975 flags |= F_BIGDATA0x01;
1976 }
1977 }
1978
1979 if (node_size + sizeof(indx_t) > SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page->
b.fb.fb_lower)
) {
1980 DPRINTF("not enough room in page %u, got %lu ptrs",
1981 mp->pgno, NUMKEYS(mp));
1982 DPRINTF("upper - lower = %u - %u = %u", p->upper, p->lower,
1983 p->upper - p->lower);
1984 DPRINTF("node size = %zu", node_size);
1985 return BT_FAIL-1;
1986 }
1987
1988 /* Move higher pointers up one slot. */
1989 for (i = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct
 page, ptrs)) >> 1)
; i > indx; i--)
1990 p->ptrs[i] = p->ptrs[i - 1];
1991
1992 /* Adjust free space offsets. */
1993 ofs = p->upperb.fb.fb_upper - node_size;
1994 assert(ofs >= p->lower + sizeof(indx_t))((ofs >= p->b.fb.fb_lower + sizeof(indx_t)) ? (void)0 :
 __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1994, __func__,
 "ofs >= p->lower + sizeof(indx_t)"))
;
1995 p->ptrs[indx] = ofs;
1996 p->upperb.fb.fb_upper = ofs;
1997 p->lowerb.fb.fb_lower += sizeof(indx_t);
1998
1999 /* Write the node data. */
2000 node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)->
ptrs[indx]))
;
2001 node->ksize = (key == NULL((void*)0)) ? 0 : key->size;
2002 node->flags = flags;
2003 if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)))
2004 node->n_dsizep.np_dsize = data->size;
2005 else
2006 node->n_pgnop.np_pgno = pgno;
2007
2008 if (key)
2009 bcopy(key->data, NODEKEY(node)(void *)((node)->data), key->size);
2010
 /* Leaf payload: either the value bytes inline, or the pgno of the
  * (caller-supplied or freshly written) overflow chain.
  */
2011 if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) {
2012 assert(key)((key) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 2012, __func__, "key"))
;
2013 if (ofp == NULL((void*)0)) {
2014 if (F_ISSET(flags, F_BIGDATA)(((flags) & (0x01)) == (0x01)))
2015 bcopy(data->data, node->data + key->size,
2016 sizeof(pgno_t));
2017 else
2018 bcopy(data->data, node->data + key->size,
2019 data->size);
2020 } else {
2021 bcopy(&ofp->pgno, node->data + key->size,
2022 sizeof(pgno_t));
2023 if (btree_write_overflow_data(bt, ofp->page,
2024 data) == BT_FAIL-1)
2025 return BT_FAIL-1;
2026 }
2027 }
2028
2029 return BT_SUCCESS0;
2030}
2031
/*
 * Remove the node at slot 'indx' from page mp: compact the ptrs[] array,
 * slide the heap-area node data up by the freed size, and shrink the
 * page's used region accordingly.
 */
2032static void
2033btree_del_node(struct btree *bt, struct mpage *mp, indx_t indx)
2034{
2035 unsigned int sz;
2036 indx_t i, j, numkeys, ptr;
2037 struct node *node;
2038 char *base;
2039
2040 DPRINTF("delete node %u on %s page %u", indx,
2041 IS_LEAF(mp) ? "leaf" : "branch", mp->pgno);
2042 assert(indx < NUMKEYS(mp))((indx < (((mp)->page->b.fb.fb_lower - __builtin_offsetof
(struct page, ptrs)) >> 1)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 2042, __func__, "indx < NUMKEYS(mp)"))
;
2043
 /* Total bytes the node occupies in the page heap area. */
2044 node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)->
ptrs[indx]))
;
2045 sz = NODESIZE__builtin_offsetof(struct node, data) + node->ksize;
2046 if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) {
2047 if (F_ISSET(node->flags, F_BIGDATA)(((node->flags) & (0x01)) == (0x01)))
2048 sz += sizeof(pgno_t);
2049 else
2050 sz += NODEDSZ(node)((node)->p.np_dsize);
2051 }
2052
 /* Compact ptrs[]: drop slot indx and shift offsets of nodes that
  * live below the deleted one (they will move up by sz bytes).
  */
2053 ptr = mp->page->ptrs[indx];
2054 numkeys = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct
 page, ptrs)) >> 1)
;
2055 for (i = j = 0; i < numkeys; i++) {
2056 if (i != indx) {
2057 mp->page->ptrs[j] = mp->page->ptrs[i];
2058 if (mp->page->ptrs[i] < ptr)
2059 mp->page->ptrs[j] += sz;
2060 j++;
2061 }
2062 }
2063
 /* Overlapping move of the heap area — regions may overlap, which
  * bcopy handles like memmove.
  */
2064 base = (char *)mp->page + mp->page->upperb.fb.fb_upper;
2065 bcopy(base, base + sz, ptr - mp->page->upperb.fb.fb_upper);
2066
2067 mp->page->lowerb.fb.fb_lower -= sizeof(indx_t);
2068 mp->page->upperb.fb.fb_upper += sz;
2069}
2070
/*
 * Create a cursor over 'bt' (optionally bound to transaction 'txn').
 * Either argument may be NULL but not both; mismatched bt/txn pairs are
 * rejected with EINVAL.  Takes a reference on the btree; release with
 * btree_cursor_close().  Returns NULL on error.
 */
2071struct cursor *
2072btree_txn_cursor_open(struct btree *bt, struct btree_txn *txn)
2073{
2074 struct cursor *cursor;
2075
2076 if (bt != NULL((void*)0) && txn != NULL((void*)0) && bt != txn->bt) {
2077 errno(*__errno()) = EINVAL22;
2078 return NULL((void*)0);
2079 }
2080
2081 if (bt == NULL((void*)0)) {
2082 if (txn == NULL((void*)0)) {
2083 errno(*__errno()) = EINVAL22;
2084 return NULL((void*)0);
2085 }
2086 bt = txn->bt;
2087 }
2088
2089 if ((cursor = calloc(1, sizeof(*cursor))) != NULL((void*)0)) {
2090 SLIST_INIT(&cursor->stack){ ((&cursor->stack)->slh_first) = ((void*)0); };
2091 cursor->bt = bt;
2092 cursor->txn = txn;
2093 btree_ref(bt);
2094 }
2095
2096 return cursor;
2097}
2098
/*
 * Destroy a cursor: pop any pages left on its stack, drop the btree
 * reference taken at open time, and free the cursor.  NULL is a no-op.
 */
2099void
2100btree_cursor_close(struct cursor *cursor)
2101{
2102 if (cursor != NULL((void*)0)) {
2103 while (!CURSOR_EMPTY(cursor)(((&(cursor)->stack)->slh_first) == ((void*)0)))
2104 cursor_pop_page(cursor);
2105
2106 btree_close(cursor->bt);
2107 free(cursor);
2108 }
2109}
2110
/*
 * Replace the key of the node at slot 'indx' on page mp with 'key'.
 * When the new key has a different length, the page's heap area below
 * the node is shifted by the delta and all affected ptrs[] offsets are
 * adjusted.  Fails if growth exceeds the page's free space.
 */
2111static int
2112btree_update_key(struct btree *bt, struct mpage *mp, indx_t indx,
2113 struct btval *key)
2114{
2115 indx_t ptr, i, numkeys;
2116 int delta;
2117 size_t len;
2118 struct node *node;
2119 char *base;
2120
2121 node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)->
ptrs[indx]))
;
2122 ptr = mp->page->ptrs[indx];
2123 DPRINTF("update key %u (ofs %u) [%.*s] to [%.*s] on page %u",
2124 indx, ptr,
2125 (int)node->ksize, (char *)NODEKEY(node),
2126 (int)key->size, (char *)key->data,
2127 mp->pgno);
2128
2129 if (key->size != node->ksize) {
 /* delta > 0 means the key grows; ensure the page can absorb it. */
2130 delta = key->size - node->ksize;
2131 if (delta > 0 && SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page->
b.fb.fb_lower)
 < delta) {
2132 DPRINTF("OUCH! Not enough room, delta = %d", delta);
2133 return BT_FAIL-1;
2134 }
2135
 /* Every node at or below this one shifts down by delta bytes. */
2136 numkeys = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct
 page, ptrs)) >> 1)
;
2137 for (i = 0; i < numkeys; i++) {
2138 if (mp->page->ptrs[i] <= ptr)
2139 mp->page->ptrs[i] -= delta;
2140 }
2141
 /* Move the heap area up to the node header (overlap-safe copy). */
2142 base = (char *)mp->page + mp->page->upperb.fb.fb_upper;
2143 len = ptr - mp->page->upperb.fb.fb_upper + NODESIZE__builtin_offsetof(struct node, data);
2144 bcopy(base, base - delta, len);
2145 mp->page->upperb.fb.fb_upper -= delta;
2146
 /* Re-fetch: the node moved with the shifted heap area. */
2147 node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)->
ptrs[indx]))
;
2148 node->ksize = key->size;
2149 }
2150
2151 bcopy(key->data, NODEKEY(node)(void *)((node)->data), key->size);
2152
2153 return BT_SUCCESS0;
2154}
2155
/*
 * Rewrite every key on page src after its common prefix changed by
 * 'delta' bytes: a positive delta strips bytes from each stored key
 * (prefix grew), a negative delta prepends/appends bytes taken from the
 * old prefix (prefix shrank).  Honors BT_REVERSEKEY direction.
 */
2156static int
2157btree_adjust_prefix(struct btree *bt, struct mpage *src, int delta)
2158{
2159 indx_t i;
2160 struct node *node;
2161 struct btkey tmpkey;
2162 struct btval key;
2163
2164 DPRINTF("adjusting prefix lengths on page %u with delta %d",
2165 src->pgno, delta);
2166 assert(delta != 0)((delta != 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 2166, __func__, "delta != 0"))
;
2167
2168 for (i = 0; i < NUMKEYS(src)(((src)->page->b.fb.fb_lower - __builtin_offsetof(struct
 page, ptrs)) >> 1)
; i++) {
2169 node = NODEPTR(src, i)((struct node *)((char *)((src)->page) + ((src)->page)->
ptrs[i]))
;
2170 tmpkey.len = node->ksize - delta;
 /* Prefix grew: drop delta bytes from the stored key (from the
  * end for reverse keys, from the front otherwise).
  */
2171 if (delta > 0) {
2172 if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08)))
2173 bcopy(NODEKEY(node)(void *)((node)->data), tmpkey.str, tmpkey.len);
2174 else
2175 bcopy((char *)NODEKEY(node)(void *)((node)->data) + delta, tmpkey.str,
2176 tmpkey.len);
 /* Prefix shrank: restore -delta bytes of the old prefix around
  * the stored key.
  */
2177 } else {
2178 if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) {
2179 bcopy(NODEKEY(node)(void *)((node)->data), tmpkey.str, node->ksize);
2180 bcopy(src->prefix.str, tmpkey.str + node->ksize,
2181 -delta);
2182 } else {
2183 bcopy(src->prefix.str + src->prefix.len + delta,
2184 tmpkey.str, -delta);
2185 bcopy(NODEKEY(node)(void *)((node)->data), tmpkey.str - delta,
2186 node->ksize);
2187 }
2188 }
2189 key.size = tmpkey.len;
2190 key.data = tmpkey.str;
2191 if (btree_update_key(bt, src, i, &key) != BT_SUCCESS0)
2192 return BT_FAIL-1;
2193 }
2194
2195 return BT_SUCCESS0;
2196}
2197
2198/* Move a node from src to dst.
2199 */
2200static int
2201btree_move_node(struct btree *bt, struct mpage *src, indx_t srcindx,
2202 struct mpage *dst, indx_t dstindx)
2203{
2204 int rc;
2205 unsigned int pfxlen, mp_pfxlen = 0;
2206 struct node *srcnode;
2207 struct mpage *mp = NULL((void*)0);
2208 struct btkey tmpkey, srckey;
2209 struct btval key, data;
2210
2211 assert(src->parent)((src->parent) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 2211, __func__, "src->parent"))
;
2212 assert(dst->parent)((dst->parent) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 2212, __func__, "dst->parent"))
;
2213
2214 srcnode = NODEPTR(src, srcindx)((struct node *)((char *)((src)->page) + ((src)->page)->
ptrs[srcindx]))
;
2215 DPRINTF("moving %s node %u [%.*s] on page %u to node %u on page %u",
2216 IS_LEAF(src) ? "leaf" : "branch",
2217 srcindx,
2218 (int)srcnode->ksize, (char *)NODEKEY(srcnode),
2219 src->pgno,
2220 dstindx, dst->pgno);
2221
2222 find_common_prefix(bt, src);
2223
2224 if (IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) {
2225 /* Need to check if the page the moved node points to
2226 * changes prefix.
2227 */
2228 if ((mp = btree_get_mpage(bt, NODEPGNO(srcnode)((srcnode)->p.np_pgno))) == NULL((void*)0))
2229 return BT_FAIL-1;
2230 mp->parent = src;
2231 mp->parent_index = srcindx;
2232 find_common_prefix(bt, mp);
2233 mp_pfxlen = mp->prefix.len;
2234 }
2235
2236 /* Mark src and dst as dirty. */
2237 if ((src = mpage_touch(bt, src)) == NULL((void*)0) ||
2238 (dst = mpage_touch(bt, dst)) == NULL((void*)0))
2239 return BT_FAIL-1;
2240
2241 find_common_prefix(bt, dst);
2242
2243 /* Check if src node has destination page prefix. Otherwise the
2244 * destination page must expand its prefix on all its nodes.
2245 */
2246 srckey.len = srcnode->ksize;
2247 bcopy(NODEKEY(srcnode)(void *)((srcnode)->data), srckey.str, srckey.len);
2248 common_prefix(bt, &srckey, &dst->prefix, &tmpkey);
2249 if (tmpkey.len != dst->prefix.len) {
2250 if (btree_adjust_prefix(bt, dst,
2251 tmpkey.len - dst->prefix.len) != BT_SUCCESS0)
2252 return BT_FAIL-1;
2253 bcopy(&tmpkey, &dst->prefix, sizeof(tmpkey));
2254 }
2255
2256 if (srcindx == 0 && IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) {
2257 struct mpage *low;
2258
2259 /* must find the lowest key below src
2260 */
2261 assert(btree_search_page_root(bt, src, NULL, NULL, 0,((btree_search_page_root(bt, src, ((void*)0), ((void*)0), 0, &
low) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 2262, __func__, "btree_search_page_root(bt, src, NULL, NULL, 0, &low) == BT_SUCCESS"
))
2262 &low) == BT_SUCCESS)((btree_search_page_root(bt, src, ((void*)0), ((void*)0), 0, &
low) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 2262, __func__, "btree_search_page_root(bt, src, NULL, NULL, 0, &low) == BT_SUCCESS"
))
;
2263 expand_prefix(bt, low, 0, &srckey);
2264 DPRINTF("found lowest key [%.*s] on leaf page %u",
2265 (int)srckey.len, srckey.str, low->pgno);
2266 } else {
2267 srckey.len = srcnode->ksize;
2268 bcopy(NODEKEY(srcnode)(void *)((srcnode)->data), srckey.str, srcnode->ksize);
2269 }
2270 find_common_prefix(bt, src);
2271
2272 /* expand the prefix */
2273 tmpkey.len = sizeof(tmpkey.str);
2274 concat_prefix(bt, src->prefix.str, src->prefix.len,
2275 srckey.str, srckey.len, tmpkey.str, &tmpkey.len);
2276
2277 /* Add the node to the destination page. Adjust prefix for
2278 * destination page.
2279 */
2280 key.size = tmpkey.len;
2281 key.data = tmpkey.str;
2282 remove_prefix(bt, &key, dst->prefix.len);
2283 data.size = NODEDSZ(srcnode)((srcnode)->p.np_dsize);
2284 data.data = NODEDATA(srcnode)(void *)((char *)(srcnode)->data + (srcnode)->ksize);
2285 rc = btree_add_node(bt, dst, dstindx, &key, &data, NODEPGNO(srcnode)((srcnode)->p.np_pgno),
2286 srcnode->flags);
2287 if (rc != BT_SUCCESS0)
2288 return rc;
2289
2290 /* Delete the node from the source page.
2291 */
2292 btree_del_node(bt, src, srcindx);
2293
2294 /* Update the parent separators.
2295 */
2296 if (srcindx == 0 && src->parent_index != 0) {
2297 expand_prefix(bt, src, 0, &tmpkey);
2298 key.size = tmpkey.len;
2299 key.data = tmpkey.str;
2300 remove_prefix(bt, &key, src->parent->prefix.len);
2301
2302 DPRINTF("update separator for source page %u to [%.*s]",
2303 src->pgno, (int)key.size, (char *)key.data);
2304 if (btree_update_key(bt, src->parent, src->parent_index,
2305 &key) != BT_SUCCESS0)
2306 return BT_FAIL-1;
2307 }
2308
2309 if (srcindx == 0 && IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) {
2310 struct btval nullkey;
2311 nullkey.size = 0;
2312 assert(btree_update_key(bt, src, 0, &nullkey) == BT_SUCCESS)((btree_update_key(bt, src, 0, &nullkey) == 0) ? (void)0 :
__assert2("/usr/src/usr.sbin/ldapd/btree.c", 2312, __func__,
"btree_update_key(bt, src, 0, &nullkey) == BT_SUCCESS"))
;
2313 }
2314
2315 if (dstindx == 0 && dst->parent_index != 0) {
2316 expand_prefix(bt, dst, 0, &tmpkey);
2317 key.size = tmpkey.len;
2318 key.data = tmpkey.str;
2319 remove_prefix(bt, &key, dst->parent->prefix.len);
2320
2321 DPRINTF("update separator for destination page %u to [%.*s]",
2322 dst->pgno, (int)key.size, (char *)key.data);
2323 if (btree_update_key(bt, dst->parent, dst->parent_index,
2324 &key) != BT_SUCCESS0)
2325 return BT_FAIL-1;
2326 }
2327
2328 if (dstindx == 0 && IS_BRANCH(dst)((((dst)->page->flags) & (0x01)) == (0x01))) {
2329 struct btval nullkey;
2330 nullkey.size = 0;
2331 assert(btree_update_key(bt, dst, 0, &nullkey) == BT_SUCCESS)((btree_update_key(bt, dst, 0, &nullkey) == 0) ? (void)0 :
__assert2("/usr/src/usr.sbin/ldapd/btree.c", 2331, __func__,
"btree_update_key(bt, dst, 0, &nullkey) == BT_SUCCESS"))
;
2332 }
2333
2334 /* We can get a new page prefix here!
2335 * Must update keys in all nodes of this page!
2336 */
2337 pfxlen = src->prefix.len;
2338 find_common_prefix(bt, src);
2339 if (src->prefix.len != pfxlen) {
2340 if (btree_adjust_prefix(bt, src,
2341 src->prefix.len - pfxlen) != BT_SUCCESS0)
2342 return BT_FAIL-1;
2343 }
2344
2345 pfxlen = dst->prefix.len;
2346 find_common_prefix(bt, dst);
2347 if (dst->prefix.len != pfxlen) {
2348 if (btree_adjust_prefix(bt, dst,
2349 dst->prefix.len - pfxlen) != BT_SUCCESS0)
2350 return BT_FAIL-1;
2351 }
2352
2353 if (IS_BRANCH(dst)((((dst)->page->flags) & (0x01)) == (0x01))) {
2354 assert(mp)((mp) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 2354, __func__, "mp"))
;
2355 mp->parent = dst;
2356 mp->parent_index = dstindx;
2357 find_common_prefix(bt, mp);
2358 if (mp->prefix.len != mp_pfxlen) {
2359 DPRINTF("moved branch node has changed prefix");
2360 if ((mp = mpage_touch(bt, mp)) == NULL((void*)0))
2361 return BT_FAIL-1;
2362 if (btree_adjust_prefix(bt, mp,
2363 mp->prefix.len - mp_pfxlen) != BT_SUCCESS0)
2364 return BT_FAIL-1;
2365 }
2366 }
2367
2368 return BT_SUCCESS0;
2369}
2370
/* Merge all nodes from page <src> into page <dst>, then unlink <src>
 * from its parent page and rebalance the parent.  Both pages must have
 * a parent (asserted below); the root page is never merged.  Prefixes:
 * if the nodes on <src> do not share <dst>'s key prefix, <dst>'s prefix
 * is first shortened (btree_adjust_prefix) to the common prefix of both
 * pages.  Returns BT_SUCCESS or BT_FAIL.
 */
static int
btree_merge(struct btree *bt, struct mpage *src, struct mpage *dst)
{
	int			 rc;
	indx_t			 i;
	unsigned int		 pfxlen;
	struct node		*srcnode;
	struct btkey		 tmpkey, dstpfx;
	struct btval		 key, data;

	DPRINTF("merging page %u and %u", src->pgno, dst->pgno);

	assert(src->parent);	/* can't merge root page */
	assert(dst->parent);
	assert(bt->txn != NULL);

	/* Mark src and dst as dirty. */
	if ((src = mpage_touch(bt, src)) == NULL ||
	    (dst = mpage_touch(bt, dst)) == NULL)
		return BT_FAIL;

	find_common_prefix(bt, src);
	find_common_prefix(bt, dst);

	/* Check if source nodes has destination page prefix. Otherwise
	 * the destination page must expand its prefix on all its nodes.
	 */
	common_prefix(bt, &src->prefix, &dst->prefix, &dstpfx);
	if (dstpfx.len != dst->prefix.len) {
		/* dstpfx is shorter than dst's current prefix; push the
		 * difference back into dst's node keys before adopting it. */
		if (btree_adjust_prefix(bt, dst,
		    dstpfx.len - dst->prefix.len) != BT_SUCCESS)
			return BT_FAIL;
		bcopy(&dstpfx, &dst->prefix, sizeof(dstpfx));
	}

	/* Move all nodes from src to dst.
	 */
	for (i = 0; i < NUMKEYS(src); i++) {
		srcnode = NODEPTR(src, i);

		/* If branch node 0 (implicit key), find the real key.
		 */
		if (i == 0 && IS_BRANCH(src)) {
			struct mpage	*low;

			/* must find the lowest key below src
			 */
			assert(btree_search_page_root(bt, src, NULL, NULL, 0,
			    &low) == BT_SUCCESS);
			expand_prefix(bt, low, 0, &tmpkey);
			DPRINTF("found lowest key [%.*s] on leaf page %u",
			    (int)tmpkey.len, tmpkey.str, low->pgno);
		} else {
			/* Rebuild the full key (page prefix + suffix). */
			expand_prefix(bt, src, i, &tmpkey);
		}

		key.size = tmpkey.len;
		key.data = tmpkey.str;

		/* Strip dst's prefix before appending to dst. */
		remove_prefix(bt, &key, dst->prefix.len);
		data.size = NODEDSZ(srcnode);
		data.data = NODEDATA(srcnode);
		rc = btree_add_node(bt, dst, NUMKEYS(dst), &key,
		    &data, NODEPGNO(srcnode), srcnode->flags);
		if (rc != BT_SUCCESS)
			return rc;
	}

	DPRINTF("dst page %u now has %lu keys (%.1f%% filled)",
	    dst->pgno, NUMKEYS(dst), (float)PAGEFILL(bt, dst) / 10);

	/* Unlink the src page from parent.
	 */
	btree_del_node(bt, src->parent, src->parent_index);
	if (src->parent_index == 0) {
		/* src was the leftmost child: its successor's separator
		 * becomes implicit (empty key). */
		key.size = 0;
		if (btree_update_key(bt, src->parent, 0, &key) != BT_SUCCESS)
			return BT_FAIL;

		pfxlen = src->prefix.len;
		find_common_prefix(bt, src);
		assert (src->prefix.len == pfxlen);
	}

	if (IS_LEAF(src))
		bt->meta.leaf_pages--;
	else
		bt->meta.branch_pages--;

	/* The parent lost a node; it may now be underfull itself. */
	return btree_rebalance(bt, src->parent);
}
2462
2463#define FILL_THRESHOLD250 250
2464
/* Restore B-tree invariants for page <mp> after a deletion.  If the
 * page fill is at or above FILL_THRESHOLD (250 of 1000, i.e. 25%),
 * nothing is done.  A root page is handled specially: an empty tree
 * drops the root, and a branch root with a single child is collapsed.
 * Otherwise a neighbor sibling is located; if it is well filled a
 * single key is moved from it (btree_move_node), else the two pages
 * are merged (btree_merge).  Returns BT_SUCCESS or BT_FAIL.
 */
static int
btree_rebalance(struct btree *bt, struct mpage *mp)
{
	struct node	*node;
	struct mpage	*parent;
	struct mpage	*root;
	struct mpage	*neighbor = NULL;
	indx_t		 si = 0, di = 0;	/* source/destination indices for move */

	assert(bt != NULL);
	assert(bt->txn != NULL);
	assert(mp != NULL);

	DPRINTF("rebalancing %s page %u (has %lu keys, %.1f%% full)",
	    IS_LEAF(mp) ? "leaf" : "branch",
	    mp->pgno, NUMKEYS(mp), (float)PAGEFILL(bt, mp) / 10);

	if (PAGEFILL(bt, mp) >= FILL_THRESHOLD) {
		DPRINTF("no need to rebalance page %u, above fill threshold",
		    mp->pgno);
		return BT_SUCCESS;
	}

	parent = mp->parent;

	if (parent == NULL) {
		/* mp is the root page. */
		if (NUMKEYS(mp) == 0) {
			DPRINTF("tree is completely empty");
			bt->txn->root = P_INVALID;
			bt->meta.depth--;
			bt->meta.leaf_pages--;
		} else if (IS_BRANCH(mp) && NUMKEYS(mp) == 1) {
			/* A branch root with one child is redundant:
			 * promote the child to root. */
			DPRINTF("collapsing root page!");
			bt->txn->root = NODEPGNO(NODEPTR(mp, 0));
			if ((root = btree_get_mpage(bt, bt->txn->root)) == NULL)
				return BT_FAIL;
			root->parent = NULL;
			bt->meta.depth--;
			bt->meta.branch_pages--;
		} else
			DPRINTF("root page doesn't need rebalancing");
		return BT_SUCCESS;
	}

	/* The parent (branch page) must have at least 2 pointers,
	 * otherwise the tree is invalid.
	 */
	assert(NUMKEYS(parent) > 1);

	/* Leaf page fill factor is below the threshold.
	 * Try to move keys from left or right neighbor, or
	 * merge with a neighbor page.
	 */

	/* Find neighbors.
	 */
	if (mp->parent_index == 0) {
		/* We're the leftmost leaf in our parent.
		 */
		DPRINTF("reading right neighbor");
		node = NODEPTR(parent, mp->parent_index + 1);
		if ((neighbor = btree_get_mpage(bt, NODEPGNO(node))) == NULL)
			return BT_FAIL;
		neighbor->parent_index = mp->parent_index + 1;
		/* Move the right neighbor's first key to mp's end. */
		si = 0;
		di = NUMKEYS(mp);
	} else {
		/* There is at least one neighbor to the left.
		 */
		DPRINTF("reading left neighbor");
		node = NODEPTR(parent, mp->parent_index - 1);
		if ((neighbor = btree_get_mpage(bt, NODEPGNO(node))) == NULL)
			return BT_FAIL;
		neighbor->parent_index = mp->parent_index - 1;
		/* Move the left neighbor's last key to mp's front. */
		si = NUMKEYS(neighbor) - 1;
		di = 0;
	}
	neighbor->parent = parent;

	DPRINTF("found neighbor page %u (%lu keys, %.1f%% full)",
	    neighbor->pgno, NUMKEYS(neighbor), (float)PAGEFILL(bt, neighbor) / 10);

	/* If the neighbor page is above threshold and has at least two
	 * keys, move one key from it.
	 *
	 * Otherwise we should try to merge them, but that might not be
	 * possible, even if both are below threshold, as prefix expansion
	 * might make keys larger. FIXME: detect this
	 */
	if (PAGEFILL(bt, neighbor) >= FILL_THRESHOLD && NUMKEYS(neighbor) >= 2)
		return btree_move_node(bt, neighbor, si, mp, di);
	else { /* FIXME: if (has_enough_room()) */
		/* Always merge into the left of the two pages. */
		if (mp->parent_index == 0)
			return btree_merge(bt, neighbor, mp);
		else
			return btree_merge(bt, mp, neighbor);
	}
}
2563
/* Delete <key> from the tree.  If <txn> is NULL, a temporary write
 * transaction is created and committed on success (aborted on failure).
 * If <data> is non-NULL, the deleted entry's data is read back into it
 * before deletion.  Returns BT_SUCCESS, or BT_FAIL with errno set
 * (EINVAL for bad arguments or a read-only/mismatched transaction,
 * ENOENT if the key is not present).
 */
int
btree_txn_del(struct btree *bt, struct btree_txn *txn,
    struct btval *key, struct btval *data)
{
	int		 rc, exact, close_txn = 0;
	unsigned int	 ki;
	struct node	*leaf;
	struct mpage	*mp;

	DPRINTF("========> delete key %.*s", (int)key->size, (char *)key->data);

	assert(key != NULL);

	/* <bt> and <txn> must agree when both are given. */
	if (bt != NULL && txn != NULL && bt != txn->bt) {
		errno = EINVAL;
		return BT_FAIL;
	}

	/* Deletion requires a write transaction. */
	if (txn != NULL && F_ISSET(txn->flags, BT_TXN_RDONLY)) {
		errno = EINVAL;
		return BT_FAIL;
	}

	if (bt == NULL) {
		if (txn == NULL) {
			errno = EINVAL;
			return BT_FAIL;
		}
		bt = txn->bt;
	}

	if (key->size == 0 || key->size > MAXKEYSIZE) {
		errno = EINVAL;
		return BT_FAIL;
	}

	if (txn == NULL) {
		/* No caller transaction: run one just for this delete. */
		close_txn = 1;
		if ((txn = btree_txn_begin(bt, 0)) == NULL)
			return BT_FAIL;
	}

	/* Find the leaf page that would contain the key (modify=1 makes
	 * the path writable). */
	if ((rc = btree_search_page(bt, txn, key, NULL, 1, &mp)) != BT_SUCCESS)
		goto done;

	leaf = btree_search_node(bt, mp, key, &exact, &ki);
	if (leaf == NULL || !exact) {
		errno = ENOENT;
		rc = BT_FAIL;
		goto done;
	}

	/* Optionally hand the old data back to the caller. */
	if (data && (rc = btree_read_data(bt, NULL, leaf, data)) != BT_SUCCESS)
		goto done;

	btree_del_node(bt, mp, ki);
	bt->meta.entries--;
	rc = btree_rebalance(bt, mp);
	if (rc != BT_SUCCESS)
		txn->flags |= BT_TXN_ERROR;

done:
	if (close_txn) {
		if (rc == BT_SUCCESS)
			rc = btree_txn_commit(txn);
		else
			btree_txn_abort(txn);
	}
	mpage_prune(bt);
	return rc;
}
2635
/* Reduce the length of the prefix separator <sep> to the minimum length that
 * still makes it uniquely distinguishable from <min>.
 *
 * <min> is guaranteed to be sorted less than <sep>
 *
 * On return, <sep> is modified to the minimum length.
 *
 * Two directions are supported: with BT_REVERSEKEY the shared run is
 * counted from the ends of the keys backwards, otherwise from the
 * start forwards.  In both cases the result keeps one byte beyond the
 * shared run, which is what makes it still compare above <min>.
 */
static void
bt_reduce_separator(struct btree *bt, struct node *min, struct btval *sep)
{
	size_t		 n = 0;
	char		*p1;
	char		*p2;

	if (F_ISSET(bt->flags, BT_REVERSEKEY)) {

		assert(sep->size > 0);

		/* Walk both keys backwards from their last byte. */
		p1 = (char *)NODEKEY(min) + min->ksize - 1;
		p2 = (char *)sep->data + sep->size - 1;

		while (p1 >= (char *)NODEKEY(min) && *p1 == *p2) {
			assert(p2 > (char *)sep->data);
			p1--;
			p2--;
			n++;
		}

		/* Keep the first differing byte plus the shared tail. */
		sep->data = p2;
		sep->size = n + 1;
	} else {

		assert(min->ksize > 0);
		assert(sep->size > 0);

		/* Walk both keys forwards from their first byte. */
		p1 = (char *)NODEKEY(min);
		p2 = (char *)sep->data;

		while (*p1 == *p2) {
			p1++;
			p2++;
			n++;
			if (n == min->ksize || n == sep->size)
				break;
		}

		/* Shared run plus one distinguishing byte. */
		sep->size = n + 1;
	}

	DPRINTF("reduced separator to [%.*s] > [%.*s]",
	    (int)sep->size, (char *)sep->data,
	    (int)min->ksize, (char *)NODEKEY(min));
}
2689
2690/* Split page <*mpp>, and insert <key,(data|newpgno)> in either left or
2691 * right sibling, at index <*newindxp> (as if unsplit). Updates *mpp and
2692 * *newindxp with the actual values after split, ie if *mpp and *newindxp
2693 * refer to a node in the new right sibling page.
2694 */
2695static int
2696btree_split(struct btree *bt, struct mpage **mpp, unsigned int *newindxp,
2697 struct btval *newkey, struct btval *newdata, pgno_t newpgno)
2698{
2699 uint8_t flags;
2700 int rc = BT_SUCCESS0, ins_new = 0;
2701 indx_t newindx;
2702 pgno_t pgno = 0;
2703 size_t orig_pfx_len, left_pfx_diff, right_pfx_diff, pfx_diff;
2704 unsigned int i, j, split_indx;
2705 struct node *node;
2706 struct mpage *pright, *p, *mp;
2707 struct btval sepkey, rkey, rdata;
2708 struct btkey tmpkey;
2709 struct page *copy;
2710
2711 assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 2711, __func__, "bt != NULL"))
;
24
'?' condition is true
41
'?' condition is true
56
'?' condition is true
73
'?' condition is true
2712 assert(bt->txn != NULL)((bt->txn != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 2712, __func__, "bt->txn != NULL"))
;
25
'?' condition is true
42
'?' condition is true
57
'?' condition is true
74
Assuming field 'txn' is not equal to null
75
'?' condition is true
2713
2714 mp = *mpp;
2715 newindx = *newindxp;
2716
2717 DPRINTF("-----> splitting %s page %u and adding [%.*s] at index %d",
2718 IS_LEAF(mp) ? "leaf" : "branch", mp->pgno,
2719 (int)newkey->size, (char *)newkey->data, *newindxp);
2720 DPRINTF("page %u has prefix [%.*s]", mp->pgno,
2721 (int)mp->prefix.len, (char *)mp->prefix.str);
2722 orig_pfx_len = mp->prefix.len;
2723
2724 if (mp->parent == NULL((void*)0)) {
26
Assuming field 'parent' is not equal to NULL
27
Taking false branch
43
Assuming field 'parent' is not equal to NULL
44
Taking false branch
58
Assuming field 'parent' is not equal to NULL
59
Taking false branch
76
Assuming field 'parent' is not equal to NULL
77
Taking false branch
2725 if ((mp->parent = btree_new_page(bt, P_BRANCH0x01)) == NULL((void*)0))
2726 return BT_FAIL-1;
2727 mp->parent_index = 0;
2728 bt->txn->root = mp->parent->pgno;
2729 DPRINTF("root split! new root = %u", mp->parent->pgno);
2730 bt->meta.depth++;
2731
2732 /* Add left (implicit) pointer. */
2733 if (btree_add_node(bt, mp->parent, 0, NULL((void*)0), NULL((void*)0),
2734 mp->pgno, 0) != BT_SUCCESS0)
2735 return BT_FAIL-1;
2736 } else {
2737 DPRINTF("parent branch page is %u", mp->parent->pgno);
2738 }
2739
2740 /* Create a right sibling. */
2741 if ((pright = btree_new_page(bt, mp->page->flags)) == NULL((void*)0))
28
Taking false branch
45
Taking false branch
60
Taking false branch
78
Assuming the condition is false
79
Taking false branch
2742 return BT_FAIL-1;
2743 pright->parent = mp->parent;
2744 pright->parent_index = mp->parent_index + 1;
2745 DPRINTF("new right sibling: page %u", pright->pgno);
2746
2747 /* Move half of the keys to the right sibling. */
2748 if ((copy = malloc(bt->head.psize)) == NULL((void*)0))
29
Assuming the condition is false
30
Taking false branch
46
Assuming the condition is false
47
Taking false branch
61
Assuming the condition is false
62
Taking false branch
80
Assuming the condition is false
81
Taking false branch
2749 return BT_FAIL-1;
2750 bcopy(mp->page, copy, bt->head.psize);
2751 assert(mp->ref == 0)((mp->ref == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 2751, __func__, "mp->ref == 0"))
; /* XXX */
31
Assuming field 'ref' is equal to 0
32
'?' condition is true
48
Assuming field 'ref' is equal to 0
49
'?' condition is true
63
Assuming field 'ref' is equal to 0
64
'?' condition is true
82
Assuming field 'ref' is equal to 0
83
'?' condition is true
2752 memset(&mp->page->ptrs, 0, bt->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs));
2753 mp->page->lowerb.fb.fb_lower = PAGEHDRSZ__builtin_offsetof(struct page, ptrs);
2754 mp->page->upperb.fb.fb_upper = bt->head.psize;
2755
2756 split_indx = NUMKEYSP(copy)(((copy)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs
)) >> 1)
/ 2 + 1;
2757
2758 /* First find the separating key between the split pages.
2759 */
2760 memset(&sepkey, 0, sizeof(sepkey));
2761 if (newindx == split_indx) {
33
Assuming 'newindx' is not equal to 'split_indx'
34
Taking false branch
50
Assuming 'newindx' is not equal to 'split_indx'
51
Taking false branch
65
Assuming 'newindx' is not equal to 'split_indx'
66
Taking false branch
84
Assuming 'newindx' is not equal to 'split_indx'
85
Taking false branch
2762 sepkey.size = newkey->size;
2763 sepkey.data = newkey->data;
2764 remove_prefix(bt, &sepkey, mp->prefix.len);
2765 } else {
2766 node = NODEPTRP(copy, split_indx)((struct node *)((char *)(copy) + (copy)->ptrs[split_indx]
))
;
2767 sepkey.size = node->ksize;
2768 sepkey.data = NODEKEY(node)(void *)((node)->data);
2769 }
2770
2771 if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)) && bt->cmp == NULL((void*)0)) {
35
Assuming field 'cmp' is not equal to NULL
36
Taking false branch
86
Assuming the condition is false
2772 /* Find the smallest separator. */
2773 /* Ref: Prefix B-trees, R. Bayer, K. Unterauer, 1977 */
2774 node = NODEPTRP(copy, split_indx - 1)((struct node *)((char *)(copy) + (copy)->ptrs[split_indx -
1]))
;
2775 bt_reduce_separator(bt, node, &sepkey);
2776 }
2777
2778 /* Fix separator wrt parent prefix. */
2779 if (bt->cmp
36.1
Field 'cmp' is not equal to NULL
51.1
Field 'cmp' is not equal to NULL
== NULL((void*)0)
) {
37
Taking false branch
52
Taking false branch
67
Assuming field 'cmp' is not equal to NULL
68
Taking false branch
87
Assuming field 'cmp' is not equal to NULL
88
Taking false branch
2780 tmpkey.len = sizeof(tmpkey.str);
2781 concat_prefix(bt, mp->prefix.str, mp->prefix.len,
2782 sepkey.data, sepkey.size, tmpkey.str, &tmpkey.len);
2783 sepkey.data = tmpkey.str;
2784 sepkey.size = tmpkey.len;
2785 }
2786
2787 DPRINTF("separator is [%.*s]", (int)sepkey.size, (char *)sepkey.data);
2788
2789 /* Copy separator key to the parent.
2790 */
2791 if (SIZELEFT(pright->parent)(indx_t)((pright->parent)->page->b.fb.fb_upper - (pright
->parent)->page->b.fb.fb_lower)
< bt_branch_size(bt, &sepkey)
) {
38
Assuming the condition is true
39
Taking true branch
53
Assuming the condition is true
54
Taking true branch
69
Assuming the condition is true
70
Taking true branch
89
Assuming the condition is true
90
Taking true branch
2792 rc = btree_split(bt, &pright->parent, &pright->parent_index,
40
Calling 'btree_split'
55
Calling 'btree_split'
72
Calling 'btree_split'
2793 &sepkey, NULL((void*)0), pright->pgno);
71
Passing null pointer value via 5th parameter 'newdata'
2794
2795 /* Right page might now have changed parent.
2796 * Check if left page also changed parent.
2797 */
2798 if (pright->parent != mp->parent &&
91
Assuming 'pright->parent' is equal to 'mp->parent'
2799 mp->parent_index >= NUMKEYS(mp->parent)(((mp->parent)->page->b.fb.fb_lower - __builtin_offsetof
(struct page, ptrs)) >> 1)
) {
2800 mp->parent = pright->parent;
2801 mp->parent_index = pright->parent_index - 1;
2802 }
2803 } else {
2804 remove_prefix(bt, &sepkey, pright->parent->prefix.len);
2805 rc = btree_add_node(bt, pright->parent, pright->parent_index,
2806 &sepkey, NULL((void*)0), pright->pgno, 0);
2807 }
2808 if (rc != BT_SUCCESS0) {
92
Assuming 'rc' is equal to BT_SUCCESS
93
Taking false branch
2809 free(copy);
2810 return BT_FAIL-1;
2811 }
2812
2813 /* Update prefix for right and left page, if the parent was split.
2814 */
2815 find_common_prefix(bt, pright);
2816 assert(orig_pfx_len <= pright->prefix.len)((orig_pfx_len <= pright->prefix.len) ? (void)0 : __assert2
("/usr/src/usr.sbin/ldapd/btree.c", 2816, __func__, "orig_pfx_len <= pright->prefix.len"
))
;
94
Assuming 'orig_pfx_len' is <= field 'len'
95
'?' condition is true
2817 right_pfx_diff = pright->prefix.len - orig_pfx_len;
2818
2819 find_common_prefix(bt, mp);
2820 assert(orig_pfx_len <= mp->prefix.len)((orig_pfx_len <= mp->prefix.len) ? (void)0 : __assert2
("/usr/src/usr.sbin/ldapd/btree.c", 2820, __func__, "orig_pfx_len <= mp->prefix.len"
))
;
96
Assuming 'orig_pfx_len' is <= field 'len'
97
'?' condition is true
2821 left_pfx_diff = mp->prefix.len - orig_pfx_len;
2822
2823 for (i = j = 0; i <= NUMKEYSP(copy)(((copy)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs
)) >> 1)
; j++) {
98
Loop condition is true. Entering loop body
2824 if (i < split_indx) {
99
Assuming 'i' is < 'split_indx'
100
Taking true branch
2825 /* Re-insert in left sibling. */
2826 p = mp;
2827 pfx_diff = left_pfx_diff;
2828 } else {
2829 /* Insert in right sibling. */
2830 if (i == split_indx)
2831 /* Reset insert index for right sibling. */
2832 j = (i == newindx && ins_new);
2833 p = pright;
2834 pfx_diff = right_pfx_diff;
2835 }
2836
2837 if (i == newindx && !ins_new
101.1
'ins_new' is 0
) {
101
Assuming 'i' is equal to 'newindx'
102
Taking true branch
2838 /* Insert the original entry that caused the split. */
2839 rkey.data = newkey->data;
2840 rkey.size = newkey->size;
2841 if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) {
103
Assuming the condition is true
104
Taking true branch
2842 rdata.data = newdata->data;
105
Access to field 'data' results in a dereference of a null pointer (loaded from variable 'newdata')
2843 rdata.size = newdata->size;
2844 } else
2845 pgno = newpgno;
2846 flags = 0;
2847 pfx_diff = p->prefix.len;
2848
2849 ins_new = 1;
2850
2851 /* Update page and index for the new key. */
2852 *newindxp = j;
2853 *mpp = p;
2854 } else if (i == NUMKEYSP(copy)(((copy)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs
)) >> 1)
) {
2855 break;
2856 } else {
2857 node = NODEPTRP(copy, i)((struct node *)((char *)(copy) + (copy)->ptrs[i]));
2858 rkey.data = NODEKEY(node)(void *)((node)->data);
2859 rkey.size = node->ksize;
2860 if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) {
2861 rdata.data = NODEDATA(node)(void *)((char *)(node)->data + (node)->ksize);
2862 rdata.size = node->n_dsizep.np_dsize;
2863 } else
2864 pgno = node->n_pgnop.np_pgno;
2865 flags = node->flags;
2866
2867 i++;
2868 }
2869
2870 if (!IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)) && j == 0) {
2871 /* First branch index doesn't need key data. */
2872 rkey.size = 0;
2873 } else
2874 remove_prefix(bt, &rkey, pfx_diff);
2875
2876 rc = btree_add_node(bt, p, j, &rkey, &rdata, pgno,flags);
2877 }
2878
2879 free(copy);
2880 return rc;
2881}
2882
/* Insert or replace <key>/<data>.  If <txn> is NULL a temporary write
 * transaction is created and committed on success (aborted on failure).
 * With BT_NOOVERWRITE set, an existing key fails with EEXIST; otherwise
 * an existing entry is deleted first and re-added.  Returns BT_SUCCESS,
 * or BT_FAIL with errno set (EINVAL for bad arguments or a read-only/
 * mismatched transaction, EEXIST for a duplicate with BT_NOOVERWRITE).
 */
int
btree_txn_put(struct btree *bt, struct btree_txn *txn,
    struct btval *key, struct btval *data, unsigned int flags)
{
	int		 rc = BT_SUCCESS, exact, close_txn = 0;
	unsigned int	 ki;
	struct node	*leaf;
	struct mpage	*mp;
	struct btval	 xkey;

	assert(key != NULL);
	assert(data != NULL);

	/* <bt> and <txn> must agree when both are given. */
	if (bt != NULL && txn != NULL && bt != txn->bt) {
		errno = EINVAL;
		return BT_FAIL;
	}

	/* Insertion requires a write transaction. */
	if (txn != NULL && F_ISSET(txn->flags, BT_TXN_RDONLY)) {
		errno = EINVAL;
		return BT_FAIL;
	}

	if (bt == NULL) {
		if (txn == NULL) {
			errno = EINVAL;
			return BT_FAIL;
		}
		bt = txn->bt;
	}

	if (key->size == 0 || key->size > MAXKEYSIZE) {
		errno = EINVAL;
		return BT_FAIL;
	}

	DPRINTF("==> put key %.*s, size %zu, data size %zu",
	    (int)key->size, (char *)key->data, key->size, data->size);

	if (txn == NULL) {
		/* No caller transaction: run one just for this put. */
		close_txn = 1;
		if ((txn = btree_txn_begin(bt, 0)) == NULL)
			return BT_FAIL;
	}

	rc = btree_search_page(bt, txn, key, NULL, 1, &mp);
	if (rc == BT_SUCCESS) {
		leaf = btree_search_node(bt, mp, key, &exact, &ki);
		if (leaf && exact) {
			if (F_ISSET(flags, BT_NOOVERWRITE)) {
				DPRINTF("duplicate key %.*s",
				    (int)key->size, (char *)key->data);
				errno = EEXIST;
				rc = BT_FAIL;
				goto done;
			}
			/* Replace: delete the old entry, re-insert below. */
			btree_del_node(bt, mp, ki);
		}
		if (leaf == NULL) {		/* append if not found */
			ki = NUMKEYS(mp);
			DPRINTF("appending key at index %d", ki);
		}
	} else if (errno == ENOENT) {
		/* new file, just write a root leaf page */
		DPRINTF("allocating new root leaf page");
		if ((mp = btree_new_page(bt, P_LEAF)) == NULL) {
			rc = BT_FAIL;
			goto done;
		}
		txn->root = mp->pgno;
		bt->meta.depth++;
		ki = 0;
	}
	else
		goto done;

	assert(IS_LEAF(mp));
	DPRINTF("there are %lu keys, should insert new key at index %u",
	    NUMKEYS(mp), ki);

	/* Copy the key pointer as it is modified by the prefix code. The
	 * caller might have malloc'ed the data.
	 */
	xkey.data = key->data;
	xkey.size = key->size;

	if (SIZELEFT(mp) < bt_leaf_size(bt, key, data)) {
		/* Not enough room on the leaf: split it. */
		rc = btree_split(bt, &mp, &ki, &xkey, data, P_INVALID);
	} else {
		/* There is room already in this leaf page. */
		remove_prefix(bt, &xkey, mp->prefix.len);
		rc = btree_add_node(bt, mp, ki, &xkey, data, 0, 0);
	}

	if (rc != BT_SUCCESS)
		txn->flags |= BT_TXN_ERROR;
	else
		bt->meta.entries++;

done:
	if (close_txn) {
		if (rc == BT_SUCCESS)
			rc = btree_txn_commit(txn);
		else
			btree_txn_abort(txn);
	}
	mpage_prune(bt);
	return rc;
}
2992
/* Recursively copy the subtree rooted at page <pgno> of <bt> into the
 * compacted tree <btc>, depth-first, rewriting all child page numbers
 * to their new locations as pages are appended to <btc>'s file.
 * Handles branch pages (descend per child), leaf pages (follow
 * F_BIGDATA overflow references stored in the node data), and overflow
 * chains (follow p_next_pgno).  Returns the page's new page number in
 * <btc>, or P_INVALID on error.
 */
static pgno_t
btree_compact_tree(struct btree *bt, pgno_t pgno, struct btree *btc)
{
	ssize_t		 rc;
	indx_t		 i;
	pgno_t		*pnext, next;
	struct node	*node;
	struct page	*p;
	struct mpage	*mp;

	/* Get the page and make a copy of it.
	 */
	if ((mp = btree_get_mpage(bt, pgno)) == NULL)
		return P_INVALID;
	if ((p = malloc(bt->head.psize)) == NULL)
		return P_INVALID;
	bcopy(mp->page, p, bt->head.psize);

	/* Go through all nodes in the (copied) page and update the
	 * page pointers.
	 */
	if (F_ISSET(p->flags, P_BRANCH)) {
		for (i = 0; i < NUMKEYSP(p); i++) {
			node = NODEPTRP(p, i);
			node->n_pgno = btree_compact_tree(bt, node->n_pgno, btc);
			if (node->n_pgno == P_INVALID) {
				free(p);
				return P_INVALID;
			}
		}
	} else if (F_ISSET(p->flags, P_LEAF)) {
		for (i = 0; i < NUMKEYSP(p); i++) {
			node = NODEPTRP(p, i);
			if (F_ISSET(node->flags, F_BIGDATA)) {
				/* Node data holds an overflow page number;
				 * copy it, compact that chain, write back. */
				bcopy(NODEDATA(node), &next, sizeof(next));
				next = btree_compact_tree(bt, next, btc);
				if (next == P_INVALID) {
					free(p);
					return P_INVALID;
				}
				bcopy(&next, NODEDATA(node), sizeof(next));
			}
		}
	} else if (F_ISSET(p->flags, P_OVERFLOW)) {
		pnext = &p->p_next_pgno;
		if (*pnext > 0) {
			*pnext = btree_compact_tree(bt, *pnext, btc);
			if (*pnext == P_INVALID) {
				free(p);
				return P_INVALID;
			}
		}
	} else
		assert(0);	/* unknown page type: tree is corrupt */

	/* Children are written; append this page to the compacted file. */
	pgno = p->pgno = btc->txn->next_pgno++;
	rc = write(btc->fd, p, bt->head.psize);
	free(p);
	if (rc != (ssize_t)bt->head.psize)
		return P_INVALID;
	mpage_prune(bt);
	return pgno;
}
3056
3057int
3058btree_compact(struct btree *bt)
3059{
3060 char *compact_path = NULL((void*)0);
3061 struct btree *btc;
3062 struct btree_txn *txn, *txnc = NULL((void*)0);
3063 int fd;
3064 pgno_t root;
3065
3066 assert(bt != NULL)((bt != ((void*)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c"
, 3066, __func__, "bt != NULL"))
;
3067
3068 DPRINTF("compacting btree %p with path %s", bt, bt->path);
3069
3070 if (bt->path == NULL((void*)0)) {
3071 errno(*__errno()) = EINVAL22;
3072 return BT_FAIL-1;
3073 }
3074
3075 if ((txn = btree_txn_begin(bt, 0)) == NULL((void*)0))
3076 return BT_FAIL-1;
3077
3078 if (asprintf(&compact_path, "%s.compact.XXXXXX", bt->path) == -1) {
3079 btree_txn_abort(txn);
3080 return BT_FAIL-1;
3081 }
3082 fd = mkstemp(compact_path);
3083 if (fd == -1) {
3084 free(compact_path);
3085 btree_txn_abort(txn);
3086 return BT_FAIL-1;
3087 }
3088
3089 if ((btc = btree_open_fd(fd, 0)) == NULL((void*)0))
3090 goto failed;
3091 bcopy(&bt->meta, &btc->meta, sizeof(bt->meta));
3092 btc->meta.revisions = 0;
3093
3094 if ((txnc = btree_txn_begin(btc, 0)) == NULL((void*)0))
3095 goto failed;
3096
3097 if (bt->meta.root != P_INVALID0xFFFFFFFF) {
3098 root = btree_compact_tree(bt, bt->meta.root, btc);
3099 if (root == P_INVALID0xFFFFFFFF)
3100 goto failed;
3101 if (btree_write_meta(btc, root, 0) != BT_SUCCESS0)
3102 goto failed;
3103 }
3104
3105 fsync(fd);
3106
3107 DPRINTF("renaming %s to %s", compact_path, bt->path);
3108 if (rename(compact_path, bt->path) != 0)
3109 goto failed;
3110
3111 /* Write a "tombstone" meta page so other processes can pick up
3112 * the change and re-open the file.
3113 */
3114 if (btree_write_meta(bt, P_INVALID0xFFFFFFFF, BT_TOMBSTONE0x01) != BT_SUCCESS0)
3115 goto failed;
3116
3117 btree_txn_abort(txn);
3118 btree_txn_abort(txnc);
3119 free(compact_path);
3120 btree_close(btc);
3121 mpage_prune(bt);
3122 return 0;
3123
3124failed:
3125 btree_txn_abort(txn);
3126 btree_txn_abort(txnc);
3127 unlink(compact_path);
3128 free(compact_path);
3129 btree_close(btc);
3130 mpage_prune(bt);
3131 return BT_FAIL-1;
3132}
3133
3134/* Reverts the last change. Truncates the file at the last root page.
3135 */
3136int
3137btree_revert(struct btree *bt)
3138{
3139 if (btree_read_meta(bt, NULL((void*)0)) != 0)
3140 return -1;
3141
3142 DPRINTF("truncating file at page %u", bt->meta.root);
3143 return ftruncate(bt->fd, bt->head.psize * bt->meta.root);
3144}
3145
3146void
3147btree_set_cache_size(struct btree *bt, unsigned int cache_size)
3148{
3149 bt->stat.max_cache = cache_size;
3150}
3151
3152unsigned int
3153btree_get_flags(struct btree *bt)
3154{
3155 return (bt->flags & ~BT_FIXPADDING0x01);
3156}
3157
3158const char *
3159btree_get_path(struct btree *bt)
3160{
3161 return bt->path;
3162}
3163
3164const struct btree_stat *
3165btree_stat(struct btree *bt)
3166{
3167 if (bt == NULL((void*)0))
3168 return NULL((void*)0);
3169
3170 bt->stat.branch_pages = bt->meta.branch_pages;
3171 bt->stat.leaf_pages = bt->meta.leaf_pages;
3172 bt->stat.overflow_pages = bt->meta.overflow_pages;
3173 bt->stat.revisions = bt->meta.revisions;
3174 bt->stat.depth = bt->meta.depth;
3175 bt->stat.entries = bt->meta.entries;
3176 bt->stat.psize = bt->head.psize;
3177 bt->stat.created_at = bt->meta.created_at;
3178
3179 return &bt->stat;
3180}
3181